comparison postprocess_template.c @ 112:d4d919ebc31c libpostproc

Convert the asm keyword into __asm__. Neither the asm() nor the __asm__() keyword is part of the C99 standard, but while GCC accepts the former in C89 syntax, it rejects it in C99 mode unless GNU extensions are enabled (with -fasm). The latter form is accepted in any mode as an extension, without requiring further command-line options. The Sun Studio C99 compiler likewise rejects asm() while accepting __asm__(), albeit with warnings that it is not valid C99 syntax.
author flameeyes
date Thu, 16 Oct 2008 13:34:09 +0000
parents 83d51d1fb580
children bf8f52662dc3
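
For illustration, a minimal sketch of what the keyword change means in practice (the helper names below are made up for this example and do not appear in libpostproc): with gcc in gnu89 or gnu99 mode both spellings compile, but under a strict -std=c99 only the double-underscore form is accepted unless -fasm is added, which is why every asm statement in the file is rewritten as __asm__.

    #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
    /* gnu89-only spelling: gcc -std=c99 rejects this statement because
       "asm" is not a keyword in strict C99 mode (it needs gnu99 or -fasm). */
    static inline void mmx_cleanup_old(void)
    {
        asm volatile("emms");
    }

    /* extension spelling: accepted by gcc in every -std= mode, and by the
       Sun Studio C99 compiler (with a warning), as the log message notes. */
    static inline void mmx_cleanup_new(void)
    {
        __asm__ volatile("emms");
    }
    #endif
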
111:bf39174d9785 (old) → 112:d4d919ebc31c (new)
  61  * Check if the middle 8x8 Block in the given 8x16 block is flat
  62  */
  63  static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
  64  int numEq= 0, dcOk;
  65  src+= stride*4; // src points to begin of the 8x8 Block
- 66  asm volatile(
+ 66  __asm__ volatile(
  67  "movq %0, %%mm7 \n\t"
  68  "movq %1, %%mm6 \n\t"
  69  : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
  70  );
  71
- 72  asm volatile(
+ 72  __asm__ volatile(
  73  "lea (%2, %3), %%"REG_a" \n\t"
  74  // 0 1 2 3 4 5 6 7 8 9
  75  // %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2
  76
  77  "movq (%2), %%mm0 \n\t"
  179  #ifndef HAVE_ALTIVEC
  180  static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
  181  {
  182  #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
  183  src+= stride*3;
- 184  asm volatile( //"movv %0 %1 %2\n\t"
+ 184  __asm__ volatile( //"movv %0 %1 %2\n\t"
  185  "movq %2, %%mm0 \n\t" // QP,..., QP
  186  "pxor %%mm4, %%mm4 \n\t"
  187
  188  "movq (%0), %%mm6 \n\t"
  189  "movq (%0, %1), %%mm5 \n\t"
  365  static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
  366  {
  367  #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
  368  src+= stride*3;
  369  // FIXME rounding
- 370  asm volatile(
+ 370  __asm__ volatile(
  371  "pxor %%mm7, %%mm7 \n\t" // 0
  372  "movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE
  373  "leal (%0, %1), %%"REG_a" \n\t"
  374  "leal (%%"REG_a", %1, 4), %%"REG_c" \n\t"
  375  // 0 1 2 3 4 5 6 7 8 9
  463  static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
  464  {
  465  #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
  466  src+= stride*3;
  467
- 468  asm volatile(
+ 468  __asm__ volatile(
  469  "pxor %%mm7, %%mm7 \n\t" // 0
  470  "lea (%0, %1), %%"REG_a" \n\t"
  471  "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
  472  // 0 1 2 3 4 5 6 7 8 9
  473  // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1
  602
  603  memcpy(tmp, src+stride*7, 8);
  604  memcpy(tmp+8, src+stride*8, 8);
  605  */
  606  src+= stride*4;
- 607  asm volatile(
+ 607  __asm__ volatile(
  608
  609  #if 0 //slightly more accurate and slightly slower
  610  "pxor %%mm7, %%mm7 \n\t" // 0
  611  "lea (%0, %1), %%"REG_a" \n\t"
  612  "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
  869  }
  870  }
  871  */
  872  #elif defined (HAVE_MMX)
  873  src+= stride*4;
- 874  asm volatile(
+ 874  __asm__ volatile(
  875  "pxor %%mm7, %%mm7 \n\t"
  876  "lea -40(%%"REG_SP"), %%"REG_c" \n\t" // make space for 4 8-byte vars
  877  "and "ALIGN_MASK", %%"REG_c" \n\t" // align
  878  // 0 1 2 3 4 5 6 7
  879  // %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 edx+%1 edx+2%1
  1145
  1146  #ifndef HAVE_ALTIVEC
  1147  static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
  1148  {
  1149  #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
- 1150  asm volatile(
+ 1150  __asm__ volatile(
  1151  "pxor %%mm6, %%mm6 \n\t"
  1152  "pcmpeqb %%mm7, %%mm7 \n\t"
  1153  "movq %2, %%mm0 \n\t"
  1154  "punpcklbw %%mm6, %%mm0 \n\t"
  1155  "psrlw $1, %%mm0 \n\t"
  1429  +2*(*(p -1)) + 4*(*p ) + 2*(*(p +1))
  1430  +(*(p+stride-1)) + 2*(*(p+stride)) + (*(p+stride+1));
  1431  f= (f + 8)>>4;
  1432
  1433  #ifdef DEBUG_DERING_THRESHOLD
- 1434  asm volatile("emms\n\t":);
+ 1434  __asm__ volatile("emms\n\t":);
  1435  {
  1436  static long long numPixels=0;
  1437  if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++;
  1438  // if((max-min)<20 || (max-min)*QP<200)
  1439  // if((max-min)*QP < 500)
  1499  */
  1500  static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride)
  1501  {
  1502  #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
  1503  src+= 4*stride;
- 1504  asm volatile(
+ 1504  __asm__ volatile(
  1505  "lea (%0, %1), %%"REG_a" \n\t"
  1506  "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
  1507  // 0 1 2 3 4 5 6 7 8 9
  1508  // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1
  1509
  1552  */
  1553  static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
  1554  {
  1555  #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
  1556  src+= stride*3;
- 1557  asm volatile(
+ 1557  __asm__ volatile(
  1558  "lea (%0, %1), %%"REG_a" \n\t"
  1559  "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
  1560  "lea (%%"REG_d", %1, 4), %%"REG_c" \n\t"
  1561  "add %1, %%"REG_c" \n\t"
  1562  "pxor %%mm7, %%mm7 \n\t"
  1616  */
  1617  static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
  1618  {
  1619  #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
  1620  src+= stride*4;
- 1621  asm volatile(
+ 1621  __asm__ volatile(
  1622  "lea (%0, %1), %%"REG_a" \n\t"
  1623  "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
  1624  "pxor %%mm7, %%mm7 \n\t"
  1625  "movq (%2), %%mm0 \n\t"
  1626  // 0 1 2 3 4 5 6 7 8 9 10
  1695  */
  1696  static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
  1697  {
  1698  #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
  1699  src+= stride*4;
- 1700  asm volatile(
+ 1700  __asm__ volatile(
  1701  "lea (%0, %1), %%"REG_a" \n\t"
  1702  "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
  1703  "pxor %%mm7, %%mm7 \n\t"
  1704  "movq (%2), %%mm0 \n\t"
  1705  "movq (%3), %%mm1 \n\t"
  1796  */
  1797  static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
  1798  {
  1799  #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
  1800  src+= 4*stride;
- 1801  asm volatile(
+ 1801  __asm__ volatile(
  1802  "lea (%0, %1), %%"REG_a" \n\t"
  1803  "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
  1804  // 0 1 2 3 4 5 6 7 8 9
  1805  // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1
  1806
  1898  static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
  1899  {
  1900  #ifdef HAVE_MMX
  1901  src+= 4*stride;
  1902  #ifdef HAVE_MMX2
- 1903  asm volatile(
+ 1903  __asm__ volatile(
  1904  "lea (%0, %1), %%"REG_a" \n\t"
  1905  "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
  1906  // 0 1 2 3 4 5 6 7 8 9
  1907  // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1
  1908
  1947  : : "r" (src), "r" ((long)stride)
  1948  : "%"REG_a, "%"REG_d
  1949  );
  1950
  1951  #else // MMX without MMX2
- 1952  asm volatile(
+ 1952  __asm__ volatile(
  1953  "lea (%0, %1), %%"REG_a" \n\t"
  1954  "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
  1955  // 0 1 2 3 4 5 6 7 8 9
  1956  // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1
  1957  "pxor %%mm7, %%mm7 \n\t"
  2016  /**
  2017  * transposes and shift the given 8x8 Block into dst1 and dst2
  2018  */
  2019  static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride)
  2020  {
- 2021  asm(
+ 2021  __asm__(
  2022  "lea (%0, %1), %%"REG_a" \n\t"
  2023  // 0 1 2 3 4 5 6 7 8 9
  2024  // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1
  2025  "movq (%0), %%mm0 \n\t" // 12345678
  2026  "movq (%%"REG_a"), %%mm1 \n\t" // abcdefgh
  2101  /**
  2102  * transposes the given 8x8 block
  2103  */
  2104  static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src)
  2105  {
- 2106  asm(
+ 2106  __asm__(
  2107  "lea (%0, %1), %%"REG_a" \n\t"
  2108  "lea (%%"REG_a",%1,4), %%"REG_d" \n\t"
  2109  // 0 1 2 3 4 5 6 7 8 9
  2110  // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1
  2111  "movq (%2), %%mm0 \n\t" // 12345678
  2190  tempBlurredPast[129]= maxNoise[2];
  2191
  2192  #define FAST_L2_DIFF
  2193  //#define L1_DIFF //u should change the thresholds too if u try that one
  2194  #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
- 2195  asm volatile(
+ 2195  __asm__ volatile(
  2196  "lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride
  2197  "lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride
  2198  "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride
  2199  // 0 1 2 3 4 5 6 7 8 9
  2200  // %x %x+%2 %x+2%2 %x+eax %x+4%2 %x+edx %x+2eax %x+ecx %x+8%2
  2573  static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int stride, PPContext *c){
  2574  int64_t dc_mask, eq_mask, both_masks;
  2575  int64_t sums[10*8*2];
  2576  src+= step*3; // src points to begin of the 8x8 Block
  2577  //START_TIMER
- 2578  asm volatile(
+ 2578  __asm__ volatile(
  2579  "movq %0, %%mm7 \n\t"
  2580  "movq %1, %%mm6 \n\t"
  2581  : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
  2582  );
  2583
- 2584  asm volatile(
+ 2584  __asm__ volatile(
  2585  "lea (%2, %3), %%"REG_a" \n\t"
  2586  // 0 1 2 3 4 5 6 7 8 9
  2587  // %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2
  2588
  2589  "movq (%2), %%mm0 \n\t"
  2684
  2685  if(both_masks){
  2686  long offset= -8*step;
  2687  int64_t *temp_sums= sums;
  2688
- 2689  asm volatile(
+ 2689  __asm__ volatile(
  2690  "movq %2, %%mm0 \n\t" // QP,..., QP
  2691  "pxor %%mm4, %%mm4 \n\t"
  2692
  2693  "movq (%0), %%mm6 \n\t"
  2694  "movq (%0, %1), %%mm5 \n\t"
  2823  : "r" ((long)step), "m" (c->pQPb), "r"(sums), "g"(src)
  2824  );
  2825
  2826  src+= step; // src points to begin of the 8x8 Block
  2827
- 2828  asm volatile(
+ 2828  __asm__ volatile(
  2829  "movq %4, %%mm6 \n\t"
  2830  "pcmpeqb %%mm5, %%mm5 \n\t"
  2831  "pxor %%mm6, %%mm5 \n\t"
  2832  "pxor %%mm7, %%mm7 \n\t"
  2833
  2862  }else
  2863  src+= step; // src points to begin of the 8x8 Block
  2864
  2865  if(eq_mask != -1LL){
  2866  uint8_t *temp_src= src;
- 2867  asm volatile(
+ 2867  __asm__ volatile(
  2868  "pxor %%mm7, %%mm7 \n\t"
  2869  "lea -40(%%"REG_SP"), %%"REG_c" \n\t" // make space for 4 8-byte vars
  2870  "and "ALIGN_MASK", %%"REG_c" \n\t" // align
  2871  // 0 1 2 3 4 5 6 7 8 9
  2872  // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %1+8%1 ecx+4%1
  3119  #ifndef HAVE_MMX
  3120  int i;
  3121  #endif
  3122  if(levelFix){
  3123  #ifdef HAVE_MMX
- 3124  asm volatile(
+ 3124  __asm__ volatile(
  3125  "movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset
  3126  "movq 8(%%"REG_a"), %%mm3 \n\t" // packedYScale
  3127  "lea (%2,%4), %%"REG_a" \n\t"
  3128  "lea (%3,%5), %%"REG_d" \n\t"
  3129  "pxor %%mm4, %%mm4 \n\t"
  3202  memcpy( &(dst[dstStride*i]),
  3203  &(src[srcStride*i]), BLOCK_SIZE);
  3204  #endif //HAVE_MMX
  3205  }else{
  3206  #ifdef HAVE_MMX
- 3207  asm volatile(
+ 3207  __asm__ volatile(
  3208  "lea (%0,%2), %%"REG_a" \n\t"
  3209  "lea (%1,%3), %%"REG_d" \n\t"
  3210
  3211  #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \
  3212  "movq " #src1 ", %%mm0 \n\t"\
  3242  * Duplicates the given 8 src pixels ? times upward
  3243  */
  3244  static inline void RENAME(duplicate)(uint8_t src[], int stride)
  3245  {
  3246  #ifdef HAVE_MMX
- 3247  asm volatile(
+ 3247  __asm__ volatile(
  3248  "movq (%0), %%mm0 \n\t"
  3249  "add %1, %0 \n\t"
  3250  "movq %%mm0, (%0) \n\t"
  3251  "movq %%mm0, (%0, %1) \n\t"
  3252  "movq %%mm0, (%0, %1, 2) \n\t"
  3390  prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
  3391  prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
  3392  prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
  3393  */
  3394
- 3395  asm(
+ 3395  __asm__(
  3396  "mov %4, %%"REG_a" \n\t"
  3397  "shr $2, %%"REG_a" \n\t"
  3398  "and $6, %%"REG_a" \n\t"
  3399  "add %5, %%"REG_a" \n\t"
  3400  "mov %%"REG_a", %%"REG_d" \n\t"
  3506  c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
  3507  yHistogram[ srcBlock[srcStride*12 + 4] ]++;
  3508  }
  3509  c.QP= QP;
  3510  #ifdef HAVE_MMX
- 3511  asm volatile(
+ 3511  __asm__ volatile(
  3512  "movd %1, %%mm7 \n\t"
  3513  "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
  3514  "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
  3515  "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP
  3516  "movq %%mm7, %0 \n\t"
  3526  prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
  3527  prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
  3528  prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
  3529  */
  3530
- 3531  asm(
+ 3531  __asm__(
  3532  "mov %4, %%"REG_a" \n\t"
  3533  "shr $2, %%"REG_a" \n\t"
  3534  "and $6, %%"REG_a" \n\t"
  3535  "add %5, %%"REG_a" \n\t"
  3536  "mov %%"REG_a", %%"REG_d" \n\t"
  3698  + dstBlock[x +13*dstStride]
  3699  + dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride];
  3700  }*/
  3701  }
  3702  #ifdef HAVE_3DNOW
- 3703  asm volatile("femms");
+ 3703  __asm__ volatile("femms");
  3704  #elif defined (HAVE_MMX)
- 3705  asm volatile("emms");
+ 3705  __asm__ volatile("emms");
  3706  #endif
  3707
  3708  #ifdef DEBUG_BRIGHTNESS
  3709  if(!isColor){
  3710  int max=1;