libpostproc: comparison of postprocess_template.c @ 112:d4d919ebc31c
Convert asm keyword into __asm__.
Neither the asm() nor the __asm__() keyword is part of the C99
standard, but while GCC accepts the former in C89 syntax, it is not
accepted in C99 unless GNU extensions are turned on (with -fasm). The
latter form is accepted in any syntax as an extension (without
requiring further command-line options).
The Sun Studio C99 compiler likewise rejects asm() while accepting
__asm__(), albeit with warnings that it is not valid C99 syntax.
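For illustration only (this helper is not part of the changeset), a minimal sketch of the difference the message describes: with gcc, the `asm` spelling fails under `-std=c99` unless GNU extensions are enabled (`-fasm` or `-std=gnu99`), while the `__asm__` spelling compiles in either mode.

```c
#include <stdint.h>

/* Hypothetical example: byte-swap a 32-bit value with x86 inline assembly. */
static inline uint32_t bswap32(uint32_t x)
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
    /* asm("bswap %0" : "+r"(x));     rejected by gcc -std=c99 without -fasm */
    __asm__("bswap %0" : "+r"(x)); /* accepted in both C89 and C99 modes     */
#else
    x = (x >> 24) | ((x >> 8) & 0x0000ff00u)
      | ((x << 8) & 0x00ff0000u) | (x << 24);
#endif
    return x;
}
```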
| author | flameeyes |
|---|---|
| date | Thu, 16 Oct 2008 13:34:09 +0000 |
| parents | 83d51d1fb580 |
| children | bf8f52662dc3 |
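The hunks below all use the same GCC extended-asm shape that both keywords introduce: an instruction template, then output operands, input operands, and clobbered registers, with `volatile` keeping the optimizer from moving or dropping the block. A self-contained sketch of that shape (not taken from the patch; the saturated-add helper and its name are purely illustrative, assuming an x86 target with MMX):

```c
#include <stdint.h>

/* Hypothetical example: add two groups of 8 bytes with unsigned saturation. */
static inline void add_saturated_u8x8(uint8_t *dst, const uint8_t *src)
{
#if defined(__GNUC__) && defined(__MMX__)
    __asm__ volatile(
        "movq    (%0), %%mm0        \n\t" /* load 8 bytes from dst            */
        "paddusb (%1), %%mm0        \n\t" /* add 8 bytes from src, saturating */
        "movq    %%mm0, (%0)        \n\t" /* store the result                 */
        "emms                       \n\t" /* leave MMX state                  */
        :                                 /* no output operands               */
        : "r" (dst), "r" (src)            /* input operands                   */
        : "mm0", "memory");               /* clobbers                         */
#else
    for (int i = 0; i < 8; i++) {
        unsigned s = (unsigned)dst[i] + src[i];
        dst[i] = s > 255 ? 255 : (uint8_t)s;
    }
#endif
}
```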
| 111:bf39174d9785 | 112:d4d919ebc31c |
|---|---|
| 61 * Check if the middle 8x8 Block in the given 8x16 block is flat | 61 * Check if the middle 8x8 Block in the given 8x16 block is flat |
| 62 */ | 62 */ |
| 63 static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ | 63 static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ |
| 64 int numEq= 0, dcOk; | 64 int numEq= 0, dcOk; |
| 65 src+= stride*4; // src points to begin of the 8x8 Block | 65 src+= stride*4; // src points to begin of the 8x8 Block |
| 66 asm volatile( | 66 __asm__ volatile( |
| 67 "movq %0, %%mm7 \n\t" | 67 "movq %0, %%mm7 \n\t" |
| 68 "movq %1, %%mm6 \n\t" | 68 "movq %1, %%mm6 \n\t" |
| 69 : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP]) | 69 : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP]) |
| 70 ); | 70 ); |
| 71 | 71 |
| 72 asm volatile( | 72 __asm__ volatile( |
| 73 "lea (%2, %3), %%"REG_a" \n\t" | 73 "lea (%2, %3), %%"REG_a" \n\t" |
| 74 // 0 1 2 3 4 5 6 7 8 9 | 74 // 0 1 2 3 4 5 6 7 8 9 |
| 75 // %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2 | 75 // %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2 |
| 76 | 76 |
| 77 "movq (%2), %%mm0 \n\t" | 77 "movq (%2), %%mm0 \n\t" |
| 179 #ifndef HAVE_ALTIVEC | 179 #ifndef HAVE_ALTIVEC |
| 180 static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) | 180 static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) |
| 181 { | 181 { |
| 182 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 182 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 183 src+= stride*3; | 183 src+= stride*3; |
| 184 asm volatile( //"movv %0 %1 %2\n\t" | 184 __asm__ volatile( //"movv %0 %1 %2\n\t" |
| 185 "movq %2, %%mm0 \n\t" // QP,..., QP | 185 "movq %2, %%mm0 \n\t" // QP,..., QP |
| 186 "pxor %%mm4, %%mm4 \n\t" | 186 "pxor %%mm4, %%mm4 \n\t" |
| 187 | 187 |
| 188 "movq (%0), %%mm6 \n\t" | 188 "movq (%0), %%mm6 \n\t" |
| 189 "movq (%0, %1), %%mm5 \n\t" | 189 "movq (%0, %1), %%mm5 \n\t" |
| 365 static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP) | 365 static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP) |
| 366 { | 366 { |
| 367 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 367 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 368 src+= stride*3; | 368 src+= stride*3; |
| 369 // FIXME rounding | 369 // FIXME rounding |
| 370 asm volatile( | 370 __asm__ volatile( |
| 371 "pxor %%mm7, %%mm7 \n\t" // 0 | 371 "pxor %%mm7, %%mm7 \n\t" // 0 |
| 372 "movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE | 372 "movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE |
| 373 "leal (%0, %1), %%"REG_a" \n\t" | 373 "leal (%0, %1), %%"REG_a" \n\t" |
| 374 "leal (%%"REG_a", %1, 4), %%"REG_c" \n\t" | 374 "leal (%%"REG_a", %1, 4), %%"REG_c" \n\t" |
| 375 // 0 1 2 3 4 5 6 7 8 9 | 375 // 0 1 2 3 4 5 6 7 8 9 |
| 463 static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) | 463 static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) |
| 464 { | 464 { |
| 465 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 465 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 466 src+= stride*3; | 466 src+= stride*3; |
| 467 | 467 |
| 468 asm volatile( | 468 __asm__ volatile( |
| 469 "pxor %%mm7, %%mm7 \n\t" // 0 | 469 "pxor %%mm7, %%mm7 \n\t" // 0 |
| 470 "lea (%0, %1), %%"REG_a" \n\t" | 470 "lea (%0, %1), %%"REG_a" \n\t" |
| 471 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" | 471 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" |
| 472 // 0 1 2 3 4 5 6 7 8 9 | 472 // 0 1 2 3 4 5 6 7 8 9 |
| 473 // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1 | 473 // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1 |
| 602 | 602 |
| 603 memcpy(tmp, src+stride*7, 8); | 603 memcpy(tmp, src+stride*7, 8); |
| 604 memcpy(tmp+8, src+stride*8, 8); | 604 memcpy(tmp+8, src+stride*8, 8); |
| 605 */ | 605 */ |
| 606 src+= stride*4; | 606 src+= stride*4; |
| 607 asm volatile( | 607 __asm__ volatile( |
| 608 | 608 |
| 609 #if 0 //slightly more accurate and slightly slower | 609 #if 0 //slightly more accurate and slightly slower |
| 610 "pxor %%mm7, %%mm7 \n\t" // 0 | 610 "pxor %%mm7, %%mm7 \n\t" // 0 |
| 611 "lea (%0, %1), %%"REG_a" \n\t" | 611 "lea (%0, %1), %%"REG_a" \n\t" |
| 612 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" | 612 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" |
| 869 } | 869 } |
| 870 } | 870 } |
| 871 */ | 871 */ |
| 872 #elif defined (HAVE_MMX) | 872 #elif defined (HAVE_MMX) |
| 873 src+= stride*4; | 873 src+= stride*4; |
| 874 asm volatile( | 874 __asm__ volatile( |
| 875 "pxor %%mm7, %%mm7 \n\t" | 875 "pxor %%mm7, %%mm7 \n\t" |
| 876 "lea -40(%%"REG_SP"), %%"REG_c" \n\t" // make space for 4 8-byte vars | 876 "lea -40(%%"REG_SP"), %%"REG_c" \n\t" // make space for 4 8-byte vars |
| 877 "and "ALIGN_MASK", %%"REG_c" \n\t" // align | 877 "and "ALIGN_MASK", %%"REG_c" \n\t" // align |
| 878 // 0 1 2 3 4 5 6 7 | 878 // 0 1 2 3 4 5 6 7 |
| 879 // %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 edx+%1 edx+2%1 | 879 // %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 edx+%1 edx+2%1 |
| 1145 | 1145 |
| 1146 #ifndef HAVE_ALTIVEC | 1146 #ifndef HAVE_ALTIVEC |
| 1147 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) | 1147 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) |
| 1148 { | 1148 { |
| 1149 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1149 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1150 asm volatile( | 1150 __asm__ volatile( |
| 1151 "pxor %%mm6, %%mm6 \n\t" | 1151 "pxor %%mm6, %%mm6 \n\t" |
| 1152 "pcmpeqb %%mm7, %%mm7 \n\t" | 1152 "pcmpeqb %%mm7, %%mm7 \n\t" |
| 1153 "movq %2, %%mm0 \n\t" | 1153 "movq %2, %%mm0 \n\t" |
| 1154 "punpcklbw %%mm6, %%mm0 \n\t" | 1154 "punpcklbw %%mm6, %%mm0 \n\t" |
| 1155 "psrlw $1, %%mm0 \n\t" | 1155 "psrlw $1, %%mm0 \n\t" |
| 1429 +2*(*(p -1)) + 4*(*p ) + 2*(*(p +1)) | 1429 +2*(*(p -1)) + 4*(*p ) + 2*(*(p +1)) |
| 1430 +(*(p+stride-1)) + 2*(*(p+stride)) + (*(p+stride+1)); | 1430 +(*(p+stride-1)) + 2*(*(p+stride)) + (*(p+stride+1)); |
| 1431 f= (f + 8)>>4; | 1431 f= (f + 8)>>4; |
| 1432 | 1432 |
| 1433 #ifdef DEBUG_DERING_THRESHOLD | 1433 #ifdef DEBUG_DERING_THRESHOLD |
| 1434 asm volatile("emms\n\t":); | 1434 __asm__ volatile("emms\n\t":); |
| 1435 { | 1435 { |
| 1436 static long long numPixels=0; | 1436 static long long numPixels=0; |
| 1437 if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++; | 1437 if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++; |
| 1438 // if((max-min)<20 || (max-min)*QP<200) | 1438 // if((max-min)<20 || (max-min)*QP<200) |
| 1439 // if((max-min)*QP < 500) | 1439 // if((max-min)*QP < 500) |
| 1499 */ | 1499 */ |
| 1500 static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride) | 1500 static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride) |
| 1501 { | 1501 { |
| 1502 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1502 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1503 src+= 4*stride; | 1503 src+= 4*stride; |
| 1504 asm volatile( | 1504 __asm__ volatile( |
| 1505 "lea (%0, %1), %%"REG_a" \n\t" | 1505 "lea (%0, %1), %%"REG_a" \n\t" |
| 1506 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" | 1506 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" |
| 1507 // 0 1 2 3 4 5 6 7 8 9 | 1507 // 0 1 2 3 4 5 6 7 8 9 |
| 1508 // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1 | 1508 // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1 |
| 1509 | 1509 |
| 1552 */ | 1552 */ |
| 1553 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride) | 1553 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride) |
| 1554 { | 1554 { |
| 1555 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1555 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1556 src+= stride*3; | 1556 src+= stride*3; |
| 1557 asm volatile( | 1557 __asm__ volatile( |
| 1558 "lea (%0, %1), %%"REG_a" \n\t" | 1558 "lea (%0, %1), %%"REG_a" \n\t" |
| 1559 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" | 1559 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" |
| 1560 "lea (%%"REG_d", %1, 4), %%"REG_c" \n\t" | 1560 "lea (%%"REG_d", %1, 4), %%"REG_c" \n\t" |
| 1561 "add %1, %%"REG_c" \n\t" | 1561 "add %1, %%"REG_c" \n\t" |
| 1562 "pxor %%mm7, %%mm7 \n\t" | 1562 "pxor %%mm7, %%mm7 \n\t" |
| 1616 */ | 1616 */ |
| 1617 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp) | 1617 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp) |
| 1618 { | 1618 { |
| 1619 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1619 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1620 src+= stride*4; | 1620 src+= stride*4; |
| 1621 asm volatile( | 1621 __asm__ volatile( |
| 1622 "lea (%0, %1), %%"REG_a" \n\t" | 1622 "lea (%0, %1), %%"REG_a" \n\t" |
| 1623 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" | 1623 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" |
| 1624 "pxor %%mm7, %%mm7 \n\t" | 1624 "pxor %%mm7, %%mm7 \n\t" |
| 1625 "movq (%2), %%mm0 \n\t" | 1625 "movq (%2), %%mm0 \n\t" |
| 1626 // 0 1 2 3 4 5 6 7 8 9 10 | 1626 // 0 1 2 3 4 5 6 7 8 9 10 |
| 1695 */ | 1695 */ |
| 1696 static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2) | 1696 static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2) |
| 1697 { | 1697 { |
| 1698 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1698 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1699 src+= stride*4; | 1699 src+= stride*4; |
| 1700 asm volatile( | 1700 __asm__ volatile( |
| 1701 "lea (%0, %1), %%"REG_a" \n\t" | 1701 "lea (%0, %1), %%"REG_a" \n\t" |
| 1702 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" | 1702 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" |
| 1703 "pxor %%mm7, %%mm7 \n\t" | 1703 "pxor %%mm7, %%mm7 \n\t" |
| 1704 "movq (%2), %%mm0 \n\t" | 1704 "movq (%2), %%mm0 \n\t" |
| 1705 "movq (%3), %%mm1 \n\t" | 1705 "movq (%3), %%mm1 \n\t" |
| 1796 */ | 1796 */ |
| 1797 static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp) | 1797 static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp) |
| 1798 { | 1798 { |
| 1799 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1799 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1800 src+= 4*stride; | 1800 src+= 4*stride; |
| 1801 asm volatile( | 1801 __asm__ volatile( |
| 1802 "lea (%0, %1), %%"REG_a" \n\t" | 1802 "lea (%0, %1), %%"REG_a" \n\t" |
| 1803 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" | 1803 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" |
| 1804 // 0 1 2 3 4 5 6 7 8 9 | 1804 // 0 1 2 3 4 5 6 7 8 9 |
| 1805 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 | 1805 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 |
| 1806 | 1806 |
| 1898 static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) | 1898 static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) |
| 1899 { | 1899 { |
| 1900 #ifdef HAVE_MMX | 1900 #ifdef HAVE_MMX |
| 1901 src+= 4*stride; | 1901 src+= 4*stride; |
| 1902 #ifdef HAVE_MMX2 | 1902 #ifdef HAVE_MMX2 |
| 1903 asm volatile( | 1903 __asm__ volatile( |
| 1904 "lea (%0, %1), %%"REG_a" \n\t" | 1904 "lea (%0, %1), %%"REG_a" \n\t" |
| 1905 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" | 1905 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" |
| 1906 // 0 1 2 3 4 5 6 7 8 9 | 1906 // 0 1 2 3 4 5 6 7 8 9 |
| 1907 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 | 1907 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 |
| 1908 | 1908 |
| 1947 : : "r" (src), "r" ((long)stride) | 1947 : : "r" (src), "r" ((long)stride) |
| 1948 : "%"REG_a, "%"REG_d | 1948 : "%"REG_a, "%"REG_d |
| 1949 ); | 1949 ); |
| 1950 | 1950 |
| 1951 #else // MMX without MMX2 | 1951 #else // MMX without MMX2 |
| 1952 asm volatile( | 1952 __asm__ volatile( |
| 1953 "lea (%0, %1), %%"REG_a" \n\t" | 1953 "lea (%0, %1), %%"REG_a" \n\t" |
| 1954 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" | 1954 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" |
| 1955 // 0 1 2 3 4 5 6 7 8 9 | 1955 // 0 1 2 3 4 5 6 7 8 9 |
| 1956 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 | 1956 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 |
| 1957 "pxor %%mm7, %%mm7 \n\t" | 1957 "pxor %%mm7, %%mm7 \n\t" |
| 2016 /** | 2016 /** |
| 2017 * transposes and shift the given 8x8 Block into dst1 and dst2 | 2017 * transposes and shift the given 8x8 Block into dst1 and dst2 |
| 2018 */ | 2018 */ |
| 2019 static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride) | 2019 static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride) |
| 2020 { | 2020 { |
| 2021 asm( | 2021 __asm__( |
| 2022 "lea (%0, %1), %%"REG_a" \n\t" | 2022 "lea (%0, %1), %%"REG_a" \n\t" |
| 2023 // 0 1 2 3 4 5 6 7 8 9 | 2023 // 0 1 2 3 4 5 6 7 8 9 |
| 2024 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 | 2024 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 |
| 2025 "movq (%0), %%mm0 \n\t" // 12345678 | 2025 "movq (%0), %%mm0 \n\t" // 12345678 |
| 2026 "movq (%%"REG_a"), %%mm1 \n\t" // abcdefgh | 2026 "movq (%%"REG_a"), %%mm1 \n\t" // abcdefgh |
| 2101 /** | 2101 /** |
| 2102 * transposes the given 8x8 block | 2102 * transposes the given 8x8 block |
| 2103 */ | 2103 */ |
| 2104 static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src) | 2104 static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src) |
| 2105 { | 2105 { |
| 2106 asm( | 2106 __asm__( |
| 2107 "lea (%0, %1), %%"REG_a" \n\t" | 2107 "lea (%0, %1), %%"REG_a" \n\t" |
| 2108 "lea (%%"REG_a",%1,4), %%"REG_d" \n\t" | 2108 "lea (%%"REG_a",%1,4), %%"REG_d" \n\t" |
| 2109 // 0 1 2 3 4 5 6 7 8 9 | 2109 // 0 1 2 3 4 5 6 7 8 9 |
| 2110 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 | 2110 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 |
| 2111 "movq (%2), %%mm0 \n\t" // 12345678 | 2111 "movq (%2), %%mm0 \n\t" // 12345678 |
| 2190 tempBlurredPast[129]= maxNoise[2]; | 2190 tempBlurredPast[129]= maxNoise[2]; |
| 2191 | 2191 |
| 2192 #define FAST_L2_DIFF | 2192 #define FAST_L2_DIFF |
| 2193 //#define L1_DIFF //u should change the thresholds too if u try that one | 2193 //#define L1_DIFF //u should change the thresholds too if u try that one |
| 2194 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 2194 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 2195 asm volatile( | 2195 __asm__ volatile( |
| 2196 "lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride | 2196 "lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride |
| 2197 "lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride | 2197 "lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride |
| 2198 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride | 2198 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride |
| 2199 // 0 1 2 3 4 5 6 7 8 9 | 2199 // 0 1 2 3 4 5 6 7 8 9 |
| 2200 // %x %x+%2 %x+2%2 %x+eax %x+4%2 %x+edx %x+2eax %x+ecx %x+8%2 | 2200 // %x %x+%2 %x+2%2 %x+eax %x+4%2 %x+edx %x+2eax %x+ecx %x+8%2 |
| 2573 static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int stride, PPContext *c){ | 2573 static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int stride, PPContext *c){ |
| 2574 int64_t dc_mask, eq_mask, both_masks; | 2574 int64_t dc_mask, eq_mask, both_masks; |
| 2575 int64_t sums[10*8*2]; | 2575 int64_t sums[10*8*2]; |
| 2576 src+= step*3; // src points to begin of the 8x8 Block | 2576 src+= step*3; // src points to begin of the 8x8 Block |
| 2577 //START_TIMER | 2577 //START_TIMER |
| 2578 asm volatile( | 2578 __asm__ volatile( |
| 2579 "movq %0, %%mm7 \n\t" | 2579 "movq %0, %%mm7 \n\t" |
| 2580 "movq %1, %%mm6 \n\t" | 2580 "movq %1, %%mm6 \n\t" |
| 2581 : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP]) | 2581 : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP]) |
| 2582 ); | 2582 ); |
| 2583 | 2583 |
| 2584 asm volatile( | 2584 __asm__ volatile( |
| 2585 "lea (%2, %3), %%"REG_a" \n\t" | 2585 "lea (%2, %3), %%"REG_a" \n\t" |
| 2586 // 0 1 2 3 4 5 6 7 8 9 | 2586 // 0 1 2 3 4 5 6 7 8 9 |
| 2587 // %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2 | 2587 // %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2 |
| 2588 | 2588 |
| 2589 "movq (%2), %%mm0 \n\t" | 2589 "movq (%2), %%mm0 \n\t" |
| 2684 | 2684 |
| 2685 if(both_masks){ | 2685 if(both_masks){ |
| 2686 long offset= -8*step; | 2686 long offset= -8*step; |
| 2687 int64_t *temp_sums= sums; | 2687 int64_t *temp_sums= sums; |
| 2688 | 2688 |
| 2689 asm volatile( | 2689 __asm__ volatile( |
| 2690 "movq %2, %%mm0 \n\t" // QP,..., QP | 2690 "movq %2, %%mm0 \n\t" // QP,..., QP |
| 2691 "pxor %%mm4, %%mm4 \n\t" | 2691 "pxor %%mm4, %%mm4 \n\t" |
| 2692 | 2692 |
| 2693 "movq (%0), %%mm6 \n\t" | 2693 "movq (%0), %%mm6 \n\t" |
| 2694 "movq (%0, %1), %%mm5 \n\t" | 2694 "movq (%0, %1), %%mm5 \n\t" |
| 2823 : "r" ((long)step), "m" (c->pQPb), "r"(sums), "g"(src) | 2823 : "r" ((long)step), "m" (c->pQPb), "r"(sums), "g"(src) |
| 2824 ); | 2824 ); |
| 2825 | 2825 |
| 2826 src+= step; // src points to begin of the 8x8 Block | 2826 src+= step; // src points to begin of the 8x8 Block |
| 2827 | 2827 |
| 2828 asm volatile( | 2828 __asm__ volatile( |
| 2829 "movq %4, %%mm6 \n\t" | 2829 "movq %4, %%mm6 \n\t" |
| 2830 "pcmpeqb %%mm5, %%mm5 \n\t" | 2830 "pcmpeqb %%mm5, %%mm5 \n\t" |
| 2831 "pxor %%mm6, %%mm5 \n\t" | 2831 "pxor %%mm6, %%mm5 \n\t" |
| 2832 "pxor %%mm7, %%mm7 \n\t" | 2832 "pxor %%mm7, %%mm7 \n\t" |
| 2833 | 2833 |
| 2862 }else | 2862 }else |
| 2863 src+= step; // src points to begin of the 8x8 Block | 2863 src+= step; // src points to begin of the 8x8 Block |
| 2864 | 2864 |
| 2865 if(eq_mask != -1LL){ | 2865 if(eq_mask != -1LL){ |
| 2866 uint8_t *temp_src= src; | 2866 uint8_t *temp_src= src; |
| 2867 asm volatile( | 2867 __asm__ volatile( |
| 2868 "pxor %%mm7, %%mm7 \n\t" | 2868 "pxor %%mm7, %%mm7 \n\t" |
| 2869 "lea -40(%%"REG_SP"), %%"REG_c" \n\t" // make space for 4 8-byte vars | 2869 "lea -40(%%"REG_SP"), %%"REG_c" \n\t" // make space for 4 8-byte vars |
| 2870 "and "ALIGN_MASK", %%"REG_c" \n\t" // align | 2870 "and "ALIGN_MASK", %%"REG_c" \n\t" // align |
| 2871 // 0 1 2 3 4 5 6 7 8 9 | 2871 // 0 1 2 3 4 5 6 7 8 9 |
| 2872 // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %1+8%1 ecx+4%1 | 2872 // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %1+8%1 ecx+4%1 |
| 3119 #ifndef HAVE_MMX | 3119 #ifndef HAVE_MMX |
| 3120 int i; | 3120 int i; |
| 3121 #endif | 3121 #endif |
| 3122 if(levelFix){ | 3122 if(levelFix){ |
| 3123 #ifdef HAVE_MMX | 3123 #ifdef HAVE_MMX |
| 3124 asm volatile( | 3124 __asm__ volatile( |
| 3125 "movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset | 3125 "movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset |
| 3126 "movq 8(%%"REG_a"), %%mm3 \n\t" // packedYScale | 3126 "movq 8(%%"REG_a"), %%mm3 \n\t" // packedYScale |
| 3127 "lea (%2,%4), %%"REG_a" \n\t" | 3127 "lea (%2,%4), %%"REG_a" \n\t" |
| 3128 "lea (%3,%5), %%"REG_d" \n\t" | 3128 "lea (%3,%5), %%"REG_d" \n\t" |
| 3129 "pxor %%mm4, %%mm4 \n\t" | 3129 "pxor %%mm4, %%mm4 \n\t" |
| 3202 memcpy( &(dst[dstStride*i]), | 3202 memcpy( &(dst[dstStride*i]), |
| 3203 &(src[srcStride*i]), BLOCK_SIZE); | 3203 &(src[srcStride*i]), BLOCK_SIZE); |
| 3204 #endif //HAVE_MMX | 3204 #endif //HAVE_MMX |
| 3205 }else{ | 3205 }else{ |
| 3206 #ifdef HAVE_MMX | 3206 #ifdef HAVE_MMX |
| 3207 asm volatile( | 3207 __asm__ volatile( |
| 3208 "lea (%0,%2), %%"REG_a" \n\t" | 3208 "lea (%0,%2), %%"REG_a" \n\t" |
| 3209 "lea (%1,%3), %%"REG_d" \n\t" | 3209 "lea (%1,%3), %%"REG_d" \n\t" |
| 3210 | 3210 |
| 3211 #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \ | 3211 #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \ |
| 3212 "movq " #src1 ", %%mm0 \n\t"\ | 3212 "movq " #src1 ", %%mm0 \n\t"\ |
| 3242 * Duplicates the given 8 src pixels ? times upward | 3242 * Duplicates the given 8 src pixels ? times upward |
| 3243 */ | 3243 */ |
| 3244 static inline void RENAME(duplicate)(uint8_t src[], int stride) | 3244 static inline void RENAME(duplicate)(uint8_t src[], int stride) |
| 3245 { | 3245 { |
| 3246 #ifdef HAVE_MMX | 3246 #ifdef HAVE_MMX |
| 3247 asm volatile( | 3247 __asm__ volatile( |
| 3248 "movq (%0), %%mm0 \n\t" | 3248 "movq (%0), %%mm0 \n\t" |
| 3249 "add %1, %0 \n\t" | 3249 "add %1, %0 \n\t" |
| 3250 "movq %%mm0, (%0) \n\t" | 3250 "movq %%mm0, (%0) \n\t" |
| 3251 "movq %%mm0, (%0, %1) \n\t" | 3251 "movq %%mm0, (%0, %1) \n\t" |
| 3252 "movq %%mm0, (%0, %1, 2) \n\t" | 3252 "movq %%mm0, (%0, %1, 2) \n\t" |
| 3390 prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); | 3390 prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); |
| 3391 prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32); | 3391 prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32); |
| 3392 prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32); | 3392 prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32); |
| 3393 */ | 3393 */ |
| 3394 | 3394 |
| 3395 asm( | 3395 __asm__( |
| 3396 "mov %4, %%"REG_a" \n\t" | 3396 "mov %4, %%"REG_a" \n\t" |
| 3397 "shr $2, %%"REG_a" \n\t" | 3397 "shr $2, %%"REG_a" \n\t" |
| 3398 "and $6, %%"REG_a" \n\t" | 3398 "and $6, %%"REG_a" \n\t" |
| 3399 "add %5, %%"REG_a" \n\t" | 3399 "add %5, %%"REG_a" \n\t" |
| 3400 "mov %%"REG_a", %%"REG_d" \n\t" | 3400 "mov %%"REG_a", %%"REG_d" \n\t" |
| 3506 c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16; | 3506 c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16; |
| 3507 yHistogram[ srcBlock[srcStride*12 + 4] ]++; | 3507 yHistogram[ srcBlock[srcStride*12 + 4] ]++; |
| 3508 } | 3508 } |
| 3509 c.QP= QP; | 3509 c.QP= QP; |
| 3510 #ifdef HAVE_MMX | 3510 #ifdef HAVE_MMX |
| 3511 asm volatile( | 3511 __asm__ volatile( |
| 3512 "movd %1, %%mm7 \n\t" | 3512 "movd %1, %%mm7 \n\t" |
| 3513 "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP | 3513 "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP |
| 3514 "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP | 3514 "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP |
| 3515 "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP | 3515 "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP |
| 3516 "movq %%mm7, %0 \n\t" | 3516 "movq %%mm7, %0 \n\t" |
| 3526 prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); | 3526 prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); |
| 3527 prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32); | 3527 prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32); |
| 3528 prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32); | 3528 prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32); |
| 3529 */ | 3529 */ |
| 3530 | 3530 |
| 3531 asm( | 3531 __asm__( |
| 3532 "mov %4, %%"REG_a" \n\t" | 3532 "mov %4, %%"REG_a" \n\t" |
| 3533 "shr $2, %%"REG_a" \n\t" | 3533 "shr $2, %%"REG_a" \n\t" |
| 3534 "and $6, %%"REG_a" \n\t" | 3534 "and $6, %%"REG_a" \n\t" |
| 3535 "add %5, %%"REG_a" \n\t" | 3535 "add %5, %%"REG_a" \n\t" |
| 3536 "mov %%"REG_a", %%"REG_d" \n\t" | 3536 "mov %%"REG_a", %%"REG_d" \n\t" |
| 3698 + dstBlock[x +13*dstStride] | 3698 + dstBlock[x +13*dstStride] |
| 3699 + dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride]; | 3699 + dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride]; |
| 3700 }*/ | 3700 }*/ |
| 3701 } | 3701 } |
| 3702 #ifdef HAVE_3DNOW | 3702 #ifdef HAVE_3DNOW |
| 3703 asm volatile("femms"); | 3703 __asm__ volatile("femms"); |
| 3704 #elif defined (HAVE_MMX) | 3704 #elif defined (HAVE_MMX) |
| 3705 asm volatile("emms"); | 3705 __asm__ volatile("emms"); |
| 3706 #endif | 3706 #endif |
| 3707 | 3707 |
| 3708 #ifdef DEBUG_BRIGHTNESS | 3708 #ifdef DEBUG_BRIGHTNESS |
| 3709 if(!isColor){ | 3709 if(!isColor){ |
| 3710 int max=1; | 3710 int max=1; |
