Mercurial > libavcodec.hg
comparison libpostproc/postprocess_template.c @ 2978:403183bbb505 libavcodec
Add some comments to #ifdef #else #endif blocks and fix wrong ones.
| author | diego |
|---|---|
| date | Wed, 21 Dec 2005 17:50:40 +0000 |
| parents | ef2149182f1c |
| children | bfabfdf9ce55 |
comparison
equal
deleted
inserted
replaced
| 2977:49c6f104a76f | 2978:403183bbb505 |
|---|---|
| 184 else return 1; | 184 else return 1; |
| 185 }else{ | 185 }else{ |
| 186 return 2; | 186 return 2; |
| 187 } | 187 } |
| 188 } | 188 } |
| 189 #endif | 189 #endif //HAVE_MMX |
| 190 | 190 |
| 191 /** | 191 /** |
| 192 * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) | 192 * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) |
| 193 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 | 193 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 |
| 194 */ | 194 */ |
| 320 | 320 |
| 321 : | 321 : |
| 322 : "r" (src), "r" ((long)stride), "m" (c->pQPb) | 322 : "r" (src), "r" ((long)stride), "m" (c->pQPb) |
| 323 : "%"REG_a, "%"REG_c | 323 : "%"REG_a, "%"REG_c |
| 324 ); | 324 ); |
| 325 #else | 325 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 326 const int l1= stride; | 326 const int l1= stride; |
| 327 const int l2= stride + l1; | 327 const int l2= stride + l1; |
| 328 const int l3= stride + l2; | 328 const int l3= stride + l2; |
| 329 const int l4= stride + l3; | 329 const int l4= stride + l3; |
| 330 const int l5= stride + l4; | 330 const int l5= stride + l4; |
| 360 src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4; | 360 src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4; |
| 361 src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4; | 361 src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4; |
| 362 | 362 |
| 363 src++; | 363 src++; |
| 364 } | 364 } |
| 365 #endif | 365 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 366 } | 366 } |
| 367 #endif //HAVE_ALTIVEC | 367 #endif //HAVE_ALTIVEC |
| 368 | 368 |
| 369 #if 0 | 369 #if 0 |
| 370 /** | 370 /** |
| 440 | 440 |
| 441 : | 441 : |
| 442 : "r" (src), "r" ((long)stride) | 442 : "r" (src), "r" ((long)stride) |
| 443 : "%"REG_a, "%"REG_c | 443 : "%"REG_a, "%"REG_c |
| 444 ); | 444 ); |
| 445 #else | 445 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 446 const int l1= stride; | 446 const int l1= stride; |
| 447 const int l2= stride + l1; | 447 const int l2= stride + l1; |
| 448 const int l3= stride + l2; | 448 const int l3= stride + l2; |
| 449 const int l4= stride + l3; | 449 const int l4= stride + l3; |
| 450 const int l5= stride + l4; | 450 const int l5= stride + l4; |
| 466 src[x+l6] -=v>>3; | 466 src[x+l6] -=v>>3; |
| 467 | 467 |
| 468 } | 468 } |
| 469 } | 469 } |
| 470 | 470 |
| 471 #endif | 471 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 472 } | 472 } |
| 473 #endif | 473 #endif //0 |
| 474 | 474 |
| 475 /** | 475 /** |
| 476 * Experimental Filter 1 | 476 * Experimental Filter 1 |
| 477 * will not damage linear gradients | 477 * will not damage linear gradients |
| 478 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter | 478 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter |
| 565 | 565 |
| 566 : | 566 : |
| 567 : "r" (src), "r" ((long)stride), "m" (co->pQPb) | 567 : "r" (src), "r" ((long)stride), "m" (co->pQPb) |
| 568 : "%"REG_a, "%"REG_c | 568 : "%"REG_a, "%"REG_c |
| 569 ); | 569 ); |
| 570 #else | 570 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 571 | 571 |
| 572 const int l1= stride; | 572 const int l1= stride; |
| 573 const int l2= stride + l1; | 573 const int l2= stride + l1; |
| 574 const int l3= stride + l2; | 574 const int l3= stride + l2; |
| 575 const int l4= stride + l3; | 575 const int l4= stride + l3; |
| 602 src[l7] -=v>>3; | 602 src[l7] -=v>>3; |
| 603 | 603 |
| 604 } | 604 } |
| 605 src++; | 605 src++; |
| 606 } | 606 } |
| 607 #endif | 607 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 608 } | 608 } |
| 609 | 609 |
| 610 #ifndef HAVE_ALTIVEC | 610 #ifndef HAVE_ALTIVEC |
| 611 static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c) | 611 static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c) |
| 612 { | 612 { |
| 731 "paddb %%mm3, %%mm2 \n\t" | 731 "paddb %%mm3, %%mm2 \n\t" |
| 732 "pxor %%mm6, %%mm0 \n\t" | 732 "pxor %%mm6, %%mm0 \n\t" |
| 733 "pxor %%mm6, %%mm2 \n\t" | 733 "pxor %%mm6, %%mm2 \n\t" |
| 734 "movq %%mm0, (%%"REG_a", %1, 2) \n\t" | 734 "movq %%mm0, (%%"REG_a", %1, 2) \n\t" |
| 735 "movq %%mm2, (%0, %1, 4) \n\t" | 735 "movq %%mm2, (%0, %1, 4) \n\t" |
| 736 #endif | 736 #endif //0 |
| 737 | 737 |
| 738 "lea (%0, %1), %%"REG_a" \n\t" | 738 "lea (%0, %1), %%"REG_a" \n\t" |
| 739 "pcmpeqb %%mm6, %%mm6 \n\t" // -1 | 739 "pcmpeqb %%mm6, %%mm6 \n\t" // -1 |
| 740 // 0 1 2 3 4 5 6 7 | 740 // 0 1 2 3 4 5 6 7 |
| 741 // %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ecx+%1 ecx+2%1 | 741 // %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ecx+%1 ecx+2%1 |
| 1130 | 1130 |
| 1131 : "+r" (src) | 1131 : "+r" (src) |
| 1132 : "r" ((long)stride), "m" (c->pQPb) | 1132 : "r" ((long)stride), "m" (c->pQPb) |
| 1133 : "%"REG_a, "%"REG_c | 1133 : "%"REG_a, "%"REG_c |
| 1134 ); | 1134 ); |
| 1135 #else | 1135 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1136 const int l1= stride; | 1136 const int l1= stride; |
| 1137 const int l2= stride + l1; | 1137 const int l2= stride + l1; |
| 1138 const int l3= stride + l2; | 1138 const int l3= stride + l2; |
| 1139 const int l4= stride + l3; | 1139 const int l4= stride + l3; |
| 1140 const int l5= stride + l4; | 1140 const int l5= stride + l4; |
| 1173 src[l4]-= d; | 1173 src[l4]-= d; |
| 1174 src[l5]+= d; | 1174 src[l5]+= d; |
| 1175 } | 1175 } |
| 1176 src++; | 1176 src++; |
| 1177 } | 1177 } |
| 1178 #endif | 1178 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1179 } | 1179 } |
| 1180 #endif //HAVE_ALTIVEC | 1180 #endif //HAVE_ALTIVEC |
| 1181 | 1181 |
| 1182 #ifndef HAVE_ALTIVEC | 1182 #ifndef HAVE_ALTIVEC |
| 1183 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) | 1183 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) |
| 1404 | 1404 |
| 1405 "1: \n\t" | 1405 "1: \n\t" |
| 1406 : : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2) | 1406 : : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2) |
| 1407 : "%"REG_a, "%"REG_d, "%"REG_c | 1407 : "%"REG_a, "%"REG_d, "%"REG_c |
| 1408 ); | 1408 ); |
| 1409 #else | 1409 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1410 int y; | 1410 int y; |
| 1411 int min=255; | 1411 int min=255; |
| 1412 int max=0; | 1412 int max=0; |
| 1413 int avg; | 1413 int avg; |
| 1414 uint8_t *p; | 1414 uint8_t *p; |
| 1534 } | 1534 } |
| 1535 } | 1535 } |
| 1536 // src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255; | 1536 // src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255; |
| 1537 } | 1537 } |
| 1538 #endif | 1538 #endif |
| 1539 #endif | 1539 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1540 } | 1540 } |
| 1541 #endif //HAVE_ALTIVEC | 1541 #endif //HAVE_ALTIVEC |
| 1542 | 1542 |
| 1543 /** | 1543 /** |
| 1544 * Deinterlaces the given block by linearly interpolating every second line. | 1544 * Deinterlaces the given block by linearly interpolating every second line. |
| 1641 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8), (%%REGd, %1, 4), (%%REGc), (%%REGc, %1, 2)) | 1641 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8), (%%REGd, %1, 4), (%%REGc), (%%REGc, %1, 2)) |
| 1642 | 1642 |
| 1643 : : "r" (src), "r" ((long)stride) | 1643 : : "r" (src), "r" ((long)stride) |
| 1644 : "%"REG_a, "%"REG_d, "%"REG_c | 1644 : "%"REG_a, "%"REG_d, "%"REG_c |
| 1645 ); | 1645 ); |
| 1646 #else | 1646 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1647 int x; | 1647 int x; |
| 1648 src+= stride*3; | 1648 src+= stride*3; |
| 1649 for(x=0; x<8; x++) | 1649 for(x=0; x<8; x++) |
| 1650 { | 1650 { |
| 1651 src[stride*3] = CLIP((-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4); | 1651 src[stride*3] = CLIP((-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4); |
| 1652 src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4); | 1652 src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4); |
| 1653 src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4); | 1653 src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4); |
| 1654 src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4); | 1654 src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4); |
| 1655 src++; | 1655 src++; |
| 1656 } | 1656 } |
| 1657 #endif | 1657 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1658 } | 1658 } |
| 1659 | 1659 |
| 1660 /** | 1660 /** |
| 1661 * Deinterlaces the given block by filtering every second line with a (-1 4 2 4 -1) filter. | 1661 * Deinterlaces the given block by filtering every second line with a (-1 4 2 4 -1) filter. |
| 1662 * will be called for every 8x8 block and can read & write from line 4-15 | 1662 * will be called for every 8x8 block and can read & write from line 4-15 |
| 1713 | 1713 |
| 1714 "movq %%mm0, (%2) \n\t" | 1714 "movq %%mm0, (%2) \n\t" |
| 1715 : : "r" (src), "r" ((long)stride), "r"(tmp) | 1715 : : "r" (src), "r" ((long)stride), "r"(tmp) |
| 1716 : "%"REG_a, "%"REG_d | 1716 : "%"REG_a, "%"REG_d |
| 1717 ); | 1717 ); |
| 1718 #else | 1718 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1719 int x; | 1719 int x; |
| 1720 src+= stride*4; | 1720 src+= stride*4; |
| 1721 for(x=0; x<8; x++) | 1721 for(x=0; x<8; x++) |
| 1722 { | 1722 { |
| 1723 int t1= tmp[x]; | 1723 int t1= tmp[x]; |
| 1732 src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3); | 1732 src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3); |
| 1733 tmp[x]= t1; | 1733 tmp[x]= t1; |
| 1734 | 1734 |
| 1735 src++; | 1735 src++; |
| 1736 } | 1736 } |
| 1737 #endif | 1737 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1738 } | 1738 } |
| 1739 | 1739 |
| 1740 /** | 1740 /** |
| 1741 * Deinterlaces the given block by filtering every line with a (-1 2 6 2 -1) filter. | 1741 * Deinterlaces the given block by filtering every line with a (-1 2 6 2 -1) filter. |
| 1742 * will be called for every 8x8 block and can read & write from line 4-15 | 1742 * will be called for every 8x8 block and can read & write from line 4-15 |
| 1804 "movq %%mm0, (%2) \n\t" | 1804 "movq %%mm0, (%2) \n\t" |
| 1805 "movq %%mm1, (%3) \n\t" | 1805 "movq %%mm1, (%3) \n\t" |
| 1806 : : "r" (src), "r" ((long)stride), "r"(tmp), "r"(tmp2) | 1806 : : "r" (src), "r" ((long)stride), "r"(tmp), "r"(tmp2) |
| 1807 : "%"REG_a, "%"REG_d | 1807 : "%"REG_a, "%"REG_d |
| 1808 ); | 1808 ); |
| 1809 #else | 1809 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1810 int x; | 1810 int x; |
| 1811 src+= stride*4; | 1811 src+= stride*4; |
| 1812 for(x=0; x<8; x++) | 1812 for(x=0; x<8; x++) |
| 1813 { | 1813 { |
| 1814 int t1= tmp[x]; | 1814 int t1= tmp[x]; |
| 1834 tmp[x]= t3; | 1834 tmp[x]= t3; |
| 1835 tmp2[x]= t1; | 1835 tmp2[x]= t1; |
| 1836 | 1836 |
| 1837 src++; | 1837 src++; |
| 1838 } | 1838 } |
| 1839 #endif | 1839 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1840 } | 1840 } |
| 1841 | 1841 |
| 1842 /** | 1842 /** |
| 1843 * Deinterlaces the given block by filtering all lines with a (1 2 1) filter. | 1843 * Deinterlaces the given block by filtering all lines with a (1 2 1) filter. |
| 1844 * will be called for every 8x8 block and can read & write from line 4-15 | 1844 * will be called for every 8x8 block and can read & write from line 4-15 |
| 1893 "movq %%mm1, (%2) \n\t" | 1893 "movq %%mm1, (%2) \n\t" |
| 1894 | 1894 |
| 1895 : : "r" (src), "r" ((long)stride), "r" (tmp) | 1895 : : "r" (src), "r" ((long)stride), "r" (tmp) |
| 1896 : "%"REG_a, "%"REG_d | 1896 : "%"REG_a, "%"REG_d |
| 1897 ); | 1897 ); |
| 1898 #else | 1898 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1899 int a, b, c, x; | 1899 int a, b, c, x; |
| 1900 src+= 4*stride; | 1900 src+= 4*stride; |
| 1901 | 1901 |
| 1902 for(x=0; x<2; x++){ | 1902 for(x=0; x<2; x++){ |
| 1903 a= *(uint32_t*)&tmp[stride*0]; | 1903 a= *(uint32_t*)&tmp[stride*0]; |
| 1936 | 1936 |
| 1937 *(uint32_t*)&tmp[stride*0]= c; | 1937 *(uint32_t*)&tmp[stride*0]= c; |
| 1938 src += 4; | 1938 src += 4; |
| 1939 tmp += 4; | 1939 tmp += 4; |
| 1940 } | 1940 } |
| 1941 #endif | 1941 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1942 } | 1942 } |
| 1943 | 1943 |
| 1944 /** | 1944 /** |
| 1945 * Deinterlaces the given block by applying a median filter to every second line. | 1945 * Deinterlaces the given block by applying a median filter to every second line. |
| 1946 * will be called for every 8x8 block and can read & write from line 4-15, | 1946 * will be called for every 8x8 block and can read & write from line 4-15, |
| 2039 MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) | 2039 MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) |
| 2040 | 2040 |
| 2041 : : "r" (src), "r" ((long)stride) | 2041 : : "r" (src), "r" ((long)stride) |
| 2042 : "%"REG_a, "%"REG_d | 2042 : "%"REG_a, "%"REG_d |
| 2043 ); | 2043 ); |
| 2044 #endif // MMX | 2044 #endif //HAVE_MMX2 |
| 2045 #else | 2045 #else //HAVE_MMX |
| 2046 int x, y; | 2046 int x, y; |
| 2047 src+= 4*stride; | 2047 src+= 4*stride; |
| 2048 // FIXME - there should be a way to do a few columns in parallel like w/mmx | 2048 // FIXME - there should be a way to do a few columns in parallel like w/mmx |
| 2049 for(x=0; x<8; x++) | 2049 for(x=0; x<8; x++) |
| 2050 { | 2050 { |
| 2061 colsrc[stride ] = (a|(d^f)) & (b|(d^e)) & (c|(e^f)); | 2061 colsrc[stride ] = (a|(d^f)) & (b|(d^e)) & (c|(e^f)); |
| 2062 colsrc += stride*2; | 2062 colsrc += stride*2; |
| 2063 } | 2063 } |
| 2064 src++; | 2064 src++; |
| 2065 } | 2065 } |
| 2066 #endif | 2066 #endif //HAVE_MMX |
| 2067 } | 2067 } |
| 2068 | 2068 |
| 2069 #ifdef HAVE_MMX | 2069 #ifdef HAVE_MMX |
| 2070 /** | 2070 /** |
| 2071 * transposes and shifts the given 8x8 Block into dst1 and dst2 | 2071 * transposes and shifts the given 8x8 Block into dst1 and dst2 |
| 2229 | 2229 |
| 2230 :: "r" (dst), "r" ((long)dstStride), "r" (src) | 2230 :: "r" (dst), "r" ((long)dstStride), "r" (src) |
| 2231 : "%"REG_a, "%"REG_d | 2231 : "%"REG_a, "%"REG_d |
| 2232 ); | 2232 ); |
| 2233 } | 2233 } |
| 2234 #endif | 2234 #endif //HAVE_MMX |
| 2235 //static long test=0; | 2235 //static long test=0; |
| 2236 | 2236 |
| 2237 #ifndef HAVE_ALTIVEC | 2237 #ifndef HAVE_ALTIVEC |
| 2238 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | 2238 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, |
| 2239 uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise) | 2239 uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise) |
| 2276 "paddw %%mm4, %%mm0 \n\t" | 2276 "paddw %%mm4, %%mm0 \n\t" |
| 2277 "psadbw (%1, %%"REG_c"), %%mm7 \n\t" // |L7-R7| | 2277 "psadbw (%1, %%"REG_c"), %%mm7 \n\t" // |L7-R7| |
| 2278 "paddw %%mm5, %%mm6 \n\t" | 2278 "paddw %%mm5, %%mm6 \n\t" |
| 2279 "paddw %%mm7, %%mm6 \n\t" | 2279 "paddw %%mm7, %%mm6 \n\t" |
| 2280 "paddw %%mm6, %%mm0 \n\t" | 2280 "paddw %%mm6, %%mm0 \n\t" |
| 2281 #else | 2281 #else //L1_DIFF |
| 2282 #if defined (FAST_L2_DIFF) | 2282 #if defined (FAST_L2_DIFF) |
| 2283 "pcmpeqb %%mm7, %%mm7 \n\t" | 2283 "pcmpeqb %%mm7, %%mm7 \n\t" |
| 2284 "movq "MANGLE(b80)", %%mm6 \n\t" | 2284 "movq "MANGLE(b80)", %%mm6 \n\t" |
| 2285 "pxor %%mm0, %%mm0 \n\t" | 2285 "pxor %%mm0, %%mm0 \n\t" |
| 2286 #define REAL_L2_DIFF_CORE(a, b)\ | 2286 #define REAL_L2_DIFF_CORE(a, b)\ |
| 2295 "pmaddwd %%mm2, %%mm2 \n\t"\ | 2295 "pmaddwd %%mm2, %%mm2 \n\t"\ |
| 2296 "paddd %%mm2, %%mm5 \n\t"\ | 2296 "paddd %%mm2, %%mm5 \n\t"\ |
| 2297 "psrld $14, %%mm5 \n\t"\ | 2297 "psrld $14, %%mm5 \n\t"\ |
| 2298 "paddd %%mm5, %%mm0 \n\t" | 2298 "paddd %%mm5, %%mm0 \n\t" |
| 2299 | 2299 |
| 2300 #else | 2300 #else //defined (FAST_L2_DIFF) |
| 2301 "pxor %%mm7, %%mm7 \n\t" | 2301 "pxor %%mm7, %%mm7 \n\t" |
| 2302 "pxor %%mm0, %%mm0 \n\t" | 2302 "pxor %%mm0, %%mm0 \n\t" |
| 2303 #define REAL_L2_DIFF_CORE(a, b)\ | 2303 #define REAL_L2_DIFF_CORE(a, b)\ |
| 2304 "movq " #a ", %%mm5 \n\t"\ | 2304 "movq " #a ", %%mm5 \n\t"\ |
| 2305 "movq " #b ", %%mm2 \n\t"\ | 2305 "movq " #b ", %%mm2 \n\t"\ |
| 2314 "pmaddwd %%mm5, %%mm5 \n\t"\ | 2314 "pmaddwd %%mm5, %%mm5 \n\t"\ |
| 2315 "pmaddwd %%mm1, %%mm1 \n\t"\ | 2315 "pmaddwd %%mm1, %%mm1 \n\t"\ |
| 2316 "paddd %%mm1, %%mm5 \n\t"\ | 2316 "paddd %%mm1, %%mm5 \n\t"\ |
| 2317 "paddd %%mm5, %%mm0 \n\t" | 2317 "paddd %%mm5, %%mm0 \n\t" |
| 2318 | 2318 |
| 2319 #endif | 2319 #endif //defined (FAST_L2_DIFF) |
| 2320 | 2320 |
| 2321 #define L2_DIFF_CORE(a, b) REAL_L2_DIFF_CORE(a, b) | 2321 #define L2_DIFF_CORE(a, b) REAL_L2_DIFF_CORE(a, b) |
| 2322 | 2322 |
| 2323 L2_DIFF_CORE((%0), (%1)) | 2323 L2_DIFF_CORE((%0), (%1)) |
| 2324 L2_DIFF_CORE((%0, %2), (%1, %2)) | 2324 L2_DIFF_CORE((%0, %2), (%1, %2)) |
| 2327 L2_DIFF_CORE((%0, %2, 4), (%1, %2, 4)) | 2327 L2_DIFF_CORE((%0, %2, 4), (%1, %2, 4)) |
| 2328 L2_DIFF_CORE((%0, %%REGd), (%1, %%REGd)) | 2328 L2_DIFF_CORE((%0, %%REGd), (%1, %%REGd)) |
| 2329 L2_DIFF_CORE((%0, %%REGa,2), (%1, %%REGa,2)) | 2329 L2_DIFF_CORE((%0, %%REGa,2), (%1, %%REGa,2)) |
| 2330 L2_DIFF_CORE((%0, %%REGc), (%1, %%REGc)) | 2330 L2_DIFF_CORE((%0, %%REGc), (%1, %%REGc)) |
| 2331 | 2331 |
| 2332 #endif | 2332 #endif //L1_DIFF |
| 2333 | 2333 |
| 2334 "movq %%mm0, %%mm4 \n\t" | 2334 "movq %%mm0, %%mm4 \n\t" |
| 2335 "psrlq $32, %%mm0 \n\t" | 2335 "psrlq $32, %%mm0 \n\t" |
| 2336 "paddd %%mm0, %%mm4 \n\t" | 2336 "paddd %%mm0, %%mm4 \n\t" |
| 2337 "movd %%mm4, %%ecx \n\t" | 2337 "movd %%mm4, %%ecx \n\t" |
| 2532 | 2532 |
| 2533 :: "r" (src), "r" (tempBlured), "r"((long)stride), "m" (tempBluredPast) | 2533 :: "r" (src), "r" (tempBlured), "r"((long)stride), "m" (tempBluredPast) |
| 2534 : "%"REG_a, "%"REG_d, "%"REG_c, "memory" | 2534 : "%"REG_a, "%"REG_d, "%"REG_c, "memory" |
| 2535 ); | 2535 ); |
| 2536 //printf("%d\n", test); | 2536 //printf("%d\n", test); |
| 2537 #else | 2537 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 2538 { | 2538 { |
| 2539 int y; | 2539 int y; |
| 2540 int d=0; | 2540 int d=0; |
| 2541 // int sysd=0; | 2541 // int sysd=0; |
| 2542 int i; | 2542 int i; |
| 2635 } | 2635 } |
| 2636 } | 2636 } |
| 2637 } | 2637 } |
| 2638 } | 2638 } |
| 2639 } | 2639 } |
| 2640 #endif | 2640 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 2641 } | 2641 } |
| 2642 #endif //HAVE_ALTIVEC | 2642 #endif //HAVE_ALTIVEC |
| 2643 | 2643 |
| 2644 #ifdef HAVE_MMX | 2644 #ifdef HAVE_MMX |
| 2645 /** | 2645 /** |
| 3251 "packuswb %%mm5, %%mm0 \n\t"\ | 3251 "packuswb %%mm5, %%mm0 \n\t"\ |
| 3252 "packuswb %%mm6, %%mm1 \n\t"\ | 3252 "packuswb %%mm6, %%mm1 \n\t"\ |
| 3253 "movq %%mm0, " #dst1 " \n\t"\ | 3253 "movq %%mm0, " #dst1 " \n\t"\ |
| 3254 "movq %%mm1, " #dst2 " \n\t"\ | 3254 "movq %%mm1, " #dst2 " \n\t"\ |
| 3255 | 3255 |
| 3256 #endif //!HAVE_MMX2 | 3256 #endif //HAVE_MMX2 |
| 3257 #define SCALED_CPY(src1, src2, dst1, dst2)\ | 3257 #define SCALED_CPY(src1, src2, dst1, dst2)\ |
| 3258 REAL_SCALED_CPY(src1, src2, dst1, dst2) | 3258 REAL_SCALED_CPY(src1, src2, dst1, dst2) |
| 3259 | 3259 |
| 3260 SCALED_CPY((%2) , (%2, %4) , (%3) , (%3, %5)) | 3260 SCALED_CPY((%2) , (%2, %4) , (%3) , (%3, %5)) |
| 3261 SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2)) | 3261 SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2)) |
| 3271 "r"(dst), | 3271 "r"(dst), |
| 3272 "r" ((long)srcStride), | 3272 "r" ((long)srcStride), |
| 3273 "r" ((long)dstStride) | 3273 "r" ((long)dstStride) |
| 3274 : "%"REG_d | 3274 : "%"REG_d |
| 3275 ); | 3275 ); |
| 3276 #else | 3276 #else //HAVE_MMX |
| 3277 for(i=0; i<8; i++) | 3277 for(i=0; i<8; i++) |
| 3278 memcpy( &(dst[dstStride*i]), | 3278 memcpy( &(dst[dstStride*i]), |
| 3279 &(src[srcStride*i]), BLOCK_SIZE); | 3279 &(src[srcStride*i]), BLOCK_SIZE); |
| 3280 #endif | 3280 #endif //HAVE_MMX |
| 3281 } | 3281 } |
| 3282 else | 3282 else |
| 3283 { | 3283 { |
| 3284 #ifdef HAVE_MMX | 3284 #ifdef HAVE_MMX |
| 3285 asm volatile( | 3285 asm volatile( |
| 3306 "r" (dst), | 3306 "r" (dst), |
| 3307 "r" ((long)srcStride), | 3307 "r" ((long)srcStride), |
| 3308 "r" ((long)dstStride) | 3308 "r" ((long)dstStride) |
| 3309 : "%"REG_a, "%"REG_d | 3309 : "%"REG_a, "%"REG_d |
| 3310 ); | 3310 ); |
| 3311 #else | 3311 #else //HAVE_MMX |
| 3312 for(i=0; i<8; i++) | 3312 for(i=0; i<8; i++) |
| 3313 memcpy( &(dst[dstStride*i]), | 3313 memcpy( &(dst[dstStride*i]), |
| 3314 &(src[srcStride*i]), BLOCK_SIZE); | 3314 &(src[srcStride*i]), BLOCK_SIZE); |
| 3315 #endif | 3315 #endif //HAVE_MMX |
| 3316 } | 3316 } |
| 3317 } | 3317 } |
| 3318 | 3318 |
| 3319 /** | 3319 /** |
| 3320 * Duplicates the given 8 src pixels ? times upward | 3320 * Duplicates the given 8 src pixels ? times upward |
| 3742 RENAME(doHorizDefFilter)(dstBlock-4, stride, &c); | 3742 RENAME(doHorizDefFilter)(dstBlock-4, stride, &c); |
| 3743 #endif | 3743 #endif |
| 3744 }else if(mode & H_A_DEBLOCK){ | 3744 }else if(mode & H_A_DEBLOCK){ |
| 3745 RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c); | 3745 RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c); |
| 3746 } | 3746 } |
| 3747 #endif | 3747 #endif //HAVE_MMX |
| 3748 if(mode & DERING) | 3748 if(mode & DERING) |
| 3749 { | 3749 { |
| 3750 //FIXME filter first line | 3750 //FIXME filter first line |
| 3751 if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c); | 3751 if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c); |
| 3752 } | 3752 } |
