comparison libpostproc/postprocess_template.c @ 2978:403183bbb505 libavcodec

Add some comments to #ifdef #else #endif blocks and fix wrong ones.
author diego
date Wed, 21 Dec 2005 17:50:40 +0000
parents ef2149182f1c
children bfabfdf9ce55
comparison
equal deleted inserted replaced
2977:49c6f104a76f 2978:403183bbb505
184 else return 1; 184 else return 1;
185 }else{ 185 }else{
186 return 2; 186 return 2;
187 } 187 }
188 } 188 }
189 #endif 189 #endif //HAVE_MMX
190 190
191 /** 191 /**
192 * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) 192 * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle)
193 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 193 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16
194 */ 194 */
320 320
321 : 321 :
322 : "r" (src), "r" ((long)stride), "m" (c->pQPb) 322 : "r" (src), "r" ((long)stride), "m" (c->pQPb)
323 : "%"REG_a, "%"REG_c 323 : "%"REG_a, "%"REG_c
324 ); 324 );
325 #else 325 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
326 const int l1= stride; 326 const int l1= stride;
327 const int l2= stride + l1; 327 const int l2= stride + l1;
328 const int l3= stride + l2; 328 const int l3= stride + l2;
329 const int l4= stride + l3; 329 const int l4= stride + l3;
330 const int l5= stride + l4; 330 const int l5= stride + l4;
360 src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4; 360 src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
361 src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4; 361 src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
362 362
363 src++; 363 src++;
364 } 364 }
365 #endif 365 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
366 } 366 }
367 #endif //HAVE_ALTIVEC 367 #endif //HAVE_ALTIVEC
368 368
369 #if 0 369 #if 0
370 /** 370 /**
440 440
441 : 441 :
442 : "r" (src), "r" ((long)stride) 442 : "r" (src), "r" ((long)stride)
443 : "%"REG_a, "%"REG_c 443 : "%"REG_a, "%"REG_c
444 ); 444 );
445 #else 445 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
446 const int l1= stride; 446 const int l1= stride;
447 const int l2= stride + l1; 447 const int l2= stride + l1;
448 const int l3= stride + l2; 448 const int l3= stride + l2;
449 const int l4= stride + l3; 449 const int l4= stride + l3;
450 const int l5= stride + l4; 450 const int l5= stride + l4;
466 src[x+l6] -=v>>3; 466 src[x+l6] -=v>>3;
467 467
468 } 468 }
469 } 469 }
470 470
471 #endif 471 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
472 } 472 }
473 #endif 473 #endif //0
474 474
475 /** 475 /**
476 * Experimental Filter 1 476 * Experimental Filter 1
477 * will not damage linear gradients 477 * will not damage linear gradients
478 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter 478 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
565 565
566 : 566 :
567 : "r" (src), "r" ((long)stride), "m" (co->pQPb) 567 : "r" (src), "r" ((long)stride), "m" (co->pQPb)
568 : "%"REG_a, "%"REG_c 568 : "%"REG_a, "%"REG_c
569 ); 569 );
570 #else 570 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
571 571
572 const int l1= stride; 572 const int l1= stride;
573 const int l2= stride + l1; 573 const int l2= stride + l1;
574 const int l3= stride + l2; 574 const int l3= stride + l2;
575 const int l4= stride + l3; 575 const int l4= stride + l3;
602 src[l7] -=v>>3; 602 src[l7] -=v>>3;
603 603
604 } 604 }
605 src++; 605 src++;
606 } 606 }
607 #endif 607 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
608 } 608 }
609 609
610 #ifndef HAVE_ALTIVEC 610 #ifndef HAVE_ALTIVEC
611 static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c) 611 static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c)
612 { 612 {
731 "paddb %%mm3, %%mm2 \n\t" 731 "paddb %%mm3, %%mm2 \n\t"
732 "pxor %%mm6, %%mm0 \n\t" 732 "pxor %%mm6, %%mm0 \n\t"
733 "pxor %%mm6, %%mm2 \n\t" 733 "pxor %%mm6, %%mm2 \n\t"
734 "movq %%mm0, (%%"REG_a", %1, 2) \n\t" 734 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
735 "movq %%mm2, (%0, %1, 4) \n\t" 735 "movq %%mm2, (%0, %1, 4) \n\t"
736 #endif 736 #endif //0
737 737
738 "lea (%0, %1), %%"REG_a" \n\t" 738 "lea (%0, %1), %%"REG_a" \n\t"
739 "pcmpeqb %%mm6, %%mm6 \n\t" // -1 739 "pcmpeqb %%mm6, %%mm6 \n\t" // -1
740 // 0 1 2 3 4 5 6 7 740 // 0 1 2 3 4 5 6 7
741 // %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ecx+%1 ecx+2%1 741 // %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ecx+%1 ecx+2%1
1130 1130
1131 : "+r" (src) 1131 : "+r" (src)
1132 : "r" ((long)stride), "m" (c->pQPb) 1132 : "r" ((long)stride), "m" (c->pQPb)
1133 : "%"REG_a, "%"REG_c 1133 : "%"REG_a, "%"REG_c
1134 ); 1134 );
1135 #else 1135 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1136 const int l1= stride; 1136 const int l1= stride;
1137 const int l2= stride + l1; 1137 const int l2= stride + l1;
1138 const int l3= stride + l2; 1138 const int l3= stride + l2;
1139 const int l4= stride + l3; 1139 const int l4= stride + l3;
1140 const int l5= stride + l4; 1140 const int l5= stride + l4;
1173 src[l4]-= d; 1173 src[l4]-= d;
1174 src[l5]+= d; 1174 src[l5]+= d;
1175 } 1175 }
1176 src++; 1176 src++;
1177 } 1177 }
1178 #endif 1178 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1179 } 1179 }
1180 #endif //HAVE_ALTIVEC 1180 #endif //HAVE_ALTIVEC
1181 1181
1182 #ifndef HAVE_ALTIVEC 1182 #ifndef HAVE_ALTIVEC
1183 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) 1183 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
1404 1404
1405 "1: \n\t" 1405 "1: \n\t"
1406 : : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2) 1406 : : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2)
1407 : "%"REG_a, "%"REG_d, "%"REG_c 1407 : "%"REG_a, "%"REG_d, "%"REG_c
1408 ); 1408 );
1409 #else 1409 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1410 int y; 1410 int y;
1411 int min=255; 1411 int min=255;
1412 int max=0; 1412 int max=0;
1413 int avg; 1413 int avg;
1414 uint8_t *p; 1414 uint8_t *p;
1534 } 1534 }
1535 } 1535 }
1536 // src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255; 1536 // src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255;
1537 } 1537 }
1538 #endif 1538 #endif
1539 #endif 1539 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1540 } 1540 }
1541 #endif //HAVE_ALTIVEC 1541 #endif //HAVE_ALTIVEC
1542 1542
1543 /** 1543 /**
1544 * Deinterlaces the given block by linearly interpolating every second line. 1544 * Deinterlaces the given block by linearly interpolating every second line.
1641 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8), (%%REGd, %1, 4), (%%REGc), (%%REGc, %1, 2)) 1641 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8), (%%REGd, %1, 4), (%%REGc), (%%REGc, %1, 2))
1642 1642
1643 : : "r" (src), "r" ((long)stride) 1643 : : "r" (src), "r" ((long)stride)
1644 : "%"REG_a, "%"REG_d, "%"REG_c 1644 : "%"REG_a, "%"REG_d, "%"REG_c
1645 ); 1645 );
1646 #else 1646 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1647 int x; 1647 int x;
1648 src+= stride*3; 1648 src+= stride*3;
1649 for(x=0; x<8; x++) 1649 for(x=0; x<8; x++)
1650 { 1650 {
1651 src[stride*3] = CLIP((-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4); 1651 src[stride*3] = CLIP((-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4);
1652 src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4); 1652 src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4);
1653 src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4); 1653 src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4);
1654 src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4); 1654 src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
1655 src++; 1655 src++;
1656 } 1656 }
1657 #endif 1657 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1658 } 1658 }
1659 1659
1660 /** 1660 /**
1661 * Deinterlaces the given block by filtering every second line with a (-1 4 2 4 -1) filter. 1661 * Deinterlaces the given block by filtering every second line with a (-1 4 2 4 -1) filter.
1662 * will be called for every 8x8 block and can read & write from line 4-15 1662 * will be called for every 8x8 block and can read & write from line 4-15
1713 1713
1714 "movq %%mm0, (%2) \n\t" 1714 "movq %%mm0, (%2) \n\t"
1715 : : "r" (src), "r" ((long)stride), "r"(tmp) 1715 : : "r" (src), "r" ((long)stride), "r"(tmp)
1716 : "%"REG_a, "%"REG_d 1716 : "%"REG_a, "%"REG_d
1717 ); 1717 );
1718 #else 1718 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1719 int x; 1719 int x;
1720 src+= stride*4; 1720 src+= stride*4;
1721 for(x=0; x<8; x++) 1721 for(x=0; x<8; x++)
1722 { 1722 {
1723 int t1= tmp[x]; 1723 int t1= tmp[x];
1732 src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3); 1732 src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3);
1733 tmp[x]= t1; 1733 tmp[x]= t1;
1734 1734
1735 src++; 1735 src++;
1736 } 1736 }
1737 #endif 1737 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1738 } 1738 }
1739 1739
1740 /** 1740 /**
1741 * Deinterlaces the given block by filtering every line with a (-1 2 6 2 -1) filter. 1741 * Deinterlaces the given block by filtering every line with a (-1 2 6 2 -1) filter.
1742 * will be called for every 8x8 block and can read & write from line 4-15 1742 * will be called for every 8x8 block and can read & write from line 4-15
1804 "movq %%mm0, (%2) \n\t" 1804 "movq %%mm0, (%2) \n\t"
1805 "movq %%mm1, (%3) \n\t" 1805 "movq %%mm1, (%3) \n\t"
1806 : : "r" (src), "r" ((long)stride), "r"(tmp), "r"(tmp2) 1806 : : "r" (src), "r" ((long)stride), "r"(tmp), "r"(tmp2)
1807 : "%"REG_a, "%"REG_d 1807 : "%"REG_a, "%"REG_d
1808 ); 1808 );
1809 #else 1809 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1810 int x; 1810 int x;
1811 src+= stride*4; 1811 src+= stride*4;
1812 for(x=0; x<8; x++) 1812 for(x=0; x<8; x++)
1813 { 1813 {
1814 int t1= tmp[x]; 1814 int t1= tmp[x];
1834 tmp[x]= t3; 1834 tmp[x]= t3;
1835 tmp2[x]= t1; 1835 tmp2[x]= t1;
1836 1836
1837 src++; 1837 src++;
1838 } 1838 }
1839 #endif 1839 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1840 } 1840 }
1841 1841
1842 /** 1842 /**
1843 * Deinterlaces the given block by filtering all lines with a (1 2 1) filter. 1843 * Deinterlaces the given block by filtering all lines with a (1 2 1) filter.
1844 * will be called for every 8x8 block and can read & write from line 4-15 1844 * will be called for every 8x8 block and can read & write from line 4-15
1893 "movq %%mm1, (%2) \n\t" 1893 "movq %%mm1, (%2) \n\t"
1894 1894
1895 : : "r" (src), "r" ((long)stride), "r" (tmp) 1895 : : "r" (src), "r" ((long)stride), "r" (tmp)
1896 : "%"REG_a, "%"REG_d 1896 : "%"REG_a, "%"REG_d
1897 ); 1897 );
1898 #else 1898 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1899 int a, b, c, x; 1899 int a, b, c, x;
1900 src+= 4*stride; 1900 src+= 4*stride;
1901 1901
1902 for(x=0; x<2; x++){ 1902 for(x=0; x<2; x++){
1903 a= *(uint32_t*)&tmp[stride*0]; 1903 a= *(uint32_t*)&tmp[stride*0];
1936 1936
1937 *(uint32_t*)&tmp[stride*0]= c; 1937 *(uint32_t*)&tmp[stride*0]= c;
1938 src += 4; 1938 src += 4;
1939 tmp += 4; 1939 tmp += 4;
1940 } 1940 }
1941 #endif 1941 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1942 } 1942 }
1943 1943
1944 /** 1944 /**
1945 * Deinterlaces the given block by applying a median filter to every second line. 1945 * Deinterlaces the given block by applying a median filter to every second line.
1946 * will be called for every 8x8 block and can read & write from line 4-15, 1946 * will be called for every 8x8 block and can read & write from line 4-15,
2039 MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) 2039 MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
2040 2040
2041 : : "r" (src), "r" ((long)stride) 2041 : : "r" (src), "r" ((long)stride)
2042 : "%"REG_a, "%"REG_d 2042 : "%"REG_a, "%"REG_d
2043 ); 2043 );
2044 #endif // MMX 2044 #endif //HAVE_MMX2
2045 #else 2045 #else //HAVE_MMX
2046 int x, y; 2046 int x, y;
2047 src+= 4*stride; 2047 src+= 4*stride;
2048 // FIXME - there should be a way to do a few columns in parallel like w/mmx 2048 // FIXME - there should be a way to do a few columns in parallel like w/mmx
2049 for(x=0; x<8; x++) 2049 for(x=0; x<8; x++)
2050 { 2050 {
2061 colsrc[stride ] = (a|(d^f)) & (b|(d^e)) & (c|(e^f)); 2061 colsrc[stride ] = (a|(d^f)) & (b|(d^e)) & (c|(e^f));
2062 colsrc += stride*2; 2062 colsrc += stride*2;
2063 } 2063 }
2064 src++; 2064 src++;
2065 } 2065 }
2066 #endif 2066 #endif //HAVE_MMX
2067 } 2067 }
2068 2068
2069 #ifdef HAVE_MMX 2069 #ifdef HAVE_MMX
2070 /** 2070 /**
2071 * transposes and shifts the given 8x8 Block into dst1 and dst2 2071 * transposes and shifts the given 8x8 Block into dst1 and dst2
2229 2229
2230 :: "r" (dst), "r" ((long)dstStride), "r" (src) 2230 :: "r" (dst), "r" ((long)dstStride), "r" (src)
2231 : "%"REG_a, "%"REG_d 2231 : "%"REG_a, "%"REG_d
2232 ); 2232 );
2233 } 2233 }
2234 #endif 2234 #endif //HAVE_MMX
2235 //static long test=0; 2235 //static long test=0;
2236 2236
2237 #ifndef HAVE_ALTIVEC 2237 #ifndef HAVE_ALTIVEC
2238 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, 2238 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
2239 uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise) 2239 uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise)
2276 "paddw %%mm4, %%mm0 \n\t" 2276 "paddw %%mm4, %%mm0 \n\t"
2277 "psadbw (%1, %%"REG_c"), %%mm7 \n\t" // |L7-R7| 2277 "psadbw (%1, %%"REG_c"), %%mm7 \n\t" // |L7-R7|
2278 "paddw %%mm5, %%mm6 \n\t" 2278 "paddw %%mm5, %%mm6 \n\t"
2279 "paddw %%mm7, %%mm6 \n\t" 2279 "paddw %%mm7, %%mm6 \n\t"
2280 "paddw %%mm6, %%mm0 \n\t" 2280 "paddw %%mm6, %%mm0 \n\t"
2281 #else 2281 #else //L1_DIFF
2282 #if defined (FAST_L2_DIFF) 2282 #if defined (FAST_L2_DIFF)
2283 "pcmpeqb %%mm7, %%mm7 \n\t" 2283 "pcmpeqb %%mm7, %%mm7 \n\t"
2284 "movq "MANGLE(b80)", %%mm6 \n\t" 2284 "movq "MANGLE(b80)", %%mm6 \n\t"
2285 "pxor %%mm0, %%mm0 \n\t" 2285 "pxor %%mm0, %%mm0 \n\t"
2286 #define REAL_L2_DIFF_CORE(a, b)\ 2286 #define REAL_L2_DIFF_CORE(a, b)\
2295 "pmaddwd %%mm2, %%mm2 \n\t"\ 2295 "pmaddwd %%mm2, %%mm2 \n\t"\
2296 "paddd %%mm2, %%mm5 \n\t"\ 2296 "paddd %%mm2, %%mm5 \n\t"\
2297 "psrld $14, %%mm5 \n\t"\ 2297 "psrld $14, %%mm5 \n\t"\
2298 "paddd %%mm5, %%mm0 \n\t" 2298 "paddd %%mm5, %%mm0 \n\t"
2299 2299
2300 #else 2300 #else //defined (FAST_L2_DIFF)
2301 "pxor %%mm7, %%mm7 \n\t" 2301 "pxor %%mm7, %%mm7 \n\t"
2302 "pxor %%mm0, %%mm0 \n\t" 2302 "pxor %%mm0, %%mm0 \n\t"
2303 #define REAL_L2_DIFF_CORE(a, b)\ 2303 #define REAL_L2_DIFF_CORE(a, b)\
2304 "movq " #a ", %%mm5 \n\t"\ 2304 "movq " #a ", %%mm5 \n\t"\
2305 "movq " #b ", %%mm2 \n\t"\ 2305 "movq " #b ", %%mm2 \n\t"\
2314 "pmaddwd %%mm5, %%mm5 \n\t"\ 2314 "pmaddwd %%mm5, %%mm5 \n\t"\
2315 "pmaddwd %%mm1, %%mm1 \n\t"\ 2315 "pmaddwd %%mm1, %%mm1 \n\t"\
2316 "paddd %%mm1, %%mm5 \n\t"\ 2316 "paddd %%mm1, %%mm5 \n\t"\
2317 "paddd %%mm5, %%mm0 \n\t" 2317 "paddd %%mm5, %%mm0 \n\t"
2318 2318
2319 #endif 2319 #endif //defined (FAST_L2_DIFF)
2320 2320
2321 #define L2_DIFF_CORE(a, b) REAL_L2_DIFF_CORE(a, b) 2321 #define L2_DIFF_CORE(a, b) REAL_L2_DIFF_CORE(a, b)
2322 2322
2323 L2_DIFF_CORE((%0), (%1)) 2323 L2_DIFF_CORE((%0), (%1))
2324 L2_DIFF_CORE((%0, %2), (%1, %2)) 2324 L2_DIFF_CORE((%0, %2), (%1, %2))
2327 L2_DIFF_CORE((%0, %2, 4), (%1, %2, 4)) 2327 L2_DIFF_CORE((%0, %2, 4), (%1, %2, 4))
2328 L2_DIFF_CORE((%0, %%REGd), (%1, %%REGd)) 2328 L2_DIFF_CORE((%0, %%REGd), (%1, %%REGd))
2329 L2_DIFF_CORE((%0, %%REGa,2), (%1, %%REGa,2)) 2329 L2_DIFF_CORE((%0, %%REGa,2), (%1, %%REGa,2))
2330 L2_DIFF_CORE((%0, %%REGc), (%1, %%REGc)) 2330 L2_DIFF_CORE((%0, %%REGc), (%1, %%REGc))
2331 2331
2332 #endif 2332 #endif //L1_DIFF
2333 2333
2334 "movq %%mm0, %%mm4 \n\t" 2334 "movq %%mm0, %%mm4 \n\t"
2335 "psrlq $32, %%mm0 \n\t" 2335 "psrlq $32, %%mm0 \n\t"
2336 "paddd %%mm0, %%mm4 \n\t" 2336 "paddd %%mm0, %%mm4 \n\t"
2337 "movd %%mm4, %%ecx \n\t" 2337 "movd %%mm4, %%ecx \n\t"
2532 2532
2533 :: "r" (src), "r" (tempBlured), "r"((long)stride), "m" (tempBluredPast) 2533 :: "r" (src), "r" (tempBlured), "r"((long)stride), "m" (tempBluredPast)
2534 : "%"REG_a, "%"REG_d, "%"REG_c, "memory" 2534 : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
2535 ); 2535 );
2536 //printf("%d\n", test); 2536 //printf("%d\n", test);
2537 #else 2537 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
2538 { 2538 {
2539 int y; 2539 int y;
2540 int d=0; 2540 int d=0;
2541 // int sysd=0; 2541 // int sysd=0;
2542 int i; 2542 int i;
2635 } 2635 }
2636 } 2636 }
2637 } 2637 }
2638 } 2638 }
2639 } 2639 }
2640 #endif 2640 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
2641 } 2641 }
2642 #endif //HAVE_ALTIVEC 2642 #endif //HAVE_ALTIVEC
2643 2643
2644 #ifdef HAVE_MMX 2644 #ifdef HAVE_MMX
2645 /** 2645 /**
3251 "packuswb %%mm5, %%mm0 \n\t"\ 3251 "packuswb %%mm5, %%mm0 \n\t"\
3252 "packuswb %%mm6, %%mm1 \n\t"\ 3252 "packuswb %%mm6, %%mm1 \n\t"\
3253 "movq %%mm0, " #dst1 " \n\t"\ 3253 "movq %%mm0, " #dst1 " \n\t"\
3254 "movq %%mm1, " #dst2 " \n\t"\ 3254 "movq %%mm1, " #dst2 " \n\t"\
3255 3255
3256 #endif //!HAVE_MMX2 3256 #endif //HAVE_MMX2
3257 #define SCALED_CPY(src1, src2, dst1, dst2)\ 3257 #define SCALED_CPY(src1, src2, dst1, dst2)\
3258 REAL_SCALED_CPY(src1, src2, dst1, dst2) 3258 REAL_SCALED_CPY(src1, src2, dst1, dst2)
3259 3259
3260 SCALED_CPY((%2) , (%2, %4) , (%3) , (%3, %5)) 3260 SCALED_CPY((%2) , (%2, %4) , (%3) , (%3, %5))
3261 SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2)) 3261 SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2))
3271 "r"(dst), 3271 "r"(dst),
3272 "r" ((long)srcStride), 3272 "r" ((long)srcStride),
3273 "r" ((long)dstStride) 3273 "r" ((long)dstStride)
3274 : "%"REG_d 3274 : "%"REG_d
3275 ); 3275 );
3276 #else 3276 #else //HAVE_MMX
3277 for(i=0; i<8; i++) 3277 for(i=0; i<8; i++)
3278 memcpy( &(dst[dstStride*i]), 3278 memcpy( &(dst[dstStride*i]),
3279 &(src[srcStride*i]), BLOCK_SIZE); 3279 &(src[srcStride*i]), BLOCK_SIZE);
3280 #endif 3280 #endif //HAVE_MMX
3281 } 3281 }
3282 else 3282 else
3283 { 3283 {
3284 #ifdef HAVE_MMX 3284 #ifdef HAVE_MMX
3285 asm volatile( 3285 asm volatile(
3306 "r" (dst), 3306 "r" (dst),
3307 "r" ((long)srcStride), 3307 "r" ((long)srcStride),
3308 "r" ((long)dstStride) 3308 "r" ((long)dstStride)
3309 : "%"REG_a, "%"REG_d 3309 : "%"REG_a, "%"REG_d
3310 ); 3310 );
3311 #else 3311 #else //HAVE_MMX
3312 for(i=0; i<8; i++) 3312 for(i=0; i<8; i++)
3313 memcpy( &(dst[dstStride*i]), 3313 memcpy( &(dst[dstStride*i]),
3314 &(src[srcStride*i]), BLOCK_SIZE); 3314 &(src[srcStride*i]), BLOCK_SIZE);
3315 #endif 3315 #endif //HAVE_MMX
3316 } 3316 }
3317 } 3317 }
3318 3318
3319 /** 3319 /**
3320 * Duplicates the given 8 src pixels ? times upward 3320 * Duplicates the given 8 src pixels ? times upward
3742 RENAME(doHorizDefFilter)(dstBlock-4, stride, &c); 3742 RENAME(doHorizDefFilter)(dstBlock-4, stride, &c);
3743 #endif 3743 #endif
3744 }else if(mode & H_A_DEBLOCK){ 3744 }else if(mode & H_A_DEBLOCK){
3745 RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c); 3745 RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c);
3746 } 3746 }
3747 #endif 3747 #endif //HAVE_MMX
3748 if(mode & DERING) 3748 if(mode & DERING)
3749 { 3749 {
3750 //FIXME filter first line 3750 //FIXME filter first line
3751 if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c); 3751 if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c);
3752 } 3752 }