Mercurial > libavcodec.hg
comparison libpostproc/postprocess.c @ 129:be35346e27c1 libavcodec
fixed difference with -vo md5 between doVertDefFilter() C and MMX / MMX2 versions
fixed some warnings
fixed the compile failure on non-x86 systems (I didn't apply the patch from Oliver Schoenbrunner <oliver.schoenbrunner@jku.at> because it used ARCH_X86 instead of HAVE_MMX)
| author | michael |
|---|---|
| date | Thu, 25 Oct 2001 11:42:34 +0000 |
| parents | e5266b8e79be |
| children | 0cce5d30d1d8 |
comparison
equal
deleted
inserted
replaced
| 128:e5266b8e79be | 129:be35346e27c1 |
|---|---|
| 23 doVertLowPass E e e | 23 doVertLowPass E e e |
| 24 doVertDefFilter Ec Ec Ec | 24 doVertDefFilter Ec Ec Ec |
| 25 isHorizDC Ec Ec | 25 isHorizDC Ec Ec |
| 26 isHorizMinMaxOk a E | 26 isHorizMinMaxOk a E |
| 27 doHorizLowPass E e e | 27 doHorizLowPass E e e |
| 28 doHorizDefFilter E E E | 28 doHorizDefFilter Ec Ec Ec |
| 29 deRing | 29 deRing |
| 30 Vertical RKAlgo1 E a a | 30 Vertical RKAlgo1 E a a |
| 31 Horizontal RKAlgo1 a a | |
| 31 Vertical X1 a E E | 32 Vertical X1 a E E |
| 32 Horizontal X1 a E E | 33 Horizontal X1 a E E |
| 33 LinIpolDeinterlace e E E* | 34 LinIpolDeinterlace e E E* |
| 34 CubicIpolDeinterlace a e e* | 35 CubicIpolDeinterlace a e e* |
| 35 LinBlendDeinterlace e E E* | 36 LinBlendDeinterlace e E E* |
| 58 (the if/else stuff per block is slowing things down) | 59 (the if/else stuff per block is slowing things down) |
| 59 compare the quality & speed of all filters | 60 compare the quality & speed of all filters |
| 60 split this huge file | 61 split this huge file |
| 61 fix warnings (unused vars, ...) | 62 fix warnings (unused vars, ...) |
| 62 noise reduction filters | 63 noise reduction filters |
| 64 border remover | |
| 63 ... | 65 ... |
| 64 | 66 |
| 65 Notes: | 67 Notes: |
| 66 | 68 fixed difference with -vo md5 between doVertDefFilter() C and MMX / MMX2 versions |
| 67 */ | 69 */ |
| 68 | 70 |
| 69 //Changelog: use the CVS log | 71 //Changelog: use the CVS log |
| 70 | 72 |
| 71 #include <inttypes.h> | 73 #include <inttypes.h> |
| 161 "fast", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels", | 163 "fast", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels", |
| 162 "fa", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels", | 164 "fa", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels", |
| 163 NULL //End Marker | 165 NULL //End Marker |
| 164 }; | 166 }; |
| 165 | 167 |
| 168 static inline void unusedVariableWarningFixer() | |
| 169 { | |
| 170 if( | |
| 171 packedYOffset + packedYScale + w05 + w20 + w1400 + bm00000001 + bm00010000 | |
| 172 + bm00001000 + bm10000000 + bm10000001 + bm11000011 + bm00000011 + bm11111110 | |
| 173 + bm11000000 + bm00011000 + bm00110011 + bm11001100 + b00 + b01 + b02 + b0F | |
| 174 + bFF + b20 + b80 + b7E + b7C + b3F + temp0 + temp1 + temp2 + temp3 + temp4 | |
| 175 + temp5 + pQPb== 0) b00=0; | |
| 176 } | |
| 177 | |
| 166 #ifdef TIMING | 178 #ifdef TIMING |
| 167 static inline long long rdtsc() | 179 static inline long long rdtsc() |
| 168 { | 180 { |
| 169 long long l; | 181 long long l; |
| 170 asm volatile( "rdtsc\n\t" | 182 asm volatile( "rdtsc\n\t" |
| 209 /** | 221 /** |
| 210 * Check if the middle 8x8 Block in the given 8x16 block is flat | 222 * Check if the middle 8x8 Block in the given 8x16 block is flat |
| 211 */ | 223 */ |
| 212 static inline int isVertDC(uint8_t src[], int stride){ | 224 static inline int isVertDC(uint8_t src[], int stride){ |
| 213 int numEq= 0; | 225 int numEq= 0; |
| 226 #ifndef HAVE_MMX | |
| 214 int y; | 227 int y; |
| 228 #endif | |
| 215 src+= stride*4; // src points to begin of the 8x8 Block | 229 src+= stride*4; // src points to begin of the 8x8 Block |
| 216 #ifdef HAVE_MMX | 230 #ifdef HAVE_MMX |
| 217 asm volatile( | 231 asm volatile( |
| 218 "leal (%1, %2), %%eax \n\t" | 232 "leal (%1, %2), %%eax \n\t" |
| 219 "leal (%%eax, %2, 4), %%ebx \n\t" | 233 "leal (%%eax, %2, 4), %%ebx \n\t" |
| 265 | 279 |
| 266 " \n\t" | 280 " \n\t" |
| 267 "movq %%mm0, %%mm1 \n\t" | 281 "movq %%mm0, %%mm1 \n\t" |
| 268 "psrlw $8, %%mm0 \n\t" | 282 "psrlw $8, %%mm0 \n\t" |
| 269 "paddb %%mm1, %%mm0 \n\t" | 283 "paddb %%mm1, %%mm0 \n\t" |
| 284 #ifdef HAVE_MMX2 | |
| 285 "pshufw $0xF9, %%mm0, %%mm1 \n\t" | |
| 286 "paddb %%mm1, %%mm0 \n\t" | |
| 287 "pshufw $0xFE, %%mm0, %%mm1 \n\t" | |
| 288 #else | |
| 270 "movq %%mm0, %%mm1 \n\t" | 289 "movq %%mm0, %%mm1 \n\t" |
| 271 "psrlq $16, %%mm0 \n\t" | 290 "psrlq $16, %%mm0 \n\t" |
| 272 "paddb %%mm1, %%mm0 \n\t" | 291 "paddb %%mm1, %%mm0 \n\t" |
| 273 "movq %%mm0, %%mm1 \n\t" | 292 "movq %%mm0, %%mm1 \n\t" |
| 274 "psrlq $32, %%mm0 \n\t" | 293 "psrlq $32, %%mm0 \n\t" |
| 294 #endif | |
| 275 "paddb %%mm1, %%mm0 \n\t" | 295 "paddb %%mm1, %%mm0 \n\t" |
| 276 "movd %%mm0, %0 \n\t" | 296 "movd %%mm0, %0 \n\t" |
| 277 : "=r" (numEq) | 297 : "=r" (numEq) |
| 278 : "r" (src), "r" (stride) | 298 : "r" (src), "r" (stride) |
| 279 : "%eax", "%ebx" | 299 : "%eax", "%ebx" |
| 525 sums[6] = src[l6] + src[l7]; | 545 sums[6] = src[l6] + src[l7]; |
| 526 sums[7] = src[l7] + src[l8]; | 546 sums[7] = src[l7] + src[l8]; |
| 527 sums[8] = src[l8] + last; | 547 sums[8] = src[l8] + last; |
| 528 | 548 |
| 529 src[l1]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4; | 549 src[l1]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4; |
| 530 src[l2]= ((src[l2]<<2) + (first + sums[0] + sums[3]<<1) + sums[5] + 8)>>4; | 550 src[l2]= ((src[l2]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4; |
| 531 src[l3]= ((src[l3]<<2) + (first + sums[1] + sums[4]<<1) + sums[6] + 8)>>4; | 551 src[l3]= ((src[l3]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4; |
| 532 src[l4]= ((src[l4]<<2) + (sums[2] + sums[5]<<1) + sums[0] + sums[7] + 8)>>4; | 552 src[l4]= ((src[l4]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4; |
| 533 src[l5]= ((src[l5]<<2) + (sums[3] + sums[6]<<1) + sums[1] + sums[8] + 8)>>4; | 553 src[l5]= ((src[l5]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4; |
| 534 src[l6]= ((src[l6]<<2) + (last + sums[7] + sums[4]<<1) + sums[2] + 8)>>4; | 554 src[l6]= ((src[l6]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4; |
| 535 src[l7]= ((last + src[l7]<<2) + (src[l8] + sums[5]<<1) + sums[3] + 8)>>4; | 555 src[l7]= (((last + src[l7])<<2) + ((src[l8] + sums[5])<<1) + sums[3] + 8)>>4; |
| 536 src[l8]= ((sums[8]<<2) + (last + sums[6]<<1) + sums[4] + 8)>>4; | 556 src[l8]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4; |
| 537 | 557 |
| 538 src++; | 558 src++; |
| 539 } | 559 } |
| 540 | 560 |
| 541 #endif | 561 #endif |
| 621 const int l2= stride + l1; | 641 const int l2= stride + l1; |
| 622 const int l3= stride + l2; | 642 const int l3= stride + l2; |
| 623 const int l4= stride + l3; | 643 const int l4= stride + l3; |
| 624 const int l5= stride + l4; | 644 const int l5= stride + l4; |
| 625 const int l6= stride + l5; | 645 const int l6= stride + l5; |
| 626 const int l7= stride + l6; | 646 // const int l7= stride + l6; |
| 627 const int l8= stride + l7; | 647 // const int l8= stride + l7; |
| 628 const int l9= stride + l8; | 648 // const int l9= stride + l8; |
| 629 int x; | 649 int x; |
| 630 src+= stride*3; | 650 src+= stride*3; |
| 631 for(x=0; x<BLOCK_SIZE; x++) | 651 for(x=0; x<BLOCK_SIZE; x++) |
| 632 { | 652 { |
| 633 if(ABS(src[l4]-src[l5]) < QP + QP/4) | 653 if(ABS(src[l4]-src[l5]) < QP + QP/4) |
| 747 const int l3= stride + l2; | 767 const int l3= stride + l2; |
| 748 const int l4= stride + l3; | 768 const int l4= stride + l3; |
| 749 const int l5= stride + l4; | 769 const int l5= stride + l4; |
| 750 const int l6= stride + l5; | 770 const int l6= stride + l5; |
| 751 const int l7= stride + l6; | 771 const int l7= stride + l6; |
| 752 const int l8= stride + l7; | 772 // const int l8= stride + l7; |
| 753 const int l9= stride + l8; | 773 // const int l9= stride + l8; |
| 754 int x; | 774 int x; |
| 755 | 775 |
| 756 src+= stride*3; | 776 src+= stride*3; |
| 757 for(x=0; x<BLOCK_SIZE; x++) | 777 for(x=0; x<BLOCK_SIZE; x++) |
| 758 { | 778 { |
| 1201 "movq temp3, %%mm1 \n\t" // H3 - H4 | 1221 "movq temp3, %%mm1 \n\t" // H3 - H4 |
| 1202 | 1222 |
| 1203 "pxor %%mm2, %%mm2 \n\t" | 1223 "pxor %%mm2, %%mm2 \n\t" |
| 1204 "pxor %%mm3, %%mm3 \n\t" | 1224 "pxor %%mm3, %%mm3 \n\t" |
| 1205 | 1225 |
| 1206 // FIXME rounding error | |
| 1207 "psraw $1, %%mm0 \n\t" // (L3 - L4)/2 | |
| 1208 "psraw $1, %%mm1 \n\t" // (H3 - H4)/2 | |
| 1209 "pcmpgtw %%mm0, %%mm2 \n\t" // sign (L3-L4) | 1226 "pcmpgtw %%mm0, %%mm2 \n\t" // sign (L3-L4) |
| 1210 "pcmpgtw %%mm1, %%mm3 \n\t" // sign (H3-H4) | 1227 "pcmpgtw %%mm1, %%mm3 \n\t" // sign (H3-H4) |
| 1211 "pxor %%mm2, %%mm0 \n\t" | 1228 "pxor %%mm2, %%mm0 \n\t" |
| 1212 "pxor %%mm3, %%mm1 \n\t" | 1229 "pxor %%mm3, %%mm1 \n\t" |
| 1213 "psubw %%mm2, %%mm0 \n\t" // |L3-L4| | 1230 "psubw %%mm2, %%mm0 \n\t" // |L3-L4| |
| 1214 "psubw %%mm3, %%mm1 \n\t" // |H3-H4| | 1231 "psubw %%mm3, %%mm1 \n\t" // |H3-H4| |
| 1215 // "psrlw $1, %%mm0 \n\t" // |L3 - L4|/2 | 1232 "psrlw $1, %%mm0 \n\t" // |L3 - L4|/2 |
| 1216 // "psrlw $1, %%mm1 \n\t" // |H3 - H4|/2 | 1233 "psrlw $1, %%mm1 \n\t" // |H3 - H4|/2 |
| 1217 | 1234 |
| 1218 "pxor %%mm6, %%mm2 \n\t" | 1235 "pxor %%mm6, %%mm2 \n\t" |
| 1219 "pxor %%mm7, %%mm3 \n\t" | 1236 "pxor %%mm7, %%mm3 \n\t" |
| 1220 "pand %%mm2, %%mm4 \n\t" | 1237 "pand %%mm2, %%mm4 \n\t" |
| 1221 "pand %%mm3, %%mm5 \n\t" | 1238 "pand %%mm3, %%mm5 \n\t" |
| 1772 sums[6] = dst[5] + dst[6]; | 1789 sums[6] = dst[5] + dst[6]; |
| 1773 sums[7] = dst[6] + dst[7]; | 1790 sums[7] = dst[6] + dst[7]; |
| 1774 sums[8] = dst[7] + last; | 1791 sums[8] = dst[7] + last; |
| 1775 | 1792 |
| 1776 dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4; | 1793 dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4; |
| 1777 dst[1]= ((dst[1]<<2) + (first + sums[0] + sums[3]<<1) + sums[5] + 8)>>4; | 1794 dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4; |
| 1778 dst[2]= ((dst[2]<<2) + (first + sums[1] + sums[4]<<1) + sums[6] + 8)>>4; | 1795 dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4; |
| 1779 dst[3]= ((dst[3]<<2) + (sums[2] + sums[5]<<1) + sums[0] + sums[7] + 8)>>4; | 1796 dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4; |
| 1780 dst[4]= ((dst[4]<<2) + (sums[3] + sums[6]<<1) + sums[1] + sums[8] + 8)>>4; | 1797 dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4; |
| 1781 dst[5]= ((dst[5]<<2) + (last + sums[7] + sums[4]<<1) + sums[2] + 8)>>4; | 1798 dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4; |
| 1782 dst[6]= ((last + dst[6]<<2) + (dst[7] + sums[5]<<1) + sums[3] + 8)>>4; | 1799 dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4; |
| 1783 dst[7]= ((sums[8]<<2) + (last + sums[6]<<1) + sums[4] + 8)>>4; | 1800 dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4; |
| 1784 | 1801 |
| 1785 dst+= stride; | 1802 dst+= stride; |
| 1786 } | 1803 } |
| 1787 #endif | 1804 #endif |
| 1788 } | 1805 } |
| 1816 FIND_MIN_MAX(%%ebx, %1, 2) | 1833 FIND_MIN_MAX(%%ebx, %1, 2) |
| 1817 FIND_MIN_MAX(%0, %1, 8) | 1834 FIND_MIN_MAX(%0, %1, 8) |
| 1818 FIND_MIN_MAX(%%ebx, %1, 2) | 1835 FIND_MIN_MAX(%%ebx, %1, 2) |
| 1819 | 1836 |
| 1820 "movq %%mm6, %%mm4 \n\t" | 1837 "movq %%mm6, %%mm4 \n\t" |
| 1821 "psrlq $32, %%mm6 \n\t" | 1838 "psrlq $8, %%mm6 \n\t" |
| 1822 "pminub %%mm4, %%mm6 \n\t" | 1839 "pminub %%mm4, %%mm6 \n\t" // min of pixels |
| 1840 #ifdef HAVE_MMX2 | |
| 1841 "pshufw $0xF9, %%mm6, %%mm4 \n\t" | |
| 1842 "pminub %%mm4, %%mm6 \n\t" // min of pixels | |
| 1843 "pshufw $0xFE, %%mm6, %%mm4 \n\t" | |
| 1844 #else | |
| 1823 "movq %%mm6, %%mm4 \n\t" | 1845 "movq %%mm6, %%mm4 \n\t" |
| 1824 "psrlq $16, %%mm6 \n\t" | 1846 "psrlq $16, %%mm6 \n\t" |
| 1825 "pminub %%mm4, %%mm6 \n\t" | 1847 "pminub %%mm4, %%mm6 \n\t" |
| 1826 "movq %%mm6, %%mm4 \n\t" | 1848 "movq %%mm6, %%mm4 \n\t" |
| 1827 "psrlq $8, %%mm6 \n\t" | 1849 "psrlq $32, %%mm6 \n\t" |
| 1828 "pminub %%mm4, %%mm6 \n\t" // min of pixels | 1850 #endif |
| 1851 "pminub %%mm4, %%mm6 \n\t" | |
| 1852 | |
| 1829 | 1853 |
| 1830 "movq %%mm7, %%mm4 \n\t" | 1854 "movq %%mm7, %%mm4 \n\t" |
| 1831 "psrlq $32, %%mm7 \n\t" | 1855 "psrlq $8, %%mm7 \n\t" |
| 1832 "pmaxub %%mm4, %%mm7 \n\t" | 1856 "pmaxub %%mm4, %%mm7 \n\t" // max of pixels |
| 1857 #ifdef HAVE_MMX2 | |
| 1858 "pshufw $0xF9, %%mm7, %%mm4 \n\t" | |
| 1859 "pmaxub %%mm4, %%mm7 \n\t" // min of pixels | |
| 1860 "pshufw $0xFE, %%mm7, %%mm4 \n\t" | |
| 1861 #else | |
| 1833 "movq %%mm7, %%mm4 \n\t" | 1862 "movq %%mm7, %%mm4 \n\t" |
| 1834 "psrlq $16, %%mm7 \n\t" | 1863 "psrlq $16, %%mm7 \n\t" |
| 1835 "pmaxub %%mm4, %%mm7 \n\t" | 1864 "pmaxub %%mm4, %%mm7 \n\t" |
| 1836 "movq %%mm7, %%mm4 \n\t" | 1865 "movq %%mm7, %%mm4 \n\t" |
| 1837 "psrlq $8, %%mm7 \n\t" | 1866 "psrlq $32, %%mm7 \n\t" |
| 1838 "pmaxub %%mm4, %%mm7 \n\t" // max of pixels | 1867 #endif |
| 1868 "pmaxub %%mm4, %%mm7 \n\t" | |
| 1839 PAVGB(%%mm6, %%mm7) // (max + min)/2 | 1869 PAVGB(%%mm6, %%mm7) // (max + min)/2 |
| 1870 "punpcklbw %%mm7, %%mm7 \n\t" | |
| 1871 "punpcklbw %%mm7, %%mm7 \n\t" | |
| 1872 "punpcklbw %%mm7, %%mm7 \n\t" | |
| 1873 | |
| 1874 "movq (%0), %%mm0 \n\t" | |
| 1875 "movq %%mm0, %%mm1 \n\t" | |
| 1876 | |
| 1877 | |
| 1840 | 1878 |
| 1841 | 1879 |
| 1842 : : "r" (src), "r" (stride), "r" (QP) | 1880 : : "r" (src), "r" (stride), "r" (QP) |
| 1843 : "%eax", "%ebx" | 1881 : "%eax", "%ebx" |
| 1844 ); | 1882 ); |
| 2134 src++; | 2172 src++; |
| 2135 } | 2173 } |
| 2136 #endif | 2174 #endif |
| 2137 } | 2175 } |
| 2138 | 2176 |
| 2177 #ifdef HAVE_MMX | |
| 2139 /** | 2178 /** |
| 2140 * transposes and shift the given 8x8 Block into dst1 and dst2 | 2179 * transposes and shift the given 8x8 Block into dst1 and dst2 |
| 2141 */ | 2180 */ |
| 2142 static inline void transpose1(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride) | 2181 static inline void transpose1(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride) |
| 2143 { | 2182 { |
| 2297 | 2336 |
| 2298 :: "r" (dst), "r" (dstStride), "r" (src) | 2337 :: "r" (dst), "r" (dstStride), "r" (src) |
| 2299 : "%eax", "%ebx" | 2338 : "%eax", "%ebx" |
| 2300 ); | 2339 ); |
| 2301 } | 2340 } |
| 2302 | 2341 #endif |
| 2303 | 2342 |
| 2304 #ifdef HAVE_ODIVX_POSTPROCESS | 2343 #ifdef HAVE_ODIVX_POSTPROCESS |
| 2305 #include "../opendivx/postprocess.h" | 2344 #include "../opendivx/postprocess.h" |
| 2306 int use_old_pp=0; | 2345 int use_old_pp=0; |
| 2307 #endif | 2346 #endif |
| 2355 char *filterToken; | 2394 char *filterToken; |
| 2356 | 2395 |
| 2357 strncpy(temp, name, GET_MODE_BUFFER_SIZE); | 2396 strncpy(temp, name, GET_MODE_BUFFER_SIZE); |
| 2358 | 2397 |
| 2359 for(;;){ | 2398 for(;;){ |
| 2360 char *p2; | |
| 2361 char *filterName; | 2399 char *filterName; |
| 2362 int q= GET_PP_QUALITY_MAX; | 2400 int q= GET_PP_QUALITY_MAX; |
| 2363 int chrom=-1; | 2401 int chrom=-1; |
| 2364 char *option; | 2402 char *option; |
| 2365 char *options[OPTIONS_ARRAY_SIZE]; | 2403 char *options[OPTIONS_ARRAY_SIZE]; |
| 2601 * levelFix == 0 -> dont touch the brighness & contrast | 2639 * levelFix == 0 -> dont touch the brighness & contrast |
| 2602 */ | 2640 */ |
| 2603 static inline void blockCopy(uint8_t dst[], int dstStride, uint8_t src[], int srcStride, | 2641 static inline void blockCopy(uint8_t dst[], int dstStride, uint8_t src[], int srcStride, |
| 2604 int numLines, int levelFix) | 2642 int numLines, int levelFix) |
| 2605 { | 2643 { |
| 2644 #ifndef HAVE_MMX | |
| 2606 int i; | 2645 int i; |
| 2646 #endif | |
| 2607 if(levelFix) | 2647 if(levelFix) |
| 2608 { | 2648 { |
| 2609 #ifdef HAVE_MMX | 2649 #ifdef HAVE_MMX |
| 2610 asm volatile( | 2650 asm volatile( |
| 2611 "leal (%2,%2), %%eax \n\t" | 2651 "leal (%2,%2), %%eax \n\t" |
| 2727 | 2767 |
| 2728 /* Temporary buffers for handling the last block */ | 2768 /* Temporary buffers for handling the last block */ |
| 2729 static uint8_t *tempDstBlock= NULL; | 2769 static uint8_t *tempDstBlock= NULL; |
| 2730 static uint8_t *tempSrcBlock= NULL; | 2770 static uint8_t *tempSrcBlock= NULL; |
| 2731 | 2771 |
| 2772 #ifdef PP_FUNNY_STRIDE | |
| 2732 uint8_t *dstBlockPtrBackup; | 2773 uint8_t *dstBlockPtrBackup; |
| 2733 uint8_t *srcBlockPtrBackup; | 2774 uint8_t *srcBlockPtrBackup; |
| 2734 | 2775 #endif |
| 2776 | |
| 2777 #ifdef MORE_TIMING | |
| 2778 long long T0, T1, diffTime=0; | |
| 2779 #endif | |
| 2735 #ifdef TIMING | 2780 #ifdef TIMING |
| 2736 long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0; | 2781 long long memcpyTime=0, vertTime=0, horizTime=0, sumTime; |
| 2737 sumTime= rdtsc(); | 2782 sumTime= rdtsc(); |
| 2738 #endif | 2783 #endif |
| 2739 | 2784 |
| 2740 if(tempDst==NULL) | 2785 if(tempDst==NULL) |
| 2741 { | 2786 { |
| 3069 #endif | 3114 #endif |
| 3070 | 3115 |
| 3071 dstBlock+=8; | 3116 dstBlock+=8; |
| 3072 srcBlock+=8; | 3117 srcBlock+=8; |
| 3073 | 3118 |
| 3119 #ifdef HAVE_MMX | |
| 3074 tmpXchg= tempBlock1; | 3120 tmpXchg= tempBlock1; |
| 3075 tempBlock1= tempBlock2; | 3121 tempBlock1= tempBlock2; |
| 3076 tempBlock2 = tmpXchg; | 3122 tempBlock2 = tmpXchg; |
| 3123 #endif | |
| 3077 } | 3124 } |
| 3078 | 3125 |
| 3079 /* did we use a tmp buffer */ | 3126 /* did we use a tmp buffer */ |
| 3080 if(y+15 >= height) | 3127 if(y+15 >= height) |
| 3081 { | 3128 { |
