Mercurial > libavcodec.hg
comparison libpostproc/postprocess.c @ 130:0cce5d30d1d8 libavcodec
dering in mmx2
| author | michael |
|---|---|
| date | Thu, 25 Oct 2001 21:55:11 +0000 |
| parents | be35346e27c1 |
| children | c4caf29acc1a |
comparison
equal
deleted
inserted
replaced
| 129:be35346e27c1 | 130:0cce5d30d1d8 |
|---|---|
| 24 doVertDefFilter Ec Ec Ec | 24 doVertDefFilter Ec Ec Ec |
| 25 isHorizDC Ec Ec | 25 isHorizDC Ec Ec |
| 26 isHorizMinMaxOk a E | 26 isHorizMinMaxOk a E |
| 27 doHorizLowPass E e e | 27 doHorizLowPass E e e |
| 28 doHorizDefFilter Ec Ec Ec | 28 doHorizDefFilter Ec Ec Ec |
| 29 deRing | 29 deRing e |
| 30 Vertical RKAlgo1 E a a | 30 Vertical RKAlgo1 E a a |
| 31 Horizontal RKAlgo1 a a | 31 Horizontal RKAlgo1 a a |
| 32 Vertical X1 a E E | 32 Vertical X1 a E E |
| 33 Horizontal X1 a E E | 33 Horizontal X1 a E E |
| 34 LinIpolDeinterlace e E E* | 34 LinIpolDeinterlace e E E* |
| 63 noise reduction filters | 63 noise reduction filters |
| 64 border remover | 64 border remover |
| 65 ... | 65 ... |
| 66 | 66 |
| 67 Notes: | 67 Notes: |
| 68 fixed difference with -vo md5 between doVertDefFilter() C and MMX / MMX2 versions | |
| 69 */ | 68 */ |
| 70 | 69 |
| 71 //Changelog: use the CVS log | 70 //Changelog: use the CVS log |
| 72 | 71 |
| 73 #include <inttypes.h> | 72 #include <inttypes.h> |
| 114 static uint64_t bm11001100= 0xFFFF0000FFFF0000LL; | 113 static uint64_t bm11001100= 0xFFFF0000FFFF0000LL; |
| 115 static uint64_t b00= 0x0000000000000000LL; | 114 static uint64_t b00= 0x0000000000000000LL; |
| 116 static uint64_t b01= 0x0101010101010101LL; | 115 static uint64_t b01= 0x0101010101010101LL; |
| 117 static uint64_t b02= 0x0202020202020202LL; | 116 static uint64_t b02= 0x0202020202020202LL; |
| 118 static uint64_t b0F= 0x0F0F0F0F0F0F0F0FLL; | 117 static uint64_t b0F= 0x0F0F0F0F0F0F0F0FLL; |
| 118 static uint64_t b04= 0x0404040404040404LL; | |
| 119 static uint64_t b08= 0x0808080808080808LL; | |
| 119 static uint64_t bFF= 0xFFFFFFFFFFFFFFFFLL; | 120 static uint64_t bFF= 0xFFFFFFFFFFFFFFFFLL; |
| 120 static uint64_t b20= 0x2020202020202020LL; | 121 static uint64_t b20= 0x2020202020202020LL; |
| 121 static uint64_t b80= 0x8080808080808080LL; | 122 static uint64_t b80= 0x8080808080808080LL; |
| 122 static uint64_t b7E= 0x7E7E7E7E7E7E7E7ELL; | 123 static uint64_t b7E= 0x7E7E7E7E7E7E7E7ELL; |
| 123 static uint64_t b7C= 0x7C7C7C7C7C7C7C7CLL; | 124 static uint64_t b7C= 0x7C7C7C7C7C7C7C7CLL; |
| 127 static uint64_t temp2=0; | 128 static uint64_t temp2=0; |
| 128 static uint64_t temp3=0; | 129 static uint64_t temp3=0; |
| 129 static uint64_t temp4=0; | 130 static uint64_t temp4=0; |
| 130 static uint64_t temp5=0; | 131 static uint64_t temp5=0; |
| 131 static uint64_t pQPb=0; | 132 static uint64_t pQPb=0; |
| 133 static uint64_t pQPb2=0; | |
| 132 static uint8_t tempBlocks[8*16*2]; //used for the horizontal code | 134 static uint8_t tempBlocks[8*16*2]; //used for the horizontal code |
| 133 | 135 |
| 134 int hFlatnessThreshold= 56 - 16; | 136 int hFlatnessThreshold= 56 - 16; |
| 135 int vFlatnessThreshold= 56 - 16; | 137 int vFlatnessThreshold= 56 - 16; |
| 136 | 138 |
| 1804 #endif | 1806 #endif |
| 1805 } | 1807 } |
| 1806 | 1808 |
| 1807 static inline void dering(uint8_t src[], int stride, int QP) | 1809 static inline void dering(uint8_t src[], int stride, int QP) |
| 1808 { | 1810 { |
| 1809 //FIXME | 1811 #ifdef HAVE_MMX2 |
| 1810 | |
| 1811 #ifdef HAVE_MMX2X | |
| 1812 asm volatile( | 1812 asm volatile( |
| 1813 "movq pQPb, %%mm0 \n\t" | |
| 1814 "paddusb %%mm0, %%mm0 \n\t" | |
| 1815 "movq %%mm0, pQPb2 \n\t" | |
| 1816 | |
| 1813 "leal (%0, %1), %%eax \n\t" | 1817 "leal (%0, %1), %%eax \n\t" |
| 1814 "leal (%%eax, %1, 4), %%ebx \n\t" | 1818 "leal (%%eax, %1, 4), %%ebx \n\t" |
| 1815 // 0 1 2 3 4 5 6 7 8 9 | 1819 // 0 1 2 3 4 5 6 7 8 9 |
| 1816 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 | 1820 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 |
| 1817 | 1821 |
| 1818 "pcmpeq %%mm6, %%mm6 \n\t" | 1822 "pcmpeqb %%mm6, %%mm6 \n\t" |
| 1819 "pxor %%mm7, %%mm7 \n\t" | 1823 "pxor %%mm7, %%mm7 \n\t" |
| 1820 | 1824 |
| 1821 #define FIND_MIN_MAX(addr)\ | 1825 #define FIND_MIN_MAX(addr)\ |
| 1822 "movq (" #addr "), %%mm0, \n\t"\ | 1826 "movq " #addr ", %%mm0 \n\t"\ |
| 1823 "pminub %%mm0, %%mm6 \n\t"\ | 1827 "pminub %%mm0, %%mm6 \n\t"\ |
| 1824 "pmaxub %%mm0, %%mm7 \n\t" | 1828 "pmaxub %%mm0, %%mm7 \n\t" |
| 1825 | 1829 |
| 1826 FIND_MIN_MAX(%0) | 1830 FIND_MIN_MAX((%%eax)) |
| 1827 FIND_MIN_MAX(%%eax) | 1831 FIND_MIN_MAX((%%eax, %1)) |
| 1828 FIND_MIN_MAX(%%eax, %1) | 1832 FIND_MIN_MAX((%%eax, %1, 2)) |
| 1829 FIND_MIN_MAX(%%eax, %1, 2) | 1833 FIND_MIN_MAX((%0, %1, 4)) |
| 1830 FIND_MIN_MAX(%0, %1, 4) | 1834 FIND_MIN_MAX((%%ebx)) |
| 1831 FIND_MIN_MAX(%%ebx) | 1835 FIND_MIN_MAX((%%ebx, %1)) |
| 1832 FIND_MIN_MAX(%%ebx, %1) | 1836 FIND_MIN_MAX((%%ebx, %1, 2)) |
| 1833 FIND_MIN_MAX(%%ebx, %1, 2) | 1837 FIND_MIN_MAX((%0, %1, 8)) |
| 1834 FIND_MIN_MAX(%0, %1, 8) | |
| 1835 FIND_MIN_MAX(%%ebx, %1, 2) | |
| 1836 | 1838 |
| 1837 "movq %%mm6, %%mm4 \n\t" | 1839 "movq %%mm6, %%mm4 \n\t" |
| 1838 "psrlq $8, %%mm6 \n\t" | 1840 "psrlq $8, %%mm6 \n\t" |
| 1839 "pminub %%mm4, %%mm6 \n\t" // min of pixels | 1841 "pminub %%mm4, %%mm6 \n\t" // min of pixels |
| 1840 #ifdef HAVE_MMX2 | 1842 #ifdef HAVE_MMX2 |
| 1864 "pmaxub %%mm4, %%mm7 \n\t" | 1866 "pmaxub %%mm4, %%mm7 \n\t" |
| 1865 "movq %%mm7, %%mm4 \n\t" | 1867 "movq %%mm7, %%mm4 \n\t" |
| 1866 "psrlq $32, %%mm7 \n\t" | 1868 "psrlq $32, %%mm7 \n\t" |
| 1867 #endif | 1869 #endif |
| 1868 "pmaxub %%mm4, %%mm7 \n\t" | 1870 "pmaxub %%mm4, %%mm7 \n\t" |
| 1869 PAVGB(%%mm6, %%mm7) // (max + min)/2 | 1871 PAVGB(%%mm6, %%mm7) // a=(max + min)/2 |
| 1870 "punpcklbw %%mm7, %%mm7 \n\t" | 1872 "punpcklbw %%mm7, %%mm7 \n\t" |
| 1871 "punpcklbw %%mm7, %%mm7 \n\t" | 1873 "punpcklbw %%mm7, %%mm7 \n\t" |
| 1872 "punpcklbw %%mm7, %%mm7 \n\t" | 1874 "punpcklbw %%mm7, %%mm7 \n\t" |
| 1873 | 1875 "movq %%mm7, temp0 \n\t" |
| 1874 "movq (%0), %%mm0 \n\t" | 1876 |
| 1875 "movq %%mm0, %%mm1 \n\t" | 1877 "movq (%0), %%mm0 \n\t" // L10 |
| 1876 | 1878 "movq %%mm0, %%mm1 \n\t" // L10 |
| 1877 | 1879 "movq %%mm0, %%mm2 \n\t" // L10 |
| 1880 "psllq $8, %%mm1 \n\t" | |
| 1881 "psrlq $8, %%mm2 \n\t" | |
| 1882 "movd -4(%0), %%mm3 \n\t" | |
| 1883 "movd 8(%0), %%mm4 \n\t" | |
| 1884 "psrlq $24, %%mm3 \n\t" | |
| 1885 "psllq $56, %%mm4 \n\t" | |
| 1886 "por %%mm3, %%mm1 \n\t" // L00 | |
| 1887 "por %%mm4, %%mm2 \n\t" // L20 | |
| 1888 "movq %%mm1, %%mm3 \n\t" // L00 | |
| 1889 PAVGB(%%mm2, %%mm1) // (L20 + L00)/2 | |
| 1890 PAVGB(%%mm0, %%mm1) // (L20 + L00 + 2L10)/4 | |
| 1891 "psubusb %%mm7, %%mm0 \n\t" | |
| 1892 "psubusb %%mm7, %%mm2 \n\t" | |
| 1893 "psubusb %%mm7, %%mm3 \n\t" | |
| 1894 "pcmpeqb b00, %%mm0 \n\t" // L10 > a ? 0 : -1 | |
| 1895 "pcmpeqb b00, %%mm2 \n\t" // L20 > a ? 0 : -1 | |
| 1896 "pcmpeqb b00, %%mm3 \n\t" // L00 > a ? 0 : -1 | |
| 1897 "paddb %%mm2, %%mm0 \n\t" | |
| 1898 "paddb %%mm3, %%mm0 \n\t" | |
| 1899 | |
| 1900 "movq (%%eax), %%mm2 \n\t" // L11 | |
| 1901 "movq %%mm2, %%mm3 \n\t" // L11 | |
| 1902 "movq %%mm2, %%mm4 \n\t" // L11 | |
| 1903 "psllq $8, %%mm3 \n\t" | |
| 1904 "psrlq $8, %%mm4 \n\t" | |
| 1905 "movd -4(%%eax), %%mm5 \n\t" | |
| 1906 "movd 8(%%eax), %%mm6 \n\t" | |
| 1907 "psrlq $24, %%mm5 \n\t" | |
| 1908 "psllq $56, %%mm6 \n\t" | |
| 1909 "por %%mm5, %%mm3 \n\t" // L01 | |
| 1910 "por %%mm6, %%mm4 \n\t" // L21 | |
| 1911 "movq %%mm3, %%mm5 \n\t" // L01 | |
| 1912 PAVGB(%%mm4, %%mm3) // (L21 + L01)/2 | |
| 1913 PAVGB(%%mm2, %%mm3) // (L21 + L01 + 2L11)/4 | |
| 1914 "psubusb %%mm7, %%mm2 \n\t" | |
| 1915 "psubusb %%mm7, %%mm4 \n\t" | |
| 1916 "psubusb %%mm7, %%mm5 \n\t" | |
| 1917 "pcmpeqb b00, %%mm2 \n\t" // L11 > a ? 0 : -1 | |
| 1918 "pcmpeqb b00, %%mm4 \n\t" // L21 > a ? 0 : -1 | |
| 1919 "pcmpeqb b00, %%mm5 \n\t" // L01 > a ? 0 : -1 | |
| 1920 "paddb %%mm4, %%mm2 \n\t" | |
| 1921 "paddb %%mm5, %%mm2 \n\t" | |
| 1922 // 0, 2, 3, 1 | |
| 1923 #define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \ | |
| 1924 "movq " #src ", " #sx " \n\t" /* src[0] */\ | |
| 1925 "movq " #sx ", " #lx " \n\t" /* src[0] */\ | |
| 1926 "movq " #sx ", " #t0 " \n\t" /* src[0] */\ | |
| 1927 "psllq $8, " #lx " \n\t"\ | |
| 1928 "psrlq $8, " #t0 " \n\t"\ | |
| 1929 "movd -4" #src ", " #t1 " \n\t"\ | |
| 1930 "psrlq $24, " #t1 " \n\t"\ | |
| 1931 "por " #t1 ", " #lx " \n\t" /* src[-1] */\ | |
| 1932 "movd 8" #src ", " #t1 " \n\t"\ | |
| 1933 "psllq $56, " #t1 " \n\t"\ | |
| 1934 "por " #t1 ", " #t0 " \n\t" /* src[+1] */\ | |
| 1935 "movq " #lx ", " #t1 " \n\t" /* src[-1] */\ | |
| 1936 PAVGB(t0, lx) /* (src[-1] + src[+1])/2 */\ | |
| 1937 PAVGB(sx, lx) /* (src[-1] + 2src[0] + src[+1])/4 */\ | |
| 1938 "psubusb temp0, " #t1 " \n\t"\ | |
| 1939 "psubusb temp0, " #t0 " \n\t"\ | |
| 1940 "psubusb temp0, " #sx " \n\t"\ | |
| 1941 "pcmpeqb b00, " #t1 " \n\t" /* src[-1] > a ? 0 : -1*/\ | |
| 1942 "pcmpeqb b00, " #t0 " \n\t" /* src[+1] > a ? 0 : -1*/\ | |
| 1943 "pcmpeqb b00, " #sx " \n\t" /* src[0] > a ? 0 : -1*/\ | |
| 1944 "paddb " #t1 ", " #t0 " \n\t"\ | |
| 1945 "paddb " #t0 ", " #sx " \n\t"\ | |
| 1946 \ | |
| 1947 PAVGB(lx, pplx) \ | |
| 1948 PAVGB(plx, pplx) /* filtered */\ | |
| 1949 "movq " #dst ", " #t0 " \n\t" /* dst */\ | |
| 1950 "movq " #pplx ", " #t1 " \n\t"\ | |
| 1951 "psubusb " #t0 ", " #pplx " \n\t"\ | |
| 1952 "psubusb " #t1 ", " #t0 " \n\t"\ | |
| 1953 "por " #t0 ", " #pplx " \n\t" /* |filtered - dst| */\ | |
| 1954 "psubusb pQPb2, " #pplx " \n\t"\ | |
| 1955 "pcmpeqb b00, " #pplx " \n\t"\ | |
| 1956 "paddb " #sx ", " #ppsx " \n\t"\ | |
| 1957 "paddb " #psx ", " #ppsx " \n\t"\ | |
| 1958 "#paddb b02, " #ppsx " \n\t"\ | |
| 1959 "pand b08, " #ppsx " \n\t"\ | |
| 1960 "pcmpeqb b00, " #ppsx " \n\t"\ | |
| 1961 "pand " #pplx ", " #ppsx " \n\t"\ | |
| 1962 "pand " #ppsx ", " #t1 " \n\t"\ | |
| 1963 "pandn " #dst ", " #ppsx " \n\t"\ | |
| 1964 "por " #t1 ", " #ppsx " \n\t"\ | |
| 1965 "movq " #ppsx ", " #dst " \n\t" | |
| 1966 /* | |
| 1967 0000000 | |
| 1968 1111111 | |
| 1969 | |
| 1970 1111110 | |
| 1971 1111101 | |
| 1972 1111100 | |
| 1973 1111011 | |
| 1974 1111010 | |
| 1975 1111001 | |
| 1976 | |
| 1977 1111000 | |
| 1978 1110111 | |
| 1979 | |
| 1980 */ | |
| 1981 //DERING_CORE(dst,src ,ppsx ,psx ,sx ,pplx ,plx ,lx ,t0 ,t1) | |
| 1982 DERING_CORE((%%eax),(%%eax, %1) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7) | |
| 1983 DERING_CORE((%%eax, %1),(%%eax, %1, 2) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) | |
| 1984 DERING_CORE((%%eax, %1, 2),(%0, %1, 4) ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7) | |
| 1985 DERING_CORE((%0, %1, 4),(%%ebx) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7) | |
| 1986 DERING_CORE((%%ebx),(%%ebx, %1) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) | |
| 1987 DERING_CORE((%%ebx, %1), (%%ebx, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7) | |
| 1988 DERING_CORE((%%ebx, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7) | |
| 1989 DERING_CORE((%0, %1, 8),(%%ebx, %1, 4) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) | |
| 1878 | 1990 |
| 1879 | 1991 |
| 1880 : : "r" (src), "r" (stride), "r" (QP) | 1992 : : "r" (src), "r" (stride), "r" (QP) |
| 1881 : "%eax", "%ebx" | 1993 : "%eax", "%ebx" |
| 1882 ); | 1994 ); |
| 2872 int QPDelta= isColor ? 1<<(32-3) : 1<<(32-4); | 2984 int QPDelta= isColor ? 1<<(32-3) : 1<<(32-4); |
| 2873 int QPFrac= QPDelta; | 2985 int QPFrac= QPDelta; |
| 2874 uint8_t *tempBlock1= tempBlocks; | 2986 uint8_t *tempBlock1= tempBlocks; |
| 2875 uint8_t *tempBlock2= tempBlocks + 8; | 2987 uint8_t *tempBlock2= tempBlocks + 8; |
| 2876 #endif | 2988 #endif |
| 2877 /* can we mess with a 8x16 block from srcBlock/dstBlock downwards, if not | 2989 /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards |
| 2878 than use a temporary buffer */ | 2990 if not than use a temporary buffer */ |
| 2879 if(y+15 >= height) | 2991 if(y+15 >= height) |
| 2880 { | 2992 { |
| 2881 /* copy from line 5 to 12 of src, these will e copied with | 2993 /* copy from line 5 to 12 of src, these will be copied with |
| 2882 blockcopy to dst later */ | 2994 blockcopy to dst later */ |
| 2883 memcpy(tempSrc + srcStride*5, srcBlock + srcStride*5, | 2995 memcpy(tempSrc + srcStride*5, srcBlock + srcStride*5, |
| 2884 srcStride*MAX(height-y-5, 0) ); | 2996 srcStride*MAX(height-y-5, 0) ); |
| 2885 | 2997 |
| 2886 /* duplicate last line to fill the void upto line 12 */ | 2998 /* duplicate last line to fill the void upto line 12 */ |
| 2891 memcpy(tempSrc + srcStride*i, | 3003 memcpy(tempSrc + srcStride*i, |
| 2892 src + srcStride*(height-1), srcStride); | 3004 src + srcStride*(height-1), srcStride); |
| 2893 } | 3005 } |
| 2894 | 3006 |
| 2895 | 3007 |
| 2896 /* copy up to 5 lines of dst */ | 3008 /* copy up to 6 lines of dst */ |
| 2897 memcpy(tempDst, dstBlock, dstStride*MIN(height-y, 5) ); | 3009 memcpy(tempDst, dstBlock - dstStride, dstStride*MIN(height-y+1, 6) ); |
| 2898 dstBlock= tempDst; | 3010 dstBlock= tempDst + dstStride; |
| 2899 srcBlock= tempSrc; | 3011 srcBlock= tempSrc; |
| 2900 } | 3012 } |
| 2901 | 3013 |
| 2902 // From this point on it is guranteed that we can read and write 16 lines downward | 3014 // From this point on it is guranteed that we can read and write 16 lines downward |
| 2903 // finish 1 block before the next otherwise we'll might have a problem | 3015 // finish 1 block before the next otherwise we'll might have a problem |
| 3044 T1= rdtsc(); | 3156 T1= rdtsc(); |
| 3045 vertTime+= T1-T0; | 3157 vertTime+= T1-T0; |
| 3046 T0=T1; | 3158 T0=T1; |
| 3047 #endif | 3159 #endif |
| 3048 } | 3160 } |
| 3161 | |
| 3049 #ifdef HAVE_MMX | 3162 #ifdef HAVE_MMX |
| 3050 transpose1(tempBlock1, tempBlock2, dstBlock, dstStride); | 3163 transpose1(tempBlock1, tempBlock2, dstBlock, dstStride); |
| 3051 #endif | 3164 #endif |
| 3052 /* check if we have a previous block to deblock it with dstBlock */ | 3165 /* check if we have a previous block to deblock it with dstBlock */ |
| 3053 if(x - 8 >= 0) | 3166 if(x - 8 >= 0) |
| 3090 #ifdef MORE_TIMING | 3203 #ifdef MORE_TIMING |
| 3091 T1= rdtsc(); | 3204 T1= rdtsc(); |
| 3092 horizTime+= T1-T0; | 3205 horizTime+= T1-T0; |
| 3093 T0=T1; | 3206 T0=T1; |
| 3094 #endif | 3207 #endif |
| 3095 dering(dstBlock - 9 - stride, stride, QP); | 3208 if(mode & DERING) |
| 3209 { | |
| 3210 //FIXME filter first line | |
| 3211 if(y>0) dering(dstBlock - stride - 8, stride, QP); | |
| 3212 } | |
| 3096 } | 3213 } |
| 3097 else if(y!=0) | 3214 else if(mode & DERING) |
| 3098 dering(dstBlock - stride*9 + width-9, stride, QP); | 3215 { |
| 3099 //FIXME dering filter will not be applied to last block (bottom right) | 3216 //FIXME y+15 is required cuz of the tempBuffer thing -> bottom right block isnt filtered |
| 3217 if(y > 8 && y+15 < height) dering(dstBlock - stride*9 + width - 8, stride, QP); | |
| 3218 } | |
| 3219 | |
| 3100 | 3220 |
| 3101 #ifdef PP_FUNNY_STRIDE | 3221 #ifdef PP_FUNNY_STRIDE |
| 3102 /* did we use a tmp-block buffer */ | 3222 /* did we use a tmp-block buffer */ |
| 3103 if(x+7 >= width) | 3223 if(x+7 >= width) |
| 3104 { | 3224 { |
| 3125 | 3245 |
| 3126 /* did we use a tmp buffer */ | 3246 /* did we use a tmp buffer */ |
| 3127 if(y+15 >= height) | 3247 if(y+15 >= height) |
| 3128 { | 3248 { |
| 3129 uint8_t *dstBlock= &(dst[y*dstStride]); | 3249 uint8_t *dstBlock= &(dst[y*dstStride]); |
| 3130 memcpy(dstBlock, tempDst, dstStride*(height-y) ); | 3250 memcpy(dstBlock, tempDst + dstStride, dstStride*(height-y) ); |
| 3131 } | 3251 } |
| 3132 } | 3252 } |
| 3133 #ifdef HAVE_3DNOW | 3253 #ifdef HAVE_3DNOW |
| 3134 asm volatile("femms"); | 3254 asm volatile("femms"); |
| 3135 #elif defined (HAVE_MMX) | 3255 #elif defined (HAVE_MMX) |
