Mercurial > libavcodec.hg
comparison libpostproc/postprocess.c @ 140:52ed0baddd56 libavcodec
Commit message: minor speedup; cleanup
| author | michael |
|---|---|
| date | Tue, 30 Oct 2001 21:14:02 +0000 |
| parents | 5083d662ff85 |
| children | 626bfabff1f5 |
comparison
Legend (row status): equal · deleted · inserted · replaced
| 139:7fc045e7a924 | 140:52ed0baddd56 |
|---|---|
| 60 compare the quality & speed of all filters | 60 compare the quality & speed of all filters |
| 61 split this huge file | 61 split this huge file |
| 62 fix warnings (unused vars, ...) | 62 fix warnings (unused vars, ...) |
| 63 noise reduction filters | 63 noise reduction filters |
| 64 border remover | 64 border remover |
| 65 optimize c versions | |
| 65 ... | 66 ... |
| 66 | 67 |
| 67 Notes: | 68 Notes: |
| 68 */ | 69 */ |
| 69 | 70 |
| 415 static inline void doVertLowPass(uint8_t *src, int stride, int QP) | 416 static inline void doVertLowPass(uint8_t *src, int stride, int QP) |
| 416 { | 417 { |
| 417 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 418 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 418 src+= stride*3; | 419 src+= stride*3; |
| 419 asm volatile( //"movv %0 %1 %2\n\t" | 420 asm volatile( //"movv %0 %1 %2\n\t" |
| 420 "pushl %0 \n\t" | |
| 421 "movq pQPb, %%mm0 \n\t" // QP,..., QP | 421 "movq pQPb, %%mm0 \n\t" // QP,..., QP |
| 422 | 422 |
| 423 "movq (%0), %%mm6 \n\t" | 423 "movq (%0), %%mm6 \n\t" |
| 424 "movq (%0, %1), %%mm5 \n\t" | 424 "movq (%0, %1), %%mm5 \n\t" |
| 425 "movq %%mm5, %%mm1 \n\t" | 425 "movq %%mm5, %%mm1 \n\t" |
| 533 PAVGB(%%mm7, %%mm5) // 11 6 /8 | 533 PAVGB(%%mm7, %%mm5) // 11 6 /8 |
| 534 | 534 |
| 535 PAVGB(%%mm3, %%mm0) // 112 /4 | 535 PAVGB(%%mm3, %%mm0) // 112 /4 |
| 536 PAVGB(%%mm0, %%mm5) // 112246 /16 | 536 PAVGB(%%mm0, %%mm5) // 112246 /16 |
| 537 "movq %%mm5, (%%eax, %1, 4) \n\t" // X | 537 "movq %%mm5, (%%eax, %1, 4) \n\t" // X |
| 538 "popl %0\n\t" | 538 "subl %1, %0 \n\t" |
| 539 | 539 |
| 540 : | 540 : |
| 541 : "r" (src), "r" (stride) | 541 : "r" (src), "r" (stride) |
| 542 : "%eax", "%ebx" | 542 : "%eax", "%ebx" |
| 543 ); | 543 ); |
| 1165 "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7 | 1165 "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7 |
| 1166 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7 | 1166 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7 |
| 1167 | 1167 |
| 1168 "movq temp0, %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 | 1168 "movq temp0, %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 |
| 1169 "movq temp1, %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 | 1169 "movq temp1, %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 |
| 1170 //FIXME pxor, psubw, pmax for abs | 1170 |
| 1171 #ifdef HAVE_MMX2 | |
| 1172 "movq %%mm7, %%mm6 \n\t" // 0 | |
| 1173 "psubw %%mm0, %%mm6 \n\t" | |
| 1174 "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| | |
| 1175 "movq %%mm7, %%mm6 \n\t" // 0 | |
| 1176 "psubw %%mm1, %%mm6 \n\t" | |
| 1177 "pmaxsw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7| | |
| 1178 "movq %%mm7, %%mm6 \n\t" // 0 | |
| 1179 "psubw %%mm2, %%mm6 \n\t" | |
| 1180 "pmaxsw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3| | |
| 1181 "movq %%mm7, %%mm6 \n\t" // 0 | |
| 1182 "psubw %%mm3, %%mm6 \n\t" | |
| 1183 "pmaxsw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| | |
| 1184 #else | |
| 1171 "movq %%mm7, %%mm6 \n\t" // 0 | 1185 "movq %%mm7, %%mm6 \n\t" // 0 |
| 1172 "pcmpgtw %%mm0, %%mm6 \n\t" | 1186 "pcmpgtw %%mm0, %%mm6 \n\t" |
| 1173 "pxor %%mm6, %%mm0 \n\t" | 1187 "pxor %%mm6, %%mm0 \n\t" |
| 1174 "psubw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| | 1188 "psubw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| |
| 1175 "movq %%mm7, %%mm6 \n\t" // 0 | 1189 "movq %%mm7, %%mm6 \n\t" // 0 |
| 1176 "pcmpgtw %%mm1, %%mm6 \n\t" | 1190 "pcmpgtw %%mm1, %%mm6 \n\t" |
| 1177 "pxor %%mm6, %%mm1 \n\t" | 1191 "pxor %%mm6, %%mm1 \n\t" |
| 1178 "psubw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7| | 1192 "psubw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7| |
| 1179 | |
| 1180 "movq %%mm7, %%mm6 \n\t" // 0 | 1193 "movq %%mm7, %%mm6 \n\t" // 0 |
| 1181 "pcmpgtw %%mm2, %%mm6 \n\t" | 1194 "pcmpgtw %%mm2, %%mm6 \n\t" |
| 1182 "pxor %%mm6, %%mm2 \n\t" | 1195 "pxor %%mm6, %%mm2 \n\t" |
| 1183 "psubw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3| | 1196 "psubw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3| |
| 1184 "movq %%mm7, %%mm6 \n\t" // 0 | 1197 "movq %%mm7, %%mm6 \n\t" // 0 |
| 1185 "pcmpgtw %%mm3, %%mm6 \n\t" | 1198 "pcmpgtw %%mm3, %%mm6 \n\t" |
| 1186 "pxor %%mm6, %%mm3 \n\t" | 1199 "pxor %%mm6, %%mm3 \n\t" |
| 1187 "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| | 1200 "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| |
| 1201 #endif | |
| 1188 | 1202 |
| 1189 #ifdef HAVE_MMX2 | 1203 #ifdef HAVE_MMX2 |
| 1190 "pminsw %%mm2, %%mm0 \n\t" | 1204 "pminsw %%mm2, %%mm0 \n\t" |
| 1191 "pminsw %%mm3, %%mm1 \n\t" | 1205 "pminsw %%mm3, %%mm1 \n\t" |
| 1192 #else | 1206 #else |
| 1979 PAVGB(t0, lx) /* (src[-1] + src[+1])/2 */\ | 1993 PAVGB(t0, lx) /* (src[-1] + src[+1])/2 */\ |
| 1980 PAVGB(sx, lx) /* (src[-1] + 2src[0] + src[+1])/4 */\ | 1994 PAVGB(sx, lx) /* (src[-1] + 2src[0] + src[+1])/4 */\ |
| 1981 PAVGB(lx, pplx) \ | 1995 PAVGB(lx, pplx) \ |
| 1982 "movq " #lx ", temp1 \n\t"\ | 1996 "movq " #lx ", temp1 \n\t"\ |
| 1983 "movq temp0, " #lx " \n\t"\ | 1997 "movq temp0, " #lx " \n\t"\ |
| 1984 "psubusb " #lx ", " #t1 " \n\t"\ | 1998 "psubusb " #lx ", " #t1 " \n\t"\ |
| 1985 "psubusb " #lx ", " #t0 " \n\t"\ | 1999 "psubusb " #lx ", " #t0 " \n\t"\ |
| 1986 "psubusb " #lx ", " #sx " \n\t"\ | 2000 "psubusb " #lx ", " #sx " \n\t"\ |
| 1987 "movq b00, " #lx " \n\t"\ | 2001 "movq b00, " #lx " \n\t"\ |
| 1988 "pcmpeqb " #lx ", " #t1 " \n\t" /* src[-1] > a ? 0 : -1*/\ | 2002 "pcmpeqb " #lx ", " #t1 " \n\t" /* src[-1] > a ? 0 : -1*/\ |
| 1989 "pcmpeqb " #lx ", " #t0 " \n\t" /* src[+1] > a ? 0 : -1*/\ | 2003 "pcmpeqb " #lx ", " #t0 " \n\t" /* src[+1] > a ? 0 : -1*/\ |
| 1990 "pcmpeqb " #lx ", " #sx " \n\t" /* src[0] > a ? 0 : -1*/\ | 2004 "pcmpeqb " #lx ", " #sx " \n\t" /* src[0] > a ? 0 : -1*/\ |
| 1991 "paddb " #t1 ", " #t0 " \n\t"\ | 2005 "paddb " #t1 ", " #t0 " \n\t"\ |
| 1992 "paddb " #t0 ", " #sx " \n\t"\ | 2006 "paddb " #t0 ", " #sx " \n\t"\ |
| 1993 \ | 2007 \ |
| 1994 PAVGB(plx, pplx) /* filtered */\ | 2008 PAVGB(plx, pplx) /* filtered */\ |
| 1995 "movq " #dst ", " #t0 " \n\t" /* dst */\ | 2009 "movq " #dst ", " #t0 " \n\t" /* dst */\ |
| 2000 PMINUB(t1, pplx, t0)\ | 2014 PMINUB(t1, pplx, t0)\ |
| 2001 "paddb " #sx ", " #ppsx " \n\t"\ | 2015 "paddb " #sx ", " #ppsx " \n\t"\ |
| 2002 "paddb " #psx ", " #ppsx " \n\t"\ | 2016 "paddb " #psx ", " #ppsx " \n\t"\ |
| 2003 "#paddb b02, " #ppsx " \n\t"\ | 2017 "#paddb b02, " #ppsx " \n\t"\ |
| 2004 "pand b08, " #ppsx " \n\t"\ | 2018 "pand b08, " #ppsx " \n\t"\ |
| 2005 "pcmpeqb " #lx ", " #ppsx " \n\t"\ | 2019 "pcmpeqb " #lx ", " #ppsx " \n\t"\ |
| 2006 "pand " #ppsx ", " #pplx " \n\t"\ | 2020 "pand " #ppsx ", " #pplx " \n\t"\ |
| 2007 "pandn " #dst ", " #ppsx " \n\t"\ | 2021 "pandn " #dst ", " #ppsx " \n\t"\ |
| 2008 "por " #pplx ", " #ppsx " \n\t"\ | 2022 "por " #pplx ", " #ppsx " \n\t"\ |
| 2009 "movq " #ppsx ", " #dst " \n\t"\ | 2023 "movq " #ppsx ", " #dst " \n\t"\ |
| 2010 "movq temp1, " #lx " \n\t" | 2024 "movq temp1, " #lx " \n\t" |
| 2011 | 2025 |
| 2012 /* | 2026 /* |
| 2013 0000000 | 2027 0000000 |
| 2994 #endif | 3008 #endif |
| 2995 #ifdef TIMING | 3009 #ifdef TIMING |
| 2996 long long memcpyTime=0, vertTime=0, horizTime=0, sumTime; | 3010 long long memcpyTime=0, vertTime=0, horizTime=0, sumTime; |
| 2997 sumTime= rdtsc(); | 3011 sumTime= rdtsc(); |
| 2998 #endif | 3012 #endif |
| 3013 //mode= 0x7F; | |
| 2999 | 3014 |
| 3000 if(tempDst==NULL) | 3015 if(tempDst==NULL) |
| 3001 { | 3016 { |
| 3002 tempDst= (uint8_t*)memalign(8, 1024*24); | 3017 tempDst= (uint8_t*)memalign(8, 1024*24); |
| 3003 tempSrc= (uint8_t*)memalign(8, 1024*24); | 3018 tempSrc= (uint8_t*)memalign(8, 1024*24); |
