Mercurial > libavcodec.hg
comparison libpostproc/postprocess.c @ 107:bd163e13a0fb libavcodec
minor cleanups
median deinterlace in MMX
fixed typos
| author | michael |
|---|---|
| date | Tue, 16 Oct 2001 02:31:14 +0000 |
| parents | 389391a6d0bf |
| children | dfa9fde4b72d |
comparison
equal
deleted
inserted
replaced
| 106:389391a6d0bf | 107:bd163e13a0fb |
|---|---|
| 30 Vertical RKAlgo1 E a a | 30 Vertical RKAlgo1 E a a |
| 31 Vertical X1 a E E | 31 Vertical X1 a E E |
| 32 Horizontal X1 a E E | 32 Horizontal X1 a E E |
| 33 LinIpolDeinterlace a E E* | 33 LinIpolDeinterlace a E E* |
| 34 LinBlendDeinterlace a E E* | 34 LinBlendDeinterlace a E E* |
| 35 MedianDeinterlace a E | 35 MedianDeinterlace Ec Ec |
| 36 | 36 |
| 37 | 37 |
| 38 * i dont have a 3dnow CPU -> its untested | 38 * i dont have a 3dnow CPU -> its untested |
| 39 E = Exact implementation | 39 E = Exact implementation |
| 40 e = allmost exact implementation | 40 e = allmost exact implementation |
| 54 write a faster and higher quality deblocking filter :) | 54 write a faster and higher quality deblocking filter :) |
| 55 do something about the speed of the horizontal filters | 55 do something about the speed of the horizontal filters |
| 56 make the mainloop more flexible (variable number of blocks at once | 56 make the mainloop more flexible (variable number of blocks at once |
| 57 (the if/else stuff per block is slowing things down) | 57 (the if/else stuff per block is slowing things down) |
| 58 compare the quality & speed of all filters | 58 compare the quality & speed of all filters |
| 59 implement a few simple deinterlacing filters | |
| 60 split this huge file | 59 split this huge file |
| 61 fix warnings (unused vars, ...) | 60 fix warnings (unused vars, ...) |
| 61 noise reduction filters | |
| 62 ... | 62 ... |
| 63 | 63 |
| 64 Notes: | 64 Notes: |
| 65 | 65 |
| 66 | |
| 66 */ | 67 */ |
| 67 | 68 |
| 68 /* | 69 //Changelog: use the CVS log |
| 69 Changelog: use the CVS log | |
| 70 rewrote the horizontal lowpass filter to fix a bug which caused a blocky look | |
| 71 added deinterlace filters (linear interpolate, linear blend, median) | |
| 72 minor cleanups (removed some outcommented stuff) | |
| 73 0.1.3 | |
| 74 bugfixes: last 3 lines not brightness/contrast corrected | |
| 75 brightness statistics messed up with initial black pic | |
| 76 changed initial values of the brightness statistics | |
| 77 C++ -> C conversation | |
| 78 QP range question solved (very likely 1<=QP<=32 according to arpi) | |
| 79 new experimental vertical deblocking filter | |
| 80 RK filter has 3dNow support now (untested) | |
| 81 0.1.2 | |
| 82 fixed a bug in the horizontal default filter | |
| 83 3dnow version of the Horizontal & Vertical Lowpass filters | |
| 84 mmx version of the Horizontal Default filter | |
| 85 mmx2 & C versions of a simple filter described in a paper from ramkishor & karandikar | |
| 86 added mode flags & quality2mode function | |
| 87 0.1.1 | |
| 88 */ | |
| 89 | |
| 90 | 70 |
| 91 #include <inttypes.h> | 71 #include <inttypes.h> |
| 92 #include <stdio.h> | 72 #include <stdio.h> |
| 93 #include <stdlib.h> | 73 #include <stdlib.h> |
| 94 #include "../config.h" | 74 #include "../config.h" |
| 152 | 132 |
| 153 int maxAllowedY=255; | 133 int maxAllowedY=255; |
| 154 //FIXME can never make a movie's black brighter (anyone needs that?) | 134 //FIXME can never make a movie's black brighter (anyone needs that?) |
| 155 int minAllowedY=0; | 135 int minAllowedY=0; |
| 156 | 136 |
| 157 #ifdef TIMEING | 137 #ifdef TIMING |
| 158 static inline long long rdtsc() | 138 static inline long long rdtsc() |
| 159 { | 139 { |
| 160 long long l; | 140 long long l; |
| 161 asm volatile( "rdtsc\n\t" | 141 asm volatile( "rdtsc\n\t" |
| 162 : "=A" (l) | 142 : "=A" (l) |
| 362 | 342 |
| 363 } | 343 } |
| 364 | 344 |
| 365 /** | 345 /** |
| 366 * Do a vertical low pass filter on the 8x10 block (only write to the 8x8 block in the middle) | 346 * Do a vertical low pass filter on the 8x10 block (only write to the 8x8 block in the middle) |
| 367 * useing the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 | 347 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 |
| 368 */ | 348 */ |
| 369 static inline void doVertLowPass(uint8_t *src, int stride, int QP) | 349 static inline void doVertLowPass(uint8_t *src, int stride, int QP) |
| 370 { | 350 { |
| 371 // QP= 64; | 351 // QP= 64; |
| 372 | 352 |
| 1581 #endif | 1561 #endif |
| 1582 } | 1562 } |
| 1583 | 1563 |
| 1584 /** | 1564 /** |
| 1585 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) | 1565 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) |
| 1586 * useing the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) | 1566 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) |
| 1587 * useing the 7-Tap Filter (2,2,2,4,2,2,2)/16 (MMX2/3DNOW version) | 1567 * using the 7-Tap Filter (2,2,2,4,2,2,2)/16 (MMX2/3DNOW version) |
| 1588 */ | 1568 */ |
| 1589 static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) | 1569 static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) |
| 1590 { | 1570 { |
| 1591 //return; | 1571 //return; |
| 1592 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1572 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 2122 * Deinterlaces the given block | 2102 * Deinterlaces the given block |
| 2123 * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block | 2103 * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block |
| 2124 */ | 2104 */ |
| 2125 static inline void deInterlaceMedian(uint8_t src[], int stride) | 2105 static inline void deInterlaceMedian(uint8_t src[], int stride) |
| 2126 { | 2106 { |
| 2127 #if defined (HAVE_MMX2) | 2107 #ifdef HAVE_MMX |
| 2108 #ifdef HAVE_MMX2 | |
| 2128 asm volatile( | 2109 asm volatile( |
| 2129 "leal (%0, %1), %%eax \n\t" | 2110 "leal (%0, %1), %%eax \n\t" |
| 2130 "leal (%%eax, %1, 4), %%ebx \n\t" | 2111 "leal (%%eax, %1, 4), %%ebx \n\t" |
| 2131 // 0 1 2 3 4 5 6 7 8 9 | 2112 // 0 1 2 3 4 5 6 7 8 9 |
| 2132 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 | 2113 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 |
| 2170 | 2151 |
| 2171 | 2152 |
| 2172 : : "r" (src), "r" (stride) | 2153 : : "r" (src), "r" (stride) |
| 2173 : "%eax", "%ebx" | 2154 : "%eax", "%ebx" |
| 2174 ); | 2155 ); |
| 2156 | |
| 2157 #else // MMX without MMX2 | |
| 2158 asm volatile( | |
| 2159 "leal (%0, %1), %%eax \n\t" | |
| 2160 "leal (%%eax, %1, 4), %%ebx \n\t" | |
| 2161 // 0 1 2 3 4 5 6 7 8 9 | |
| 2162 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 | |
| 2163 "pxor %%mm7, %%mm7 \n\t" | |
| 2164 | |
| 2165 #define MEDIAN(a,b,c)\ | |
| 2166 "movq " #a ", %%mm0 \n\t"\ | |
| 2167 "movq " #b ", %%mm2 \n\t"\ | |
| 2168 "movq " #c ", %%mm1 \n\t"\ | |
| 2169 "movq %%mm0, %%mm3 \n\t"\ | |
| 2170 "movq %%mm1, %%mm4 \n\t"\ | |
| 2171 "movq %%mm2, %%mm5 \n\t"\ | |
| 2172 "psubusb %%mm1, %%mm3 \n\t"\ | |
| 2173 "psubusb %%mm2, %%mm4 \n\t"\ | |
| 2174 "psubusb %%mm0, %%mm5 \n\t"\ | |
| 2175 "pcmpeqb %%mm7, %%mm3 \n\t"\ | |
| 2176 "pcmpeqb %%mm7, %%mm4 \n\t"\ | |
| 2177 "pcmpeqb %%mm7, %%mm5 \n\t"\ | |
| 2178 "movq %%mm3, %%mm6 \n\t"\ | |
| 2179 "pxor %%mm4, %%mm3 \n\t"\ | |
| 2180 "pxor %%mm5, %%mm4 \n\t"\ | |
| 2181 "pxor %%mm6, %%mm5 \n\t"\ | |
| 2182 "por %%mm3, %%mm1 \n\t"\ | |
| 2183 "por %%mm4, %%mm2 \n\t"\ | |
| 2184 "por %%mm5, %%mm0 \n\t"\ | |
| 2185 "pand %%mm2, %%mm0 \n\t"\ | |
| 2186 "pand %%mm1, %%mm0 \n\t"\ | |
| 2187 "movq %%mm0, " #b " \n\t" | |
| 2188 | |
| 2189 MEDIAN((%0), (%%eax), (%%eax, %1)) | |
| 2190 MEDIAN((%%eax, %1), (%%eax, %1, 2), (%0, %1, 4)) | |
| 2191 MEDIAN((%0, %1, 4), (%%ebx), (%%ebx, %1)) | |
| 2192 MEDIAN((%%ebx, %1), (%%ebx, %1, 2), (%0, %1, 8)) | |
| 2193 | |
| 2194 : : "r" (src), "r" (stride) | |
| 2195 : "%eax", "%ebx" | |
| 2196 ); | |
| 2197 #endif // MMX | |
| 2175 #else | 2198 #else |
| 2176 //FIXME | 2199 //FIXME |
| 2177 int x; | 2200 int x; |
| 2178 for(x=0; x<8; x++) | 2201 for(x=0; x<8; x++) |
| 2179 { | 2202 { |
| 2191 } | 2214 } |
| 2192 | 2215 |
| 2193 /** | 2216 /** |
| 2194 * Deinterlaces the given block | 2217 * Deinterlaces the given block |
| 2195 * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block | 2218 * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block |
| 2196 * will shift the image up by 1 line (FIXME if this is a problem) | |
| 2197 */ | 2219 */ |
| 2198 static inline void deInterlaceMedianLastRow(uint8_t src[], int stride) | 2220 static inline void deInterlaceMedianLastRow(uint8_t src[], int stride) |
| 2199 { | 2221 { |
| 2200 #if defined (HAVE_MMX2) | 2222 #ifdef HAVE_MMX |
| 2223 #ifdef HAVE_MMX2 | |
| 2201 asm volatile( | 2224 asm volatile( |
| 2202 "leal (%0, %1), %%eax \n\t" | 2225 "leal (%0, %1), %%eax \n\t" |
| 2203 "leal (%%eax, %1, 4), %%ebx \n\t" | 2226 "leal (%%eax, %1, 4), %%ebx \n\t" |
| 2204 // 0 1 2 3 4 5 6 7 8 9 | 2227 // 0 1 2 3 4 5 6 7 8 9 |
| 2205 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 | 2228 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 |
| 2235 "movq %%mm1, (%%ebx, %1, 2) \n\t" | 2258 "movq %%mm1, (%%ebx, %1, 2) \n\t" |
| 2236 | 2259 |
| 2237 : : "r" (src), "r" (stride) | 2260 : : "r" (src), "r" (stride) |
| 2238 : "%eax", "%ebx" | 2261 : "%eax", "%ebx" |
| 2239 ); | 2262 ); |
| 2263 #else //MMX & no MMX2 | |
| 2264 asm volatile( | |
| 2265 "leal (%0, %1), %%eax \n\t" | |
| 2266 "leal (%%eax, %1, 4), %%ebx \n\t" | |
| 2267 // 0 1 2 3 4 5 6 7 8 9 | |
| 2268 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 | |
| 2269 "pxor %%mm7, %%mm7 \n\t" | |
| 2270 | |
| 2271 MEDIAN((%0), (%%eax), (%%eax, %1)) | |
| 2272 MEDIAN((%%eax, %1), (%%eax, %1, 2), (%0, %1, 4)) | |
| 2273 MEDIAN((%0, %1, 4), (%%ebx), (%%ebx, %1)) | |
| 2274 | |
| 2275 "movq (%%ebx, %1), %%mm0 \n\t" | |
| 2276 "movq %%mm0, (%%ebx, %1, 2) \n\t" | |
| 2277 | |
| 2278 : : "r" (src), "r" (stride) | |
| 2279 : "%eax", "%ebx" | |
| 2280 ); | |
| 2281 | |
| 2282 #endif //MMX | |
| 2240 #else | 2283 #else |
| 2241 //FIXME | 2284 //FIXME |
| 2242 int x; | 2285 int x; |
| 2243 for(x=0; x<8; x++) | 2286 for(x=0; x<8; x++) |
| 2244 { | 2287 { |
| 2253 src++; | 2296 src++; |
| 2254 } | 2297 } |
| 2255 #endif | 2298 #endif |
| 2256 } | 2299 } |
| 2257 | 2300 |
| 2258 | |
| 2259 #ifdef HAVE_ODIVX_POSTPROCESS | 2301 #ifdef HAVE_ODIVX_POSTPROCESS |
| 2260 #include "../opendivx/postprocess.h" | 2302 #include "../opendivx/postprocess.h" |
| 2261 int use_old_pp=0; | 2303 int use_old_pp=0; |
| 2262 #endif | 2304 #endif |
| 2263 | 2305 |
| 2264 static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, | 2306 static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
| 2265 QP_STORE_T QPs[], int QPStride, int isColor, int mode); | 2307 QP_STORE_T QPs[], int QPStride, int isColor, int mode); |
| 2266 | 2308 |
| 2267 /** | 2309 /** |
| 2268 * ... | 2310 * ... |
| 2269 * the mode value is interpreted as a quality value if its negative, its range is then (-1 ... -63) | |
| 2270 * -63 is best quality -1 is worst | |
| 2271 */ | 2311 */ |
| 2272 void postprocess(unsigned char * src[], int src_stride, | 2312 void postprocess(unsigned char * src[], int src_stride, |
| 2273 unsigned char * dst[], int dst_stride, | 2313 unsigned char * dst[], int dst_stride, |
| 2274 int horizontal_size, int vertical_size, | 2314 int horizontal_size, int vertical_size, |
| 2275 QP_STORE_T *QP_store, int QP_stride, | 2315 QP_STORE_T *QP_store, int QP_stride, |
| 2282 if(use_old_pp){ | 2322 if(use_old_pp){ |
| 2283 odivx_postprocess(src,src_stride,dst,dst_stride,horizontal_size,vertical_size,QP_store,QP_stride,mode); | 2323 odivx_postprocess(src,src_stride,dst,dst_stride,horizontal_size,vertical_size,QP_store,QP_stride,mode); |
| 2284 return; | 2324 return; |
| 2285 } | 2325 } |
| 2286 #endif | 2326 #endif |
| 2287 | |
| 2288 // I'm calling this from dec_video.c:video_set_postprocess() | |
| 2289 // if(mode<0) mode= getModeForQuality(-mode); | |
| 2290 | 2327 |
| 2291 /* | 2328 /* |
| 2292 long long T= rdtsc(); | 2329 long long T= rdtsc(); |
| 2293 for(int y=vertical_size-1; y>=0 ; y--) | 2330 for(int y=vertical_size-1; y>=0 ; y--) |
| 2294 memcpy(dst[0] + y*src_stride, src[0] + y*src_stride,src_stride); | 2331 memcpy(dst[0] + y*src_stride, src[0] + y*src_stride,src_stride); |
| 2498 /* we need 64bit here otherwise we'll going to have a problem | 2535 /* we need 64bit here otherwise we'll going to have a problem |
| 2499 after watching a black picture for 5 hours*/ | 2536 after watching a black picture for 5 hours*/ |
| 2500 static uint64_t *yHistogram= NULL; | 2537 static uint64_t *yHistogram= NULL; |
| 2501 int black=0, white=255; // blackest black and whitest white in the picture | 2538 int black=0, white=255; // blackest black and whitest white in the picture |
| 2502 | 2539 |
| 2503 #ifdef TIMEING | 2540 #ifdef TIMING |
| 2504 long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0; | 2541 long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0; |
| 2505 sumTime= rdtsc(); | 2542 sumTime= rdtsc(); |
| 2506 #endif | 2543 #endif |
| 2507 | 2544 |
| 2508 if(!yHistogram) | 2545 if(!yHistogram) |
| 2599 #endif | 2636 #endif |
| 2600 | 2637 |
| 2601 | 2638 |
| 2602 if(y + 12 < height) | 2639 if(y + 12 < height) |
| 2603 { | 2640 { |
| 2604 #ifdef MORE_TIMEING | 2641 #ifdef MORE_TIMING |
| 2605 T0= rdtsc(); | 2642 T0= rdtsc(); |
| 2606 #endif | 2643 #endif |
| 2607 | 2644 |
| 2608 #ifdef HAVE_MMX2 | 2645 #ifdef HAVE_MMX2 |
| 2609 prefetchnta(vertSrcBlock + (((x>>3)&3) + 2)*srcStride + 32); | 2646 prefetchnta(vertSrcBlock + (((x>>3)&3) + 2)*srcStride + 32); |
| 2633 deInterlaceInterpolateCubic(dstBlock, dstStride); | 2670 deInterlaceInterpolateCubic(dstBlock, dstStride); |
| 2634 else if(mode & CUBIC_BLEND_DEINT_FILTER) | 2671 else if(mode & CUBIC_BLEND_DEINT_FILTER) |
| 2635 deInterlaceBlendCubic(dstBlock, dstStride); | 2672 deInterlaceBlendCubic(dstBlock, dstStride); |
| 2636 */ | 2673 */ |
| 2637 | 2674 |
| 2638 #ifdef MORE_TIMEING | 2675 #ifdef MORE_TIMING |
| 2639 T1= rdtsc(); | 2676 T1= rdtsc(); |
| 2640 memcpyTime+= T1-T0; | 2677 memcpyTime+= T1-T0; |
| 2641 T0=T1; | 2678 T0=T1; |
| 2642 #endif | 2679 #endif |
| 2643 if(mode & V_DEBLOCK) | 2680 if(mode & V_DEBLOCK) |
| 2655 } | 2692 } |
| 2656 else | 2693 else |
| 2657 doVertDefFilter(vertBlock, stride, QP); | 2694 doVertDefFilter(vertBlock, stride, QP); |
| 2658 } | 2695 } |
| 2659 } | 2696 } |
| 2660 #ifdef MORE_TIMEING | 2697 #ifdef MORE_TIMING |
| 2661 T1= rdtsc(); | 2698 T1= rdtsc(); |
| 2662 vertTime+= T1-T0; | 2699 vertTime+= T1-T0; |
| 2663 T0=T1; | 2700 T0=T1; |
| 2664 #endif | 2701 #endif |
| 2665 } | 2702 } |
| 2681 */ | 2718 */ |
| 2682 } | 2719 } |
| 2683 | 2720 |
| 2684 if(x - 8 >= 0 && x<width) | 2721 if(x - 8 >= 0 && x<width) |
| 2685 { | 2722 { |
| 2686 #ifdef MORE_TIMEING | 2723 #ifdef MORE_TIMING |
| 2687 T0= rdtsc(); | 2724 T0= rdtsc(); |
| 2688 #endif | 2725 #endif |
| 2689 if(mode & H_DEBLOCK) | 2726 if(mode & H_DEBLOCK) |
| 2690 { | 2727 { |
| 2691 if(mode & H_X1_FILTER) | 2728 if(mode & H_X1_FILTER) |
| 2699 } | 2736 } |
| 2700 else | 2737 else |
| 2701 doHorizDefFilterAndCopyBack(dstBlock-4, stride, QP); | 2738 doHorizDefFilterAndCopyBack(dstBlock-4, stride, QP); |
| 2702 } | 2739 } |
| 2703 } | 2740 } |
| 2704 #ifdef MORE_TIMEING | 2741 #ifdef MORE_TIMING |
| 2705 T1= rdtsc(); | 2742 T1= rdtsc(); |
| 2706 horizTime+= T1-T0; | 2743 horizTime+= T1-T0; |
| 2707 T0=T1; | 2744 T0=T1; |
| 2708 #endif | 2745 #endif |
| 2709 dering(dstBlock - 9 - stride, stride, QP); | 2746 dering(dstBlock - 9 - stride, stride, QP); |
| 2723 asm volatile("femms"); | 2760 asm volatile("femms"); |
| 2724 #elif defined (HAVE_MMX) | 2761 #elif defined (HAVE_MMX) |
| 2725 asm volatile("emms"); | 2762 asm volatile("emms"); |
| 2726 #endif | 2763 #endif |
| 2727 | 2764 |
| 2728 #ifdef TIMEING | 2765 #ifdef TIMING |
| 2729 // FIXME diff is mostly the time spent for rdtsc (should subtract that but ...) | 2766 // FIXME diff is mostly the time spent for rdtsc (should subtract that but ...) |
| 2730 sumTime= rdtsc() - sumTime; | 2767 sumTime= rdtsc() - sumTime; |
| 2731 if(!isColor) | 2768 if(!isColor) |
| 2732 printf("cpy:%4dk, vert:%4dk, horiz:%4dk, sum:%4dk, diff:%4dk, color: %d/%d \r", | 2769 printf("cpy:%4dk, vert:%4dk, horiz:%4dk, sum:%4dk, diff:%4dk, color: %d/%d \r", |
| 2733 (int)(memcpyTime/1000), (int)(vertTime/1000), (int)(horizTime/1000), | 2770 (int)(memcpyTime/1000), (int)(vertTime/1000), (int)(horizTime/1000), |
