Mercurial > libavcodec.hg
comparison libpostproc/postprocess_template.c @ 791:4f61ca80b6c1 libavcodec
better deblocking filter
| author | michael |
|---|---|
| date | Tue, 29 Oct 2002 18:35:15 +0000 |
| parents | 54079a650ba8 |
| children | 8e9faf69110f |
comparison
equal
deleted
inserted
replaced
| 790:b9156f8e6747 | 791:4f61ca80b6c1 |
|---|---|
| 54 src+= stride*4; // src points to begin of the 8x8 Block | 54 src+= stride*4; // src points to begin of the 8x8 Block |
| 55 asm volatile( | 55 asm volatile( |
| 56 "leal (%1, %2), %%eax \n\t" | 56 "leal (%1, %2), %%eax \n\t" |
| 57 // 0 1 2 3 4 5 6 7 8 9 | 57 // 0 1 2 3 4 5 6 7 8 9 |
| 58 // %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2 | 58 // %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2 |
| 59 "movq %3, %%mm7 \n\t" // mm7 = 0x7F | 59 "movq %3, %%mm7 \n\t" |
| 60 "movq %4, %%mm6 \n\t" // mm6 = 0x7D | 60 "movq %4, %%mm6 \n\t" |
| | 61 |
| 61 "movq (%1), %%mm0 \n\t" | 62 "movq (%1), %%mm0 \n\t" |
| 62 "movq (%%eax), %%mm1 \n\t" | 63 "movq (%%eax), %%mm1 \n\t" |
| 63 "psubb %%mm1, %%mm0 \n\t" // mm0 = differnece | 64 "psubb %%mm1, %%mm0 \n\t" // mm0 = differnece |
| 64 "paddb %%mm7, %%mm0 \n\t" | 65 "paddb %%mm7, %%mm0 \n\t" |
| 65 "pcmpgtb %%mm6, %%mm0 \n\t" | 66 "pcmpgtb %%mm6, %%mm0 \n\t" |
| 117 "psrlq $32, %%mm0 \n\t" | 118 "psrlq $32, %%mm0 \n\t" |
| 118 "paddb %%mm1, %%mm0 \n\t" | 119 "paddb %%mm1, %%mm0 \n\t" |
| 119 #endif | 120 #endif |
| 120 "movd %%mm0, %0 \n\t" | 121 "movd %%mm0, %0 \n\t" |
| 121 : "=r" (numEq) | 122 : "=r" (numEq) |
| 122 : "r" (src), "r" (stride), "m" (c->mmxDcOffset), "m" (c->mmxDcThreshold) | 123 : "r" (src), "r" (stride), "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP]) |
| 123 : "%eax" | 124 : "%eax" |
| 124 ); | 125 ); |
| 125 numEq= (-numEq) &0xFF; | 126 numEq= (-numEq) &0xFF; |
| 126 return numEq > c->ppMode.flatnessThreshold; | 127 return numEq > c->ppMode.flatnessThreshold; |
| 127 } | 128 } |
| 148 : "=r" (isOk) | 149 : "=r" (isOk) |
| 149 : "r" (src), "r" (stride), "m" (c->pQPb) | 150 : "r" (src), "r" (stride), "m" (c->pQPb) |
| 150 ); | 151 ); |
| 151 return isOk==0; | 152 return isOk==0; |
| 152 #else | 153 #else |
| | 154 #if 1 |
| 153 int x; | 155 int x; |
| 154 const int QP= c->QP; | 156 const int QP= c->QP; |
| 155 src+= stride*3; | 157 src+= stride*3; |
| 156 for(x=0; x<BLOCK_SIZE; x++) | 158 for(x=0; x<BLOCK_SIZE; x++) |
| 157 { | 159 { |
| 158 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; | 160 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; |
| 159 } | 161 } |
| 160 | 162 |
| 161 return 1; | 163 return 1; |
| | 164 #else |
| | 165 int x; |
| | 166 const int QP= c->QP; |
| | 167 src+= stride*4; |
| | 168 for(x=0; x<BLOCK_SIZE; x++) |
| | 169 { |
| | 170 int min=255; |
| | 171 int max=0; |
| | 172 int y; |
| | 173 for(y=0; y<8; y++){ |
| | 174 int v= src[x + y*stride]; |
| | 175 if(v>max) max=v; |
| | 176 if(v<min) min=v; |
| | 177 } |
| | 178 if(max-min > 2*QP) return 0; |
| | 179 } |
| | 180 return 1; |
| | 181 #endif |
| 162 #endif | 182 #endif |
| 163 } | 183 } |
| 164 | 184 |
| 165 /** | 185 /** |
| 166 * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) | 186 * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) |
| 2637 const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode; | 2657 const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode; |
| 2638 #endif | 2658 #endif |
| 2639 int black=0, white=255; // blackest black and whitest white in the picture | 2659 int black=0, white=255; // blackest black and whitest white in the picture |
| 2640 int QPCorrecture= 256*256; | 2660 int QPCorrecture= 256*256; |
| 2641 | 2661 |
| 2642 int copyAhead; | 2662 int copyAhead, i; |
| 2643 | 2663 |
| 2644 //FIXME remove | 2664 //FIXME remove |
| 2645 uint64_t * const yHistogram= c.yHistogram; | 2665 uint64_t * const yHistogram= c.yHistogram; |
| 2646 uint8_t * const tempSrc= c.tempSrc; | 2666 uint8_t * const tempSrc= c.tempSrc; |
| 2647 uint8_t * const tempDst= c.tempDst; | 2667 uint8_t * const tempDst= c.tempDst; |
| 2648 | 2668 const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4; |
| 2649 c.dcOffset= c.ppMode.maxDcDiff; | |
| 2650 c.dcThreshold= c.ppMode.maxDcDiff*2 + 1; | |
| 2651 | 2669 |
| 2652 #ifdef HAVE_MMX | 2670 #ifdef HAVE_MMX |
| 2653 c.mmxDcOffset= 0x7F - c.dcOffset; | 2671 for(i=0; i<32; i++){ |
| 2654 c.mmxDcThreshold= 0x7F - c.dcThreshold; | 2672 int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1; |
| 2655 | 2673 int threshold= offset*2 + 1; |
| 2656 c.mmxDcOffset*= 0x0101010101010101LL; | 2674 c.mmxDcOffset[i]= 0x7F - offset; |
| 2657 c.mmxDcThreshold*= 0x0101010101010101LL; | 2675 c.mmxDcThreshold[i]= 0x7F - threshold; |
| | 2676 c.mmxDcOffset[i]*= 0x0101010101010101LL; |
| | 2677 c.mmxDcThreshold[i]*= 0x0101010101010101LL; |
| | 2678 } |
| 2658 #endif | 2679 #endif |
| 2659 | 2680 |
| 2660 if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16; | 2681 if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16; |
| 2661 else if( (mode & LINEAR_BLEND_DEINT_FILTER) | 2682 else if( (mode & LINEAR_BLEND_DEINT_FILTER) |
| 2662 || (mode & FFMPEG_DEINT_FILTER)) copyAhead=14; | 2683 || (mode & FFMPEG_DEINT_FILTER)) copyAhead=14; |
| 2812 uint8_t *dstBlock= &(dst[y*dstStride]); | 2833 uint8_t *dstBlock= &(dst[y*dstStride]); |
| 2813 #ifdef HAVE_MMX | 2834 #ifdef HAVE_MMX |
| 2814 uint8_t *tempBlock1= c.tempBlocks; | 2835 uint8_t *tempBlock1= c.tempBlocks; |
| 2815 uint8_t *tempBlock2= c.tempBlocks + 8; | 2836 uint8_t *tempBlock2= c.tempBlocks + 8; |
| 2816 #endif | 2837 #endif |
| 2817 #ifdef ARCH_X86 | |
| 2818 int *QPptr= isColor ? &QPs[(y>>3)*QPStride] :&QPs[(y>>4)*QPStride]; | 2838 int *QPptr= isColor ? &QPs[(y>>3)*QPStride] :&QPs[(y>>4)*QPStride]; |
| 2819 int QPDelta= isColor ? (-1) : 1<<31; | 2839 int *nonBQPptr= isColor ? &c.nonBQPTable[(y>>3)*mbWidth] :&c.nonBQPTable[(y>>4)*mbWidth]; |
| 2820 int QPFrac= 1<<30; | |
| 2821 #endif | |
| 2822 int QP=0; | 2840 int QP=0; |
| 2823 /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards | 2841 /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards |
| 2824 if not than use a temporary buffer */ | 2842 if not than use a temporary buffer */ |
| 2825 if(y+15 >= height) | 2843 if(y+15 >= height) |
| 2826 { | 2844 { |
| 2853 { | 2871 { |
| 2854 const int stride= dstStride; | 2872 const int stride= dstStride; |
| 2855 #ifdef HAVE_MMX | 2873 #ifdef HAVE_MMX |
| 2856 uint8_t *tmpXchg; | 2874 uint8_t *tmpXchg; |
| 2857 #endif | 2875 #endif |
| 2858 #ifdef ARCH_X86 | 2876 if(isColor) |
| 2859 QP= *QPptr; | |
| 2860 asm volatile( | |
| 2861 "addl %2, %1 \n\t" | |
| 2862 "sbbl %%eax, %%eax \n\t" | |
| 2863 "shll $2, %%eax \n\t" | |
| 2864 "subl %%eax, %0 \n\t" | |
| 2865 : "+r" (QPptr), "+m" (QPFrac) | |
| 2866 : "r" (QPDelta) | |
| 2867 : "%eax" | |
| 2868 ); | |
| 2869 #else | |
| 2870 QP= isColor ? | |
| 2871 QPs[(y>>3)*QPStride + (x>>3)]: | |
| 2872 QPs[(y>>4)*QPStride + (x>>4)]; | |
| 2873 #endif | |
| 2874 if(!isColor) | |
| 2875 { | 2877 { |
| | 2878 QP= QPptr[x>>3]; |
| | 2879 c.nonBQP= nonBQPptr[x>>3]; |
| | 2880 } |
| | 2881 else |
| | 2882 { |
| | 2883 QP= QPptr[x>>4]; |
| 2876 QP= (QP* QPCorrecture + 256*128)>>16; | 2884 QP= (QP* QPCorrecture + 256*128)>>16; |
| | 2885 c.nonBQP= nonBQPptr[x>>4]; |
| | 2886 c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16; |
| 2877 yHistogram[ srcBlock[srcStride*12 + 4] ]++; | 2887 yHistogram[ srcBlock[srcStride*12 + 4] ]++; |
| 2878 } | 2888 } |
| 2879 //printf("%d ", QP); | |
| 2880 c.QP= QP; | 2889 c.QP= QP; |
| 2881 #ifdef HAVE_MMX | 2890 #ifdef HAVE_MMX |
| 2882 asm volatile( | 2891 asm volatile( |
| 2883 "movd %1, %%mm7 \n\t" | 2892 "movd %1, %%mm7 \n\t" |
| 2884 "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP | 2893 "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP |
