Mercurial > libavcodec.hg
comparison libpostproc/postprocess.c @ 791:4f61ca80b6c1 libavcodec
better deblocking filter
| author | michael |
|---|---|
| date | Tue, 29 Oct 2002 18:35:15 +0000 |
| parents | 4914252c963a |
| children | 2d1283d511b7 |
comparison
equal
deleted
inserted
replaced
| 790:b9156f8e6747 | 791:4f61ca80b6c1 |
|---|---|
| 57 (the if/else stuff per block is slowing things down) | 57 (the if/else stuff per block is slowing things down) |
| 58 compare the quality & speed of all filters | 58 compare the quality & speed of all filters |
| 59 split this huge file | 59 split this huge file |
| 60 optimize c versions | 60 optimize c versions |
| 61 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks | 61 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks |
| 62 put fastmemcpy back | |
| 63 ... | 62 ... |
| 64 */ | 63 */ |
| 65 | 64 |
| 66 //Changelog: use the CVS log | 65 //Changelog: use the CVS log |
| 67 | 66 |
| 147 uint8_t *tempSrcBlock; | 146 uint8_t *tempSrcBlock; |
| 148 uint8_t *deintTemp; | 147 uint8_t *deintTemp; |
| 149 | 148 |
| 150 uint64_t __attribute__((aligned(8))) pQPb; | 149 uint64_t __attribute__((aligned(8))) pQPb; |
| 151 uint64_t __attribute__((aligned(8))) pQPb2; | 150 uint64_t __attribute__((aligned(8))) pQPb2; |
| 151 | |
| 152 uint64_t __attribute__((aligned(8))) mmxDcOffset[32]; | |
| 153 uint64_t __attribute__((aligned(8))) mmxDcThreshold[32]; | |
| 152 | 154 |
| 153 uint64_t __attribute__((aligned(8))) mmxDcOffset; | 155 QP_STORE_T *nonBQPTable; |
| 154 uint64_t __attribute__((aligned(8))) mmxDcThreshold; | 156 |
| 155 | |
| 156 int QP; | 157 int QP; |
| 157 int dcOffset; | 158 int nonBQP; |
| 158 int dcThreshold; | |
| 159 | 159 |
| 160 int frameNum; | 160 int frameNum; |
| 161 | 161 |
| 162 PPMode ppMode; | 162 PPMode ppMode; |
| 163 } PPContext; | 163 } PPContext; |
| 245 */ | 245 */ |
| 246 static inline int isHorizDC(uint8_t src[], int stride, PPContext *c) | 246 static inline int isHorizDC(uint8_t src[], int stride, PPContext *c) |
| 247 { | 247 { |
| 248 int numEq= 0; | 248 int numEq= 0; |
| 249 int y; | 249 int y; |
| 250 const int dcOffset= c->dcOffset; | 250 const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1; |
| 251 const int dcThreshold= c->dcThreshold; | 251 const int dcThreshold= dcOffset*2 + 1; |
| 252 for(y=0; y<BLOCK_SIZE; y++) | 252 for(y=0; y<BLOCK_SIZE; y++) |
| 253 { | 253 { |
| 254 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; | 254 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; |
| 255 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++; | 255 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++; |
| 256 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++; | 256 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++; |
| 267 * Check if the middle 8x8 Block in the given 8x16 block is flat | 267 * Check if the middle 8x8 Block in the given 8x16 block is flat |
| 268 */ | 268 */ |
| 269 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ | 269 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ |
| 270 int numEq= 0; | 270 int numEq= 0; |
| 271 int y; | 271 int y; |
| 272 const int dcOffset= c->dcOffset; | 272 const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1; |
| 273 const int dcThreshold= c->dcThreshold; | 273 const int dcThreshold= dcOffset*2 + 1; |
| 274 src+= stride*4; // src points to begin of the 8x8 Block | 274 src+= stride*4; // src points to begin of the 8x8 Block |
| 275 for(y=0; y<BLOCK_SIZE-1; y++) | 275 for(y=0; y<BLOCK_SIZE-1; y++) |
| 276 { | 276 { |
| 277 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++; | 277 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++; |
| 278 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++; | 278 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++; |
| 723 } | 723 } |
| 724 } | 724 } |
| 725 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK) | 725 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK) |
| 726 { | 726 { |
| 727 int o; | 727 int o; |
| 728 ppMode.maxDcDiff=1; | 728 ppMode.baseDcDiff=256/4; |
| 729 // hFlatnessThreshold= 40; | 729 // hFlatnessThreshold= 40; |
| 730 // vFlatnessThreshold= 40; | 730 // vFlatnessThreshold= 40; |
| 731 | 731 |
| 732 for(o=0; options[o]!=NULL && o<2; o++) | 732 for(o=0; options[o]!=NULL && o<2; o++) |
| 733 { | 733 { |
| 734 char *tail; | 734 char *tail; |
| 735 int val= strtol(options[o], &tail, 0); | 735 int val= strtol(options[o], &tail, 0); |
| 736 if(tail==options[o]) break; | 736 if(tail==options[o]) break; |
| 737 | 737 |
| 738 numOfUnknownOptions--; | 738 numOfUnknownOptions--; |
| 739 if(o==0) ppMode.maxDcDiff= val; | 739 if(o==0) ppMode.baseDcDiff= val; |
| 740 else ppMode.flatnessThreshold= val; | 740 else ppMode.flatnessThreshold= val; |
| 741 } | 741 } |
| 742 } | 742 } |
| 743 else if(filters[i].mask == FORCE_QUANT) | 743 else if(filters[i].mask == FORCE_QUANT) |
| 744 { | 744 { |
| 766 } | 766 } |
| 767 | 767 |
| 768 void *getPPContext(int width, int height){ | 768 void *getPPContext(int width, int height){ |
| 769 PPContext *c= memalign(32, sizeof(PPContext)); | 769 PPContext *c= memalign(32, sizeof(PPContext)); |
| 770 int i; | 770 int i; |
| 771 int mbWidth = (width+15)>>4; | |
| 772 int mbHeight= (height+15)>>4; | |
| 771 | 773 |
| 772 c->tempBlocks= (uint8_t*)memalign(8, 2*16*8); | 774 c->tempBlocks= (uint8_t*)memalign(8, 2*16*8); |
| 773 c->yHistogram= (uint64_t*)memalign(8, 256*sizeof(uint64_t)); | 775 c->yHistogram= (uint64_t*)memalign(8, 256*sizeof(uint64_t)); |
| 774 for(i=0; i<256; i++) | 776 for(i=0; i<256; i++) |
| 775 c->yHistogram[i]= width*height/64*15/256; | 777 c->yHistogram[i]= width*height/64*15/256; |
| 787 c->tempDst= (uint8_t*)memalign(8, 1024*24); | 789 c->tempDst= (uint8_t*)memalign(8, 1024*24); |
| 788 c->tempSrc= (uint8_t*)memalign(8, 1024*24); | 790 c->tempSrc= (uint8_t*)memalign(8, 1024*24); |
| 789 c->tempDstBlock= (uint8_t*)memalign(8, 1024*24); | 791 c->tempDstBlock= (uint8_t*)memalign(8, 1024*24); |
| 790 c->tempSrcBlock= (uint8_t*)memalign(8, 1024*24); | 792 c->tempSrcBlock= (uint8_t*)memalign(8, 1024*24); |
| 791 c->deintTemp= (uint8_t*)memalign(8, width+16); | 793 c->deintTemp= (uint8_t*)memalign(8, width+16); |
| 794 c->nonBQPTable= (QP_STORE_T*)memalign(8, mbWidth*mbHeight*sizeof(QP_STORE_T)); | |
| 795 memset(c->nonBQPTable, 0, mbWidth*mbHeight*sizeof(QP_STORE_T)); | |
| 792 | 796 |
| 793 c->frameNum=-1; | 797 c->frameNum=-1; |
| 794 | 798 |
| 795 return c; | 799 return c; |
| 796 } | 800 } |
| 807 free(c->tempDst); | 811 free(c->tempDst); |
| 808 free(c->tempSrc); | 812 free(c->tempSrc); |
| 809 free(c->tempDstBlock); | 813 free(c->tempDstBlock); |
| 810 free(c->tempSrcBlock); | 814 free(c->tempSrcBlock); |
| 811 free(c->deintTemp); | 815 free(c->deintTemp); |
| 816 free(c->nonBQPTable); | |
| 812 | 817 |
| 813 free(c); | 818 free(c); |
| 814 } | 819 } |
| 815 | 820 |
| 816 //FIXME move this shit away from here | 821 //FIXME move this shit away from here |
| 839 } | 844 } |
| 840 | 845 |
| 841 | 846 |
| 842 void postprocess(uint8_t * src[3], int srcStride[3], | 847 void postprocess(uint8_t * src[3], int srcStride[3], |
| 843 uint8_t * dst[3], int dstStride[3], | 848 uint8_t * dst[3], int dstStride[3], |
| 844 int horizontalSize, int verticalSize, | 849 int width, int height, |
| 845 QP_STORE_T *QP_store, int QPStride, | 850 QP_STORE_T *QP_store, int QPStride, |
| 846 PPMode *mode, void *c) | 851 PPMode *mode, void *vc, int pict_type) |
| 847 { | 852 { |
| 848 | 853 int mbWidth = (width+15)>>4; |
| 854 int mbHeight= (height+15)>>4; | |
| 849 QP_STORE_T quantArray[2048/8]; | 855 QP_STORE_T quantArray[2048/8]; |
| 856 PPContext *c = (PPContext*)vc; | |
| 850 | 857 |
| 851 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) | 858 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) |
| 852 { | 859 { |
| 853 int i; | 860 int i; |
| 854 QP_store= quantArray; | 861 QP_store= quantArray; |
| 856 if(mode->lumMode & FORCE_QUANT) | 863 if(mode->lumMode & FORCE_QUANT) |
| 857 for(i=0; i<2048/8; i++) quantArray[i]= mode->forcedQuant; | 864 for(i=0; i<2048/8; i++) quantArray[i]= mode->forcedQuant; |
| 858 else | 865 else |
| 859 for(i=0; i<2048/8; i++) quantArray[i]= 1; | 866 for(i=0; i<2048/8; i++) quantArray[i]= 1; |
| 860 } | 867 } |
| 868 if(0){ | |
| 869 int x,y; | |
| 870 for(y=0; y<mbHeight; y++){ | |
| 871 for(x=0; x<mbWidth; x++){ | |
| 872 printf("%2d ", QP_store[x + y*QPStride]); | |
| 873 } | |
| 874 printf("\n"); | |
| 875 } | |
| 876 printf("\n"); | |
| 877 } | |
| 878 //printf("pict_type:%d\n", pict_type); | |
| 879 if(pict_type!=3) | |
| 880 { | |
| 881 int x,y; | |
| 882 for(y=0; y<mbHeight; y++){ | |
| 883 for(x=0; x<mbWidth; x++){ | |
| 884 int qscale= QP_store[x + y*QPStride]; | |
| 885 if(qscale&~31) | |
| 886 qscale=31; | |
| 887 c->nonBQPTable[y*mbWidth + x]= qscale; | |
| 888 } | |
| 889 } | |
| 890 } | |
| 861 | 891 |
| 862 if(firstTime2 && verbose) | 892 if(firstTime2 && verbose) |
| 863 { | 893 { |
| 864 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode); | 894 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode); |
| 865 firstTime2=0; | 895 firstTime2=0; |
| 866 } | 896 } |
| 867 | 897 |
| 868 postProcess(src[0], srcStride[0], dst[0], dstStride[0], | 898 postProcess(src[0], srcStride[0], dst[0], dstStride[0], |
| 869 horizontalSize, verticalSize, QP_store, QPStride, 0, mode, c); | 899 width, height, QP_store, QPStride, 0, mode, c); |
| 870 | 900 |
| 871 horizontalSize = (horizontalSize+1)>> 1; | 901 width = (width +1)>>1; |
| 872 verticalSize = (verticalSize+1)>>1; | 902 height = (height+1)>>1; |
| 873 | 903 |
| 874 if(mode->chromMode) | 904 if(mode->chromMode) |
| 875 { | 905 { |
| 876 postProcess(src[1], srcStride[1], dst[1], dstStride[1], | 906 postProcess(src[1], srcStride[1], dst[1], dstStride[1], |
| 877 horizontalSize, verticalSize, QP_store, QPStride, 1, mode, c); | 907 width, height, QP_store, QPStride, 1, mode, c); |
| 878 postProcess(src[2], srcStride[2], dst[2], dstStride[2], | 908 postProcess(src[2], srcStride[2], dst[2], dstStride[2], |
| 879 horizontalSize, verticalSize, QP_store, QPStride, 2, mode, c); | 909 width, height, QP_store, QPStride, 2, mode, c); |
| 880 } | 910 } |
| 881 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) | 911 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) |
| 882 { | 912 { |
| 883 memcpy(dst[1], src[1], srcStride[1]*verticalSize); | 913 memcpy(dst[1], src[1], srcStride[1]*height); |
| 884 memcpy(dst[2], src[2], srcStride[2]*verticalSize); | 914 memcpy(dst[2], src[2], srcStride[2]*height); |
| 885 } | 915 } |
| 886 else | 916 else |
| 887 { | 917 { |
| 888 int y; | 918 int y; |
| 889 for(y=0; y<verticalSize; y++) | 919 for(y=0; y<height; y++) |
| 890 { | 920 { |
| 891 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), horizontalSize); | 921 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width); |
| 892 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), horizontalSize); | 922 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width); |
| 893 } | 923 } |
| 894 } | 924 } |
| 895 } | 925 } |
| 896 | 926 |
