Mercurial > libavcodec.hg
comparison libpostproc/postprocess_template.c @ 158:d1a4f4ca7178 libavcodec
temporal denoiser:
bugfix?
averaging noise over the surrounding blocks
| author | michael |
|---|---|
| date | Wed, 14 Nov 2001 11:51:36 +0000 |
| parents | bc12fd7e6153 |
| children | 32e7f17a04a7 |
comparison
equal
deleted
inserted
replaced
| 157:bc12fd7e6153 | 158:d1a4f4ca7178 |
|---|---|
| 151 static uint64_t __attribute__((aligned(8))) temp4=0; | 151 static uint64_t __attribute__((aligned(8))) temp4=0; |
| 152 static uint64_t __attribute__((aligned(8))) temp5=0; | 152 static uint64_t __attribute__((aligned(8))) temp5=0; |
| 153 static uint64_t __attribute__((aligned(8))) pQPb=0; | 153 static uint64_t __attribute__((aligned(8))) pQPb=0; |
| 154 static uint64_t __attribute__((aligned(8))) pQPb2=0; | 154 static uint64_t __attribute__((aligned(8))) pQPb2=0; |
| 155 static uint8_t __attribute__((aligned(8))) tempBlocks[8*16*2]; //used for the horizontal code | 155 static uint8_t __attribute__((aligned(8))) tempBlocks[8*16*2]; //used for the horizontal code |
| 156 static uint32_t __attribute__((aligned(4))) maxTmpNoise[4]; | |
| 156 #else | 157 #else |
| 157 static uint64_t packedYOffset= 0x0000000000000000LL; | 158 static uint64_t packedYOffset= 0x0000000000000000LL; |
| 158 static uint64_t packedYScale= 0x0100010001000100LL; | 159 static uint64_t packedYScale= 0x0100010001000100LL; |
| 159 static uint8_t tempBlocks[8*16*2]; //used for the horizontal code | 160 static uint8_t tempBlocks[8*16*2]; //used for the horizontal code |
| 160 #endif | 161 #endif |
| 2594 } | 2595 } |
| 2595 #endif | 2596 #endif |
| 2596 //static int test=0; | 2597 //static int test=0; |
| 2597 | 2598 |
| 2598 static void inline tempNoiseReducer(uint8_t *src, int stride, | 2599 static void inline tempNoiseReducer(uint8_t *src, int stride, |
| 2599 uint8_t *tempBlured, int *maxNoise) | 2600 uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise) |
| 2600 { | 2601 { |
| 2601 #define FAST_L2_DIFF | 2602 #define FAST_L2_DIFF |
| 2602 //#define L1_DIFF //u should change the thresholds too if u try that one | 2603 //#define L1_DIFF //u should change the thresholds too if u try that one |
| 2603 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 2604 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 2604 asm volatile( | 2605 asm volatile( |
| 2692 | 2693 |
| 2693 "movq %%mm0, %%mm4 \n\t" | 2694 "movq %%mm0, %%mm4 \n\t" |
| 2694 "psrlq $32, %%mm0 \n\t" | 2695 "psrlq $32, %%mm0 \n\t" |
| 2695 "paddd %%mm0, %%mm4 \n\t" | 2696 "paddd %%mm0, %%mm4 \n\t" |
| 2696 "movd %%mm4, %%ecx \n\t" | 2697 "movd %%mm4, %%ecx \n\t" |
| 2698 "shll $2, %%ecx \n\t" | |
| 2699 "movl %3, %%ebx \n\t" | |
| 2700 "addl -4(%%ebx), %%ecx \n\t" | |
| 2701 "addl 4(%%ebx), %%ecx \n\t" | |
| 2702 "addl -1024(%%ebx), %%ecx \n\t" | |
| 2703 "addl $4, %%ecx \n\t" | |
| 2704 "addl 1024(%%ebx), %%ecx \n\t" | |
| 2705 "shrl $3, %%ecx \n\t" | |
| 2706 "movl %%ecx, (%%ebx) \n\t" | |
| 2707 "leal (%%eax, %2, 2), %%ebx \n\t" // 5*stride | |
| 2708 | |
| 2697 // "movl %3, %%ecx \n\t" | 2709 // "movl %3, %%ecx \n\t" |
| 2698 // "movl %%ecx, test \n\t" | 2710 // "movl %%ecx, test \n\t" |
| 2699 // "jmp 4f \n\t" | 2711 // "jmp 4f \n\t" |
| 2700 "cmpl %4, %%ecx \n\t" | 2712 "cmpl 4+maxTmpNoise, %%ecx \n\t" |
| 2701 " jb 2f \n\t" | 2713 " jb 2f \n\t" |
| 2702 "cmpl %5, %%ecx \n\t" | 2714 "cmpl 8+maxTmpNoise, %%ecx \n\t" |
| 2703 " jb 1f \n\t" | 2715 " jb 1f \n\t" |
| 2704 | 2716 |
| 2705 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride | 2717 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride |
| 2706 "movq (%0), %%mm0 \n\t" // L0 | 2718 "movq (%0), %%mm0 \n\t" // L0 |
| 2707 "movq (%0, %2), %%mm1 \n\t" // L1 | 2719 "movq (%0, %2), %%mm1 \n\t" // L1 |
| 2756 "movq %%mm6, (%0, %%eax, 2) \n\t" // L6 | 2768 "movq %%mm6, (%0, %%eax, 2) \n\t" // L6 |
| 2757 "movq %%mm7, (%0, %%ecx) \n\t" // L7 | 2769 "movq %%mm7, (%0, %%ecx) \n\t" // L7 |
| 2758 "jmp 4f \n\t" | 2770 "jmp 4f \n\t" |
| 2759 | 2771 |
| 2760 "2: \n\t" | 2772 "2: \n\t" |
| 2761 "cmpl %3, %%ecx \n\t" | 2773 "cmpl maxTmpNoise, %%ecx \n\t" |
| 2762 " jb 3f \n\t" | 2774 " jb 3f \n\t" |
| 2763 | 2775 |
| 2764 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride | 2776 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride |
| 2765 "movq (%0), %%mm0 \n\t" // L0 | 2777 "movq (%0), %%mm0 \n\t" // L0 |
| 2766 "movq (%0, %2), %%mm1 \n\t" // L1 | 2778 "movq (%0, %2), %%mm1 \n\t" // L1 |
| 2873 "movq %%mm2, (%0, %%eax, 2) \n\t" // L6 | 2885 "movq %%mm2, (%0, %%eax, 2) \n\t" // L6 |
| 2874 "movq %%mm3, (%0, %%ecx) \n\t" // L7 | 2886 "movq %%mm3, (%0, %%ecx) \n\t" // L7 |
| 2875 | 2887 |
| 2876 "4: \n\t" | 2888 "4: \n\t" |
| 2877 | 2889 |
| 2878 :: "r" (src), "r" (tempBlured), "r"(stride), | 2890 :: "r" (src), "r" (tempBlured), "r"(stride), "m" (tempBluredPast) |
| 2879 "m"(maxNoise[0]), "m"(maxNoise[1]), "m"(maxNoise[2]) | |
| 2880 : "%eax", "%ebx", "%ecx", "memory" | 2891 : "%eax", "%ebx", "%ecx", "memory" |
| 2881 ); | 2892 ); |
| 2882 //printf("%d\n", test); | 2893 //printf("%d\n", test); |
| 2883 #else | 2894 #else |
| 2884 int y; | 2895 int y; |
| 2885 int d=0; | 2896 int d=0; |
| 2886 int sysd=0; | 2897 int sysd=0; |
| 2898 int i; | |
| 2887 | 2899 |
| 2888 for(y=0; y<8; y++) | 2900 for(y=0; y<8; y++) |
| 2889 { | 2901 { |
| 2890 int x; | 2902 int x; |
| 2891 for(x=0; x<8; x++) | 2903 for(x=0; x<8; x++) |
| 2898 // d+= ABS(d1); | 2910 // d+= ABS(d1); |
| 2899 d+= d1*d1; | 2911 d+= d1*d1; |
| 2900 sysd+= d1; | 2912 sysd+= d1; |
| 2901 } | 2913 } |
| 2902 } | 2914 } |
| 2915 i=d; | |
| 2916 d= ( | |
| 2917 4*d | |
| 2918 +(*(tempBluredPast-256)) | |
| 2919 +(*(tempBluredPast-1))+ (*(tempBluredPast+1)) | |
| 2920 +(*(tempBluredPast+256)) | |
| 2921 +4)>>3; | |
| 2922 *tempBluredPast=i; | |
| 2923 // ((*tempBluredPast)*3 + d + 2)>>2; | |
| 2924 | |
| 2903 //printf("%d %d %d\n", maxNoise[0], maxNoise[1], maxNoise[2]); | 2925 //printf("%d %d %d\n", maxNoise[0], maxNoise[1], maxNoise[2]); |
| 2904 /* | 2926 /* |
| 2905 Switch between | 2927 Switch between |
| 2906 1 0 0 0 0 0 0 (0) | 2928 1 0 0 0 0 0 0 (0) |
| 2907 64 32 16 8 4 2 1 (1) | 2929 64 32 16 8 4 2 1 (1) |
| 3460 static uint8_t *tempDstBlock= NULL; | 3482 static uint8_t *tempDstBlock= NULL; |
| 3461 static uint8_t *tempSrcBlock= NULL; | 3483 static uint8_t *tempSrcBlock= NULL; |
| 3462 | 3484 |
| 3463 /* Temporal noise reducing buffers */ | 3485 /* Temporal noise reducing buffers */ |
| 3464 static uint8_t *tempBlured[3]= {NULL,NULL,NULL}; | 3486 static uint8_t *tempBlured[3]= {NULL,NULL,NULL}; |
| 3487 static uint32_t *tempBluredPast[3]= {NULL,NULL,NULL}; | |
| 3465 | 3488 |
| 3466 #ifdef PP_FUNNY_STRIDE | 3489 #ifdef PP_FUNNY_STRIDE |
| 3467 uint8_t *dstBlockPtrBackup; | 3490 uint8_t *dstBlockPtrBackup; |
| 3468 uint8_t *srcBlockPtrBackup; | 3491 uint8_t *srcBlockPtrBackup; |
| 3469 #endif | 3492 #endif |
| 3474 #ifdef TIMING | 3497 #ifdef TIMING |
| 3475 long long memcpyTime=0, vertTime=0, horizTime=0, sumTime; | 3498 long long memcpyTime=0, vertTime=0, horizTime=0, sumTime; |
| 3476 sumTime= rdtsc(); | 3499 sumTime= rdtsc(); |
| 3477 #endif | 3500 #endif |
| 3478 //mode= 0x7F; | 3501 //mode= 0x7F; |
| 3502 #ifdef HAVE_MMX | |
| 3503 maxTmpNoise[0]= ppMode->maxTmpNoise[0]; | |
| 3504 maxTmpNoise[1]= ppMode->maxTmpNoise[1]; | |
| 3505 maxTmpNoise[2]= ppMode->maxTmpNoise[2]; | |
| 3506 #endif | |
| 3479 | 3507 |
| 3480 if(tempDst==NULL) | 3508 if(tempDst==NULL) |
| 3481 { | 3509 { |
| 3482 tempDst= (uint8_t*)memalign(8, 1024*24); | 3510 tempDst= (uint8_t*)memalign(8, 1024*24); |
| 3483 tempSrc= (uint8_t*)memalign(8, 1024*24); | 3511 tempSrc= (uint8_t*)memalign(8, 1024*24); |
| 3489 { | 3517 { |
| 3490 // printf("%d %d %d\n", isColor, dstStride, height); | 3518 // printf("%d %d %d\n", isColor, dstStride, height); |
| 3491 //FIXME works only as long as the size doesnt increase | 3519 //FIXME works only as long as the size doesnt increase |
| 3492 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end | 3520 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end |
| 3493 tempBlured[isColor]= (uint8_t*)memalign(8, dstStride*((height+7)&(~7)) + 17*1024); | 3521 tempBlured[isColor]= (uint8_t*)memalign(8, dstStride*((height+7)&(~7)) + 17*1024); |
| 3522 tempBluredPast[isColor]= (uint32_t*)memalign(8, 256*((height+7)&(~7))/2 + 17*1024); | |
| 3494 | 3523 |
| 3495 memset(tempBlured[isColor], 0, dstStride*((height+7)&(~7)) + 17*1024); | 3524 memset(tempBlured[isColor], 0, dstStride*((height+7)&(~7)) + 17*1024); |
| 3525 memset(tempBluredPast[isColor], 0, 256*((height+7)&(~7))/2 + 17*1024); | |
| 3496 } | 3526 } |
| 3497 | 3527 |
| 3498 if(!yHistogram) | 3528 if(!yHistogram) |
| 3499 { | 3529 { |
| 3500 int i; | 3530 int i; |
| 3880 | 3910 |
| 3881 if(mode & TEMP_NOISE_FILTER) | 3911 if(mode & TEMP_NOISE_FILTER) |
| 3882 { | 3912 { |
| 3883 tempNoiseReducer(dstBlock-8, stride, | 3913 tempNoiseReducer(dstBlock-8, stride, |
| 3884 tempBlured[isColor] + y*dstStride + x, | 3914 tempBlured[isColor] + y*dstStride + x, |
| 3915 tempBluredPast[isColor] + (y>>3)*256 + (x>>3), | |
| 3885 ppMode->maxTmpNoise); | 3916 ppMode->maxTmpNoise); |
| 3886 } | 3917 } |
| 3887 } | 3918 } |
| 3888 | 3919 |
| 3889 #ifdef PP_FUNNY_STRIDE | 3920 #ifdef PP_FUNNY_STRIDE |
| 3918 | 3949 |
| 3919 if((mode & TEMP_NOISE_FILTER)) | 3950 if((mode & TEMP_NOISE_FILTER)) |
| 3920 { | 3951 { |
| 3921 tempNoiseReducer(dstBlock-8, dstStride, | 3952 tempNoiseReducer(dstBlock-8, dstStride, |
| 3922 tempBlured[isColor] + y*dstStride + x, | 3953 tempBlured[isColor] + y*dstStride + x, |
| 3954 tempBluredPast[isColor] + (y>>3)*256 + (x>>3), | |
| 3923 ppMode->maxTmpNoise); | 3955 ppMode->maxTmpNoise); |
| 3924 } | 3956 } |
| 3925 | 3957 |
| 3926 /* did we use a tmp buffer for the last lines*/ | 3958 /* did we use a tmp buffer for the last lines*/ |
| 3927 if(y+15 >= height) | 3959 if(y+15 >= height) |
