Mercurial > libavcodec.hg
comparison libpostproc/postprocess_template.c @ 169:20bcd5b70886 libavcodec
runtime cpu detection
| author | michael |
|---|---|
| date | Sat, 24 Nov 2001 22:16:29 +0000 |
| parents | 712c7a115164 |
| children | a0efaf471d6b |
comparison
equal
deleted
inserted
replaced
| 168:712c7a115164 | 169:20bcd5b70886 |
|---|---|
| 14 You should have received a copy of the GNU General Public License | 14 You should have received a copy of the GNU General Public License |
| 15 along with this program; if not, write to the Free Software | 15 along with this program; if not, write to the Free Software |
| 16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 17 */ | 17 */ |
| 18 | 18 |
| 19 /* | 19 #undef PAVGB |
| 20 C MMX MMX2 3DNow | 20 #undef PMINUB |
| 21 isVertDC Ec Ec | 21 #undef PMAXUB |
| 22 isVertMinMaxOk Ec Ec | |
| 23 doVertLowPass E e e | |
| 24 doVertDefFilter Ec Ec e e | |
| 25 isHorizDC Ec Ec | |
| 26 isHorizMinMaxOk a E | |
| 27 doHorizLowPass E e e | |
| 28 doHorizDefFilter Ec Ec e e | |
| 29 deRing E e e* | |
| 30 Vertical RKAlgo1 E a a | |
| 31 Horizontal RKAlgo1 a a | |
| 32 Vertical X1# a E E | |
| 33 Horizontal X1# a E E | |
| 34 LinIpolDeinterlace e E E* | |
| 35 CubicIpolDeinterlace a e e* | |
| 36 LinBlendDeinterlace e E E* | |
| 37 MedianDeinterlace# Ec Ec | |
| 38 TempDeNoiser# E e e | |
| 39 | |
| 40 * I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work, so it seems to work | |
| 41 # more or less self-invented filters, so the exactness isn't too meaningful | |
| 42 E = Exact implementation | |
| 43 e = almost exact implementation (slightly different rounding,...) | |
| 44 a = alternative / approximate impl | |
| 45 c = checked against the other implementations (-vo md5) | |
| 46 */ | |
| 47 | |
| 48 /* | |
| 49 TODO: | |
| 50 reduce the time wasted on the mem transfer | |
| 51 implement everything in C at least (done at the moment but ...) | |
| 52 unroll stuff if instructions depend too much on the prior one | |
| 53 we use 8x8 blocks for the horizontal filters, opendivx seems to use 8x4? | |
| 54 move YScale thing to the end instead of fixing QP | |
| 55 write a faster and higher quality deblocking filter :) | |
| 56 make the mainloop more flexible (variable number of blocks at once | |
| 57 (the if/else stuff per block is slowing things down) | |
| 58 compare the quality & speed of all filters | |
| 59 split this huge file | |
| 60 border remover | |
| 61 optimize c versions | |
| 62 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks | |
| 63 smart blur | |
| 64 commandline option for the deblock / dering thresholds | |
| 65 ... | |
| 66 */ | |
| 67 | |
| 68 //Changelog: use the CVS log | |
| 69 | |
| 70 #include "../config.h" | |
| 71 #include <inttypes.h> | |
| 72 #include <stdio.h> | |
| 73 #include <stdlib.h> | |
| 74 #include <string.h> | |
| 75 #ifdef HAVE_MALLOC_H | |
| 76 #include <malloc.h> | |
| 77 #endif | |
| 78 //#undef HAVE_MMX2 | |
| 79 //#define HAVE_3DNOW | |
| 80 //#undef HAVE_MMX | |
| 81 //#define DEBUG_BRIGHTNESS | |
| 82 #include "../libvo/fastmemcpy.h" | |
| 83 #include "postprocess.h" | |
| 84 | |
| 85 #define MIN(a,b) ((a) > (b) ? (b) : (a)) | |
| 86 #define MAX(a,b) ((a) < (b) ? (b) : (a)) | |
| 87 #define ABS(a) ((a) > 0 ? (a) : (-(a))) | |
| 88 #define SIGN(a) ((a) > 0 ? 1 : -1) | |
| 89 | 22 |
| 90 #ifdef HAVE_MMX2 | 23 #ifdef HAVE_MMX2 |
| 91 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t" | 24 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t" |
| 92 #elif defined (HAVE_3DNOW) | 25 #elif defined (HAVE_3DNOW) |
| 93 #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" | 26 #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" |
| 109 "psubusb " #a ", " #b " \n\t"\ | 42 "psubusb " #a ", " #b " \n\t"\ |
| 110 "paddb " #a ", " #b " \n\t" | 43 "paddb " #a ", " #b " \n\t" |
| 111 #endif | 44 #endif |
| 112 | 45 |
| 113 | 46 |
| 114 #define GET_MODE_BUFFER_SIZE 500 | |
| 115 #define OPTIONS_ARRAY_SIZE 10 | |
| 116 | |
| 117 #ifdef HAVE_MMX | |
| 118 static volatile uint64_t __attribute__((aligned(8))) packedYOffset= 0x0000000000000000LL; | |
| 119 static volatile uint64_t __attribute__((aligned(8))) packedYScale= 0x0100010001000100LL; | |
| 120 static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005LL; | |
| 121 static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020LL; | |
| 122 static uint64_t __attribute__((aligned(8))) w1400= 0x1400140014001400LL; | |
| 123 static uint64_t __attribute__((aligned(8))) bm00000001= 0x00000000000000FFLL; | |
| 124 static uint64_t __attribute__((aligned(8))) bm00010000= 0x000000FF00000000LL; | |
| 125 static uint64_t __attribute__((aligned(8))) bm00001000= 0x00000000FF000000LL; | |
| 126 static uint64_t __attribute__((aligned(8))) bm10000000= 0xFF00000000000000LL; | |
| 127 static uint64_t __attribute__((aligned(8))) bm10000001= 0xFF000000000000FFLL; | |
| 128 static uint64_t __attribute__((aligned(8))) bm11000011= 0xFFFF00000000FFFFLL; | |
| 129 static uint64_t __attribute__((aligned(8))) bm00000011= 0x000000000000FFFFLL; | |
| 130 static uint64_t __attribute__((aligned(8))) bm11111110= 0xFFFFFFFFFFFFFF00LL; | |
| 131 static uint64_t __attribute__((aligned(8))) bm11000000= 0xFFFF000000000000LL; | |
| 132 static uint64_t __attribute__((aligned(8))) bm00011000= 0x000000FFFF000000LL; | |
| 133 static uint64_t __attribute__((aligned(8))) bm00110011= 0x0000FFFF0000FFFFLL; | |
| 134 static uint64_t __attribute__((aligned(8))) bm11001100= 0xFFFF0000FFFF0000LL; | |
| 135 static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000LL; | |
| 136 static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101LL; | |
| 137 static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202LL; | |
| 138 static uint64_t __attribute__((aligned(8))) b0F= 0x0F0F0F0F0F0F0F0FLL; | |
| 139 static uint64_t __attribute__((aligned(8))) b04= 0x0404040404040404LL; | |
| 140 static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808LL; | |
| 141 static uint64_t __attribute__((aligned(8))) bFF= 0xFFFFFFFFFFFFFFFFLL; | |
| 142 static uint64_t __attribute__((aligned(8))) b20= 0x2020202020202020LL; | |
| 143 static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080LL; | |
| 144 static uint64_t __attribute__((aligned(8))) b7E= 0x7E7E7E7E7E7E7E7ELL; | |
| 145 static uint64_t __attribute__((aligned(8))) b7C= 0x7C7C7C7C7C7C7C7CLL; | |
| 146 static uint64_t __attribute__((aligned(8))) b3F= 0x3F3F3F3F3F3F3F3FLL; | |
| 147 static uint64_t __attribute__((aligned(8))) temp0=0; | |
| 148 static uint64_t __attribute__((aligned(8))) temp1=0; | |
| 149 static uint64_t __attribute__((aligned(8))) temp2=0; | |
| 150 static uint64_t __attribute__((aligned(8))) temp3=0; | |
| 151 static uint64_t __attribute__((aligned(8))) temp4=0; | |
| 152 static uint64_t __attribute__((aligned(8))) temp5=0; | |
| 153 static uint64_t __attribute__((aligned(8))) pQPb=0; | |
| 154 static uint64_t __attribute__((aligned(8))) pQPb2=0; | |
| 155 static uint8_t __attribute__((aligned(8))) tempBlocks[8*16*2]; //used for the horizontal code | |
| 156 static uint32_t __attribute__((aligned(4))) maxTmpNoise[4]; | |
| 157 #else | |
| 158 static uint64_t packedYOffset= 0x0000000000000000LL; | |
| 159 static uint64_t packedYScale= 0x0100010001000100LL; | |
| 160 static uint8_t tempBlocks[8*16*2]; //used for the horizontal code | |
| 161 #endif | |
| 162 | |
| 163 int hFlatnessThreshold= 56 - 16; | |
| 164 int vFlatnessThreshold= 56 - 16; | |
| 165 int deringThreshold= 20; | |
| 166 | |
| 167 //amount of "black" you are willing to lose to get a brightness-corrected picture | |
| 168 double maxClippedThreshold= 0.01; | |
| 169 | |
| 170 int maxAllowedY=234; | |
| 171 int minAllowedY=16; | |
| 172 | |
| 173 static struct PPFilter filters[]= | |
| 174 { | |
| 175 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK}, | |
| 176 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK}, | |
| 177 {"vr", "rkvdeblock", 1, 2, 4, H_RK1_FILTER}, | |
| 178 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER}, | |
| 179 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER}, | |
| 180 {"dr", "dering", 1, 5, 6, DERING}, | |
| 181 {"al", "autolevels", 0, 1, 2, LEVEL_FIX}, | |
| 182 {"lb", "linblenddeint", 0, 1, 6, LINEAR_BLEND_DEINT_FILTER}, | |
| 183 {"li", "linipoldeint", 0, 1, 6, LINEAR_IPOL_DEINT_FILTER}, | |
| 184 {"ci", "cubicipoldeint", 0, 1, 6, CUBIC_IPOL_DEINT_FILTER}, | |
| 185 {"md", "mediandeint", 0, 1, 6, MEDIAN_DEINT_FILTER}, | |
| 186 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER}, | |
| 187 {NULL, NULL,0,0,0,0} //End Marker | |
| 188 }; | |
| 189 | |
| 190 static char *replaceTable[]= | |
| 191 { | |
| 192 "default", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", | |
| 193 "de", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", | |
| 194 "fast", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", | |
| 195 "fa", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", | |
| 196 NULL //End Marker | |
| 197 }; | |
| 198 | |
| 199 #ifdef HAVE_MMX | |
| 200 static inline void unusedVariableWarningFixer() | |
| 201 { | |
| 202 if( | |
| 203 packedYOffset + packedYScale + w05 + w20 + w1400 + bm00000001 + bm00010000 | |
| 204 + bm00001000 + bm10000000 + bm10000001 + bm11000011 + bm00000011 + bm11111110 | |
| 205 + bm11000000 + bm00011000 + bm00110011 + bm11001100 + b00 + b01 + b02 + b0F | |
| 206 + bFF + b20 + b04+ b08 + pQPb2 + b80 + b7E + b7C + b3F + temp0 + temp1 + temp2 + temp3 + temp4 | |
| 207 + temp5 + pQPb== 0) b00=0; | |
| 208 } | |
| 209 #endif | |
| 210 | |
| 211 #ifdef TIMING | |
| 212 static inline long long rdtsc() | |
| 213 { | |
| 214 long long l; | |
| 215 asm volatile( "rdtsc\n\t" | |
| 216 : "=A" (l) | |
| 217 ); | |
| 218 // printf("%d\n", int(l/1000)); | |
| 219 return l; | |
| 220 } | |
| 221 #endif | |
| 222 | |
| 223 #ifdef HAVE_MMX2 | |
| 224 static inline void prefetchnta(void *p) | |
| 225 { | |
| 226 asm volatile( "prefetchnta (%0)\n\t" | |
| 227 : : "r" (p) | |
| 228 ); | |
| 229 } | |
| 230 | |
| 231 static inline void prefetcht0(void *p) | |
| 232 { | |
| 233 asm volatile( "prefetcht0 (%0)\n\t" | |
| 234 : : "r" (p) | |
| 235 ); | |
| 236 } | |
| 237 | |
| 238 static inline void prefetcht1(void *p) | |
| 239 { | |
| 240 asm volatile( "prefetcht1 (%0)\n\t" | |
| 241 : : "r" (p) | |
| 242 ); | |
| 243 } | |
| 244 | |
| 245 static inline void prefetcht2(void *p) | |
| 246 { | |
| 247 asm volatile( "prefetcht2 (%0)\n\t" | |
| 248 : : "r" (p) | |
| 249 ); | |
| 250 } | |
| 251 #endif | |
| 252 | |
| 253 //FIXME? |255-0| = 1 (shouldnt be a problem ...) | 47 //FIXME? |255-0| = 1 (shouldnt be a problem ...) |
| 254 /** | 48 /** |
| 255 * Check if the middle 8x8 Block in the given 8x16 block is flat | 49 * Check if the middle 8x8 Block in the given 8x16 block is flat |
| 256 */ | 50 */ |
| 257 static inline int isVertDC(uint8_t src[], int stride){ | 51 static inline int RENAME(isVertDC)(uint8_t src[], int stride){ |
| 258 int numEq= 0; | 52 int numEq= 0; |
| 259 #ifndef HAVE_MMX | 53 #ifndef HAVE_MMX |
| 260 int y; | 54 int y; |
| 261 #endif | 55 #endif |
| 262 src+= stride*4; // src points to begin of the 8x8 Block | 56 src+= stride*4; // src points to begin of the 8x8 Block |
| 361 */ | 155 */ |
| 362 // for(int i=0; i<numEq/8; i++) src[i]=255; | 156 // for(int i=0; i<numEq/8; i++) src[i]=255; |
| 363 return (numEq > vFlatnessThreshold) ? 1 : 0; | 157 return (numEq > vFlatnessThreshold) ? 1 : 0; |
| 364 } | 158 } |
| 365 | 159 |
| 366 static inline int isVertMinMaxOk(uint8_t src[], int stride, int QP) | 160 static inline int RENAME(isVertMinMaxOk)(uint8_t src[], int stride, int QP) |
| 367 { | 161 { |
| 368 #ifdef HAVE_MMX | 162 #ifdef HAVE_MMX |
| 369 int isOk; | 163 int isOk; |
| 370 src+= stride*3; | 164 src+= stride*3; |
| 371 asm volatile( | 165 asm volatile( |
| 418 | 212 |
| 419 /** | 213 /** |
| 420 * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) | 214 * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) |
| 421 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 | 215 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 |
| 422 */ | 216 */ |
| 423 static inline void doVertLowPass(uint8_t *src, int stride, int QP) | 217 static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP) |
| 424 { | 218 { |
| 425 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 219 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 426 src+= stride*3; | 220 src+= stride*3; |
| 427 asm volatile( //"movv %0 %1 %2\n\t" | 221 asm volatile( //"movv %0 %1 %2\n\t" |
| 428 "movq pQPb, %%mm0 \n\t" // QP,..., QP | 222 "movq pQPb, %%mm0 \n\t" // QP,..., QP |
| 600 x = 8 | 394 x = 8 |
| 601 x/2 = 4 | 395 x/2 = 4 |
| 602 x/8 = 1 | 396 x/8 = 1 |
| 603 1 12 12 23 | 397 1 12 12 23 |
| 604 */ | 398 */ |
| 605 static inline void vertRK1Filter(uint8_t *src, int stride, int QP) | 399 static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP) |
| 606 { | 400 { |
| 607 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 401 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 608 src+= stride*3; | 402 src+= stride*3; |
| 609 // FIXME rounding | 403 // FIXME rounding |
| 610 asm volatile( | 404 asm volatile( |
| 700 * will not damage linear gradients | 494 * will not damage linear gradients |
| 701 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter | 495 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter |
| 702 * can only smooth blocks at the expected locations (it can't smooth them if they did move) | 496 * can only smooth blocks at the expected locations (it can't smooth them if they did move) |
| 703 * MMX2 version does correct clipping, C version doesn't | 497 * MMX2 version does correct clipping, C version doesn't |
| 704 */ | 498 */ |
| 705 static inline void vertX1Filter(uint8_t *src, int stride, int QP) | 499 static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, int QP) |
| 706 { | 500 { |
| 707 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 501 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 708 src+= stride*3; | 502 src+= stride*3; |
| 709 | 503 |
| 710 asm volatile( | 504 asm volatile( |
| 856 } | 650 } |
| 857 */ | 651 */ |
| 858 #endif | 652 #endif |
| 859 } | 653 } |
| 860 | 654 |
| 861 /** | 655 static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP) |
| 862 * Experimental Filter 1 (Horizontal) | |
| 863 * will not damage linear gradients | |
| 864 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter | |
| 865 * can only smooth blocks at the expected locations (it can't smooth them if they did move) | |
| 866 * MMX2 version does correct clipping, C version doesn't | |
| 867 * not identical with the vertical one | |
| 868 */ | |
| 869 static inline void horizX1Filter(uint8_t *src, int stride, int QP) | |
| 870 { | |
| 871 int y; | |
| 872 //FIXME (has little in common with the mmx2 version) | |
| 873 for(y=0; y<BLOCK_SIZE; y++) | |
| 874 { | |
| 875 int a= src[1] - src[2]; | |
| 876 int b= src[3] - src[4]; | |
| 877 int c= src[5] - src[6]; | |
| 878 | |
| 879 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0); | |
| 880 | |
| 881 if(d < QP) | |
| 882 { | |
| 883 int v = d * SIGN(-b); | |
| 884 | |
| 885 src[1] +=v/8; | |
| 886 src[2] +=v/4; | |
| 887 src[3] +=3*v/8; | |
| 888 src[4] -=3*v/8; | |
| 889 src[5] -=v/4; | |
| 890 src[6] -=v/8; | |
| 891 | |
| 892 } | |
| 893 src+=stride; | |
| 894 } | |
| 895 } | |
| 896 | |
| 897 | |
| 898 static inline void doVertDefFilter(uint8_t src[], int stride, int QP) | |
| 899 { | 656 { |
| 900 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 657 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 901 /* | 658 /* |
| 902 uint8_t tmp[16]; | 659 uint8_t tmp[16]; |
| 903 const int l1= stride; | 660 const int l1= stride; |
| 1472 src++; | 1229 src++; |
| 1473 } | 1230 } |
| 1474 #endif | 1231 #endif |
| 1475 } | 1232 } |
| 1476 | 1233 |
| 1477 /** | 1234 static inline void RENAME(dering)(uint8_t src[], int stride, int QP) |
| 1478 * Check if the given 8x8 Block is mostly "flat" | |
| 1479 */ | |
| 1480 static inline int isHorizDC(uint8_t src[], int stride) | |
| 1481 { | |
| 1482 int numEq= 0; | |
| 1483 int y; | |
| 1484 for(y=0; y<BLOCK_SIZE; y++) | |
| 1485 { | |
| 1486 if(((src[0] - src[1] + 1) & 0xFFFF) < 3) numEq++; | |
| 1487 if(((src[1] - src[2] + 1) & 0xFFFF) < 3) numEq++; | |
| 1488 if(((src[2] - src[3] + 1) & 0xFFFF) < 3) numEq++; | |
| 1489 if(((src[3] - src[4] + 1) & 0xFFFF) < 3) numEq++; | |
| 1490 if(((src[4] - src[5] + 1) & 0xFFFF) < 3) numEq++; | |
| 1491 if(((src[5] - src[6] + 1) & 0xFFFF) < 3) numEq++; | |
| 1492 if(((src[6] - src[7] + 1) & 0xFFFF) < 3) numEq++; | |
| 1493 src+= stride; | |
| 1494 } | |
| 1495 return numEq > hFlatnessThreshold; | |
| 1496 } | |
| 1497 | |
| 1498 static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP) | |
| 1499 { | |
| 1500 if(abs(src[0] - src[7]) > 2*QP) return 0; | |
| 1501 | |
| 1502 return 1; | |
| 1503 } | |
| 1504 | |
| 1505 static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP) | |
| 1506 { | |
| 1507 int y; | |
| 1508 for(y=0; y<BLOCK_SIZE; y++) | |
| 1509 { | |
| 1510 const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]); | |
| 1511 | |
| 1512 if(ABS(middleEnergy) < 8*QP) | |
| 1513 { | |
| 1514 const int q=(dst[3] - dst[4])/2; | |
| 1515 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]); | |
| 1516 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]); | |
| 1517 | |
| 1518 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); | |
| 1519 d= MAX(d, 0); | |
| 1520 | |
| 1521 d= (5*d + 32) >> 6; | |
| 1522 d*= SIGN(-middleEnergy); | |
| 1523 | |
| 1524 if(q>0) | |
| 1525 { | |
| 1526 d= d<0 ? 0 : d; | |
| 1527 d= d>q ? q : d; | |
| 1528 } | |
| 1529 else | |
| 1530 { | |
| 1531 d= d>0 ? 0 : d; | |
| 1532 d= d<q ? q : d; | |
| 1533 } | |
| 1534 | |
| 1535 dst[3]-= d; | |
| 1536 dst[4]+= d; | |
| 1537 } | |
| 1538 dst+= stride; | |
| 1539 } | |
| 1540 } | |
| 1541 | |
| 1542 /** | |
| 1543 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) | |
| 1544 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) | |
| 1545 */ | |
| 1546 static inline void doHorizLowPass(uint8_t dst[], int stride, int QP) | |
| 1547 { | |
| 1548 | |
| 1549 int y; | |
| 1550 for(y=0; y<BLOCK_SIZE; y++) | |
| 1551 { | |
| 1552 const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0]; | |
| 1553 const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7]; | |
| 1554 | |
| 1555 int sums[9]; | |
| 1556 sums[0] = first + dst[0]; | |
| 1557 sums[1] = dst[0] + dst[1]; | |
| 1558 sums[2] = dst[1] + dst[2]; | |
| 1559 sums[3] = dst[2] + dst[3]; | |
| 1560 sums[4] = dst[3] + dst[4]; | |
| 1561 sums[5] = dst[4] + dst[5]; | |
| 1562 sums[6] = dst[5] + dst[6]; | |
| 1563 sums[7] = dst[6] + dst[7]; | |
| 1564 sums[8] = dst[7] + last; | |
| 1565 | |
| 1566 dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4; | |
| 1567 dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4; | |
| 1568 dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4; | |
| 1569 dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4; | |
| 1570 dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4; | |
| 1571 dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4; | |
| 1572 dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4; | |
| 1573 dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4; | |
| 1574 | |
| 1575 dst+= stride; | |
| 1576 } | |
| 1577 } | |
| 1578 | |
| 1579 | |
| 1580 static inline void dering(uint8_t src[], int stride, int QP) | |
| 1581 { | 1235 { |
| 1582 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1236 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1583 asm volatile( | 1237 asm volatile( |
| 1584 "movq pQPb, %%mm0 \n\t" | 1238 "movq pQPb, %%mm0 \n\t" |
| 1585 "paddusb %%mm0, %%mm0 \n\t" | 1239 "paddusb %%mm0, %%mm0 \n\t" |
| 1590 // 0 1 2 3 4 5 6 7 8 9 | 1244 // 0 1 2 3 4 5 6 7 8 9 |
| 1591 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 | 1245 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 |
| 1592 | 1246 |
| 1593 "pcmpeqb %%mm7, %%mm7 \n\t" | 1247 "pcmpeqb %%mm7, %%mm7 \n\t" |
| 1594 "pxor %%mm6, %%mm6 \n\t" | 1248 "pxor %%mm6, %%mm6 \n\t" |
| 1249 #undef FIND_MIN_MAX | |
| 1595 #ifdef HAVE_MMX2 | 1250 #ifdef HAVE_MMX2 |
| 1596 #define FIND_MIN_MAX(addr)\ | 1251 #define FIND_MIN_MAX(addr)\ |
| 1597 "movq " #addr ", %%mm0 \n\t"\ | 1252 "movq " #addr ", %%mm0 \n\t"\ |
| 1598 "pminub %%mm0, %%mm7 \n\t"\ | 1253 "pminub %%mm0, %%mm7 \n\t"\ |
| 1599 "pmaxub %%mm0, %%mm6 \n\t" | 1254 "pmaxub %%mm0, %%mm6 \n\t" |
| 1918 * Deinterlaces the given block | 1573 * Deinterlaces the given block |
| 1919 * will be called for every 8x8 block and can read & write from line 4-15 | 1574 * will be called for every 8x8 block and can read & write from line 4-15 |
| 1920 * lines 0-3 have been passed through the deblock / dering filters already, but can be read too | 1575 * lines 0-3 have been passed through the deblock / dering filters already, but can be read too |
| 1921 * lines 4-12 will be read into the deblocking filter and should be deinterlaced | 1576 * lines 4-12 will be read into the deblocking filter and should be deinterlaced |
| 1922 */ | 1577 */ |
| 1923 static inline void deInterlaceInterpolateLinear(uint8_t src[], int stride) | 1578 static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride) |
| 1924 { | 1579 { |
| 1925 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1580 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1926 src+= 4*stride; | 1581 src+= 4*stride; |
| 1927 asm volatile( | 1582 asm volatile( |
| 1928 "leal (%0, %1), %%eax \n\t" | 1583 "leal (%0, %1), %%eax \n\t" |
| 1967 * lines 0-3 have been passed through the deblock / dering filters already, but can be read too | 1622 * lines 0-3 have been passed through the deblock / dering filters already, but can be read too |
| 1968 * lines 4-12 will be read into the deblocking filter and should be deinterlaced | 1623 * lines 4-12 will be read into the deblocking filter and should be deinterlaced |
| 1969 * this filter will read lines 3-15 and write 7-13 | 1624 * this filter will read lines 3-15 and write 7-13 |
| 1970 * no clipping in C version | 1625 * no clipping in C version |
| 1971 */ | 1626 */ |
| 1972 static inline void deInterlaceInterpolateCubic(uint8_t src[], int stride) | 1627 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride) |
| 1973 { | 1628 { |
| 1974 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1629 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1975 src+= stride*3; | 1630 src+= stride*3; |
| 1976 asm volatile( | 1631 asm volatile( |
| 1977 "leal (%0, %1), %%eax \n\t" | 1632 "leal (%0, %1), %%eax \n\t" |
| 2032 * lines 0-3 have been passed through the deblock / dering filters already, but can be read too | 1687 * lines 0-3 have been passed through the deblock / dering filters already, but can be read too |
| 2033 * lines 4-12 will be read into the deblocking filter and should be deinterlaced | 1688 * lines 4-12 will be read into the deblocking filter and should be deinterlaced |
| 2034 * will shift the image up by 1 line (FIXME if this is a problem) | 1689 * will shift the image up by 1 line (FIXME if this is a problem) |
| 2035 * this filter will read lines 4-13 and write 4-11 | 1690 * this filter will read lines 4-13 and write 4-11 |
| 2036 */ | 1691 */ |
| 2037 static inline void deInterlaceBlendLinear(uint8_t src[], int stride) | 1692 static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride) |
| 2038 { | 1693 { |
| 2039 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1694 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 2040 src+= 4*stride; | 1695 src+= 4*stride; |
| 2041 asm volatile( | 1696 asm volatile( |
| 2042 "leal (%0, %1), %%eax \n\t" | 1697 "leal (%0, %1), %%eax \n\t" |
| 2105 * Deinterlaces the given block | 1760 * Deinterlaces the given block |
| 2106 * will be called for every 8x8 block and can read & write from line 4-15, | 1761 * will be called for every 8x8 block and can read & write from line 4-15, |
| 2107 * lines 0-3 have been passed through the deblock / dering filters already, but can be read too | 1762 * lines 0-3 have been passed through the deblock / dering filters already, but can be read too |
| 2108 * lines 4-12 will be read into the deblocking filter and should be deinterlaced | 1763 * lines 4-12 will be read into the deblocking filter and should be deinterlaced |
| 2109 */ | 1764 */ |
| 2110 static inline void deInterlaceMedian(uint8_t src[], int stride) | 1765 static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) |
| 2111 { | 1766 { |
| 2112 #ifdef HAVE_MMX | 1767 #ifdef HAVE_MMX |
| 2113 src+= 4*stride; | 1768 src+= 4*stride; |
| 2114 #ifdef HAVE_MMX2 | 1769 #ifdef HAVE_MMX2 |
| 2115 asm volatile( | 1770 asm volatile( |
| 2222 | 1877 |
| 2223 #ifdef HAVE_MMX | 1878 #ifdef HAVE_MMX |
| 2224 /** | 1879 /** |
| 2225 * transposes and shifts the given 8x8 Block into dst1 and dst2 | 1880 * transposes and shifts the given 8x8 Block into dst1 and dst2 |
| 2226 */ | 1881 */ |
| 2227 static inline void transpose1(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride) | 1882 static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride) |
| 2228 { | 1883 { |
| 2229 asm( | 1884 asm( |
| 2230 "leal (%0, %1), %%eax \n\t" | 1885 "leal (%0, %1), %%eax \n\t" |
| 2231 "leal (%%eax, %1, 4), %%ebx \n\t" | 1886 "leal (%%eax, %1, 4), %%ebx \n\t" |
| 2232 // 0 1 2 3 4 5 6 7 8 9 | 1887 // 0 1 2 3 4 5 6 7 8 9 |
| 2306 } | 1961 } |
| 2307 | 1962 |
| 2308 /** | 1963 /** |
| 2309 * transposes the given 8x8 block | 1964 * transposes the given 8x8 block |
| 2310 */ | 1965 */ |
| 2311 static inline void transpose2(uint8_t *dst, int dstStride, uint8_t *src) | 1966 static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src) |
| 2312 { | 1967 { |
| 2313 asm( | 1968 asm( |
| 2314 "leal (%0, %1), %%eax \n\t" | 1969 "leal (%0, %1), %%eax \n\t" |
| 2315 "leal (%%eax, %1, 4), %%ebx \n\t" | 1970 "leal (%%eax, %1, 4), %%ebx \n\t" |
| 2316 // 0 1 2 3 4 5 6 7 8 9 | 1971 // 0 1 2 3 4 5 6 7 8 9 |
| 2385 ); | 2040 ); |
| 2386 } | 2041 } |
| 2387 #endif | 2042 #endif |
| 2388 //static int test=0; | 2043 //static int test=0; |
| 2389 | 2044 |
| 2390 static void inline tempNoiseReducer(uint8_t *src, int stride, | 2045 static void inline RENAME(tempNoiseReducer)(uint8_t *src, int stride, |
| 2391 uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise) | 2046 uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise) |
| 2392 { | 2047 { |
| 2393 #define FAST_L2_DIFF | 2048 #define FAST_L2_DIFF |
| 2394 //#define L1_DIFF //u should change the thresholds too if u try that one | 2049 //#define L1_DIFF //u should change the thresholds too if u try that one |
| 2395 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 2050 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 2784 } | 2439 } |
| 2785 } | 2440 } |
| 2786 #endif | 2441 #endif |
| 2787 } | 2442 } |
| 2788 | 2443 |
| 2789 #ifdef HAVE_ODIVX_POSTPROCESS | 2444 static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
| 2790 #include "../opendivx/postprocess.h" | |
| 2791 int use_old_pp=0; | |
| 2792 #endif | |
| 2793 | |
| 2794 static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, | |
| 2795 QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode); | 2445 QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode); |
| 2796 | |
| 2797 /* -pp Command line Help | |
| 2798 NOTE/FIXME: put this at an appropriate place (--help, html docs, man mplayer)? | |
| 2799 | |
| 2800 -pp <filterName>[:<option>[:<option>...]][,[-]<filterName>[:<option>...]]... | |
| 2801 | |
| 2802 long form example: | |
| 2803 -pp vdeblock:autoq,hdeblock:autoq,linblenddeint -pp default,-vdeblock | |
| 2804 short form example: | |
| 2805 -pp vb:a,hb:a,lb -pp de,-vb | |
| 2806 more examples: | |
| 2807 -pp tn:64:128:256 | |
| 2808 | |
| 2809 Filters Options | |
| 2810 short long name short long option Description | |
| 2811 * * a autoq cpu power dependent enabler | |
| 2812 c chrom chrominance filtering enabled | |
| 2813 y nochrom chrominance filtering disabled | |
| 2814 hb hdeblock horizontal deblocking filter | |
| 2815 vb vdeblock vertical deblocking filter | |
| 2816 vr rkvdeblock | |
| 2817 h1 x1hdeblock Experimental horizontal deblock filter 1 | |
| 2818 v1 x1vdeblock Experimental vertical deblock filter 1 | |
| 2819 dr dering not implemented yet | |
| 2820 al autolevels automatic brightness / contrast fixer | |
| 2821 f fullyrange stretch luminance range to (0..255) | |
| 2822 lb linblenddeint linear blend deinterlacer | |
| 2823 li linipoldeint linear interpolating deinterlacer | |
| 2824 ci cubicipoldeint cubic interpolating deinterlacer | |
| 2825 md mediandeint median deinterlacer | |
| 2826 de default hdeblock:a,vdeblock:a,dering:a,autolevels | |
| 2827 fa fast x1hdeblock:a,x1vdeblock:a,dering:a,autolevels | |
| 2828 tn tmpnoise (3 Thresholds) Temporal Noise Reducer | |
| 2829 */ | |
| 2830 | |
| 2831 /** | |
| 2832 * returns a PPMode struct which will have a non-zero error variable if an error occurred | |
| 2833 * name is the string after "-pp" on the command line | |
| 2834 * quality is a number from 0 to GET_PP_QUALITY_MAX | |
| 2835 */ | |
| 2836 struct PPMode getPPModeByNameAndQuality(char *name, int quality) | |
| 2837 { | |
| 2838 char temp[GET_MODE_BUFFER_SIZE]; | |
| 2839 char *p= temp; | |
| 2840 char *filterDelimiters= ","; | |
| 2841 char *optionDelimiters= ":"; | |
| 2842 struct PPMode ppMode= {0,0,0,0,0,0,{150,200,400}}; | |
| 2843 char *filterToken; | |
| 2844 | |
| 2845 strncpy(temp, name, GET_MODE_BUFFER_SIZE); | |
| 2846 | |
| 2847 printf("%s\n", name); | |
| 2848 | |
| 2849 for(;;){ | |
| 2850 char *filterName; | |
| 2851 int q= 1000000; //GET_PP_QUALITY_MAX; | |
| 2852 int chrom=-1; | |
| 2853 char *option; | |
| 2854 char *options[OPTIONS_ARRAY_SIZE]; | |
| 2855 int i; | |
| 2856 int filterNameOk=0; | |
| 2857 int numOfUnknownOptions=0; | |
| 2858 int enable=1; //does the user want us to enable or disable the filter | |
| 2859 | |
| 2860 filterToken= strtok(p, filterDelimiters); | |
| 2861 if(filterToken == NULL) break; | |
| 2862 p+= strlen(filterToken) + 1; // p points to next filterToken | |
| 2863 filterName= strtok(filterToken, optionDelimiters); | |
| 2864 printf("%s::%s\n", filterToken, filterName); | |
| 2865 | |
| 2866 if(*filterName == '-') | |
| 2867 { | |
| 2868 enable=0; | |
| 2869 filterName++; | |
| 2870 } | |
| 2871 | |
| 2872 for(;;){ //for all options | |
| 2873 option= strtok(NULL, optionDelimiters); | |
| 2874 if(option == NULL) break; | |
| 2875 | |
| 2876 printf("%s\n", option); | |
| 2877 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality; | |
| 2878 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0; | |
| 2879 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1; | |
| 2880 else | |
| 2881 { | |
| 2882 options[numOfUnknownOptions] = option; | |
| 2883 numOfUnknownOptions++; | |
| 2884 } | |
| 2885 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break; | |
| 2886 } | |
| 2887 options[numOfUnknownOptions] = NULL; | |
| 2888 | |
| 2889 /* replace stuff from the replace Table */ | |
| 2890 for(i=0; replaceTable[2*i]!=NULL; i++) | |
| 2891 { | |
| 2892 if(!strcmp(replaceTable[2*i], filterName)) | |
| 2893 { | |
| 2894 int newlen= strlen(replaceTable[2*i + 1]); | |
| 2895 int plen; | |
| 2896 int spaceLeft; | |
| 2897 | |
| 2898 if(p==NULL) p= temp, *p=0; //last filter | |
| 2899 else p--, *p=','; //not last filter | |
| 2900 | |
| 2901 plen= strlen(p); | |
| 2902 spaceLeft= (int)p - (int)temp + plen; | |
| 2903 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE) | |
| 2904 { | |
| 2905 ppMode.error++; | |
| 2906 break; | |
| 2907 } | |
| 2908 memmove(p + newlen, p, plen+1); | |
| 2909 memcpy(p, replaceTable[2*i + 1], newlen); | |
| 2910 filterNameOk=1; | |
| 2911 } | |
| 2912 } | |
| 2913 | |
| 2914 for(i=0; filters[i].shortName!=NULL; i++) | |
| 2915 { | |
| 2916 // printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName); | |
| 2917 if( !strcmp(filters[i].longName, filterName) | |
| 2918 || !strcmp(filters[i].shortName, filterName)) | |
| 2919 { | |
| 2920 ppMode.lumMode &= ~filters[i].mask; | |
| 2921 ppMode.chromMode &= ~filters[i].mask; | |
| 2922 | |
| 2923 filterNameOk=1; | |
| 2924 if(!enable) break; // user wants to disable it | |
| 2925 | |
| 2926 if(q >= filters[i].minLumQuality) | |
| 2927 ppMode.lumMode|= filters[i].mask; | |
| 2928 if(chrom==1 || (chrom==-1 && filters[i].chromDefault)) | |
| 2929 if(q >= filters[i].minChromQuality) | |
| 2930 ppMode.chromMode|= filters[i].mask; | |
| 2931 | |
| 2932 if(filters[i].mask == LEVEL_FIX) | |
| 2933 { | |
| 2934 int o; | |
| 2935 ppMode.minAllowedY= 16; | |
| 2936 ppMode.maxAllowedY= 234; | |
| 2937 for(o=0; options[o]!=NULL; o++) | |
| 2938 if( !strcmp(options[o],"fullyrange") | |
| 2939 ||!strcmp(options[o],"f")) | |
| 2940 { | |
| 2941 ppMode.minAllowedY= 0; | |
| 2942 ppMode.maxAllowedY= 255; | |
| 2943 numOfUnknownOptions--; | |
| 2944 } | |
| 2945 } | |
| 2946 else if(filters[i].mask == TEMP_NOISE_FILTER) | |
| 2947 { | |
| 2948 int o; | |
| 2949 int numOfNoises=0; | |
| 2950 ppMode.maxTmpNoise[0]= 150; | |
| 2951 ppMode.maxTmpNoise[1]= 200; | |
| 2952 ppMode.maxTmpNoise[2]= 400; | |
| 2953 | |
| 2954 for(o=0; options[o]!=NULL; o++) | |
| 2955 { | |
| 2956 char *tail; | |
| 2957 ppMode.maxTmpNoise[numOfNoises]= | |
| 2958 strtol(options[o], &tail, 0); | |
| 2959 if(tail!=options[o]) | |
| 2960 { | |
| 2961 numOfNoises++; | |
| 2962 numOfUnknownOptions--; | |
| 2963 if(numOfNoises >= 3) break; | |
| 2964 } | |
| 2965 } | |
| 2966 } | |
| 2967 } | |
| 2968 } | |
| 2969 if(!filterNameOk) ppMode.error++; | |
| 2970 ppMode.error += numOfUnknownOptions; | |
| 2971 } | |
| 2972 | |
| 2973 #ifdef HAVE_ODIVX_POSTPROCESS | |
| 2974 if(ppMode.lumMode & H_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_Y_H; | |
| 2975 if(ppMode.lumMode & V_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_Y_V; | |
| 2976 if(ppMode.chromMode & H_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_C_H; | |
| 2977 if(ppMode.chromMode & V_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_C_V; | |
| 2978 if(ppMode.lumMode & DERING) ppMode.oldMode |= PP_DERING_Y; | |
| 2979 if(ppMode.chromMode & DERING) ppMode.oldMode |= PP_DERING_C; | |
| 2980 #endif | |
| 2981 | |
| 2982 return ppMode; | |
| 2983 } | |
| 2984 | |
| 2985 /** | |
| 2986 * Obsolete, don't use it; use postprocess2() instead | |
| 2987 */ | |
| 2988 void postprocess(unsigned char * src[], int src_stride, | |
| 2989 unsigned char * dst[], int dst_stride, | |
| 2990 int horizontal_size, int vertical_size, | |
| 2991 QP_STORE_T *QP_store, int QP_stride, | |
| 2992 int mode) | |
| 2993 { | |
| 2994 struct PPMode ppMode; | |
| 2995 static QP_STORE_T zeroArray[2048/8]; | |
| 2996 /* | |
| 2997 static int qual=0; | |
| 2998 | |
| 2999 ppMode= getPPModeByNameAndQuality("fast,default,-hdeblock,-vdeblock,tmpnoise:150:200:300", qual); | |
| 3000 printf("OK\n"); | |
| 3001 qual++; | |
| 3002 qual%=7; | |
| 3003 printf("\n%X %X %X %X :%d: %d %d %d\n", ppMode.lumMode, ppMode.chromMode, ppMode.oldMode, ppMode.error, | |
| 3004 qual, ppMode.maxTmpNoise[0], ppMode.maxTmpNoise[1], ppMode.maxTmpNoise[2]); | |
| 3005 postprocess2(src, src_stride, dst, dst_stride, | |
| 3006 horizontal_size, vertical_size, QP_store, QP_stride, &ppMode); | |
| 3007 | |
| 3008 return; | |
| 3009 */ | |
| 3010 if(QP_store==NULL) | |
| 3011 { | |
| 3012 QP_store= zeroArray; | |
| 3013 QP_stride= 0; | |
| 3014 } | |
| 3015 | |
| 3016 ppMode.lumMode= mode; | |
| 3017 mode= ((mode&0xFF)>>4) | (mode&0xFFFFFF00); | |
| 3018 ppMode.chromMode= mode; | |
| 3019 ppMode.maxTmpNoise[0]= 700; | |
| 3020 ppMode.maxTmpNoise[1]= 1500; | |
| 3021 ppMode.maxTmpNoise[2]= 3000; | |
| 3022 | |
| 3023 #ifdef HAVE_ODIVX_POSTPROCESS | |
| 3024 // Note: this could be done outside of this file, but it would mean one | |
| 3025 // more function call... | |
| 3026 if(use_old_pp){ | |
| 3027 odivx_postprocess(src,src_stride,dst,dst_stride,horizontal_size,vertical_size,QP_store,QP_stride,mode); | |
| 3028 return; | |
| 3029 } | |
| 3030 #endif | |
| 3031 | |
| 3032 postProcess(src[0], src_stride, dst[0], dst_stride, | |
| 3033 horizontal_size, vertical_size, QP_store, QP_stride, 0, &ppMode); | |
| 3034 | |
| 3035 horizontal_size >>= 1; | |
| 3036 vertical_size >>= 1; | |
| 3037 src_stride >>= 1; | |
| 3038 dst_stride >>= 1; | |
| 3039 | |
| 3040 if(ppMode.chromMode) | |
| 3041 { | |
| 3042 postProcess(src[1], src_stride, dst[1], dst_stride, | |
| 3043 horizontal_size, vertical_size, QP_store, QP_stride, 1, &ppMode); | |
| 3044 postProcess(src[2], src_stride, dst[2], dst_stride, | |
| 3045 horizontal_size, vertical_size, QP_store, QP_stride, 2, &ppMode); | |
| 3046 } | |
| 3047 else if(src_stride == dst_stride) | |
| 3048 { | |
| 3049 memcpy(dst[1], src[1], src_stride*vertical_size); | |
| 3050 memcpy(dst[2], src[2], src_stride*vertical_size); | |
| 3051 } | |
| 3052 else | |
| 3053 { | |
| 3054 int y; | |
| 3055 for(y=0; y<vertical_size; y++) | |
| 3056 { | |
| 3057 memcpy(&(dst[1][y*dst_stride]), &(src[1][y*src_stride]), horizontal_size); | |
| 3058 memcpy(&(dst[2][y*dst_stride]), &(src[2][y*src_stride]), horizontal_size); | |
| 3059 } | |
| 3060 } | |
| 3061 | |
| 3062 #if 0 | |
| 3063 memset(dst[1], 128, dst_stride*vertical_size); | |
| 3064 memset(dst[2], 128, dst_stride*vertical_size); | |
| 3065 #endif | |
| 3066 } | |
| 3067 | |
| 3068 void postprocess2(unsigned char * src[], int src_stride, | |
| 3069 unsigned char * dst[], int dst_stride, | |
| 3070 int horizontal_size, int vertical_size, | |
| 3071 QP_STORE_T *QP_store, int QP_stride, | |
| 3072 struct PPMode *mode) | |
| 3073 { | |
| 3074 | |
| 3075 static QP_STORE_T zeroArray[2048/8]; | |
| 3076 if(QP_store==NULL) | |
| 3077 { | |
| 3078 QP_store= zeroArray; | |
| 3079 QP_stride= 0; | |
| 3080 } | |
| 3081 | |
| 3082 #ifdef HAVE_ODIVX_POSTPROCESS | |
| 3083 // Note: this could be done outside of this file, but it would mean one | |
| 3084 // more function call... | |
| 3085 if(use_old_pp){ | |
| 3086 odivx_postprocess(src,src_stride,dst,dst_stride,horizontal_size,vertical_size,QP_store,QP_stride, | |
| 3087 mode->oldMode); | |
| 3088 return; | |
| 3089 } | |
| 3090 #endif | |
| 3091 | |
| 3092 postProcess(src[0], src_stride, dst[0], dst_stride, | |
| 3093 horizontal_size, vertical_size, QP_store, QP_stride, 0, mode); | |
| 3094 | |
| 3095 horizontal_size >>= 1; | |
| 3096 vertical_size >>= 1; | |
| 3097 src_stride >>= 1; | |
| 3098 dst_stride >>= 1; | |
| 3099 | |
| 3100 if(mode->chromMode) | |
| 3101 { | |
| 3102 postProcess(src[1], src_stride, dst[1], dst_stride, | |
| 3103 horizontal_size, vertical_size, QP_store, QP_stride, 1, mode); | |
| 3104 postProcess(src[2], src_stride, dst[2], dst_stride, | |
| 3105 horizontal_size, vertical_size, QP_store, QP_stride, 2, mode); | |
| 3106 } | |
| 3107 else if(src_stride == dst_stride) | |
| 3108 { | |
| 3109 memcpy(dst[1], src[1], src_stride*vertical_size); | |
| 3110 memcpy(dst[2], src[2], src_stride*vertical_size); | |
| 3111 } | |
| 3112 else | |
| 3113 { | |
| 3114 int y; | |
| 3115 for(y=0; y<vertical_size; y++) | |
| 3116 { | |
| 3117 memcpy(&(dst[1][y*dst_stride]), &(src[1][y*src_stride]), horizontal_size); | |
| 3118 memcpy(&(dst[2][y*dst_stride]), &(src[2][y*src_stride]), horizontal_size); | |
| 3119 } | |
| 3120 } | |
| 3121 } | |
| 3122 | |
| 3123 | |
| 3124 /** | |
| 3125 * gets the mode flags for a given quality (larger values mean slower but better postprocessing) | |
| 3126 * 0 <= quality <= 6 | |
| 3127 */ | |
| 3128 int getPpModeForQuality(int quality){ | |
| 3129 int modes[1+GET_PP_QUALITY_MAX]= { | |
| 3130 0, | |
| 3131 #if 1 | |
| 3132 // horizontal filters first | |
| 3133 LUM_H_DEBLOCK, | |
| 3134 LUM_H_DEBLOCK | LUM_V_DEBLOCK, | |
| 3135 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK, | |
| 3136 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK | CHROM_V_DEBLOCK, | |
| 3137 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK | CHROM_V_DEBLOCK | LUM_DERING, | |
| 3138 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK | CHROM_V_DEBLOCK | LUM_DERING | CHROM_DERING | |
| 3139 #else | |
| 3140 // vertical filters first | |
| 3141 LUM_V_DEBLOCK, | |
| 3142 LUM_V_DEBLOCK | LUM_H_DEBLOCK, | |
| 3143 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK, | |
| 3144 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK | CHROM_H_DEBLOCK, | |
| 3145 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK | CHROM_H_DEBLOCK | LUM_DERING, | |
| 3146 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK | CHROM_H_DEBLOCK | LUM_DERING | CHROM_DERING | |
| 3147 #endif | |
| 3148 }; | |
| 3149 | |
| 3150 #ifdef HAVE_ODIVX_POSTPROCESS | |
| 3151 int odivx_modes[1+GET_PP_QUALITY_MAX]= { | |
| 3152 0, | |
| 3153 PP_DEBLOCK_Y_H, | |
| 3154 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V, | |
| 3155 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H, | |
| 3156 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H|PP_DEBLOCK_C_V, | |
| 3157 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H|PP_DEBLOCK_C_V|PP_DERING_Y, | |
| 3158 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H|PP_DEBLOCK_C_V|PP_DERING_Y|PP_DERING_C | |
| 3159 }; | |
| 3160 if(use_old_pp) return odivx_modes[quality]; | |
| 3161 #endif | |
| 3162 return modes[quality]; | |
| 3163 } | |
| 3164 | 2446 |
| 3165 /** | 2447 /** |
| 3166 * Copies a block from src to dst and fixes the blacklevel | 2448 * Copies a block from src to dst and fixes the blacklevel |
| 3167 * numLines must be a multiple of 4 | 2449 * numLines must be a multiple of 4 |
| 3168 * levelFix == 0 -> don't touch the brightness & contrast | 2450 * levelFix == 0 -> don't touch the brightness & contrast |
| 3169 */ | 2451 */ |
| 3170 static inline void blockCopy(uint8_t dst[], int dstStride, uint8_t src[], int srcStride, | 2452 static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, uint8_t src[], int srcStride, |
| 3171 int levelFix) | 2453 int levelFix) |
| 3172 { | 2454 { |
| 3173 #ifndef HAVE_MMX | 2455 #ifndef HAVE_MMX |
| 3174 int i; | 2456 int i; |
| 3175 #endif | 2457 #endif |
| 3265 | 2547 |
| 3266 | 2548 |
| 3267 /** | 2549 /** |
| 3268 * Filters array of bytes (Y or U or V values) | 2550 * Filters array of bytes (Y or U or V values) |
| 3269 */ | 2551 */ |
| 3270 static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, | 2552 static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
| 3271 QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode) | 2553 QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode) |
| 3272 { | 2554 { |
| 3273 int x,y; | 2555 int x,y; |
| 3274 const int mode= isColor ? ppMode->chromMode : ppMode->lumMode; | 2556 const int mode= isColor ? ppMode->chromMode : ppMode->lumMode; |
| 3275 | 2557 |
| 3461 prefetchw(dstBlock + (((x>>3)&3) + 5)*dstStride + 32); | 2743 prefetchw(dstBlock + (((x>>3)&3) + 5)*dstStride + 32); |
| 3462 prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32); | 2744 prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32); |
| 3463 */ | 2745 */ |
| 3464 #endif | 2746 #endif |
| 3465 | 2747 |
| 3466 blockCopy(dstBlock + dstStride*copyAhead, dstStride, | 2748 RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride, |
| 3467 srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX); | 2749 srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX); |
| 3468 | 2750 |
| 3469 if(mode & LINEAR_IPOL_DEINT_FILTER) | 2751 if(mode & LINEAR_IPOL_DEINT_FILTER) |
| 3470 deInterlaceInterpolateLinear(dstBlock, dstStride); | 2752 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride); |
| 3471 else if(mode & LINEAR_BLEND_DEINT_FILTER) | 2753 else if(mode & LINEAR_BLEND_DEINT_FILTER) |
| 3472 deInterlaceBlendLinear(dstBlock, dstStride); | 2754 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride); |
| 3473 else if(mode & MEDIAN_DEINT_FILTER) | 2755 else if(mode & MEDIAN_DEINT_FILTER) |
| 3474 deInterlaceMedian(dstBlock, dstStride); | 2756 RENAME(deInterlaceMedian)(dstBlock, dstStride); |
| 3475 else if(mode & CUBIC_IPOL_DEINT_FILTER) | 2757 else if(mode & CUBIC_IPOL_DEINT_FILTER) |
| 3476 deInterlaceInterpolateCubic(dstBlock, dstStride); | 2758 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride); |
| 3477 /* else if(mode & CUBIC_BLEND_DEINT_FILTER) | 2759 /* else if(mode & CUBIC_BLEND_DEINT_FILTER) |
| 3478 deInterlaceBlendCubic(dstBlock, dstStride); | 2760 RENAME(deInterlaceBlendCubic)(dstBlock, dstStride); |
| 3479 */ | 2761 */ |
| 3480 dstBlock+=8; | 2762 dstBlock+=8; |
| 3481 srcBlock+=8; | 2763 srcBlock+=8; |
| 3482 } | 2764 } |
| 3483 memcpy(&(dst[y*dstStride]) + 8*dstStride, tempDst + 9*dstStride, copyAhead*dstStride ); | 2765 memcpy(&(dst[y*dstStride]) + 8*dstStride, tempDst + 9*dstStride, copyAhead*dstStride ); |
| 3486 for(y=0; y<height; y+=BLOCK_SIZE) | 2768 for(y=0; y<height; y+=BLOCK_SIZE) |
| 3487 { | 2769 { |
| 3488 //1% speedup if these are here instead of the inner loop | 2770 //1% speedup if these are here instead of the inner loop |
| 3489 uint8_t *srcBlock= &(src[y*srcStride]); | 2771 uint8_t *srcBlock= &(src[y*srcStride]); |
| 3490 uint8_t *dstBlock= &(dst[y*dstStride]); | 2772 uint8_t *dstBlock= &(dst[y*dstStride]); |
| 2773 #ifdef HAVE_MMX | |
| 2774 uint8_t *tempBlock1= tempBlocks; | |
| 2775 uint8_t *tempBlock2= tempBlocks + 8; | |
| 2776 #endif | |
| 3491 #ifdef ARCH_X86 | 2777 #ifdef ARCH_X86 |
| 3492 int *QPptr= isColor ? &QPs[(y>>3)*QPStride] :&QPs[(y>>4)*QPStride]; | 2778 int *QPptr= isColor ? &QPs[(y>>3)*QPStride] :&QPs[(y>>4)*QPStride]; |
| 3493 int QPDelta= isColor ? 1<<(32-3) : 1<<(32-4); | 2779 int QPDelta= isColor ? 1<<(32-3) : 1<<(32-4); |
| 3494 int QPFrac= QPDelta; | 2780 int QPFrac= QPDelta; |
| 3495 uint8_t *tempBlock1= tempBlocks; | |
| 3496 uint8_t *tempBlock2= tempBlocks + 8; | |
| 3497 #endif | 2781 #endif |
| 3498 int QP=0; | 2782 int QP=0; |
| 3499 /* can we mess with an 8x16 block from srcBlock/dstBlock downwards and 1 line upwards | 2783 /* can we mess with an 8x16 block from srcBlock/dstBlock downwards and 1 line upwards |
| 3500 if not, then use a temporary buffer */ | 2784 if not, then use a temporary buffer */ |
| 3501 if(y+15 >= height) | 2785 if(y+15 >= height) |
| 3525 // finish 1 block before the next otherwise we might have a problem | 2809 // finish 1 block before the next otherwise we might have a problem |
| 3526 // with the L1 Cache of the P4 ... or only a few blocks at a time or something | 2810 // with the L1 Cache of the P4 ... or only a few blocks at a time or something |
| 3527 for(x=0; x<width; x+=BLOCK_SIZE) | 2811 for(x=0; x<width; x+=BLOCK_SIZE) |
| 3528 { | 2812 { |
| 3529 const int stride= dstStride; | 2813 const int stride= dstStride; |
| 2814 #ifdef HAVE_MMX | |
| 3530 uint8_t *tmpXchg; | 2815 uint8_t *tmpXchg; |
| 2816 #endif | |
| 3531 #ifdef ARCH_X86 | 2817 #ifdef ARCH_X86 |
| 3532 QP= *QPptr; | 2818 QP= *QPptr; |
| 3533 asm volatile( | 2819 asm volatile( |
| 3534 "addl %2, %1 \n\t" | 2820 "addl %2, %1 \n\t" |
| 3535 "sbbl %%eax, %%eax \n\t" | 2821 "sbbl %%eax, %%eax \n\t" |
| 3617 dstBlock= tempDstBlock; | 2903 dstBlock= tempDstBlock; |
| 3618 srcBlock= tempSrcBlock; | 2904 srcBlock= tempSrcBlock; |
| 3619 } | 2905 } |
| 3620 #endif | 2906 #endif |
| 3621 | 2907 |
| 3622 blockCopy(dstBlock + dstStride*copyAhead, dstStride, | 2908 RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride, |
| 3623 srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX); | 2909 srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX); |
| 3624 | 2910 |
| 3625 if(mode & LINEAR_IPOL_DEINT_FILTER) | 2911 if(mode & LINEAR_IPOL_DEINT_FILTER) |
| 3626 deInterlaceInterpolateLinear(dstBlock, dstStride); | 2912 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride); |
| 3627 else if(mode & LINEAR_BLEND_DEINT_FILTER) | 2913 else if(mode & LINEAR_BLEND_DEINT_FILTER) |
| 3628 deInterlaceBlendLinear(dstBlock, dstStride); | 2914 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride); |
| 3629 else if(mode & MEDIAN_DEINT_FILTER) | 2915 else if(mode & MEDIAN_DEINT_FILTER) |
| 3630 deInterlaceMedian(dstBlock, dstStride); | 2916 RENAME(deInterlaceMedian)(dstBlock, dstStride); |
| 3631 else if(mode & CUBIC_IPOL_DEINT_FILTER) | 2917 else if(mode & CUBIC_IPOL_DEINT_FILTER) |
| 3632 deInterlaceInterpolateCubic(dstBlock, dstStride); | 2918 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride); |
| 3633 /* else if(mode & CUBIC_BLEND_DEINT_FILTER) | 2919 /* else if(mode & CUBIC_BLEND_DEINT_FILTER) |
| 3634 deInterlaceBlendCubic(dstBlock, dstStride); | 2920 RENAME(deInterlaceBlendCubic)(dstBlock, dstStride); |
| 3635 */ | 2921 */ |
| 3636 | 2922 |
| 3637 /* only deblock if we have 2 blocks */ | 2923 /* only deblock if we have 2 blocks */ |
| 3638 if(y + 8 < height) | 2924 if(y + 8 < height) |
| 3639 { | 2925 { |
| 3641 T1= rdtsc(); | 2927 T1= rdtsc(); |
| 3642 memcpyTime+= T1-T0; | 2928 memcpyTime+= T1-T0; |
| 3643 T0=T1; | 2929 T0=T1; |
| 3644 #endif | 2930 #endif |
| 3645 if(mode & V_RK1_FILTER) | 2931 if(mode & V_RK1_FILTER) |
| 3646 vertRK1Filter(dstBlock, stride, QP); | 2932 RENAME(vertRK1Filter)(dstBlock, stride, QP); |
| 3647 else if(mode & V_X1_FILTER) | 2933 else if(mode & V_X1_FILTER) |
| 3648 vertX1Filter(dstBlock, stride, QP); | 2934 RENAME(vertX1Filter)(dstBlock, stride, QP); |
| 3649 else if(mode & V_DEBLOCK) | 2935 else if(mode & V_DEBLOCK) |
| 3650 { | 2936 { |
| 3651 if( isVertDC(dstBlock, stride)) | 2937 if( RENAME(isVertDC)(dstBlock, stride)) |
| 3652 { | 2938 { |
| 3653 if(isVertMinMaxOk(dstBlock, stride, QP)) | 2939 if(RENAME(isVertMinMaxOk)(dstBlock, stride, QP)) |
| 3654 doVertLowPass(dstBlock, stride, QP); | 2940 RENAME(doVertLowPass)(dstBlock, stride, QP); |
| 3655 } | 2941 } |
| 3656 else | 2942 else |
| 3657 doVertDefFilter(dstBlock, stride, QP); | 2943 RENAME(doVertDefFilter)(dstBlock, stride, QP); |
| 3658 } | 2944 } |
| 3659 #ifdef MORE_TIMING | 2945 #ifdef MORE_TIMING |
| 3660 T1= rdtsc(); | 2946 T1= rdtsc(); |
| 3661 vertTime+= T1-T0; | 2947 vertTime+= T1-T0; |
| 3662 T0=T1; | 2948 T0=T1; |
| 3663 #endif | 2949 #endif |
| 3664 } | 2950 } |
| 3665 | 2951 |
| 3666 #ifdef HAVE_MMX | 2952 #ifdef HAVE_MMX |
| 3667 transpose1(tempBlock1, tempBlock2, dstBlock, dstStride); | 2953 RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride); |
| 3668 #endif | 2954 #endif |
| 3669 /* check if we have a previous block to deblock it with dstBlock */ | 2955 /* check if we have a previous block to deblock it with dstBlock */ |
| 3670 if(x - 8 >= 0) | 2956 if(x - 8 >= 0) |
| 3671 { | 2957 { |
| 3672 #ifdef MORE_TIMING | 2958 #ifdef MORE_TIMING |
| 3673 T0= rdtsc(); | 2959 T0= rdtsc(); |
| 3674 #endif | 2960 #endif |
| 3675 #ifdef HAVE_MMX | 2961 #ifdef HAVE_MMX |
| 3676 if(mode & H_RK1_FILTER) | 2962 if(mode & H_RK1_FILTER) |
| 3677 vertRK1Filter(tempBlock1, 16, QP); | 2963 RENAME(vertRK1Filter)(tempBlock1, 16, QP); |
| 3678 else if(mode & H_X1_FILTER) | 2964 else if(mode & H_X1_FILTER) |
| 3679 vertX1Filter(tempBlock1, 16, QP); | 2965 RENAME(vertX1Filter)(tempBlock1, 16, QP); |
| 3680 else if(mode & H_DEBLOCK) | 2966 else if(mode & H_DEBLOCK) |
| 3681 { | 2967 { |
| 3682 if( isVertDC(tempBlock1, 16) ) | 2968 if( RENAME(isVertDC)(tempBlock1, 16) ) |
| 3683 { | 2969 { |
| 3684 if(isVertMinMaxOk(tempBlock1, 16, QP)) | 2970 if(RENAME(isVertMinMaxOk)(tempBlock1, 16, QP)) |
| 3685 doVertLowPass(tempBlock1, 16, QP); | 2971 RENAME(doVertLowPass)(tempBlock1, 16, QP); |
| 3686 } | 2972 } |
| 3687 else | 2973 else |
| 3688 doVertDefFilter(tempBlock1, 16, QP); | 2974 RENAME(doVertDefFilter)(tempBlock1, 16, QP); |
| 3689 } | 2975 } |
| 3690 | 2976 |
| 3691 transpose2(dstBlock-4, dstStride, tempBlock1 + 4*16); | 2977 RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16); |
| 3692 | 2978 |
| 3693 #else | 2979 #else |
| 3694 if(mode & H_X1_FILTER) | 2980 if(mode & H_X1_FILTER) |
| 3695 horizX1Filter(dstBlock-4, stride, QP); | 2981 horizX1Filter(dstBlock-4, stride, QP); |
| 3696 else if(mode & H_DEBLOCK) | 2982 else if(mode & H_DEBLOCK) |
| 3710 T0=T1; | 2996 T0=T1; |
| 3711 #endif | 2997 #endif |
| 3712 if(mode & DERING) | 2998 if(mode & DERING) |
| 3713 { | 2999 { |
| 3714 //FIXME filter first line | 3000 //FIXME filter first line |
| 3715 if(y>0) dering(dstBlock - stride - 8, stride, QP); | 3001 if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, QP); |
| 3716 } | 3002 } |
| 3717 | 3003 |
| 3718 if(mode & TEMP_NOISE_FILTER) | 3004 if(mode & TEMP_NOISE_FILTER) |
| 3719 { | 3005 { |
| 3720 tempNoiseReducer(dstBlock-8, stride, | 3006 RENAME(tempNoiseReducer)(dstBlock-8, stride, |
| 3721 tempBlured[isColor] + y*dstStride + x, | 3007 tempBlured[isColor] + y*dstStride + x, |
| 3722 tempBluredPast[isColor] + (y>>3)*256 + (x>>3), | 3008 tempBluredPast[isColor] + (y>>3)*256 + (x>>3), |
| 3723 ppMode->maxTmpNoise); | 3009 ppMode->maxTmpNoise); |
| 3724 } | 3010 } |
| 3725 } | 3011 } |
| 3749 #endif | 3035 #endif |
| 3750 } | 3036 } |
| 3751 | 3037 |
| 3752 if(mode & DERING) | 3038 if(mode & DERING) |
| 3753 { | 3039 { |
| 3754 if(y > 0) dering(dstBlock - dstStride - 8, dstStride, QP); | 3040 if(y > 0) RENAME(dering)(dstBlock - dstStride - 8, dstStride, QP); |
| 3755 } | 3041 } |
| 3756 | 3042 |
| 3757 if((mode & TEMP_NOISE_FILTER)) | 3043 if((mode & TEMP_NOISE_FILTER)) |
| 3758 { | 3044 { |
| 3759 tempNoiseReducer(dstBlock-8, dstStride, | 3045 RENAME(tempNoiseReducer)(dstBlock-8, dstStride, |
| 3760 tempBlured[isColor] + y*dstStride + x, | 3046 tempBlured[isColor] + y*dstStride + x, |
| 3761 tempBluredPast[isColor] + (y>>3)*256 + (x>>3), | 3047 tempBluredPast[isColor] + (y>>3)*256 + (x>>3), |
| 3762 ppMode->maxTmpNoise); | 3048 ppMode->maxTmpNoise); |
| 3763 } | 3049 } |
| 3764 | 3050 |
