Mercurial > libavcodec.hg
comparison libpostproc/postprocess.c @ 2979:bfabfdf9ce55 libavcodec
COSMETICS: tabs --> spaces, some prettyprinting
| author | diego |
|---|---|
| date | Thu, 22 Dec 2005 01:10:11 +0000 |
| parents | ef2149182f1c |
| children | 0b546eab515d |
comparison
equal
deleted
inserted
replaced
| 2978:403183bbb505 | 2979:bfabfdf9ce55 |
|---|---|
| 22 * @file postprocess.c | 22 * @file postprocess.c |
| 23 * postprocessing. | 23 * postprocessing. |
| 24 */ | 24 */ |
| 25 | 25 |
| 26 /* | 26 /* |
| 27 C MMX MMX2 3DNow AltiVec | 27 C MMX MMX2 3DNow AltiVec |
| 28 isVertDC Ec Ec Ec | 28 isVertDC Ec Ec Ec |
| 29 isVertMinMaxOk Ec Ec Ec | 29 isVertMinMaxOk Ec Ec Ec |
| 30 doVertLowPass E e e Ec | 30 doVertLowPass E e e Ec |
| 31 doVertDefFilter Ec Ec e e Ec | 31 doVertDefFilter Ec Ec e e Ec |
| 32 isHorizDC Ec Ec Ec | 32 isHorizDC Ec Ec Ec |
| 33 isHorizMinMaxOk a E Ec | 33 isHorizMinMaxOk a E Ec |
| 34 doHorizLowPass E e e Ec | 34 doHorizLowPass E e e Ec |
| 35 doHorizDefFilter Ec Ec e e Ec | 35 doHorizDefFilter Ec Ec e e Ec |
| 36 do_a_deblock Ec E Ec E | 36 do_a_deblock Ec E Ec E |
| 37 deRing E e e* Ecp | 37 deRing E e e* Ecp |
| 38 Vertical RKAlgo1 E a a | 38 Vertical RKAlgo1 E a a |
| 39 Horizontal RKAlgo1 a a | 39 Horizontal RKAlgo1 a a |
| 40 Vertical X1# a E E | 40 Vertical X1# a E E |
| 41 Horizontal X1# a E E | 41 Horizontal X1# a E E |
| 42 LinIpolDeinterlace e E E* | 42 LinIpolDeinterlace e E E* |
| 43 CubicIpolDeinterlace a e e* | 43 CubicIpolDeinterlace a e e* |
| 44 LinBlendDeinterlace e E E* | 44 LinBlendDeinterlace e E E* |
| 45 MedianDeinterlace# E Ec Ec | 45 MedianDeinterlace# E Ec Ec |
| 46 TempDeNoiser# E e e Ec | 46 TempDeNoiser# E e e Ec |
| 47 | 47 |
| 48 * I don't have a 3DNow CPU -> it's untested, but no one has said it doesn't work, so it seems to work | 48 * I don't have a 3DNow CPU -> it's untested, but no one has said it doesn't work, so it seems to work |
| 49 # more or less self-invented filters, so the exactness isn't too meaningful | 49 # more or less self-invented filters, so the exactness isn't too meaningful |
| 50 E = Exact implementation | 50 E = Exact implementation |
| 51 e = almost exact implementation (slightly different rounding,...) | 51 e = almost exact implementation (slightly different rounding,...) |
| 59 reduce the time wasted on the mem transfer | 59 reduce the time wasted on the mem transfer |
| 60 unroll stuff if instructions depend too much on the prior one | 60 unroll stuff if instructions depend too much on the prior one |
| 61 move YScale thing to the end instead of fixing QP | 61 move YScale thing to the end instead of fixing QP |
| 62 write a faster and higher quality deblocking filter :) | 62 write a faster and higher quality deblocking filter :) |
| 63 make the mainloop more flexible (variable number of blocks at once | 63 make the mainloop more flexible (variable number of blocks at once |
| 64 (the if/else stuff per block is slowing things down) | 64 (the if/else stuff per block is slowing things down) |
| 65 compare the quality & speed of all filters | 65 compare the quality & speed of all filters |
| 66 split this huge file | 66 split this huge file |
| 67 optimize c versions | 67 optimize c versions |
| 68 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks | 68 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks |
| 69 ... | 69 ... |
| 118 # define attribute_used | 118 # define attribute_used |
| 119 # define always_inline inline | 119 # define always_inline inline |
| 120 #endif | 120 #endif |
| 121 | 121 |
| 122 #if defined(ARCH_X86) || defined(ARCH_X86_64) | 122 #if defined(ARCH_X86) || defined(ARCH_X86_64) |
/*
 * 8-byte-aligned 64-bit constants; the enclosing #if restricts them to
 * x86 / x86-64 builds.
 *   wNN: the 16-bit value 0x00NN repeated in each of the four words.
 *   bNN: the byte 0xNN repeated in each of the eight bytes.
 * No C statement in view reads them; presumably they are referenced by name
 * from inline assembly elsewhere, which would be why they carry
 * attribute_used -- TODO confirm.
 */
static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
| 131 #endif | 131 #endif |
| 132 | 132 |
/* Backing storage for the clipping lookup table: three 256-entry segments. */
static uint8_t clip_table[3*256];
/*
 * Points at the start of the middle segment, so indices from -256 to 511
 * stay inside clip_table.  The table is filled in elsewhere (not in view);
 * presumably clip_tab[i] holds i clamped to 0..255 -- TODO confirm.
 */
static uint8_t * const clip_tab= clip_table + 256;
| 135 | 135 |
/* Fixed threshold of 20, presumably consumed by the dering filter (its users are not in view -- TODO confirm). */
static const int attribute_used deringThreshold= 20;
| 139 | 139 |
| 140 | 140 |
/*
 * Table of the available postprocessing filters, terminated by an all-NULL/0
 * entry.  Each row gives a two-letter short name, a long name, three small
 * integers and the filter's identifier constant.
 * NOTE(review): struct PPFilter is declared outside this view; the three
 * integers look like a chroma-default flag followed by minimum luma and
 * chroma quality levels -- confirm against the struct declaration.
 */
static struct PPFilter filters[]=
{
    {"hb", "hdeblock",       1, 1, 3, H_DEBLOCK},
    {"vb", "vdeblock",       1, 2, 4, V_DEBLOCK},
/*  {"hr", "rkhdeblock",     1, 1, 3, H_RK1_FILTER},
    {"vr", "rkvdeblock",     1, 2, 4, V_RK1_FILTER},*/
    {"h1", "x1hdeblock",     1, 1, 3, H_X1_FILTER},
    {"v1", "x1vdeblock",     1, 2, 4, V_X1_FILTER},
    {"ha", "ahdeblock",      1, 1, 3, H_A_DEBLOCK},
    {"va", "avdeblock",      1, 2, 4, V_A_DEBLOCK},
    {"dr", "dering",         1, 5, 6, DERING},
    {"al", "autolevels",     0, 1, 2, LEVEL_FIX},
    {"lb", "linblenddeint",  1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
    {"li", "linipoldeint",   1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
    {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
    {"md", "mediandeint",    1, 1, 4, MEDIAN_DEINT_FILTER},
    {"fd", "ffmpegdeint",    1, 1, 4, FFMPEG_DEINT_FILTER},
    {"l5", "lowpass5",       1, 1, 4, LOWPASS5_DEINT_FILTER},
    {"tn", "tmpnoise",       1, 7, 8, TEMP_NOISE_FILTER},
    {"fq", "forcequant",     1, 0, 0, FORCE_QUANT},
    {NULL, NULL,0,0,0,0} //End Marker
};
| 163 | 163 |
/*
 * Flat list of (alias, expansion) string pairs followed by a single NULL
 * terminator: the entry at an even index is an alias and the entry after it
 * is the filter string it stands for.
 * NOTE(review): presumably applied to the user's mode string before it is
 * parsed; the code that consumes this table is not in view.
 */
static char *replaceTable[]=
{
    "default", "hdeblock:a,vdeblock:a,dering:a",
    "de",      "hdeblock:a,vdeblock:a,dering:a",
    "fast",    "x1hdeblock:a,x1vdeblock:a,dering:a",
    "fa",      "x1hdeblock:a,x1vdeblock:a,dering:a",
    "ac",      "ha:a:128:7,va:a,dering:a",
    NULL //End Marker
};
| 173 | 173 |
| 174 | 174 |
| 175 #if defined(ARCH_X86) || defined(ARCH_X86_64) | 175 #if defined(ARCH_X86) || defined(ARCH_X86_64) |
/**
 * Issue the x86 "prefetchnta" instruction for the memory at p.
 * The asm statement has no outputs and takes p in a general-purpose register.
 */
static inline void prefetchnta(void *p)
{
    asm volatile(   "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}
| 182 | 182 |
/**
 * Issue the x86 "prefetcht0" instruction for the memory at p.
 * The asm statement has no outputs and takes p in a general-purpose register.
 */
static inline void prefetcht0(void *p)
{
    asm volatile(   "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}
| 189 | 189 |
/**
 * Issue the x86 "prefetcht1" instruction for the memory at p.
 * The asm statement has no outputs and takes p in a general-purpose register.
 */
static inline void prefetcht1(void *p)
{
    asm volatile(   "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}
| 196 | 196 |
/**
 * Issue the x86 "prefetcht2" instruction for the memory at p.
 * The asm statement has no outputs and takes p in a general-purpose register.
 */
static inline void prefetcht2(void *p)
{
    asm volatile(   "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
| 203 #endif | 203 #endif |
| 204 | 204 |
| 205 // The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing | 205 // The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing |
| 206 | 206 |
| 207 /** | 207 /** |
| 208 * Check if the given 8x8 Block is mostly "flat" | 208 * Check if the given 8x8 Block is mostly "flat" |
| 209 */ | 209 */ |
| 210 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c) | 210 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c) |
| 211 { | 211 { |
| 212 int numEq= 0; | 212 int numEq= 0; |
| 213 int y; | 213 int y; |
| 214 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; | 214 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; |
| 215 const int dcThreshold= dcOffset*2 + 1; | 215 const int dcThreshold= dcOffset*2 + 1; |
| 216 | 216 |
| 217 for(y=0; y<BLOCK_SIZE; y++) | 217 for(y=0; y<BLOCK_SIZE; y++) |
| 218 { | 218 { |
| 219 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; | 219 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; |
| 220 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++; | 220 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++; |
| 221 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++; | 221 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++; |
| 222 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++; | 222 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++; |
| 223 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++; | 223 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++; |
| 224 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++; | 224 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++; |
| 225 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++; | 225 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++; |
| 226 src+= stride; | 226 src+= stride; |
| 227 } | 227 } |
| 228 return numEq > c->ppMode.flatnessThreshold; | 228 return numEq > c->ppMode.flatnessThreshold; |
| 229 } | 229 } |
| 230 | 230 |
| 231 /** | 231 /** |
| 232 * Check if the middle 8x8 Block in the given 8x16 block is flat | 232 * Check if the middle 8x8 Block in the given 8x16 block is flat |
| 233 */ | 233 */ |
| 234 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ | 234 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ |
| 235 int numEq= 0; | 235 int numEq= 0; |
| 236 int y; | 236 int y; |
| 237 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; | 237 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; |
| 238 const int dcThreshold= dcOffset*2 + 1; | 238 const int dcThreshold= dcOffset*2 + 1; |
| 239 | 239 |
| 240 src+= stride*4; // src points to begin of the 8x8 Block | 240 src+= stride*4; // src points to begin of the 8x8 Block |
| 241 for(y=0; y<BLOCK_SIZE-1; y++) | 241 for(y=0; y<BLOCK_SIZE-1; y++) |
| 242 { | 242 { |
| 243 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++; | 243 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++; |
| 244 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++; | 244 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++; |
| 245 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++; | 245 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++; |
| 246 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++; | 246 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++; |
| 247 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++; | 247 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++; |
| 248 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++; | 248 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++; |
| 249 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++; | 249 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++; |
| 250 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++; | 250 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++; |
| 251 src+= stride; | 251 src+= stride; |
| 252 } | 252 } |
| 253 return numEq > c->ppMode.flatnessThreshold; | 253 return numEq > c->ppMode.flatnessThreshold; |
| 254 } | 254 } |
| 255 | 255 |
/**
 * Cheap horizontal range test over 8 rows, sampling one pixel pair per row.
 *
 * Rows are visited top to bottom; row r compares the column pair
 * (0,5), (2,7), (4,1) or (6,3) for r%4 == 0, 1, 2, 3 respectively.
 * A pair passes when (unsigned)(a - b + 2*QP) <= 4*QP, which for a
 * non-negative QP means the difference lies within +-2*QP.
 *
 * @param src    first pixel of the top row
 * @param stride offset, in bytes, from one row to the next
 * @param QP     quantiser value scaling the allowed difference
 * @return 0 as soon as a sampled pair fails, 1 if all 8 pass
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    /* column pair sampled in each row of a 4-row group */
    static const uint8_t samplePair[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int row;

    for (row = 0; row < 8; row++) {
        const int a = src[samplePair[row & 3][0]];
        const int b = src[samplePair[row & 3][1]];

        if ((unsigned)(a - b + 2*QP) > 4*QP)
            return 0;
        src += stride;
    }
    return 1;
}
| 278 | 278 |
| 279 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP) | 279 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP) |
| 280 { | 280 { |
| 281 #if 1 | 281 #if 1 |
| 282 #if 1 | 282 #if 1 |
| 283 int x; | 283 int x; |
| 284 src+= stride*4; | 284 src+= stride*4; |
| 285 for(x=0; x<BLOCK_SIZE; x+=4) | 285 for(x=0; x<BLOCK_SIZE; x+=4) |
| 286 { | 286 { |
| 287 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0; | 287 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0; |
| 288 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0; | 288 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0; |
| 289 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0; | 289 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0; |
| 290 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0; | 290 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0; |
| 291 } | 291 } |
| 292 #else | 292 #else |
| 293 int x; | 293 int x; |
| 294 src+= stride*3; | 294 src+= stride*3; |
| 295 for(x=0; x<BLOCK_SIZE; x++) | 295 for(x=0; x<BLOCK_SIZE; x++) |
| 296 { | 296 { |
| 297 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; | 297 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; |
| 298 } | 298 } |
| 299 #endif | 299 #endif |
| 300 return 1; | 300 return 1; |
| 301 #else | 301 #else |
| 302 int x; | 302 int x; |
| 303 src+= stride*4; | 303 src+= stride*4; |
| 304 for(x=0; x<BLOCK_SIZE; x++) | 304 for(x=0; x<BLOCK_SIZE; x++) |
| 305 { | 305 { |
| 306 int min=255; | 306 int min=255; |
| 307 int max=0; | 307 int max=0; |
| 308 int y; | 308 int y; |
| 309 for(y=0; y<8; y++){ | 309 for(y=0; y<8; y++){ |
| 310 int v= src[x + y*stride]; | 310 int v= src[x + y*stride]; |
| 311 if(v>max) max=v; | 311 if(v>max) max=v; |
| 312 if(v<min) min=v; | 312 if(v<min) min=v; |
| 313 } | 313 } |
| 314 if(max-min > 2*QP) return 0; | 314 if(max-min > 2*QP) return 0; |
| 315 } | 315 } |
| 316 return 1; | 316 return 1; |
| 317 #endif | 317 #endif |
| 318 } | 318 } |
| 319 | 319 |
| 320 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){ | 320 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){ |
| 321 if( isHorizDC_C(src, stride, c) ){ | 321 if( isHorizDC_C(src, stride, c) ){ |
| 322 if( isHorizMinMaxOk_C(src, stride, c->QP) ) | 322 if( isHorizMinMaxOk_C(src, stride, c->QP) ) |
| 323 return 1; | 323 return 1; |
| 324 else | 324 else |
| 325 return 0; | 325 return 0; |
| 326 }else{ | 326 }else{ |
| 327 return 2; | 327 return 2; |
| 328 } | 328 } |
| 329 } | 329 } |
| 330 | 330 |
| 331 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ | 331 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ |
| 332 if( isVertDC_C(src, stride, c) ){ | 332 if( isVertDC_C(src, stride, c) ){ |
| 333 if( isVertMinMaxOk_C(src, stride, c->QP) ) | 333 if( isVertMinMaxOk_C(src, stride, c->QP) ) |
| 334 return 1; | 334 return 1; |
| 335 else | 335 else |
| 336 return 0; | 336 return 0; |
| 337 }else{ | 337 }else{ |
| 338 return 2; | 338 return 2; |
| 339 } | 339 } |
| 340 } | 340 } |
| 341 | 341 |
| 342 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c) | 342 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c) |
| 343 { | 343 { |
| 344 int y; | 344 int y; |
| 345 for(y=0; y<BLOCK_SIZE; y++) | 345 for(y=0; y<BLOCK_SIZE; y++) |
| 346 { | 346 { |
| 347 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]); | 347 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]); |
| 348 | 348 |
| 349 if(ABS(middleEnergy) < 8*c->QP) | 349 if(ABS(middleEnergy) < 8*c->QP) |
| 350 { | 350 { |
| 351 const int q=(dst[3] - dst[4])/2; | 351 const int q=(dst[3] - dst[4])/2; |
| 352 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]); | 352 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]); |
| 353 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]); | 353 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]); |
| 354 | 354 |
| 355 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); | 355 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); |
| 356 d= MAX(d, 0); | 356 d= MAX(d, 0); |
| 357 | 357 |
| 358 d= (5*d + 32) >> 6; | 358 d= (5*d + 32) >> 6; |
| 359 d*= SIGN(-middleEnergy); | 359 d*= SIGN(-middleEnergy); |
| 360 | 360 |
| 361 if(q>0) | 361 if(q>0) |
| 362 { | 362 { |
| 363 d= d<0 ? 0 : d; | 363 d= d<0 ? 0 : d; |
| 364 d= d>q ? q : d; | 364 d= d>q ? q : d; |
| 365 } | 365 } |
| 366 else | 366 else |
| 367 { | 367 { |
| 368 d= d>0 ? 0 : d; | 368 d= d>0 ? 0 : d; |
| 369 d= d<q ? q : d; | 369 d= d<q ? q : d; |
| 370 } | 370 } |
| 371 | 371 |
| 372 dst[3]-= d; | 372 dst[3]-= d; |
| 373 dst[4]+= d; | 373 dst[4]+= d; |
| 374 } | 374 } |
| 375 dst+= stride; | 375 dst+= stride; |
| 376 } | 376 } |
| 377 } | 377 } |
| 378 | 378 |
| 379 /** | 379 /** |
| 380 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) | 380 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) |
| 381 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) | 381 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) |
| 382 */ | 382 */ |
| 383 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) | 383 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) |
| 384 { | 384 { |
| 385 int y; | 385 int y; |
| 386 for(y=0; y<BLOCK_SIZE; y++) | 386 for(y=0; y<BLOCK_SIZE; y++) |
| 387 { | 387 { |
| 388 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0]; | 388 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0]; |
| 389 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7]; | 389 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7]; |
| 390 | 390 |
| 391 int sums[10]; | 391 int sums[10]; |
| 392 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4; | 392 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4; |
| 393 sums[1] = sums[0] - first + dst[3]; | 393 sums[1] = sums[0] - first + dst[3]; |
| 394 sums[2] = sums[1] - first + dst[4]; | 394 sums[2] = sums[1] - first + dst[4]; |
| 395 sums[3] = sums[2] - first + dst[5]; | 395 sums[3] = sums[2] - first + dst[5]; |
| 396 sums[4] = sums[3] - first + dst[6]; | 396 sums[4] = sums[3] - first + dst[6]; |
| 397 sums[5] = sums[4] - dst[0] + dst[7]; | 397 sums[5] = sums[4] - dst[0] + dst[7]; |
| 398 sums[6] = sums[5] - dst[1] + last; | 398 sums[6] = sums[5] - dst[1] + last; |
| 399 sums[7] = sums[6] - dst[2] + last; | 399 sums[7] = sums[6] - dst[2] + last; |
| 400 sums[8] = sums[7] - dst[3] + last; | 400 sums[8] = sums[7] - dst[3] + last; |
| 401 sums[9] = sums[8] - dst[4] + last; | 401 sums[9] = sums[8] - dst[4] + last; |
| 402 | 402 |
| 403 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4; | 403 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4; |
| 404 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4; | 404 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4; |
| 405 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4; | 405 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4; |
| 406 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4; | 406 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4; |
| 407 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4; | 407 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4; |
| 408 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4; | 408 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4; |
| 409 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4; | 409 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4; |
| 410 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4; | 410 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4; |
| 411 | 411 |
| 412 dst+= stride; | 412 dst+= stride; |
| 413 } | 413 } |
| 414 } | 414 } |
| 415 | 415 |
| 416 /** | 416 /** |
| 417 * Experimental Filter 1 (Horizontal) | 417 * Experimental Filter 1 (Horizontal) |
| 418 * will not damage linear gradients | 418 * will not damage linear gradients |
| 421 * MMX2 version does correct clipping C version doesnt | 421 * MMX2 version does correct clipping C version doesnt |
| 422 * not identical with the vertical one | 422 * not identical with the vertical one |
| 423 */ | 423 */ |
| 424 static inline void horizX1Filter(uint8_t *src, int stride, int QP) | 424 static inline void horizX1Filter(uint8_t *src, int stride, int QP) |
| 425 { | 425 { |
| 426 int y; | 426 int y; |
| 427 static uint64_t *lut= NULL; | 427 static uint64_t *lut= NULL; |
| 428 if(lut==NULL) | 428 if(lut==NULL) |
| 429 { | 429 { |
| 430 int i; | 430 int i; |
| 431 lut= (uint64_t*)memalign(8, 256*8); | 431 lut= (uint64_t*)memalign(8, 256*8); |
| 432 for(i=0; i<256; i++) | 432 for(i=0; i<256; i++) |
| 433 { | 433 { |
| 434 int v= i < 128 ? 2*i : 2*(i-256); | 434 int v= i < 128 ? 2*i : 2*(i-256); |
| 435 /* | 435 /* |
| 436 //Simulate 112242211 9-Tap filter | 436 //Simulate 112242211 9-Tap filter |
| 437 uint64_t a= (v/16) & 0xFF; | 437 uint64_t a= (v/16) & 0xFF; |
| 438 uint64_t b= (v/8) & 0xFF; | 438 uint64_t b= (v/8) & 0xFF; |
| 439 uint64_t c= (v/4) & 0xFF; | 439 uint64_t c= (v/4) & 0xFF; |
| 440 uint64_t d= (3*v/8) & 0xFF; | 440 uint64_t d= (3*v/8) & 0xFF; |
| 441 */ | 441 */ |
| 442 //Simulate piecewise linear interpolation | 442 //Simulate piecewise linear interpolation |
| 443 uint64_t a= (v/16) & 0xFF; | 443 uint64_t a= (v/16) & 0xFF; |
| 444 uint64_t b= (v*3/16) & 0xFF; | 444 uint64_t b= (v*3/16) & 0xFF; |
| 445 uint64_t c= (v*5/16) & 0xFF; | 445 uint64_t c= (v*5/16) & 0xFF; |
| 446 uint64_t d= (7*v/16) & 0xFF; | 446 uint64_t d= (7*v/16) & 0xFF; |
| 447 uint64_t A= (0x100 - a)&0xFF; | 447 uint64_t A= (0x100 - a)&0xFF; |
| 448 uint64_t B= (0x100 - b)&0xFF; | 448 uint64_t B= (0x100 - b)&0xFF; |
| 449 uint64_t C= (0x100 - c)&0xFF; | 449 uint64_t C= (0x100 - c)&0xFF; |
| 450 uint64_t D= (0x100 - c)&0xFF; | 450 uint64_t D= (0x100 - c)&0xFF; |
| 451 | 451 |
| 452 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) | | 452 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) | |
| 453 (D<<24) | (C<<16) | (B<<8) | (A); | 453 (D<<24) | (C<<16) | (B<<8) | (A); |
| 454 //lut[i] = (v<<32) | (v<<24); | 454 //lut[i] = (v<<32) | (v<<24); |
| 455 } | 455 } |
| 456 } | 456 } |
| 457 | 457 |
| 458 for(y=0; y<BLOCK_SIZE; y++) | 458 for(y=0; y<BLOCK_SIZE; y++) |
| 459 { | 459 { |
| 460 int a= src[1] - src[2]; | 460 int a= src[1] - src[2]; |
| 461 int b= src[3] - src[4]; | 461 int b= src[3] - src[4]; |
| 462 int c= src[5] - src[6]; | 462 int c= src[5] - src[6]; |
| 463 | 463 |
| 464 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0); | 464 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0); |
| 465 | 465 |
| 466 if(d < QP) | 466 if(d < QP) |
| 467 { | 467 { |
| 468 int v = d * SIGN(-b); | 468 int v = d * SIGN(-b); |
| 469 | 469 |
| 470 src[1] +=v/8; | 470 src[1] +=v/8; |
| 471 src[2] +=v/4; | 471 src[2] +=v/4; |
| 472 src[3] +=3*v/8; | 472 src[3] +=3*v/8; |
| 473 src[4] -=3*v/8; | 473 src[4] -=3*v/8; |
| 474 src[5] -=v/4; | 474 src[5] -=v/4; |
| 475 src[6] -=v/8; | 475 src[6] -=v/8; |
| 476 | 476 |
| 477 } | 477 } |
| 478 src+=stride; | 478 src+=stride; |
| 479 } | 479 } |
| 480 } | 480 } |
| 481 | 481 |
| 482 /** | 482 /** |
| 483 * accurate deblock filter | 483 * accurate deblock filter |
| 484 */ | 484 */ |
| 485 static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){ | 485 static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){ |
| 486 int y; | 486 int y; |
| 487 const int QP= c->QP; | 487 const int QP= c->QP; |
| 488 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; | 488 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; |
| 489 const int dcThreshold= dcOffset*2 + 1; | 489 const int dcThreshold= dcOffset*2 + 1; |
| 490 //START_TIMER | 490 //START_TIMER |
| 491 src+= step*4; // src points to begin of the 8x8 Block | 491 src+= step*4; // src points to begin of the 8x8 Block |
| 492 for(y=0; y<8; y++){ | 492 for(y=0; y<8; y++){ |
| 493 int numEq= 0; | 493 int numEq= 0; |
| 494 | 494 |
| 495 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++; | 495 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++; |
| 496 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++; | 496 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++; |
| 497 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++; | 497 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++; |
| 498 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++; | 498 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++; |
| 499 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++; | 499 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++; |
| 500 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++; | 500 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++; |
| 501 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++; | 501 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++; |
| 502 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++; | 502 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++; |
| 503 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++; | 503 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++; |
| 504 if(numEq > c->ppMode.flatnessThreshold){ | 504 if(numEq > c->ppMode.flatnessThreshold){ |
| 505 int min, max, x; | 505 int min, max, x; |
| 506 | 506 |
| 507 if(src[0] > src[step]){ | 507 if(src[0] > src[step]){ |
| 508 max= src[0]; | 508 max= src[0]; |
| 509 min= src[step]; | 509 min= src[step]; |
| 510 }else{ | 510 }else{ |
| 511 max= src[step]; | 511 max= src[step]; |
| 512 min= src[0]; | 512 min= src[0]; |
| 513 } | 513 } |
| 514 for(x=2; x<8; x+=2){ | 514 for(x=2; x<8; x+=2){ |
| 515 if(src[x*step] > src[(x+1)*step]){ | 515 if(src[x*step] > src[(x+1)*step]){ |
| 516 if(src[x *step] > max) max= src[ x *step]; | 516 if(src[x *step] > max) max= src[ x *step]; |
| 517 if(src[(x+1)*step] < min) min= src[(x+1)*step]; | 517 if(src[(x+1)*step] < min) min= src[(x+1)*step]; |
| 518 }else{ | 518 }else{ |
| 519 if(src[(x+1)*step] > max) max= src[(x+1)*step]; | 519 if(src[(x+1)*step] > max) max= src[(x+1)*step]; |
| 520 if(src[ x *step] < min) min= src[ x *step]; | 520 if(src[ x *step] < min) min= src[ x *step]; |
| 521 } | 521 } |
| 522 } | 522 } |
| 523 if(max-min < 2*QP){ | 523 if(max-min < 2*QP){ |
| 524 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0]; | 524 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0]; |
| 525 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step]; | 525 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step]; |
| 526 | 526 |
| 527 int sums[10]; | 527 int sums[10]; |
| 528 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4; | 528 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4; |
| 529 sums[1] = sums[0] - first + src[3*step]; | 529 sums[1] = sums[0] - first + src[3*step]; |
| 530 sums[2] = sums[1] - first + src[4*step]; | 530 sums[2] = sums[1] - first + src[4*step]; |
| 531 sums[3] = sums[2] - first + src[5*step]; | 531 sums[3] = sums[2] - first + src[5*step]; |
| 532 sums[4] = sums[3] - first + src[6*step]; | 532 sums[4] = sums[3] - first + src[6*step]; |
| 533 sums[5] = sums[4] - src[0*step] + src[7*step]; | 533 sums[5] = sums[4] - src[0*step] + src[7*step]; |
| 534 sums[6] = sums[5] - src[1*step] + last; | 534 sums[6] = sums[5] - src[1*step] + last; |
| 535 sums[7] = sums[6] - src[2*step] + last; | 535 sums[7] = sums[6] - src[2*step] + last; |
| 536 sums[8] = sums[7] - src[3*step] + last; | 536 sums[8] = sums[7] - src[3*step] + last; |
| 537 sums[9] = sums[8] - src[4*step] + last; | 537 sums[9] = sums[8] - src[4*step] + last; |
| 538 | 538 |
| 539 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4; | 539 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4; |
| 540 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4; | 540 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4; |
| 541 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4; | 541 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4; |
| 542 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4; | 542 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4; |
| 543 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4; | 543 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4; |
| 544 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4; | 544 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4; |
| 545 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4; | 545 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4; |
| 546 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4; | 546 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4; |
| 547 } | 547 } |
| 548 }else{ | 548 }else{ |
| 549 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]); | 549 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]); |
| 550 | 550 |
| 551 if(ABS(middleEnergy) < 8*QP) | 551 if(ABS(middleEnergy) < 8*QP) |
| 552 { | 552 { |
| 553 const int q=(src[3*step] - src[4*step])/2; | 553 const int q=(src[3*step] - src[4*step])/2; |
| 554 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]); | 554 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]); |
| 555 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]); | 555 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]); |
| 556 | 556 |
| 557 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); | 557 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); |
| 558 d= MAX(d, 0); | 558 d= MAX(d, 0); |
| 559 | 559 |
| 560 d= (5*d + 32) >> 6; | 560 d= (5*d + 32) >> 6; |
| 561 d*= SIGN(-middleEnergy); | 561 d*= SIGN(-middleEnergy); |
| 562 | 562 |
| 563 if(q>0) | 563 if(q>0) |
| 564 { | 564 { |
| 565 d= d<0 ? 0 : d; | 565 d= d<0 ? 0 : d; |
| 566 d= d>q ? q : d; | 566 d= d>q ? q : d; |
| 567 } | 567 } |
| 568 else | 568 else |
| 569 { | 569 { |
| 570 d= d>0 ? 0 : d; | 570 d= d>0 ? 0 : d; |
| 571 d= d<q ? q : d; | 571 d= d<q ? q : d; |
| 572 } | 572 } |
| 573 | 573 |
| 574 src[3*step]-= d; | 574 src[3*step]-= d; |
| 575 src[4*step]+= d; | 575 src[4*step]+= d; |
| 576 } | 576 } |
| 577 } | 577 } |
| 578 | 578 |
| 579 src += stride; | 579 src += stride; |
| 580 } | 580 } |
| 581 /*if(step==16){ | 581 /*if(step==16){ |
| 582 STOP_TIMER("step16") | 582 STOP_TIMER("step16") |
| 583 }else{ | 583 }else{ |
| 584 STOP_TIMER("stepX") | 584 STOP_TIMER("stepX") |
| 585 }*/ | 585 }*/ |
| 666 #endif | 666 #endif |
| 667 | 667 |
// Minor note: the HAVE_xyz macros are messed up after this line, so don't use them.
| 669 | 669 |
| 670 static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, | 670 static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
| 671 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc) | 671 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc) |
| 672 { | 672 { |
| 673 PPContext *c= (PPContext *)vc; | 673 PPContext *c= (PPContext *)vc; |
| 674 PPMode *ppMode= (PPMode *)vm; | 674 PPMode *ppMode= (PPMode *)vm; |
| 675 c->ppMode= *ppMode; //FIXME | 675 c->ppMode= *ppMode; //FIXME |
| 676 | 676 |
| 677 // useing ifs here as they are faster than function pointers allthough the | 677 // useing ifs here as they are faster than function pointers allthough the |
| 678 // difference wouldnt be messureable here but its much better because | 678 // difference wouldnt be messureable here but its much better because |
| 679 // someone might exchange the cpu whithout restarting mplayer ;) | 679 // someone might exchange the cpu whithout restarting mplayer ;) |
| 680 #ifdef RUNTIME_CPUDETECT | 680 #ifdef RUNTIME_CPUDETECT |
| 681 #if defined(ARCH_X86) || defined(ARCH_X86_64) | 681 #if defined(ARCH_X86) || defined(ARCH_X86_64) |
| 682 // ordered per speed fasterst first | 682 // ordered per speed fasterst first |
| 683 if(c->cpuCaps & PP_CPU_CAPS_MMX2) | 683 if(c->cpuCaps & PP_CPU_CAPS_MMX2) |
| 684 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 684 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 685 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW) | 685 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW) |
| 686 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 686 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 687 else if(c->cpuCaps & PP_CPU_CAPS_MMX) | 687 else if(c->cpuCaps & PP_CPU_CAPS_MMX) |
| 688 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 688 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 689 else | 689 else |
| 690 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 690 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 691 #else | 691 #else |
| 692 #ifdef ARCH_POWERPC | 692 #ifdef ARCH_POWERPC |
| 693 #ifdef HAVE_ALTIVEC | 693 #ifdef HAVE_ALTIVEC |
| 694 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) | 694 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) |
| 695 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 695 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 696 else | 696 else |
| 697 #endif | 697 #endif |
| 698 #endif | 698 #endif |
| 699 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 699 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 700 #endif | 700 #endif |
| 701 #else //RUNTIME_CPUDETECT | 701 #else //RUNTIME_CPUDETECT |
| 702 #ifdef HAVE_MMX2 | 702 #ifdef HAVE_MMX2 |
| 703 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 703 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 704 #elif defined (HAVE_3DNOW) | 704 #elif defined (HAVE_3DNOW) |
| 705 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 705 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 706 #elif defined (HAVE_MMX) | 706 #elif defined (HAVE_MMX) |
| 707 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 707 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 708 #elif defined (HAVE_ALTIVEC) | 708 #elif defined (HAVE_ALTIVEC) |
| 709 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 709 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 710 #else | 710 #else |
| 711 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 711 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 712 #endif | 712 #endif |
| 713 #endif //!RUNTIME_CPUDETECT | 713 #endif //!RUNTIME_CPUDETECT |
| 714 } | 714 } |
| 715 | 715 |
| 716 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, | 716 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
| 717 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode); | 717 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode); |
| 718 | 718 |
/**
 * Help text for the -pp command line option: the table of available
 * filters with their short/long names and options, followed by the
 * filter-string syntax and a few examples.
 */
char *pp_help =
    "Available postprocessing filters:\n"
    "Filters Options\n"
    "short long name short long option Description\n"
    "* * a autoq CPU power dependent enabler\n"
    " c chrom chrominance filtering enabled\n"
    " y nochrom chrominance filtering disabled\n"
    " n noluma luma filtering disabled\n"
    "hb hdeblock (2 threshold) horizontal deblocking filter\n"
    " 1. difference factor: default=32, higher -> more deblocking\n"
    " 2. flatness threshold: default=39, lower -> more deblocking\n"
    " the h & v deblocking filters share these\n"
    " so you can't set different thresholds for h / v\n"
    "vb vdeblock (2 threshold) vertical deblocking filter\n"
    "ha hadeblock (2 threshold) horizontal deblocking filter\n"
    "va vadeblock (2 threshold) vertical deblocking filter\n"
    "h1 x1hdeblock experimental h deblock filter 1\n"
    "v1 x1vdeblock experimental v deblock filter 1\n"
    "dr dering deringing filter\n"
    "al autolevels automatic brightness / contrast\n"
    " f fullyrange stretch luminance to (0..255)\n"
    "lb linblenddeint linear blend deinterlacer\n"
    "li linipoldeint linear interpolating deinterlace\n"
    "ci cubicipoldeint cubic interpolating deinterlacer\n"
    "md mediandeint median deinterlacer\n"
    "fd ffmpegdeint ffmpeg deinterlacer\n"
    "l5 lowpass5 FIR lowpass deinterlacer\n"
    "de default hb:a,vb:a,dr:a\n"
    "fa fast h1:a,v1:a,dr:a\n"
    "ac ha:a:128:7,va:a,dr:a\n"
    "tn tmpnoise (3 threshold) temporal noise reducer\n"
    " 1. <= 2. <= 3. larger -> stronger filtering\n"
    "fq forceQuant <quantizer> force quantizer\n"
    "Usage:\n"
    "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
    "long form example:\n"
    "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
    "short form example:\n"
    "vb:a/hb:a/lb de,-vb\n"
    "more examples:\n"
    "tn:64:128:256\n";
| 763 | 763 |
| 764 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality) | 764 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality) |
| 765 { | 765 { |
| 766 char temp[GET_MODE_BUFFER_SIZE]; | 766 char temp[GET_MODE_BUFFER_SIZE]; |
| 767 char *p= temp; | 767 char *p= temp; |
| 768 char *filterDelimiters= ",/"; | 768 char *filterDelimiters= ",/"; |
| 769 char *optionDelimiters= ":"; | 769 char *optionDelimiters= ":"; |
| 770 struct PPMode *ppMode; | 770 struct PPMode *ppMode; |
| 771 char *filterToken; | 771 char *filterToken; |
| 772 | 772 |
| 773 ppMode= memalign(8, sizeof(PPMode)); | 773 ppMode= memalign(8, sizeof(PPMode)); |
| 774 | 774 |
| 775 ppMode->lumMode= 0; | 775 ppMode->lumMode= 0; |
| 776 ppMode->chromMode= 0; | 776 ppMode->chromMode= 0; |
| 777 ppMode->maxTmpNoise[0]= 700; | 777 ppMode->maxTmpNoise[0]= 700; |
| 778 ppMode->maxTmpNoise[1]= 1500; | 778 ppMode->maxTmpNoise[1]= 1500; |
| 779 ppMode->maxTmpNoise[2]= 3000; | 779 ppMode->maxTmpNoise[2]= 3000; |
| 780 ppMode->maxAllowedY= 234; | 780 ppMode->maxAllowedY= 234; |
| 781 ppMode->minAllowedY= 16; | 781 ppMode->minAllowedY= 16; |
| 782 ppMode->baseDcDiff= 256/8; | 782 ppMode->baseDcDiff= 256/8; |
| 783 ppMode->flatnessThreshold= 56-16-1; | 783 ppMode->flatnessThreshold= 56-16-1; |
| 784 ppMode->maxClippedThreshold= 0.01; | 784 ppMode->maxClippedThreshold= 0.01; |
| 785 ppMode->error=0; | 785 ppMode->error=0; |
| 786 | 786 |
| 787 strncpy(temp, name, GET_MODE_BUFFER_SIZE); | 787 strncpy(temp, name, GET_MODE_BUFFER_SIZE); |
| 788 | 788 |
| 789 if(verbose>1) printf("pp: %s\n", name); | 789 if(verbose>1) printf("pp: %s\n", name); |
| 790 | 790 |
| 791 for(;;){ | 791 for(;;){ |
| 792 char *filterName; | 792 char *filterName; |
| 793 int q= 1000000; //PP_QUALITY_MAX; | 793 int q= 1000000; //PP_QUALITY_MAX; |
| 794 int chrom=-1; | 794 int chrom=-1; |
| 795 int luma=-1; | 795 int luma=-1; |
| 796 char *option; | 796 char *option; |
| 797 char *options[OPTIONS_ARRAY_SIZE]; | 797 char *options[OPTIONS_ARRAY_SIZE]; |
| 798 int i; | 798 int i; |
| 799 int filterNameOk=0; | 799 int filterNameOk=0; |
| 800 int numOfUnknownOptions=0; | 800 int numOfUnknownOptions=0; |
| 801 int enable=1; //does the user want us to enabled or disabled the filter | 801 int enable=1; //does the user want us to enabled or disabled the filter |
| 802 | 802 |
| 803 filterToken= strtok(p, filterDelimiters); | 803 filterToken= strtok(p, filterDelimiters); |
| 804 if(filterToken == NULL) break; | 804 if(filterToken == NULL) break; |
| 805 p+= strlen(filterToken) + 1; // p points to next filterToken | 805 p+= strlen(filterToken) + 1; // p points to next filterToken |
| 806 filterName= strtok(filterToken, optionDelimiters); | 806 filterName= strtok(filterToken, optionDelimiters); |
| 807 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName); | 807 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName); |
| 808 | 808 |
| 809 if(*filterName == '-') | 809 if(*filterName == '-') |
| 810 { | 810 { |
| 811 enable=0; | 811 enable=0; |
| 812 filterName++; | 812 filterName++; |
| 813 } | 813 } |
| 814 | 814 |
| 815 for(;;){ //for all options | 815 for(;;){ //for all options |
| 816 option= strtok(NULL, optionDelimiters); | 816 option= strtok(NULL, optionDelimiters); |
| 817 if(option == NULL) break; | 817 if(option == NULL) break; |
| 818 | 818 |
| 819 if(verbose>1) printf("pp: option: %s\n", option); | 819 if(verbose>1) printf("pp: option: %s\n", option); |
| 820 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality; | 820 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality; |
| 821 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0; | 821 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0; |
| 822 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1; | 822 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1; |
| 823 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0; | 823 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0; |
| 824 else | 824 else |
| 825 { | 825 { |
| 826 options[numOfUnknownOptions] = option; | 826 options[numOfUnknownOptions] = option; |
| 827 numOfUnknownOptions++; | 827 numOfUnknownOptions++; |
| 828 } | 828 } |
| 829 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break; | 829 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break; |
| 830 } | 830 } |
| 831 options[numOfUnknownOptions] = NULL; | 831 options[numOfUnknownOptions] = NULL; |
| 832 | 832 |
| 833 /* replace stuff from the replace Table */ | 833 /* replace stuff from the replace Table */ |
| 834 for(i=0; replaceTable[2*i]!=NULL; i++) | 834 for(i=0; replaceTable[2*i]!=NULL; i++) |
| 835 { | 835 { |
| 836 if(!strcmp(replaceTable[2*i], filterName)) | 836 if(!strcmp(replaceTable[2*i], filterName)) |
| 837 { | 837 { |
| 838 int newlen= strlen(replaceTable[2*i + 1]); | 838 int newlen= strlen(replaceTable[2*i + 1]); |
| 839 int plen; | 839 int plen; |
| 840 int spaceLeft; | 840 int spaceLeft; |
| 841 | 841 |
| 842 if(p==NULL) p= temp, *p=0; //last filter | 842 if(p==NULL) p= temp, *p=0; //last filter |
| 843 else p--, *p=','; //not last filter | 843 else p--, *p=','; //not last filter |
| 844 | 844 |
| 845 plen= strlen(p); | 845 plen= strlen(p); |
| 846 spaceLeft= p - temp + plen; | 846 spaceLeft= p - temp + plen; |
| 847 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE) | 847 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE) |
| 848 { | 848 { |
| 849 ppMode->error++; | 849 ppMode->error++; |
| 850 break; | 850 break; |
| 851 } | 851 } |
| 852 memmove(p + newlen, p, plen+1); | 852 memmove(p + newlen, p, plen+1); |
| 853 memcpy(p, replaceTable[2*i + 1], newlen); | 853 memcpy(p, replaceTable[2*i + 1], newlen); |
| 854 filterNameOk=1; | 854 filterNameOk=1; |
| 855 } | 855 } |
| 856 } | 856 } |
| 857 | 857 |
| 858 for(i=0; filters[i].shortName!=NULL; i++) | 858 for(i=0; filters[i].shortName!=NULL; i++) |
| 859 { | 859 { |
| 860 // printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName); | 860 // printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName); |
| 861 if( !strcmp(filters[i].longName, filterName) | 861 if( !strcmp(filters[i].longName, filterName) |
| 862 || !strcmp(filters[i].shortName, filterName)) | 862 || !strcmp(filters[i].shortName, filterName)) |
| 863 { | 863 { |
| 864 ppMode->lumMode &= ~filters[i].mask; | 864 ppMode->lumMode &= ~filters[i].mask; |
| 865 ppMode->chromMode &= ~filters[i].mask; | 865 ppMode->chromMode &= ~filters[i].mask; |
| 866 | 866 |
| 867 filterNameOk=1; | 867 filterNameOk=1; |
| 868 if(!enable) break; // user wants to disable it | 868 if(!enable) break; // user wants to disable it |
| 869 | 869 |
| 870 if(q >= filters[i].minLumQuality && luma) | 870 if(q >= filters[i].minLumQuality && luma) |
| 871 ppMode->lumMode|= filters[i].mask; | 871 ppMode->lumMode|= filters[i].mask; |
| 872 if(chrom==1 || (chrom==-1 && filters[i].chromDefault)) | 872 if(chrom==1 || (chrom==-1 && filters[i].chromDefault)) |
| 873 if(q >= filters[i].minChromQuality) | 873 if(q >= filters[i].minChromQuality) |
| 874 ppMode->chromMode|= filters[i].mask; | 874 ppMode->chromMode|= filters[i].mask; |
| 875 | 875 |
| 876 if(filters[i].mask == LEVEL_FIX) | 876 if(filters[i].mask == LEVEL_FIX) |
| 877 { | 877 { |
| 878 int o; | 878 int o; |
| 879 ppMode->minAllowedY= 16; | 879 ppMode->minAllowedY= 16; |
| 880 ppMode->maxAllowedY= 234; | 880 ppMode->maxAllowedY= 234; |
| 881 for(o=0; options[o]!=NULL; o++) | 881 for(o=0; options[o]!=NULL; o++) |
| 882 { | 882 { |
| 883 if( !strcmp(options[o],"fullyrange") | 883 if( !strcmp(options[o],"fullyrange") |
| 884 ||!strcmp(options[o],"f")) | 884 ||!strcmp(options[o],"f")) |
| 885 { | 885 { |
| 886 ppMode->minAllowedY= 0; | 886 ppMode->minAllowedY= 0; |
| 887 ppMode->maxAllowedY= 255; | 887 ppMode->maxAllowedY= 255; |
| 888 numOfUnknownOptions--; | 888 numOfUnknownOptions--; |
| 889 } | 889 } |
| 890 } | 890 } |
| 891 } | 891 } |
| 892 else if(filters[i].mask == TEMP_NOISE_FILTER) | 892 else if(filters[i].mask == TEMP_NOISE_FILTER) |
| 893 { | 893 { |
| 894 int o; | 894 int o; |
| 895 int numOfNoises=0; | 895 int numOfNoises=0; |
| 896 | 896 |
| 897 for(o=0; options[o]!=NULL; o++) | 897 for(o=0; options[o]!=NULL; o++) |
| 898 { | 898 { |
| 899 char *tail; | 899 char *tail; |
| 900 ppMode->maxTmpNoise[numOfNoises]= | 900 ppMode->maxTmpNoise[numOfNoises]= |
| 901 strtol(options[o], &tail, 0); | 901 strtol(options[o], &tail, 0); |
| 902 if(tail!=options[o]) | 902 if(tail!=options[o]) |
| 903 { | 903 { |
| 904 numOfNoises++; | 904 numOfNoises++; |
| 905 numOfUnknownOptions--; | 905 numOfUnknownOptions--; |
| 906 if(numOfNoises >= 3) break; | 906 if(numOfNoises >= 3) break; |
| 907 } | 907 } |
| 908 } | 908 } |
| 909 } | 909 } |
| 910 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK | 910 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK |
| 911 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK) | 911 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK) |
| 912 { | 912 { |
| 913 int o; | 913 int o; |
| 914 | 914 |
| 915 for(o=0; options[o]!=NULL && o<2; o++) | 915 for(o=0; options[o]!=NULL && o<2; o++) |
| 916 { | 916 { |
| 917 char *tail; | 917 char *tail; |
| 918 int val= strtol(options[o], &tail, 0); | 918 int val= strtol(options[o], &tail, 0); |
| 919 if(tail==options[o]) break; | 919 if(tail==options[o]) break; |
| 920 | 920 |
| 921 numOfUnknownOptions--; | 921 numOfUnknownOptions--; |
| 922 if(o==0) ppMode->baseDcDiff= val; | 922 if(o==0) ppMode->baseDcDiff= val; |
| 923 else ppMode->flatnessThreshold= val; | 923 else ppMode->flatnessThreshold= val; |
| 924 } | 924 } |
| 925 } | 925 } |
| 926 else if(filters[i].mask == FORCE_QUANT) | 926 else if(filters[i].mask == FORCE_QUANT) |
| 927 { | 927 { |
| 928 int o; | 928 int o; |
| 929 ppMode->forcedQuant= 15; | 929 ppMode->forcedQuant= 15; |
| 930 | 930 |
| 931 for(o=0; options[o]!=NULL && o<1; o++) | 931 for(o=0; options[o]!=NULL && o<1; o++) |
| 932 { | 932 { |
| 933 char *tail; | 933 char *tail; |
| 934 int val= strtol(options[o], &tail, 0); | 934 int val= strtol(options[o], &tail, 0); |
| 935 if(tail==options[o]) break; | 935 if(tail==options[o]) break; |
| 936 | 936 |
| 937 numOfUnknownOptions--; | 937 numOfUnknownOptions--; |
| 938 ppMode->forcedQuant= val; | 938 ppMode->forcedQuant= val; |
| 939 } | 939 } |
| 940 } | 940 } |
| 941 } | 941 } |
| 942 } | 942 } |
| 943 if(!filterNameOk) ppMode->error++; | 943 if(!filterNameOk) ppMode->error++; |
| 944 ppMode->error += numOfUnknownOptions; | 944 ppMode->error += numOfUnknownOptions; |
| 945 } | 945 } |
| 946 | 946 |
| 947 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode); | 947 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode); |
| 948 if(ppMode->error) | 948 if(ppMode->error) |
| 949 { | 949 { |
| 950 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name); | 950 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name); |
| 951 free(ppMode); | 951 free(ppMode); |
| 952 return NULL; | 952 return NULL; |
| 953 } | 953 } |
| 954 return ppMode; | 954 return ppMode; |
| 955 } | 955 } |
| 956 | 956 |
| 957 void pp_free_mode(pp_mode_t *mode){ | 957 void pp_free_mode(pp_mode_t *mode){ |
| 958 if(mode) free(mode); | 958 if(mode) free(mode); |
| 959 } | 959 } |
| 960 | 960 |
/**
 * Replace *p with a fresh, zero-filled, aligned allocation.
 *
 * The old buffer (if any) is freed and its contents are NOT preserved.
 *
 * @param p         address of the pointer to replace; *p may be NULL
 * @param alignment alignment passed to memalign()
 * @param size      number of bytes to allocate and clear
 *
 * If the allocation fails, *p is left NULL instead of being written through.
 */
static void reallocAlign(void **p, int alignment, int size){
    free(*p);
    *p= memalign(alignment, size);
    if(*p != NULL)
        memset(*p, 0, size);
}
| 966 | 966 |
| 967 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){ | 967 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){ |
| 968 int mbWidth = (width+15)>>4; | 968 int mbWidth = (width+15)>>4; |
| 969 int mbHeight= (height+15)>>4; | 969 int mbHeight= (height+15)>>4; |
| 970 int i; | 970 int i; |
| 971 | 971 |
| 972 c->stride= stride; | 972 c->stride= stride; |
| 973 c->qpStride= qpStride; | 973 c->qpStride= qpStride; |
| 974 | 974 |
| 975 reallocAlign((void **)&c->tempDst, 8, stride*24); | 975 reallocAlign((void **)&c->tempDst, 8, stride*24); |
| 976 reallocAlign((void **)&c->tempSrc, 8, stride*24); | 976 reallocAlign((void **)&c->tempSrc, 8, stride*24); |
| 977 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8); | 977 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8); |
| 978 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t)); | 978 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t)); |
| 979 for(i=0; i<256; i++) | 979 for(i=0; i<256; i++) |
| 980 c->yHistogram[i]= width*height/64*15/256; | 980 c->yHistogram[i]= width*height/64*15/256; |
| 981 | 981 |
| 982 for(i=0; i<3; i++) | 982 for(i=0; i<3; i++) |
| 983 { | 983 { |
| 984 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end | 984 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end |
| 985 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024); | 985 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024); |
| 986 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size | 986 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size |
| 987 } | 987 } |
| 988 | 988 |
| 989 reallocAlign((void **)&c->deintTemp, 8, 2*width+32); | 989 reallocAlign((void **)&c->deintTemp, 8, 2*width+32); |
| 990 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); | 990 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); |
| 991 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); | 991 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); |
| 992 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T)); | 992 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T)); |
| 993 } | 993 } |
| 994 | 994 |
| 995 static void global_init(void){ | 995 static void global_init(void){ |
| 996 int i; | 996 int i; |
| 997 memset(clip_table, 0, 256); | 997 memset(clip_table, 0, 256); |
| 998 for(i=256; i<512; i++) | 998 for(i=256; i<512; i++) |
| 999 clip_table[i]= i; | 999 clip_table[i]= i; |
| 1000 memset(clip_table+512, 0, 256); | 1000 memset(clip_table+512, 0, 256); |
| 1001 } | 1001 } |
| 1002 | 1002 |
| 1003 pp_context_t *pp_get_context(int width, int height, int cpuCaps){ | 1003 pp_context_t *pp_get_context(int width, int height, int cpuCaps){ |
| 1004 PPContext *c= memalign(32, sizeof(PPContext)); | 1004 PPContext *c= memalign(32, sizeof(PPContext)); |
| 1005 int stride= (width+15)&(~15); //assumed / will realloc if needed | 1005 int stride= (width+15)&(~15); //assumed / will realloc if needed |
| 1006 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed | 1006 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed |
| 1007 | 1007 |
| 1008 global_init(); | 1008 global_init(); |
| 1009 | 1009 |
| 1010 memset(c, 0, sizeof(PPContext)); | 1010 memset(c, 0, sizeof(PPContext)); |
| 1011 c->cpuCaps= cpuCaps; | 1011 c->cpuCaps= cpuCaps; |
| 1012 if(cpuCaps&PP_FORMAT){ | 1012 if(cpuCaps&PP_FORMAT){ |
| 1013 c->hChromaSubSample= cpuCaps&0x3; | 1013 c->hChromaSubSample= cpuCaps&0x3; |
| 1014 c->vChromaSubSample= (cpuCaps>>4)&0x3; | 1014 c->vChromaSubSample= (cpuCaps>>4)&0x3; |
| 1015 }else{ | 1015 }else{ |
| 1016 c->hChromaSubSample= 1; | 1016 c->hChromaSubSample= 1; |
| 1017 c->vChromaSubSample= 1; | 1017 c->vChromaSubSample= 1; |
| 1018 } | 1018 } |
| 1019 | 1019 |
| 1020 reallocBuffers(c, width, height, stride, qpStride); | 1020 reallocBuffers(c, width, height, stride, qpStride); |
| 1021 | 1021 |
| 1022 c->frameNum=-1; | 1022 c->frameNum=-1; |
| 1023 | 1023 |
| 1024 return c; | 1024 return c; |
| 1025 } | 1025 } |
| 1026 | 1026 |
| 1027 void pp_free_context(void *vc){ | 1027 void pp_free_context(void *vc){ |
| 1028 PPContext *c = (PPContext*)vc; | 1028 PPContext *c = (PPContext*)vc; |
| 1029 int i; | 1029 int i; |
| 1030 | 1030 |
| 1031 for(i=0; i<3; i++) free(c->tempBlured[i]); | 1031 for(i=0; i<3; i++) free(c->tempBlured[i]); |
| 1032 for(i=0; i<3; i++) free(c->tempBluredPast[i]); | 1032 for(i=0; i<3; i++) free(c->tempBluredPast[i]); |
| 1033 | 1033 |
| 1034 free(c->tempBlocks); | 1034 free(c->tempBlocks); |
| 1035 free(c->yHistogram); | 1035 free(c->yHistogram); |
| 1036 free(c->tempDst); | 1036 free(c->tempDst); |
| 1037 free(c->tempSrc); | 1037 free(c->tempSrc); |
| 1038 free(c->deintTemp); | 1038 free(c->deintTemp); |
| 1039 free(c->stdQPTable); | 1039 free(c->stdQPTable); |
| 1040 free(c->nonBQPTable); | 1040 free(c->nonBQPTable); |
| 1041 free(c->forcedQPTable); | 1041 free(c->forcedQPTable); |
| 1042 | 1042 |
| 1043 memset(c, 0, sizeof(PPContext)); | 1043 memset(c, 0, sizeof(PPContext)); |
| 1044 | 1044 |
| 1045 free(c); | 1045 free(c); |
| 1046 } | 1046 } |
| 1047 | 1047 |
| 1048 void pp_postprocess(uint8_t * src[3], int srcStride[3], | 1048 void pp_postprocess(uint8_t * src[3], int srcStride[3], |
| 1049 uint8_t * dst[3], int dstStride[3], | 1049 uint8_t * dst[3], int dstStride[3], |
| 1050 int width, int height, | 1050 int width, int height, |
| 1051 QP_STORE_T *QP_store, int QPStride, | 1051 QP_STORE_T *QP_store, int QPStride, |
| 1052 pp_mode_t *vm, void *vc, int pict_type) | 1052 pp_mode_t *vm, void *vc, int pict_type) |
| 1053 { | 1053 { |
| 1054 int mbWidth = (width+15)>>4; | 1054 int mbWidth = (width+15)>>4; |
| 1055 int mbHeight= (height+15)>>4; | 1055 int mbHeight= (height+15)>>4; |
| 1056 PPMode *mode = (PPMode*)vm; | 1056 PPMode *mode = (PPMode*)vm; |
| 1057 PPContext *c = (PPContext*)vc; | 1057 PPContext *c = (PPContext*)vc; |
| 1058 int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0])); | 1058 int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0])); |
| 1059 int absQPStride = ABS(QPStride); | 1059 int absQPStride = ABS(QPStride); |
| 1060 | 1060 |
| 1061 // c->stride and c->QPStride are always positive | 1061 // c->stride and c->QPStride are always positive |
| 1062 if(c->stride < minStride || c->qpStride < absQPStride) | 1062 if(c->stride < minStride || c->qpStride < absQPStride) |
| 1063 reallocBuffers(c, width, height, | 1063 reallocBuffers(c, width, height, |
| 1064 MAX(minStride, c->stride), | 1064 MAX(minStride, c->stride), |
| 1065 MAX(c->qpStride, absQPStride)); | 1065 MAX(c->qpStride, absQPStride)); |
| 1066 | 1066 |
| 1067 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) | 1067 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) |
| 1068 { | 1068 { |
| 1069 int i; | 1069 int i; |
| 1070 QP_store= c->forcedQPTable; | 1070 QP_store= c->forcedQPTable; |
| 1071 absQPStride = QPStride = 0; | 1071 absQPStride = QPStride = 0; |
| 1072 if(mode->lumMode & FORCE_QUANT) | 1072 if(mode->lumMode & FORCE_QUANT) |
| 1073 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant; | 1073 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant; |
| 1074 else | 1074 else |
| 1075 for(i=0; i<mbWidth; i++) QP_store[i]= 1; | 1075 for(i=0; i<mbWidth; i++) QP_store[i]= 1; |
| 1076 } | 1076 } |
| 1077 //printf("pict_type:%d\n", pict_type); | 1077 //printf("pict_type:%d\n", pict_type); |
| 1078 | 1078 |
| 1079 if(pict_type & PP_PICT_TYPE_QP2){ | 1079 if(pict_type & PP_PICT_TYPE_QP2){ |
| 1080 int i; | 1080 int i; |
| 1081 const int count= mbHeight * absQPStride; | 1081 const int count= mbHeight * absQPStride; |
| 1082 for(i=0; i<(count>>2); i++){ | 1082 for(i=0; i<(count>>2); i++){ |
| 1083 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F; | 1083 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F; |
| 1084 } | 1084 } |
| 1085 for(i<<=2; i<count; i++){ | 1085 for(i<<=2; i<count; i++){ |
| 1086 c->stdQPTable[i] = QP_store[i]>>1; | 1086 c->stdQPTable[i] = QP_store[i]>>1; |
| 1087 } | 1087 } |
| 1088 QP_store= c->stdQPTable; | 1088 QP_store= c->stdQPTable; |
| 1089 QPStride= absQPStride; | 1089 QPStride= absQPStride; |
| 1090 } | 1090 } |
| 1091 | 1091 |
| 1092 if(0){ | 1092 if(0){ |
| 1093 int x,y; | 1093 int x,y; |
| 1094 for(y=0; y<mbHeight; y++){ | 1094 for(y=0; y<mbHeight; y++){ |
| 1095 for(x=0; x<mbWidth; x++){ | 1095 for(x=0; x<mbWidth; x++){ |
| 1096 printf("%2d ", QP_store[x + y*QPStride]); | 1096 printf("%2d ", QP_store[x + y*QPStride]); |
| 1097 } | 1097 } |
| 1098 printf("\n"); | 1098 printf("\n"); |
| 1099 } | 1099 } |
| 1100 printf("\n"); | 1100 printf("\n"); |
| 1101 } | 1101 } |
| 1102 | 1102 |
| 1103 if((pict_type&7)!=3) | 1103 if((pict_type&7)!=3) |
| 1104 { | 1104 { |
| 1105 if (QPStride >= 0) { | 1105 if (QPStride >= 0) { |
| 1106 int i; | 1106 int i; |
| 1107 const int count= mbHeight * QPStride; | 1107 const int count= mbHeight * QPStride; |
| 1108 for(i=0; i<(count>>2); i++){ | 1108 for(i=0; i<(count>>2); i++){ |
| 1109 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F; | 1109 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F; |
| 1110 } | 1110 } |
| 1111 for(i<<=2; i<count; i++){ | 1111 for(i<<=2; i<count; i++){ |
| 1112 c->nonBQPTable[i] = QP_store[i] & 0x3F; | 1112 c->nonBQPTable[i] = QP_store[i] & 0x3F; |
| 1113 } | 1113 } |
| 1114 } else { | 1114 } else { |
| 1115 int i,j; | 1115 int i,j; |
| 1116 for(i=0; i<mbHeight; i++) { | 1116 for(i=0; i<mbHeight; i++) { |
| 1117 for(j=0; j<absQPStride; j++) { | 1117 for(j=0; j<absQPStride; j++) { |
| 1118 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F; | 1118 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F; |
| 1119 } | 1119 } |
| 1120 } | 1120 } |
| 1121 } | 1121 } |
| 1122 } | 1122 } |
| 1123 | 1123 |
| 1124 if(verbose>2) | 1124 if(verbose>2) |
| 1125 { | 1125 { |
| 1126 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode); | 1126 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode); |
| 1127 } | 1127 } |
| 1128 | 1128 |
| 1129 postProcess(src[0], srcStride[0], dst[0], dstStride[0], | 1129 postProcess(src[0], srcStride[0], dst[0], dstStride[0], |
| 1130 width, height, QP_store, QPStride, 0, mode, c); | 1130 width, height, QP_store, QPStride, 0, mode, c); |
| 1131 | 1131 |
| 1132 width = (width )>>c->hChromaSubSample; | 1132 width = (width )>>c->hChromaSubSample; |
| 1133 height = (height)>>c->vChromaSubSample; | 1133 height = (height)>>c->vChromaSubSample; |
| 1134 | 1134 |
| 1135 if(mode->chromMode) | 1135 if(mode->chromMode) |
| 1136 { | 1136 { |
| 1137 postProcess(src[1], srcStride[1], dst[1], dstStride[1], | 1137 postProcess(src[1], srcStride[1], dst[1], dstStride[1], |
| 1138 width, height, QP_store, QPStride, 1, mode, c); | 1138 width, height, QP_store, QPStride, 1, mode, c); |
| 1139 postProcess(src[2], srcStride[2], dst[2], dstStride[2], | 1139 postProcess(src[2], srcStride[2], dst[2], dstStride[2], |
| 1140 width, height, QP_store, QPStride, 2, mode, c); | 1140 width, height, QP_store, QPStride, 2, mode, c); |
| 1141 } | 1141 } |
| 1142 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) | 1142 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) |
| 1143 { | 1143 { |
| 1144 linecpy(dst[1], src[1], height, srcStride[1]); | 1144 linecpy(dst[1], src[1], height, srcStride[1]); |
| 1145 linecpy(dst[2], src[2], height, srcStride[2]); | 1145 linecpy(dst[2], src[2], height, srcStride[2]); |
| 1146 } | 1146 } |
| 1147 else | 1147 else |
| 1148 { | 1148 { |
| 1149 int y; | 1149 int y; |
| 1150 for(y=0; y<height; y++) | 1150 for(y=0; y<height; y++) |
| 1151 { | 1151 { |
| 1152 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width); | 1152 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width); |
| 1153 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width); | 1153 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width); |
| 1154 } | 1154 } |
| 1155 } | 1155 } |
| 1156 } | 1156 } |
| 1157 | 1157 |
