Mercurial > libavcodec.hg
annotate libpostproc/postprocess.c @ 1757:3906ddbaffec libavcodec
optimization & bugfix extracted from the 4k line diff between ffmpeg 0.4.7 and http://www.alicestreet.com/ffh263.html
the other parts of the diff were
1. spelling fixes (rejected as only a small part of it could be applied automatically)
2. cosmetics (reindentation, function reordering, var renaming, ...) with bugs (rejected)
3. rtp related stuff (rejected as it breaks several codecs)
4. some changes to the intra/inter decision & scene change detection (quality tests needed first)
| author | michael |
|---|---|
| date | Sat, 24 Jan 2004 23:47:33 +0000 |
| parents | ea5200a9f730 |
| children | 3875b8b30399 |
| rev | line source |
|---|---|
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
1 /* |
| 1067 | 2 Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at) |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
3 |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
4 This program is free software; you can redistribute it and/or modify |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
5 it under the terms of the GNU General Public License as published by |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
6 the Free Software Foundation; either version 2 of the License, or |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
7 (at your option) any later version. |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
8 |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
9 This program is distributed in the hope that it will be useful, |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
10 but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
12 GNU General Public License for more details. |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
13 |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
14 You should have received a copy of the GNU General Public License |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
15 along with this program; if not, write to the Free Software |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
17 */ |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
18 |
| 1109 | 19 /** |
| 20 * @file postprocess.c | |
| 21 * postprocessing. | |
| 22 */ | |
| 23 | |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
24 /* |
|
106
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
25 C MMX MMX2 3DNow |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
26 isVertDC Ec Ec |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
27 isVertMinMaxOk Ec Ec |
|
106
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
28 doVertLowPass E e e |
| 163 | 29 doVertDefFilter Ec Ec e e |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
30 isHorizDC Ec Ec |
|
128
e5266b8e79be
much better horizontal filters (transpose & use the vertical ones) :)
michael
parents:
126
diff
changeset
|
31 isHorizMinMaxOk a E |
|
e5266b8e79be
much better horizontal filters (transpose & use the vertical ones) :)
michael
parents:
126
diff
changeset
|
32 doHorizLowPass E e e |
| 163 | 33 doHorizDefFilter Ec Ec e e |
| 134 | 34 deRing E e e* |
|
106
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
35 Vertical RKAlgo1 E a a |
|
129
be35346e27c1
fixed difference with -vo md5 between doVertDefFilter() C and MMX / MMX2 versions
michael
parents:
128
diff
changeset
|
36 Horizontal RKAlgo1 a a |
| 156 | 37 Vertical X1# a E E |
| 38 Horizontal X1# a E E | |
| 111 | 39 LinIpolDeinterlace e E E* |
| 40 CubicIpolDeinterlace a e e* | |
| 41 LinBlendDeinterlace e E E* | |
|
1029
804cc05a3f61
C implementation of the median deinterlacer (seems to be the only one
rfelker
parents:
957
diff
changeset
|
42 MedianDeinterlace# E Ec Ec |
|
157
bc12fd7e6153
temp denoiser changes: (a-b)^2 instead of |a-b| and MMX2/3DNOW version
michael
parents:
156
diff
changeset
|
43 TempDeNoiser# E e e |
| 156 | 44 |
| 45 * I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work so it seems to work | |
| 46 # more or less self-invented filters so the exactness isn't too meaningful | |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
47 E = Exact implementation |
| 111 | 48 e = almost exact implementation (slightly different rounding,...) |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
49 a = alternative / approximate impl |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
50 c = checked against the other implementations (-vo md5) |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
51 */ |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
52 |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
53 /* |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
54 TODO: |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
55 reduce the time wasted on the mem transfer |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
56 unroll stuff if instructions depend too much on the prior one |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
57 move YScale thing to the end instead of fixing QP |
| 96 | 58 write a faster and higher quality deblocking filter :) |
|
97
e57b1d38d71f
bugfixes: last 3 lines not brightness/contrast corrected
michael
parents:
96
diff
changeset
|
59 make the mainloop more flexible (variable number of blocks at once |
|
e57b1d38d71f
bugfixes: last 3 lines not brightness/contrast corrected
michael
parents:
96
diff
changeset
|
60 (the if/else stuff per block is slowing things down) |
| 99 | 61 compare the quality & speed of all filters |
| 62 split this huge file | |
| 140 | 63 optimize c versions |
| 156 | 64 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
65 ... |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
66 */ |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
67 |
| 107 | 68 //Changelog: use the CVS log |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
69 |
| 1067 | 70 #include "config.h" |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
71 #include <inttypes.h> |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
72 #include <stdio.h> |
|
97
e57b1d38d71f
bugfixes: last 3 lines not brightness/contrast corrected
michael
parents:
96
diff
changeset
|
73 #include <stdlib.h> |
| 116 | 74 #include <string.h> |
| 133 | 75 #ifdef HAVE_MALLOC_H |
| 76 #include <malloc.h> | |
| 77 #endif | |
| 96 | 78 //#undef HAVE_MMX2 |
| 79 //#define HAVE_3DNOW | |
| 80 //#undef HAVE_MMX | |
| 169 | 81 //#undef ARCH_X86 |
| 163 | 82 //#define DEBUG_BRIGHTNESS |
| 1069 | 83 #ifdef USE_FASTMEMCPY |
| 1223 | 84 #include "../fastmemcpy.h" |
| 837 | 85 #endif |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
86 #include "postprocess.h" |
| 829 | 87 #include "postprocess_internal.h" |
| 1069 | 88 |
| 89 #include "mangle.h" //FIXME should be supressed | |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
90 |
/* Fall back to plain malloc() when memalign() is not available;
 * the requested alignment (first argument) is ignored in that case. */
#ifndef HAVE_MEMALIGN
#define memalign(a,b) malloc(b)
#endif

/* Small arithmetic helpers. Each argument may be evaluated more than once,
 * so never pass an expression with side effects. */
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#define MAX(a,b) ((a) < (b) ? (b) : (a))
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
#define SIGN(a) ((a) > 0 ? 1 : -1) /* note: SIGN(0) yields -1 */

#define GET_MODE_BUFFER_SIZE 500
#define OPTIONS_ARRAY_SIZE 10
#define BLOCK_SIZE 8
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
| 116 | 105 |
#ifdef ARCH_X86
/* 64-bit packed constants, 8-byte aligned.
 * w<NN> holds four 16-bit words, b<NN> holds eight bytes; every lane is set
 * to the hex value NN given in the name. */
static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005ULL;
static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020ULL;
static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000ULL;
static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101ULL;
static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202ULL;
static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808ULL;
static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080ULL;
#endif
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
115 |
| 1157 | 116 |
/* Lookup table of 3*256 bytes; clip_tab points 256 entries into it, so
 * clip_tab[i] is addressable for i in [-256, 511].
 * NOTE(review): presumably filled elsewhere with clamp-to-[0,255] values -
 * the initialisation is not visible here. */
static uint8_t clip_table[3*256];
static uint8_t * const clip_tab= clip_table + 256;

static int verbose= 0;

static const int deringThreshold= 20;
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
123 |
| 787 | 124 |
| 116 | 125 static struct PPFilter filters[]= |
| 126 { | |
| 127 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK}, | |
| 128 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK}, | |
| 787 | 129 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER}, |
| 130 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/ | |
| 116 | 131 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER}, |
| 132 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER}, | |
| 133 {"dr", "dering", 1, 5, 6, DERING}, | |
| 134 {"al", "autolevels", 0, 1, 2, LEVEL_FIX}, | |
| 181 | 135 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER}, |
| 136 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER}, | |
| 137 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER}, | |
| 138 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER}, | |
| 787 | 139 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER}, |
| 1157 | 140 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER}, |
| 156 | 141 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER}, |
| 183 | 142 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT}, |
| 116 | 143 {NULL, NULL,0,0,0,0} //End Marker |
| 144 }; | |
| 145 | |
/* Flat list of (alias, expansion) string pairs, terminated by a single NULL.
 * Even indices hold the alias, the following odd index holds the filter
 * string it stands for. */
static char *replaceTable[]=
{
	"default", 	"hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	"de", 		"hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	"fast", 	"x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	"fa", 		"x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	NULL //End Marker
};
| 154 | |
#ifdef ARCH_X86
/**
 * Reference every packed constant once so the compiler does not report
 * them as unused. The assignment only happens when all constants sum to
 * zero and has no other purpose.
 */
static inline void unusedVariableWarningFixer(void)
{
	if(w05 + w20 + b00 + b01 + b02 + b08 + b80 == 0) b00=0;
}
#endif
|
129
be35346e27c1
fixed difference with -vo md5 between doVertDefFilter() C and MMX / MMX2 versions
michael
parents:
128
diff
changeset
|
161 |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
162 |
#ifdef ARCH_X86
/* Thin wrappers that each issue one x86 prefetch instruction for the address
 * p. __asm__ is used instead of asm so the code also builds when the compiler
 * runs in a strict ISO C mode. */

/** Issue "prefetchnta" for the memory at p. */
static inline void prefetchnta(void *p)
{
	__asm__ volatile(	"prefetchnta (%0)\n\t"
		: : "r" (p)
	);
}

/** Issue "prefetcht0" for the memory at p. */
static inline void prefetcht0(void *p)
{
	__asm__ volatile(	"prefetcht0 (%0)\n\t"
		: : "r" (p)
	);
}

/** Issue "prefetcht1" for the memory at p. */
static inline void prefetcht1(void *p)
{
	__asm__ volatile(	"prefetcht1 (%0)\n\t"
		: : "r" (p)
	);
}

/** Issue "prefetcht2" for the memory at p. */
static inline void prefetcht2(void *p)
{
	__asm__ volatile(	"prefetcht2 (%0)\n\t"
		: : "r" (p)
	);
}
#endif
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
192 |
| 169 | 193 // The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
194 |
| 165 | 195 /** |
| 196 * Check if the given 8x8 Block is mostly "flat" | |
| 197 */ | |
| 787 | 198 static inline int isHorizDC(uint8_t src[], int stride, PPContext *c) |
| 165 | 199 { |
| 200 int numEq= 0; | |
| 201 int y; | |
| 1196 | 202 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; |
| 791 | 203 const int dcThreshold= dcOffset*2 + 1; |
| 1196 | 204 |
| 165 | 205 for(y=0; y<BLOCK_SIZE; y++) |
| 206 { | |
| 787 | 207 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; |
| 208 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++; | |
| 209 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++; | |
| 210 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++; | |
| 211 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++; | |
| 212 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++; | |
| 213 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++; | |
| 165 | 214 src+= stride; |
| 215 } | |
| 787 | 216 return numEq > c->ppMode.flatnessThreshold; |
| 217 } | |
| 218 | |
| 219 /** | |
| 220 * Check if the middle 8x8 Block in the given 8x16 block is flat | |
| 221 */ | |
| 222 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ | |
| 223 int numEq= 0; | |
| 224 int y; | |
| 1196 | 225 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; |
| 791 | 226 const int dcThreshold= dcOffset*2 + 1; |
| 1196 | 227 |
| 787 | 228 src+= stride*4; // src points to begin of the 8x8 Block |
| 229 for(y=0; y<BLOCK_SIZE-1; y++) | |
| 230 { | |
| 231 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 232 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 233 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 234 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 235 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 236 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 237 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 238 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 239 src+= stride; | |
| 240 } | |
| 241 return numEq > c->ppMode.flatnessThreshold; | |
| 165 | 242 } |
| 243 | |
/**
 * Check that the horizontal pixel differences in an 8x8 block are small
 * compared to QP.
 *
 * Only one pixel pair per row is sampled, using the column pattern
 * (0,5), (2,7), (4,1), (6,3) for rows 0-3 and again for rows 4-7.
 *
 * @param src    top-left pixel of the block
 * @param stride offset between two consecutive rows, in pixels (bytes)
 * @param QP     quantiser used to scale the allowed difference
 * @return 0 as soon as a sampled difference lies outside [-2*QP, 2*QP],
 *         1 if all eight samples are within that range
 */
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
{
	int i;
	for(i=0; i<2; i++){
		/* the unsigned compare rejects both d < -2*QP and d > 2*QP */
		if((unsigned)(src[0] - src[5] + 2*QP) > (unsigned)(4*QP)) return 0;
		src += stride;
		if((unsigned)(src[2] - src[7] + 2*QP) > (unsigned)(4*QP)) return 0;
		src += stride;
		if((unsigned)(src[4] - src[1] + 2*QP) > (unsigned)(4*QP)) return 0;
		src += stride;
		if((unsigned)(src[6] - src[3] + 2*QP) > (unsigned)(4*QP)) return 0;
		src += stride;
	}
	return 1;
}
| 165 | 266 |
|
1327
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
267 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP) |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
268 { |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
269 #if 1 |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
270 #if 1 |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
271 int x; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
272 src+= stride*4; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
273 for(x=0; x<BLOCK_SIZE; x+=4) |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
274 { |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
275 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
276 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
277 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
278 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
279 } |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
280 #else |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
281 int x; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
282 src+= stride*3; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
283 for(x=0; x<BLOCK_SIZE; x++) |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
284 { |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
285 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
286 } |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
287 #endif |
| 165 | 288 return 1; |
|
1327
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
289 #else |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
290 int x; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
291 src+= stride*4; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
292 for(x=0; x<BLOCK_SIZE; x++) |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
293 { |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
294 int min=255; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
295 int max=0; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
296 int y; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
297 for(y=0; y<8; y++){ |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
298 int v= src[x + y*stride]; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
299 if(v>max) max=v; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
300 if(v<min) min=v; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
301 } |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
302 if(max-min > 2*QP) return 0; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
303 } |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
304 return 1; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
305 #endif |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
306 } |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
307 |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
308 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
309 if( isVertDC_C(src, stride, c) ){ |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
310 if( isVertMinMaxOk_C(src, stride, c->QP) ) |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
311 return 1; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
312 else |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
313 return 0; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
314 }else{ |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
315 return 2; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
316 } |
| 165 | 317 } |
| 318 | |
| 319 static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP) | |
| 320 { | |
| 321 int y; | |
| 322 for(y=0; y<BLOCK_SIZE; y++) | |
| 323 { | |
| 324 const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]); | |
| 325 | |
| 326 if(ABS(middleEnergy) < 8*QP) | |
| 327 { | |
| 328 const int q=(dst[3] - dst[4])/2; | |
| 329 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]); | |
| 330 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]); | |
| 331 | |
| 332 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); | |
| 333 d= MAX(d, 0); | |
| 334 | |
| 335 d= (5*d + 32) >> 6; | |
| 336 d*= SIGN(-middleEnergy); | |
| 337 | |
| 338 if(q>0) | |
| 339 { | |
| 340 d= d<0 ? 0 : d; | |
| 341 d= d>q ? q : d; | |
| 342 } | |
| 343 else | |
| 344 { | |
| 345 d= d>0 ? 0 : d; | |
| 346 d= d<q ? q : d; | |
| 347 } | |
| 348 | |
| 349 dst[3]-= d; | |
| 350 dst[4]+= d; | |
| 351 } | |
| 352 dst+= stride; | |
| 353 } | |
| 354 } | |
| 355 | |
| 356 /** | |
| 357 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) | |
| 358 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) | |
| 359 */ | |
| 360 static inline void doHorizLowPass(uint8_t dst[], int stride, int QP) | |
| 361 { | |
| 362 | |
| 363 int y; | |
| 364 for(y=0; y<BLOCK_SIZE; y++) | |
| 365 { | |
| 366 const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0]; | |
| 367 const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7]; | |
| 368 | |
| 369 int sums[9]; | |
| 370 sums[0] = first + dst[0]; | |
| 371 sums[1] = dst[0] + dst[1]; | |
| 372 sums[2] = dst[1] + dst[2]; | |
| 373 sums[3] = dst[2] + dst[3]; | |
| 374 sums[4] = dst[3] + dst[4]; | |
| 375 sums[5] = dst[4] + dst[5]; | |
| 376 sums[6] = dst[5] + dst[6]; | |
| 377 sums[7] = dst[6] + dst[7]; | |
| 378 sums[8] = dst[7] + last; | |
| 379 | |
| 380 dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4; | |
| 381 dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4; | |
| 382 dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4; | |
| 383 dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4; | |
| 384 dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4; | |
| 385 dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4; | |
| 386 dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4; | |
| 387 dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4; | |
| 388 | |
| 389 dst+= stride; | |
| 390 } | |
| 391 } | |
| 392 | |
| 169 | 393 /** |
| 394 * Experimental Filter 1 (Horizontal) | |
| 395 * will not damage linear gradients | |
| 396 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter | |
| 397 * can only smooth blocks at the expected locations (it cant smooth them if they did move) | |
| 398 * MMX2 version does correct clipping C version doesnt | |
| 399 * not identical with the vertical one | |
| 400 */ | |
| 401 static inline void horizX1Filter(uint8_t *src, int stride, int QP) | |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
402 { |
| 169 | 403 int y; |
| 404 static uint64_t *lut= NULL; | |
| 405 if(lut==NULL) | |
| 406 { | |
| 407 int i; | |
| 408 lut= (uint64_t*)memalign(8, 256*8); | |
| 409 for(i=0; i<256; i++) | |
| 410 { | |
| 411 int v= i < 128 ? 2*i : 2*(i-256); | |
| 412 /* | |
| 413 //Simulate 112242211 9-Tap filter | |
| 414 uint64_t a= (v/16) & 0xFF; | |
| 415 uint64_t b= (v/8) & 0xFF; | |
| 416 uint64_t c= (v/4) & 0xFF; | |
| 417 uint64_t d= (3*v/8) & 0xFF; | |
| 418 */ | |
| 419 //Simulate piecewise linear interpolation | |
| 420 uint64_t a= (v/16) & 0xFF; | |
| 421 uint64_t b= (v*3/16) & 0xFF; | |
| 422 uint64_t c= (v*5/16) & 0xFF; | |
| 423 uint64_t d= (7*v/16) & 0xFF; | |
| 424 uint64_t A= (0x100 - a)&0xFF; | |
| 425 uint64_t B= (0x100 - b)&0xFF; | |
| 426 uint64_t C= (0x100 - c)&0xFF; | |
| 427 uint64_t D= (0x100 - c)&0xFF; | |
| 130 | 428 |
| 169 | 429 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) | |
| 430 (D<<24) | (C<<16) | (B<<8) | (A); | |
| 431 //lut[i] = (v<<32) | (v<<24); | |
| 134 | 432 } |
| 433 } | |
| 434 | |
| 169 | 435 for(y=0; y<BLOCK_SIZE; y++) |
| 134 | 436 { |
| 169 | 437 int a= src[1] - src[2]; |
| 438 int b= src[3] - src[4]; | |
| 439 int c= src[5] - src[6]; | |
| 134 | 440 |
| 169 | 441 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0); |
| 167 | 442 |
| 169 | 443 if(d < QP) |
| 167 | 444 { |
| 169 | 445 int v = d * SIGN(-b); |
|
106
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
446 |
| 169 | 447 src[1] +=v/8; |
| 448 src[2] +=v/4; | |
| 449 src[3] +=3*v/8; | |
| 450 src[4] -=3*v/8; | |
| 451 src[5] -=v/4; | |
| 452 src[6] -=v/8; | |
|
106
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
453 |
| 169 | 454 } |
| 455 src+=stride; | |
|
106
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
456 } |
|
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
457 } |
|
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
458 |
|
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
459 |
| 171 | 460 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one |
| 169 | 461 //Plain C versions |
| 171 | 462 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) |
| 463 #define COMPILE_C | |
| 464 #endif | |
| 465 | |
| 787 | 466 #ifdef ARCH_X86 |
| 171 | 467 |
| 468 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) | |
| 469 #define COMPILE_MMX | |
| 470 #endif | |
| 471 | |
| 472 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT) | |
| 473 #define COMPILE_MMX2 | |
| 474 #endif | |
| 475 | |
| 476 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) | |
| 477 #define COMPILE_3DNOW | |
| 478 #endif | |
| 787 | 479 #endif //ARCH_X86 |
| 171 | 480 |
| 481 #undef HAVE_MMX | |
| 482 #undef HAVE_MMX2 | |
| 483 #undef HAVE_3DNOW | |
| 484 #undef ARCH_X86 | |
| 485 | |
| 486 #ifdef COMPILE_C | |
| 169 | 487 #undef HAVE_MMX |
| 488 #undef HAVE_MMX2 | |
| 489 #undef HAVE_3DNOW | |
| 490 #undef ARCH_X86 | |
| 491 #define RENAME(a) a ## _C | |
| 492 #include "postprocess_template.c" | |
| 171 | 493 #endif |
|
106
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
494 |
| 169 | 495 //MMX versions |
| 171 | 496 #ifdef COMPILE_MMX |
| 169 | 497 #undef RENAME |
| 498 #define HAVE_MMX | |
| 499 #undef HAVE_MMX2 | |
| 500 #undef HAVE_3DNOW | |
| 501 #define ARCH_X86 | |
| 502 #define RENAME(a) a ## _MMX | |
| 503 #include "postprocess_template.c" | |
| 171 | 504 #endif |
|
128
e5266b8e79be
much better horizontal filters (transpose & use the vertical ones) :)
michael
parents:
126
diff
changeset
|
505 |
| 169 | 506 //MMX2 versions |
| 171 | 507 #ifdef COMPILE_MMX2 |
| 169 | 508 #undef RENAME |
| 509 #define HAVE_MMX | |
| 510 #define HAVE_MMX2 | |
| 511 #undef HAVE_3DNOW | |
| 512 #define ARCH_X86 | |
| 513 #define RENAME(a) a ## _MMX2 | |
| 514 #include "postprocess_template.c" | |
| 171 | 515 #endif |
|
128
e5266b8e79be
much better horizontal filters (transpose & use the vertical ones) :)
michael
parents:
126
diff
changeset
|
516 |
| 169 | 517 //3DNOW versions |
| 171 | 518 #ifdef COMPILE_3DNOW |
| 169 | 519 #undef RENAME |
| 520 #define HAVE_MMX | |
| 521 #undef HAVE_MMX2 | |
| 522 #define HAVE_3DNOW | |
| 523 #define ARCH_X86 | |
| 524 #define RENAME(a) a ## _3DNow | |
| 525 #include "postprocess_template.c" | |
| 171 | 526 #endif |
|
128
e5266b8e79be
much better horizontal filters (transpose & use the vertical ones) :)
michael
parents:
126
diff
changeset
|
527 |
| 169 | 528 // minor note: the HAVE_xyz is messed up after that line so dont use it |
|
128
e5266b8e79be
much better horizontal filters (transpose & use the vertical ones) :)
michael
parents:
126
diff
changeset
|
529 |
| 169 | 530 static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
| 829 | 531 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc) |
| 156 | 532 { |
| 787 | 533 PPContext *c= (PPContext *)vc; |
| 829 | 534 PPMode *ppMode= (PPMode *)vm; |
| 787 | 535 c->ppMode= *ppMode; //FIXME |
| 536 | |
| 169 | 537 // useing ifs here as they are faster than function pointers allthough the |
| 538 // difference wouldnt be messureable here but its much better because | |
| 539 // someone might exchange the cpu whithout restarting mplayer ;) | |
| 171 | 540 #ifdef RUNTIME_CPUDETECT |
| 787 | 541 #ifdef ARCH_X86 |
| 169 | 542 // ordered per speed fasterst first |
| 805 | 543 if(c->cpuCaps & PP_CPU_CAPS_MMX2) |
| 787 | 544 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 805 | 545 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW) |
| 787 | 546 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 805 | 547 else if(c->cpuCaps & PP_CPU_CAPS_MMX) |
| 787 | 548 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 169 | 549 else |
| 787 | 550 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
|
157
bc12fd7e6153
temp denoiser changes: (a-b)^2 instead of |a-b| and MMX2/3DNOW version
michael
parents:
156
diff
changeset
|
551 #else |
| 787 | 552 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
|
157
bc12fd7e6153
temp denoiser changes: (a-b)^2 instead of |a-b| and MMX2/3DNOW version
michael
parents:
156
diff
changeset
|
553 #endif |
| 171 | 554 #else //RUNTIME_CPUDETECT |
| 555 #ifdef HAVE_MMX2 | |
| 787 | 556 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 171 | 557 #elif defined (HAVE_3DNOW) |
| 787 | 558 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 171 | 559 #elif defined (HAVE_MMX) |
| 787 | 560 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 171 | 561 #else |
| 787 | 562 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 171 | 563 #endif |
| 564 #endif //!RUNTIME_CPUDETECT | |
| 156 | 565 } |
| 566 | |
| 169 | 567 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
| 568 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode); | |
| 96 | 569 |
/**
 * Help text for the -pp command line option: syntax of the filter string,
 * examples, and the list of filters with their short and long names.
 */
char *pp_help=
    "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
    "long form example:\n"
    "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
    "short form example:\n"
    "vb:a/hb:a/lb de,-vb\n"
    "more examples:\n"
    "tn:64:128:256\n"
    "Filters Options\n"
    "short long name short long option Description\n"
    "* * a autoq CPU power dependent enabler\n"
    " c chrom chrominance filtering enabled\n"
    " y nochrom chrominance filtering disabled\n"
    "hb hdeblock (2 threshold) horizontal deblocking filter\n"
    " 1. difference factor: default=32, higher -> more deblocking\n"
    " 2. flatness threshold: default=39, lower -> more deblocking\n"
    " the h & v deblocking filters share these\n"
    " so you can't set different thresholds for h / v\n"
    "vb vdeblock (2 threshold) vertical deblocking filter\n"
    "h1 x1hdeblock experimental h deblock filter 1\n"
    "v1 x1vdeblock experimental v deblock filter 1\n"
    "dr dering deringing filter\n"
    "al autolevels automatic brightness / contrast\n"
    " f fullyrange stretch luminance to (0..255)\n"
    "lb linblenddeint linear blend deinterlacer\n"
    "li linipoldeint linear interpolating deinterlace\n"
    "ci cubicipoldeint cubic interpolating deinterlacer\n"
    "md mediandeint median deinterlacer\n"
    "fd ffmpegdeint ffmpeg deinterlacer\n"
    "de default hb:a,vb:a,dr:a,al\n"
    "fa fast h1:a,v1:a,dr:a,al\n"
    "tn tmpnoise (3 threshold) temporal noise reducer\n"
    " 1. <= 2. <= 3. larger -> stronger filtering\n"
    "fq forceQuant <quantizer> force quantizer\n"
    ;
| 116 | 607 |
| 829 | 608 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality) |
| 116 | 609 { |
| 610 char temp[GET_MODE_BUFFER_SIZE]; | |
| 611 char *p= temp; | |
| 787 | 612 char *filterDelimiters= ",/"; |
| 116 | 613 char *optionDelimiters= ":"; |
| 829 | 614 struct PPMode *ppMode; |
| 116 | 615 char *filterToken; |
| 616 | |
| 829 | 617 ppMode= memalign(8, sizeof(PPMode)); |
| 618 | |
| 619 ppMode->lumMode= 0; | |
| 620 ppMode->chromMode= 0; | |
| 621 ppMode->maxTmpNoise[0]= 700; | |
| 622 ppMode->maxTmpNoise[1]= 1500; | |
| 623 ppMode->maxTmpNoise[2]= 3000; | |
| 624 ppMode->maxAllowedY= 234; | |
| 625 ppMode->minAllowedY= 16; | |
|
1197
d9cbc8ef5a33
better? default thresholds, if this is worse for any files, then tell us ASAP
michaelni
parents:
1196
diff
changeset
|
626 ppMode->baseDcDiff= 256/8; |
|
d9cbc8ef5a33
better? default thresholds, if this is worse for any files, then tell us ASAP
michaelni
parents:
1196
diff
changeset
|
627 ppMode->flatnessThreshold= 56-16-1; |
| 829 | 628 ppMode->maxClippedThreshold= 0.01; |
| 629 ppMode->error=0; | |
| 793 | 630 |
| 116 | 631 strncpy(temp, name, GET_MODE_BUFFER_SIZE); |
| 632 | |
| 202 | 633 if(verbose>1) printf("pp: %s\n", name); |
| 156 | 634 |
| 116 | 635 for(;;){ |
| 636 char *filterName; | |
| 830 | 637 int q= 1000000; //PP_QUALITY_MAX; |
| 116 | 638 int chrom=-1; |
| 639 char *option; | |
| 640 char *options[OPTIONS_ARRAY_SIZE]; | |
| 641 int i; | |
| 642 int filterNameOk=0; | |
| 643 int numOfUnknownOptions=0; | |
| 644 int enable=1; //does the user want us to enabled or disabled the filter | |
| 645 | |
| 646 filterToken= strtok(p, filterDelimiters); | |
| 647 if(filterToken == NULL) break; | |
| 156 | 648 p+= strlen(filterToken) + 1; // p points to next filterToken |
| 116 | 649 filterName= strtok(filterToken, optionDelimiters); |
| 202 | 650 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName); |
| 116 | 651 |
| 652 if(*filterName == '-') | |
| 653 { | |
| 654 enable=0; | |
| 655 filterName++; | |
| 656 } | |
| 156 | 657 |
| 116 | 658 for(;;){ //for all options |
| 659 option= strtok(NULL, optionDelimiters); | |
| 660 if(option == NULL) break; | |
| 661 | |
| 202 | 662 if(verbose>1) printf("pp: option: %s\n", option); |
| 116 | 663 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality; |
| 664 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0; | |
| 665 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1; | |
| 666 else | |
| 667 { | |
| 668 options[numOfUnknownOptions] = option; | |
| 669 numOfUnknownOptions++; | |
| 670 } | |
| 671 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break; | |
| 672 } | |
| 156 | 673 options[numOfUnknownOptions] = NULL; |
| 116 | 674 |
| 675 /* replace stuff from the replace Table */ | |
| 676 for(i=0; replaceTable[2*i]!=NULL; i++) | |
| 677 { | |
| 678 if(!strcmp(replaceTable[2*i], filterName)) | |
| 679 { | |
| 680 int newlen= strlen(replaceTable[2*i + 1]); | |
| 681 int plen; | |
| 682 int spaceLeft; | |
| 683 | |
| 684 if(p==NULL) p= temp, *p=0; //last filter | |
| 685 else p--, *p=','; //not last filter | |
| 686 | |
| 687 plen= strlen(p); | |
|
419
b71190bacce8
applied 64bit patch from Ulrich Hecht <uli at suse dot de>
alex
parents:
212
diff
changeset
|
688 spaceLeft= p - temp + plen; |
| 116 | 689 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE) |
| 690 { | |
| 829 | 691 ppMode->error++; |
| 116 | 692 break; |
| 693 } | |
| 694 memmove(p + newlen, p, plen+1); | |
| 695 memcpy(p, replaceTable[2*i + 1], newlen); | |
| 696 filterNameOk=1; | |
| 697 } | |
| 698 } | |
| 699 | |
| 700 for(i=0; filters[i].shortName!=NULL; i++) | |
| 701 { | |
| 156 | 702 // printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName); |
| 116 | 703 if( !strcmp(filters[i].longName, filterName) |
| 704 || !strcmp(filters[i].shortName, filterName)) | |
| 705 { | |
| 829 | 706 ppMode->lumMode &= ~filters[i].mask; |
| 707 ppMode->chromMode &= ~filters[i].mask; | |
| 116 | 708 |
| 709 filterNameOk=1; | |
| 710 if(!enable) break; // user wants to disable it | |
| 711 | |
| 712 if(q >= filters[i].minLumQuality) | |
| 829 | 713 ppMode->lumMode|= filters[i].mask; |
| 116 | 714 if(chrom==1 || (chrom==-1 && filters[i].chromDefault)) |
| 715 if(q >= filters[i].minChromQuality) | |
| 829 | 716 ppMode->chromMode|= filters[i].mask; |
| 116 | 717 |
| 718 if(filters[i].mask == LEVEL_FIX) | |
| 719 { | |
| 720 int o; | |
| 829 | 721 ppMode->minAllowedY= 16; |
| 722 ppMode->maxAllowedY= 234; | |
| 116 | 723 for(o=0; options[o]!=NULL; o++) |
|
182
3ccd74a91074
minor brightness/contrast bugfix / moved some global vars into ppMode
michael
parents:
181
diff
changeset
|
724 { |
| 116 | 725 if( !strcmp(options[o],"fullyrange") |
| 726 ||!strcmp(options[o],"f")) | |
| 727 { | |
| 829 | 728 ppMode->minAllowedY= 0; |
| 729 ppMode->maxAllowedY= 255; | |
| 116 | 730 numOfUnknownOptions--; |
| 731 } | |
|
182
3ccd74a91074
minor brightness/contrast bugfix / moved some global vars into ppMode
michael
parents:
181
diff
changeset
|
732 } |
| 116 | 733 } |
| 156 | 734 else if(filters[i].mask == TEMP_NOISE_FILTER) |
| 735 { | |
| 736 int o; | |
| 737 int numOfNoises=0; | |
| 738 | |
| 739 for(o=0; options[o]!=NULL; o++) | |
| 740 { | |
| 741 char *tail; | |
| 829 | 742 ppMode->maxTmpNoise[numOfNoises]= |
| 156 | 743 strtol(options[o], &tail, 0); |
| 744 if(tail!=options[o]) | |
| 745 { | |
| 746 numOfNoises++; | |
| 747 numOfUnknownOptions--; | |
| 748 if(numOfNoises >= 3) break; | |
| 749 } | |
| 750 } | |
| 751 } | |
| 181 | 752 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK) |
| 753 { | |
| 754 int o; | |
| 755 | |
| 756 for(o=0; options[o]!=NULL && o<2; o++) | |
| 757 { | |
| 758 char *tail; | |
| 759 int val= strtol(options[o], &tail, 0); | |
| 760 if(tail==options[o]) break; | |
| 761 | |
| 762 numOfUnknownOptions--; | |
| 829 | 763 if(o==0) ppMode->baseDcDiff= val; |
| 764 else ppMode->flatnessThreshold= val; | |
| 181 | 765 } |
| 766 } | |
| 183 | 767 else if(filters[i].mask == FORCE_QUANT) |
| 768 { | |
| 769 int o; | |
| 829 | 770 ppMode->forcedQuant= 15; |
| 183 | 771 |
| 772 for(o=0; options[o]!=NULL && o<1; o++) | |
| 773 { | |
| 774 char *tail; | |
| 775 int val= strtol(options[o], &tail, 0); | |
| 776 if(tail==options[o]) break; | |
| 777 | |
| 778 numOfUnknownOptions--; | |
| 829 | 779 ppMode->forcedQuant= val; |
| 183 | 780 } |
| 781 } | |
| 116 | 782 } |
| 783 } | |
| 829 | 784 if(!filterNameOk) ppMode->error++; |
| 785 ppMode->error += numOfUnknownOptions; | |
| 116 | 786 } |
| 787 | |
| 829 | 788 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode); |
| 789 if(ppMode->error) | |
| 790 { | |
| 791 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name); | |
| 792 free(ppMode); | |
| 793 return NULL; | |
| 794 } | |
| 116 | 795 return ppMode; |
| 796 } | |
| 797 | |
| 829 | 798 void pp_free_mode(pp_mode_t *mode){ |
| 799 if(mode) free(mode); | |
| 800 } | |
| 801 | |
/**
 * Replaces the buffer *p by a new, zero filled one.
 * The old buffer (if any) is freed, its contents are not preserved.
 * @param p         address of the buffer pointer; on return it holds the new
 *                  buffer, or NULL if the allocation failed
 * @param alignment alignment passed on to memalign()
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size){
    free(*p);
    *p= memalign(alignment, size);
    if(*p) memset(*p, 0, size); // nothing to clear if the allocation failed
}
| 807 | |
| 1196 | 808 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){ |
| 791 | 809 int mbWidth = (width+15)>>4; |
| 810 int mbHeight= (height+15)>>4; | |
| 937 | 811 int i; |
| 812 | |
| 813 c->stride= stride; | |
| 1196 | 814 c->qpStride= qpStride; |
| 787 | 815 |
| 937 | 816 reallocAlign((void **)&c->tempDst, 8, stride*24); |
| 817 reallocAlign((void **)&c->tempSrc, 8, stride*24); | |
| 818 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8); | |
| 819 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t)); | |
| 787 | 820 for(i=0; i<256; i++) |
| 821 c->yHistogram[i]= width*height/64*15/256; | |
| 822 | |
| 823 for(i=0; i<3; i++) | |
| 185 | 824 { |
| 787 | 825 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end |
| 937 | 826 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024); |
| 827 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size | |
| 185 | 828 } |
| 937 | 829 |
| 1157 | 830 reallocAlign((void **)&c->deintTemp, 8, 2*width+32); |
| 1196 | 831 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); |
| 832 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); | |
| 937 | 833 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T)); |
| 834 } | |
| 179 | 835 |
| 1282 | 836 static void global_init(void){ |
| 1157 | 837 int i; |
| 838 memset(clip_table, 0, 256); | |
| 839 for(i=256; i<512; i++) | |
| 840 clip_table[i]= i; | |
| 841 memset(clip_table+512, 0, 256); | |
| 842 } | |
| 843 | |
| 937 | 844 pp_context_t *pp_get_context(int width, int height, int cpuCaps){ |
| 845 PPContext *c= memalign(32, sizeof(PPContext)); | |
| 846 int stride= (width+15)&(~15); //assumed / will realloc if needed | |
| 1196 | 847 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed |
| 937 | 848 |
| 1157 | 849 global_init(); |
| 850 | |
| 937 | 851 memset(c, 0, sizeof(PPContext)); |
| 852 c->cpuCaps= cpuCaps; | |
| 957 | 853 if(cpuCaps&PP_FORMAT){ |
| 854 c->hChromaSubSample= cpuCaps&0x3; | |
| 855 c->vChromaSubSample= (cpuCaps>>4)&0x3; | |
| 856 }else{ | |
| 857 c->hChromaSubSample= 1; | |
| 858 c->vChromaSubSample= 1; | |
| 859 } | |
| 937 | 860 |
| 1196 | 861 reallocBuffers(c, width, height, stride, qpStride); |
| 937 | 862 |
| 787 | 863 c->frameNum=-1; |
| 179 | 864 |
| 787 | 865 return c; |
| 179 | 866 } |
| 867 | |
| 792 | 868 void pp_free_context(void *vc){ |
| 787 | 869 PPContext *c = (PPContext*)vc; |
| 870 int i; | |
| 871 | |
| 872 for(i=0; i<3; i++) free(c->tempBlured[i]); | |
| 873 for(i=0; i<3; i++) free(c->tempBluredPast[i]); | |
| 874 | |
| 875 free(c->tempBlocks); | |
| 876 free(c->yHistogram); | |
| 877 free(c->tempDst); | |
| 878 free(c->tempSrc); | |
| 879 free(c->deintTemp); | |
| 1196 | 880 free(c->stdQPTable); |
| 791 | 881 free(c->nonBQPTable); |
| 937 | 882 free(c->forcedQPTable); |
| 883 | |
| 884 memset(c, 0, sizeof(PPContext)); | |
| 885 | |
| 787 | 886 free(c); |
| 887 } | |
| 888 | |
| 792 | 889 void pp_postprocess(uint8_t * src[3], int srcStride[3], |
| 787 | 890 uint8_t * dst[3], int dstStride[3], |
| 791 | 891 int width, int height, |
| 787 | 892 QP_STORE_T *QP_store, int QPStride, |
| 829 | 893 pp_mode_t *vm, void *vc, int pict_type) |
| 116 | 894 { |
| 791 | 895 int mbWidth = (width+15)>>4; |
| 896 int mbHeight= (height+15)>>4; | |
| 829 | 897 PPMode *mode = (PPMode*)vm; |
| 791 | 898 PPContext *c = (PPContext*)vc; |
| 937 | 899 int minStride= MAX(srcStride[0], dstStride[0]); |
| 1196 | 900 |
| 901 if(c->stride < minStride || c->qpStride < QPStride) | |
| 902 reallocBuffers(c, width, height, | |
| 903 MAX(minStride, c->stride), | |
| 904 MAX(c->qpStride, QPStride)); | |
| 787 | 905 |
| 183 | 906 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) |
| 152 | 907 { |
| 183 | 908 int i; |
| 937 | 909 QP_store= c->forcedQPTable; |
| 787 | 910 QPStride= 0; |
| 183 | 911 if(mode->lumMode & FORCE_QUANT) |
| 937 | 912 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant; |
| 183 | 913 else |
| 937 | 914 for(i=0; i<mbWidth; i++) QP_store[i]= 1; |
| 152 | 915 } |
| 1196 | 916 //printf("pict_type:%d\n", pict_type); |
| 917 | |
| 918 if(pict_type & PP_PICT_TYPE_QP2){ | |
| 919 int i; | |
| 920 const int count= mbHeight * QPStride; | |
| 921 for(i=0; i<(count>>2); i++){ | |
| 922 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F; | |
| 923 } | |
| 924 for(i<<=2; i<count; i++){ | |
| 925 c->stdQPTable[i] = QP_store[i]>>1; | |
| 926 } | |
| 927 QP_store= c->stdQPTable; | |
| 928 } | |
| 929 | |
| 791 | 930 if(0){ |
| 931 int x,y; | |
| 932 for(y=0; y<mbHeight; y++){ | |
| 933 for(x=0; x<mbWidth; x++){ | |
| 934 printf("%2d ", QP_store[x + y*QPStride]); | |
| 935 } | |
| 936 printf("\n"); | |
| 937 } | |
| 938 printf("\n"); | |
| 939 } | |
| 798 | 940 |
| 1196 | 941 if((pict_type&7)!=3) |
| 791 | 942 { |
| 1196 | 943 int i; |
| 944 const int count= mbHeight * QPStride; | |
| 945 for(i=0; i<(count>>2); i++){ | |
| 1724 | 946 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F; |
| 1196 | 947 } |
| 948 for(i<<=2; i<count; i++){ | |
| 1724 | 949 c->nonBQPTable[i] = QP_store[i] & 0x3F; |
| 791 | 950 } |
| 951 } | |
| 152 | 952 |
| 793 | 953 if(verbose>2) |
| 202 | 954 { |
| 955 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode); | |
| 956 } | |
| 957 | |
| 787 | 958 postProcess(src[0], srcStride[0], dst[0], dstStride[0], |
| 1076 | 959 width, height, QP_store, QPStride, 0, mode, c); |
| 116 | 960 |
| 957 | 961 width = (width )>>c->hChromaSubSample; |
| 962 height = (height)>>c->vChromaSubSample; | |
| 116 | 963 |
|
168
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
964 if(mode->chromMode) |
|
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
965 { |
| 787 | 966 postProcess(src[1], srcStride[1], dst[1], dstStride[1], |
| 1076 | 967 width, height, QP_store, QPStride, 1, mode, c); |
| 787 | 968 postProcess(src[2], srcStride[2], dst[2], dstStride[2], |
| 1076 | 969 width, height, QP_store, QPStride, 2, mode, c); |
|
168
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
970 } |
| 787 | 971 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) |
|
168
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
972 { |
| 791 | 973 memcpy(dst[1], src[1], srcStride[1]*height); |
| 974 memcpy(dst[2], src[2], srcStride[2]*height); | |
|
168
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
975 } |
|
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
976 else |
|
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
977 { |
|
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
978 int y; |
| 791 | 979 for(y=0; y<height; y++) |
|
168
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
980 { |
| 791 | 981 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width); |
| 982 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width); | |
|
168
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
983 } |
|
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
984 } |
| 116 | 985 } |
| 986 |
