Mercurial > libavcodec.hg
annotate libpostproc/postprocess.c @ 3198:6b9f0c4fbdbe libavcodec
First part of a series of speed-enchancing patches.
This one sets up a snow.h and makes snow use the dsputil function pointer
framework to access the three functions that will be implemented in asm
in the other parts of the patchset.
Patch by Robert Edele < yartrebo AH earthlink POIS net>
Original thread:
Subject: [Ffmpeg-devel] [PATCH] Snow mmx+sse2 asm optimizations
Date: Sun, 05 Feb 2006 12:47:14 -0500
| author | gpoirier |
|---|---|
| date | Thu, 16 Mar 2006 19:18:18 +0000 |
| parents | f4597d12563b |
| children |
| rev | line source |
|---|---|
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
1 /* |
| 1067 | 2 Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at) |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
3 |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
4 AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org> |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
5 |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
6 This program is free software; you can redistribute it and/or modify |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
7 it under the terms of the GNU General Public License as published by |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
8 the Free Software Foundation; either version 2 of the License, or |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
9 (at your option) any later version. |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
10 |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
11 This program is distributed in the hope that it will be useful, |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
12 but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
14 GNU General Public License for more details. |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
15 |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
16 You should have received a copy of the GNU General Public License |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
17 along with this program; if not, write to the Free Software |
|
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2979
diff
changeset
|
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
19 */ |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
20 |
| 1109 | 21 /** |
| 22 * @file postprocess.c | |
| 23 * postprocessing. | |
| 24 */ | |
| 2967 | 25 |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
26 /* |
| 2979 | 27 C MMX MMX2 3DNow AltiVec |
| 28 isVertDC Ec Ec Ec | |
| 29 isVertMinMaxOk Ec Ec Ec | |
| 30 doVertLowPass E e e Ec | |
| 31 doVertDefFilter Ec Ec e e Ec | |
| 32 isHorizDC Ec Ec Ec | |
| 33 isHorizMinMaxOk a E Ec | |
| 34 doHorizLowPass E e e Ec | |
| 35 doHorizDefFilter Ec Ec e e Ec | |
| 36 do_a_deblock Ec E Ec E | |
| 37 deRing E e e* Ecp | |
| 38 Vertical RKAlgo1 E a a | |
| 39 Horizontal RKAlgo1 a a | |
| 40 Vertical X1# a E E | |
| 41 Horizontal X1# a E E | |
| 42 LinIpolDeinterlace e E E* | |
| 43 CubicIpolDeinterlace a e e* | |
| 44 LinBlendDeinterlace e E E* | |
| 45 MedianDeinterlace# E Ec Ec | |
| 46 TempDeNoiser# E e e Ec | |
| 156 | 47 |
| 48 * I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work so it seems to work | |
| 49 # more or less self-invented filters so the exactness isn't too meaningful | |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
50 E = Exact implementation |
| 111 | 51 e = almost exact implementation (slightly different rounding,...) |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
52 a = alternative / approximate impl |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
53 c = checked against the other implementations (-vo md5) |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
54 p = partially optimized, still some work to do |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
55 */ |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
56 |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
57 /* |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
58 TODO: |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
59 reduce the time wasted on the mem transfer |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
60 unroll stuff if instructions depend too much on the prior one |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
61 move YScale thing to the end instead of fixing QP |
| 96 | 62 write a faster and higher quality deblocking filter :) |
|
97
e57b1d38d71f
bugfixes: last 3 lines not brightness/contrast corrected
michael
parents:
96
diff
changeset
|
63 make the mainloop more flexible (variable number of blocks at once |
| 2979 | 64 (the if/else stuff per block is slowing things down) |
| 99 | 65 compare the quality & speed of all filters |
| 66 split this huge file | |
| 140 | 67 optimize c versions |
| 156 | 68 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
69 ... |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
70 */ |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
71 |
| 107 | 72 //Changelog: use the CVS log |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
73 |
| 1067 | 74 #include "config.h" |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
75 #include <inttypes.h> |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
76 #include <stdio.h> |
|
97
e57b1d38d71f
bugfixes: last 3 lines not brightness/contrast corrected
michael
parents:
96
diff
changeset
|
77 #include <stdlib.h> |
| 116 | 78 #include <string.h> |
| 133 | 79 #ifdef HAVE_MALLOC_H |
| 80 #include <malloc.h> | |
| 81 #endif | |
| 96 | 82 //#undef HAVE_MMX2 |
| 83 //#define HAVE_3DNOW | |
| 84 //#undef HAVE_MMX | |
| 169 | 85 //#undef ARCH_X86 |
| 163 | 86 //#define DEBUG_BRIGHTNESS |
| 1069 | 87 #ifdef USE_FASTMEMCPY |
| 1775 | 88 #include "fastmemcpy.h" |
| 837 | 89 #endif |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
90 #include "postprocess.h" |
| 829 | 91 #include "postprocess_internal.h" |
| 1069 | 92 |
| 93 #include "mangle.h" //FIXME should be supressed | |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
94 |
| 2041 | 95 #ifdef HAVE_ALTIVEC_H |
| 96 #include <altivec.h> | |
| 97 #endif | |
| 98 | |
| 1071 | 99 #ifndef HAVE_MEMALIGN |
| 100 #define memalign(a,b) malloc(b) | |
| 101 #endif | |
| 102 | |
| 104 | 103 #define MIN(a,b) ((a) > (b) ? (b) : (a)) |
| 104 #define MAX(a,b) ((a) < (b) ? (b) : (a)) | |
| 105 #define ABS(a) ((a) > 0 ? (a) : (-(a))) | |
| 106 #define SIGN(a) ((a) > 0 ? 1 : -1) | |
| 107 | |
| 116 | 108 #define GET_MODE_BUFFER_SIZE 500 |
| 109 #define OPTIONS_ARRAY_SIZE 10 | |
| 787 | 110 #define BLOCK_SIZE 8 |
| 111 #define TEMP_STRIDE 8 | |
| 112 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet | |
| 116 | 113 |
|
1847
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
114 #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) |
|
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
115 # define attribute_used __attribute__((used)) |
|
2037
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
116 # define always_inline __attribute__((always_inline)) inline |
|
1847
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
117 #else |
|
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
118 # define attribute_used |
|
2037
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
119 # define always_inline inline |
|
1847
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
120 #endif |
|
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
121 |
|
2293
15cfba1b97b5
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents:
2188
diff
changeset
|
122 #if defined(ARCH_X86) || defined(ARCH_X86_64) |
| 2979 | 123 static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL; |
| 124 static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL; | |
| 125 static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL; | |
| 126 static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL; | |
| 127 static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL; | |
| 128 static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL; | |
| 129 static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL; | |
| 130 static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL; | |
|
148
1cfc4d567c0a
minor changes (fixed some warnings, added attribute aligned(8) stuff)
michael
parents:
142
diff
changeset
|
131 #endif |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
132 |
| 1157 | 133 static uint8_t clip_table[3*256]; |
| 134 static uint8_t * const clip_tab= clip_table + 256; | |
| 135 | |
|
2031
4225c131a2eb
warning fixes by (Michael Roitzsch <mroi at users dot sourceforge dot net>)
michael
parents:
2024
diff
changeset
|
136 static const int verbose= 0; |
| 179 | 137 |
|
1847
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
138 static const int attribute_used deringThreshold= 20; |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
139 |
| 787 | 140 |
| 116 | 141 static struct PPFilter filters[]= |
| 142 { | |
| 2979 | 143 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK}, |
| 144 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK}, | |
| 145 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER}, | |
| 146 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/ | |
| 147 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER}, | |
| 148 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER}, | |
| 149 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK}, | |
| 150 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK}, | |
| 151 {"dr", "dering", 1, 5, 6, DERING}, | |
| 152 {"al", "autolevels", 0, 1, 2, LEVEL_FIX}, | |
| 153 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER}, | |
| 154 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER}, | |
| 155 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER}, | |
| 156 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER}, | |
| 157 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER}, | |
| 158 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER}, | |
| 159 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER}, | |
| 160 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT}, | |
| 161 {NULL, NULL,0,0,0,0} //End Marker | |
| 116 | 162 }; |
| 163 | |
| 164 static char *replaceTable[]= | |
| 165 { | |
| 2979 | 166 "default", "hdeblock:a,vdeblock:a,dering:a", |
| 167 "de", "hdeblock:a,vdeblock:a,dering:a", | |
| 168 "fast", "x1hdeblock:a,x1vdeblock:a,dering:a", | |
| 169 "fa", "x1hdeblock:a,x1vdeblock:a,dering:a", | |
| 170 "ac", "ha:a:128:7,va:a,dering:a", | |
| 171 NULL //End Marker | |
| 116 | 172 }; |
| 173 | |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
174 |
|
2293
15cfba1b97b5
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents:
2188
diff
changeset
|
#if defined(ARCH_X86) || defined(ARCH_X86_64)
/*
 * Thin wrappers around the x86 PREFETCHh instructions. Each one emits a
 * single prefetch instruction for the address p and has no other effect
 * visible to C code. They are only compiled for x86 / x86_64 builds.
 */

/** Emit a "prefetchnta" instruction for the address p. */
static inline void prefetchnta(void *p)
{
    asm volatile("prefetchnta (%0)\n\t" : : "r" (p));
}

/** Emit a "prefetcht0" instruction for the address p. */
static inline void prefetcht0(void *p)
{
    asm volatile("prefetcht0 (%0)\n\t" : : "r" (p));
}

/** Emit a "prefetcht1" instruction for the address p. */
static inline void prefetcht1(void *p)
{
    asm volatile("prefetcht1 (%0)\n\t" : : "r" (p));
}

/** Emit a "prefetcht2" instruction for the address p. */
static inline void prefetcht2(void *p)
{
    asm volatile("prefetcht2 (%0)\n\t" : : "r" (p));
}
#endif
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
204 |
| 169 | 205 // The horizontal Functions exist only in C cuz the MMX code is faster with vertical filters and transposing |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
206 |
| 165 | 207 /** |
| 208 * Check if the given 8x8 Block is mostly "flat" | |
| 209 */ | |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
210 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c) |
| 165 | 211 { |
| 2979 | 212 int numEq= 0; |
| 213 int y; | |
| 214 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; | |
| 215 const int dcThreshold= dcOffset*2 + 1; | |
| 1196 | 216 |
| 2979 | 217 for(y=0; y<BLOCK_SIZE; y++) |
| 218 { | |
| 219 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; | |
| 220 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++; | |
| 221 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++; | |
| 222 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++; | |
| 223 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++; | |
| 224 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++; | |
| 225 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++; | |
| 226 src+= stride; | |
| 227 } | |
| 228 return numEq > c->ppMode.flatnessThreshold; | |
| 787 | 229 } |
| 230 | |
| 231 /** | |
| 232 * Check if the middle 8x8 Block in the given 8x16 block is flat | |
| 233 */ | |
| 234 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ | |
| 2979 | 235 int numEq= 0; |
| 236 int y; | |
| 237 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; | |
| 238 const int dcThreshold= dcOffset*2 + 1; | |
| 1196 | 239 |
| 2979 | 240 src+= stride*4; // src points to begin of the 8x8 Block |
| 241 for(y=0; y<BLOCK_SIZE-1; y++) | |
| 242 { | |
| 243 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 244 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 245 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 246 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 247 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 248 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 249 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 250 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 251 src+= stride; | |
| 252 } | |
| 253 return numEq > c->ppMode.flatnessThreshold; | |
| 165 | 254 } |
| 255 | |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
/**
 * Sparse horizontal range check over 8 rows.
 *
 * One pair of columns is sampled per row, cycling through (0,5), (2,7),
 * (4,1), (6,3) every four rows. The check fails as soon as one sampled
 * pair differs by more than 2*QP in either direction; the unsigned compare
 * covers both signs at once.
 *
 * @param src    first pixel of the first row
 * @param stride distance in bytes between two consecutive rows
 * @param QP     quantizer used to scale the allowed difference
 * @return 1 if all sampled pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const uint8_t columnPairs[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int row;

    for(row=0; row<8; row++)
    {
        const uint8_t *pair= columnPairs[row & 3];

        if((unsigned)(src[pair[0]] - src[pair[1]] + 2*QP) > 4*QP)
            return 0;
        src += stride;
    }
    return 1;
}
| 165 | 278 |
|
1327
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
279 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP) |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
280 { |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
281 #if 1 |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
282 #if 1 |
| 2979 | 283 int x; |
| 284 src+= stride*4; | |
| 285 for(x=0; x<BLOCK_SIZE; x+=4) | |
| 286 { | |
| 287 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0; | |
| 288 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0; | |
| 289 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0; | |
| 290 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0; | |
| 291 } | |
|
1327
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
292 #else |
| 2979 | 293 int x; |
| 294 src+= stride*3; | |
| 295 for(x=0; x<BLOCK_SIZE; x++) | |
| 296 { | |
| 297 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; | |
| 298 } | |
|
1327
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
299 #endif |
| 2979 | 300 return 1; |
|
1327
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
301 #else |
| 2979 | 302 int x; |
| 303 src+= stride*4; | |
| 304 for(x=0; x<BLOCK_SIZE; x++) | |
| 305 { | |
| 306 int min=255; | |
| 307 int max=0; | |
| 308 int y; | |
| 309 for(y=0; y<8; y++){ | |
| 310 int v= src[x + y*stride]; | |
| 311 if(v>max) max=v; | |
| 312 if(v<min) min=v; | |
| 313 } | |
| 314 if(max-min > 2*QP) return 0; | |
| 315 } | |
| 316 return 1; | |
|
1327
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
317 #endif |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
318 } |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
319 |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
320 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){ |
| 2979 | 321 if( isHorizDC_C(src, stride, c) ){ |
| 322 if( isHorizMinMaxOk_C(src, stride, c->QP) ) | |
| 323 return 1; | |
| 324 else | |
| 325 return 0; | |
| 326 }else{ | |
| 327 return 2; | |
| 328 } | |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
329 } |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
330 |
|
1327
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
331 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ |
| 2979 | 332 if( isVertDC_C(src, stride, c) ){ |
| 333 if( isVertMinMaxOk_C(src, stride, c->QP) ) | |
| 334 return 1; | |
| 335 else | |
| 336 return 0; | |
| 337 }else{ | |
| 338 return 2; | |
| 339 } | |
| 165 | 340 } |
| 341 | |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
342 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c) |
| 165 | 343 { |
| 2979 | 344 int y; |
| 345 for(y=0; y<BLOCK_SIZE; y++) | |
| 346 { | |
| 347 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]); | |
| 165 | 348 |
| 2979 | 349 if(ABS(middleEnergy) < 8*c->QP) |
| 350 { | |
| 351 const int q=(dst[3] - dst[4])/2; | |
| 352 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]); | |
| 353 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]); | |
| 165 | 354 |
| 2979 | 355 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); |
| 356 d= MAX(d, 0); | |
| 165 | 357 |
| 2979 | 358 d= (5*d + 32) >> 6; |
| 359 d*= SIGN(-middleEnergy); | |
| 165 | 360 |
| 2979 | 361 if(q>0) |
| 362 { | |
| 363 d= d<0 ? 0 : d; | |
| 364 d= d>q ? q : d; | |
| 365 } | |
| 366 else | |
| 367 { | |
| 368 d= d>0 ? 0 : d; | |
| 369 d= d<q ? q : d; | |
| 370 } | |
| 165 | 371 |
| 2979 | 372 dst[3]-= d; |
| 373 dst[4]+= d; | |
| 374 } | |
| 375 dst+= stride; | |
| 376 } | |
| 165 | 377 } |
| 378 | |
| 379 /** | |
| 380 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) | |
| 381 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) | |
| 382 */ | |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
383 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) |
| 165 | 384 { |
| 2979 | 385 int y; |
| 386 for(y=0; y<BLOCK_SIZE; y++) | |
| 387 { | |
| 388 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0]; | |
| 389 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7]; | |
| 165 | 390 |
| 2979 | 391 int sums[10]; |
| 392 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4; | |
| 393 sums[1] = sums[0] - first + dst[3]; | |
| 394 sums[2] = sums[1] - first + dst[4]; | |
| 395 sums[3] = sums[2] - first + dst[5]; | |
| 396 sums[4] = sums[3] - first + dst[6]; | |
| 397 sums[5] = sums[4] - dst[0] + dst[7]; | |
| 398 sums[6] = sums[5] - dst[1] + last; | |
| 399 sums[7] = sums[6] - dst[2] + last; | |
| 400 sums[8] = sums[7] - dst[3] + last; | |
| 401 sums[9] = sums[8] - dst[4] + last; | |
| 165 | 402 |
| 2979 | 403 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4; |
| 404 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4; | |
| 405 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4; | |
| 406 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4; | |
| 407 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4; | |
| 408 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4; | |
| 409 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4; | |
| 410 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4; | |
| 165 | 411 |
| 2979 | 412 dst+= stride; |
| 413 } | |
| 165 | 414 } |
| 415 | |
| 169 | 416 /** |
| 417 * Experimental Filter 1 (Horizontal) | |
| 418 * will not damage linear gradients | |
| 419 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter | |
| 420 * can only smooth blocks at the expected locations (it cant smooth them if they did move) | |
| 421 * MMX2 version does correct clipping C version doesnt | |
| 422 * not identical with the vertical one | |
| 423 */ | |
| 424 static inline void horizX1Filter(uint8_t *src, int stride, int QP) | |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
425 { |
| 2979 | 426 int y; |
| 427 static uint64_t *lut= NULL; | |
| 428 if(lut==NULL) | |
| 429 { | |
| 430 int i; | |
| 431 lut= (uint64_t*)memalign(8, 256*8); | |
| 432 for(i=0; i<256; i++) | |
| 433 { | |
| 434 int v= i < 128 ? 2*i : 2*(i-256); | |
| 169 | 435 /* |
| 436 //Simulate 112242211 9-Tap filter | |
| 2979 | 437 uint64_t a= (v/16) & 0xFF; |
| 438 uint64_t b= (v/8) & 0xFF; | |
| 439 uint64_t c= (v/4) & 0xFF; | |
| 440 uint64_t d= (3*v/8) & 0xFF; | |
| 169 | 441 */ |
| 442 //Simulate piecewise linear interpolation | |
| 2979 | 443 uint64_t a= (v/16) & 0xFF; |
| 444 uint64_t b= (v*3/16) & 0xFF; | |
| 445 uint64_t c= (v*5/16) & 0xFF; | |
| 446 uint64_t d= (7*v/16) & 0xFF; | |
| 447 uint64_t A= (0x100 - a)&0xFF; | |
| 448 uint64_t B= (0x100 - b)&0xFF; | |
| 449 uint64_t C= (0x100 - c)&0xFF; | |
| 450 uint64_t D= (0x100 - c)&0xFF; | |
| 130 | 451 |
| 2979 | 452 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) | |
| 453 (D<<24) | (C<<16) | (B<<8) | (A); | |
| 454 //lut[i] = (v<<32) | (v<<24); | |
| 455 } | |
| 456 } | |
| 134 | 457 |
| 2979 | 458 for(y=0; y<BLOCK_SIZE; y++) |
| 459 { | |
| 460 int a= src[1] - src[2]; | |
| 461 int b= src[3] - src[4]; | |
| 462 int c= src[5] - src[6]; | |
| 134 | 463 |
| 2979 | 464 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0); |
| 167 | 465 |
| 2979 | 466 if(d < QP) |
| 467 { | |
| 468 int v = d * SIGN(-b); | |
|
106
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
469 |
| 2979 | 470 src[1] +=v/8; |
| 471 src[2] +=v/4; | |
| 472 src[3] +=3*v/8; | |
| 473 src[4] -=3*v/8; | |
| 474 src[5] -=v/4; | |
| 475 src[6] -=v/8; | |
|
106
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
476 |
| 2979 | 477 } |
| 478 src+=stride; | |
| 479 } | |
|
106
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
480 } |
|
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
481 |
|
2037
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
482 /** |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
483 * accurate deblock filter |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
484 */ |
|
2039
f25e485a7850
mmx optimized version of the per line/accurate deblock filter
michael
parents:
2038
diff
changeset
|
485 static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){ |
| 2979 | 486 int y; |
| 487 const int QP= c->QP; | |
| 488 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; | |
| 489 const int dcThreshold= dcOffset*2 + 1; | |
|
2037
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
490 //START_TIMER |
| 2979 | 491 src+= step*4; // src points to begin of the 8x8 Block |
| 492 for(y=0; y<8; y++){ | |
| 493 int numEq= 0; | |
|
2037
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
494 |
| 2979 | 495 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++; |
| 496 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++; | |
| 497 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++; | |
| 498 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++; | |
| 499 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++; | |
| 500 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++; | |
| 501 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++; | |
| 502 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++; | |
| 503 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++; | |
| 504 if(numEq > c->ppMode.flatnessThreshold){ | |
| 505 int min, max, x; | |
| 2967 | 506 |
| 2979 | 507 if(src[0] > src[step]){ |
| 508 max= src[0]; | |
| 509 min= src[step]; | |
| 510 }else{ | |
| 511 max= src[step]; | |
| 512 min= src[0]; | |
| 513 } | |
| 514 for(x=2; x<8; x+=2){ | |
| 515 if(src[x*step] > src[(x+1)*step]){ | |
| 516 if(src[x *step] > max) max= src[ x *step]; | |
| 517 if(src[(x+1)*step] < min) min= src[(x+1)*step]; | |
| 518 }else{ | |
| 519 if(src[(x+1)*step] > max) max= src[(x+1)*step]; | |
| 520 if(src[ x *step] < min) min= src[ x *step]; | |
| 521 } | |
| 522 } | |
| 523 if(max-min < 2*QP){ | |
| 524 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0]; | |
| 525 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step]; | |
| 2967 | 526 |
| 2979 | 527 int sums[10]; |
| 528 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4; | |
| 529 sums[1] = sums[0] - first + src[3*step]; | |
| 530 sums[2] = sums[1] - first + src[4*step]; | |
| 531 sums[3] = sums[2] - first + src[5*step]; | |
| 532 sums[4] = sums[3] - first + src[6*step]; | |
| 533 sums[5] = sums[4] - src[0*step] + src[7*step]; | |
| 534 sums[6] = sums[5] - src[1*step] + last; | |
| 535 sums[7] = sums[6] - src[2*step] + last; | |
| 536 sums[8] = sums[7] - src[3*step] + last; | |
| 537 sums[9] = sums[8] - src[4*step] + last; | |
|
2037
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
538 |
| 2979 | 539 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4; |
| 540 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4; | |
| 541 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4; | |
| 542 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4; | |
| 543 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4; | |
| 544 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4; | |
| 545 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4; | |
| 546 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4; | |
| 547 } | |
| 548 }else{ | |
| 549 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]); | |
|
2037
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
550 |
| 2979 | 551 if(ABS(middleEnergy) < 8*QP) |
| 552 { | |
| 553 const int q=(src[3*step] - src[4*step])/2; | |
| 554 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]); | |
| 555 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]); | |
|
2037
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
556 |
| 2979 | 557 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); |
| 558 d= MAX(d, 0); | |
| 2967 | 559 |
| 2979 | 560 d= (5*d + 32) >> 6; |
| 561 d*= SIGN(-middleEnergy); | |
| 2967 | 562 |
| 2979 | 563 if(q>0) |
| 564 { | |
| 565 d= d<0 ? 0 : d; | |
| 566 d= d>q ? q : d; | |
| 567 } | |
| 568 else | |
| 569 { | |
| 570 d= d>0 ? 0 : d; | |
| 571 d= d<q ? q : d; | |
| 572 } | |
| 2967 | 573 |
| 2979 | 574 src[3*step]-= d; |
| 575 src[4*step]+= d; | |
| 576 } | |
| 577 } | |
|
2037
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
578 |
| 2979 | 579 src += stride; |
| 580 } | |
|
2037
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
581 /*if(step==16){ |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
582 STOP_TIMER("step16") |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
583 }else{ |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
584 STOP_TIMER("stepX") |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
585 }*/ |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
586 } |
|
106
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
587 |
| 171 | 588 //Note: we have C, MMX, MMX2 and 3DNOW versions; there is no combined 3DNOW+MMX2 one |
| 169 | 589 //Plain C versions |
| 171 | 590 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) |
| 591 #define COMPILE_C | |
| 592 #endif | |
| 593 | |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
594 #ifdef ARCH_POWERPC |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
595 #ifdef HAVE_ALTIVEC |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
596 #define COMPILE_ALTIVEC |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
597 #endif //HAVE_ALTIVEC |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
598 #endif //ARCH_POWERPC |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
599 |
|
2293
15cfba1b97b5
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents:
2188
diff
changeset
|
600 #if defined(ARCH_X86) || defined(ARCH_X86_64) |
| 171 | 601 |
| 602 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) | |
| 603 #define COMPILE_MMX | |
| 604 #endif | |
| 605 | |
| 606 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT) | |
| 607 #define COMPILE_MMX2 | |
| 608 #endif | |
| 609 | |
| 610 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) | |
| 611 #define COMPILE_3DNOW | |
| 612 #endif | |
| 787 | 613 #endif //ARCH_X86 |
| 171 | 614 |
| 615 #undef HAVE_MMX | |
| 616 #undef HAVE_MMX2 | |
| 617 #undef HAVE_3DNOW | |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
618 #undef HAVE_ALTIVEC |
| 171 | 619 |
| 620 #ifdef COMPILE_C | |
| 169 | 621 #undef HAVE_MMX |
| 622 #undef HAVE_MMX2 | |
| 623 #undef HAVE_3DNOW | |
| 624 #define RENAME(a) a ## _C | |
| 625 #include "postprocess_template.c" | |
| 171 | 626 #endif |
|
106
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
627 |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
628 #ifdef ARCH_POWERPC |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
629 #ifdef COMPILE_ALTIVEC |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
630 #undef RENAME |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
631 #define HAVE_ALTIVEC |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
632 #define RENAME(a) a ## _altivec |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
633 #include "postprocess_altivec_template.c" |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
634 #include "postprocess_template.c" |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
635 #endif |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
636 #endif //ARCH_POWERPC |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
637 |
| 169 | 638 //MMX versions |
| 171 | 639 #ifdef COMPILE_MMX |
| 169 | 640 #undef RENAME |
| 641 #define HAVE_MMX | |
| 642 #undef HAVE_MMX2 | |
| 643 #undef HAVE_3DNOW | |
| 644 #define RENAME(a) a ## _MMX | |
| 645 #include "postprocess_template.c" | |
| 171 | 646 #endif |
|
128
e5266b8e79be
much better horizontal filters (transpose & use the vertical ones) :)
michael
parents:
126
diff
changeset
|
647 |
| 169 | 648 //MMX2 versions |
| 171 | 649 #ifdef COMPILE_MMX2 |
| 169 | 650 #undef RENAME |
| 651 #define HAVE_MMX | |
| 652 #define HAVE_MMX2 | |
| 653 #undef HAVE_3DNOW | |
| 654 #define RENAME(a) a ## _MMX2 | |
| 655 #include "postprocess_template.c" | |
| 171 | 656 #endif |
|
128
e5266b8e79be
much better horizontal filters (transpose & use the vertical ones) :)
michael
parents:
126
diff
changeset
|
657 |
| 169 | 658 //3DNOW versions |
| 171 | 659 #ifdef COMPILE_3DNOW |
| 169 | 660 #undef RENAME |
| 661 #define HAVE_MMX | |
| 662 #undef HAVE_MMX2 | |
| 663 #define HAVE_3DNOW | |
| 664 #define RENAME(a) a ## _3DNow | |
| 665 #include "postprocess_template.c" | |
| 171 | 666 #endif |
|
128
e5266b8e79be
much better horizontal filters (transpose & use the vertical ones) :)
michael
parents:
126
diff
changeset
|
667 |
| 169 | 668 // minor note: the HAVE_xyz macros are messed up after this point, so don't rely on them |
|
128
e5266b8e79be
much better horizontal filters (transpose & use the vertical ones) :)
michael
parents:
126
diff
changeset
|
669 |
| 169 | 670 static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
| 2979 | 671 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc) |
| 156 | 672 { |
| 2979 | 673 PPContext *c= (PPContext *)vc; |
| 674 PPMode *ppMode= (PPMode *)vm; | |
| 675 c->ppMode= *ppMode; //FIXME | |
| 787 | 676 |
| 2979 | 677 // useing ifs here as they are faster than function pointers allthough the |
| 678 // difference wouldnt be messureable here but its much better because | |
| 679 // someone might exchange the cpu whithout restarting mplayer ;) | |
| 171 | 680 #ifdef RUNTIME_CPUDETECT |
|
2293
15cfba1b97b5
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents:
2188
diff
changeset
|
681 #if defined(ARCH_X86) || defined(ARCH_X86_64) |
| 2979 | 682 // ordered per speed fasterst first |
| 683 if(c->cpuCaps & PP_CPU_CAPS_MMX2) | |
| 684 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | |
| 685 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW) | |
| 686 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | |
| 687 else if(c->cpuCaps & PP_CPU_CAPS_MMX) | |
| 688 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | |
| 689 else | |
| 690 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | |
|
157
bc12fd7e6153
temp denoiser changes: (a-b)^2 instead of |a-b| and MMX2/3DNOW version
michael
parents:
156
diff
changeset
|
691 #else |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
692 #ifdef ARCH_POWERPC |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
693 #ifdef HAVE_ALTIVEC |
|
2188
226d0a39347d
typo fix by (Marcin 'Morgoth' Kurek <morgoth6 at box43 dot pl>)
michael
parents:
2055
diff
changeset
|
694 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) |
| 2979 | 695 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
696 else |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
697 #endif |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
698 #endif |
| 2979 | 699 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
|
157
bc12fd7e6153
temp denoiser changes: (a-b)^2 instead of |a-b| and MMX2/3DNOW version
michael
parents:
156
diff
changeset
|
700 #endif |
| 171 | 701 #else //RUNTIME_CPUDETECT |
| 702 #ifdef HAVE_MMX2 | |
| 2979 | 703 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 171 | 704 #elif defined (HAVE_3DNOW) |
| 2979 | 705 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 171 | 706 #elif defined (HAVE_MMX) |
| 2979 | 707 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
708 #elif defined (HAVE_ALTIVEC) |
| 2979 | 709 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 171 | 710 #else |
| 2979 | 711 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 171 | 712 #endif |
| 713 #endif //!RUNTIME_CPUDETECT | |
| 156 | 714 } |
| 715 | |
| 169 | 716 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
| 2979 | 717 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode); |
| 96 | 718 |
/**
 * Help text for the -pp command line option: the list of available filters
 * with their short and long names and options, followed by usage examples.
 *
 * NOTE(review): runs of blanks inside these literals look as if they were
 * collapsed to single spaces in this copy of the file - confirm the column
 * alignment against the upstream source.
 */
char *pp_help=
    "Available postprocessing filters:\n"
    "Filters Options\n"
    "short long name short long option Description\n"
    "* * a autoq CPU power dependent enabler\n"
    " c chrom chrominance filtering enabled\n"
    " y nochrom chrominance filtering disabled\n"
    " n noluma luma filtering disabled\n"
    "hb hdeblock (2 threshold) horizontal deblocking filter\n"
    " 1. difference factor: default=32, higher -> more deblocking\n"
    " 2. flatness threshold: default=39, lower -> more deblocking\n"
    " the h & v deblocking filters share these\n"
    " so you can't set different thresholds for h / v\n"
    "vb vdeblock (2 threshold) vertical deblocking filter\n"
    "ha hadeblock (2 threshold) horizontal deblocking filter\n"
    "va vadeblock (2 threshold) vertical deblocking filter\n"
    "h1 x1hdeblock experimental h deblock filter 1\n"
    "v1 x1vdeblock experimental v deblock filter 1\n"
    "dr dering deringing filter\n"
    "al autolevels automatic brightness / contrast\n"
    " f fullyrange stretch luminance to (0..255)\n"
    "lb linblenddeint linear blend deinterlacer\n"
    "li linipoldeint linear interpolating deinterlace\n"
    "ci cubicipoldeint cubic interpolating deinterlacer\n"
    "md mediandeint median deinterlacer\n"
    "fd ffmpegdeint ffmpeg deinterlacer\n"
    "l5 lowpass5 FIR lowpass deinterlacer\n"
    "de default hb:a,vb:a,dr:a\n"
    "fa fast h1:a,v1:a,dr:a\n"
    "ac ha:a:128:7,va:a,dr:a\n"
    "tn tmpnoise (3 threshold) temporal noise reducer\n"
    " 1. <= 2. <= 3. larger -> stronger filtering\n"
    "fq forceQuant <quantizer> force quantizer\n"
    "Usage:\n"
    "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
    "long form example:\n"
    "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
    "short form example:\n"
    "vb:a/hb:a/lb de,-vb\n"
    "more examples:\n"
    "tn:64:128:256\n"
    "\n"
    ;
| 116 | 764 |
| 829 | 765 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality) |
| 116 | 766 { |
| 2979 | 767 char temp[GET_MODE_BUFFER_SIZE]; |
| 768 char *p= temp; | |
| 769 char *filterDelimiters= ",/"; | |
| 770 char *optionDelimiters= ":"; | |
| 771 struct PPMode *ppMode; | |
| 772 char *filterToken; | |
| 116 | 773 |
| 2979 | 774 ppMode= memalign(8, sizeof(PPMode)); |
| 2967 | 775 |
| 2979 | 776 ppMode->lumMode= 0; |
| 777 ppMode->chromMode= 0; | |
| 778 ppMode->maxTmpNoise[0]= 700; | |
| 779 ppMode->maxTmpNoise[1]= 1500; | |
| 780 ppMode->maxTmpNoise[2]= 3000; | |
| 781 ppMode->maxAllowedY= 234; | |
| 782 ppMode->minAllowedY= 16; | |
| 783 ppMode->baseDcDiff= 256/8; | |
| 784 ppMode->flatnessThreshold= 56-16-1; | |
| 785 ppMode->maxClippedThreshold= 0.01; | |
| 786 ppMode->error=0; | |
| 793 | 787 |
| 2979 | 788 strncpy(temp, name, GET_MODE_BUFFER_SIZE); |
| 116 | 789 |
| 2979 | 790 if(verbose>1) printf("pp: %s\n", name); |
| 156 | 791 |
| 2979 | 792 for(;;){ |
| 793 char *filterName; | |
| 794 int q= 1000000; //PP_QUALITY_MAX; | |
| 795 int chrom=-1; | |
| 796 int luma=-1; | |
| 797 char *option; | |
| 798 char *options[OPTIONS_ARRAY_SIZE]; | |
| 799 int i; | |
| 800 int filterNameOk=0; | |
| 801 int numOfUnknownOptions=0; | |
| 802 int enable=1; //does the user want us to enabled or disabled the filter | |
| 116 | 803 |
| 2979 | 804 filterToken= strtok(p, filterDelimiters); |
| 805 if(filterToken == NULL) break; | |
| 806 p+= strlen(filterToken) + 1; // p points to next filterToken | |
| 807 filterName= strtok(filterToken, optionDelimiters); | |
| 808 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName); | |
| 116 | 809 |
| 2979 | 810 if(*filterName == '-') |
| 811 { | |
| 812 enable=0; | |
| 813 filterName++; | |
| 814 } | |
| 156 | 815 |
| 2979 | 816 for(;;){ //for all options |
| 817 option= strtok(NULL, optionDelimiters); | |
| 818 if(option == NULL) break; | |
| 116 | 819 |
| 2979 | 820 if(verbose>1) printf("pp: option: %s\n", option); |
| 821 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality; | |
| 822 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0; | |
| 823 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1; | |
| 824 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0; | |
| 825 else | |
| 826 { | |
| 827 options[numOfUnknownOptions] = option; | |
| 828 numOfUnknownOptions++; | |
| 829 } | |
| 830 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break; | |
| 831 } | |
| 832 options[numOfUnknownOptions] = NULL; | |
| 116 | 833 |
| 2979 | 834 /* replace stuff from the replace Table */ |
| 835 for(i=0; replaceTable[2*i]!=NULL; i++) | |
| 836 { | |
| 837 if(!strcmp(replaceTable[2*i], filterName)) | |
| 838 { | |
| 839 int newlen= strlen(replaceTable[2*i + 1]); | |
| 840 int plen; | |
| 841 int spaceLeft; | |
| 116 | 842 |
| 2979 | 843 if(p==NULL) p= temp, *p=0; //last filter |
| 844 else p--, *p=','; //not last filter | |
| 116 | 845 |
| 2979 | 846 plen= strlen(p); |
| 847 spaceLeft= p - temp + plen; | |
| 848 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE) | |
| 849 { | |
| 850 ppMode->error++; | |
| 851 break; | |
| 852 } | |
| 853 memmove(p + newlen, p, plen+1); | |
| 854 memcpy(p, replaceTable[2*i + 1], newlen); | |
| 855 filterNameOk=1; | |
| 856 } | |
| 857 } | |
| 116 | 858 |
| 2979 | 859 for(i=0; filters[i].shortName!=NULL; i++) |
| 860 { | |
| 861 // printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName); | |
| 862 if( !strcmp(filters[i].longName, filterName) | |
| 863 || !strcmp(filters[i].shortName, filterName)) | |
| 864 { | |
| 865 ppMode->lumMode &= ~filters[i].mask; | |
| 866 ppMode->chromMode &= ~filters[i].mask; | |
| 116 | 867 |
| 2979 | 868 filterNameOk=1; |
| 869 if(!enable) break; // user wants to disable it | |
| 116 | 870 |
| 2979 | 871 if(q >= filters[i].minLumQuality && luma) |
| 872 ppMode->lumMode|= filters[i].mask; | |
| 873 if(chrom==1 || (chrom==-1 && filters[i].chromDefault)) | |
| 874 if(q >= filters[i].minChromQuality) | |
| 875 ppMode->chromMode|= filters[i].mask; | |
| 116 | 876 |
| 2979 | 877 if(filters[i].mask == LEVEL_FIX) |
| 878 { | |
| 879 int o; | |
| 880 ppMode->minAllowedY= 16; | |
| 881 ppMode->maxAllowedY= 234; | |
| 882 for(o=0; options[o]!=NULL; o++) | |
| 883 { | |
| 884 if( !strcmp(options[o],"fullyrange") | |
| 885 ||!strcmp(options[o],"f")) | |
| 886 { | |
| 887 ppMode->minAllowedY= 0; | |
| 888 ppMode->maxAllowedY= 255; | |
| 889 numOfUnknownOptions--; | |
| 890 } | |
| 891 } | |
| 892 } | |
| 893 else if(filters[i].mask == TEMP_NOISE_FILTER) | |
| 894 { | |
| 895 int o; | |
| 896 int numOfNoises=0; | |
| 156 | 897 |
| 2979 | 898 for(o=0; options[o]!=NULL; o++) |
| 899 { | |
| 900 char *tail; | |
| 901 ppMode->maxTmpNoise[numOfNoises]= | |
| 902 strtol(options[o], &tail, 0); | |
| 903 if(tail!=options[o]) | |
| 904 { | |
| 905 numOfNoises++; | |
| 906 numOfUnknownOptions--; | |
| 907 if(numOfNoises >= 3) break; | |
| 908 } | |
| 909 } | |
| 910 } | |
| 911 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK | |
| 912 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK) | |
| 913 { | |
| 914 int o; | |
| 181 | 915 |
| 2979 | 916 for(o=0; options[o]!=NULL && o<2; o++) |
| 917 { | |
| 918 char *tail; | |
| 919 int val= strtol(options[o], &tail, 0); | |
| 920 if(tail==options[o]) break; | |
| 181 | 921 |
| 2979 | 922 numOfUnknownOptions--; |
| 923 if(o==0) ppMode->baseDcDiff= val; | |
| 924 else ppMode->flatnessThreshold= val; | |
| 925 } | |
| 926 } | |
| 927 else if(filters[i].mask == FORCE_QUANT) | |
| 928 { | |
| 929 int o; | |
| 930 ppMode->forcedQuant= 15; | |
| 183 | 931 |
| 2979 | 932 for(o=0; options[o]!=NULL && o<1; o++) |
| 933 { | |
| 934 char *tail; | |
| 935 int val= strtol(options[o], &tail, 0); | |
| 936 if(tail==options[o]) break; | |
| 183 | 937 |
| 2979 | 938 numOfUnknownOptions--; |
| 939 ppMode->forcedQuant= val; | |
| 940 } | |
| 941 } | |
| 942 } | |
| 943 } | |
| 944 if(!filterNameOk) ppMode->error++; | |
| 945 ppMode->error += numOfUnknownOptions; | |
| 946 } | |
| 116 | 947 |
| 2979 | 948 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode); |
| 949 if(ppMode->error) | |
| 950 { | |
| 951 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name); | |
| 952 free(ppMode); | |
| 953 return NULL; | |
| 954 } | |
| 955 return ppMode; | |
| 116 | 956 } |
| 957 | |
| 829 | 958 void pp_free_mode(pp_mode_t *mode){ |
| 959 if(mode) free(mode); | |
| 960 } | |
| 961 | |
/**
 * Replaces the buffer *p by a freshly allocated, zero filled one.
 *
 * The previous buffer (if any) is freed; its contents are NOT preserved.
 *
 * @param p         address of the buffer pointer; on return it holds the new
 *                  buffer, or NULL if size is negative or the allocation
 *                  failed
 * @param alignment alignment passed to memalign()
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size){
    free(*p);
    *p= NULL; // never leave a dangling pointer behind

    if(size < 0) return; // would turn into a huge size_t

    *p= memalign(alignment, size);
    if(*p) memset(*p, 0, size);
}
| 967 | |
| 1196 | 968 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){ |
| 2979 | 969 int mbWidth = (width+15)>>4; |
| 970 int mbHeight= (height+15)>>4; | |
| 971 int i; | |
| 937 | 972 |
| 2979 | 973 c->stride= stride; |
| 974 c->qpStride= qpStride; | |
| 787 | 975 |
| 2979 | 976 reallocAlign((void **)&c->tempDst, 8, stride*24); |
| 977 reallocAlign((void **)&c->tempSrc, 8, stride*24); | |
| 978 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8); | |
| 979 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t)); | |
| 980 for(i=0; i<256; i++) | |
| 981 c->yHistogram[i]= width*height/64*15/256; | |
| 787 | 982 |
| 2979 | 983 for(i=0; i<3; i++) |
| 984 { | |
| 985 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end | |
| 986 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024); | |
| 987 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size | |
| 988 } | |
| 937 | 989 |
| 2979 | 990 reallocAlign((void **)&c->deintTemp, 8, 2*width+32); |
| 991 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); | |
| 992 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); | |
| 993 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T)); | |
| 937 | 994 } |
| 179 | 995 |
| 1282 | 996 static void global_init(void){ |
| 2979 | 997 int i; |
| 998 memset(clip_table, 0, 256); | |
| 999 for(i=256; i<512; i++) | |
| 1000 clip_table[i]= i; | |
| 1001 memset(clip_table+512, 0, 256); | |
| 1157 | 1002 } |
| 1003 | |
| 937 | 1004 pp_context_t *pp_get_context(int width, int height, int cpuCaps){ |
| 2979 | 1005 PPContext *c= memalign(32, sizeof(PPContext)); |
| 1006 int stride= (width+15)&(~15); //assumed / will realloc if needed | |
| 1007 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed | |
| 2967 | 1008 |
| 2979 | 1009 global_init(); |
| 1157 | 1010 |
| 2979 | 1011 memset(c, 0, sizeof(PPContext)); |
| 1012 c->cpuCaps= cpuCaps; | |
| 1013 if(cpuCaps&PP_FORMAT){ | |
| 1014 c->hChromaSubSample= cpuCaps&0x3; | |
| 1015 c->vChromaSubSample= (cpuCaps>>4)&0x3; | |
| 1016 }else{ | |
| 1017 c->hChromaSubSample= 1; | |
| 1018 c->vChromaSubSample= 1; | |
| 1019 } | |
| 937 | 1020 |
| 2979 | 1021 reallocBuffers(c, width, height, stride, qpStride); |
| 2967 | 1022 |
| 2979 | 1023 c->frameNum=-1; |
| 179 | 1024 |
| 2979 | 1025 return c; |
| 179 | 1026 } |
| 1027 | |
| 792 | 1028 void pp_free_context(void *vc){ |
| 2979 | 1029 PPContext *c = (PPContext*)vc; |
| 1030 int i; | |
| 2967 | 1031 |
| 2979 | 1032 for(i=0; i<3; i++) free(c->tempBlured[i]); |
| 1033 for(i=0; i<3; i++) free(c->tempBluredPast[i]); | |
| 2967 | 1034 |
| 2979 | 1035 free(c->tempBlocks); |
| 1036 free(c->yHistogram); | |
| 1037 free(c->tempDst); | |
| 1038 free(c->tempSrc); | |
| 1039 free(c->deintTemp); | |
| 1040 free(c->stdQPTable); | |
| 1041 free(c->nonBQPTable); | |
| 1042 free(c->forcedQPTable); | |
| 2967 | 1043 |
| 2979 | 1044 memset(c, 0, sizeof(PPContext)); |
| 937 | 1045 |
| 2979 | 1046 free(c); |
| 787 | 1047 } |
| 1048 | |
| 792 | 1049 void pp_postprocess(uint8_t * src[3], int srcStride[3], |
| 787 | 1050 uint8_t * dst[3], int dstStride[3], |
| 791 | 1051 int width, int height, |
| 787 | 1052 QP_STORE_T *QP_store, int QPStride, |
| 2979 | 1053 pp_mode_t *vm, void *vc, int pict_type) |
| 116 | 1054 { |
| 2979 | 1055 int mbWidth = (width+15)>>4; |
| 1056 int mbHeight= (height+15)>>4; | |
| 1057 PPMode *mode = (PPMode*)vm; | |
| 1058 PPContext *c = (PPContext*)vc; | |
| 1059 int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0])); | |
| 1060 int absQPStride = ABS(QPStride); | |
| 1196 | 1061 |
| 2979 | 1062 // c->stride and c->QPStride are always positive |
| 1063 if(c->stride < minStride || c->qpStride < absQPStride) | |
| 1064 reallocBuffers(c, width, height, | |
| 1065 MAX(minStride, c->stride), | |
| 1066 MAX(c->qpStride, absQPStride)); | |
| 787 | 1067 |
| 2979 | 1068 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) |
| 1069 { | |
| 1070 int i; | |
| 1071 QP_store= c->forcedQPTable; | |
| 1072 absQPStride = QPStride = 0; | |
| 1073 if(mode->lumMode & FORCE_QUANT) | |
| 1074 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant; | |
| 1075 else | |
| 1076 for(i=0; i<mbWidth; i++) QP_store[i]= 1; | |
| 1077 } | |
| 1196 | 1078 //printf("pict_type:%d\n", pict_type); |
| 1079 | |
| 2979 | 1080 if(pict_type & PP_PICT_TYPE_QP2){ |
| 1081 int i; | |
| 1082 const int count= mbHeight * absQPStride; | |
| 1083 for(i=0; i<(count>>2); i++){ | |
| 1084 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F; | |
| 1085 } | |
| 1086 for(i<<=2; i<count; i++){ | |
| 1087 c->stdQPTable[i] = QP_store[i]>>1; | |
| 1088 } | |
| 1196 | 1089 QP_store= c->stdQPTable; |
| 2979 | 1090 QPStride= absQPStride; |
| 1091 } | |
| 1196 | 1092 |
| 791 | 1093 if(0){ |
| 1094 int x,y; | |
| 1095 for(y=0; y<mbHeight; y++){ | |
| 2979 | 1096 for(x=0; x<mbWidth; x++){ |
| 1097 printf("%2d ", QP_store[x + y*QPStride]); | |
| 1098 } | |
| 1099 printf("\n"); | |
| 791 | 1100 } |
| 2979 | 1101 printf("\n"); |
| 791 | 1102 } |
| 798 | 1103 |
| 2979 | 1104 if((pict_type&7)!=3) |
| 1105 { | |
| 1106 if (QPStride >= 0) { | |
| 1107 int i; | |
| 1108 const int count= mbHeight * QPStride; | |
| 1109 for(i=0; i<(count>>2); i++){ | |
| 1110 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F; | |
| 1111 } | |
| 1112 for(i<<=2; i<count; i++){ | |
| 1113 c->nonBQPTable[i] = QP_store[i] & 0x3F; | |
| 1114 } | |
| 1115 } else { | |
| 1116 int i,j; | |
| 1117 for(i=0; i<mbHeight; i++) { | |
| 1118 for(j=0; j<absQPStride; j++) { | |
| 1119 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F; | |
| 1120 } | |
| 1121 } | |
| 1122 } | |
| 1123 } | |
| 152 | 1124 |
| 2979 | 1125 if(verbose>2) |
| 1126 { | |
| 1127 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode); | |
| 1128 } | |
| 202 | 1129 |
| 2979 | 1130 postProcess(src[0], srcStride[0], dst[0], dstStride[0], |
| 1131 width, height, QP_store, QPStride, 0, mode, c); | |
| 116 | 1132 |
| 2979 | 1133 width = (width )>>c->hChromaSubSample; |
| 1134 height = (height)>>c->vChromaSubSample; | |
| 116 | 1135 |
| 2979 | 1136 if(mode->chromMode) |
| 1137 { | |
| 1138 postProcess(src[1], srcStride[1], dst[1], dstStride[1], | |
| 1139 width, height, QP_store, QPStride, 1, mode, c); | |
| 1140 postProcess(src[2], srcStride[2], dst[2], dstStride[2], | |
| 1141 width, height, QP_store, QPStride, 2, mode, c); | |
| 1142 } | |
| 1143 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) | |
| 1144 { | |
| 1145 linecpy(dst[1], src[1], height, srcStride[1]); | |
| 1146 linecpy(dst[2], src[2], height, srcStride[2]); | |
| 1147 } | |
| 1148 else | |
| 1149 { | |
| 1150 int y; | |
| 1151 for(y=0; y<height; y++) | |
| 1152 { | |
| 1153 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width); | |
| 1154 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width); | |
| 1155 } | |
| 1156 } | |
| 116 | 1157 } |
| 1158 |
