Mercurial > libavcodec.hg
annotate libpostproc/postprocess.c @ 2071:41d30bae5019 libavcodec
attempt to create some separation in the FLAC system with respect to
demuxer and decoder layers by enabling the FLAC decoder to decode data
without needing the entire file, from start to finish
| author | melanson |
|---|---|
| date | Thu, 10 Jun 2004 04:13:43 +0000 |
| parents | 1c019179525c |
| children | 226d0a39347d |
| rev | line source |
|---|---|
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
1 /* |
| 1067 | 2 Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at) |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
3 |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
4 AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org> |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
5 |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
6 This program is free software; you can redistribute it and/or modify |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
7 it under the terms of the GNU General Public License as published by |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
8 the Free Software Foundation; either version 2 of the License, or |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
9 (at your option) any later version. |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
10 |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
11 This program is distributed in the hope that it will be useful, |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
12 but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
14 GNU General Public License for more details. |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
15 |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
16 You should have received a copy of the GNU General Public License |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
17 along with this program; if not, write to the Free Software |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
19 */ |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
20 |
| 1109 | 21 /** |
| 22 * @file postprocess.c | |
| 23 * postprocessing. | |
| 24 */ | |
| 25 | |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
26 /* |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
27 C MMX MMX2 3DNow AltiVec |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
28 isVertDC Ec Ec Ec |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
29 isVertMinMaxOk Ec Ec Ec |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
30 doVertLowPass E e e Ec |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
31 doVertDefFilter Ec Ec e e Ec |
| 2043 | 32 isHorizDC Ec Ec Ec |
| 33 isHorizMinMaxOk a E Ec | |
| 34 doHorizLowPass E e e Ec | |
| 35 doHorizDefFilter Ec Ec e e Ec | |
|
2039
f25e485a7850
mmx optimized version of the per line/accurate deblock filter
michael
parents:
2038
diff
changeset
|
36 do_a_deblock Ec E Ec E |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
37 deRing E e e* Ecp |
|
106
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
38 Vertical RKAlgo1 E a a |
|
129
be35346e27c1
fixed difference with -vo md5 between doVertDefFilter() C and MMX / MMX2 versions
michael
parents:
128
diff
changeset
|
39 Horizontal RKAlgo1 a a |
| 156 | 40 Vertical X1# a E E |
| 41 Horizontal X1# a E E | |
| 111 | 42 LinIpolDeinterlace e E E* |
| 43 CubicIpolDeinterlace a e e* | |
| 44 LinBlendDeinterlace e E E* | |
|
1029
804cc05a3f61
C implementation of the median deinterlacer (seems to be the only one
rfelker
parents:
957
diff
changeset
|
45 MedianDeinterlace# E Ec Ec |
| 2043 | 46 TempDeNoiser# E e e Ec |
| 156 | 47 |
| 48 * I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work | |
| 49 # more or less self-invented filters, so the exactness isn't too meaningful | |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
50 E = Exact implementation |
| 111 | 51 e = almost exact implementation (slightly different rounding,...) |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
52 a = alternative / approximate impl |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
53 c = checked against the other implementations (-vo md5) |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
54 p = partially optimized, still some work to do |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
55 */ |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
56 |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
57 /* |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
58 TODO: |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
59 reduce the time wasted on the mem transfer |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
60 unroll stuff if instructions depend too much on the prior one |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
61 move YScale thing to the end instead of fixing QP |
| 96 | 62 write a faster and higher quality deblocking filter :) |
|
97
e57b1d38d71f
bugfixes: last 3 lines not brightness/contrast corrected
michael
parents:
96
diff
changeset
|
63 make the mainloop more flexible (variable number of blocks at once |
|
e57b1d38d71f
bugfixes: last 3 lines not brightness/contrast corrected
michael
parents:
96
diff
changeset
|
64 (the if/else stuff per block is slowing things down) |
| 99 | 65 compare the quality & speed of all filters |
| 66 split this huge file | |
| 140 | 67 optimize c versions |
| 156 | 68 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
69 ... |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
70 */ |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
71 |
| 107 | 72 //Changelog: use the CVS log |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
73 |
| 1067 | 74 #include "config.h" |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
75 #include <inttypes.h> |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
76 #include <stdio.h> |
|
97
e57b1d38d71f
bugfixes: last 3 lines not brightness/contrast corrected
michael
parents:
96
diff
changeset
|
77 #include <stdlib.h> |
| 116 | 78 #include <string.h> |
| 133 | 79 #ifdef HAVE_MALLOC_H |
| 80 #include <malloc.h> | |
| 81 #endif | |
| 96 | 82 //#undef HAVE_MMX2 |
| 83 //#define HAVE_3DNOW | |
| 84 //#undef HAVE_MMX | |
| 169 | 85 //#undef ARCH_X86 |
| 163 | 86 //#define DEBUG_BRIGHTNESS |
| 1069 | 87 #ifdef USE_FASTMEMCPY |
| 1775 | 88 #include "fastmemcpy.h" |
| 837 | 89 #endif |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
90 #include "postprocess.h" |
| 829 | 91 #include "postprocess_internal.h" |
| 1069 | 92 |
| 93 #include "mangle.h" //FIXME should be supressed | |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
94 |
| 2041 | 95 #ifdef HAVE_ALTIVEC_H |
| 96 #include <altivec.h> | |
| 97 #endif | |
| 98 | |
| 1071 | 99 #ifndef HAVE_MEMALIGN |
| 100 #define memalign(a,b) malloc(b) | |
| 101 #endif | |
| 102 | |
| 104 | 103 #define MIN(a,b) ((a) > (b) ? (b) : (a)) |
| 104 #define MAX(a,b) ((a) < (b) ? (b) : (a)) | |
| 105 #define ABS(a) ((a) > 0 ? (a) : (-(a))) | |
| 106 #define SIGN(a) ((a) > 0 ? 1 : -1) | |
| 107 | |
| 116 | 108 #define GET_MODE_BUFFER_SIZE 500 |
| 109 #define OPTIONS_ARRAY_SIZE 10 | |
| 787 | 110 #define BLOCK_SIZE 8 |
| 111 #define TEMP_STRIDE 8 | |
| 112 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet | |
| 116 | 113 |
|
1847
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
114 #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) |
|
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
115 # define attribute_used __attribute__((used)) |
|
2037
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
116 # define always_inline __attribute__((always_inline)) inline |
|
1847
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
117 #else |
|
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
118 # define attribute_used |
|
2037
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
119 # define always_inline inline |
|
1847
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
120 #endif |
|
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
121 |
| 169 | 122 #ifdef ARCH_X86 |
|
1847
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
123 static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL; |
| 2040 | 124 static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL; |
|
1847
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
125 static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL; |
|
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
126 static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL; |
|
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
127 static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL; |
|
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
128 static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL; |
|
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
129 static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL; |
|
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
130 static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL; |
|
148
1cfc4d567c0a
minor changes (fixed some warnings, added attribute aligned(8) stuff)
michael
parents:
142
diff
changeset
|
131 #endif |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
132 |
| 1157 | 133 static uint8_t clip_table[3*256]; |
| 134 static uint8_t * const clip_tab= clip_table + 256; | |
| 135 | |
|
2031
4225c131a2eb
warning fixes by (Michael Roitzsch <mroi at users dot sourceforge dot net>)
michael
parents:
2024
diff
changeset
|
136 static const int verbose= 0; |
| 179 | 137 |
|
1847
ef661c4dc5a6
attribute_used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents:
1775
diff
changeset
|
138 static const int attribute_used deringThreshold= 20; |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
139 |
| 787 | 140 |
| 116 | 141 static struct PPFilter filters[]= |
| 142 { | |
| 143 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK}, | |
| 144 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK}, | |
| 787 | 145 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER}, |
| 146 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/ | |
| 116 | 147 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER}, |
| 148 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER}, | |
|
2037
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
149 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK}, |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
150 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK}, |
| 116 | 151 {"dr", "dering", 1, 5, 6, DERING}, |
| 152 {"al", "autolevels", 0, 1, 2, LEVEL_FIX}, | |
| 181 | 153 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER}, |
| 154 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER}, | |
| 155 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER}, | |
| 156 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER}, | |
| 787 | 157 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER}, |
| 1157 | 158 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER}, |
| 156 | 159 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER}, |
| 183 | 160 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT}, |
| 116 | 161 {NULL, NULL,0,0,0,0} //End Marker |
| 162 }; | |
| 163 | |
| 164 static char *replaceTable[]= | |
| 165 { | |
|
2037
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
166 "default", "hdeblock:a,vdeblock:a,dering:a", |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
167 "de", "hdeblock:a,vdeblock:a,dering:a", |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
168 "fast", "x1hdeblock:a,x1vdeblock:a,dering:a", |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
169 "fa", "x1hdeblock:a,x1vdeblock:a,dering:a", |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
170 "ac", "ha:a:128:7,va:a,dering:a", |
| 116 | 171 NULL //End Marker |
| 172 }; | |
| 173 | |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
174 |
| 787 | 175 #ifdef ARCH_X86 |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
/**
 * Emit the x86 "prefetchnta" instruction for the address held in p.
 *
 * The instruction is only a cache hint: the asm statement declares no
 * outputs and no clobbers, so no C-visible state is modified.
 *
 * @param p address passed to the instruction in a general purpose register
 */
static inline void prefetchnta(void *p)
{
	/* __asm__ instead of asm: the plain keyword is a GNU extension that is
	   not recognised when compiling in a strict ISO mode (-std=c99/c11) */
	__asm__ volatile(	"prefetchnta (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
182 |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
/**
 * Emit the x86 "prefetcht0" instruction for the address held in p.
 *
 * The instruction is only a cache hint: the asm statement declares no
 * outputs and no clobbers, so no C-visible state is modified.
 *
 * @param p address passed to the instruction in a general purpose register
 */
static inline void prefetcht0(void *p)
{
	/* __asm__ instead of asm: the plain keyword is a GNU extension that is
	   not recognised when compiling in a strict ISO mode (-std=c99/c11) */
	__asm__ volatile(	"prefetcht0 (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
189 |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
/**
 * Emit the x86 "prefetcht1" instruction for the address held in p.
 *
 * The instruction is only a cache hint: the asm statement declares no
 * outputs and no clobbers, so no C-visible state is modified.
 *
 * @param p address passed to the instruction in a general purpose register
 */
static inline void prefetcht1(void *p)
{
	/* __asm__ instead of asm: the plain keyword is a GNU extension that is
	   not recognised when compiling in a strict ISO mode (-std=c99/c11) */
	__asm__ volatile(	"prefetcht1 (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
196 |
|
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
/**
 * Emit the x86 "prefetcht2" instruction for the address held in p.
 *
 * The instruction is only a cache hint: the asm statement declares no
 * outputs and no clobbers, so no C-visible state is modified.
 *
 * @param p address passed to the instruction in a general purpose register
 */
static inline void prefetcht2(void *p)
{
	/* __asm__ instead of asm: the plain keyword is a GNU extension that is
	   not recognised when compiling in a strict ISO mode (-std=c99/c11) */
	__asm__ volatile(	"prefetcht2 (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}
| 102 | 203 #endif |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
204 |
| 169 | 205 // The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
206 |
| 165 | 207 /** |
| 208 * Check if the given 8x8 Block is mostly "flat" | |
| 209 */ | |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
210 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c) |
| 165 | 211 { |
| 212 int numEq= 0; | |
| 213 int y; | |
| 1196 | 214 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; |
| 791 | 215 const int dcThreshold= dcOffset*2 + 1; |
| 1196 | 216 |
| 165 | 217 for(y=0; y<BLOCK_SIZE; y++) |
| 218 { | |
| 787 | 219 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; |
| 220 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++; | |
| 221 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++; | |
| 222 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++; | |
| 223 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++; | |
| 224 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++; | |
| 225 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++; | |
| 165 | 226 src+= stride; |
| 227 } | |
| 787 | 228 return numEq > c->ppMode.flatnessThreshold; |
| 229 } | |
| 230 | |
| 231 /** | |
| 232 * Check if the middle 8x8 Block in the given 8x16 block is flat | |
| 233 */ | |
| 234 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ | |
| 235 int numEq= 0; | |
| 236 int y; | |
| 1196 | 237 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; |
| 791 | 238 const int dcThreshold= dcOffset*2 + 1; |
| 1196 | 239 |
| 787 | 240 src+= stride*4; // src points to begin of the 8x8 Block |
| 241 for(y=0; y<BLOCK_SIZE-1; y++) | |
| 242 { | |
| 243 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 244 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 245 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 246 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 247 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 248 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 249 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 250 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++; | |
| 251 src+= stride; | |
| 252 } | |
| 253 return numEq > c->ppMode.flatnessThreshold; | |
| 165 | 254 } |
| 255 | |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
/**
 * Sparse horizontal range check over 8 lines.
 *
 * One pixel pair, 5 columns apart (modulo 8), is sampled per line:
 * columns (0,5), (2,7), (4,1), (6,3) on lines 0..3 and the same pattern
 * again on lines 4..7. For non-negative QP a pair passes when the absolute
 * difference of the two pixels is at most 2*QP.
 *
 * @param src    first pixel of the first line; columns 0..7 are read
 * @param stride offset in bytes from one line to the next
 * @param QP     quantizer scaling the allowed difference
 * @return 1 if every sampled pair passes, 0 as soon as one fails
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
	const uint8_t *row= src;
	int line;

	for(line=0; line<8; line++, row+= stride)
	{
		const int first = (2*line) & 7;   // 0, 2, 4, 6, 0, 2, 4, 6
		const int second= (first + 5) & 7; // 5, 7, 1, 3, 5, 7, 1, 3

		// the unsigned cast turns "below -2*QP" into a huge value, so a
		// single compare rejects both directions
		if((unsigned)(row[first] - row[second] + 2*QP) > 4*QP) return 0;
	}
	return 1;
}
| 165 | 278 |
|
1327
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
279 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP) |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
280 { |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
281 #if 1 |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
282 #if 1 |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
283 int x; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
284 src+= stride*4; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
285 for(x=0; x<BLOCK_SIZE; x+=4) |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
286 { |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
287 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
288 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
289 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
290 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
291 } |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
292 #else |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
293 int x; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
294 src+= stride*3; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
295 for(x=0; x<BLOCK_SIZE; x++) |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
296 { |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
297 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
298 } |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
299 #endif |
| 165 | 300 return 1; |
|
1327
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
301 #else |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
302 int x; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
303 src+= stride*4; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
304 for(x=0; x<BLOCK_SIZE; x++) |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
305 { |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
306 int min=255; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
307 int max=0; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
308 int y; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
309 for(y=0; y<8; y++){ |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
310 int v= src[x + y*stride]; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
311 if(v>max) max=v; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
312 if(v<min) min=v; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
313 } |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
314 if(max-min > 2*QP) return 0; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
315 } |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
316 return 1; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
317 #endif |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
318 } |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
319 |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
320 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){ |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
321 if( isHorizDC_C(src, stride, c) ){ |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
322 if( isHorizMinMaxOk_C(src, stride, c->QP) ) |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
323 return 1; |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
324 else |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
325 return 0; |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
326 }else{ |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
327 return 2; |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
328 } |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
329 } |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
330 |
|
1327
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
331 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
332 if( isVertDC_C(src, stride, c) ){ |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
333 if( isVertMinMaxOk_C(src, stride, c->QP) ) |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
334 return 1; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
335 else |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
336 return 0; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
337 }else{ |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
338 return 2; |
|
854571532c89
blinking blocks around thin vertical lines and dots bugfix
michaelni
parents:
1282
diff
changeset
|
339 } |
| 165 | 340 } |
| 341 | |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
342 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c) |
| 165 | 343 { |
| 344 int y; | |
| 345 for(y=0; y<BLOCK_SIZE; y++) | |
| 346 { | |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
347 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]); |
| 165 | 348 |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
349 if(ABS(middleEnergy) < 8*c->QP) |
| 165 | 350 { |
| 351 const int q=(dst[3] - dst[4])/2; | |
| 352 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]); | |
| 353 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]); | |
| 354 | |
| 355 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); | |
| 356 d= MAX(d, 0); | |
| 357 | |
| 358 d= (5*d + 32) >> 6; | |
| 359 d*= SIGN(-middleEnergy); | |
| 360 | |
| 361 if(q>0) | |
| 362 { | |
| 363 d= d<0 ? 0 : d; | |
| 364 d= d>q ? q : d; | |
| 365 } | |
| 366 else | |
| 367 { | |
| 368 d= d>0 ? 0 : d; | |
| 369 d= d<q ? q : d; | |
| 370 } | |
| 371 | |
| 372 dst[3]-= d; | |
| 373 dst[4]+= d; | |
| 374 } | |
| 375 dst+= stride; | |
| 376 } | |
| 377 } | |
| 378 | |
| 379 /** | |
| 380 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) | |
| 381 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) | |
| 382 */ | |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
383 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) |
| 165 | 384 { |
| 385 int y; | |
| 386 for(y=0; y<BLOCK_SIZE; y++) | |
| 387 { | |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
388 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0]; |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
389 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7]; |
| 165 | 390 |
| 2038 | 391 int sums[10]; |
| 392 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4; | |
| 393 sums[1] = sums[0] - first + dst[3]; | |
| 394 sums[2] = sums[1] - first + dst[4]; | |
| 395 sums[3] = sums[2] - first + dst[5]; | |
| 396 sums[4] = sums[3] - first + dst[6]; | |
| 397 sums[5] = sums[4] - dst[0] + dst[7]; | |
| 398 sums[6] = sums[5] - dst[1] + last; | |
| 399 sums[7] = sums[6] - dst[2] + last; | |
| 400 sums[8] = sums[7] - dst[3] + last; | |
| 401 sums[9] = sums[8] - dst[4] + last; | |
| 165 | 402 |
| 2038 | 403 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4; |
| 404 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4; | |
| 405 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4; | |
| 406 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4; | |
| 407 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4; | |
| 408 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4; | |
| 409 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4; | |
| 410 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4; | |
| 165 | 411 |
| 412 dst+= stride; | |
| 413 } | |
| 414 } | |
| 415 | |
| 169 | 416 /** |
| 417 * Experimental Filter 1 (Horizontal) | |
| 418 * will not damage linear gradients | |
| 419 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter | |
| 420 * can only smooth blocks at the expected locations (it cant smooth them if they did move) | |
| 421 * MMX2 version does correct clipping C version doesnt | |
| 422 * not identical with the vertical one | |
| 423 */ | |
| 424 static inline void horizX1Filter(uint8_t *src, int stride, int QP) | |
|
95
8bce253b537c
new postprocess code by Michael Niedermayer (michaelni@gmx.at)
arpi
parents:
diff
changeset
|
425 { |
| 169 | 426 int y; |
| 427 static uint64_t *lut= NULL; | |
| 428 if(lut==NULL) | |
| 429 { | |
| 430 int i; | |
| 431 lut= (uint64_t*)memalign(8, 256*8); | |
| 432 for(i=0; i<256; i++) | |
| 433 { | |
| 434 int v= i < 128 ? 2*i : 2*(i-256); | |
| 435 /* | |
| 436 //Simulate 112242211 9-Tap filter | |
| 437 uint64_t a= (v/16) & 0xFF; | |
| 438 uint64_t b= (v/8) & 0xFF; | |
| 439 uint64_t c= (v/4) & 0xFF; | |
| 440 uint64_t d= (3*v/8) & 0xFF; | |
| 441 */ | |
| 442 //Simulate piecewise linear interpolation | |
| 443 uint64_t a= (v/16) & 0xFF; | |
| 444 uint64_t b= (v*3/16) & 0xFF; | |
| 445 uint64_t c= (v*5/16) & 0xFF; | |
| 446 uint64_t d= (7*v/16) & 0xFF; | |
| 447 uint64_t A= (0x100 - a)&0xFF; | |
| 448 uint64_t B= (0x100 - b)&0xFF; | |
| 449 uint64_t C= (0x100 - c)&0xFF; | |
| 450 uint64_t D= (0x100 - c)&0xFF; | |
| 130 | 451 |
| 169 | 452 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) | |
| 453 (D<<24) | (C<<16) | (B<<8) | (A); | |
| 454 //lut[i] = (v<<32) | (v<<24); | |
| 134 | 455 } |
| 456 } | |
| 457 | |
| 169 | 458 for(y=0; y<BLOCK_SIZE; y++) |
| 134 | 459 { |
| 169 | 460 int a= src[1] - src[2]; |
| 461 int b= src[3] - src[4]; | |
| 462 int c= src[5] - src[6]; | |
| 134 | 463 |
| 169 | 464 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0); |
| 167 | 465 |
| 169 | 466 if(d < QP) |
| 167 | 467 { |
| 169 | 468 int v = d * SIGN(-b); |
|
106
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
469 |
| 169 | 470 src[1] +=v/8; |
| 471 src[2] +=v/4; | |
| 472 src[3] +=3*v/8; | |
| 473 src[4] -=3*v/8; | |
| 474 src[5] -=v/4; | |
| 475 src[6] -=v/8; | |
|
106
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
476 |
| 169 | 477 } |
| 478 src+=stride; | |
|
106
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
479 } |
|
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
480 } |
|
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
481 |
|
2037
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
482 /** |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
483 * accurate deblock filter |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
484 */ |
|
2039
f25e485a7850
mmx optimized version of the per line/accurate deblock filter
michael
parents:
2038
diff
changeset
|
485 static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){ |
|
2037
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
486 int y; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
487 const int QP= c->QP; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
488 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
489 const int dcThreshold= dcOffset*2 + 1; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
490 //START_TIMER |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
491 src+= step*4; // src points to begin of the 8x8 Block |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
492 for(y=0; y<8; y++){ |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
493 int numEq= 0; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
494 |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
495 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
496 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
497 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
498 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
499 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
500 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
501 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
502 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
503 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
504 if(numEq > c->ppMode.flatnessThreshold){ |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
505 int min, max, x; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
506 |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
507 if(src[0] > src[step]){ |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
508 max= src[0]; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
509 min= src[step]; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
510 }else{ |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
511 max= src[step]; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
512 min= src[0]; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
513 } |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
514 for(x=2; x<8; x+=2){ |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
515 if(src[x*step] > src[(x+1)*step]){ |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
516 if(src[x *step] > max) max= src[ x *step]; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
517 if(src[(x+1)*step] < min) min= src[(x+1)*step]; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
518 }else{ |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
519 if(src[(x+1)*step] > max) max= src[(x+1)*step]; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
520 if(src[ x *step] < min) min= src[ x *step]; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
521 } |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
522 } |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
523 if(max-min < 2*QP){ |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
524 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0]; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
525 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step]; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
526 |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
527 int sums[10]; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
528 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
529 sums[1] = sums[0] - first + src[3*step]; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
530 sums[2] = sums[1] - first + src[4*step]; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
531 sums[3] = sums[2] - first + src[5*step]; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
532 sums[4] = sums[3] - first + src[6*step]; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
533 sums[5] = sums[4] - src[0*step] + src[7*step]; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
534 sums[6] = sums[5] - src[1*step] + last; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
535 sums[7] = sums[6] - src[2*step] + last; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
536 sums[8] = sums[7] - src[3*step] + last; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
537 sums[9] = sums[8] - src[4*step] + last; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
538 |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
539 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
540 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
541 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
542 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
543 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
544 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
545 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
546 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
547 } |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
548 }else{ |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
549 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]); |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
550 |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
551 if(ABS(middleEnergy) < 8*QP) |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
552 { |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
553 const int q=(src[3*step] - src[4*step])/2; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
554 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]); |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
555 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]); |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
556 |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
557 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
558 d= MAX(d, 0); |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
559 |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
560 d= (5*d + 32) >> 6; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
561 d*= SIGN(-middleEnergy); |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
562 |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
563 if(q>0) |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
564 { |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
565 d= d<0 ? 0 : d; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
566 d= d>q ? q : d; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
567 } |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
568 else |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
569 { |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
570 d= d>0 ? 0 : d; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
571 d= d<q ? q : d; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
572 } |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
573 |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
574 src[3*step]-= d; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
575 src[4*step]+= d; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
576 } |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
577 } |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
578 |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
579 src += stride; |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
580 } |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
581 /*if(step==16){ |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
582 STOP_TIMER("step16") |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
583 }else{ |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
584 STOP_TIMER("stepX") |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
585 }*/ |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
586 } |
|
106
389391a6d0bf
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
michael
parents:
105
diff
changeset
|
587 |
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
// Step 1: decide which variants of postprocess_template.c get compiled.
// Each COMPILE_xxx macro requests one inclusion of the template further down.
//Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef ARCH_POWERPC
#ifdef HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC
#endif //ARCH_POWERPC

#ifdef ARCH_X86

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif //ARCH_X86

// Step 2: clear the feature macros; every variant below re-defines exactly
// the ones it wants the template to see.
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC
#undef ARCH_X86

// Step 3: include the template once per requested variant. RENAME() appends
// a per-variant suffix (_C, _altivec, _MMX, _MMX2, _3DNow) to its argument.
#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

#ifdef ARCH_POWERPC
#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif
#endif //ARCH_POWERPC

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif

// Minor note: from here on HAVE_xyz / ARCH_X86 only reflect what the last
// included variant defined, not the build configuration, so do not rely on
// them as configuration switches below this line.
|
128
e5266b8e79be
much better horizontal filters (transpose & use the vertical ones) :)
michael
parents:
126
diff
changeset
|
674 |
| 169 | 675 static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
| 829 | 676 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc) |
| 156 | 677 { |
| 787 | 678 PPContext *c= (PPContext *)vc; |
| 829 | 679 PPMode *ppMode= (PPMode *)vm; |
| 787 | 680 c->ppMode= *ppMode; //FIXME |
| 681 | |
| 169 | 682 // useing ifs here as they are faster than function pointers allthough the |
| 683 // difference wouldnt be messureable here but its much better because | |
| 684 // someone might exchange the cpu whithout restarting mplayer ;) | |
| 171 | 685 #ifdef RUNTIME_CPUDETECT |
| 787 | 686 #ifdef ARCH_X86 |
| 169 | 687 // ordered per speed fasterst first |
| 805 | 688 if(c->cpuCaps & PP_CPU_CAPS_MMX2) |
| 787 | 689 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 805 | 690 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW) |
| 787 | 691 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 805 | 692 else if(c->cpuCaps & PP_CPU_CAPS_MMX) |
| 787 | 693 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 169 | 694 else |
| 787 | 695 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
|
157
bc12fd7e6153
temp denoiser changes: (a-b)^2 instead of |a-b| and MMX2/3DNOW version
michael
parents:
156
diff
changeset
|
696 #else |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
697 #ifdef ARCH_POWERPC |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
698 #ifdef HAVE_ALTIVEC |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
699 else if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
700 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
701 else |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
702 #endif |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
703 #endif |
| 787 | 704 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
|
157
bc12fd7e6153
temp denoiser changes: (a-b)^2 instead of |a-b| and MMX2/3DNOW version
michael
parents:
156
diff
changeset
|
705 #endif |
| 171 | 706 #else //RUNTIME_CPUDETECT |
| 707 #ifdef HAVE_MMX2 | |
| 787 | 708 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 171 | 709 #elif defined (HAVE_3DNOW) |
| 787 | 710 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 171 | 711 #elif defined (HAVE_MMX) |
| 787 | 712 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
|
2036
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
713 #elif defined (HAVE_ALTIVEC) |
|
6a6c678517b3
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2031
diff
changeset
|
714 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 171 | 715 #else |
| 787 | 716 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 171 | 717 #endif |
| 718 #endif //!RUNTIME_CPUDETECT | |
| 156 | 719 } |
| 720 | |
| 169 | 721 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
| 722 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode); | |
| 96 | 723 |
/* Help text for the -pp command line option: the syntax of a postprocess
 * string, a few examples, and a table of filter names with their options.
 */
char *pp_help=
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha hadeblock (2 threshold) horizontal deblocking filter\n"
"va vadeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forceQuant <quantizer> force quantizer\n"
;
| 116 | 763 |
| 829 | 764 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality) |
| 116 | 765 { |
| 766 char temp[GET_MODE_BUFFER_SIZE]; | |
| 767 char *p= temp; | |
| 787 | 768 char *filterDelimiters= ",/"; |
| 116 | 769 char *optionDelimiters= ":"; |
| 829 | 770 struct PPMode *ppMode; |
| 116 | 771 char *filterToken; |
| 772 | |
| 829 | 773 ppMode= memalign(8, sizeof(PPMode)); |
| 774 | |
| 775 ppMode->lumMode= 0; | |
| 776 ppMode->chromMode= 0; | |
| 777 ppMode->maxTmpNoise[0]= 700; | |
| 778 ppMode->maxTmpNoise[1]= 1500; | |
| 779 ppMode->maxTmpNoise[2]= 3000; | |
| 780 ppMode->maxAllowedY= 234; | |
| 781 ppMode->minAllowedY= 16; | |
|
1197
d9cbc8ef5a33
better? default thresholds, if this is worse for any files, then tell us ASAP
michaelni
parents:
1196
diff
changeset
|
782 ppMode->baseDcDiff= 256/8; |
|
d9cbc8ef5a33
better? default thresholds, if this is worse for any files, then tell us ASAP
michaelni
parents:
1196
diff
changeset
|
783 ppMode->flatnessThreshold= 56-16-1; |
| 829 | 784 ppMode->maxClippedThreshold= 0.01; |
| 785 ppMode->error=0; | |
| 793 | 786 |
| 116 | 787 strncpy(temp, name, GET_MODE_BUFFER_SIZE); |
| 788 | |
| 202 | 789 if(verbose>1) printf("pp: %s\n", name); |
| 156 | 790 |
| 116 | 791 for(;;){ |
| 792 char *filterName; | |
| 830 | 793 int q= 1000000; //PP_QUALITY_MAX; |
| 116 | 794 int chrom=-1; |
| 795 char *option; | |
| 796 char *options[OPTIONS_ARRAY_SIZE]; | |
| 797 int i; | |
| 798 int filterNameOk=0; | |
| 799 int numOfUnknownOptions=0; | |
| 800 int enable=1; //does the user want us to enabled or disabled the filter | |
| 801 | |
| 802 filterToken= strtok(p, filterDelimiters); | |
| 803 if(filterToken == NULL) break; | |
| 156 | 804 p+= strlen(filterToken) + 1; // p points to next filterToken |
| 116 | 805 filterName= strtok(filterToken, optionDelimiters); |
| 202 | 806 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName); |
| 116 | 807 |
| 808 if(*filterName == '-') | |
| 809 { | |
| 810 enable=0; | |
| 811 filterName++; | |
| 812 } | |
| 156 | 813 |
| 116 | 814 for(;;){ //for all options |
| 815 option= strtok(NULL, optionDelimiters); | |
| 816 if(option == NULL) break; | |
| 817 | |
| 202 | 818 if(verbose>1) printf("pp: option: %s\n", option); |
| 116 | 819 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality; |
| 820 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0; | |
| 821 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1; | |
| 822 else | |
| 823 { | |
| 824 options[numOfUnknownOptions] = option; | |
| 825 numOfUnknownOptions++; | |
| 826 } | |
| 827 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break; | |
| 828 } | |
| 156 | 829 options[numOfUnknownOptions] = NULL; |
| 116 | 830 |
| 831 /* replace stuff from the replace Table */ | |
| 832 for(i=0; replaceTable[2*i]!=NULL; i++) | |
| 833 { | |
| 834 if(!strcmp(replaceTable[2*i], filterName)) | |
| 835 { | |
| 836 int newlen= strlen(replaceTable[2*i + 1]); | |
| 837 int plen; | |
| 838 int spaceLeft; | |
| 839 | |
| 840 if(p==NULL) p= temp, *p=0; //last filter | |
| 841 else p--, *p=','; //not last filter | |
| 842 | |
| 843 plen= strlen(p); | |
|
419
b71190bacce8
applied 64bit patch from Ulrich Hecht <uli at suse dot de>
alex
parents:
212
diff
changeset
|
844 spaceLeft= p - temp + plen; |
| 116 | 845 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE) |
| 846 { | |
| 829 | 847 ppMode->error++; |
| 116 | 848 break; |
| 849 } | |
| 850 memmove(p + newlen, p, plen+1); | |
| 851 memcpy(p, replaceTable[2*i + 1], newlen); | |
| 852 filterNameOk=1; | |
| 853 } | |
| 854 } | |
| 855 | |
| 856 for(i=0; filters[i].shortName!=NULL; i++) | |
| 857 { | |
| 156 | 858 // printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName); |
| 116 | 859 if( !strcmp(filters[i].longName, filterName) |
| 860 || !strcmp(filters[i].shortName, filterName)) | |
| 861 { | |
| 829 | 862 ppMode->lumMode &= ~filters[i].mask; |
| 863 ppMode->chromMode &= ~filters[i].mask; | |
| 116 | 864 |
| 865 filterNameOk=1; | |
| 866 if(!enable) break; // user wants to disable it | |
| 867 | |
| 868 if(q >= filters[i].minLumQuality) | |
| 829 | 869 ppMode->lumMode|= filters[i].mask; |
| 116 | 870 if(chrom==1 || (chrom==-1 && filters[i].chromDefault)) |
| 871 if(q >= filters[i].minChromQuality) | |
| 829 | 872 ppMode->chromMode|= filters[i].mask; |
| 116 | 873 |
| 874 if(filters[i].mask == LEVEL_FIX) | |
| 875 { | |
| 876 int o; | |
| 829 | 877 ppMode->minAllowedY= 16; |
| 878 ppMode->maxAllowedY= 234; | |
| 116 | 879 for(o=0; options[o]!=NULL; o++) |
|
182
3ccd74a91074
minor brightness/contrast bugfix / moved some global vars into ppMode
michael
parents:
181
diff
changeset
|
880 { |
| 116 | 881 if( !strcmp(options[o],"fullyrange") |
| 882 ||!strcmp(options[o],"f")) | |
| 883 { | |
| 829 | 884 ppMode->minAllowedY= 0; |
| 885 ppMode->maxAllowedY= 255; | |
| 116 | 886 numOfUnknownOptions--; |
| 887 } | |
|
182
3ccd74a91074
minor brightness/contrast bugfix / moved some global vars into ppMode
michael
parents:
181
diff
changeset
|
888 } |
| 116 | 889 } |
| 156 | 890 else if(filters[i].mask == TEMP_NOISE_FILTER) |
| 891 { | |
| 892 int o; | |
| 893 int numOfNoises=0; | |
| 894 | |
| 895 for(o=0; options[o]!=NULL; o++) | |
| 896 { | |
| 897 char *tail; | |
| 829 | 898 ppMode->maxTmpNoise[numOfNoises]= |
| 156 | 899 strtol(options[o], &tail, 0); |
| 900 if(tail!=options[o]) | |
| 901 { | |
| 902 numOfNoises++; | |
| 903 numOfUnknownOptions--; | |
| 904 if(numOfNoises >= 3) break; | |
| 905 } | |
| 906 } | |
| 907 } | |
|
2037
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
908 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK |
|
98d8283534bb
accurate/slow (per line instead of per block) deblock filter spport which is identical to what is recommanded in the mpeg4 spec
michael
parents:
2036
diff
changeset
|
909 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK) |
| 181 | 910 { |
| 911 int o; | |
| 912 | |
| 913 for(o=0; options[o]!=NULL && o<2; o++) | |
| 914 { | |
| 915 char *tail; | |
| 916 int val= strtol(options[o], &tail, 0); | |
| 917 if(tail==options[o]) break; | |
| 918 | |
| 919 numOfUnknownOptions--; | |
| 829 | 920 if(o==0) ppMode->baseDcDiff= val; |
| 921 else ppMode->flatnessThreshold= val; | |
| 181 | 922 } |
| 923 } | |
| 183 | 924 else if(filters[i].mask == FORCE_QUANT) |
| 925 { | |
| 926 int o; | |
| 829 | 927 ppMode->forcedQuant= 15; |
| 183 | 928 |
| 929 for(o=0; options[o]!=NULL && o<1; o++) | |
| 930 { | |
| 931 char *tail; | |
| 932 int val= strtol(options[o], &tail, 0); | |
| 933 if(tail==options[o]) break; | |
| 934 | |
| 935 numOfUnknownOptions--; | |
| 829 | 936 ppMode->forcedQuant= val; |
| 183 | 937 } |
| 938 } | |
| 116 | 939 } |
| 940 } | |
| 829 | 941 if(!filterNameOk) ppMode->error++; |
| 942 ppMode->error += numOfUnknownOptions; | |
| 116 | 943 } |
| 944 | |
| 829 | 945 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode); |
| 946 if(ppMode->error) | |
| 947 { | |
| 948 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name); | |
| 949 free(ppMode); | |
| 950 return NULL; | |
| 951 } | |
| 116 | 952 return ppMode; |
| 953 } | |
| 954 | |
| 829 | 955 void pp_free_mode(pp_mode_t *mode){ |
| 956 if(mode) free(mode); | |
| 957 } | |
| 958 | |
/**
 * Replaces the buffer *p by a new, zero filled one.
 *
 * The old buffer (if any) is freed; its contents are not preserved.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment alignment handed to memalign()
 * @param size      size of the new buffer in bytes
 *
 * If size is negative or the allocation fails, *p is left NULL instead of
 * being passed to memset().
 */
static void reallocAlign(void **p, int alignment, int size){
	free(*p);
	*p= NULL;
	// a negative size would turn into a huge size_t in the calls below
	if(size < 0) return;
	*p= memalign(alignment, size);
	if(*p) memset(*p, 0, size);
}
| 964 | |
| 1196 | 965 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){ |
| 791 | 966 int mbWidth = (width+15)>>4; |
| 967 int mbHeight= (height+15)>>4; | |
| 937 | 968 int i; |
| 969 | |
| 970 c->stride= stride; | |
| 1196 | 971 c->qpStride= qpStride; |
| 787 | 972 |
| 937 | 973 reallocAlign((void **)&c->tempDst, 8, stride*24); |
| 974 reallocAlign((void **)&c->tempSrc, 8, stride*24); | |
| 975 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8); | |
| 976 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t)); | |
| 787 | 977 for(i=0; i<256; i++) |
| 978 c->yHistogram[i]= width*height/64*15/256; | |
| 979 | |
| 980 for(i=0; i<3; i++) | |
| 185 | 981 { |
| 787 | 982 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end |
| 937 | 983 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024); |
| 984 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size | |
| 185 | 985 } |
| 937 | 986 |
| 1157 | 987 reallocAlign((void **)&c->deintTemp, 8, 2*width+32); |
| 1196 | 988 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); |
| 989 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); | |
| 937 | 990 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T)); |
| 991 } | |
| 179 | 992 |
| 1282 | 993 static void global_init(void){ |
| 1157 | 994 int i; |
| 995 memset(clip_table, 0, 256); | |
| 996 for(i=256; i<512; i++) | |
| 997 clip_table[i]= i; | |
| 998 memset(clip_table+512, 0, 256); | |
| 999 } | |
| 1000 | |
| 937 | 1001 pp_context_t *pp_get_context(int width, int height, int cpuCaps){ |
| 1002 PPContext *c= memalign(32, sizeof(PPContext)); | |
| 1003 int stride= (width+15)&(~15); //assumed / will realloc if needed | |
| 1196 | 1004 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed |
| 937 | 1005 |
| 1157 | 1006 global_init(); |
| 1007 | |
| 937 | 1008 memset(c, 0, sizeof(PPContext)); |
| 1009 c->cpuCaps= cpuCaps; | |
| 957 | 1010 if(cpuCaps&PP_FORMAT){ |
| 1011 c->hChromaSubSample= cpuCaps&0x3; | |
| 1012 c->vChromaSubSample= (cpuCaps>>4)&0x3; | |
| 1013 }else{ | |
| 1014 c->hChromaSubSample= 1; | |
| 1015 c->vChromaSubSample= 1; | |
| 1016 } | |
| 937 | 1017 |
| 1196 | 1018 reallocBuffers(c, width, height, stride, qpStride); |
| 937 | 1019 |
| 787 | 1020 c->frameNum=-1; |
| 179 | 1021 |
| 787 | 1022 return c; |
| 179 | 1023 } |
| 1024 | |
| 792 | 1025 void pp_free_context(void *vc){ |
| 787 | 1026 PPContext *c = (PPContext*)vc; |
| 1027 int i; | |
| 1028 | |
| 1029 for(i=0; i<3; i++) free(c->tempBlured[i]); | |
| 1030 for(i=0; i<3; i++) free(c->tempBluredPast[i]); | |
| 1031 | |
| 1032 free(c->tempBlocks); | |
| 1033 free(c->yHistogram); | |
| 1034 free(c->tempDst); | |
| 1035 free(c->tempSrc); | |
| 1036 free(c->deintTemp); | |
| 1196 | 1037 free(c->stdQPTable); |
| 791 | 1038 free(c->nonBQPTable); |
| 937 | 1039 free(c->forcedQPTable); |
| 1040 | |
| 1041 memset(c, 0, sizeof(PPContext)); | |
| 1042 | |
| 787 | 1043 free(c); |
| 1044 } | |
| 1045 | |
| 792 | 1046 void pp_postprocess(uint8_t * src[3], int srcStride[3], |
| 787 | 1047 uint8_t * dst[3], int dstStride[3], |
| 791 | 1048 int width, int height, |
| 787 | 1049 QP_STORE_T *QP_store, int QPStride, |
| 829 | 1050 pp_mode_t *vm, void *vc, int pict_type) |
| 116 | 1051 { |
| 791 | 1052 int mbWidth = (width+15)>>4; |
| 1053 int mbHeight= (height+15)>>4; | |
| 829 | 1054 PPMode *mode = (PPMode*)vm; |
| 791 | 1055 PPContext *c = (PPContext*)vc; |
| 937 | 1056 int minStride= MAX(srcStride[0], dstStride[0]); |
| 1196 | 1057 |
| 1058 if(c->stride < minStride || c->qpStride < QPStride) | |
| 1059 reallocBuffers(c, width, height, | |
| 1060 MAX(minStride, c->stride), | |
| 1061 MAX(c->qpStride, QPStride)); | |
| 787 | 1062 |
| 183 | 1063 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) |
| 152 | 1064 { |
| 183 | 1065 int i; |
| 937 | 1066 QP_store= c->forcedQPTable; |
| 787 | 1067 QPStride= 0; |
| 183 | 1068 if(mode->lumMode & FORCE_QUANT) |
| 937 | 1069 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant; |
| 183 | 1070 else |
| 937 | 1071 for(i=0; i<mbWidth; i++) QP_store[i]= 1; |
| 152 | 1072 } |
| 1196 | 1073 //printf("pict_type:%d\n", pict_type); |
| 1074 | |
| 1075 if(pict_type & PP_PICT_TYPE_QP2){ | |
| 1076 int i; | |
| 1077 const int count= mbHeight * QPStride; | |
| 1078 for(i=0; i<(count>>2); i++){ | |
| 1079 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F; | |
| 1080 } | |
| 1081 for(i<<=2; i<count; i++){ | |
| 1082 c->stdQPTable[i] = QP_store[i]>>1; | |
| 1083 } | |
| 1084 QP_store= c->stdQPTable; | |
| 1085 } | |
| 1086 | |
| 791 | 1087 if(0){ |
| 1088 int x,y; | |
| 1089 for(y=0; y<mbHeight; y++){ | |
| 1090 for(x=0; x<mbWidth; x++){ | |
| 1091 printf("%2d ", QP_store[x + y*QPStride]); | |
| 1092 } | |
| 1093 printf("\n"); | |
| 1094 } | |
| 1095 printf("\n"); | |
| 1096 } | |
| 798 | 1097 |
| 1196 | 1098 if((pict_type&7)!=3) |
| 791 | 1099 { |
| 1196 | 1100 int i; |
| 1101 const int count= mbHeight * QPStride; | |
| 1102 for(i=0; i<(count>>2); i++){ | |
| 1724 | 1103 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F; |
| 1196 | 1104 } |
| 1105 for(i<<=2; i<count; i++){ | |
| 1724 | 1106 c->nonBQPTable[i] = QP_store[i] & 0x3F; |
| 791 | 1107 } |
| 1108 } | |
| 152 | 1109 |
| 793 | 1110 if(verbose>2) |
| 202 | 1111 { |
| 1112 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode); | |
| 1113 } | |
| 1114 | |
| 787 | 1115 postProcess(src[0], srcStride[0], dst[0], dstStride[0], |
| 1076 | 1116 width, height, QP_store, QPStride, 0, mode, c); |
| 116 | 1117 |
| 957 | 1118 width = (width )>>c->hChromaSubSample; |
| 1119 height = (height)>>c->vChromaSubSample; | |
| 116 | 1120 |
|
168
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
1121 if(mode->chromMode) |
|
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
1122 { |
| 787 | 1123 postProcess(src[1], srcStride[1], dst[1], dstStride[1], |
| 1076 | 1124 width, height, QP_store, QPStride, 1, mode, c); |
| 787 | 1125 postProcess(src[2], srcStride[2], dst[2], dstStride[2], |
| 1076 | 1126 width, height, QP_store, QPStride, 2, mode, c); |
|
168
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
1127 } |
| 787 | 1128 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) |
|
168
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
1129 { |
| 791 | 1130 memcpy(dst[1], src[1], srcStride[1]*height); |
| 1131 memcpy(dst[2], src[2], srcStride[2]*height); | |
|
168
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
1132 } |
|
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
1133 else |
|
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
1134 { |
|
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
1135 int y; |
| 791 | 1136 for(y=0; y<height; y++) |
|
168
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
1137 { |
| 791 | 1138 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width); |
| 1139 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width); | |
|
168
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
1140 } |
|
712c7a115164
use fastmemcpy for chrominance if no chrominance filtering is done
michael
parents:
167
diff
changeset
|
1141 } |
| 116 | 1142 } |
| 1143 |
