Mercurial > libavcodec.hg
annotate imgresample.c @ 1757:3906ddbaffec libavcodec
optimization & bugfix extracted from the 4k line diff between ffmpeg 0.4.7 and http://www.alicestreet.com/ffh263.html
the other parts of the diff were:
1. spelling fixes (rejected as only a small part of them could be applied automatically)
2. cosmetics (reindentation, function reordering, variable renaming, ...) with bugs (rejected)
3. RTP-related stuff (rejected as it breaks several codecs)
4. some changes to the intra/inter decision & scene change detection (quality tests needed first)
| author | michael |
|---|---|
| date | Sat, 24 Jan 2004 23:47:33 +0000 |
| parents | 766a2f4edbea |
| children | 0c23a5564489 |
| rev | line source |
|---|---|
| 0 | 1 /* |
| 2 * High quality image resampling with polyphase filters | |
| 429 | 3 * Copyright (c) 2001 Fabrice Bellard. |
| 0 | 4 * |
| 429 | 5 * This library is free software; you can redistribute it and/or |
| 6 * modify it under the terms of the GNU Lesser General Public | |
| 7 * License as published by the Free Software Foundation; either | |
| 8 * version 2 of the License, or (at your option) any later version. | |
| 0 | 9 * |
| 429 | 10 * This library is distributed in the hope that it will be useful, |
| 0 | 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 429 | 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 * Lesser General Public License for more details. | |
| 0 | 14 * |
| 429 | 15 * You should have received a copy of the GNU Lesser General Public |
| 16 * License along with this library; if not, write to the Free Software | |
| 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
| 0 | 18 */ |
| 1106 | 19 |
| 20 /** | |
| 21 * @file imgresample.c | |
| 22 * High quality image resampling with polyphase filters . | |
| 23 */ | |
| 24 | |
|
396
fce0a2520551
removed useless header includes - use av memory functions
glantau
parents:
18
diff
changeset
|
25 #include "avcodec.h" |
| 0 | 26 #include "dsputil.h" |
| 27 | |
| 17 | 28 #ifdef USE_FASTMEMCPY |
| 29 #include "fastmemcpy.h" | |
| 30 #endif | |
| 31 | |
/* NOTE(review): presumably the number of image planes; not used in the code visible here */
#define NB_COMPONENTS 3

/* number of bits of the fractional position used to select a filter (see get_phase()) */
#define PHASE_BITS 4
/* number of filters stored per direction */
#define NB_PHASES (1 << PHASE_BITS)
/* number of source samples combined into one output sample */
#define NB_TAPS 4
#define FCENTER 1 /* index of the center of the filter */
//#define TEST 1 /* Test it */

/* source positions are fixed point: the integer sample index is pos >> POS_FRAC_BITS */
#define POS_FRAC_BITS 16
#define POS_FRAC (1 << POS_FRAC_BITS)
/* 6 bits precision is needed for MMX */
/* filtered sums are shifted right by FILTER_BITS before being clamped to 0..255 */
#define FILTER_BITS 8

#define LINE_BUF_HEIGHT (NB_TAPS * 4)
| 46 | |
| 47 struct ImgReSampleContext { | |
|
630
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
48 int iwidth, iheight, owidth, oheight, topBand, bottomBand, leftBand, rightBand; |
| 0 | 49 int h_incr, v_incr; |
| 1064 | 50 int16_t h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */ |
| 51 int16_t v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */ | |
| 52 uint8_t *line_buf; | |
| 0 | 53 }; |
| 54 | |
| 55 static inline int get_phase(int pos) | |
| 56 { | |
| 57 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1); | |
| 58 } | |
| 59 | |
| 60 /* This function must be optimized */ | |
|
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
61 static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src, |
|
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
62 int src_width, int src_start, int src_incr, |
|
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
63 int16_t *filters) |
| 0 | 64 { |
| 65 int src_pos, phase, sum, i; | |
|
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
66 const uint8_t *s; |
| 1064 | 67 int16_t *filter; |
| 0 | 68 |
| 69 src_pos = src_start; | |
| 70 for(i=0;i<dst_width;i++) { | |
| 71 #ifdef TEST | |
| 72 /* test */ | |
| 73 if ((src_pos >> POS_FRAC_BITS) < 0 || | |
| 74 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS)) | |
| 653 | 75 av_abort(); |
| 0 | 76 #endif |
| 77 s = src + (src_pos >> POS_FRAC_BITS); | |
| 78 phase = get_phase(src_pos); | |
| 79 filter = filters + phase * NB_TAPS; | |
| 80 #if NB_TAPS == 4 | |
| 81 sum = s[0] * filter[0] + | |
| 82 s[1] * filter[1] + | |
| 83 s[2] * filter[2] + | |
| 84 s[3] * filter[3]; | |
| 85 #else | |
| 86 { | |
| 87 int j; | |
| 88 sum = 0; | |
| 89 for(j=0;j<NB_TAPS;j++) | |
| 90 sum += s[j] * filter[j]; | |
| 91 } | |
| 92 #endif | |
| 93 sum = sum >> FILTER_BITS; | |
| 94 if (sum < 0) | |
| 95 sum = 0; | |
| 96 else if (sum > 255) | |
| 97 sum = 255; | |
| 98 dst[0] = sum; | |
| 99 src_pos += src_incr; | |
| 100 dst++; | |
| 101 } | |
| 102 } | |
| 103 | |
| 104 /* This function must be optimized */ | |
|
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
105 static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src, |
|
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
106 int wrap, int16_t *filter) |
| 0 | 107 { |
| 108 int sum, i; | |
|
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
109 const uint8_t *s; |
| 0 | 110 |
| 111 s = src; | |
| 112 for(i=0;i<dst_width;i++) { | |
| 113 #if NB_TAPS == 4 | |
| 114 sum = s[0 * wrap] * filter[0] + | |
| 115 s[1 * wrap] * filter[1] + | |
| 116 s[2 * wrap] * filter[2] + | |
| 117 s[3 * wrap] * filter[3]; | |
| 118 #else | |
| 119 { | |
| 120 int j; | |
| 1064 | 121 uint8_t *s1 = s; |
| 0 | 122 |
| 123 sum = 0; | |
| 124 for(j=0;j<NB_TAPS;j++) { | |
| 125 sum += s1[0] * filter[j]; | |
| 126 s1 += wrap; | |
| 127 } | |
| 128 } | |
| 129 #endif | |
| 130 sum = sum >> FILTER_BITS; | |
| 131 if (sum < 0) | |
| 132 sum = 0; | |
| 133 else if (sum > 255) | |
| 134 sum = 255; | |
| 135 dst[0] = sum; | |
| 136 dst++; | |
| 137 s++; | |
| 138 } | |
| 139 } | |
| 140 | |
| 2 | 141 #ifdef HAVE_MMX |
| 0 | 142 |
| 143 #include "i386/mmx.h" | |
| 144 | |
/*
 * Compute one horizontally filtered sample and leave it in the low dword
 * of reg.
 *
 * The macro expands inside a function that provides the variables src,
 * src_pos, src_incr, filters, s, phase and filter; mm7 must be zero on
 * entry and mm6 is clobbered.
 *
 * Steps: load 8 source bytes, widen the low 4 to words, multiply them with
 * the 4 coefficients of the selected phase (pmaddwd yields two partial
 * dword sums), add the two partial sums, shift right by FILTER_BITS and
 * advance src_pos. The result is not clamped here.
 */
#define FILTER4(reg) \
{\
        s = src + (src_pos >> POS_FRAC_BITS);\
        phase = get_phase(src_pos);\
        filter = filters + phase * NB_TAPS;\
        movq_m2r(*s, reg);\
        punpcklbw_r2r(mm7, reg);\
        movq_m2r(*filter, mm6);\
        pmaddwd_r2r(reg, mm6);\
        movq_r2r(mm6, reg);\
        psrlq_i2r(32, reg);\
        paddd_r2r(mm6, reg);\
        psrad_i2r(FILTER_BITS, reg);\
        src_pos += src_incr;\
}

/*
 * Debug helper: print the 64-bit content of an MMX register. Needs a local
 * "mmx_t tmp" in scope. The value is cast so that it matches the %llx
 * conversion regardless of how mmx_t declares its uq member.
 */
#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016llx\n", (unsigned long long)tmp.uq);
| 162 | |
| 163 /* XXX: do four pixels at a time */ | |
|
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
164 static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, |
|
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
165 const uint8_t *src, int src_width, |
| 1064 | 166 int src_start, int src_incr, int16_t *filters) |
| 0 | 167 { |
| 168 int src_pos, phase; | |
|
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
169 const uint8_t *s; |
| 1064 | 170 int16_t *filter; |
| 0 | 171 mmx_t tmp; |
| 172 | |
| 173 src_pos = src_start; | |
| 174 pxor_r2r(mm7, mm7); | |
| 175 | |
| 176 while (dst_width >= 4) { | |
| 177 | |
| 178 FILTER4(mm0); | |
| 179 FILTER4(mm1); | |
| 180 FILTER4(mm2); | |
| 181 FILTER4(mm3); | |
| 182 | |
| 183 packuswb_r2r(mm7, mm0); | |
| 184 packuswb_r2r(mm7, mm1); | |
| 185 packuswb_r2r(mm7, mm3); | |
| 186 packuswb_r2r(mm7, mm2); | |
| 187 movq_r2m(mm0, tmp); | |
| 188 dst[0] = tmp.ub[0]; | |
| 189 movq_r2m(mm1, tmp); | |
| 190 dst[1] = tmp.ub[0]; | |
| 191 movq_r2m(mm2, tmp); | |
| 192 dst[2] = tmp.ub[0]; | |
| 193 movq_r2m(mm3, tmp); | |
| 194 dst[3] = tmp.ub[0]; | |
| 195 dst += 4; | |
| 196 dst_width -= 4; | |
| 197 } | |
| 198 while (dst_width > 0) { | |
| 199 FILTER4(mm0); | |
| 200 packuswb_r2r(mm7, mm0); | |
| 201 movq_r2m(mm0, tmp); | |
| 202 dst[0] = tmp.ub[0]; | |
| 203 dst++; | |
| 204 dst_width--; | |
| 205 } | |
| 206 emms(); | |
| 207 } | |
| 208 | |
|
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
209 static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src, |
|
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
210 int wrap, int16_t *filter) |
| 0 | 211 { |
| 212 int sum, i, v; | |
|
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
213 const uint8_t *s; |
| 0 | 214 mmx_t tmp; |
| 215 mmx_t coefs[4]; | |
| 216 | |
| 217 for(i=0;i<4;i++) { | |
| 218 v = filter[i]; | |
| 219 coefs[i].uw[0] = v; | |
| 220 coefs[i].uw[1] = v; | |
| 221 coefs[i].uw[2] = v; | |
| 222 coefs[i].uw[3] = v; | |
| 223 } | |
| 224 | |
| 225 pxor_r2r(mm7, mm7); | |
| 226 s = src; | |
| 227 while (dst_width >= 4) { | |
| 228 movq_m2r(s[0 * wrap], mm0); | |
| 229 punpcklbw_r2r(mm7, mm0); | |
| 230 movq_m2r(s[1 * wrap], mm1); | |
| 231 punpcklbw_r2r(mm7, mm1); | |
| 232 movq_m2r(s[2 * wrap], mm2); | |
| 233 punpcklbw_r2r(mm7, mm2); | |
| 234 movq_m2r(s[3 * wrap], mm3); | |
| 235 punpcklbw_r2r(mm7, mm3); | |
| 236 | |
| 237 pmullw_m2r(coefs[0], mm0); | |
| 238 pmullw_m2r(coefs[1], mm1); | |
| 239 pmullw_m2r(coefs[2], mm2); | |
| 240 pmullw_m2r(coefs[3], mm3); | |
| 241 | |
| 242 paddw_r2r(mm1, mm0); | |
| 243 paddw_r2r(mm3, mm2); | |
| 244 paddw_r2r(mm2, mm0); | |
| 245 psraw_i2r(FILTER_BITS, mm0); | |
| 246 | |
| 247 packuswb_r2r(mm7, mm0); | |
| 248 movq_r2m(mm0, tmp); | |
| 249 | |
| 1064 | 250 *(uint32_t *)dst = tmp.ud[0]; |
| 0 | 251 dst += 4; |
| 252 s += 4; | |
| 253 dst_width -= 4; | |
| 254 } | |
| 255 while (dst_width > 0) { | |
| 256 sum = s[0 * wrap] * filter[0] + | |
| 257 s[1 * wrap] * filter[1] + | |
| 258 s[2 * wrap] * filter[2] + | |
| 259 s[3 * wrap] * filter[3]; | |
| 260 sum = sum >> FILTER_BITS; | |
| 261 if (sum < 0) | |
| 262 sum = 0; | |
| 263 else if (sum > 255) | |
| 264 sum = 255; | |
| 265 dst[0] = sum; | |
| 266 dst++; | |
| 267 s++; | |
| 268 dst_width--; | |
| 269 } | |
| 270 emms(); | |
| 271 } | |
| 272 #endif | |
| 273 | |
|
894
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
274 #ifdef HAVE_ALTIVEC |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
/* 16 unsigned bytes, accessible either as one AltiVec vector or element by element */
typedef union {
    vector unsigned char v;
    unsigned char c[16];
} vec_uc_t;

/* 8 signed 16-bit values, accessible either as one AltiVec vector or element by element */
typedef union {
    vector signed short v;
    signed short s[8];
} vec_ss_t;
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
284 |
|
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
285 void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src, |
|
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
286 int wrap, int16_t *filter) |
|
894
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
287 { |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
288 int sum, i; |
|
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
289 const uint8_t *s; |
|
894
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
290 vector unsigned char *tv, tmp, dstv, zero; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
291 vec_ss_t srchv[4], srclv[4], fv[4]; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
292 vector signed short zeros, sumhv, sumlv; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
293 s = src; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
294 |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
295 for(i=0;i<4;i++) |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
296 { |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
297 /* |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
298 The vec_madds later on does an implicit >>15 on the result. |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
299 Since FILTER_BITS is 8, and we have 15 bits of magnitude in |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
300 a signed short, we have just enough bits to pre-shift our |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
301 filter constants <<7 to compensate for vec_madds. |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
302 */ |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
303 fv[i].s[0] = filter[i] << (15-FILTER_BITS); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
304 fv[i].v = vec_splat(fv[i].v, 0); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
305 } |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
306 |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
307 zero = vec_splat_u8(0); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
308 zeros = vec_splat_s16(0); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
309 |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
310 |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
311 /* |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
312 When we're resampling, we'd ideally like both our input buffers, |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
313 and output buffers to be 16-byte aligned, so we can do both aligned |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
314 reads and writes. Sadly we can't always have this at the moment, so |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
315 we opt for aligned writes, as unaligned writes have a huge overhead. |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
316 To do this, do enough scalar resamples to get dst 16-byte aligned. |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
317 */ |
|
898
6d5e3fe7aea1
Simplify an expression and eliminate a compile warning
philipjsg
parents:
894
diff
changeset
|
318 i = (-(int)dst) & 0xf; |
|
894
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
319 while(i>0) { |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
320 sum = s[0 * wrap] * filter[0] + |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
321 s[1 * wrap] * filter[1] + |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
322 s[2 * wrap] * filter[2] + |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
323 s[3 * wrap] * filter[3]; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
324 sum = sum >> FILTER_BITS; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
325 if (sum<0) sum = 0; else if (sum>255) sum=255; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
326 dst[0] = sum; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
327 dst++; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
328 s++; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
329 dst_width--; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
330 i--; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
331 } |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
332 |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
333 /* Do our altivec resampling on 16 pixels at once. */ |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
334 while(dst_width>=16) { |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
335 /* |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
336 Read 16 (potentially unaligned) bytes from each of |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
337 4 lines into 4 vectors, and split them into shorts. |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
338 Interleave the multipy/accumulate for the resample |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
339 filter with the loads to hide the 3 cycle latency |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
340 the vec_madds have. |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
341 */ |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
342 tv = (vector unsigned char *) &s[0 * wrap]; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
343 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap])); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
344 srchv[0].v = (vector signed short) vec_mergeh(zero, tmp); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
345 srclv[0].v = (vector signed short) vec_mergel(zero, tmp); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
346 sumhv = vec_madds(srchv[0].v, fv[0].v, zeros); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
347 sumlv = vec_madds(srclv[0].v, fv[0].v, zeros); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
348 |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
349 tv = (vector unsigned char *) &s[1 * wrap]; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
350 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap])); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
351 srchv[1].v = (vector signed short) vec_mergeh(zero, tmp); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
352 srclv[1].v = (vector signed short) vec_mergel(zero, tmp); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
353 sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
354 sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
355 |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
356 tv = (vector unsigned char *) &s[2 * wrap]; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
357 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap])); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
358 srchv[2].v = (vector signed short) vec_mergeh(zero, tmp); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
359 srclv[2].v = (vector signed short) vec_mergel(zero, tmp); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
360 sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
361 sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
362 |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
363 tv = (vector unsigned char *) &s[3 * wrap]; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
364 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap])); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
365 srchv[3].v = (vector signed short) vec_mergeh(zero, tmp); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
366 srclv[3].v = (vector signed short) vec_mergel(zero, tmp); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
367 sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
368 sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
369 |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
370 /* |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
371 Pack the results into our destination vector, |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
372 and do an aligned write of that back to memory. |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
373 */ |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
374 dstv = vec_packsu(sumhv, sumlv) ; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
375 vec_st(dstv, 0, (vector unsigned char *) dst); |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
376 |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
377 dst+=16; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
378 s+=16; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
379 dst_width-=16; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
380 } |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
381 |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
382 /* |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
383 If there are any leftover pixels, resample them |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
384 with the slow scalar method. |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
385 */ |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
386 while(dst_width>0) { |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
387 sum = s[0 * wrap] * filter[0] + |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
388 s[1 * wrap] * filter[1] + |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
389 s[2 * wrap] * filter[2] + |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
390 s[3 * wrap] * filter[3]; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
391 sum = sum >> FILTER_BITS; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
392 if (sum<0) sum = 0; else if (sum>255) sum=255; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
393 dst[0] = sum; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
394 dst++; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
395 s++; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
396 dst_width--; |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
397 } |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
398 } |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
399 #endif |
|
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
400 |
| 0 | 401 /* slow version to handle limit cases. Does not need optimisation */ |
|
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
402 static void h_resample_slow(uint8_t *dst, int dst_width, |
|
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
403 const uint8_t *src, int src_width, |
| 1064 | 404 int src_start, int src_incr, int16_t *filters) |
| 0 | 405 { |
| 406 int src_pos, phase, sum, j, v, i; | |
|
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
407 const uint8_t *s, *src_end; |
| 1064 | 408 int16_t *filter; |
| 0 | 409 |
| 410 src_end = src + src_width; | |
| 411 src_pos = src_start; | |
| 412 for(i=0;i<dst_width;i++) { | |
| 413 s = src + (src_pos >> POS_FRAC_BITS); | |
| 414 phase = get_phase(src_pos); | |
| 415 filter = filters + phase * NB_TAPS; | |
| 416 sum = 0; | |
| 417 for(j=0;j<NB_TAPS;j++) { | |
| 418 if (s < src) | |
| 419 v = src[0]; | |
| 420 else if (s >= src_end) | |
| 421 v = src_end[-1]; | |
| 422 else | |
| 423 v = s[0]; | |
| 424 sum += v * filter[j]; | |
| 425 s++; | |
| 426 } | |
| 427 sum = sum >> FILTER_BITS; | |
| 428 if (sum < 0) | |
| 429 sum = 0; | |
| 430 else if (sum > 255) | |
| 431 sum = 255; | |
| 432 dst[0] = sum; | |
| 433 src_pos += src_incr; | |
| 434 dst++; | |
| 435 } | |
| 436 } | |
| 437 | |
|
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
438 static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src, |
|
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
439 int src_width, int src_start, int src_incr, |
|
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
440 int16_t *filters) |
| 0 | 441 { |
| 442 int n, src_end; | |
| 443 | |
| 444 if (src_start < 0) { | |
| 445 n = (0 - src_start + src_incr - 1) / src_incr; | |
| 446 h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters); | |
| 447 dst += n; | |
| 448 dst_width -= n; | |
| 449 src_start += n * src_incr; | |
| 450 } | |
| 451 src_end = src_start + dst_width * src_incr; | |
| 452 if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) { | |
| 453 n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) / | |
| 454 src_incr; | |
| 455 } else { | |
| 456 n = dst_width; | |
| 457 } | |
| 2 | 458 #ifdef HAVE_MMX |
| 0 | 459 if ((mm_flags & MM_MMX) && NB_TAPS == 4) |
| 460 h_resample_fast4_mmx(dst, n, | |
| 461 src, src_width, src_start, src_incr, filters); | |
| 462 else | |
| 463 #endif | |
| 464 h_resample_fast(dst, n, | |
| 465 src, src_width, src_start, src_incr, filters); | |
| 466 if (n < dst_width) { | |
| 467 dst += n; | |
| 468 dst_width -= n; | |
| 469 src_start += n * src_incr; | |
| 470 h_resample_slow(dst, dst_width, | |
| 471 src, src_width, src_start, src_incr, filters); | |
| 472 } | |
| 473 } | |
| 474 | |
/*
 * Resample one image plane.
 *
 * output/owrap/owidth/oheight describe the destination plane (owrap is the
 * distance in bytes between two output lines), input/iwrap/iwidth/iheight
 * describe the source plane in the same way.
 *
 * Source lines are filtered horizontally with h_resample() into
 * s->line_buf, which is addressed as rows of owidth bytes indexed by
 * ring_y. Each output line is then produced by a vertical filter over
 * the NB_TAPS rows ending at ring_y.
 */
static void component_resample(ImgReSampleContext *s,
                               uint8_t *output, int owrap, int owidth, int oheight,
                               uint8_t *input, int iwrap, int iwidth, int iheight)
{
    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
    uint8_t *new_line, *src_line;

    /* last source line that has been filtered horizontally so far */
    last_src_y = - FCENTER - 1;
    /* position of the bottom of the filter in the source image */
    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
    ring_y = NB_TAPS; /* position in ring buffer */
    for(y=0;y<oheight;y++) {
        /* apply horizontal filter on new lines from input if needed */
        src_y1 = src_y >> POS_FRAC_BITS;
        while (last_src_y < src_y1) {
            /* ring_y cycles through NB_TAPS .. LINE_BUF_HEIGHT + NB_TAPS - 1 */
            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
                ring_y = NB_TAPS;
            last_src_y++;
            /* handle limit conditions : replicate line (slightly
               inefficient because we filter multiple times) */
            y1 = last_src_y;
            if (y1 < 0) {
                y1 = 0;
            } else if (y1 >= iheight) {
                y1 = iheight - 1;
            }
            src_line = input + y1 * iwrap;
            new_line = s->line_buf + ring_y * owidth;
            /* apply filter and handle limit cases correctly */
            h_resample(new_line, owidth,
                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
                       &s->h_filters[0][0]);
            /* handle ring buffer wrapping: rows written at or above
               LINE_BUF_HEIGHT are duplicated LINE_BUF_HEIGHT rows lower,
               presumably so that the NB_TAPS rows read by the vertical
               filter are contiguous after ring_y wraps */
            if (ring_y >= LINE_BUF_HEIGHT) {
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                       new_line, owidth);
            }
        }
        /* apply vertical filter */
        phase_y = get_phase(src_y);
#ifdef HAVE_MMX
        /* deactivated MMX because loss of precision (the trailing '&& 0'
           makes this branch unreachable) */
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
            v_resample4_mmx(output, owidth,
                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                            &s->v_filters[phase_y][0]);
        else
#endif
#ifdef HAVE_ALTIVEC
            /* only taken when FILTER_BITS <= 6 */
            if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
                v_resample16_altivec(output, owidth,
                                s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                                &s->v_filters[phase_y][0]);
            else
#endif
            v_resample(output, owidth,
                       s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                       &s->v_filters[phase_y][0]);

        /* advance to the next output line */
        src_y += s->v_incr;
        output += owrap;
    }
}
| 538 | |
| 539 /* XXX: the following filter is quite naive, but it seems to suffice | |
| 540 for 4 taps */ | |
| 1064 | 541 static void build_filter(int16_t *filter, float factor) |
| 0 | 542 { |
| 543 int ph, i, v; | |
| 544 float x, y, tab[NB_TAPS], norm, mult; | |
| 545 | |
| 546 /* if upsampling, only need to interpolate, no filter */ | |
| 547 if (factor > 1.0) | |
| 548 factor = 1.0; | |
| 549 | |
| 550 for(ph=0;ph<NB_PHASES;ph++) { | |
| 551 norm = 0; | |
| 552 for(i=0;i<NB_TAPS;i++) { | |
| 553 | |
| 554 x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor; | |
| 555 if (x == 0) | |
| 556 y = 1.0; | |
| 557 else | |
| 558 y = sin(x) / x; | |
| 559 tab[i] = y; | |
| 560 norm += y; | |
| 561 } | |
| 562 | |
| 563 /* normalize so that an uniform color remains the same */ | |
| 564 mult = (float)(1 << FILTER_BITS) / norm; | |
| 565 for(i=0;i<NB_TAPS;i++) { | |
| 566 v = (int)(tab[i] * mult); | |
| 567 filter[ph * NB_TAPS + i] = v; | |
| 568 } | |
| 569 } | |
| 570 } | |
| 571 | |
| 572 ImgReSampleContext *img_resample_init(int owidth, int oheight, | |
| 573 int iwidth, int iheight) | |
| 574 { | |
|
630
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
575 return img_resample_full_init(owidth, oheight, iwidth, iheight, 0, 0, 0, 0); |
|
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
576 } |
|
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
577 |
|
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
578 ImgReSampleContext *img_resample_full_init(int owidth, int oheight, |
|
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
579 int iwidth, int iheight, |
|
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
580 int topBand, int bottomBand, |
|
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
581 int leftBand, int rightBand) |
|
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
582 { |
| 0 | 583 ImgReSampleContext *s; |
| 584 | |
| 585 s = av_mallocz(sizeof(ImgReSampleContext)); | |
| 586 if (!s) | |
| 587 return NULL; | |
| 588 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS)); | |
| 589 if (!s->line_buf) | |
| 590 goto fail; | |
| 591 | |
| 592 s->owidth = owidth; | |
| 593 s->oheight = oheight; | |
| 594 s->iwidth = iwidth; | |
| 595 s->iheight = iheight; | |
|
630
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
596 s->topBand = topBand; |
|
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
597 s->bottomBand = bottomBand; |
|
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
598 s->leftBand = leftBand; |
|
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
599 s->rightBand = rightBand; |
| 0 | 600 |
|
630
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
601 s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / owidth; |
|
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
602 s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / oheight; |
| 0 | 603 |
|
630
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
604 build_filter(&s->h_filters[0][0], (float) owidth / (float) (iwidth - leftBand - rightBand)); |
|
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
605 build_filter(&s->v_filters[0][0], (float) oheight / (float) (iheight - topBand - bottomBand)); |
| 0 | 606 |
| 607 return s; | |
| 608 fail: | |
|
396
fce0a2520551
removed useless header includes - use av memory functions
glantau
parents:
18
diff
changeset
|
609 av_free(s); |
| 0 | 610 return NULL; |
| 611 } | |
| 612 | |
| 613 void img_resample(ImgReSampleContext *s, | |
|
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
614 AVPicture *output, const AVPicture *input) |
| 0 | 615 { |
| 616 int i, shift; | |
| 617 | |
| 618 for(i=0;i<3;i++) { | |
| 619 shift = (i == 0) ? 0 : 1; | |
| 620 component_resample(s, output->data[i], output->linesize[i], | |
| 621 s->owidth >> shift, s->oheight >> shift, | |
|
630
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
622 input->data[i] + (input->linesize[i] * (s->topBand >> shift)) + (s->leftBand >> shift), |
|
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
623 input->linesize[i], ((s->iwidth - s->leftBand - s->rightBand) >> shift), |
|
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
624 (s->iheight - s->topBand - s->bottomBand) >> shift); |
| 0 | 625 } |
| 626 } | |
| 627 | |
| 628 void img_resample_close(ImgReSampleContext *s) | |
| 629 { | |
|
396
fce0a2520551
removed useless header includes - use av memory functions
glantau
parents:
18
diff
changeset
|
630 av_free(s->line_buf); |
|
fce0a2520551
removed useless header includes - use av memory functions
glantau
parents:
18
diff
changeset
|
631 av_free(s); |
| 0 | 632 } |
| 633 | |
| 634 #ifdef TEST | |
| 635 | |
/*
 * Stand-in for av_mallocz() used by the TEST build: allocate 'size' bytes
 * and clear them.
 *
 * Returns NULL when size is negative or when malloc() fails; the memory is
 * only cleared after a successful allocation.
 */
void *av_mallocz(int size)
{
    void *ptr;

    /* a negative int would be converted to a huge size_t */
    if (size < 0)
        return NULL;
    ptr = malloc(size);
    if (ptr)
        memset(ptr, 0, size);
    return ptr;
}
| 643 | |
|
630
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
/*
 * Stand-in for av_free() used by the TEST build.
 * free() is defined to do nothing for a null pointer, so NULL can be
 * passed straight through.
 */
void av_free(void *ptr)
{
    free(ptr);
}
|
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
650 |
/* input: XSIZE x YSIZE test pattern, one byte per pixel, filled by main() */
#define XSIZE 256
#define YSIZE 256
uint8_t img[XSIZE * YSIZE];

/* output: sized for the largest scale factor tried by main() (2.0);
   img1 receives the resampled pictures, img2 the second result of the
   MMX comparison */
#define XSIZE1 512
#define YSIZE1 512
uint8_t img1[XSIZE1 * YSIZE1];
uint8_t img2[XSIZE1 * YSIZE1];
| 0 | 661 |
/*
 * Write an 8 bit grayscale image as a binary PGM ("P5") file.
 *
 * filename: path of the file to create.
 * img:      xsize * ysize pixels, one byte each, stored line by line.
 *
 * The file is opened in binary mode because the pixel data is raw bytes.
 * If it cannot be opened, the error is reported on stderr and nothing is
 * written.
 */
void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
    FILE *f;

    f = fopen(filename, "wb");
    if (!f) {
        perror(filename);
        return;
    }
    fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
    fwrite(img, 1, xsize * ysize, f);
    fclose(f);
}
| 670 | |
| 1064 | 671 static void dump_filter(int16_t *filter) |
| 0 | 672 { |
| 673 int i, ph; | |
| 674 | |
| 675 for(ph=0;ph<NB_PHASES;ph++) { | |
| 676 printf("%2d: ", ph); | |
| 677 for(i=0;i<NB_TAPS;i++) { | |
| 678 printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0); | |
| 679 } | |
| 680 printf("\n"); | |
| 681 } | |
| 682 } | |
| 683 | |
#ifdef HAVE_MMX
/* CPU feature flags tested by the resampling code; defined here for the
   TEST build, where main() switches MM_MMX on and off */
int mm_flags;
#endif
| 687 | |
| 688 int main(int argc, char **argv) | |
| 689 { | |
| 690 int x, y, v, i, xsize, ysize; | |
| 691 ImgReSampleContext *s; | |
| 692 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 }; | |
| 693 char buf[256]; | |
| 694 | |
| 695 /* build test image */ | |
| 696 for(y=0;y<YSIZE;y++) { | |
| 697 for(x=0;x<XSIZE;x++) { | |
| 698 if (x < XSIZE/2 && y < YSIZE/2) { | |
| 699 if (x < XSIZE/4 && y < YSIZE/4) { | |
| 700 if ((x % 10) <= 6 && | |
| 701 (y % 10) <= 6) | |
| 702 v = 0xff; | |
| 703 else | |
| 704 v = 0x00; | |
| 705 } else if (x < XSIZE/4) { | |
| 706 if (x & 1) | |
| 707 v = 0xff; | |
| 708 else | |
| 709 v = 0; | |
| 710 } else if (y < XSIZE/4) { | |
| 711 if (y & 1) | |
| 712 v = 0xff; | |
| 713 else | |
| 714 v = 0; | |
| 715 } else { | |
| 716 if (y < YSIZE*3/8) { | |
| 717 if ((y+x) & 1) | |
| 718 v = 0xff; | |
| 719 else | |
| 720 v = 0; | |
| 721 } else { | |
| 722 if (((x+3) % 4) <= 1 && | |
| 723 ((y+3) % 4) <= 1) | |
| 724 v = 0xff; | |
| 725 else | |
| 726 v = 0x00; | |
| 727 } | |
| 728 } | |
| 729 } else if (x < XSIZE/2) { | |
| 730 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2); | |
| 731 } else if (y < XSIZE/2) { | |
| 732 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2); | |
| 733 } else { | |
| 734 v = ((x + y - XSIZE) * 255) / XSIZE; | |
| 735 } | |
|
630
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
736 img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v; |
| 0 | 737 } |
| 738 } | |
| 739 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE); | |
| 740 for(i=0;i<sizeof(factors)/sizeof(float);i++) { | |
| 741 fact = factors[i]; | |
| 742 xsize = (int)(XSIZE * fact); | |
|
630
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
743 ysize = (int)((YSIZE - 100) * fact); |
| 644 | 744 s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0); |
| 0 | 745 printf("Factor=%0.2f\n", fact); |
| 746 dump_filter(&s->h_filters[0][0]); | |
| 747 component_resample(s, img1, xsize, xsize, ysize, | |
|
630
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
748 img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100); |
| 0 | 749 img_resample_close(s); |
| 750 | |
| 751 sprintf(buf, "/tmp/out%d.pgm", i); | |
| 752 save_pgm(buf, img1, xsize, ysize); | |
| 753 } | |
| 754 | |
| 755 /* mmx test */ | |
| 2 | 756 #ifdef HAVE_MMX |
| 0 | 757 printf("MMX test\n"); |
| 758 fact = 0.72; | |
| 759 xsize = (int)(XSIZE * fact); | |
| 760 ysize = (int)(YSIZE * fact); | |
| 761 mm_flags = MM_MMX; | |
| 762 s = img_resample_init(xsize, ysize, XSIZE, YSIZE); | |
| 763 component_resample(s, img1, xsize, xsize, ysize, | |
| 764 img, XSIZE, XSIZE, YSIZE); | |
| 765 | |
| 766 mm_flags = 0; | |
| 767 s = img_resample_init(xsize, ysize, XSIZE, YSIZE); | |
| 768 component_resample(s, img2, xsize, xsize, ysize, | |
| 769 img, XSIZE, XSIZE, YSIZE); | |
| 770 if (memcmp(img1, img2, xsize * ysize) != 0) { | |
| 771 fprintf(stderr, "mmx error\n"); | |
| 772 exit(1); | |
| 773 } | |
| 774 printf("MMX OK\n"); | |
| 775 #endif | |
| 776 return 0; | |
| 777 } | |
| 778 | |
| 779 #endif |
