Mercurial > audlegacy-plugins
comparison src/ffmpeg/libavcodec/imgresample.c @ 808:e8776388b02a trunk
[svn] - add ffmpeg
| author | nenolod |
|---|---|
| date | Mon, 12 Mar 2007 11:18:54 -0700 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 807:0f9c8d4d3ac4 | 808:e8776388b02a |
|---|---|
| 1 /* | |
| 2 * High quality image resampling with polyphase filters | |
| 3 * Copyright (c) 2001 Fabrice Bellard. | |
| 4 * | |
| 5 * This file is part of FFmpeg. | |
| 6 * | |
| 7 * FFmpeg is free software; you can redistribute it and/or | |
| 8 * modify it under the terms of the GNU Lesser General Public | |
| 9 * License as published by the Free Software Foundation; either | |
| 10 * version 2.1 of the License, or (at your option) any later version. | |
| 11 * | |
| 12 * FFmpeg is distributed in the hope that it will be useful, | |
| 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 15 * Lesser General Public License for more details. | |
| 16 * | |
| 17 * You should have received a copy of the GNU Lesser General Public | |
| 18 * License along with FFmpeg; if not, write to the Free Software | |
| 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
| 20 */ | |
| 21 | |
| 22 /** | |
| 23 * @file imgresample.c | |
| 24 * High quality image resampling with polyphase filters. | |
| 25 */ | |
| 26 | |
| 27 #include "avcodec.h" | |
| 28 #include "swscale.h" | |
| 29 #include "dsputil.h" | |
| 30 | |
| 31 #ifdef USE_FASTMEMCPY | |
| 32 #include "libvo/fastmemcpy.h" | |
| 33 #endif | |
| 34 | |
| 35 #define NB_COMPONENTS 3 | |
| 36 | |
| 37 #define PHASE_BITS 4 | |
| 38 #define NB_PHASES (1 << PHASE_BITS) | |
| 39 #define NB_TAPS 4 | |
| 40 #define FCENTER 1 /* index of the center of the filter */ | |
| 41 //#define TEST 1 /* Test it */ | |
| 42 | |
| 43 #define POS_FRAC_BITS 16 | |
| 44 #define POS_FRAC (1 << POS_FRAC_BITS) | |
| 45 /* 6 bits precision is needed for MMX */ | |
| 46 #define FILTER_BITS 8 | |
| 47 | |
| 48 #define LINE_BUF_HEIGHT (NB_TAPS * 4) | |
| 49 | |
/* State for polyphase resampling of one YUV420P picture.
   Created by img_resample_full_init(), freed by img_resample_close(). */
struct ImgReSampleContext {
    int iwidth, iheight, owidth, oheight;           /* input/output dimensions */
    int topBand, bottomBand, leftBand, rightBand;   /* cropping applied to the input */
    int padtop, padbottom, padleft, padright;       /* borders left untouched in the output */
    int pad_owidth, pad_oheight;                    /* output size minus padding: the area actually scaled */
    int h_incr, v_incr;                             /* source step per output pixel, 16.16 fixed point */
    DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
    DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
    uint8_t *line_buf;  /* ring buffer of horizontally filtered lines, (LINE_BUF_HEIGHT + NB_TAPS) rows of owidth bytes */
};
| 60 | |
| 61 void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type); | |
| 62 | |
| 63 static inline int get_phase(int pos) | |
| 64 { | |
| 65 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1); | |
| 66 } | |
| 67 | |
| 68 /* This function must be optimized */ | |
| 69 static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src, | |
| 70 int src_width, int src_start, int src_incr, | |
| 71 int16_t *filters) | |
| 72 { | |
| 73 int src_pos, phase, sum, i; | |
| 74 const uint8_t *s; | |
| 75 int16_t *filter; | |
| 76 | |
| 77 src_pos = src_start; | |
| 78 for(i=0;i<dst_width;i++) { | |
| 79 #ifdef TEST | |
| 80 /* test */ | |
| 81 if ((src_pos >> POS_FRAC_BITS) < 0 || | |
| 82 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS)) | |
| 83 av_abort(); | |
| 84 #endif | |
| 85 s = src + (src_pos >> POS_FRAC_BITS); | |
| 86 phase = get_phase(src_pos); | |
| 87 filter = filters + phase * NB_TAPS; | |
| 88 #if NB_TAPS == 4 | |
| 89 sum = s[0] * filter[0] + | |
| 90 s[1] * filter[1] + | |
| 91 s[2] * filter[2] + | |
| 92 s[3] * filter[3]; | |
| 93 #else | |
| 94 { | |
| 95 int j; | |
| 96 sum = 0; | |
| 97 for(j=0;j<NB_TAPS;j++) | |
| 98 sum += s[j] * filter[j]; | |
| 99 } | |
| 100 #endif | |
| 101 sum = sum >> FILTER_BITS; | |
| 102 if (sum < 0) | |
| 103 sum = 0; | |
| 104 else if (sum > 255) | |
| 105 sum = 255; | |
| 106 dst[0] = sum; | |
| 107 src_pos += src_incr; | |
| 108 dst++; | |
| 109 } | |
| 110 } | |
| 111 | |
| 112 /* This function must be optimized */ | |
| 113 static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src, | |
| 114 int wrap, int16_t *filter) | |
| 115 { | |
| 116 int sum, i; | |
| 117 const uint8_t *s; | |
| 118 | |
| 119 s = src; | |
| 120 for(i=0;i<dst_width;i++) { | |
| 121 #if NB_TAPS == 4 | |
| 122 sum = s[0 * wrap] * filter[0] + | |
| 123 s[1 * wrap] * filter[1] + | |
| 124 s[2 * wrap] * filter[2] + | |
| 125 s[3 * wrap] * filter[3]; | |
| 126 #else | |
| 127 { | |
| 128 int j; | |
| 129 uint8_t *s1 = s; | |
| 130 | |
| 131 sum = 0; | |
| 132 for(j=0;j<NB_TAPS;j++) { | |
| 133 sum += s1[0] * filter[j]; | |
| 134 s1 += wrap; | |
| 135 } | |
| 136 } | |
| 137 #endif | |
| 138 sum = sum >> FILTER_BITS; | |
| 139 if (sum < 0) | |
| 140 sum = 0; | |
| 141 else if (sum > 255) | |
| 142 sum = 255; | |
| 143 dst[0] = sum; | |
| 144 dst++; | |
| 145 s++; | |
| 146 } | |
| 147 } | |
| 148 | |
#ifdef HAVE_MMX

#include "i386/mmx.h"

/* One horizontal 4-tap filter step using MMX: load 4 source bytes at the
   current fixed-point position, multiply-accumulate them against the
   phase-selected filter and leave the >>FILTER_BITS result in 'reg'.
   Uses mm6 as scratch, expects mm7 == 0, and advances src_pos. */
#define FILTER4(reg) \
{\
    s = src + (src_pos >> POS_FRAC_BITS);\
    phase = get_phase(src_pos);\
    filter = filters + phase * NB_TAPS;\
    movq_m2r(*s, reg);\
    punpcklbw_r2r(mm7, reg);\
    movq_m2r(*filter, mm6);\
    pmaddwd_r2r(reg, mm6);\
    movq_r2r(mm6, reg);\
    psrlq_i2r(32, reg);\
    paddd_r2r(mm6, reg);\
    psrad_i2r(FILTER_BITS, reg);\
    src_pos += src_incr;\
}

/* debug helper: print an MMX register (needs an mmx_t 'tmp' in scope) */
#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);

/* XXX: do four pixels at a time */
/* MMX version of h_resample_fast for NB_TAPS == 4. Computes four output
   pixels per main-loop iteration, then finishes the remainder one pixel
   at a time. Saturation to 0..255 comes from packuswb. */
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
                                 const uint8_t *src, int src_width,
                                 int src_start, int src_incr, int16_t *filters)
{
    int src_pos, phase;
    const uint8_t *s;
    int16_t *filter;
    mmx_t tmp;

    src_pos = src_start;
    pxor_r2r(mm7, mm7);   /* mm7 = 0, required by FILTER4 for byte unpacking */

    while (dst_width >= 4) {

        FILTER4(mm0);
        FILTER4(mm1);
        FILTER4(mm2);
        FILTER4(mm3);

        packuswb_r2r(mm7, mm0);
        packuswb_r2r(mm7, mm1);
        packuswb_r2r(mm7, mm3);
        packuswb_r2r(mm7, mm2);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        movq_r2m(mm1, tmp);
        dst[1] = tmp.ub[0];
        movq_r2m(mm2, tmp);
        dst[2] = tmp.ub[0];
        movq_r2m(mm3, tmp);
        dst[3] = tmp.ub[0];
        dst += 4;
        dst_width -= 4;
    }
    /* leftover pixels (dst_width % 4) */
    while (dst_width > 0) {
        FILTER4(mm0);
        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        dst++;
        dst_width--;
    }
    emms();
}

/* MMX version of v_resample for NB_TAPS == 4: processes four columns at a
   time using 16-bit multiplies (less precision than the C version, which
   is why the caller keeps it disabled), with a scalar tail loop. */
static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
                            int wrap, int16_t *filter)
{
    int sum, i, v;
    const uint8_t *s;
    mmx_t tmp;
    mmx_t coefs[4];

    /* broadcast each filter tap into all four words of an MMX operand */
    for(i=0;i<4;i++) {
        v = filter[i];
        coefs[i].uw[0] = v;
        coefs[i].uw[1] = v;
        coefs[i].uw[2] = v;
        coefs[i].uw[3] = v;
    }

    pxor_r2r(mm7, mm7);
    s = src;
    while (dst_width >= 4) {
        /* load 4 pixels from each tap's line and widen to 16 bits */
        movq_m2r(s[0 * wrap], mm0);
        punpcklbw_r2r(mm7, mm0);
        movq_m2r(s[1 * wrap], mm1);
        punpcklbw_r2r(mm7, mm1);
        movq_m2r(s[2 * wrap], mm2);
        punpcklbw_r2r(mm7, mm2);
        movq_m2r(s[3 * wrap], mm3);
        punpcklbw_r2r(mm7, mm3);

        pmullw_m2r(coefs[0], mm0);
        pmullw_m2r(coefs[1], mm1);
        pmullw_m2r(coefs[2], mm2);
        pmullw_m2r(coefs[3], mm3);

        paddw_r2r(mm1, mm0);
        paddw_r2r(mm3, mm2);
        paddw_r2r(mm2, mm0);
        psraw_i2r(FILTER_BITS, mm0);

        packuswb_r2r(mm7, mm0);   /* saturate to 0..255 */
        movq_r2m(mm0, tmp);

        *(uint32_t *)dst = tmp.ud[0];
        dst += 4;
        s += 4;
        dst_width -= 4;
    }
    /* scalar tail for the remaining columns */
    while (dst_width > 0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
    emms();
}
#endif
| 281 | |
#ifdef HAVE_ALTIVEC
typedef union {
    vector unsigned char v;
    unsigned char c[16];
} vec_uc_t;

typedef union {
    vector signed short v;
    signed short s[8];
} vec_ss_t;

/* AltiVec version of v_resample for NB_TAPS == 4: scalar pixels until
   dst is 16-byte aligned, then 16 pixels per iteration with vec_madds,
   then a scalar tail. Requires FILTER_BITS <= 6 so the coefficients can
   be pre-shifted to compensate for vec_madds' implicit >>15 (the caller
   checks this). */
void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
                          int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;
    vector unsigned char *tv, tmp, dstv, zero;
    vec_ss_t srchv[4], srclv[4], fv[4];
    vector signed short zeros, sumhv, sumlv;
    s = src;

    for(i=0;i<4;i++)
    {
        /*
           The vec_madds later on does an implicit >>15 on the result.
           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
           a signed short, we have just enough bits to pre-shift our
           filter constants <<7 to compensate for vec_madds.
        */
        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
        fv[i].v = vec_splat(fv[i].v, 0);
    }

    zero = vec_splat_u8(0);
    zeros = vec_splat_s16(0);


    /*
       When we're resampling, we'd ideally like both our input buffers,
       and output buffers to be 16-byte aligned, so we can do both aligned
       reads and writes. Sadly we can't always have this at the moment, so
       we opt for aligned writes, as unaligned writes have a huge overhead.
       To do this, do enough scalar resamples to get dst 16-byte aligned.
    */
    i = (-(int)dst) & 0xf;
    while(i>0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
        i--;
    }

    /* Do our altivec resampling on 16 pixels at once. */
    while(dst_width>=16) {
        /*
           Read 16 (potentially unaligned) bytes from each of
           4 lines into 4 vectors, and split them into shorts.
           Interleave the multiply/accumulate for the resample
           filter with the loads to hide the 3 cycle latency
           the vec_madds have.
        */
        tv = (vector unsigned char *) &s[0 * wrap];
        /* bug fix: this used vec_lvsl(0, &s[i * wrap]); it only worked
           because 'i' is always 0 after the alignment loop above */
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[0 * wrap]));
        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);

        tv = (vector unsigned char *) &s[1 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);

        tv = (vector unsigned char *) &s[2 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);

        tv = (vector unsigned char *) &s[3 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);

        /*
           Pack the results into our destination vector,
           and do an aligned write of that back to memory.
        */
        dstv = vec_packsu(sumhv, sumlv) ;
        vec_st(dstv, 0, (vector unsigned char *) dst);

        dst+=16;
        s+=16;
        dst_width-=16;
    }

    /*
       If there are any leftover pixels, resample them
       with the slow scalar method.
    */
    while(dst_width>0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
}
#endif
| 408 | |
| 409 /* slow version to handle limit cases. Does not need optimisation */ | |
| 410 static void h_resample_slow(uint8_t *dst, int dst_width, | |
| 411 const uint8_t *src, int src_width, | |
| 412 int src_start, int src_incr, int16_t *filters) | |
| 413 { | |
| 414 int src_pos, phase, sum, j, v, i; | |
| 415 const uint8_t *s, *src_end; | |
| 416 int16_t *filter; | |
| 417 | |
| 418 src_end = src + src_width; | |
| 419 src_pos = src_start; | |
| 420 for(i=0;i<dst_width;i++) { | |
| 421 s = src + (src_pos >> POS_FRAC_BITS); | |
| 422 phase = get_phase(src_pos); | |
| 423 filter = filters + phase * NB_TAPS; | |
| 424 sum = 0; | |
| 425 for(j=0;j<NB_TAPS;j++) { | |
| 426 if (s < src) | |
| 427 v = src[0]; | |
| 428 else if (s >= src_end) | |
| 429 v = src_end[-1]; | |
| 430 else | |
| 431 v = s[0]; | |
| 432 sum += v * filter[j]; | |
| 433 s++; | |
| 434 } | |
| 435 sum = sum >> FILTER_BITS; | |
| 436 if (sum < 0) | |
| 437 sum = 0; | |
| 438 else if (sum > 255) | |
| 439 sum = 255; | |
| 440 dst[0] = sum; | |
| 441 src_pos += src_incr; | |
| 442 dst++; | |
| 443 } | |
| 444 } | |
| 445 | |
/* Horizontally resample one full output line. The line is split into up
   to three regions: a left border and a right border where filter taps
   would fall outside the source (handled by h_resample_slow with edge
   replication), and a safe middle region handled by the fast path
   (MMX when available). */
static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
                       int src_width, int src_start, int src_incr,
                       int16_t *filters)
{
    int n, src_end;

    /* left border: output pixels whose source position is negative */
    if (src_start < 0) {
        n = (0 - src_start + src_incr - 1) / src_incr;
        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
    }
    /* count the pixels whose NB_TAPS window stays inside the line */
    src_end = src_start + dst_width * src_incr;
    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
            src_incr;
    } else {
        n = dst_width;
    }
#ifdef HAVE_MMX
    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
        h_resample_fast4_mmx(dst, n,
                             src, src_width, src_start, src_incr, filters);
    else
#endif
        h_resample_fast(dst, n,
                        src, src_width, src_start, src_incr, filters);
    /* right border, if any pixels remain */
    if (n < dst_width) {
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
        h_resample_slow(dst, dst_width,
                        src, src_width, src_start, src_incr, filters);
    }
}
| 482 | |
/* Resample one component plane. Input lines are horizontally filtered on
   demand into a ring buffer of output-width lines; each output line is
   then produced by vertically filtering NB_TAPS consecutive buffered
   lines with the phase selected by the fixed-point vertical position. */
static void component_resample(ImgReSampleContext *s,
                               uint8_t *output, int owrap, int owidth, int oheight,
                               uint8_t *input, int iwrap, int iwidth, int iheight)
{
    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
    uint8_t *new_line, *src_line;

    last_src_y = - FCENTER - 1;
    /* position of the bottom of the filter in the source image */
    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
    ring_y = NB_TAPS; /* position in ring buffer */
    for(y=0;y<oheight;y++) {
        /* apply horizontal filter on new lines from input if needed */
        src_y1 = src_y >> POS_FRAC_BITS;
        while (last_src_y < src_y1) {
            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
                ring_y = NB_TAPS;
            last_src_y++;
            /* handle limit conditions : replicate line (slightly
               inefficient because we filter multiple times) */
            y1 = last_src_y;
            if (y1 < 0) {
                y1 = 0;
            } else if (y1 >= iheight) {
                y1 = iheight - 1;
            }
            src_line = input + y1 * iwrap;
            new_line = s->line_buf + ring_y * owidth;
            /* apply filter and handle limit cases correctly */
            h_resample(new_line, owidth,
                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
                       &s->h_filters[0][0]);
            /* handle ring buffer wrapping: mirror lines written in the
               top NB_TAPS rows back to the bottom of the buffer so the
               vertical filter can always read NB_TAPS consecutive rows */
            if (ring_y >= LINE_BUF_HEIGHT) {
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                       new_line, owidth);
            }
        }
        /* apply vertical filter */
        phase_y = get_phase(src_y);
#ifdef HAVE_MMX
        /* MMX path deactivated because of its loss of precision
           (note the '&& 0') */
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
            v_resample4_mmx(output, owidth,
                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                            &s->v_filters[phase_y][0]);
        else
#endif
#ifdef HAVE_ALTIVEC
            if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
                v_resample16_altivec(output, owidth,
                                     s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                                     &s->v_filters[phase_y][0]);
            else
#endif
                v_resample(output, owidth,
                           s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                           &s->v_filters[phase_y][0]);

        src_y += s->v_incr;

        output += owrap;
    }
}
| 547 | |
/* Convenience wrapper: create a resampling context with no cropping and
   no padding. Returns NULL on failure (see img_resample_full_init). */
ImgReSampleContext *img_resample_init(int owidth, int oheight,
                                      int iwidth, int iheight)
{
    return img_resample_full_init(owidth, oheight, iwidth, iheight,
            0, 0, 0, 0, 0, 0, 0, 0);
}
| 554 | |
| 555 ImgReSampleContext *img_resample_full_init(int owidth, int oheight, | |
| 556 int iwidth, int iheight, | |
| 557 int topBand, int bottomBand, | |
| 558 int leftBand, int rightBand, | |
| 559 int padtop, int padbottom, | |
| 560 int padleft, int padright) | |
| 561 { | |
| 562 ImgReSampleContext *s; | |
| 563 | |
| 564 if (!owidth || !oheight || !iwidth || !iheight) | |
| 565 return NULL; | |
| 566 | |
| 567 s = av_mallocz(sizeof(ImgReSampleContext)); | |
| 568 if (!s) | |
| 569 return NULL; | |
| 570 if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS)) | |
| 571 return NULL; | |
| 572 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS)); | |
| 573 if (!s->line_buf) | |
| 574 goto fail; | |
| 575 | |
| 576 s->owidth = owidth; | |
| 577 s->oheight = oheight; | |
| 578 s->iwidth = iwidth; | |
| 579 s->iheight = iheight; | |
| 580 | |
| 581 s->topBand = topBand; | |
| 582 s->bottomBand = bottomBand; | |
| 583 s->leftBand = leftBand; | |
| 584 s->rightBand = rightBand; | |
| 585 | |
| 586 s->padtop = padtop; | |
| 587 s->padbottom = padbottom; | |
| 588 s->padleft = padleft; | |
| 589 s->padright = padright; | |
| 590 | |
| 591 s->pad_owidth = owidth - (padleft + padright); | |
| 592 s->pad_oheight = oheight - (padtop + padbottom); | |
| 593 | |
| 594 s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth; | |
| 595 s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight; | |
| 596 | |
| 597 av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth / | |
| 598 (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0); | |
| 599 av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight / | |
| 600 (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0); | |
| 601 | |
| 602 return s; | |
| 603 fail: | |
| 604 av_free(s); | |
| 605 return NULL; | |
| 606 } | |
| 607 | |
/* Resample all three planes of a picture, honouring the crop bands on
   input and the padding borders on output configured at init time.
   NOTE(review): plane 0 is taken at full resolution and planes 1/2 at
   half resolution, i.e. the pictures are assumed to be YUV420P —
   confirm against callers. */
void img_resample(ImgReSampleContext *s,
                  AVPicture *output, const AVPicture *input)
{
    int i, shift;
    uint8_t* optr;

    for (i=0;i<3;i++) {
        shift = (i == 0) ? 0 : 1;   /* chroma planes are subsampled by 2 */

        /* skip the output padding area */
        optr = output->data[i] + (((output->linesize[i] *
                        s->padtop) + s->padleft) >> shift);

        component_resample(s, optr, output->linesize[i],
                s->pad_owidth >> shift, s->pad_oheight >> shift,
                input->data[i] + (input->linesize[i] *
                    (s->topBand >> shift)) + (s->leftBand >> shift),
                input->linesize[i], ((s->iwidth - s->leftBand -
                        s->rightBand) >> shift),
                (s->iheight - s->topBand - s->bottomBand) >> shift);
    }
}
| 629 | |
| 630 void img_resample_close(ImgReSampleContext *s) | |
| 631 { | |
| 632 av_free(s->line_buf); | |
| 633 av_free(s); | |
| 634 } | |
| 635 | |
| 636 struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, | |
| 637 int dstW, int dstH, int dstFormat, | |
| 638 int flags, SwsFilter *srcFilter, | |
| 639 SwsFilter *dstFilter, double *param) | |
| 640 { | |
| 641 struct SwsContext *ctx; | |
| 642 | |
| 643 ctx = av_malloc(sizeof(struct SwsContext)); | |
| 644 if (ctx == NULL) { | |
| 645 av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n"); | |
| 646 | |
| 647 return NULL; | |
| 648 } | |
| 649 | |
| 650 if ((srcH != dstH) || (srcW != dstW)) { | |
| 651 if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) { | |
| 652 av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n"); | |
| 653 } | |
| 654 ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH); | |
| 655 } else { | |
| 656 ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext)); | |
| 657 ctx->resampling_ctx->iheight = srcH; | |
| 658 ctx->resampling_ctx->iwidth = srcW; | |
| 659 ctx->resampling_ctx->oheight = dstH; | |
| 660 ctx->resampling_ctx->owidth = dstW; | |
| 661 } | |
| 662 ctx->src_pix_fmt = srcFormat; | |
| 663 ctx->dst_pix_fmt = dstFormat; | |
| 664 | |
| 665 return ctx; | |
| 666 } | |
| 667 | |
| 668 void sws_freeContext(struct SwsContext *ctx) | |
| 669 { | |
| 670 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) || | |
| 671 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) { | |
| 672 img_resample_close(ctx->resampling_ctx); | |
| 673 } else { | |
| 674 av_free(ctx->resampling_ctx); | |
| 675 } | |
| 676 av_free(ctx); | |
| 677 } | |
| 678 | |
| 679 | |
| 680 /** | |
| 681 * Checks if context is valid or reallocs a new one instead. | |
| 682 * If context is NULL, just calls sws_getContext() to get a new one. | |
| 683 * Otherwise, checks if the parameters are the same already saved in context. | |
| 684 * If that is the case, returns the current context. | |
| 685 * Otherwise, frees context and gets a new one. | |
| 686 * | |
| 687 * Be warned that srcFilter, dstFilter are not checked, they are | |
| 688 * assumed to remain valid. | |
| 689 */ | |
| 690 struct SwsContext *sws_getCachedContext(struct SwsContext *ctx, | |
| 691 int srcW, int srcH, int srcFormat, | |
| 692 int dstW, int dstH, int dstFormat, int flags, | |
| 693 SwsFilter *srcFilter, SwsFilter *dstFilter, double *param) | |
| 694 { | |
| 695 if (ctx != NULL) { | |
| 696 if ((ctx->resampling_ctx->iwidth != srcW) || | |
| 697 (ctx->resampling_ctx->iheight != srcH) || | |
| 698 (ctx->src_pix_fmt != srcFormat) || | |
| 699 (ctx->resampling_ctx->owidth != dstW) || | |
| 700 (ctx->resampling_ctx->oheight != dstH) || | |
| 701 (ctx->dst_pix_fmt != dstFormat)) | |
| 702 { | |
| 703 sws_freeContext(ctx); | |
| 704 ctx = NULL; | |
| 705 } | |
| 706 } | |
| 707 if (ctx == NULL) { | |
| 708 return sws_getContext(srcW, srcH, srcFormat, | |
| 709 dstW, dstH, dstFormat, flags, | |
| 710 srcFilter, dstFilter, param); | |
| 711 } | |
| 712 return ctx; | |
| 713 } | |
| 714 | |
/* Convert and/or rescale a picture through the legacy resampler.
   Rescaling is done in YUV420P: non-YUV420P inputs/outputs are converted
   through temporary buffers. Returns 0 on success, -1 on error.
   NOTE(review): srcSliceY and srcSliceH are ignored — the whole picture
   is always processed; confirm callers pass full frames. */
int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
              int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
{
    AVPicture src_pict, dst_pict;
    int i, res = 0;
    AVPicture picture_format_temp;
    AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
    uint8_t *buf1 = NULL, *buf2 = NULL;
    enum PixelFormat current_pix_fmt;

    /* wrap the raw plane pointers/strides in AVPicture structs */
    for (i = 0; i < 3; i++) {
        src_pict.data[i] = src[i];
        src_pict.linesize[i] = srcStride[i];
        dst_pict.data[i] = dst[i];
        dst_pict.linesize[i] = dstStride[i];
    }
    if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
        (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
        /* We have to rescale the picture, but only YUV420P rescaling is supported... */

        if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
            int size;

            /* create temporary picture for rescaling input*/
            size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
            buf1 = av_malloc(size);
            if (!buf1) {
                res = -1;
                goto the_end;
            }
            formatted_picture = &picture_format_temp;
            avpicture_fill((AVPicture*)formatted_picture, buf1,
                           PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);

            if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
                            &src_pict, ctx->src_pix_fmt,
                            ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {

                av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
                res = -1;
                goto the_end;
            }
        } else {
            formatted_picture = &src_pict;
        }

        if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
            int size;

            /* create temporary picture for rescaling output*/
            size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
            buf2 = av_malloc(size);
            if (!buf2) {
                res = -1;
                goto the_end;
            }
            resampled_picture = &picture_resample_temp;
            avpicture_fill((AVPicture*)resampled_picture, buf2,
                           PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);

        } else {
            resampled_picture = &dst_pict;
        }

        /* ...and finally rescale!!! */
        img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
        current_pix_fmt = PIX_FMT_YUV420P;
    } else {
        /* no rescaling needed: pass the input straight to the
           format-conversion / copy stage below */
        resampled_picture = &src_pict;
        current_pix_fmt = ctx->src_pix_fmt;
    }

    if (current_pix_fmt != ctx->dst_pix_fmt) {
        if (img_convert(&dst_pict, ctx->dst_pix_fmt,
                        resampled_picture, current_pix_fmt,
                        ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {

            av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");

            res = -1;
            goto the_end;
        }
    } else if (resampled_picture != &dst_pict) {
        img_copy(&dst_pict, resampled_picture, current_pix_fmt,
                 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
    }

the_end:
    /* av_free(NULL) is a no-op, so unconditional cleanup is safe */
    av_free(buf1);
    av_free(buf2);
    return res;
}
| 807 | |
| 808 | |
| 809 #ifdef TEST | |
| 810 #include <stdio.h> | |
| 811 | |
| 812 /* input */ | |
| 813 #define XSIZE 256 | |
| 814 #define YSIZE 256 | |
| 815 uint8_t img[XSIZE * YSIZE]; | |
| 816 | |
| 817 /* output */ | |
| 818 #define XSIZE1 512 | |
| 819 #define YSIZE1 512 | |
| 820 uint8_t img1[XSIZE1 * YSIZE1]; | |
| 821 uint8_t img2[XSIZE1 * YSIZE1]; | |
| 822 | |
/* Write an 8-bit grayscale image as a binary PGM (P5) file.
   Test-only helper; on fopen failure it simply returns (bug fix: the
   original dereferenced the unchecked NULL FILE pointer). */
void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
#undef fprintf
    FILE *f;
    f = fopen(filename, "w");
    if (!f)
        return;
    fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
    fwrite(img, 1, xsize * ysize, f);
    fclose(f);
#define fprintf please_use_av_log
}
| 833 | |
/* Print a filter bank (NB_PHASES x NB_TAPS coefficients stored in
   FILTER_BITS fixed point) as floating-point values, one phase per line. */
static void dump_filter(int16_t *filter)
{
    int i, ph;

    for(ph=0;ph<NB_PHASES;ph++) {
        av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
        for(i=0;i<NB_TAPS;i++) {
            av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
        }
        av_log(NULL, AV_LOG_INFO, "\n");
    }
}
| 846 | |
| 847 #ifdef HAVE_MMX | |
| 848 int mm_flags; | |
| 849 #endif | |
| 850 | |
| 851 int main(int argc, char **argv) | |
| 852 { | |
| 853 int x, y, v, i, xsize, ysize; | |
| 854 ImgReSampleContext *s; | |
| 855 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 }; | |
| 856 char buf[256]; | |
| 857 | |
| 858 /* build test image */ | |
| 859 for(y=0;y<YSIZE;y++) { | |
| 860 for(x=0;x<XSIZE;x++) { | |
| 861 if (x < XSIZE/2 && y < YSIZE/2) { | |
| 862 if (x < XSIZE/4 && y < YSIZE/4) { | |
| 863 if ((x % 10) <= 6 && | |
| 864 (y % 10) <= 6) | |
| 865 v = 0xff; | |
| 866 else | |
| 867 v = 0x00; | |
| 868 } else if (x < XSIZE/4) { | |
| 869 if (x & 1) | |
| 870 v = 0xff; | |
| 871 else | |
| 872 v = 0; | |
| 873 } else if (y < XSIZE/4) { | |
| 874 if (y & 1) | |
| 875 v = 0xff; | |
| 876 else | |
| 877 v = 0; | |
| 878 } else { | |
| 879 if (y < YSIZE*3/8) { | |
| 880 if ((y+x) & 1) | |
| 881 v = 0xff; | |
| 882 else | |
| 883 v = 0; | |
| 884 } else { | |
| 885 if (((x+3) % 4) <= 1 && | |
| 886 ((y+3) % 4) <= 1) | |
| 887 v = 0xff; | |
| 888 else | |
| 889 v = 0x00; | |
| 890 } | |
| 891 } | |
| 892 } else if (x < XSIZE/2) { | |
| 893 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2); | |
| 894 } else if (y < XSIZE/2) { | |
| 895 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2); | |
| 896 } else { | |
| 897 v = ((x + y - XSIZE) * 255) / XSIZE; | |
| 898 } | |
| 899 img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v; | |
| 900 } | |
| 901 } | |
| 902 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE); | |
| 903 for(i=0;i<sizeof(factors)/sizeof(float);i++) { | |
| 904 fact = factors[i]; | |
| 905 xsize = (int)(XSIZE * fact); | |
| 906 ysize = (int)((YSIZE - 100) * fact); | |
| 907 s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0); | |
| 908 av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact); | |
| 909 dump_filter(&s->h_filters[0][0]); | |
| 910 component_resample(s, img1, xsize, xsize, ysize, | |
| 911 img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100); | |
| 912 img_resample_close(s); | |
| 913 | |
| 914 snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i); | |
| 915 save_pgm(buf, img1, xsize, ysize); | |
| 916 } | |
| 917 | |
| 918 /* mmx test */ | |
| 919 #ifdef HAVE_MMX | |
| 920 av_log(NULL, AV_LOG_INFO, "MMX test\n"); | |
| 921 fact = 0.72; | |
| 922 xsize = (int)(XSIZE * fact); | |
| 923 ysize = (int)(YSIZE * fact); | |
| 924 mm_flags = MM_MMX; | |
| 925 s = img_resample_init(xsize, ysize, XSIZE, YSIZE); | |
| 926 component_resample(s, img1, xsize, xsize, ysize, | |
| 927 img, XSIZE, XSIZE, YSIZE); | |
| 928 | |
| 929 mm_flags = 0; | |
| 930 s = img_resample_init(xsize, ysize, XSIZE, YSIZE); | |
| 931 component_resample(s, img2, xsize, xsize, ysize, | |
| 932 img, XSIZE, XSIZE, YSIZE); | |
| 933 if (memcmp(img1, img2, xsize * ysize) != 0) { | |
| 934 av_log(NULL, AV_LOG_ERROR, "mmx error\n"); | |
| 935 exit(1); | |
| 936 } | |
| 937 av_log(NULL, AV_LOG_INFO, "MMX OK\n"); | |
| 938 #endif | |
| 939 return 0; | |
| 940 } | |
| 941 | |
| 942 #endif |
