Mercurial > libavcodec.hg
annotate liba52/imdct.c @ 4451:9fa2c8a7e4d8 libavcodec
Move lzo decompression to libavutil
| author | reimar |
|---|---|
| date | Wed, 31 Jan 2007 22:58:53 +0000 |
| parents | 0b546eab515d |
| children |
| rev | line source |
|---|---|
| 332 | 1 /* |
| 2 * imdct.c | |
| 1072 | 3 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> |
| 332 | 4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> |
| 5 * | |
| 6 * The ifft algorithms in this file have been largely inspired by Dan | |
| 7 * Bernstein's work, djbfft, available at http://cr.yp.to/djbfft.html | |
| 8 * | |
| 9 * This file is part of a52dec, a free ATSC A-52 stream decoder. | |
| 10 * See http://liba52.sourceforge.net/ for updates. | |
| 11 * | |
| 12 * a52dec is free software; you can redistribute it and/or modify | |
| 13 * it under the terms of the GNU General Public License as published by | |
| 14 * the Free Software Foundation; either version 2 of the License, or | |
| 15 * (at your option) any later version. | |
| 16 * | |
| 17 * a52dec is distributed in the hope that it will be useful, | |
| 18 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 20 * GNU General Public License for more details. | |
| 21 * | |
| 22 * You should have received a copy of the GNU General Public License | |
| 23 * along with this program; if not, write to the Free Software | |
|
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2967
diff
changeset
|
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 332 | 25 */ |
| 26 | |
| 27 #include "a52.h" | |
| 28 #include "a52_internal.h" | |
| 29 #include "mm_accel.h" | |
| 30 | |
| 31 typedef struct complex_s { | |
| 32 sample_t real; | |
| 33 sample_t imag; | |
| 34 } complex_t; | |
| 35 | |
| 36 static uint8_t fftorder[] = { | |
| 37 0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176, | |
| 38 8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88, | |
| 39 4,132, 68,196, 36,164,228,100, 20,148, 84,212,244,116, 52,180, | |
| 40 252,124, 60,188, 28,156,220, 92, 12,140, 76,204,236,108, 44,172, | |
| 41 2,130, 66,194, 34,162,226, 98, 18,146, 82,210,242,114, 50,178, | |
| 42 10,138, 74,202, 42,170,234,106,250,122, 58,186, 26,154,218, 90, | |
| 43 254,126, 62,190, 30,158,222, 94, 14,142, 78,206,238,110, 46,174, | |
| 44 6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86 | |
| 45 }; | |
| 46 | |
| 47 /* Root values for IFFT */ | |
| 48 static sample_t roots16[3]; | |
| 49 static sample_t roots32[7]; | |
| 50 static sample_t roots64[15]; | |
| 51 static sample_t roots128[31]; | |
| 52 | |
| 53 /* Twiddle factors for IMDCT */ | |
| 54 static complex_t pre1[128]; | |
| 55 static complex_t post1[64]; | |
| 56 static complex_t pre2[64]; | |
| 57 static complex_t post2[32]; | |
| 58 | |
| 59 static sample_t a52_imdct_window[256]; | |
| 60 | |
| 61 static void (* ifft128) (complex_t * buf); | |
| 62 static void (* ifft64) (complex_t * buf); | |
| 63 | |
| 64 static inline void ifft2 (complex_t * buf) | |
| 65 { | |
| 1072 | 66 sample_t r, i; |
| 332 | 67 |
| 68 r = buf[0].real; | |
| 69 i = buf[0].imag; | |
| 70 buf[0].real += buf[1].real; | |
| 71 buf[0].imag += buf[1].imag; | |
| 72 buf[1].real = r - buf[1].real; | |
| 73 buf[1].imag = i - buf[1].imag; | |
| 74 } | |
| 75 | |
| 76 static inline void ifft4 (complex_t * buf) | |
| 77 { | |
| 1072 | 78 sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; |
| 332 | 79 |
| 80 tmp1 = buf[0].real + buf[1].real; | |
| 81 tmp2 = buf[3].real + buf[2].real; | |
| 82 tmp3 = buf[0].imag + buf[1].imag; | |
| 83 tmp4 = buf[2].imag + buf[3].imag; | |
| 84 tmp5 = buf[0].real - buf[1].real; | |
| 85 tmp6 = buf[0].imag - buf[1].imag; | |
| 86 tmp7 = buf[2].imag - buf[3].imag; | |
| 87 tmp8 = buf[3].real - buf[2].real; | |
| 88 | |
| 89 buf[0].real = tmp1 + tmp2; | |
| 90 buf[0].imag = tmp3 + tmp4; | |
| 91 buf[2].real = tmp1 - tmp2; | |
| 92 buf[2].imag = tmp3 - tmp4; | |
| 93 buf[1].real = tmp5 + tmp7; | |
| 94 buf[1].imag = tmp6 + tmp8; | |
| 95 buf[3].real = tmp5 - tmp7; | |
| 96 buf[3].imag = tmp6 - tmp8; | |
| 97 } | |
| 98 | |
| 1072 | 99 /* basic radix-2 ifft butterfly */ |
| 100 | |
| 101 #define BUTTERFLY_0(t0,t1,W0,W1,d0,d1) do { \ | |
| 102 t0 = MUL (W1, d1) + MUL (W0, d0); \ | |
| 103 t1 = MUL (W0, d1) - MUL (W1, d0); \ | |
| 104 } while (0) | |
| 105 | |
| 106 /* radix-2 ifft butterfly with bias */ | |
| 107 | |
| 108 #define BUTTERFLY_B(t0,t1,W0,W1,d0,d1) do { \ | |
| 109 t0 = BIAS (MUL (d1, W1) + MUL (d0, W0)); \ | |
| 110 t1 = BIAS (MUL (d1, W0) - MUL (d0, W1)); \ | |
| 111 } while (0) | |
| 112 | |
| 332 | 113 /* the basic split-radix ifft butterfly */ |
| 114 | |
| 1072 | 115 #define BUTTERFLY(a0,a1,a2,a3,wr,wi) do { \ |
| 116 BUTTERFLY_0 (tmp5, tmp6, wr, wi, a2.real, a2.imag); \ | |
| 117 BUTTERFLY_0 (tmp8, tmp7, wr, wi, a3.imag, a3.real); \ | |
| 118 tmp1 = tmp5 + tmp7; \ | |
| 119 tmp2 = tmp6 + tmp8; \ | |
| 120 tmp3 = tmp6 - tmp8; \ | |
| 121 tmp4 = tmp7 - tmp5; \ | |
| 122 a2.real = a0.real - tmp1; \ | |
| 123 a2.imag = a0.imag - tmp2; \ | |
| 124 a3.real = a1.real - tmp3; \ | |
| 125 a3.imag = a1.imag - tmp4; \ | |
| 126 a0.real += tmp1; \ | |
| 127 a0.imag += tmp2; \ | |
| 128 a1.real += tmp3; \ | |
| 129 a1.imag += tmp4; \ | |
| 332 | 130 } while (0) |
| 131 | |
| 132 /* split-radix ifft butterfly, specialized for wr=1 wi=0 */ | |
| 133 | |
| 134 #define BUTTERFLY_ZERO(a0,a1,a2,a3) do { \ | |
| 135 tmp1 = a2.real + a3.real; \ | |
| 136 tmp2 = a2.imag + a3.imag; \ | |
| 137 tmp3 = a2.imag - a3.imag; \ | |
| 138 tmp4 = a3.real - a2.real; \ | |
| 139 a2.real = a0.real - tmp1; \ | |
| 140 a2.imag = a0.imag - tmp2; \ | |
| 141 a3.real = a1.real - tmp3; \ | |
| 142 a3.imag = a1.imag - tmp4; \ | |
| 143 a0.real += tmp1; \ | |
| 144 a0.imag += tmp2; \ | |
| 145 a1.real += tmp3; \ | |
| 146 a1.imag += tmp4; \ | |
| 147 } while (0) | |
| 148 | |
| 149 /* split-radix ifft butterfly, specialized for wr=wi */ | |
| 150 | |
| 151 #define BUTTERFLY_HALF(a0,a1,a2,a3,w) do { \ | |
| 1072 | 152 tmp5 = MUL (a2.real + a2.imag, w); \ |
| 153 tmp6 = MUL (a2.imag - a2.real, w); \ | |
| 154 tmp7 = MUL (a3.real - a3.imag, w); \ | |
| 155 tmp8 = MUL (a3.imag + a3.real, w); \ | |
| 332 | 156 tmp1 = tmp5 + tmp7; \ |
| 157 tmp2 = tmp6 + tmp8; \ | |
| 158 tmp3 = tmp6 - tmp8; \ | |
| 159 tmp4 = tmp7 - tmp5; \ | |
| 160 a2.real = a0.real - tmp1; \ | |
| 161 a2.imag = a0.imag - tmp2; \ | |
| 162 a3.real = a1.real - tmp3; \ | |
| 163 a3.imag = a1.imag - tmp4; \ | |
| 164 a0.real += tmp1; \ | |
| 165 a0.imag += tmp2; \ | |
| 166 a1.real += tmp3; \ | |
| 167 a1.imag += tmp4; \ | |
| 168 } while (0) | |
| 169 | |
| 170 static inline void ifft8 (complex_t * buf) | |
| 171 { | |
| 1072 | 172 sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; |
| 332 | 173 |
| 174 ifft4 (buf); | |
| 175 ifft2 (buf + 4); | |
| 176 ifft2 (buf + 6); | |
| 177 BUTTERFLY_ZERO (buf[0], buf[2], buf[4], buf[6]); | |
| 178 BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], roots16[1]); | |
| 179 } | |
| 180 | |
| 181 static void ifft_pass (complex_t * buf, sample_t * weight, int n) | |
| 182 { | |
| 183 complex_t * buf1; | |
| 184 complex_t * buf2; | |
| 185 complex_t * buf3; | |
| 1072 | 186 sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; |
| 332 | 187 int i; |
| 188 | |
| 189 buf++; | |
| 190 buf1 = buf + n; | |
| 191 buf2 = buf + 2 * n; | |
| 192 buf3 = buf + 3 * n; | |
| 193 | |
| 194 BUTTERFLY_ZERO (buf[-1], buf1[-1], buf2[-1], buf3[-1]); | |
| 195 | |
| 196 i = n - 1; | |
| 197 | |
| 198 do { | |
| 1072 | 199 BUTTERFLY (buf[0], buf1[0], buf2[0], buf3[0], |
| 200 weight[0], weight[2*i-n]); | |
| 332 | 201 buf++; |
| 202 buf1++; | |
| 203 buf2++; | |
| 204 buf3++; | |
| 205 weight++; | |
| 206 } while (--i); | |
| 207 } | |
| 208 | |
| 209 static void ifft16 (complex_t * buf) | |
| 210 { | |
| 211 ifft8 (buf); | |
| 212 ifft4 (buf + 8); | |
| 213 ifft4 (buf + 12); | |
| 1072 | 214 ifft_pass (buf, roots16, 4); |
| 332 | 215 } |
| 216 | |
| 217 static void ifft32 (complex_t * buf) | |
| 218 { | |
| 219 ifft16 (buf); | |
| 220 ifft8 (buf + 16); | |
| 221 ifft8 (buf + 24); | |
| 1072 | 222 ifft_pass (buf, roots32, 8); |
| 332 | 223 } |
| 224 | |
| 225 static void ifft64_c (complex_t * buf) | |
| 226 { | |
| 227 ifft32 (buf); | |
| 228 ifft16 (buf + 32); | |
| 229 ifft16 (buf + 48); | |
| 1072 | 230 ifft_pass (buf, roots64, 16); |
| 332 | 231 } |
| 232 | |
| 233 static void ifft128_c (complex_t * buf) | |
| 234 { | |
| 235 ifft32 (buf); | |
| 236 ifft16 (buf + 32); | |
| 237 ifft16 (buf + 48); | |
| 1072 | 238 ifft_pass (buf, roots64, 16); |
| 332 | 239 |
| 240 ifft32 (buf + 64); | |
| 241 ifft32 (buf + 96); | |
| 1072 | 242 ifft_pass (buf, roots128, 32); |
| 332 | 243 } |
| 244 | |
| 245 void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) | |
| 246 { | |
| 247 int i, k; | |
| 248 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; | |
| 249 const sample_t * window = a52_imdct_window; | |
| 1072 | 250 complex_t buf[128]; |
| 2967 | 251 |
| 332 | 252 for (i = 0; i < 128; i++) { |
| 253 k = fftorder[i]; | |
| 254 t_r = pre1[i].real; | |
| 255 t_i = pre1[i].imag; | |
| 1072 | 256 BUTTERFLY_0 (buf[i].real, buf[i].imag, t_r, t_i, data[k], data[255-k]); |
| 332 | 257 } |
| 258 | |
| 259 ifft128 (buf); | |
| 260 | |
| 261 /* Post IFFT complex multiply plus IFFT complex conjugate*/ | |
| 262 /* Window and convert to real valued signal */ | |
| 263 for (i = 0; i < 64; i++) { | |
| 264 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ | |
| 265 t_r = post1[i].real; | |
| 266 t_i = post1[i].imag; | |
| 1072 | 267 BUTTERFLY_0 (a_r, a_i, t_i, t_r, buf[i].imag, buf[i].real); |
| 268 BUTTERFLY_0 (b_r, b_i, t_r, t_i, buf[127-i].imag, buf[127-i].real); | |
| 332 | 269 |
| 270 w_1 = window[2*i]; | |
| 271 w_2 = window[255-2*i]; | |
| 1072 | 272 BUTTERFLY_B (data[255-2*i], data[2*i], w_2, w_1, a_r, delay[2*i]); |
| 332 | 273 delay[2*i] = a_i; |
| 274 | |
| 275 w_1 = window[2*i+1]; | |
| 276 w_2 = window[254-2*i]; | |
| 1072 | 277 BUTTERFLY_B (data[2*i+1], data[254-2*i], w_1, w_2, b_r, delay[2*i+1]); |
| 332 | 278 delay[2*i+1] = b_i; |
| 279 } | |
| 280 } | |
| 281 | |
| 1072 | 282 void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias) |
| 332 | 283 { |
| 284 int i, k; | |
| 285 sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2; | |
| 286 const sample_t * window = a52_imdct_window; | |
| 1072 | 287 complex_t buf1[64], buf2[64]; |
| 332 | 288 |
| 289 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ | |
| 290 for (i = 0; i < 64; i++) { | |
| 291 k = fftorder[i]; | |
| 292 t_r = pre2[i].real; | |
| 293 t_i = pre2[i].imag; | |
| 1072 | 294 BUTTERFLY_0 (buf1[i].real, buf1[i].imag, t_r, t_i, data[k], data[254-k]); |
| 295 BUTTERFLY_0 (buf2[i].real, buf2[i].imag, t_r, t_i, data[k+1], data[255-k]); | |
| 332 | 296 } |
| 297 | |
| 298 ifft64 (buf1); | |
| 299 ifft64 (buf2); | |
| 300 | |
| 301 /* Post IFFT complex multiply */ | |
| 302 /* Window and convert to real valued signal */ | |
| 303 for (i = 0; i < 32; i++) { | |
| 2967 | 304 /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */ |
| 332 | 305 t_r = post2[i].real; |
| 306 t_i = post2[i].imag; | |
| 1072 | 307 BUTTERFLY_0 (a_r, a_i, t_i, t_r, buf1[i].imag, buf1[i].real); |
| 308 BUTTERFLY_0 (b_r, b_i, t_r, t_i, buf1[63-i].imag, buf1[63-i].real); | |
| 309 BUTTERFLY_0 (c_r, c_i, t_i, t_r, buf2[i].imag, buf2[i].real); | |
| 310 BUTTERFLY_0 (d_r, d_i, t_r, t_i, buf2[63-i].imag, buf2[63-i].real); | |
| 332 | 311 |
| 312 w_1 = window[2*i]; | |
| 313 w_2 = window[255-2*i]; | |
| 1072 | 314 BUTTERFLY_B (data[255-2*i], data[2*i], w_2, w_1, a_r, delay[2*i]); |
| 332 | 315 delay[2*i] = c_i; |
| 316 | |
| 317 w_1 = window[128+2*i]; | |
| 318 w_2 = window[127-2*i]; | |
| 1072 | 319 BUTTERFLY_B (data[128+2*i], data[127-2*i], w_1, w_2, a_i, delay[127-2*i]); |
| 332 | 320 delay[127-2*i] = c_r; |
| 321 | |
| 322 w_1 = window[2*i+1]; | |
| 323 w_2 = window[254-2*i]; | |
| 1072 | 324 BUTTERFLY_B (data[254-2*i], data[2*i+1], w_2, w_1, b_i, delay[2*i+1]); |
| 332 | 325 delay[2*i+1] = d_r; |
| 326 | |
| 327 w_1 = window[129+2*i]; | |
| 328 w_2 = window[126-2*i]; | |
| 1072 | 329 BUTTERFLY_B (data[129+2*i], data[126-2*i], w_1, w_2, b_r, delay[126-2*i]); |
| 332 | 330 delay[126-2*i] = d_i; |
| 331 } | |
| 332 } | |
| 333 | |
| 334 static double besselI0 (double x) | |
| 335 { | |
| 336 double bessel = 1; | |
| 337 int i = 100; | |
| 338 | |
| 339 do | |
| 340 bessel = bessel * x / (i * i) + 1; | |
| 341 while (--i); | |
| 342 return bessel; | |
| 343 } | |
| 344 | |
| 345 void a52_imdct_init (uint32_t mm_accel) | |
| 346 { | |
| 347 int i, k; | |
| 348 double sum; | |
| 1072 | 349 double local_imdct_window[256]; |
| 332 | 350 |
| 351 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */ | |
| 352 sum = 0; | |
| 353 for (i = 0; i < 256; i++) { | |
| 354 sum += besselI0 (i * (256 - i) * (5 * M_PI / 256) * (5 * M_PI / 256)); | |
| 1072 | 355 local_imdct_window[i] = sum; |
| 332 | 356 } |
| 357 sum++; | |
| 358 for (i = 0; i < 256; i++) | |
| 1072 | 359 a52_imdct_window[i] = SAMPLE (sqrt (local_imdct_window[i] / sum)); |
| 332 | 360 |
| 361 for (i = 0; i < 3; i++) | |
| 1072 | 362 roots16[i] = SAMPLE (cos ((M_PI / 8) * (i + 1))); |
| 332 | 363 |
| 364 for (i = 0; i < 7; i++) | |
| 1072 | 365 roots32[i] = SAMPLE (cos ((M_PI / 16) * (i + 1))); |
| 332 | 366 |
| 367 for (i = 0; i < 15; i++) | |
| 1072 | 368 roots64[i] = SAMPLE (cos ((M_PI / 32) * (i + 1))); |
| 332 | 369 |
| 370 for (i = 0; i < 31; i++) | |
| 1072 | 371 roots128[i] = SAMPLE (cos ((M_PI / 64) * (i + 1))); |
| 332 | 372 |
| 373 for (i = 0; i < 64; i++) { | |
| 374 k = fftorder[i] / 2 + 64; | |
| 1072 | 375 pre1[i].real = SAMPLE (cos ((M_PI / 256) * (k - 0.25))); |
| 376 pre1[i].imag = SAMPLE (sin ((M_PI / 256) * (k - 0.25))); | |
| 332 | 377 } |
| 378 | |
| 379 for (i = 64; i < 128; i++) { | |
| 380 k = fftorder[i] / 2 + 64; | |
| 1072 | 381 pre1[i].real = SAMPLE (-cos ((M_PI / 256) * (k - 0.25))); |
| 382 pre1[i].imag = SAMPLE (-sin ((M_PI / 256) * (k - 0.25))); | |
| 332 | 383 } |
| 384 | |
| 385 for (i = 0; i < 64; i++) { | |
| 1072 | 386 post1[i].real = SAMPLE (cos ((M_PI / 256) * (i + 0.5))); |
| 387 post1[i].imag = SAMPLE (sin ((M_PI / 256) * (i + 0.5))); | |
| 332 | 388 } |
| 389 | |
| 390 for (i = 0; i < 64; i++) { | |
| 391 k = fftorder[i] / 4; | |
| 1072 | 392 pre2[i].real = SAMPLE (cos ((M_PI / 128) * (k - 0.25))); |
| 393 pre2[i].imag = SAMPLE (sin ((M_PI / 128) * (k - 0.25))); | |
| 332 | 394 } |
| 395 | |
| 396 for (i = 0; i < 32; i++) { | |
| 1072 | 397 post2[i].real = SAMPLE (cos ((M_PI / 128) * (i + 0.5))); |
| 398 post2[i].imag = SAMPLE (sin ((M_PI / 128) * (i + 0.5))); | |
| 332 | 399 } |
| 400 | |
| 401 #ifdef LIBA52_DJBFFT | |
| 402 if (mm_accel & MM_ACCEL_DJBFFT) { | |
| 403 ifft128 = (void (*) (complex_t *)) fftc4_un128; | |
| 404 ifft64 = (void (*) (complex_t *)) fftc4_un64; | |
| 405 } else | |
| 406 #endif | |
| 407 { | |
| 408 ifft128 = ifft128_c; | |
| 409 ifft64 = ifft64_c; | |
| 410 } | |
| 411 } |
