Mercurial > mplayer.hg
comparison liba52/liba52_changes.diff @ 18721:722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
part 2: cosmetic changes
| author | rathann |
|---|---|
| date | Thu, 15 Jun 2006 22:59:40 +0000 |
| parents | 4bad7f00556e |
| children | 9960f9ef96fd |
comparison
Legend (row status in the comparison table below):
- equal
- deleted
- inserted
- replaced
| 18720:4bad7f00556e | 18721:722ac20fac5f |
|---|---|
| 33 void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart, | 33 void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart, |
| 34 int start, int end, int fastleak, int slowleak, | 34 int start, int end, int fastleak, int slowleak, |
| 35 expbap_t * expbap); | 35 expbap_t * expbap); |
| 36 | 36 |
| 37 int a52_downmix_init (int input, int flags, sample_t * level, | 37 int a52_downmix_init (int input, int flags, sample_t * level, |
| 38 sample_t clev, sample_t slev); | 38 sample_t clev, sample_t slev); |
| 39 +void downmix_accel_init(uint32_t mm_accel); | 39 +void downmix_accel_init(uint32_t mm_accel); |
| 40 int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level, | 40 int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level, |
| 41 sample_t clev, sample_t slev); | 41 sample_t clev, sample_t slev); |
| 42 -void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias, | 42 -void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias, |
| 43 +extern void (*a52_downmix) (sample_t * samples, int acmod, int output, sample_t bias, | 43 +extern void (*a52_downmix) (sample_t * samples, int acmod, int output, sample_t bias, |
| 44 sample_t clev, sample_t slev); | 44 sample_t clev, sample_t slev); |
| 45 -void a52_upmix (sample_t * samples, int acmod, int output); | 45 -void a52_upmix (sample_t * samples, int acmod, int output); |
| 46 +extern void (*a52_upmix) (sample_t * samples, int acmod, int output); | 46 +extern void (*a52_upmix) (sample_t * samples, int acmod, int output); |
| 47 | 47 |
| 48 void a52_imdct_init (uint32_t mm_accel); | 48 void a52_imdct_init (uint32_t mm_accel); |
| 49 void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias); | 49 void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias); |
| 118 +} | 118 +} |
| 119 +# endif | 119 +# endif |
| 120 +#endif //!ARCH_X86 | 120 +#endif //!ARCH_X86 |
| 121 + | 121 + |
| 122 +#endif | 122 +#endif |
| 123 + | 123 + |
| 124 /* (stolen from the kernel) */ | 124 /* (stolen from the kernel) */ |
| 125 #ifdef WORDS_BIGENDIAN | 125 #ifdef WORDS_BIGENDIAN |
| 126 | 126 |
| 127 @@ -28,7 +74,7 @@ | 127 @@ -28,7 +74,7 @@ |
| 128 | 128 |
| 252 + if(mm_accel & MM_ACCEL_X86_MMX) a52_upmix= upmix_MMX; | 252 + if(mm_accel & MM_ACCEL_X86_MMX) a52_upmix= upmix_MMX; |
| 253 + if(mm_accel & MM_ACCEL_X86_SSE) a52_downmix= downmix_SSE; | 253 + if(mm_accel & MM_ACCEL_X86_SSE) a52_downmix= downmix_SSE; |
| 254 + if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow; | 254 + if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow; |
| 255 +#endif | 255 +#endif |
| 256 +} | 256 +} |
| 257 + | 257 + |
| 258 int a52_downmix_init (int input, int flags, sample_t * level, | 258 int a52_downmix_init (int input, int flags, sample_t * level, |
| 259 sample_t clev, sample_t slev) | 259 sample_t clev, sample_t slev) |
| 260 { | 260 { |
| 261 @@ -451,7 +480,7 @@ | 261 @@ -451,7 +480,7 @@ |
| 262 samples[i] = 0; | 262 samples[i] = 0; |
| 263 } | 263 } |
| 264 | 264 |
| 265 -void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias, | 265 -void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias, |
| 266 +void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, | 266 +void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, |
| 267 sample_t clev, sample_t slev) | 267 sample_t clev, sample_t slev) |
| 268 { | 268 { |
| 269 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | 269 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { |
| 270 @@ -563,7 +592,7 @@ | 270 @@ -563,7 +592,7 @@ |
| 271 break; | 271 break; |
| 272 | 272 |
| 1531 +static float __attribute__((aligned(16))) sseW5[128]; | 1531 +static float __attribute__((aligned(16))) sseW5[128]; |
| 1532 +static float __attribute__((aligned(16))) sseW6[256]; | 1532 +static float __attribute__((aligned(16))) sseW6[256]; |
| 1533 +static float __attribute__((aligned(16))) *sseW[7]= | 1533 +static float __attribute__((aligned(16))) *sseW[7]= |
| 1534 + {NULL /*sseW0*/,sseW1,sseW2,sseW3,sseW4,sseW5,sseW6}; | 1534 + {NULL /*sseW0*/,sseW1,sseW2,sseW3,sseW4,sseW5,sseW6}; |
| 1535 +static float __attribute__((aligned(16))) sseWindow[512]; | 1535 +static float __attribute__((aligned(16))) sseWindow[512]; |
| 1536 +#endif | 1536 +#endif |
| 1537 + | 1537 + |
| 1538 /* Root values for IFFT */ | 1538 /* Root values for IFFT */ |
| 1539 static sample_t roots16[3]; | 1539 static sample_t roots16[3]; |
| 1540 static sample_t roots32[7]; | 1540 static sample_t roots32[7]; |
| 1541 @@ -245,7 +322,7 @@ | 1541 @@ -245,7 +322,7 @@ |
| 1542 ifft_pass (buf, roots128 - 32, 32); | 1542 ifft_pass (buf, roots128 - 32, 32); |
| 1543 } | 1543 } |
| 1544 | 1544 |
| 1545 -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) | 1545 -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) |
| 1546 +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias) | 1546 +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias) |
| 1547 { | 1547 { |
| 1548 int i, k; | 1548 int i, k; |
| 1971 + "movlps %%xmm2, (%1, %%"REG_d", 8) \n\t" | 1971 + "movlps %%xmm2, (%1, %%"REG_d", 8) \n\t" |
| 1972 + "movhps %%xmm2, (%1, %%"REG_BP", 8) \n\t" | 1972 + "movhps %%xmm2, (%1, %%"REG_BP", 8) \n\t" |
| 1973 + "add $16, %%"REG_S" \n\t" | 1973 + "add $16, %%"REG_S" \n\t" |
| 1974 + "add $2, %%"REG_a" \n\t" // avoid complex addressing for P4 crap | 1974 + "add $2, %%"REG_a" \n\t" // avoid complex addressing for P4 crap |
| 1975 + "sub $16, %%"REG_D" \n\t" | 1975 + "sub $16, %%"REG_D" \n\t" |
| 1976 + " jnc 1b \n\t" | 1976 + "jnc 1b \n\t" |
| 1977 + "pop %%"REG_BP" \n\t"//no we didnt touch ebp *g* | 1977 + "pop %%"REG_BP" \n\t"//no we didnt touch ebp *g* |
| 1978 + :: "b" (data), "c" (buf) | 1978 + :: "b" (data), "c" (buf) |
| 1979 + : "%"REG_S, "%"REG_D, "%"REG_a, "%"REG_d | 1979 + : "%"REG_S, "%"REG_D, "%"REG_a, "%"REG_d |
| 1980 + ); | 1980 + ); |
| 1981 + | 1981 + |
| 2123 + "subps %%xmm2, %%xmm0 \n\t" | 2123 + "subps %%xmm2, %%xmm0 \n\t" |
| 2124 + "movaps %%xmm1, (%%"REG_S", %%"REG_D") \n\t" | 2124 + "movaps %%xmm1, (%%"REG_S", %%"REG_D") \n\t" |
| 2125 + "movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t" | 2125 + "movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t" |
| 2126 + "add $16, %%"REG_D" \n\t" | 2126 + "add $16, %%"REG_D" \n\t" |
| 2127 + "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0 | 2127 + "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0 |
| 2128 + " jb 2b \n\t" | 2128 + "jb 2b \n\t" |
| 2129 + "add %2, %%"REG_S" \n\t" | 2129 + "add %2, %%"REG_S" \n\t" |
| 2130 + "cmp %1, %%"REG_S" \n\t" | 2130 + "cmp %1, %%"REG_S" \n\t" |
| 2131 + " jb 1b \n\t" | 2131 + " jb 1b \n\t" |
| 2132 + :: "g" (buf), "m" (buf_offset), "m" (two_m_plus_one_shl3), "r" (two_m<<3), | 2132 + :: "g" (buf), "m" (buf_offset), "m" (two_m_plus_one_shl3), "r" (two_m<<3), |
| 2133 + "r" (sseW[m]) | 2133 + "r" (sseW[m]) |
| 2263 { | 2263 { |
| 2264 int i, k; | 2264 int i, k; |
| 2265 @@ -368,7 +1153,7 @@ | 2265 @@ -368,7 +1153,7 @@ |
| 2266 | 2266 |
| 2267 void a52_imdct_init (uint32_t mm_accel) | 2267 void a52_imdct_init (uint32_t mm_accel) |
| 2268 { | 2268 { |
| 2269 - int i, k; | 2269 - int i, k; |
| 2270 + int i, j, k; | 2270 + int i, j, k; |
| 2271 double sum; | 2271 double sum; |
| 2272 | 2272 |
| 2273 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */ | 2273 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */ |
| 2274 @@ -420,6 +1205,99 @@ | 2274 @@ -420,6 +1205,99 @@ |
| 2275 post2[i].real = cos ((M_PI / 128) * (i + 0.5)); | 2275 post2[i].real = cos ((M_PI / 128) * (i + 0.5)); |
| 2276 post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); | 2276 post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); |
| 2277 } | 2277 } |
| 2278 + for (i = 0; i < 128; i++) { | 2278 + for (i = 0; i < 128; i++) { |
| 2279 + xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); | 2279 + xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); |
| 2280 + xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1)); | 2280 + xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1)); |
| 2281 + } | 2281 + } |
| 2282 + for (i = 0; i < 7; i++) { | 2282 + for (i = 0; i < 7; i++) { |
| 2343 +#if defined(ARCH_X86) || defined(ARCH_X86_64) | 2343 +#if defined(ARCH_X86) || defined(ARCH_X86_64) |
| 2344 + if(mm_accel & MM_ACCEL_X86_SSE) | 2344 + if(mm_accel & MM_ACCEL_X86_SSE) |
| 2345 + { | 2345 + { |
| 2346 + fprintf (stderr, "Using SSE optimized IMDCT transform\n"); | 2346 + fprintf (stderr, "Using SSE optimized IMDCT transform\n"); |
| 2347 + a52_imdct_512 = imdct_do_512_sse; | 2347 + a52_imdct_512 = imdct_do_512_sse; |
| 2348 + } | 2348 + } |
| 2349 + else | 2349 + else |
| 2350 + if(mm_accel & MM_ACCEL_X86_3DNOWEXT) | 2350 + if(mm_accel & MM_ACCEL_X86_3DNOWEXT) |
| 2351 + { | 2351 + { |
| 2352 + fprintf (stderr, "Using 3DNowEx optimized IMDCT transform\n"); | 2352 + fprintf (stderr, "Using 3DNowEx optimized IMDCT transform\n"); |
| 2353 + a52_imdct_512 = imdct_do_512_3dnowex; | 2353 + a52_imdct_512 = imdct_do_512_3dnowex; |
| 2364 + if (mm_accel & MM_ACCEL_PPC_ALTIVEC) | 2364 + if (mm_accel & MM_ACCEL_PPC_ALTIVEC) |
| 2365 + { | 2365 + { |
| 2366 + fprintf(stderr, "Using AltiVec optimized IMDCT transform\n"); | 2366 + fprintf(stderr, "Using AltiVec optimized IMDCT transform\n"); |
| 2367 + a52_imdct_512 = imdct_do_512_altivec; | 2367 + a52_imdct_512 = imdct_do_512_altivec; |
| 2368 + } | 2368 + } |
| 2369 + else | 2369 + else |
| 2370 +#endif | 2370 +#endif |
| 2371 | 2371 |
| 2372 #ifdef LIBA52_DJBFFT | 2372 #ifdef LIBA52_DJBFFT |
| 2373 if (mm_accel & MM_ACCEL_DJBFFT) { | 2373 if (mm_accel & MM_ACCEL_DJBFFT) { |
| 2374 @@ -430,7 +1308,5 @@ | 2374 @@ -430,7 +1308,5 @@ |
| 2435 +#endif | 2435 +#endif |
| 2436 + if(((int)state->samples%16) && (mm_accel&MM_ACCEL_X86_SSE)){ | 2436 + if(((int)state->samples%16) && (mm_accel&MM_ACCEL_X86_SSE)){ |
| 2437 + mm_accel &=~MM_ACCEL_X86_SSE; | 2437 + mm_accel &=~MM_ACCEL_X86_SSE; |
| 2438 + fprintf(stderr, "liba52: unable to get 16 byte aligned memory disabling usage of SSE instructions\n"); | 2438 + fprintf(stderr, "liba52: unable to get 16 byte aligned memory disabling usage of SSE instructions\n"); |
| 2439 + } | 2439 + } |
| 2440 + | 2440 + |
| 2441 if (state->samples == NULL) { | 2441 if (state->samples == NULL) { |
| 2442 free (state); | 2442 free (state); |
| 2443 return NULL; | 2443 return NULL; |
| 2444 @@ -74,6 +92,7 @@ | 2444 @@ -74,6 +92,7 @@ |
| 2445 state->lfsr_state = 1; | 2445 state->lfsr_state = 1; |
