comparison liba52/liba52_changes.diff @ 18721:722ac20fac5f

sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com< part 2: cosmetic changes
author rathann
date Thu, 15 Jun 2006 22:59:40 +0000
parents 4bad7f00556e
children 9960f9ef96fd
comparison
equal deleted inserted replaced
18720:4bad7f00556e 18721:722ac20fac5f
33 void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart, 33 void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart,
34 int start, int end, int fastleak, int slowleak, 34 int start, int end, int fastleak, int slowleak,
35 expbap_t * expbap); 35 expbap_t * expbap);
36 36
37 int a52_downmix_init (int input, int flags, sample_t * level, 37 int a52_downmix_init (int input, int flags, sample_t * level,
38 sample_t clev, sample_t slev); 38 sample_t clev, sample_t slev);
39 +void downmix_accel_init(uint32_t mm_accel); 39 +void downmix_accel_init(uint32_t mm_accel);
40 int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level, 40 int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level,
41 sample_t clev, sample_t slev); 41 sample_t clev, sample_t slev);
42 -void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias, 42 -void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias,
43 +extern void (*a52_downmix) (sample_t * samples, int acmod, int output, sample_t bias, 43 +extern void (*a52_downmix) (sample_t * samples, int acmod, int output, sample_t bias,
44 sample_t clev, sample_t slev); 44 sample_t clev, sample_t slev);
45 -void a52_upmix (sample_t * samples, int acmod, int output); 45 -void a52_upmix (sample_t * samples, int acmod, int output);
46 +extern void (*a52_upmix) (sample_t * samples, int acmod, int output); 46 +extern void (*a52_upmix) (sample_t * samples, int acmod, int output);
47 47
48 void a52_imdct_init (uint32_t mm_accel); 48 void a52_imdct_init (uint32_t mm_accel);
49 void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias); 49 void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias);
118 +} 118 +}
119 +# endif 119 +# endif
120 +#endif //!ARCH_X86 120 +#endif //!ARCH_X86
121 + 121 +
122 +#endif 122 +#endif
123 + 123 +
124 /* (stolen from the kernel) */ 124 /* (stolen from the kernel) */
125 #ifdef WORDS_BIGENDIAN 125 #ifdef WORDS_BIGENDIAN
126 126
127 @@ -28,7 +74,7 @@ 127 @@ -28,7 +74,7 @@
128 128
252 + if(mm_accel & MM_ACCEL_X86_MMX) a52_upmix= upmix_MMX; 252 + if(mm_accel & MM_ACCEL_X86_MMX) a52_upmix= upmix_MMX;
253 + if(mm_accel & MM_ACCEL_X86_SSE) a52_downmix= downmix_SSE; 253 + if(mm_accel & MM_ACCEL_X86_SSE) a52_downmix= downmix_SSE;
254 + if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow; 254 + if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow;
255 +#endif 255 +#endif
256 +} 256 +}
257 + 257 +
258 int a52_downmix_init (int input, int flags, sample_t * level, 258 int a52_downmix_init (int input, int flags, sample_t * level,
259 sample_t clev, sample_t slev) 259 sample_t clev, sample_t slev)
260 { 260 {
261 @@ -451,7 +480,7 @@ 261 @@ -451,7 +480,7 @@
262 samples[i] = 0; 262 samples[i] = 0;
263 } 263 }
264 264
265 -void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias, 265 -void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias,
266 +void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, 266 +void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
267 sample_t clev, sample_t slev) 267 sample_t clev, sample_t slev)
268 { 268 {
269 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { 269 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
270 @@ -563,7 +592,7 @@ 270 @@ -563,7 +592,7 @@
271 break; 271 break;
272 272
1531 +static float __attribute__((aligned(16))) sseW5[128]; 1531 +static float __attribute__((aligned(16))) sseW5[128];
1532 +static float __attribute__((aligned(16))) sseW6[256]; 1532 +static float __attribute__((aligned(16))) sseW6[256];
1533 +static float __attribute__((aligned(16))) *sseW[7]= 1533 +static float __attribute__((aligned(16))) *sseW[7]=
1534 + {NULL /*sseW0*/,sseW1,sseW2,sseW3,sseW4,sseW5,sseW6}; 1534 + {NULL /*sseW0*/,sseW1,sseW2,sseW3,sseW4,sseW5,sseW6};
1535 +static float __attribute__((aligned(16))) sseWindow[512]; 1535 +static float __attribute__((aligned(16))) sseWindow[512];
1536 +#endif 1536 +#endif
1537 + 1537 +
1538 /* Root values for IFFT */ 1538 /* Root values for IFFT */
1539 static sample_t roots16[3]; 1539 static sample_t roots16[3];
1540 static sample_t roots32[7]; 1540 static sample_t roots32[7];
1541 @@ -245,7 +322,7 @@ 1541 @@ -245,7 +322,7 @@
1542 ifft_pass (buf, roots128 - 32, 32); 1542 ifft_pass (buf, roots128 - 32, 32);
1543 } 1543 }
1544 1544
1545 -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) 1545 -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias)
1546 +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias) 1546 +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias)
1547 { 1547 {
1548 int i, k; 1548 int i, k;
1971 + "movlps %%xmm2, (%1, %%"REG_d", 8) \n\t" 1971 + "movlps %%xmm2, (%1, %%"REG_d", 8) \n\t"
1972 + "movhps %%xmm2, (%1, %%"REG_BP", 8) \n\t" 1972 + "movhps %%xmm2, (%1, %%"REG_BP", 8) \n\t"
1973 + "add $16, %%"REG_S" \n\t" 1973 + "add $16, %%"REG_S" \n\t"
1974 + "add $2, %%"REG_a" \n\t" // avoid complex addressing for P4 crap 1974 + "add $2, %%"REG_a" \n\t" // avoid complex addressing for P4 crap
1975 + "sub $16, %%"REG_D" \n\t" 1975 + "sub $16, %%"REG_D" \n\t"
1976 + " jnc 1b \n\t" 1976 + "jnc 1b \n\t"
1977 + "pop %%"REG_BP" \n\t"//no we didnt touch ebp *g* 1977 + "pop %%"REG_BP" \n\t"//no we didnt touch ebp *g*
1978 + :: "b" (data), "c" (buf) 1978 + :: "b" (data), "c" (buf)
1979 + : "%"REG_S, "%"REG_D, "%"REG_a, "%"REG_d 1979 + : "%"REG_S, "%"REG_D, "%"REG_a, "%"REG_d
1980 + ); 1980 + );
1981 + 1981 +
2123 + "subps %%xmm2, %%xmm0 \n\t" 2123 + "subps %%xmm2, %%xmm0 \n\t"
2124 + "movaps %%xmm1, (%%"REG_S", %%"REG_D") \n\t" 2124 + "movaps %%xmm1, (%%"REG_S", %%"REG_D") \n\t"
2125 + "movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t" 2125 + "movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t"
2126 + "add $16, %%"REG_D" \n\t" 2126 + "add $16, %%"REG_D" \n\t"
2127 + "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0 2127 + "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0
2128 + " jb 2b \n\t" 2128 + "jb 2b \n\t"
2129 + "add %2, %%"REG_S" \n\t" 2129 + "add %2, %%"REG_S" \n\t"
2130 + "cmp %1, %%"REG_S" \n\t" 2130 + "cmp %1, %%"REG_S" \n\t"
2131 + " jb 1b \n\t" 2131 + " jb 1b \n\t"
2132 + :: "g" (buf), "m" (buf_offset), "m" (two_m_plus_one_shl3), "r" (two_m<<3), 2132 + :: "g" (buf), "m" (buf_offset), "m" (two_m_plus_one_shl3), "r" (two_m<<3),
2133 + "r" (sseW[m]) 2133 + "r" (sseW[m])
2263 { 2263 {
2264 int i, k; 2264 int i, k;
2265 @@ -368,7 +1153,7 @@ 2265 @@ -368,7 +1153,7 @@
2266 2266
2267 void a52_imdct_init (uint32_t mm_accel) 2267 void a52_imdct_init (uint32_t mm_accel)
2268 { 2268 {
2269 - int i, k; 2269 - int i, k;
2270 + int i, j, k; 2270 + int i, j, k;
2271 double sum; 2271 double sum;
2272 2272
2273 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */ 2273 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */
2274 @@ -420,6 +1205,99 @@ 2274 @@ -420,6 +1205,99 @@
2275 post2[i].real = cos ((M_PI / 128) * (i + 0.5)); 2275 post2[i].real = cos ((M_PI / 128) * (i + 0.5));
2276 post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); 2276 post2[i].imag = sin ((M_PI / 128) * (i + 0.5));
2277 } 2277 }
2278 + for (i = 0; i < 128; i++) { 2278 + for (i = 0; i < 128; i++) {
2279 + xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); 2279 + xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1));
2280 + xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1)); 2280 + xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1));
2281 + } 2281 + }
2282 + for (i = 0; i < 7; i++) { 2282 + for (i = 0; i < 7; i++) {
2343 +#if defined(ARCH_X86) || defined(ARCH_X86_64) 2343 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
2344 + if(mm_accel & MM_ACCEL_X86_SSE) 2344 + if(mm_accel & MM_ACCEL_X86_SSE)
2345 + { 2345 + {
2346 + fprintf (stderr, "Using SSE optimized IMDCT transform\n"); 2346 + fprintf (stderr, "Using SSE optimized IMDCT transform\n");
2347 + a52_imdct_512 = imdct_do_512_sse; 2347 + a52_imdct_512 = imdct_do_512_sse;
2348 + } 2348 + }
2349 + else 2349 + else
2350 + if(mm_accel & MM_ACCEL_X86_3DNOWEXT) 2350 + if(mm_accel & MM_ACCEL_X86_3DNOWEXT)
2351 + { 2351 + {
2352 + fprintf (stderr, "Using 3DNowEx optimized IMDCT transform\n"); 2352 + fprintf (stderr, "Using 3DNowEx optimized IMDCT transform\n");
2353 + a52_imdct_512 = imdct_do_512_3dnowex; 2353 + a52_imdct_512 = imdct_do_512_3dnowex;
2364 + if (mm_accel & MM_ACCEL_PPC_ALTIVEC) 2364 + if (mm_accel & MM_ACCEL_PPC_ALTIVEC)
2365 + { 2365 + {
2366 + fprintf(stderr, "Using AltiVec optimized IMDCT transform\n"); 2366 + fprintf(stderr, "Using AltiVec optimized IMDCT transform\n");
2367 + a52_imdct_512 = imdct_do_512_altivec; 2367 + a52_imdct_512 = imdct_do_512_altivec;
2368 + } 2368 + }
2369 + else 2369 + else
2370 +#endif 2370 +#endif
2371 2371
2372 #ifdef LIBA52_DJBFFT 2372 #ifdef LIBA52_DJBFFT
2373 if (mm_accel & MM_ACCEL_DJBFFT) { 2373 if (mm_accel & MM_ACCEL_DJBFFT) {
2374 @@ -430,7 +1308,5 @@ 2374 @@ -430,7 +1308,5 @@
2435 +#endif 2435 +#endif
2436 + if(((int)state->samples%16) && (mm_accel&MM_ACCEL_X86_SSE)){ 2436 + if(((int)state->samples%16) && (mm_accel&MM_ACCEL_X86_SSE)){
2437 + mm_accel &=~MM_ACCEL_X86_SSE; 2437 + mm_accel &=~MM_ACCEL_X86_SSE;
2438 + fprintf(stderr, "liba52: unable to get 16 byte aligned memory disabling usage of SSE instructions\n"); 2438 + fprintf(stderr, "liba52: unable to get 16 byte aligned memory disabling usage of SSE instructions\n");
2439 + } 2439 + }
2440 + 2440 +
2441 if (state->samples == NULL) { 2441 if (state->samples == NULL) {
2442 free (state); 2442 free (state);
2443 return NULL; 2443 return NULL;
2444 @@ -74,6 +92,7 @@ 2444 @@ -74,6 +92,7 @@
2445 state->lfsr_state = 1; 2445 state->lfsr_state = 1;