Mercurial > libavcodec.hg
comparison dca.c @ 11617:bb17732c00ef libavcodec
DCA: break out lfe_interpolation_fir() inner loops to a function
This enables SIMD optimisations of this function.
| author | mru |
|---|---|
| date | Mon, 12 Apr 2010 20:45:25 +0000 |
| parents | a207cc043de8 |
| children | 1492bdc1d9d0 |
comparison
equal
deleted
inserted
replaced
| 11616:1461e6044153 | 11617:bb17732c00ef |
|---|---|
| 39 #include "put_bits.h" | 39 #include "put_bits.h" |
| 40 #include "dcadata.h" | 40 #include "dcadata.h" |
| 41 #include "dcahuff.h" | 41 #include "dcahuff.h" |
| 42 #include "dca.h" | 42 #include "dca.h" |
| 43 #include "synth_filter.h" | 43 #include "synth_filter.h" |
| | 44 #include "dcadsp.h" |
| 44 | 45 |
| 45 //#define TRACE | 46 //#define TRACE |
| 46 | 47 |
| 47 #define DCA_PRIM_CHANNELS_MAX (5) | 48 #define DCA_PRIM_CHANNELS_MAX (5) |
| 48 #define DCA_SUBBANDS (32) | 49 #define DCA_SUBBANDS (32) |
| 254 | 255 |
| 255 int debug_flag; ///< used for suppressing repeated error messages output | 256 int debug_flag; ///< used for suppressing repeated error messages output |
| 256 DSPContext dsp; | 257 DSPContext dsp; |
| 257 FFTContext imdct; | 258 FFTContext imdct; |
| 258 SynthFilterContext synth; | 259 SynthFilterContext synth; |
| | 260 DCADSPContext dcadsp; |
| 259 } DCAContext; | 261 } DCAContext; |
| 260 | 262 |
| 261 static const uint16_t dca_vlc_offs[] = { | 263 static const uint16_t dca_vlc_offs[] = { |
| 262 0, 512, 640, 768, 1282, 1794, 2436, 3080, 3770, 4454, 5364, | 264 0, 512, 640, 768, 1282, 1794, 2436, 3080, 3770, 4454, 5364, |
| 263 5372, 5380, 5388, 5392, 5396, 5412, 5420, 5428, 5460, 5492, 5508, | 265 5372, 5380, 5388, 5392, 5396, 5412, 5420, 5428, 5460, 5492, 5508, |
| 786 samples_out+= 32; | 788 samples_out+= 32; |
| 787 | 789 |
| 788 } | 790 } |
| 789 } | 791 } |
| 790 | 792 |
| 791 static void lfe_interpolation_fir(int decimation_select, | 793 static void lfe_interpolation_fir(DCAContext *s, int decimation_select, |
| 792 int num_deci_sample, float *samples_in, | 794 int num_deci_sample, float *samples_in, |
| 793 float *samples_out, float scale, | 795 float *samples_out, float scale, |
| 794 float bias) | 796 float bias) |
| 795 { | 797 { |
| 796 /* samples_in: An array holding decimated samples. | 798 /* samples_in: An array holding decimated samples. |
| 799 * from last subframe as history. | 801 * from last subframe as history. |
| 800 * | 802 * |
| 801 * samples_out: An array holding interpolated samples | 803 * samples_out: An array holding interpolated samples |
| 802 */ | 804 */ |
| 803 | 805 |
| 804 int decifactor, k, j; | 806 int decifactor; |
| 805 const float *prCoeff; | 807 const float *prCoeff; |
| 806 int deciindex; | 808 int deciindex; |
| 807 | 809 |
| 808 /* Select decimation filter */ | 810 /* Select decimation filter */ |
| 809 if (decimation_select == 1) { | 811 if (decimation_select == 1) { |
| 813 decifactor = 32; | 815 decifactor = 32; |
| 814 prCoeff = lfe_fir_64; | 816 prCoeff = lfe_fir_64; |
| 815 } | 817 } |
| 816 /* Interpolation */ | 818 /* Interpolation */ |
| 817 for (deciindex = 0; deciindex < num_deci_sample; deciindex++) { | 819 for (deciindex = 0; deciindex < num_deci_sample; deciindex++) { |
| 818 float *samples_out2 = samples_out + decifactor; | 820 s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor, |
| 819 const float *cf0 = prCoeff; | 821 scale, bias); |
| 820 const float *cf1 = prCoeff + 256; | |
| 821 | |
| 822 /* One decimated sample generates 2*decifactor interpolated ones */ | |
| 823 for (k = 0; k < decifactor; k++) { | |
| 824 float v0 = 0.0; | |
| 825 float v1 = 0.0; | |
| 826 for (j = 0; j < 256 / decifactor; j++) { | |
| 827 float s = samples_in[-j]; | |
| 828 v0 += s * *cf0++; | |
| 829 v1 += s * *--cf1; | |
| 830 } | |
| 831 *samples_out++ = (v0 * scale) + bias; | |
| 832 *samples_out2++ = (v1 * scale) + bias; | |
| 833 } | |
| 834 | |
| 835 samples_in++; | 822 samples_in++; |
| 836 samples_out += decifactor; | 823 samples_out += 2 * decifactor; |
| 837 } | 824 } |
| 838 } | 825 } |
| 839 | 826 |
| 840 /* downmixing routines */ | 827 /* downmixing routines */ |
| 841 #define MIX_REAR1(samples, si1, rs, coef) \ | 828 #define MIX_REAR1(samples, si1, rs, coef) \ |
| 1081 | 1068 |
| 1082 /* Generate LFE samples for this subsubframe FIXME!!! */ | 1069 /* Generate LFE samples for this subsubframe FIXME!!! */ |
| 1083 if (s->output & DCA_LFE) { | 1070 if (s->output & DCA_LFE) { |
| 1084 int lfe_samples = 2 * s->lfe * s->subsubframes; | 1071 int lfe_samples = 2 * s->lfe * s->subsubframes; |
| 1085 | 1072 |
| 1086 lfe_interpolation_fir(s->lfe, 2 * s->lfe, | 1073 lfe_interpolation_fir(s, s->lfe, 2 * s->lfe, |
| 1087 s->lfe_data + lfe_samples + | 1074 s->lfe_data + lfe_samples + |
| 1088 2 * s->lfe * subsubframe, | 1075 2 * s->lfe * subsubframe, |
| 1089 &s->samples[256 * dca_lfe_index[s->amode]], | 1076 &s->samples[256 * dca_lfe_index[s->amode]], |
| 1090 (1.0/256.0)*s->scale_bias, s->add_bias); | 1077 (1.0/256.0)*s->scale_bias, s->add_bias); |
| 1091 /* Outputs 20bits pcm samples */ | 1078 /* Outputs 20bits pcm samples */ |
| 1311 dca_init_vlcs(); | 1298 dca_init_vlcs(); |
| 1312 | 1299 |
| 1313 dsputil_init(&s->dsp, avctx); | 1300 dsputil_init(&s->dsp, avctx); |
| 1314 ff_mdct_init(&s->imdct, 6, 1, 1.0); | 1301 ff_mdct_init(&s->imdct, 6, 1, 1.0); |
| 1315 ff_synth_filter_init(&s->synth); | 1302 ff_synth_filter_init(&s->synth); |
| | 1303 ff_dcadsp_init(&s->dcadsp); |
| 1316 | 1304 |
| 1317 for(i = 0; i < 6; i++) | 1305 for(i = 0; i < 6; i++) |
| 1318 s->samples_chanptr[i] = s->samples + i * 256; | 1306 s->samples_chanptr[i] = s->samples + i * 256; |
| 1319 avctx->sample_fmt = SAMPLE_FMT_S16; | 1307 avctx->sample_fmt = SAMPLE_FMT_S16; |
| 1320 | 1308 |
