Mercurial > libavcodec.hg
diff dca.c @ 11608:bd4754d81e42 libavcodec
DCA: simplify lfe_interpolation_fir()
This reorders the lfe_fir tables, and drops the mirrored half,
such that the loops in lfe_interpolation_fir() can be simplified.
The new loop structure should be easier to implement with SIMD.
Static data size is reduced by 2kB.
3% faster on Cortex-A8.
| author | mru |
|---|---|
| date | Mon, 12 Apr 2010 11:14:55 +0000 |
| parents | 554450259db8 |
| children | a207cc043de8 |
line wrap: on
line diff
```diff
--- a/dca.c Mon Apr 12 11:14:51 2010 +0000
+++ b/dca.c Mon Apr 12 11:14:55 2010 +0000
@@ -802,28 +802,37 @@
 
     int decifactor, k, j;
     const float *prCoeff;
-
-    int interp_index = 0; /* Index to the interpolated samples */
     int deciindex;
 
     /* Select decimation filter */
     if (decimation_select == 1) {
-        decifactor = 128;
+        decifactor = 64;
         prCoeff = lfe_fir_128;
     } else {
-        decifactor = 64;
+        decifactor = 32;
         prCoeff = lfe_fir_64;
     }
     /* Interpolation */
     for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
-        /* One decimated sample generates decifactor interpolated ones */
+        float *samples_out2 = samples_out + decifactor;
+        const float *cf0 = prCoeff;
+        const float *cf1 = prCoeff + 256;
+
+        /* One decimated sample generates 2*decifactor interpolated ones */
         for (k = 0; k < decifactor; k++) {
-            float rTmp = 0.0;
-            //FIXME the coeffs are symetric, fix that
-            for (j = 0; j < 512 / decifactor; j++)
-                rTmp += samples_in[deciindex - j] * prCoeff[k + j * decifactor];
-            samples_out[interp_index++] = (rTmp * scale) + bias;
+            float v0 = 0.0;
+            float v1 = 0.0;
+            for (j = 0; j < 256 / decifactor; j++) {
+                float s = samples_in[-j];
+                v0 += s * *cf0++;
+                v1 += s * *--cf1;
+            }
+            *samples_out++ = (v0 * scale) + bias;
+            *samples_out2++ = (v1 * scale) + bias;
         }
+
+        samples_in++;
+        samples_out += decifactor;
     }
 }
 
```
