Mercurial > libavcodec.hg
annotate fft.h @ 12510:ef2f2db5b7be libavcodec
Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
code directly also and remove loop setup. 20% faster in function, 0.8% overall.
See "[PATCH] unroll loop in h264_idct_add8_sse2()" thread on ML.
| author | rbultje |
|---|---|
| date | Fri, 24 Sep 2010 14:05:45 +0000 |
| parents | 00676ed9b822 |
| children |
| rev | line source |
|---|---|
| 11370 | 1 /* |
| 2 * Copyright (c) 2000, 2001, 2002 Fabrice Bellard | |
| 3 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | |
| 4 * | |
| 5 * This file is part of FFmpeg. | |
| 6 * | |
| 7 * FFmpeg is free software; you can redistribute it and/or | |
| 8 * modify it under the terms of the GNU Lesser General Public | |
| 9 * License as published by the Free Software Foundation; either | |
| 10 * version 2.1 of the License, or (at your option) any later version. | |
| 11 * | |
| 12 * FFmpeg is distributed in the hope that it will be useful, | |
| 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 15 * Lesser General Public License for more details. | |
| 16 * | |
| 17 * You should have received a copy of the GNU Lesser General Public | |
| 18 * License along with FFmpeg; if not, write to the Free Software | |
| 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
| 20 */ | |
| 21 | |
| 22 #ifndef AVCODEC_FFT_H | |
| 23 #define AVCODEC_FFT_H | |
| 24 | |
| 25 #include <stdint.h> | |
| 26 #include "config.h" | |
| 27 #include "libavutil/mem.h" | |
| 11392 | 28 #include "avfft.h" |
| 11370 | 29 |
| 30 /* FFT computation */ | |
| 31 | |
| 11392 | 32 struct FFTContext { /* context shared by the FFT and MDCT entry points declared in this header */ |
| 11370 | 33 int nbits; |
| 34 int inverse; | |
| 35 uint16_t *revtab; | |
| 36 FFTComplex *tmp_buf; | |
| 37 int mdct_size; /* size of MDCT (i.e. number of input data * 2) */ | |
| 12407 | 38 int mdct_bits; /* n = 2^nbits; NOTE(review): presumably log2 of mdct_size -- confirm */ |
| 11370 | 39 /* pre/post rotation tables */ |
| 40 FFTSample *tcos; | |
| 41 FFTSample *tsin; | |
| 42 void (*fft_permute)(struct FFTContext *s, FFTComplex *z); /* called by ff_fft_permute() */ | |
| 43 void (*fft_calc)(struct FFTContext *s, FFTComplex *z); /* called by ff_fft_calc() */ | |
| 44 void (*imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input); /* called by ff_imdct_calc() */ | |
| 45 void (*imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input); /* called by ff_imdct_half() */ | |
| 46 void (*mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input); /* called by ff_mdct_calc() */ | |
| 47 int permutation; /* NOTE(review): presumably one of the FF_MDCT_PERM_* values below -- confirm */ | |
| 48 #define FF_MDCT_PERM_NONE 0 | |
| 49 #define FF_MDCT_PERM_INTERLEAVE 1 | |
| 11392 | 50 }; |
| 11370 | 51 |
| 52 #if CONFIG_HARDCODED_TABLES | |
| 53 #define COSTABLE_CONST const | |
| 54 #define SINTABLE_CONST const | |
| 55 #define SINETABLE_CONST const | |
| 56 #else | |
| 57 #define COSTABLE_CONST | |
| 58 #define SINTABLE_CONST | |
| 59 #define SINETABLE_CONST | |
| 60 #endif | |
| 61 | |
| 62 #define COSTABLE(size) \ | |
| 63 COSTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_cos_##size)[size/2] | |
| 64 #define SINTABLE(size) \ | |
| 65 SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2] | |
| 66 #define SINETABLE(size) \ | |
| 67 SINETABLE_CONST DECLARE_ALIGNED(16, float, ff_sine_##size)[size] | |
| 68 extern COSTABLE(16); | |
| 69 extern COSTABLE(32); | |
| 70 extern COSTABLE(64); | |
| 71 extern COSTABLE(128); | |
| 72 extern COSTABLE(256); | |
| 73 extern COSTABLE(512); | |
| 74 extern COSTABLE(1024); | |
| 75 extern COSTABLE(2048); | |
| 76 extern COSTABLE(4096); | |
| 77 extern COSTABLE(8192); | |
| 78 extern COSTABLE(16384); | |
| 79 extern COSTABLE(32768); | |
| 80 extern COSTABLE(65536); | |
| 81 extern COSTABLE_CONST FFTSample* const ff_cos_tabs[17]; | |
| 82 | |
| 83 /** | |
| 12024 | 84 * Initialize the cosine table in ff_cos_tabs[index] |
| 11370 | 85 * @param index index in ff_cos_tabs array of the table to initialize |
| 86 */ | |
| 87 void ff_init_ff_cos_tabs(int index); | |
| 88 | |
| 89 extern SINTABLE(16); | |
| 90 extern SINTABLE(32); | |
| 91 extern SINTABLE(64); | |
| 92 extern SINTABLE(128); | |
| 93 extern SINTABLE(256); | |
| 94 extern SINTABLE(512); | |
| 95 extern SINTABLE(1024); | |
| 96 extern SINTABLE(2048); | |
| 97 extern SINTABLE(4096); | |
| 98 extern SINTABLE(8192); | |
| 99 extern SINTABLE(16384); | |
| 100 extern SINTABLE(32768); | |
| 101 extern SINTABLE(65536); | |
| 102 | |
| 103 /** | |
| 12024 | 104 * Set up a complex FFT. |
| 11370 | 105 * @param nbits log2 of the length of the input array |
| 106 * @param inverse if 0 perform the forward transform, if 1 perform the inverse | |
| 107 */ | |
| 108 int ff_fft_init(FFTContext *s, int nbits, int inverse); | |
| 109 void ff_fft_permute_c(FFTContext *s, FFTComplex *z); | |
| 110 void ff_fft_calc_c(FFTContext *s, FFTComplex *z); | |
| 111 | |
| 112 void ff_fft_init_altivec(FFTContext *s); | |
| 113 void ff_fft_init_mmx(FFTContext *s); | |
| 114 void ff_fft_init_arm(FFTContext *s); | |
| 12099 | 115 void ff_dct_init_mmx(DCTContext *s); |
| 11370 | 116 |
| 117 /** | |
| 118 * Do the permutation needed BEFORE calling ff_fft_calc(). | |
| 119 */ | |
| 120 static inline void ff_fft_permute(FFTContext *s, FFTComplex *z) | |
| 121 { | |
| 122 s->fft_permute(s, z); /* call through the fft_permute function pointer stored in the context */ | |
| 123 } | |
| 124 /** | |
| 125 * Do a complex FFT with the parameters defined in ff_fft_init(). The | |
| 126 * input data must be permuted beforehand with ff_fft_permute(). No 1.0/sqrt(n) normalization is done. | |
| 127 */ | |
| 128 static inline void ff_fft_calc(FFTContext *s, FFTComplex *z) | |
| 129 { | |
| 130 s->fft_calc(s, z); /* call through the fft_calc function pointer stored in the context */ | |
| 131 } | |
| 132 void ff_fft_end(FFTContext *s); | |
| 133 | |
| 134 /* MDCT computation */ | |
| 135 | |
| 136 static inline void ff_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input) | |
| 137 { | |
| 138 s->imdct_calc(s, output, input); /* call through the imdct_calc function pointer stored in the context */ | |
| 139 } | |
| 140 static inline void ff_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input) | |
| 141 { | |
| 142 s->imdct_half(s, output, input); /* call through the imdct_half function pointer stored in the context */ | |
| 143 } | |
| 144 | |
| 145 static inline void ff_mdct_calc(FFTContext *s, FFTSample *output, | |
| 146 const FFTSample *input) | |
| 147 { | |
| 148 s->mdct_calc(s, output, input); /* call through the mdct_calc function pointer stored in the context */ | |
| 149 } | |
| 150 | |
| 151 /** | |
|
11944
052b9c58ccc4
Remove VLA in ff_kbd_window_init, limit window size to 1024
mru
parents:
11535
diff
changeset
|
152 * Maximum window size for ff_kbd_window_init. |
|
052b9c58ccc4
Remove VLA in ff_kbd_window_init, limit window size to 1024
mru
parents:
11535
diff
changeset
|
153 */ |
|
052b9c58ccc4
Remove VLA in ff_kbd_window_init, limit window size to 1024
mru
parents:
11535
diff
changeset
|
154 #define FF_KBD_WINDOW_MAX 1024 |
|
052b9c58ccc4
Remove VLA in ff_kbd_window_init, limit window size to 1024
mru
parents:
11535
diff
changeset
|
155 |
|
052b9c58ccc4
Remove VLA in ff_kbd_window_init, limit window size to 1024
mru
parents:
11535
diff
changeset
|
156 /** |
| 11370 | 157 * Generate a Kaiser-Bessel Derived Window. |
| 158 * @param window pointer to half window | |
| 159 * @param alpha determines window shape | |
|
11944
052b9c58ccc4
Remove VLA in ff_kbd_window_init, limit window size to 1024
mru
parents:
11535
diff
changeset
|
160 * @param n size of half window, max FF_KBD_WINDOW_MAX |
| 11370 | 161 */ |
| 162 void ff_kbd_window_init(float *window, float alpha, int n); | |
| 163 | |
| 164 /** | |
| 165 * Generate a sine window. | |
| 166 * @param window pointer to half window | |
| 167 * @param n size of half window | |
| 168 */ | |
| 169 void ff_sine_window_init(float *window, int n); | |
| 170 | |
| 171 /** | |
| 172 * Initialize the specified entry of ff_sine_windows. | |
| 173 */ | |
| 174 void ff_init_ff_sine_windows(int index); | |
| 175 extern SINETABLE( 32); | |
| 176 extern SINETABLE( 64); | |
| 177 extern SINETABLE( 128); | |
| 178 extern SINETABLE( 256); | |
| 179 extern SINETABLE( 512); | |
| 180 extern SINETABLE(1024); | |
| 181 extern SINETABLE(2048); | |
| 182 extern SINETABLE(4096); | |
| 183 extern SINETABLE_CONST float * const ff_sine_windows[13]; | |
| 184 | |
| 185 int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale); | |
| 186 void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input); | |
| 187 void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input); | |
| 188 void ff_mdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input); | |
| 189 void ff_mdct_end(FFTContext *s); | |
| 190 | |
| 191 /* Real Discrete Fourier Transform */ | |
| 192 | |
| 11392 | 193 struct RDFTContext { /* context for the real-FFT entry points (ff_rdft_init/ff_rdft_calc/ff_rdft_end) */ |
| 11370 | 194 int nbits; |
| 195 int inverse; | |
| 196 int sign_convention; | |
| 197 | |
| 198 /* pre/post rotation tables */ | |
| 199 const FFTSample *tcos; | |
| 200 SINTABLE_CONST FFTSample *tsin; | |
| 201 FFTContext fft; /* embedded FFTContext (held by value, not by pointer) */ | |
| 11512 | 202 void (*rdft_calc)(struct RDFTContext *s, FFTSample *z); /* called by ff_rdft_calc() */ |
| 11392 | 203 }; |
| 11370 | 204 |
| 205 /** | |
| 12024 | 206 * Set up a real FFT. |
| 11370 | 207 * @param nbits log2 of the length of the input array |
| 208 * @param trans the type of transform | |
| 209 */ | |
| 210 int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans); | |
| 211 void ff_rdft_end(RDFTContext *s); | |
| 212 | |
| 11532 | 213 void ff_rdft_init_arm(RDFTContext *s); |
| 214 | |
| 11512 | 215 static av_always_inline void ff_rdft_calc(RDFTContext *s, FFTSample *data) |
| 216 { | |
| 217 s->rdft_calc(s, data); /* call through the rdft_calc function pointer stored in the context */ | |
| 218 } | |
| 219 | |
| 11370 | 220 /* Discrete Cosine Transform */ |
| 221 | |
| 11392 | 222 struct DCTContext { |
| 11370 | 223 int nbits; |
| 224 int inverse; | |
| 225 RDFTContext rdft; | |
| 226 const float *costab; | |
| 227 FFTSample *csc2; | |
|
11518
c4d18d452f82
Call DCT by function pointer. Needed for any future ASM implementation and
vitor
parents:
11516
diff
changeset
|
228 void (*dct_calc)(struct DCTContext *s, FFTSample *data); |
|
12026
3f3d08bb5cf8
More mp{1,2,3} 32-point DCT transform to our common DCT framework.
vitor
parents:
12024
diff
changeset
|
229 void (*dct32)(FFTSample *out, const FFTSample *in); |
| 11392 | 230 }; |
| 11370 | 231 |
| 232 /** | |
| 12024 | 233 * Set up DCT. |
|
11535
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11532
diff
changeset
|
234 * @param nbits size of the input array: |
|
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11532
diff
changeset
|
235 * (1 << nbits) for DCT-II, DCT-III and DST-I |
|
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11532
diff
changeset
|
236 * (1 << nbits) + 1 for DCT-I |
|
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11532
diff
changeset
|
237 * |
|
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11532
diff
changeset
|
238 * @note the first element of the input of DST-I is ignored |
| 11370 | 239 */ |
|
11535
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11532
diff
changeset
|
240 int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType type); |
| 11370 | 241 void ff_dct_calc(DCTContext *s, FFTSample *data); |
| 242 void ff_dct_end (DCTContext *s); | |
| 243 | |
| 244 #endif /* AVCODEC_FFT_H */ |
