annotate src/ffmpeg/libavcodec/ppc/fft_altivec.c @ 854:aac49941ee8f trunk

[svn] statusicon 0.3: alternative right-click menu with simple playback control commands
author giacomo
date Wed, 14 Mar 2007 07:44:00 -0700
parents e8776388b02a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
808
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1 /*
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2 * FFT/IFFT transforms
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3 * AltiVec-enabled
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4 * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org>
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
5 * Based on code Copyright (c) 2002 Fabrice Bellard.
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
6 *
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
7 * This file is part of FFmpeg.
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
8 *
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
9 * FFmpeg is free software; you can redistribute it and/or
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
10 * modify it under the terms of the GNU Lesser General Public
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
11 * License as published by the Free Software Foundation; either
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
12 * version 2.1 of the License, or (at your option) any later version.
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
13 *
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
14 * FFmpeg is distributed in the hope that it will be useful,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
17 * Lesser General Public License for more details.
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
18 *
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
19 * You should have received a copy of the GNU Lesser General Public
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
20 * License along with FFmpeg; if not, write to the Free Software
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
22 */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
23 #include "../dsputil.h"
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
24
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
25 #include "gcc_fixes.h"
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
26
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
27 #include "dsputil_altivec.h"
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
28
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
29 /*
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
30 those three macros are from libavcodec/fft.c
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
31 and are required for the reference C code
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
32 */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* butterfly op */
/*
 * BF: radix-2 butterfly on two complex values.
 *   (pre, pim) = (pre1, pim1) + (qre1, qim1)
 *   (qre, qim) = (pre1, pim1) - (qre1, qim1)
 *
 * pre, pim, qre, qim must be assignable lvalues.
 * All four inputs are copied into locals before any output is written,
 * so an output may name the same lvalue as an input (in-place use).
 *
 * The body is wrapped in do { } while (0) so that "BF(...);" is exactly
 * one statement and can be used in an unbraced if/else.
 */
#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
do {\
    FFTSample ax, ay, bx, by;\
    bx = (pre1);\
    by = (pim1);\
    ax = (qre1);\
    ay = (qim1);\
    (pre) = (bx + ax);\
    (pim) = (by + ay);\
    (qre) = (bx - ax);\
    (qim) = (by - ay);\
} while (0)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* MUL16: plain product of its two (fully parenthesized) operands. */
#define MUL16(a,b) ((a) * (b))
/*
 * CMUL: complex multiply, (pre, pim) = (are + i*aim) * (bre + i*bim).
 *
 * pre and pim must be assignable lvalues. Both results are computed into
 * FFTSample temporaries before either output is written, so an output may
 * name the same lvalue as an input (in-place multiply), matching how BF
 * copies its inputs first.
 *
 * The body is wrapped in do { } while (0) so that "CMUL(...);" is exactly
 * one statement and can be used in an unbraced if/else.
 */
#define CMUL(pre, pim, are, aim, bre, bim) \
do {\
    FFTSample cmul_re = (MUL16(are, bre) - MUL16(aim, bim));\
    FFTSample cmul_im = (MUL16(are, bim) + MUL16(bre, aim));\
    (pre) = cmul_re;\
    (pim) = cmul_im;\
} while (0)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
52
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
53
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
54 /**
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
55 * Do a complex FFT with the parameters defined in ff_fft_init(). The
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
56 * input data must be permuted before with s->revtab table. No
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
57 * 1.0/sqrt(n) normalization is done.
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
58 * AltiVec-enabled
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
59 * This code assumes that the 'z' pointer is 16 bytes-aligned
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
60 * It also assumes all FFTComplex are 8 bytes-aligned pair of float
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
61 * The code is exactly the same as the SSE version, except
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
62 * that successive MUL + ADD/SUB have been merged into
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
63 * fused multiply-add ('vec_madd' in altivec)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
64 */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
65 void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
66 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
67 POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
68 #ifdef CONFIG_DARWIN
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
69 register const vector float vczero = (const vector float)(0.);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
70 #else
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
71 register const vector float vczero = (const vector float){0.,0.,0.,0.};
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
72 #endif
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
73
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
74 int ln = s->nbits;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
75 int j, np, np2;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
76 int nblocks, nloops;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
77 register FFTComplex *p, *q;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
78 FFTComplex *cptr, *cptr1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
79 int k;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
80
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
81 POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
82
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
83 np = 1 << ln;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
84
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
85 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
86 vector float *r, a, b, a1, c1, c2;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
87
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
88 r = (vector float *)&z[0];
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
89
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
90 c1 = vcii(p,p,n,n);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
91
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
92 if (s->inverse)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
93 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
94 c2 = vcii(p,p,n,p);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
95 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
96 else
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
97 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
98 c2 = vcii(p,p,p,n);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
99 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
100
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
101 j = (np >> 2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
102 do {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
103 a = vec_ld(0, r);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
104 a1 = vec_ld(sizeof(vector float), r);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
105
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
106 b = vec_perm(a,a,vcprmle(1,0,3,2));
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
107 a = vec_madd(a,c1,b);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
108 /* do the pass 0 butterfly */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
109
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
110 b = vec_perm(a1,a1,vcprmle(1,0,3,2));
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
111 b = vec_madd(a1,c1,b);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
112 /* do the pass 0 butterfly */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
113
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
114 /* multiply third by -i */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
115 b = vec_perm(b,b,vcprmle(2,3,1,0));
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
116
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
117 /* do the pass 1 butterfly */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
118 vec_st(vec_madd(b,c2,a), 0, r);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
119 vec_st(vec_nmsub(b,c2,a), sizeof(vector float), r);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
120
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
121 r += 2;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
122 } while (--j != 0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
123 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
124 /* pass 2 .. ln-1 */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
125
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
126 nblocks = np >> 3;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
127 nloops = 1 << 2;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
128 np2 = np >> 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
129
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
130 cptr1 = s->exptab1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
131 do {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
132 p = z;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
133 q = z + nloops;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
134 j = nblocks;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
135 do {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
136 cptr = cptr1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
137 k = nloops >> 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
138 do {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
139 vector float a,b,c,t1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
140
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
141 a = vec_ld(0, (float*)p);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
142 b = vec_ld(0, (float*)q);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
143
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
144 /* complex mul */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
145 c = vec_ld(0, (float*)cptr);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
146 /* cre*re cim*re */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
147 t1 = vec_madd(c, vec_perm(b,b,vcprmle(2,2,0,0)),vczero);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
148 c = vec_ld(sizeof(vector float), (float*)cptr);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
149 /* -cim*im cre*im */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
150 b = vec_madd(c, vec_perm(b,b,vcprmle(3,3,1,1)),t1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
151
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
152 /* butterfly */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
153 vec_st(vec_add(a,b), 0, (float*)p);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
154 vec_st(vec_sub(a,b), 0, (float*)q);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
155
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
156 p += 2;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
157 q += 2;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
158 cptr += 4;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
159 } while (--k);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
160
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
161 p += nloops;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
162 q += nloops;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
163 } while (--j);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
164 cptr1 += nloops * 2;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
165 nblocks = nblocks >> 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
166 nloops = nloops << 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
167 } while (nblocks != 0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
168
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
169 POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
170 }