Mercurial > libavcodec.hg
annotate liba52/resample_mmx.c @ 4057:ba767c63a07f libavcodec
remove unused variables
| author | bcoudurier |
|---|---|
| date | Sun, 22 Oct 2006 15:15:15 +0000 |
| parents | 1d69d79f7cc3 |
| children |
| rev | line source |
|---|---|
|
3673
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
1 /* |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
2 * resample_mmx.c |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
3 * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at> |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
4 * |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
5 * This file is part of a52dec, a free ATSC A-52 stream decoder. |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
6 * See http://liba52.sourceforge.net/ for updates. |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
7 * |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
8 * a52dec is free software; you can redistribute it and/or modify |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
9 * it under the terms of the GNU General Public License as published by |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
10 * the Free Software Foundation; either version 2 of the License, or |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
11 * (at your option) any later version. |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
12 * |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
13 * a52dec is distributed in the hope that it will be useful, |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
16 * GNU General Public License for more details. |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
17 * |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
18 * You should have received a copy of the GNU General Public License |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
19 * along with this program; if not, write to the Free Software |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
1d69d79f7cc3
Exchange informal GPL notice by official license header.
diego
parents:
2967
diff
changeset
|
21 */ |
| 1193 | 22 |
| 2967 | 23 /* optimization TODO / NOTES |
| 24 movntq is slightly faster (0.5% with the current test.c benchmark) | |
| 1193 | 25 (but that's just test.c, so it needs to be tested in reality) |
| 2967 | 26 and it would mean (C / MMX2 / MMX / 3DNOW) versions |
| 1193 | 27 */ |
| 28 | |
/* 8-byte aligned quadword constants that the conversion routines below load
   into MMX registers through MANGLE(). */
/* magicF2W: 0x43c00000 in both 32-bit halves. 0x43c00000 is the IEEE-754
   single-precision bit pattern of 384.0. The routines subtract it (psubd) from
   the raw 32-bit sample words before the saturating pack to 16 bit.
   NOTE(review): this presumably relies on the samples arriving biased by
   384.0 - confirm against the caller. */
| 2352 | 29 static uint64_t __attribute__((aligned(8))) attribute_used magicF2W= 0x43c0000043c00000LL; |
/* wm1010: mask keeping 16-bit words 1 and 3 of a quadword (words 0 and 2 cleared). */
| 30 static uint64_t __attribute__((aligned(8))) attribute_used wm1010= 0xFFFF0000FFFF0000LL; | |
/* wm0101: mask keeping 16-bit words 0 and 2 of a quadword (words 1 and 3 cleared). */
| 31 static uint64_t __attribute__((aligned(8))) attribute_used wm0101= 0x0000FFFF0000FFFFLL; | |
/* wm1100: mask keeping the upper 32 bits of a quadword (lower 32 bits cleared). */
| 32 static uint64_t __attribute__((aligned(8))) attribute_used wm1100= 0xFFFFFFFF00000000LL; | |
| 1193 | 33 |
/*
 * Convert 256 mono samples into 5-channel interleaved int16 output using MMX.
 *
 * _f  : input; read as raw 32-bit words (f[0..255]), not as float values.
 * s16 : output; 5*256 int16 values are stored, starting at s16[0].
 *
 * Each output frame is four zero words followed by the converted sample.
 * Returns 5*256, the number of int16 values stored.
 * The loop processes 4 samples per iteration and clobbers esi/edi and mm0-mm7.
 */
| 34 static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){ | |
| 35 int32_t * f = (int32_t *) _f; | |
| 36 asm volatile( | |
/* esi is a negative index that counts up to zero: esi*2 is the input byte
   offset relative to f+256, esi*5 (kept in edi) the output byte offset
   relative to s16+1280. */
| 37 "movl $-512, %%esi \n\t" | |
| 38 "movq "MANGLE(magicF2W)", %%mm7 \n\t" | |
| 39 "movq "MANGLE(wm1100)", %%mm3 \n\t" | |
| 40 "movq "MANGLE(wm0101)", %%mm4 \n\t" | |
| 41 "movq "MANGLE(wm1010)", %%mm5 \n\t" | |
/* mm6 stays zero for the whole loop and supplies the silent channels. */
| 42 "pxor %%mm6, %%mm6 \n\t" | |
| 43 "1: \n\t" | |
| 44 "movq (%1, %%esi, 2), %%mm0 \n\t" | |
| 45 "movq 8(%1, %%esi, 2), %%mm1 \n\t" | |
| 46 "leal (%%esi, %%esi, 4), %%edi \n\t" | |
/* Subtract the magic constant from each 32-bit word, then pack the four
   results to signed 16 bit with saturation: mm0 = samples A B C D. */
| 47 "psubd %%mm7, %%mm0 \n\t" | |
| 48 "psubd %%mm7, %%mm1 \n\t" | |
| 49 "packssdw %%mm1, %%mm0 \n\t" | |
/* Split into even words (A, C) in mm0 and odd words (B, D) in mm1. */
| 50 "movq %%mm0, %%mm1 \n\t" | |
| 51 "pand %%mm4, %%mm0 \n\t" | |
| 52 "pand %%mm5, %%mm1 \n\t" | |
/* 40 output bytes per iteration = 4 frames of 5 words; the trailing comments
   list the words of each store in increasing address order. */
| 53 "movq %%mm6, (%0, %%edi) \n\t" // 0 0 0 0 | |
| 54 "movd %%mm0, 8(%0, %%edi) \n\t" // A 0 | |
| 55 "pand %%mm3, %%mm0 \n\t" | |
| 56 "movd %%mm6, 12(%0, %%edi) \n\t" // 0 0 | |
| 57 "movd %%mm1, 16(%0, %%edi) \n\t" // 0 B | |
| 58 "pand %%mm3, %%mm1 \n\t" | |
| 59 "movd %%mm6, 20(%0, %%edi) \n\t" // 0 0 | |
| 60 "movq %%mm0, 24(%0, %%edi) \n\t" // 0 0 C 0 | |
| 61 "movq %%mm1, 32(%0, %%edi) \n\t" // 0 0 0 D | |
/* addl sets ZF when esi reaches zero, which ends the loop. */
| 62 "addl $8, %%esi \n\t" | |
| 63 " jnz 1b \n\t" | |
| 64 "emms \n\t" | |
/* %0 = one past the end of the output block, %1 = one past the end of the input block. */
| 65 :: "r" (s16+1280), "r" (f+256) | |
| 66 :"%esi", "%edi", "memory" | |
| 67 ); | |
| 68 return 5*256; | |
| 69 } | |
| 70 | |
/*
 * Convert two planar blocks of 256 samples into interleaved 2-channel int16
 * output using MMX.
 *
 * _f  : input; read as raw 32-bit words. Block 0 is f[0..255], block 1 is
 *       f[256..511].
 * s16 : output; 2*256 int16 values are stored, starting at s16[0].
 *
 * Each output frame is (block 0 sample, block 1 sample).
 * Returns 2*256, the number of int16 values stored.
 * The loop processes 4 frames per iteration.
 */
| 71 static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){ | |
| 72 int32_t * f = (int32_t *) _f; | |
| 73 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it | |
| 74 #ifdef HAVE_SSE | |
| 75 asm volatile( | |
| 76 "movl $-1024, %%esi \n\t" | |
| 77 "1: \n\t" | |
| 78 "cvtps2pi (%1, %%esi), %%mm0 \n\t" | |
| 79 "cvtps2pi 1024(%1, %%esi), %%mm2\n\t" | |
| 80 "movq %%mm0, %%mm1 \n\t" | |
| 81 "punpcklwd %%mm2, %%mm0 \n\t" | |
| 82 "punpckhwd %%mm2, %%mm1 \n\t" | |
| 83 "movq %%mm0, (%0, %%esi) \n\t" | |
| 84 "movq %%mm1, 8(%0, %%esi) \n\t" | |
| 85 "addl $16, %%esi \n\t" | |
| 86 " jnz 1b \n\t" | |
| 87 "emms \n\t" | |
| 88 :: "r" (s16+512), "r" (f+256) | |
| 89 :"%esi", "memory" | |
| 90 );*/ | |
| 91 asm volatile( | |
/* esi is a negative byte offset counting up to zero; it indexes both the
   input (relative to f+256) and the output (relative to s16+512), because
   4 frames take 16 bytes on either side. */
| 92 "movl $-1024, %%esi \n\t" | |
| 93 "movq "MANGLE(magicF2W)", %%mm7 \n\t" | |
| 94 "1: \n\t" | |
/* mm0/mm1 = four words of block 0, mm2/mm3 = four words of block 1. */
| 95 "movq (%1, %%esi), %%mm0 \n\t" | |
| 96 "movq 8(%1, %%esi), %%mm1 \n\t" | |
| 97 "movq 1024(%1, %%esi), %%mm2 \n\t" | |
| 98 "movq 1032(%1, %%esi), %%mm3 \n\t" | |
/* Subtract the magic constant, then pack to signed 16 bit with saturation. */
| 99 "psubd %%mm7, %%mm0 \n\t" | |
| 100 "psubd %%mm7, %%mm1 \n\t" | |
| 101 "psubd %%mm7, %%mm2 \n\t" | |
| 102 "psubd %%mm7, %%mm3 \n\t" | |
| 103 "packssdw %%mm1, %%mm0 \n\t" | |
| 104 "packssdw %%mm3, %%mm2 \n\t" | |
/* Interleave word-wise: mm0 = frames 0-1, mm1 = frames 2-3. */
| 105 "movq %%mm0, %%mm1 \n\t" | |
| 106 "punpcklwd %%mm2, %%mm0 \n\t" | |
| 107 "punpckhwd %%mm2, %%mm1 \n\t" | |
| 108 "movq %%mm0, (%0, %%esi) \n\t" | |
| 109 "movq %%mm1, 8(%0, %%esi) \n\t" | |
/* addl sets ZF when esi reaches zero, which ends the loop. */
| 110 "addl $16, %%esi \n\t" | |
| 111 " jnz 1b \n\t" | |
| 112 "emms \n\t" | |
/* %0 = one past the end of the output block, %1 = start of input block 1. */
| 113 :: "r" (s16+512), "r" (f+256) | |
| 114 :"%esi", "memory" | |
| 115 ); | |
| 116 return 2*256; | |
| 117 } | |
| 118 | |
/*
 * Convert three planar blocks of 256 samples into 5-channel interleaved int16
 * output using MMX.
 *
 * _f  : input; read as raw 32-bit words. Block k is f[256*k .. 256*k+255],
 *       k = 0..2.
 * s16 : output; 5*256 int16 values are stored, starting at s16[0].
 *
 * Each output frame is (block 0, block 2, 0, 0, block 1).
 * Returns 5*256, the number of int16 values stored.
 * The loop processes 4 frames (40 output bytes) per iteration.
 */
| 119 static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ | |
| 120 int32_t * f = (int32_t *) _f; | |
| 121 asm volatile( | |
/* esi is a negative input byte offset (relative to f+256) counting up to zero. */
| 122 "movl $-1024, %%esi \n\t" | |
| 123 "movq "MANGLE(magicF2W)", %%mm7 \n\t" | |
/* mm6 = 0; mm5 = magic constant in the low dword only, zero in the high dword. */
| 124 "pxor %%mm6, %%mm6 \n\t" | |
| 125 "movq %%mm7, %%mm5 \n\t" | |
| 126 "punpckldq %%mm6, %%mm5 \n\t" | |
| 127 "1: \n\t" | |
/* Gather the 32-bit words in output order. Displacements 0 / 1024 / 2048
   select block 0 / 1 / 2; +4, +8, +12 select the next samples. */
| 128 "movd (%1, %%esi), %%mm0 \n\t" | |
| 129 "punpckldq 2048(%1, %%esi), %%mm0\n\t" | |
| 130 "movd 1024(%1, %%esi), %%mm1 \n\t" | |
| 131 "punpckldq 4(%1, %%esi), %%mm1 \n\t" | |
| 132 "movd 2052(%1, %%esi), %%mm2 \n\t" | |
/* The low dword of mm3 is preloaded with the magic constant so that the psubd
   below turns it into a zero output word. */
| 133 "movq %%mm7, %%mm3 \n\t" | |
| 134 "punpckldq 1028(%1, %%esi), %%mm3\n\t" | |
| 135 "movd 8(%1, %%esi), %%mm4 \n\t" | |
| 136 "punpckldq 2056(%1, %%esi), %%mm4\n\t" | |
/* edi = esi*5/2: output byte offset relative to s16+1280
   (16 input bytes per block correspond to 40 output bytes). */
| 137 "leal (%%esi, %%esi, 4), %%edi \n\t" | |
| 138 "sarl $1, %%edi \n\t" | |
| 139 "psubd %%mm7, %%mm0 \n\t" | |
| 140 "psubd %%mm7, %%mm1 \n\t" | |
/* mm5 is subtracted here so the zero high dword of mm2 stays zero. */
| 141 "psubd %%mm5, %%mm2 \n\t" | |
| 142 "psubd %%mm7, %%mm3 \n\t" | |
| 143 "psubd %%mm7, %%mm4 \n\t" | |
/* Saturating pack to 16 bit; yields output words 0-11 of this iteration. */
| 144 "packssdw %%mm6, %%mm0 \n\t" | |
| 145 "packssdw %%mm2, %%mm1 \n\t" | |
| 146 "packssdw %%mm4, %%mm3 \n\t" | |
| 147 "movq %%mm0, (%0, %%edi) \n\t" | |
| 148 "movq %%mm1, 8(%0, %%edi) \n\t" | |
| 149 "movq %%mm3, 16(%0, %%edi) \n\t" | |
| 2967 | 150 |
/* Second half: output words 12-19 (rest of frame 2 and all of frame 3). */
| 1193 | 151 "movd 1032(%1, %%esi), %%mm1 \n\t" |
| 152 "punpckldq 12(%1, %%esi), %%mm1\n\t" | |
| 153 "movd 2060(%1, %%esi), %%mm2 \n\t" | |
| 154 "movq %%mm7, %%mm3 \n\t" | |
| 155 "punpckldq 1036(%1, %%esi), %%mm3\n\t" | |
| 156 "pxor %%mm0, %%mm0 \n\t" | |
| 157 "psubd %%mm7, %%mm1 \n\t" | |
| 158 "psubd %%mm5, %%mm2 \n\t" | |
| 159 "psubd %%mm7, %%mm3 \n\t" | |
| 160 "packssdw %%mm1, %%mm0 \n\t" | |
| 161 "packssdw %%mm3, %%mm2 \n\t" | |
| 162 "movq %%mm0, 24(%0, %%edi) \n\t" | |
| 163 "movq %%mm2, 32(%0, %%edi) \n\t" | |
| 2967 | 164 |
/* addl sets ZF when esi reaches zero, which ends the loop. */
| 1193 | 165 "addl $16, %%esi \n\t" |
| 166 " jnz 1b \n\t" | |
| 167 "emms \n\t" | |
/* %0 = one past the end of the output block, %1 = start of input block 1. */
| 168 :: "r" (s16+1280), "r" (f+256) | |
| 169 :"%esi", "%edi", "memory" | |
| 170 ); | |
| 171 return 5*256; | |
| 172 } | |
| 173 | |
/*
 * Convert four planar blocks of 256 samples into 4-channel interleaved int16
 * output using MMX.
 *
 * _f  : input; read as raw 32-bit words. Block k is f[256*k .. 256*k+255],
 *       k = 0..3.
 * s16 : output; 4*256 int16 values are stored, starting at s16[0].
 *
 * Each output frame is (block 0, block 1, block 2, block 3).
 * Returns 4*256, the number of int16 values stored.
 * The loop processes 4 frames (32 output bytes) per iteration.
 */
| 174 static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){ | |
| 175 int32_t * f = (int32_t *) _f; | |
| 176 asm volatile( | |
/* esi is a negative input byte offset (relative to f+256) counting up to
   zero; esi*2 is the output byte offset relative to s16+1024. */
| 177 "movl $-1024, %%esi \n\t" | |
| 178 "movq "MANGLE(magicF2W)", %%mm7 \n\t" | |
| 179 "1: \n\t" | |
/* Blocks 0 and 1: load four words each, subtract the magic constant and pack
   to signed 16 bit with saturation (mm0 = block 0, mm2 = block 1). */
| 180 "movq (%1, %%esi), %%mm0 \n\t" | |
| 181 "movq 8(%1, %%esi), %%mm1 \n\t" | |
| 182 "movq 1024(%1, %%esi), %%mm2 \n\t" | |
| 183 "movq 1032(%1, %%esi), %%mm3 \n\t" | |
| 184 "psubd %%mm7, %%mm0 \n\t" | |
| 185 "psubd %%mm7, %%mm1 \n\t" | |
| 186 "psubd %%mm7, %%mm2 \n\t" | |
| 187 "psubd %%mm7, %%mm3 \n\t" | |
| 188 "packssdw %%mm1, %%mm0 \n\t" | |
| 189 "packssdw %%mm3, %%mm2 \n\t" | |
/* Blocks 2 and 3, same treatment (mm3 = block 2, mm5 = block 3). */
| 190 "movq 2048(%1, %%esi), %%mm3 \n\t" | |
| 191 "movq 2056(%1, %%esi), %%mm4 \n\t" | |
| 192 "movq 3072(%1, %%esi), %%mm5 \n\t" | |
| 193 "movq 3080(%1, %%esi), %%mm6 \n\t" | |
| 194 "psubd %%mm7, %%mm3 \n\t" | |
| 195 "psubd %%mm7, %%mm4 \n\t" | |
| 196 "psubd %%mm7, %%mm5 \n\t" | |
| 197 "psubd %%mm7, %%mm6 \n\t" | |
| 198 "packssdw %%mm4, %%mm3 \n\t" | |
| 199 "packssdw %%mm6, %%mm5 \n\t" | |
/* 4x4 transpose of 16-bit words: first interleave word pairs (0,1) and (2,3),
   then interleave the resulting dwords so each quadword is one frame. */
| 200 "movq %%mm0, %%mm1 \n\t" | |
| 201 "movq %%mm3, %%mm4 \n\t" | |
| 202 "punpcklwd %%mm2, %%mm0 \n\t" | |
| 203 "punpckhwd %%mm2, %%mm1 \n\t" | |
| 204 "punpcklwd %%mm5, %%mm3 \n\t" | |
| 205 "punpckhwd %%mm5, %%mm4 \n\t" | |
| 206 "movq %%mm0, %%mm2 \n\t" | |
| 207 "movq %%mm1, %%mm5 \n\t" | |
| 208 "punpckldq %%mm3, %%mm0 \n\t" | |
| 209 "punpckhdq %%mm3, %%mm2 \n\t" | |
| 210 "punpckldq %%mm4, %%mm1 \n\t" | |
| 211 "punpckhdq %%mm4, %%mm5 \n\t" | |
/* Store frames 0..3 in order. */
| 212 "movq %%mm0, (%0, %%esi,2) \n\t" | |
| 213 "movq %%mm2, 8(%0, %%esi,2) \n\t" | |
| 214 "movq %%mm1, 16(%0, %%esi,2) \n\t" | |
| 215 "movq %%mm5, 24(%0, %%esi,2) \n\t" | |
/* addl sets ZF when esi reaches zero, which ends the loop. */
| 216 "addl $16, %%esi \n\t" | |
| 217 " jnz 1b \n\t" | |
| 218 "emms \n\t" | |
/* %0 = one past the end of the output block, %1 = start of input block 1. */
| 219 :: "r" (s16+1024), "r" (f+256) | |
| 220 :"%esi", "memory" | |
| 221 ); | |
| 222 return 4*256; | |
| 223 } | |
| 224 | |
/*
 * Convert five planar blocks of 256 samples into 5-channel interleaved int16
 * output using MMX.
 *
 * _f  : input; read as raw 32-bit words. Block k is f[256*k .. 256*k+255],
 *       k = 0..4.
 * s16 : output; 5*256 int16 values are stored, starting at s16[0].
 *
 * Each output frame is (block 0, block 2, block 3, block 4, block 1).
 * Returns 5*256, the number of int16 values stored.
 * The loop processes 4 frames (40 output bytes) per iteration.
 */
| 225 static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ | |
| 226 int32_t * f = (int32_t *) _f; | |
| 227 asm volatile( | |
/* esi is a negative input byte offset (relative to f+256) counting up to zero. */
| 228 "movl $-1024, %%esi \n\t" | |
| 229 "movq "MANGLE(magicF2W)", %%mm7 \n\t" | |
| 230 "1: \n\t" | |
/* Gather the 32-bit words directly in output order. Displacements
   0 / 1024 / 2048 / 3072 / 4096 select block 0..4; +4, +8, +12 select the
   next samples. mm0..mm5 end up holding output words 0-11 as dwords. */
| 231 "movd (%1, %%esi), %%mm0 \n\t" | |
| 232 "punpckldq 2048(%1, %%esi), %%mm0\n\t" | |
| 233 "movd 3072(%1, %%esi), %%mm1 \n\t" | |
| 234 "punpckldq 4096(%1, %%esi), %%mm1\n\t" | |
| 235 "movd 1024(%1, %%esi), %%mm2 \n\t" | |
| 236 "punpckldq 4(%1, %%esi), %%mm2 \n\t" | |
| 237 "movd 2052(%1, %%esi), %%mm3 \n\t" | |
| 238 "punpckldq 3076(%1, %%esi), %%mm3\n\t" | |
| 239 "movd 4100(%1, %%esi), %%mm4 \n\t" | |
| 240 "punpckldq 1028(%1, %%esi), %%mm4\n\t" | |
| 241 "movd 8(%1, %%esi), %%mm5 \n\t" | |
| 242 "punpckldq 2056(%1, %%esi), %%mm5\n\t" | |
/* edi = esi*5/2: output byte offset relative to s16+1280. */
| 243 "leal (%%esi, %%esi, 4), %%edi \n\t" | |
| 244 "sarl $1, %%edi \n\t" | |
/* Subtract the magic constant, then pack to signed 16 bit with saturation. */
| 245 "psubd %%mm7, %%mm0 \n\t" | |
| 246 "psubd %%mm7, %%mm1 \n\t" | |
| 247 "psubd %%mm7, %%mm2 \n\t" | |
| 248 "psubd %%mm7, %%mm3 \n\t" | |
| 249 "psubd %%mm7, %%mm4 \n\t" | |
| 250 "psubd %%mm7, %%mm5 \n\t" | |
| 251 "packssdw %%mm1, %%mm0 \n\t" | |
| 252 "packssdw %%mm3, %%mm2 \n\t" | |
| 253 "packssdw %%mm5, %%mm4 \n\t" | |
| 254 "movq %%mm0, (%0, %%edi) \n\t" | |
| 255 "movq %%mm2, 8(%0, %%edi) \n\t" | |
| 256 "movq %%mm4, 16(%0, %%edi) \n\t" | |
| 2967 | 257 |
/* Second half: output words 12-19 (rest of frame 2 and all of frame 3). */
| 1193 | 258 "movd 3080(%1, %%esi), %%mm0 \n\t" |
| 259 "punpckldq 4104(%1, %%esi), %%mm0\n\t" | |
| 260 "movd 1032(%1, %%esi), %%mm1 \n\t" | |
| 261 "punpckldq 12(%1, %%esi), %%mm1\n\t" | |
| 262 "movd 2060(%1, %%esi), %%mm2 \n\t" | |
| 263 "punpckldq 3084(%1, %%esi), %%mm2\n\t" | |
| 264 "movd 4108(%1, %%esi), %%mm3 \n\t" | |
| 265 "punpckldq 1036(%1, %%esi), %%mm3\n\t" | |
| 266 "psubd %%mm7, %%mm0 \n\t" | |
| 267 "psubd %%mm7, %%mm1 \n\t" | |
| 268 "psubd %%mm7, %%mm2 \n\t" | |
| 269 "psubd %%mm7, %%mm3 \n\t" | |
| 270 "packssdw %%mm1, %%mm0 \n\t" | |
| 271 "packssdw %%mm3, %%mm2 \n\t" | |
| 272 "movq %%mm0, 24(%0, %%edi) \n\t" | |
| 273 "movq %%mm2, 32(%0, %%edi) \n\t" | |
| 2967 | 274 |
/* addl sets ZF when esi reaches zero, which ends the loop. */
| 1193 | 275 "addl $16, %%esi \n\t" |
| 276 " jnz 1b \n\t" | |
| 277 "emms \n\t" | |
/* %0 = one past the end of the output block, %1 = start of input block 1. */
| 278 :: "r" (s16+1280), "r" (f+256) | |
| 279 :"%esi", "%edi", "memory" | |
| 280 ); | |
| 281 return 5*256; | |
| 282 } | |
| 283 | |
/*
 * Convert two planar blocks of 256 samples into 6-channel interleaved int16
 * output using MMX.
 *
 * _f  : input; read as raw 32-bit words. Block 0 is f[0..255], block 1 is
 *       f[256..511].
 * s16 : output; 6*256 int16 values are stored, starting at s16[0].
 *
 * Each output frame is (0, 0, 0, 0, block 1, block 0).
 * Returns 6*256, the number of int16 values stored.
 * The loop processes 4 frames (48 output bytes) per iteration.
 */
| 284 static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
| 285 int32_t * f = (int32_t *) _f; | |
| 286 asm volatile( | |
/* esi is a negative input byte offset (relative to f+256) counting up to zero. */
| 287 "movl $-1024, %%esi \n\t" | |
| 288 "movq "MANGLE(magicF2W)", %%mm7 \n\t" | |
/* mm6 stays zero for the whole loop and supplies the four silent channels. */
| 289 "pxor %%mm6, %%mm6 \n\t" | |
| 290 "1: \n\t" | |
/* mm0/mm1 = four words of block 1, mm2/mm3 = four words of block 0. */
| 291 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
| 292 "movq 1032(%1, %%esi), %%mm1 \n\t" | |
| 293 "movq (%1, %%esi), %%mm2 \n\t" | |
| 294 "movq 8(%1, %%esi), %%mm3 \n\t" | |
/* Subtract the magic constant, then pack to signed 16 bit with saturation. */
| 295 "psubd %%mm7, %%mm0 \n\t" | |
| 296 "psubd %%mm7, %%mm1 \n\t" | |
| 297 "psubd %%mm7, %%mm2 \n\t" | |
| 298 "psubd %%mm7, %%mm3 \n\t" | |
| 299 "packssdw %%mm1, %%mm0 \n\t" | |
| 300 "packssdw %%mm3, %%mm2 \n\t" | |
/* Interleave into (block 1, block 0) pairs: mm0 = frames 0-1, mm1 = frames 2-3. */
| 301 "movq %%mm0, %%mm1 \n\t" | |
| 302 "punpcklwd %%mm2, %%mm0 \n\t" | |
| 303 "punpckhwd %%mm2, %%mm1 \n\t" | |
/* edi = esi*3: output byte offset relative to s16+1536. */
| 304 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
/* Per frame: 8 zero bytes followed by one 4-byte (block 1, block 0) pair. */
| 305 "movq %%mm6, (%0, %%edi) \n\t" | |
| 306 "movd %%mm0, 8(%0, %%edi) \n\t" | |
/* Move the high pair down so movd can store it. */
| 307 "punpckhdq %%mm0, %%mm0 \n\t" | |
| 308 "movq %%mm6, 12(%0, %%edi) \n\t" | |
| 309 "movd %%mm0, 20(%0, %%edi) \n\t" | |
| 310 "movq %%mm6, 24(%0, %%edi) \n\t" | |
| 311 "movd %%mm1, 32(%0, %%edi) \n\t" | |
| 312 "punpckhdq %%mm1, %%mm1 \n\t" | |
| 313 "movq %%mm6, 36(%0, %%edi) \n\t" | |
| 314 "movd %%mm1, 44(%0, %%edi) \n\t" | |
/* addl sets ZF when esi reaches zero, which ends the loop. */
| 315 "addl $16, %%esi \n\t" | |
| 316 " jnz 1b \n\t" | |
| 317 "emms \n\t" | |
/* %0 = one past the end of the output block, %1 = start of input block 1. */
| 318 :: "r" (s16+1536), "r" (f+256) | |
| 319 :"%esi", "%edi", "memory" | |
| 320 ); | |
| 321 return 6*256; | |
| 322 } | |
| 323 | |
/*
 * Convert three planar blocks of 256 samples into 6-channel interleaved int16
 * output using MMX.
 *
 * _f  : input; read as raw 32-bit words. Block k is f[256*k .. 256*k+255],
 *       k = 0..2.
 * s16 : output; 6*256 int16 values are stored, starting at s16[0].
 *
 * Each output frame is (block 1, block 2, 0, 0, 0, block 0).
 * Returns 6*256, the number of int16 values stored.
 * The loop processes 2 frames (24 output bytes) per iteration.
 *
 * The trailing register comments list 16-bit words from most to least
 * significant. Lower-case letters are the first frame of the iteration,
 * upper-case the second; a/b/f are the samples of block 1/2/0.
 */
| 324 static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
| 325 int32_t * f = (int32_t *) _f; | |
| 326 asm volatile( | |
/* esi is a negative input byte offset (relative to f+256) counting up to zero. */
| 327 "movl $-1024, %%esi \n\t" | |
| 328 "movq "MANGLE(magicF2W)", %%mm7 \n\t" | |
| 329 "pxor %%mm6, %%mm6 \n\t" | |
| 330 "1: \n\t" | |
/* Two samples each of block 1 (mm0), block 2 (mm1) and block 0 (mm5). */
| 331 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
| 332 "movq 2048(%1, %%esi), %%mm1 \n\t" | |
| 2967 | 333 "movq (%1, %%esi), %%mm5 \n\t" |
| 1193 | 334 "psubd %%mm7, %%mm0 \n\t" |
| 335 "psubd %%mm7, %%mm1 \n\t" | |
| 336 "psubd %%mm7, %%mm5 \n\t" | |
/* edi = esi*3: output byte offset relative to s16+1536. */
| 337 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
| 2967 | 338 |
| 1193 | 339 "pxor %%mm4, %%mm4 \n\t" |
| 340 "packssdw %%mm5, %%mm0 \n\t" // FfAa | |
| 341 "packssdw %%mm4, %%mm1 \n\t" // 00Bb | |
| 342 "punpckhwd %%mm0, %%mm4 \n\t" // F0f0 | |
| 343 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
| 344 "movq %%mm0, %%mm1 \n\t" // BAba | |
/* mm3 is read here without having been written in this iteration; its stale
   low half (XX) is discarded by the punpckhdq two instructions below. */
| 345 "punpckldq %%mm4, %%mm3 \n\t" // f0XX | |
| 346 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
| 347 "punpckhdq %%mm1, %%mm3 \n\t" // BAf0 | |
| 2967 | 348 |
| 1193 | 349 "movq %%mm0, (%0, %%edi) \n\t" // 00ba |
| 350 "punpckhdq %%mm4, %%mm0 \n\t" // F000 | |
| 351 "movq %%mm3, 8(%0, %%edi) \n\t" // BAf0 | |
| 352 "movq %%mm0, 16(%0, %%edi) \n\t" // F000 | |
/* addl sets ZF when esi reaches zero, which ends the loop. */
| 353 "addl $8, %%esi \n\t" | |
| 354 " jnz 1b \n\t" | |
| 355 "emms \n\t" | |
/* %0 = one past the end of the output block, %1 = start of input block 1. */
| 356 :: "r" (s16+1536), "r" (f+256) | |
| 357 :"%esi", "%edi", "memory" | |
| 358 ); | |
| 359 return 6*256; | |
| 360 } | |
| 361 | |
/*
 * Convert four planar blocks of 256 samples into 6-channel interleaved int16
 * output using MMX.
 *
 * _f  : input; read as raw 32-bit words. Block k is f[256*k .. 256*k+255],
 *       k = 0..3.
 * s16 : output; 6*256 int16 values are stored, starting at s16[0].
 *
 * Each output frame is (block 1, block 3, 0, 0, block 2, block 0).
 * Returns 6*256, the number of int16 values stored.
 * The loop processes 2 frames (24 output bytes) per iteration.
 *
 * The trailing register comments list 16-bit words from most to least
 * significant. Lower-case letters are the first frame of the iteration,
 * upper-case the second; a/b/e/f are the samples of block 1/3/2/0.
 */
| 362 static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
| 363 int32_t * f = (int32_t *) _f; | |
| 364 asm volatile( | |
/* esi is a negative input byte offset (relative to f+256) counting up to zero. */
| 365 "movl $-1024, %%esi \n\t" | |
| 366 "movq "MANGLE(magicF2W)", %%mm7 \n\t" | |
| 367 "pxor %%mm6, %%mm6 \n\t" | |
| 368 "1: \n\t" | |
/* Two samples each of block 1 (mm0), block 3 (mm1), block 2 (mm4), block 0 (mm5). */
| 369 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
| 370 "movq 3072(%1, %%esi), %%mm1 \n\t" | |
| 371 "movq 2048(%1, %%esi), %%mm4 \n\t" | |
| 2967 | 372 "movq (%1, %%esi), %%mm5 \n\t" |
| 1193 | 373 "psubd %%mm7, %%mm0 \n\t" |
| 374 "psubd %%mm7, %%mm1 \n\t" | |
| 375 "psubd %%mm7, %%mm4 \n\t" | |
| 376 "psubd %%mm7, %%mm5 \n\t" | |
/* edi = esi*3: output byte offset relative to s16+1536. */
| 377 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
| 2967 | 378 |
| 1193 | 379 "packssdw %%mm4, %%mm0 \n\t" // EeAa |
| 380 "packssdw %%mm5, %%mm1 \n\t" // FfBb | |
| 381 "movq %%mm0, %%mm2 \n\t" // EeAa | |
| 382 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
| 383 "punpckhwd %%mm1, %%mm2 \n\t" // FEfe | |
| 384 "movq %%mm0, %%mm1 \n\t" // BAba | |
| 385 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
| 386 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
| 2967 | 387 |
/* Stores, in address order: 00ba, BAfe, FE00. */
| 1193 | 388 "movq %%mm0, (%0, %%edi) \n\t" |
| 389 "punpckhdq %%mm2, %%mm0 \n\t" // FE00 | |
| 390 "punpckldq %%mm1, %%mm2 \n\t" // BAfe | |
| 391 "movq %%mm2, 8(%0, %%edi) \n\t" | |
| 392 "movq %%mm0, 16(%0, %%edi) \n\t" | |
/* addl sets ZF when esi reaches zero, which ends the loop. */
| 393 "addl $8, %%esi \n\t" | |
| 394 " jnz 1b \n\t" | |
| 395 "emms \n\t" | |
/* %0 = one past the end of the output block, %1 = start of input block 1. */
| 396 :: "r" (s16+1536), "r" (f+256) | |
| 397 :"%esi", "%edi", "memory" | |
| 398 ); | |
| 399 return 6*256; | |
| 400 } | |
| 401 | |
/*
 * Convert five planar blocks of 256 samples into 6-channel interleaved int16
 * output using MMX.
 *
 * _f  : input; read as raw 32-bit words. Block k is f[256*k .. 256*k+255],
 *       k = 0..4.
 * s16 : output; 6*256 int16 values are stored, starting at s16[0].
 *
 * Each output frame is (block 1, block 2, block 3, block 4, 0, block 0).
 * Returns 6*256, the number of int16 values stored.
 * The loop processes 2 frames (24 output bytes) per iteration.
 *
 * The trailing register comments list 16-bit words from most to least
 * significant. Lower-case letters are the first frame of the iteration,
 * upper-case the second; a/b/c/d/f are the samples of block 1/2/3/4/0.
 */
| 402 static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
| 403 int32_t * f = (int32_t *) _f; | |
| 404 asm volatile( | |
/* esi is a negative input byte offset (relative to f+256) counting up to zero. */
| 405 "movl $-1024, %%esi \n\t" | |
| 406 "movq "MANGLE(magicF2W)", %%mm7 \n\t" | |
| 407 // "pxor %%mm6, %%mm6 \n\t" | |
| 408 "1: \n\t" | |
/* Two samples each of block 1..4 (mm0..mm3) and block 0 (mm5). */
| 409 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
| 410 "movq 2048(%1, %%esi), %%mm1 \n\t" | |
| 411 "movq 3072(%1, %%esi), %%mm2 \n\t" | |
| 412 "movq 4096(%1, %%esi), %%mm3 \n\t" | |
| 2967 | 413 "movq (%1, %%esi), %%mm5 \n\t" |
| 1193 | 414 "psubd %%mm7, %%mm0 \n\t" |
| 415 "psubd %%mm7, %%mm1 \n\t" | |
| 416 "psubd %%mm7, %%mm2 \n\t" | |
| 417 "psubd %%mm7, %%mm3 \n\t" | |
| 418 "psubd %%mm7, %%mm5 \n\t" | |
/* edi = esi*3: output byte offset relative to s16+1536. */
| 419 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
| 2967 | 420 |
| 1193 | 421 "packssdw %%mm2, %%mm0 \n\t" // CcAa |
| 422 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
| 423 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
| 424 "movq %%mm0, %%mm2 \n\t" // CcAa | |
| 425 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
| 426 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
| 427 "pxor %%mm4, %%mm4 \n\t" // 0000 | |
| 428 "punpcklwd %%mm5, %%mm4 \n\t" // F0f0 | |
| 429 "movq %%mm0, %%mm1 \n\t" // BAba | |
| 430 "movq %%mm4, %%mm3 \n\t" // F0f0 | |
| 431 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
| 432 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
| 433 "punpckldq %%mm1, %%mm4 \n\t" // BAf0 | |
| 434 "punpckhdq %%mm3, %%mm2 \n\t" // F0DC | |
| 2967 | 435 |
/* Stores, in address order: dcba, BAf0, F0DC. */
| 1193 | 436 "movq %%mm0, (%0, %%edi) \n\t" |
| 437 "movq %%mm4, 8(%0, %%edi) \n\t" | |
| 438 "movq %%mm2, 16(%0, %%edi) \n\t" | |
/* addl sets ZF when esi reaches zero, which ends the loop. */
| 439 "addl $8, %%esi \n\t" | |
| 440 " jnz 1b \n\t" | |
| 441 "emms \n\t" | |
/* %0 = one past the end of the output block, %1 = start of input block 1. */
| 442 :: "r" (s16+1536), "r" (f+256) | |
| 443 :"%esi", "%edi", "memory" | |
| 444 ); | |
| 445 return 6*256; | |
| 446 } | |
| 447 | |
/*
 * Convert six planar blocks of 256 samples into 6-channel interleaved int16
 * output using MMX.
 *
 * _f  : input; read as raw 32-bit words. Block k is f[256*k .. 256*k+255],
 *       k = 0..5.
 * s16 : output; 6*256 int16 values are stored, starting at s16[0].
 *
 * Each output frame is (block 1, block 3, block 4, block 5, block 2, block 0).
 * Returns 6*256, the number of int16 values stored.
 * The loop processes 2 frames (24 output bytes) per iteration.
 *
 * The trailing register comments list 16-bit words from most to least
 * significant. Lower-case letters are the first frame of the iteration,
 * upper-case the second; a/b/c/d/e/f are the samples of block 1/3/4/5/2/0.
 */
| 448 static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
| 449 int32_t * f = (int32_t *) _f; | |
| 450 asm volatile( | |
/* esi is a negative input byte offset (relative to f+256) counting up to zero. */
| 451 "movl $-1024, %%esi \n\t" | |
| 452 "movq "MANGLE(magicF2W)", %%mm7 \n\t" | |
| 453 // "pxor %%mm6, %%mm6 \n\t" | |
| 454 "1: \n\t" | |
/* Two samples each of block 1, 3, 4, 5 (mm0..mm3), block 2 (mm4), block 0 (mm5). */
| 455 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
| 456 "movq 3072(%1, %%esi), %%mm1 \n\t" | |
| 457 "movq 4096(%1, %%esi), %%mm2 \n\t" | |
| 458 "movq 5120(%1, %%esi), %%mm3 \n\t" | |
| 459 "movq 2048(%1, %%esi), %%mm4 \n\t" | |
| 2967 | 460 "movq (%1, %%esi), %%mm5 \n\t" |
| 1193 | 461 "psubd %%mm7, %%mm0 \n\t" |
| 462 "psubd %%mm7, %%mm1 \n\t" | |
| 463 "psubd %%mm7, %%mm2 \n\t" | |
| 464 "psubd %%mm7, %%mm3 \n\t" | |
| 465 "psubd %%mm7, %%mm4 \n\t" | |
| 466 "psubd %%mm7, %%mm5 \n\t" | |
/* edi = esi*3: output byte offset relative to s16+1536. */
| 467 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
| 2967 | 468 |
| 1193 | 469 "packssdw %%mm2, %%mm0 \n\t" // CcAa |
| 470 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
| 471 "packssdw %%mm4, %%mm4 \n\t" // EeEe | |
| 472 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
| 473 "movq %%mm0, %%mm2 \n\t" // CcAa | |
| 474 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
| 475 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
| 476 "punpcklwd %%mm5, %%mm4 \n\t" // FEfe | |
| 477 "movq %%mm0, %%mm1 \n\t" // BAba | |
| 478 "movq %%mm4, %%mm3 \n\t" // FEfe | |
| 479 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
| 480 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
| 481 "punpckldq %%mm1, %%mm4 \n\t" // BAfe | |
| 482 "punpckhdq %%mm3, %%mm2 \n\t" // FEDC | |
| 2967 | 483 |
/* Stores, in address order: dcba, BAfe, FEDC. */
| 1193 | 484 "movq %%mm0, (%0, %%edi) \n\t" |
| 485 "movq %%mm4, 8(%0, %%edi) \n\t" | |
| 486 "movq %%mm2, 16(%0, %%edi) \n\t" | |
/* addl sets ZF when esi reaches zero, which ends the loop. */
| 487 "addl $8, %%esi \n\t" | |
| 488 " jnz 1b \n\t" | |
| 489 "emms \n\t" | |
/* %0 = one past the end of the output block, %1 = start of input block 1. */
| 490 :: "r" (s16+1536), "r" (f+256) | |
| 491 :"%esi", "%edi", "memory" | |
| 492 ); | |
| 493 return 6*256; | |
| 494 } | |
| 495 | |
| 496 | |
| 497 static void* a52_resample_MMX(int flags, int ch){ | |
| 498 switch (flags) { | |
| 499 case A52_MONO: | |
| 500 if(ch==5) return a52_resample_MONO_to_5_MMX; | |
| 501 break; | |
| 502 case A52_CHANNEL: | |
| 503 case A52_STEREO: | |
| 504 case A52_DOLBY: | |
| 505 if(ch==2) return a52_resample_STEREO_to_2_MMX; | |
| 506 break; | |
| 507 case A52_3F: | |
| 508 if(ch==5) return a52_resample_3F_to_5_MMX; | |
| 509 break; | |
| 510 case A52_2F2R: | |
| 511 if(ch==4) return a52_resample_2F_2R_to_4_MMX; | |
| 512 break; | |
| 513 case A52_3F2R: | |
| 514 if(ch==5) return a52_resample_3F_2R_to_5_MMX; | |
| 515 break; | |
| 516 case A52_MONO | A52_LFE: | |
| 517 if(ch==6) return a52_resample_MONO_LFE_to_6_MMX; | |
| 518 break; | |
| 519 case A52_CHANNEL | A52_LFE: | |
| 520 case A52_STEREO | A52_LFE: | |
| 521 case A52_DOLBY | A52_LFE: | |
| 522 if(ch==6) return a52_resample_STEREO_LFE_to_6_MMX; | |
| 523 break; | |
| 524 case A52_3F | A52_LFE: | |
| 525 if(ch==6) return a52_resample_3F_LFE_to_6_MMX; | |
| 526 break; | |
| 527 case A52_2F2R | A52_LFE: | |
| 528 if(ch==6) return a52_resample_2F_2R_LFE_to_6_MMX; | |
| 529 break; | |
| 530 case A52_3F2R | A52_LFE: | |
| 531 if(ch==6) return a52_resample_3F_2R_LFE_to_6_MMX; | |
| 532 break; | |
| 533 } | |
| 534 return NULL; | |
| 535 } | |
| 536 | |
| 537 |
