annotate arm/fft_init_arm.c @ 11032:01bd040f8607 libavcodec

Unroll main loop so the edge==0 case is separate. This allows many things to be simplified away. h264 decoder is overall 1% faster with a mbaff sample and 0.1% slower with the cathedral sample, probably because the slow loop filter code must be loaded into the code cache for each first MB of each row but isn't used for the following MBs.
author michael
date Thu, 28 Jan 2010 01:24:25 +0000
parents 89a852950c34
children 4b3da727d832
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10175
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
1 /*
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
2 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
3 *
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
4 * This file is part of FFmpeg.
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
5 *
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
6 * FFmpeg is free software; you can redistribute it and/or
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
8 * License as published by the Free Software Foundation; either
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
9 * version 2.1 of the License, or (at your option) any later version.
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
10 *
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
11 * FFmpeg is distributed in the hope that it will be useful,
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
14 * Lesser General Public License for more details.
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
15 *
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
17 * License along with FFmpeg; if not, write to the Free Software
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
19 */
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
20
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
21 #include "libavcodec/dsputil.h"
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
22
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
23 void ff_fft_permute_neon(FFTContext *s, FFTComplex *z);
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
24 void ff_fft_calc_neon(FFTContext *s, FFTComplex *z);
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
25
10199
38ab367d4231 Merge FFTContext and MDCTContext
mru
parents: 10175
diff changeset
26 void ff_imdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
38ab367d4231 Merge FFTContext and MDCTContext
mru
parents: 10175
diff changeset
27 void ff_imdct_half_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
38ab367d4231 Merge FFTContext and MDCTContext
mru
parents: 10175
diff changeset
28 void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
10175
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
29
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
/**
 * ARM-specific initialisation hook for an FFTContext.
 *
 * When HAVE_NEON is non-zero, the fft_permute, fft_calc, imdct_calc,
 * imdct_half and mdct_calc members of the context are pointed at the
 * ff_*_neon routines declared above, and the permutation member is set to
 * FF_MDCT_PERM_INTERLEAVE. When HAVE_NEON is zero the context is left
 * untouched.
 *
 * NOTE(review): the FF_MDCT_PERM_INTERLEAVE setting presumably has to match
 * the table layout the NEON MDCT code expects (the changeset that added it
 * is titled "interleave cos/sin tables for improved NEON MDCT") -- confirm
 * against the generic MDCT init code.
 *
 * @param s context whose function pointers and permutation mode are set
 */
30 av_cold void ff_fft_init_arm(FFTContext *s)
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
31 {
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
32 if (HAVE_NEON) {
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
33 s->fft_permute = ff_fft_permute_neon;
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
34 s->fft_calc = ff_fft_calc_neon;
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
35 s->imdct_calc = ff_imdct_calc_neon;
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
36 s->imdct_half = ff_imdct_half_neon;
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
37 s->mdct_calc = ff_mdct_calc_neon;
10205
89a852950c34 ARM: interleave cos/sin tables for improved NEON MDCT
mru
parents: 10199
diff changeset
38 s->permutation = FF_MDCT_PERM_INTERLEAVE;
10175
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
39 }
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents:
diff changeset
40 }