Mercurial > libavcodec.hg
annotate arm/dsputil_init_armv6.c @ 12510:ef2f2db5b7be libavcodec
Unroll the loop in h264_idct_add8_sse2(). This also lets us inline scan8[] directly in
the code and remove the loop setup. The function is 20% faster, 0.8% faster overall.
See "[PATCH] unroll loop in h264_idct_add8_sse2()" thread on ML.
| author | rbultje |
|---|---|
| date | Fri, 24 Sep 2010 14:05:45 +0000 |
| parents | ad6d17b36a3a |
| children |
| rev | line source |
|---|---|
| 7060 | 1 /* |
| 10359 | 2 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> |
| 7060 | 3 * |
| 4 * This file is part of FFmpeg. | |
| 5 * | |
| 6 * FFmpeg is free software; you can redistribute it and/or | |
| 7 * modify it under the terms of the GNU Lesser General Public | |
| 8 * License as published by the Free Software Foundation; either | |
| 9 * version 2.1 of the License, or (at your option) any later version. | |
| 10 * | |
| 11 * FFmpeg is distributed in the hope that it will be useful, | |
| 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 14 * Lesser General Public License for more details. | |
| 15 * | |
| 16 * You should have received a copy of the GNU Lesser General Public | |
| 17 * License along with FFmpeg; if not, write to the Free Software | |
| 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
| 19 */ | |
| 20 | |
|
11108
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
21 #include <stdint.h> |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
22 |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
23 #include "libavcodec/avcodec.h" |
| 7060 | 24 #include "libavcodec/dsputil.h" |
| 10359 | 25 #include "dsputil_arm.h" |
| 7060 | 26 |
| 10359 | 27 void ff_simple_idct_armv6(DCTELEM *data); |
| 28 void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data); | |
| 29 void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data); | |
| 7060 | 30 |
|
11108
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
31 void ff_put_pixels16_armv6(uint8_t *, const uint8_t *, int, int); |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
32 void ff_put_pixels16_x2_armv6(uint8_t *, const uint8_t *, int, int); |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
33 void ff_put_pixels16_y2_armv6(uint8_t *, const uint8_t *, int, int); |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
34 |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
35 void ff_put_pixels16_x2_no_rnd_armv6(uint8_t *, const uint8_t *, int, int); |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
36 void ff_put_pixels16_y2_no_rnd_armv6(uint8_t *, const uint8_t *, int, int); |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
37 |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
38 void ff_avg_pixels16_armv6(uint8_t *, const uint8_t *, int, int); |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
39 |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
40 void ff_put_pixels8_armv6(uint8_t *, const uint8_t *, int, int); |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
41 void ff_put_pixels8_x2_armv6(uint8_t *, const uint8_t *, int, int); |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
42 void ff_put_pixels8_y2_armv6(uint8_t *, const uint8_t *, int, int); |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
43 |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
44 void ff_put_pixels8_x2_no_rnd_armv6(uint8_t *, const uint8_t *, int, int); |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
45 void ff_put_pixels8_y2_no_rnd_armv6(uint8_t *, const uint8_t *, int, int); |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
46 |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
47 void ff_avg_pixels8_armv6(uint8_t *, const uint8_t *, int, int); |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
48 |
| 10372 | 49 void ff_add_pixels_clamped_armv6(const DCTELEM *block, |
| 50 uint8_t *restrict pixels, | |
| 51 int line_size); | |
| 52 | |
| 11113 | 53 void ff_get_pixels_armv6(DCTELEM *block, const uint8_t *pixels, int stride); |
| 11114 | 54 void ff_diff_pixels_armv6(DCTELEM *block, const uint8_t *s1, |
| 55 const uint8_t *s2, int stride); | |
| 11113 | 56 |
| 11109 | 57 int ff_pix_abs16_armv6(void *s, uint8_t *blk1, uint8_t *blk2, |
| 58 int line_size, int h); | |
| 11110 | 59 int ff_pix_abs16_x2_armv6(void *s, uint8_t *blk1, uint8_t *blk2, |
| 60 int line_size, int h); | |
| 11111 | 61 int ff_pix_abs16_y2_armv6(void *s, uint8_t *blk1, uint8_t *blk2, |
| 62 int line_size, int h); | |
| 11109 | 63 |
| 11112 | 64 int ff_pix_abs8_armv6(void *s, uint8_t *blk1, uint8_t *blk2, |
| 65 int line_size, int h); | |
| 66 | |
| 11115 | 67 int ff_sse16_armv6(void *s, uint8_t *blk1, uint8_t *blk2, |
| 68 int line_size, int h); | |
| 69 | |
| 11116 | 70 int ff_pix_norm1_armv6(uint8_t *pix, int line_size); |
| 11117 | 71 int ff_pix_sum_armv6(uint8_t *pix, int line_size); |
| 11116 | 72 |
| 10359 | 73 void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx) |
| 7060 | 74 { |
| 10359 | 75 if (!avctx->lowres && (avctx->idct_algo == FF_IDCT_AUTO || |
| 76 avctx->idct_algo == FF_IDCT_SIMPLEARMV6)) { | |
| 10362 | 77 c->idct_put = ff_simple_idct_put_armv6; |
| 78 c->idct_add = ff_simple_idct_add_armv6; | |
| 79 c->idct = ff_simple_idct_armv6; | |
| 80 c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; | |
| 10359 | 81 } |
| 10372 | 82 |
|
11108
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
83 c->put_pixels_tab[0][0] = ff_put_pixels16_armv6; |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
84 c->put_pixels_tab[0][1] = ff_put_pixels16_x2_armv6; |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
85 c->put_pixels_tab[0][2] = ff_put_pixels16_y2_armv6; |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
86 /* c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_armv6; */ |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
87 c->put_pixels_tab[1][0] = ff_put_pixels8_armv6; |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
88 c->put_pixels_tab[1][1] = ff_put_pixels8_x2_armv6; |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
89 c->put_pixels_tab[1][2] = ff_put_pixels8_y2_armv6; |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
90 /* c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_armv6; */ |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
91 |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
92 c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_armv6; |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
93 c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_armv6; |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
94 c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_armv6; |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
95 /* c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_armv6; */ |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
96 c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_armv6; |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
97 c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_armv6; |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
98 c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_armv6; |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
99 /* c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_armv6; */ |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
100 |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
101 c->avg_pixels_tab[0][0] = ff_avg_pixels16_armv6; |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
102 c->avg_pixels_tab[1][0] = ff_avg_pixels8_armv6; |
|
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
103 |
| 10372 | 104 c->add_pixels_clamped = ff_add_pixels_clamped_armv6; |
| 11113 | 105 c->get_pixels = ff_get_pixels_armv6; |
| 11114 | 106 c->diff_pixels = ff_diff_pixels_armv6; |
| 11109 | 107 |
| 108 c->pix_abs[0][0] = ff_pix_abs16_armv6; | |
| 11110 | 109 c->pix_abs[0][1] = ff_pix_abs16_x2_armv6; |
| 11111 | 110 c->pix_abs[0][2] = ff_pix_abs16_y2_armv6; |
| 11109 | 111 |
| 11112 | 112 c->pix_abs[1][0] = ff_pix_abs8_armv6; |
| 113 | |
| 11109 | 114 c->sad[0] = ff_pix_abs16_armv6; |
| 11112 | 115 c->sad[1] = ff_pix_abs8_armv6; |
| 11115 | 116 |
| 117 c->sse[0] = ff_sse16_armv6; | |
| 11116 | 118 |
| 119 c->pix_norm1 = ff_pix_norm1_armv6; | |
| 11117 | 120 c->pix_sum = ff_pix_sum_armv6; |
| 7060 | 121 } |
