Mercurial > libavcodec.hg
annotate arm/dsputil_arm.S @ 12530:63edd10ad4bc libavcodec tip
Try to fix crashes introduced by r25218
r25218 made assumptions about the existence of past reference frames that
weren't necessarily true.
| author | darkshikari |
|---|---|
| date | Tue, 28 Sep 2010 09:06:22 +0000 |
| parents | 361a5fcb4393 |
| children |
| rev | line source |
|---|---|
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1 @ |
| 8359 | 2 @ ARMv4 optimized DSP utils |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
3 @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp> |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
4 @ |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
5 @ This file is part of FFmpeg. |
|
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
6 @ |
|
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
7 @ FFmpeg is free software; you can redistribute it and/or |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
8 @ modify it under the terms of the GNU Lesser General Public |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
9 @ License as published by the Free Software Foundation; either |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
10 @ version 2.1 of the License, or (at your option) any later version. |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
11 @ |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
12 @ FFmpeg is distributed in the hope that it will be useful, |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
15 @ Lesser General Public License for more details. |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
16 @ |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
17 @ You should have received a copy of the GNU Lesser General Public |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
18 @ License along with FFmpeg; if not, write to the Free Software |
|
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2967
diff
changeset
|
19 @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
20 @ |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
21 |
|
6528
28d0e7dac45d
pld instruction are used in libavcodec/armv4l/dsputil_arm_s.S which can be used
diego
parents:
5641
diff
changeset
|
22 #include "config.h" |
| 8069 | 23 #include "asm.S" |
|
6528
28d0e7dac45d
pld instruction are used in libavcodec/armv4l/dsputil_arm_s.S which can be used
diego
parents:
5641
diff
changeset
|
24 |
| 8070 | 25 preserve8 |
| 26 | |
| 8590 | 27 #if !HAVE_PLD |
|
6528
28d0e7dac45d
pld instruction are used in libavcodec/armv4l/dsputil_arm_s.S which can be used
diego
parents:
5641
diff
changeset
|
28 .macro pld reg |
|
28d0e7dac45d
pld instruction are used in libavcodec/armv4l/dsputil_arm_s.S which can be used
diego
parents:
5641
diff
changeset
|
29 .endm |
|
28d0e7dac45d
pld instruction are used in libavcodec/armv4l/dsputil_arm_s.S which can be used
diego
parents:
5641
diff
changeset
|
30 #endif |
|
28d0e7dac45d
pld instruction are used in libavcodec/armv4l/dsputil_arm_s.S which can be used
diego
parents:
5641
diff
changeset
|
31 |
| 8590 | 32 #if HAVE_ARMV5TE |
| 8070 | 33 function ff_prefetch_arm, export=1 |
| 10355 | 34 subs r2, r2, #1 |
| 35 pld [r0] | |
| 36 add r0, r0, r1 | |
| 37 bne ff_prefetch_arm | |
| 38 bx lr | |
| 11443 | 39 endfunc |
| 8070 | 40 #endif |
| 41 | |
| 10357 | 42 .macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4 |
| 10355 | 43 mov \Rd0, \Rn0, lsr #(\shift * 8) |
| 44 mov \Rd1, \Rn1, lsr #(\shift * 8) | |
| 45 mov \Rd2, \Rn2, lsr #(\shift * 8) | |
| 46 mov \Rd3, \Rn3, lsr #(\shift * 8) | |
| 47 orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8) | |
| 48 orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8) | |
| 49 orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8) | |
| 50 orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8) | |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
51 .endm |
| 10357 | 52 .macro ALIGN_DWORD shift, R0, R1, R2 |
| 10355 | 53 mov \R0, \R0, lsr #(\shift * 8) |
| 54 orr \R0, \R0, \R1, lsl #(32 - \shift * 8) | |
| 55 mov \R1, \R1, lsr #(\shift * 8) | |
| 56 orr \R1, \R1, \R2, lsl #(32 - \shift * 8) | |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
57 .endm |
| 10357 | 58 .macro ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2 |
| 10355 | 59 mov \Rdst0, \Rsrc0, lsr #(\shift * 8) |
| 60 mov \Rdst1, \Rsrc1, lsr #(\shift * 8) | |
| 61 orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8)) | |
| 62 orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8)) | |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
63 .endm |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
64 |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
65 .macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
66 @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1) |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
67 @ Rmask = 0xFEFEFEFE |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
68 @ Rn = destroyed (Rn0/Rn1 are overwritten) |
| 10355 | 69 eor \Rd0, \Rn0, \Rm0 |
| 70 eor \Rd1, \Rn1, \Rm1 | |
| 71 orr \Rn0, \Rn0, \Rm0 | |
| 72 orr \Rn1, \Rn1, \Rm1 | |
| 73 and \Rd0, \Rd0, \Rmask | |
| 74 and \Rd1, \Rd1, \Rmask | |
| 75 sub \Rd0, \Rn0, \Rd0, lsr #1 | |
| 76 sub \Rd1, \Rn1, \Rd1, lsr #1 | |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
77 .endm |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
78 |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
79 .macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
80 @ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1) |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
81 @ Rmask = 0xFEFEFEFE |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
82 @ Rn = destroyed (Rn0/Rn1 are overwritten) |
| 10355 | 83 eor \Rd0, \Rn0, \Rm0 |
| 84 eor \Rd1, \Rn1, \Rm1 | |
| 85 and \Rn0, \Rn0, \Rm0 | |
| 86 and \Rn1, \Rn1, \Rm1 | |
| 87 and \Rd0, \Rd0, \Rmask | |
| 88 and \Rd1, \Rd1, \Rmask | |
| 89 add \Rd0, \Rn0, \Rd0, lsr #1 | |
| 90 add \Rd1, \Rn1, \Rd1, lsr #1 | |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
91 .endm |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
92 |
| 8682 | 93 .macro JMP_ALIGN tmp, reg |
| 10355 | 94 ands \tmp, \reg, #3 |
| 95 bic \reg, \reg, #3 | |
| 96 beq 1f | |
| 97 subs \tmp, \tmp, #1 | |
| 98 beq 2f | |
| 99 subs \tmp, \tmp, #1 | |
| 100 beq 3f | |
| 8680 | 101 b 4f |
| 102 .endm | |
| 103 | |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
104 @ ---------------------------------------------------------------- |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
105 .align 5 |
| 10363 | 106 function ff_put_pixels16_arm, export=1 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
107 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
108 @ block = word aligned, pixels = unaligned |
| 10355 | 109 pld [r1] |
| 10356 | 110 push {r4-r11, lr} |
| 10355 | 111 JMP_ALIGN r5, r1 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
112 1: |
| 10356 | 113 ldm r1, {r4-r7} |
| 10355 | 114 add r1, r1, r2 |
| 10356 | 115 stm r0, {r4-r7} |
| 10355 | 116 pld [r1] |
| 117 subs r3, r3, #1 | |
| 118 add r0, r0, r2 | |
| 119 bne 1b | |
| 10356 | 120 pop {r4-r11, pc} |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
121 .align 5 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
122 2: |
| 10356 | 123 ldm r1, {r4-r8} |
| 10355 | 124 add r1, r1, r2 |
| 10357 | 125 ALIGN_QWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8 |
| 10355 | 126 pld [r1] |
| 127 subs r3, r3, #1 | |
| 10356 | 128 stm r0, {r9-r12} |
| 10355 | 129 add r0, r0, r2 |
| 130 bne 2b | |
| 10356 | 131 pop {r4-r11, pc} |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
132 .align 5 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
133 3: |
| 10356 | 134 ldm r1, {r4-r8} |
| 10355 | 135 add r1, r1, r2 |
| 10357 | 136 ALIGN_QWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8 |
| 10355 | 137 pld [r1] |
| 138 subs r3, r3, #1 | |
| 10356 | 139 stm r0, {r9-r12} |
| 10355 | 140 add r0, r0, r2 |
| 141 bne 3b | |
| 10356 | 142 pop {r4-r11, pc} |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
143 .align 5 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
144 4: |
| 10356 | 145 ldm r1, {r4-r8} |
| 10355 | 146 add r1, r1, r2 |
| 10357 | 147 ALIGN_QWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8 |
| 10355 | 148 pld [r1] |
| 149 subs r3, r3, #1 | |
| 10356 | 150 stm r0, {r9-r12} |
| 10355 | 151 add r0, r0, r2 |
| 152 bne 4b | |
| 10356 | 153 pop {r4-r11,pc} |
| 11443 | 154 endfunc |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
155 |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
156 @ ---------------------------------------------------------------- |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
157 .align 5 |
| 10363 | 158 function ff_put_pixels8_arm, export=1 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
159 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
160 @ block = word aligned, pixels = unaligned |
| 10355 | 161 pld [r1] |
| 10356 | 162 push {r4-r5,lr} |
| 10355 | 163 JMP_ALIGN r5, r1 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
164 1: |
| 10356 | 165 ldm r1, {r4-r5} |
| 10355 | 166 add r1, r1, r2 |
| 167 subs r3, r3, #1 | |
| 168 pld [r1] | |
| 10356 | 169 stm r0, {r4-r5} |
| 10355 | 170 add r0, r0, r2 |
| 171 bne 1b | |
| 10356 | 172 pop {r4-r5,pc} |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
173 .align 5 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
174 2: |
| 10356 | 175 ldm r1, {r4-r5, r12} |
| 10355 | 176 add r1, r1, r2 |
| 10357 | 177 ALIGN_DWORD 1, r4, r5, r12 |
| 10355 | 178 pld [r1] |
| 179 subs r3, r3, #1 | |
| 10356 | 180 stm r0, {r4-r5} |
| 10355 | 181 add r0, r0, r2 |
| 182 bne 2b | |
| 10356 | 183 pop {r4-r5,pc} |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
184 .align 5 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
185 3: |
| 10356 | 186 ldm r1, {r4-r5, r12} |
| 10355 | 187 add r1, r1, r2 |
| 10357 | 188 ALIGN_DWORD 2, r4, r5, r12 |
| 10355 | 189 pld [r1] |
| 190 subs r3, r3, #1 | |
| 10356 | 191 stm r0, {r4-r5} |
| 10355 | 192 add r0, r0, r2 |
| 193 bne 3b | |
| 10356 | 194 pop {r4-r5,pc} |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
195 .align 5 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
196 4: |
| 10356 | 197 ldm r1, {r4-r5, r12} |
| 10355 | 198 add r1, r1, r2 |
| 10357 | 199 ALIGN_DWORD 3, r4, r5, r12 |
| 10355 | 200 pld [r1] |
| 201 subs r3, r3, #1 | |
| 10356 | 202 stm r0, {r4-r5} |
| 10355 | 203 add r0, r0, r2 |
| 204 bne 4b | |
| 10356 | 205 pop {r4-r5,pc} |
| 11443 | 206 endfunc |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
207 |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
208 @ ---------------------------------------------------------------- |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
209 .align 5 |
| 10363 | 210 function ff_put_pixels8_x2_arm, export=1 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
211 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
212 @ block = word aligned, pixels = unaligned |
| 10355 | 213 pld [r1] |
| 10356 | 214 push {r4-r10,lr} |
| 10355 | 215 ldr r12, =0xfefefefe |
| 216 JMP_ALIGN r5, r1 | |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
217 1: |
| 10356 | 218 ldm r1, {r4-r5, r10} |
| 10355 | 219 add r1, r1, r2 |
| 10357 | 220 ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 |
| 10355 | 221 pld [r1] |
| 222 RND_AVG32 r8, r9, r4, r5, r6, r7, r12 | |
| 223 subs r3, r3, #1 | |
| 10356 | 224 stm r0, {r8-r9} |
| 10355 | 225 add r0, r0, r2 |
| 226 bne 1b | |
| 10356 | 227 pop {r4-r10,pc} |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
228 .align 5 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
229 2: |
| 10356 | 230 ldm r1, {r4-r5, r10} |
| 10355 | 231 add r1, r1, r2 |
| 10357 | 232 ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 |
| 233 ALIGN_DWORD_D 2, r8, r9, r4, r5, r10 | |
| 10355 | 234 pld [r1] |
| 235 RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | |
| 236 subs r3, r3, #1 | |
| 10356 | 237 stm r0, {r4-r5} |
| 10355 | 238 add r0, r0, r2 |
| 239 bne 2b | |
| 10356 | 240 pop {r4-r10,pc} |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
241 .align 5 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
242 3: |
| 10356 | 243 ldm r1, {r4-r5, r10} |
| 10355 | 244 add r1, r1, r2 |
| 10357 | 245 ALIGN_DWORD_D 2, r6, r7, r4, r5, r10 |
| 246 ALIGN_DWORD_D 3, r8, r9, r4, r5, r10 | |
| 10355 | 247 pld [r1] |
| 248 RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | |
| 249 subs r3, r3, #1 | |
| 10356 | 250 stm r0, {r4-r5} |
| 10355 | 251 add r0, r0, r2 |
| 252 bne 3b | |
| 10356 | 253 pop {r4-r10,pc} |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
254 .align 5 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
255 4: |
| 10356 | 256 ldm r1, {r4-r5, r10} |
| 10355 | 257 add r1, r1, r2 |
| 10357 | 258 ALIGN_DWORD_D 3, r6, r7, r4, r5, r10 |
| 10355 | 259 pld [r1] |
| 260 RND_AVG32 r8, r9, r6, r7, r5, r10, r12 | |
| 261 subs r3, r3, #1 | |
| 10356 | 262 stm r0, {r8-r9} |
| 10355 | 263 add r0, r0, r2 |
| 264 bne 4b | |
| 10356 | 265 pop {r4-r10,pc} |
| 11443 | 266 endfunc |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
267 |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
268 .align 5 |
| 10363 | 269 function ff_put_no_rnd_pixels8_x2_arm, export=1 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
270 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
271 @ block = word aligned, pixels = unaligned |
| 10355 | 272 pld [r1] |
| 10356 | 273 push {r4-r10,lr} |
| 10355 | 274 ldr r12, =0xfefefefe |
| 275 JMP_ALIGN r5, r1 | |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
276 1: |
| 10356 | 277 ldm r1, {r4-r5, r10} |
| 10355 | 278 add r1, r1, r2 |
| 10357 | 279 ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 |
| 10355 | 280 pld [r1] |
| 281 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 | |
| 282 subs r3, r3, #1 | |
| 10356 | 283 stm r0, {r8-r9} |
| 10355 | 284 add r0, r0, r2 |
| 285 bne 1b | |
| 10356 | 286 pop {r4-r10,pc} |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
287 .align 5 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
288 2: |
| 10356 | 289 ldm r1, {r4-r5, r10} |
| 10355 | 290 add r1, r1, r2 |
| 10357 | 291 ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 |
| 292 ALIGN_DWORD_D 2, r8, r9, r4, r5, r10 | |
| 10355 | 293 pld [r1] |
| 294 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | |
| 295 subs r3, r3, #1 | |
| 10356 | 296 stm r0, {r4-r5} |
| 10355 | 297 add r0, r0, r2 |
| 298 bne 2b | |
| 10356 | 299 pop {r4-r10,pc} |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
300 .align 5 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
301 3: |
| 10356 | 302 ldm r1, {r4-r5, r10} |
| 10355 | 303 add r1, r1, r2 |
| 10357 | 304 ALIGN_DWORD_D 2, r6, r7, r4, r5, r10 |
| 305 ALIGN_DWORD_D 3, r8, r9, r4, r5, r10 | |
| 10355 | 306 pld [r1] |
| 307 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | |
| 308 subs r3, r3, #1 | |
| 10356 | 309 stm r0, {r4-r5} |
| 10355 | 310 add r0, r0, r2 |
| 311 bne 3b | |
| 10356 | 312 pop {r4-r10,pc} |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
313 .align 5 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
314 4: |
| 10356 | 315 ldm r1, {r4-r5, r10} |
| 10355 | 316 add r1, r1, r2 |
| 10357 | 317 ALIGN_DWORD_D 3, r6, r7, r4, r5, r10 |
| 10355 | 318 pld [r1] |
| 319 NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12 | |
| 320 subs r3, r3, #1 | |
| 10356 | 321 stm r0, {r8-r9} |
| 10355 | 322 add r0, r0, r2 |
| 323 bne 4b | |
| 10356 | 324 pop {r4-r10,pc} |
| 11443 | 325 endfunc |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
326 |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
327 |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
328 @ ---------------------------------------------------------------- |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
329 .align 5 |
| 10363 | 330 function ff_put_pixels8_y2_arm, export=1 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
331 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
332 @ block = word aligned, pixels = unaligned |
| 10355 | 333 pld [r1] |
| 10356 | 334 push {r4-r11,lr} |
| 10355 | 335 mov r3, r3, lsr #1 |
| 336 ldr r12, =0xfefefefe | |
| 337 JMP_ALIGN r5, r1 | |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
338 1: |
| 10356 | 339 ldm r1, {r4-r5} |
| 10355 | 340 add r1, r1, r2 |
| 10356 | 341 6: ldm r1, {r6-r7} |
| 10355 | 342 add r1, r1, r2 |
| 343 pld [r1] | |
| 344 RND_AVG32 r8, r9, r4, r5, r6, r7, r12 | |
| 10356 | 345 ldm r1, {r4-r5} |
| 10355 | 346 add r1, r1, r2 |
| 10356 | 347 stm r0, {r8-r9} |
| 10355 | 348 add r0, r0, r2 |
| 349 pld [r1] | |
| 350 RND_AVG32 r8, r9, r6, r7, r4, r5, r12 | |
| 351 subs r3, r3, #1 | |
| 10356 | 352 stm r0, {r8-r9} |
| 10355 | 353 add r0, r0, r2 |
| 354 bne 6b | |
| 10356 | 355 pop {r4-r11,pc} |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
356 .align 5 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
357 2: |
| 10356 | 358 ldm r1, {r4-r6} |
| 10355 | 359 add r1, r1, r2 |
| 360 pld [r1] | |
| 10357 | 361 ALIGN_DWORD 1, r4, r5, r6 |
| 10356 | 362 6: ldm r1, {r7-r9} |
| 10355 | 363 add r1, r1, r2 |
| 364 pld [r1] | |
| 10357 | 365 ALIGN_DWORD 1, r7, r8, r9 |
| 10355 | 366 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 |
| 10356 | 367 stm r0, {r10-r11} |
| 10355 | 368 add r0, r0, r2 |
| 10356 | 369 ldm r1, {r4-r6} |
| 10355 | 370 add r1, r1, r2 |
| 371 pld [r1] | |
| 10357 | 372 ALIGN_DWORD 1, r4, r5, r6 |
| 10355 | 373 subs r3, r3, #1 |
| 374 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
| 10356 | 375 stm r0, {r10-r11} |
| 10355 | 376 add r0, r0, r2 |
| 377 bne 6b | |
| 10356 | 378 pop {r4-r11,pc} |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
379 .align 5 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
380 3: |
| 10356 | 381 ldm r1, {r4-r6} |
| 10355 | 382 add r1, r1, r2 |
| 383 pld [r1] | |
| 10357 | 384 ALIGN_DWORD 2, r4, r5, r6 |
| 10356 | 385 6: ldm r1, {r7-r9} |
| 10355 | 386 add r1, r1, r2 |
| 387 pld [r1] | |
| 10357 | 388 ALIGN_DWORD 2, r7, r8, r9 |
| 10355 | 389 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 |
| 10356 | 390 stm r0, {r10-r11} |
| 10355 | 391 add r0, r0, r2 |
| 10356 | 392 ldm r1, {r4-r6} |
| 10355 | 393 add r1, r1, r2 |
| 394 pld [r1] | |
| 10357 | 395 ALIGN_DWORD 2, r4, r5, r6 |
| 10355 | 396 subs r3, r3, #1 |
| 397 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
| 10356 | 398 stm r0, {r10-r11} |
| 10355 | 399 add r0, r0, r2 |
| 400 bne 6b | |
| 10356 | 401 pop {r4-r11,pc} |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
402 .align 5 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
403 4: |
| 10356 | 404 ldm r1, {r4-r6} |
| 10355 | 405 add r1, r1, r2 |
| 406 pld [r1] | |
| 10357 | 407 ALIGN_DWORD 3, r4, r5, r6 |
| 10356 | 408 6: ldm r1, {r7-r9} |
| 10355 | 409 add r1, r1, r2 |
| 410 pld [r1] | |
| 10357 | 411 ALIGN_DWORD 3, r7, r8, r9 |
| 10355 | 412 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 |
| 10356 | 413 stm r0, {r10-r11} |
| 10355 | 414 add r0, r0, r2 |
| 10356 | 415 ldm r1, {r4-r6} |
| 10355 | 416 add r1, r1, r2 |
| 417 pld [r1] | |
| 10357 | 418 ALIGN_DWORD 3, r4, r5, r6 |
| 10355 | 419 subs r3, r3, #1 |
| 420 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
| 10356 | 421 stm r0, {r10-r11} |
| 10355 | 422 add r0, r0, r2 |
| 423 bne 6b | |
| 10356 | 424 pop {r4-r11,pc} |
| 11443 | 425 endfunc |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
426 |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
427 .align 5 |
| 10363 | 428 function ff_put_no_rnd_pixels8_y2_arm, export=1 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
429 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
430 @ block = word aligned, pixels = unaligned |
| 10355 | 431 pld [r1] |
| 10356 | 432 push {r4-r11,lr} |
| 10355 | 433 mov r3, r3, lsr #1 |
| 434 ldr r12, =0xfefefefe | |
| 435 JMP_ALIGN r5, r1 | |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
436 1: |
| 10356 | 437 ldm r1, {r4-r5} |
| 10355 | 438 add r1, r1, r2 |
| 10356 | 439 6: ldm r1, {r6-r7} |
| 10355 | 440 add r1, r1, r2 |
| 441 pld [r1] | |
| 442 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 | |
| 10356 | 443 ldm r1, {r4-r5} |
| 10355 | 444 add r1, r1, r2 |
| 10356 | 445 stm r0, {r8-r9} |
| 10355 | 446 add r0, r0, r2 |
| 447 pld [r1] | |
| 448 NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12 | |
| 449 subs r3, r3, #1 | |
| 10356 | 450 stm r0, {r8-r9} |
| 10355 | 451 add r0, r0, r2 |
| 452 bne 6b | |
| 10356 | 453 pop {r4-r11,pc} |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
454 .align 5 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
455 2: |
| 10356 | 456 ldm r1, {r4-r6} |
| 10355 | 457 add r1, r1, r2 |
| 458 pld [r1] | |
| 10357 | 459 ALIGN_DWORD 1, r4, r5, r6 |
| 10356 | 460 6: ldm r1, {r7-r9} |
| 10355 | 461 add r1, r1, r2 |
| 462 pld [r1] | |
| 10357 | 463 ALIGN_DWORD 1, r7, r8, r9 |
| 10355 | 464 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 |
| 10356 | 465 stm r0, {r10-r11} |
| 10355 | 466 add r0, r0, r2 |
| 10356 | 467 ldm r1, {r4-r6} |
| 10355 | 468 add r1, r1, r2 |
| 469 pld [r1] | |
| 10357 | 470 ALIGN_DWORD 1, r4, r5, r6 |
| 10355 | 471 subs r3, r3, #1 |
| 472 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
| 10356 | 473 stm r0, {r10-r11} |
| 10355 | 474 add r0, r0, r2 |
| 475 bne 6b | |
| 10356 | 476 pop {r4-r11,pc} |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
477 .align 5 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
478 3: |
| 10356 | 479 ldm r1, {r4-r6} |
| 10355 | 480 add r1, r1, r2 |
| 481 pld [r1] | |
| 10357 | 482 ALIGN_DWORD 2, r4, r5, r6 |
| 10356 | 483 6: ldm r1, {r7-r9} |
| 10355 | 484 add r1, r1, r2 |
| 485 pld [r1] | |
| 10357 | 486 ALIGN_DWORD 2, r7, r8, r9 |
| 10355 | 487 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 |
| 10356 | 488 stm r0, {r10-r11} |
| 10355 | 489 add r0, r0, r2 |
| 10356 | 490 ldm r1, {r4-r6} |
| 10355 | 491 add r1, r1, r2 |
| 492 pld [r1] | |
| 10357 | 493 ALIGN_DWORD 2, r4, r5, r6 |
| 10355 | 494 subs r3, r3, #1 |
| 495 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
| 10356 | 496 stm r0, {r10-r11} |
| 10355 | 497 add r0, r0, r2 |
| 498 bne 6b | |
| 10356 | 499 pop {r4-r11,pc} |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
500 .align 5 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
501 4: |
| 10356 | 502 ldm r1, {r4-r6} |
| 10355 | 503 add r1, r1, r2 |
| 504 pld [r1] | |
| 10357 | 505 ALIGN_DWORD 3, r4, r5, r6 |
| 10356 | 506 6: ldm r1, {r7-r9} |
| 10355 | 507 add r1, r1, r2 |
| 508 pld [r1] | |
| 10357 | 509 ALIGN_DWORD 3, r7, r8, r9 |
| 10355 | 510 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 |
| 10356 | 511 stm r0, {r10-r11} |
| 10355 | 512 add r0, r0, r2 |
| 10356 | 513 ldm r1, {r4-r6} |
| 10355 | 514 add r1, r1, r2 |
| 515 pld [r1] | |
| 10357 | 516 ALIGN_DWORD 3, r4, r5, r6 |
| 10355 | 517 subs r3, r3, #1 |
| 518 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
| 10356 | 519 stm r0, {r10-r11} |
| 10355 | 520 add r0, r0, r2 |
| 521 bne 6b | |
| 10356 | 522 pop {r4-r11,pc} |
| 11443 | 523 endfunc |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
524 |
| 8679 | 525 .ltorg |
| 526 | |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
527 @ ---------------------------------------------------------------- |
| 8679 | 528 .macro RND_XY2_IT align, rnd |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
529 @ l1= (a & 0x03030303) + (b & 0x03030303), plus a rounding term 0x03030303 & (0x03030303 \rnd 1) (0x02020202 for lsl) only when r3 is even |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
530 @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2) |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
531 .if \align == 0 |
| 10356 | 532 ldm r1, {r6-r8} |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
533 .elseif \align == 3 |
| 10356 | 534 ldm r1, {r5-r7} |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
535 .else |
| 10356 | 536 ldm r1, {r8-r10} |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
537 .endif |
| 10355 | 538 add r1, r1, r2 |
| 539 pld [r1] | |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
540 .if \align == 0 |
| 10357 | 541 ALIGN_DWORD_D 1, r4, r5, r6, r7, r8 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
542 .elseif \align == 1 |
| 10357 | 543 ALIGN_DWORD_D 1, r4, r5, r8, r9, r10 |
| 544 ALIGN_DWORD_D 2, r6, r7, r8, r9, r10 | |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
545 .elseif \align == 2 |
| 10357 | 546 ALIGN_DWORD_D 2, r4, r5, r8, r9, r10 |
| 547 ALIGN_DWORD_D 3, r6, r7, r8, r9, r10 | |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
548 .elseif \align == 3 |
| 10357 | 549 ALIGN_DWORD_D 3, r4, r5, r5, r6, r7 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
550 .endif |
| 10355 | 551 ldr r14, =0x03030303 |
| 552 tst r3, #1 | |
| 553 and r8, r4, r14 | |
| 554 and r9, r5, r14 | |
| 555 and r10, r6, r14 | |
| 556 and r11, r7, r14 | |
| 557 andeq r14, r14, r14, \rnd #1 | |
| 558 add r8, r8, r10 | |
| 559 add r9, r9, r11 | |
| 560 ldr r12, =0xfcfcfcfc >> 2 | |
| 561 addeq r8, r8, r14 | |
| 562 addeq r9, r9, r14 | |
| 563 and r4, r12, r4, lsr #2 | |
| 564 and r5, r12, r5, lsr #2 | |
| 565 and r6, r12, r6, lsr #2 | |
| 566 and r7, r12, r7, lsr #2 | |
| 567 add r10, r4, r6 | |
| 568 add r11, r5, r7 | |
| 569 subs r3, r3, #1 | |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
570 .endm |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
571 |
@ RND_XY2_EXPAND align, rnd
@ Full row loop plus function return for one source-alignment case.
@ Runs RND_XY2_IT once for the first source row; then, per iteration, keeps
@ that row's sums on the stack, runs RND_XY2_IT for the next row, combines
@ both sets of sums and stores two words (8 bytes) at r0, advancing r0 by r2.
@ The loop continues while the subs at the end of RND_XY2_IT left r3 >= 0;
@ no instruction between that subs and the bge updates the flags.
@ Ends with pop {r4-r11,pc}, so it expects r4-r11 and the return address to
@ have been pushed by the enclosing function.
| 8679 | 572 .macro RND_XY2_EXPAND align, rnd |
| 10355 | 573 RND_XY2_IT \align, \rnd |
@ r8-r11 (sums of the row just processed) become the "previous row" values.
| 10356 | 574 6: push {r8-r11} |
| 10355 | 575 RND_XY2_IT \align, \rnd |
@ Previous row's sums come back in r4-r7; r8-r11 hold the new row's sums.
| 10356 | 576 pop {r4-r7} |
| 10355 | 577 add r4, r4, r8 |
| 578 add r5, r5, r9 | |
| 579 ldr r14, =0x0f0f0f0f | |
| 580 add r6, r6, r10 | |
| 581 add r7, r7, r11 | |
@ Low-bit sums of both rows are shifted right by 2 and masked to 4 bits per
@ byte, then added to the sums of the pre-shifted high parts.
| 582 and r4, r14, r4, lsr #2 | |
| 583 and r5, r14, r5, lsr #2 | |
| 584 add r4, r4, r6 | |
| 585 add r5, r5, r7 | |
| 10356 | 586 stm r0, {r4-r5} |
| 10355 | 587 add r0, r0, r2 |
| 588 bge 6b | |
@ Restore callee-saved registers and return by loading the saved lr into pc.
| 10356 | 589 pop {r4-r11,pc} |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
590 .endm |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
591 |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
592 .align 5 |
@ ff_put_pixels8_xy2_arm: rounding variant of the 8-byte-wide xy2 routine.
@ Instantiates RND_XY2_EXPAND with "lsl" for each of the four alignment
@ cases (local labels 1-4 correspond to align 0-3); with lsl the rounding
@ constant derived in RND_XY2_IT is 0x02020202.
@ Each RND_XY2_EXPAND expansion ends with its own pop {r4-r11,pc}, so there
@ is no fall-through from one case into the next.
| 10363 | 593 function ff_put_pixels8_xy2_arm, export=1 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
594 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
595 @ block = word aligned, pixels = unaligned |
| 10355 | 596 pld [r1] |
| 10356 | 597 push {r4-r11,lr} @ R14 is also called LR |
@ NOTE(review): JMP_ALIGN is defined outside this view; it presumably
@ branches to one of the local labels 1-4 below based on the low two bits
@ of r1, using r5 as scratch -- confirm against its definition.
| 10355 | 598 JMP_ALIGN r5, r1 |
| 10371 | 599 1: RND_XY2_EXPAND 0, lsl |
| 600 .align 5 | |
| 601 2: RND_XY2_EXPAND 1, lsl | |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
602 .align 5 |
| 10371 | 603 3: RND_XY2_EXPAND 2, lsl |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
604 .align 5 |
| 10371 | 605 4: RND_XY2_EXPAND 3, lsl |
| 11443 | 606 endfunc |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
607 |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
608 .align 5 |
@ ff_put_no_rnd_pixels8_xy2_arm: "no_rnd" variant of the 8-byte-wide xy2
@ routine.  Identical in structure to ff_put_pixels8_xy2_arm but instantiates
@ RND_XY2_EXPAND with "lsr", which makes the constant derived in RND_XY2_IT
@ 0x01010101 instead of 0x02020202.
@ Local labels 1-4 correspond to align 0-3; each expansion ends with its own
@ pop {r4-r11,pc}, so there is no fall-through between cases.
| 10363 | 609 function ff_put_no_rnd_pixels8_xy2_arm, export=1 |
|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
610 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
|
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
611 @ block = word aligned, pixels = unaligned |
| 10355 | 612 pld [r1] |
| 10356 | 613 push {r4-r11,lr} |
@ NOTE(review): JMP_ALIGN is defined outside this view; it presumably
@ branches to one of the local labels 1-4 below based on the low two bits
@ of r1, using r5 as scratch -- confirm against its definition.
| 10355 | 614 JMP_ALIGN r5, r1 |
| 10371 | 615 1: RND_XY2_EXPAND 0, lsr |
| 616 .align 5 | |
| 617 2: RND_XY2_EXPAND 1, lsr | |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
618 .align 5 |
| 10371 | 619 3: RND_XY2_EXPAND 2, lsr |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
620 .align 5 |
| 10371 | 621 4: RND_XY2_EXPAND 3, lsr |
| 11443 | 622 endfunc |
| 8072 | 623 |
|
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
624 .align 5 |
| 10374 | 625 @ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride) |
@ Adds signed 16-bit values from block to the bytes at dest, 8 bytes per row
@ for 8 rows, and writes the clamped results back to dest.
@   r0 = block (advanced by 16 bytes per row)
@   r1 = dest  (read and written as two 32-bit words per row, advanced by r2)
@   r2 = stride, r10 = rows remaining (starts at 8)
@   r4 = current dest word, r5/r7 = two block values, r6/r8 = two sums,
@   r9 = output word being assembled
@ Clamp: if bit 8 of (block value + dest byte) is set, the sum is replaced by
@ the top byte of the inverted block value (mvn, then lsr #24): 0x00 when the
@ block value was negative, 0xFF otherwise.
@ NOTE(review): only bit 8 of the sum is tested, so this appears to rely on
@ every sum lying in -256..511 -- confirm the block value range with callers.
@ The [A]..[F] markers show where an instruction sat before it was moved
@ earlier in the instruction stream.
| 626 function ff_add_pixels_clamped_arm, export=1 | |
| 8072 | 627 push {r4-r10} |
| 628 mov r10, #8 | |
| 629 1: | |
| 630 ldr r4, [r1] /* load dest */ | |
| 631 /* block[0] and block[1]*/ | |
| 632 ldrsh r5, [r0] | |
| 633 ldrsh r7, [r0, #2] | |
| 634 and r6, r4, #0xFF | |
| 635 and r8, r4, #0xFF00 | |
| 636 add r6, r5, r6 | |
| 637 add r8, r7, r8, lsr #8 | |
| 638 mvn r5, r5 | |
| 639 mvn r7, r7 | |
| 640 tst r6, #0x100 | |
| 641 movne r6, r5, lsr #24 | |
| 642 tst r8, #0x100 | |
| 643 movne r8, r7, lsr #24 | |
| 644 mov r9, r6 | |
| 645 ldrsh r5, [r0, #4] /* moved from [A] */ | |
| 10355 | 646 orr r9, r9, r8, lsl #8 |
| 8072 | 647 /* block[2] and block[3] */ |
| 648 /* [A] */ | |
| 649 ldrsh r7, [r0, #6] | |
| 650 and r6, r4, #0xFF0000 | |
| 651 and r8, r4, #0xFF000000 | |
| 10355 | 652 add r6, r5, r6, lsr #16 |
| 653 add r8, r7, r8, lsr #24 | |
| 8072 | 654 mvn r5, r5 |
| 655 mvn r7, r7 | |
| 656 tst r6, #0x100 | |
| 657 movne r6, r5, lsr #24 | |
| 658 tst r8, #0x100 | |
| 659 movne r8, r7, lsr #24 | |
| 10355 | 660 orr r9, r9, r6, lsl #16 |
| 8072 | 661 ldr r4, [r1, #4] /* moved from [B] */ |
| 10355 | 662 orr r9, r9, r8, lsl #24 |
| 8072 | 663 /* store dest */ |
| 664 ldrsh r5, [r0, #8] /* moved from [C] */ | |
| 665 str r9, [r1] | |
| 666 | |
| 667 /* load dest */ | |
| 668 /* [B] */ | |
| 669 /* block[4] and block[5] */ | |
| 670 /* [C] */ | |
| 671 ldrsh r7, [r0, #10] | |
| 672 and r6, r4, #0xFF | |
| 673 and r8, r4, #0xFF00 | |
| 674 add r6, r5, r6 | |
| 10355 | 675 add r8, r7, r8, lsr #8 |
| 8072 | 676 mvn r5, r5 |
| 677 mvn r7, r7 | |
| 678 tst r6, #0x100 | |
| 679 movne r6, r5, lsr #24 | |
| 680 tst r8, #0x100 | |
| 681 movne r8, r7, lsr #24 | |
| 682 mov r9, r6 | |
| 683 ldrsh r5, [r0, #12] /* moved from [D] */ | |
| 10355 | 684 orr r9, r9, r8, lsl #8 |
| 8072 | 685 /* block[6] and block[7] */ |
| 686 /* [D] */ | |
| 687 ldrsh r7, [r0, #14] | |
| 688 and r6, r4, #0xFF0000 | |
| 689 and r8, r4, #0xFF000000 | |
| 10355 | 690 add r6, r5, r6, lsr #16 |
| 691 add r8, r7, r8, lsr #24 | |
| 8072 | 692 mvn r5, r5 |
| 693 mvn r7, r7 | |
| 694 tst r6, #0x100 | |
| 695 movne r6, r5, lsr #24 | |
| 696 tst r8, #0x100 | |
| 697 movne r8, r7, lsr #24 | |
| 10355 | 698 orr r9, r9, r6, lsl #16 |
| 8072 | 699 add r0, r0, #16 /* moved from [E] */ |
| 10355 | 700 orr r9, r9, r8, lsl #24 |
@ The flags from this subs are still valid at the bne below: the str and add
@ in between do not update them.
| 8072 | 701 subs r10, r10, #1 /* moved from [F] */ |
| 702 /* store dest */ | |
| 703 str r9, [r1, #4] | |
| 704 | |
| 705 /* [E] */ | |
| 706 /* [F] */ | |
| 707 add r1, r1, r2 | |
| 708 bne 1b | |
| 709 | |
| 710 pop {r4-r10} | |
| 711 bx lr | |
| 11443 | 712 endfunc |
