annotate src/ffmpeg/libavcodec/sparc/dsputil_vis.c @ 815:23a5aa2c545c trunk

[svn] - bork bork bork
author nenolod
date Mon, 12 Mar 2007 13:06:30 -0700
parents e8776388b02a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
808
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1 /*
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2 * dsputil_vis.c
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3 * Copyright (C) 2003 David S. Miller <davem@redhat.com>
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4 *
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
5 * This file is part of FFmpeg.
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
6 *
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
11 *
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
15 * Lesser General Public License for more details.
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
16 *
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
20 */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
21
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
22 /* The *no_round* functions have been added by James A. Morrison, 2003,2004.
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
23 The vis code from libmpeg2 was adapted for ffmpeg by James A. Morrison.
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
24 */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
25
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
26 #include "config.h"
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
27
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
28 #ifdef ARCH_SPARC
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
29
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
30 #include <inttypes.h>
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
31 #include <signal.h>
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
32 #include <setjmp.h>
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
33
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
34 #include "../dsputil.h"
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
35
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
36 #include "vis.h"
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
37
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
38 /* The trick used in some of this file is the formula from the MMX
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
39 * motion comp code, which is:
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
40 *
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
41 * (x+y+1)>>1 == (x|y)-((x^y)>>1)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
42 *
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
43 * This allows us to average 8 bytes at a time in a 64-bit FPU reg.
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
44 * We avoid overflows by masking before we do the shift, and we
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
45 * implement the shift by multiplying by 1/2 using mul8x16. So in
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
46 * VIS this is (assume 'x' is in f0, 'y' is in f2, a repeating mask
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
47 * of '0xfe' is in f4, a repeating mask of '0x7f' is in f6, and
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
48 * the value 0x80808080 is in f8):
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
49 *
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
50 * fxor f0, f2, f10
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
51 * fand f10, f4, f10
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
52 * fmul8x16 f8, f10, f10
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
53 * fand f10, f6, f10
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
54 * for f0, f2, f12
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
55 * fpsub16 f12, f10, f10
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
56 */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
57
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
58 #define ATTR_ALIGN(alignd) __attribute__ ((aligned(alignd)))
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
59
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
60 #define DUP4(x) {x, x, x, x}
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
61 #define DUP8(x) {x, x, x, x, x, x, x, x}
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
62 static const int16_t constants1[] ATTR_ALIGN(8) = DUP4 (1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
63 static const int16_t constants2[] ATTR_ALIGN(8) = DUP4 (2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
64 static const int16_t constants3[] ATTR_ALIGN(8) = DUP4 (3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
65 static const int16_t constants6[] ATTR_ALIGN(8) = DUP4 (6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
66 static const int8_t constants_fe[] ATTR_ALIGN(8) = DUP8 (0xfe);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
67 static const int8_t constants_7f[] ATTR_ALIGN(8) = DUP8 (0x7f);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
68 static const int8_t constants128[] ATTR_ALIGN(8) = DUP8 (128);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
69 static const int16_t constants256_512[] ATTR_ALIGN(8) =
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
70 {256, 512, 256, 512};
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
71 static const int16_t constants256_1024[] ATTR_ALIGN(8) =
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
72 {256, 1024, 256, 1024};
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
73
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
74 #define REF_0 0
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
75 #define REF_0_1 1
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
76 #define REF_2 2
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
77 #define REF_2_1 3
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
78 #define REF_4 4
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
79 #define REF_4_1 5
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
80 #define REF_6 6
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
81 #define REF_6_1 7
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
82 #define REF_S0 8
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
83 #define REF_S0_1 9
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
84 #define REF_S2 10
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
85 #define REF_S2_1 11
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
86 #define REF_S4 12
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
87 #define REF_S4_1 13
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
88 #define REF_S6 14
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
89 #define REF_S6_1 15
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
90 #define DST_0 16
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
91 #define DST_1 17
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
92 #define DST_2 18
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
93 #define DST_3 19
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
94 #define CONST_1 20
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
95 #define CONST_2 20
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
96 #define CONST_3 20
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
97 #define CONST_6 20
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
98 #define MASK_fe 20
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
99 #define CONST_128 22
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
100 #define CONST_256 22
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
101 #define CONST_512 22
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
102 #define CONST_1024 22
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
103 #define TMP0 24
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
104 #define TMP1 25
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
105 #define TMP2 26
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
106 #define TMP3 27
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
107 #define TMP4 28
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
108 #define TMP5 29
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
109 #define ZERO 30
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
110 #define MASK_7f 30
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
111
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
112 #define TMP6 32
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
113 #define TMP8 34
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
114 #define TMP10 36
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
115 #define TMP12 38
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
116 #define TMP14 40
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
117 #define TMP16 42
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
118 #define TMP18 44
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
119 #define TMP20 46
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
120 #define TMP22 48
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
121 #define TMP24 50
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
122 #define TMP26 52
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
123 #define TMP28 54
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
124 #define TMP30 56
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
125 #define TMP32 58
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
126
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
127 static void MC_put_o_16_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
128 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
129 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
130 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
131
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
132 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
133 do { /* 5 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
134 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
135
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
136 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
137
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
138 vis_ld64_2(ref, 16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
139 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
140
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
141 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
142 vis_st64(REF_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
143
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
144 vis_faligndata(TMP2, TMP4, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
145 vis_st64_2(REF_2, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
146 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
147 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
148 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
149
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
150 static void MC_put_o_8_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
151 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
152 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
153 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
154
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
155 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
156 do { /* 4 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
157 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
158
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
159 vis_ld64(ref[8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
160 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
161
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
162 /* stall */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
163
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
164 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
165 vis_st64(REF_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
166 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
167 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
168 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
169
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
170
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
171 static void MC_avg_o_16_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
172 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
173 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
174 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
175 int stride_8 = stride + 8;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
176
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
177 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
178
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
179 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
180
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
181 vis_ld64(ref[8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
182
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
183 vis_ld64(ref[16], TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
184
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
185 vis_ld64(dest[0], DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
186
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
187 vis_ld64(dest[8], DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
188
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
189 vis_ld64(constants_fe[0], MASK_fe);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
190 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
191
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
192 vis_ld64(constants_7f[0], MASK_7f);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
193 vis_faligndata(TMP2, TMP4, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
194
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
195 vis_ld64(constants128[0], CONST_128);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
196
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
197 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
198 height = (height >> 1) - 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
199
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
200 do { /* 24 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
201 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
202 vis_xor(DST_0, REF_0, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
203
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
204 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
205 vis_and(TMP6, MASK_fe, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
206
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
207 vis_ld64_2(ref, 16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
208 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
209 vis_mul8x16(CONST_128, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
210 vis_xor(DST_2, REF_2, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
211
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
212 vis_and(TMP8, MASK_fe, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
213
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
214 vis_or(DST_0, REF_0, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
215 vis_ld64_2(dest, stride, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
216 vis_mul8x16(CONST_128, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
217
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
218 vis_or(DST_2, REF_2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
219 vis_ld64_2(dest, stride_8, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
220
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
221 vis_ld64(ref[0], TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
222 vis_and(TMP6, MASK_7f, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
223
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
224 vis_and(TMP8, MASK_7f, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
225
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
226 vis_psub16(TMP10, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
227 vis_st64(TMP6, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
228
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
229 vis_psub16(TMP12, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
230 vis_st64_2(TMP8, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
231
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
232 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
233 vis_ld64_2(ref, 8, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
234 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
235
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
236 vis_ld64_2(ref, 16, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
237 vis_faligndata(TMP2, TMP4, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
238 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
239
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
240 vis_xor(DST_0, REF_0, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
241
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
242 vis_and(TMP20, MASK_fe, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
243
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
244 vis_xor(DST_2, REF_2, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
245 vis_mul8x16(CONST_128, TMP20, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
246
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
247 vis_and(TMP22, MASK_fe, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
248
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
249 vis_or(DST_0, REF_0, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
250 vis_mul8x16(CONST_128, TMP22, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
251
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
252 vis_or(DST_2, REF_2, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
253
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
254 vis_ld64_2(dest, stride, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
255 vis_faligndata(TMP14, TMP16, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
256
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
257 vis_ld64_2(dest, stride_8, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
258 vis_faligndata(TMP16, TMP18, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
259
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
260 vis_and(TMP20, MASK_7f, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
261
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
262 vis_and(TMP22, MASK_7f, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
263
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
264 vis_psub16(TMP24, TMP20, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
265 vis_st64(TMP20, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
266
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
267 vis_psub16(TMP26, TMP22, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
268 vis_st64_2(TMP22, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
269 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
270 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
271
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
272 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
273 vis_xor(DST_0, REF_0, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
274
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
275 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
276 vis_and(TMP6, MASK_fe, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
277
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
278 vis_ld64_2(ref, 16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
279 vis_mul8x16(CONST_128, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
280 vis_xor(DST_2, REF_2, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
281
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
282 vis_and(TMP8, MASK_fe, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
283
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
284 vis_or(DST_0, REF_0, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
285 vis_ld64_2(dest, stride, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
286 vis_mul8x16(CONST_128, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
287
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
288 vis_or(DST_2, REF_2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
289 vis_ld64_2(dest, stride_8, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
290
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
291 vis_ld64(ref[0], TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
292 vis_and(TMP6, MASK_7f, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
293
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
294 vis_and(TMP8, MASK_7f, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
295
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
296 vis_psub16(TMP10, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
297 vis_st64(TMP6, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
298
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
299 vis_psub16(TMP12, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
300 vis_st64_2(TMP8, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
301
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
302 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
303 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
304
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
305 vis_faligndata(TMP2, TMP4, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
306
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
307 vis_xor(DST_0, REF_0, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
308
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
309 vis_and(TMP20, MASK_fe, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
310
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
311 vis_xor(DST_2, REF_2, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
312 vis_mul8x16(CONST_128, TMP20, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
313
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
314 vis_and(TMP22, MASK_fe, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
315
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
316 vis_or(DST_0, REF_0, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
317 vis_mul8x16(CONST_128, TMP22, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
318
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
319 vis_or(DST_2, REF_2, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
320
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
321 vis_and(TMP20, MASK_7f, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
322
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
323 vis_and(TMP22, MASK_7f, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
324
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
325 vis_psub16(TMP24, TMP20, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
326 vis_st64(TMP20, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
327
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
328 vis_psub16(TMP26, TMP22, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
329 vis_st64_2(TMP22, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
330 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
331
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * MC_avg_o_8_vis: merge an 8-byte-wide reference block into dest, one
 * 64-bit word per row.
 *
 *   dest   - destination rows; each row is loaded, combined with the
 *            matching reference row and stored back.
 *   _ref   - reference rows; const is cast away so the pointer can be
 *            passed through vis_alignaddr() and advanced locally.
 *   stride - byte step between consecutive rows, applied to ref and dest.
 *   height - row count; two rows are produced per loop pass and the last
 *            two rows are produced after the loop.
 *
 * Per row the stored word is
 *     (dst | ref) - (mul8x16(CONST_128, (dst ^ ref) & MASK_fe) & MASK_7f)
 * NOTE(review): presumably MASK_fe/MASK_7f hold 0xfe/0x7f in every byte
 * and the multiply by CONST_128 acts as a per-byte shift right by one,
 * which would make this the rounded-up average of dst and ref -- confirm
 * against the constant tables and the vis.h macros.
 *
 * NOTE(review): the loop counter is (height >> 1) - 1 and is tested with
 * --height, so height 2 or 3 enters the loop with 0 and decrements to a
 * negative value instead of stopping; callers apparently must pass an
 * even height >= 4 -- confirm.
 */
332 static void MC_avg_o_8_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
333 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
334 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
335 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
336
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* ref is replaced by the value vis_alignaddr() returns; presumably the
 * 8-byte-aligned address, with the byte offset kept for vis_faligndata --
 * confirm in vis.h. */
337 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
338
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Prologue: fetch the two 64-bit words covering reference row 0 and the
 * first destination row, plus the three constants used by every row. */
339 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
340
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
341 vis_ld64(ref[8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
342
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
343 vis_ld64(dest[0], DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
344
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
345 vis_ld64(constants_fe[0], MASK_fe);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
346
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
347 vis_ld64(constants_7f[0], MASK_7f);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
348 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
349
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
350 vis_ld64(constants128[0], CONST_128);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
351
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
352 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Two rows per pass; the final pair is finished after the loop, hence -1. */
353 height = (height >> 1) - 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
354
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Loop body: loads for the following rows are interleaved with the
 * arithmetic for the current row, so statement order matters here. */
355 do { /* 12 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
356 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
357 vis_xor(DST_0, REF_0, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
358
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
359 vis_ld64(ref[8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
360 vis_and(TMP4, MASK_fe, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
361
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* DST_0 is consumed by the OR and then immediately reloaded; presumably
 * vis_ld64_2(dest, stride, ...) reads the row at dest + stride -- confirm. */
362 vis_or(DST_0, REF_0, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
363 vis_ld64_2(dest, stride, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
364 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
365 vis_mul8x16(CONST_128, TMP4, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
366
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* TMP12 holds the first word of the row after next, because TMP0 is about
 * to be reused as a scratch register for the second row's XOR. */
367 vis_ld64(ref[0], TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
368 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
369
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
370 vis_ld64(ref[8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
371 vis_xor(DST_0, REF_0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
372 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
373
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
374 vis_and(TMP0, MASK_fe, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
375
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
376 vis_and(TMP4, MASK_7f, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
377
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* First row of the pair is complete: store it and step dest. */
378 vis_psub16(TMP6, TMP4, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
379 vis_st64(TMP4, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
380 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
381 vis_mul8x16(CONST_128, TMP0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
382
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
383 vis_or(DST_0, REF_0, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
384 vis_ld64_2(dest, stride, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
385
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
386 vis_faligndata(TMP12, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
387
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
388 vis_and(TMP0, MASK_7f, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
389
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Second row of the pair is complete: store it and step dest. */
390 vis_psub16(TMP6, TMP0, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
391 vis_st64(TMP4, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
392 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
393 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
394
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Epilogue: the last two rows. Same arithmetic as the loop body, but only
 * the final reference row and final destination row are still fetched. */
395 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
396 vis_xor(DST_0, REF_0, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
397
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
398 vis_ld64(ref[8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
399 vis_and(TMP4, MASK_fe, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
400
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
401 vis_or(DST_0, REF_0, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
402 vis_ld64_2(dest, stride, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
403 vis_mul8x16(CONST_128, TMP4, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
404
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
405 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
406
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
407 vis_xor(DST_0, REF_0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
408
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
409 vis_and(TMP0, MASK_fe, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
410
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
411 vis_and(TMP4, MASK_7f, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
412
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
413 vis_psub16(TMP6, TMP4, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
414 vis_st64(TMP4, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
415 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
416 vis_mul8x16(CONST_128, TMP0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
417
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
418 vis_or(DST_0, REF_0, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
419
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
420 vis_and(TMP0, MASK_7f, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
421
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
422 vis_psub16(TMP6, TMP0, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
423 vis_st64(TMP4, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
424 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
425
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * MC_put_x_16_vis: write a 16-byte-wide block to dest, where each output
 * row is built from two views of the same reference row: one extracted at
 * byte offset `off` (REF_0 / REF_4) and one at `off + 1` (REF_2 / REF_6).
 *
 *   dest   - destination rows; two 64-bit words (dest and dest + 8) are
 *            stored per row. dest is only written, never read.
 *   _ref   - reference rows; const is cast away so the pointer can be
 *            passed through vis_alignaddr() and advanced locally.
 *   stride - byte step between consecutive rows, applied to ref and dest.
 *   height - row count; two rows are produced per loop pass and the last
 *            two rows are produced after the loop.
 *
 * Per 64-bit word the stored value is
 *     (a | b) - (mul8x16(CONST_128, (a ^ b) & MASK_fe) & MASK_7f)
 * with a the `off` view and b the `off + 1` view.
 * NOTE(review): this looks like horizontal half-pel interpolation (each
 * byte averaged with its right-hand neighbour, rounding up), assuming
 * MASK_fe/MASK_7f are 0xfe/0x7f per byte and the multiply by CONST_128 is
 * a per-byte shift right by one -- confirm against vis.h and the tables.
 *
 * NOTE(review): the loop counter is (height >> 1) - 1 and is tested with
 * --height, so height 2 or 3 enters the loop with 0 and decrements to a
 * negative value instead of stopping; callers apparently must pass an
 * even height >= 4 -- confirm.
 */
426 static void MC_put_x_16_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
427 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
428 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
429 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Low three address bits of the reference pointer, and that value plus one;
 * both are later handed to vis_alignaddr_g0() to switch between views. */
430 unsigned long off = (unsigned long) ref & 0x7;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
431 unsigned long off_plus_1 = off + 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
432
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
433 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
434
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Prologue: three 64-bit words of reference row 0 (offsets 0, 8 and 16)
 * and the three constants used by every row. */
435 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
436
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
437 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
438
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
439 vis_ld64_2(ref, 16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
440
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
441 vis_ld64(constants_fe[0], MASK_fe);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
442
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
443 vis_ld64(constants_7f[0], MASK_7f);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
444 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
445
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
446 vis_ld64(constants128[0], CONST_128);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
447 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
448
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Build the `off + 1` view. When off is 7, off + 1 is 8, and the code copies
 * the following words (TMP2 / TMP4) unchanged instead of realigning.
 * NOTE(review): presumably because the alignment offset only has three
 * bits, so 8 could not be expressed -- confirm. */
449 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
450 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
451 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
452 vis_faligndata(TMP2, TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
453 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
454 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
455 vis_src1(TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
456 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
457
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
458 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Two rows per pass; the final pair is finished after the loop, hence -1. */
459 height = (height >> 1) - 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
460
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Loop body: the next two reference rows are loaded (TMP0/2/4 and
 * TMP14/16/18) while the arithmetic for the row already held in
 * REF_0..REF_6 is in flight, so statement order matters here. */
461 do { /* 34 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
462 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
463 vis_xor(REF_0, REF_2, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
464
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
465 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
466 vis_xor(REF_4, REF_6, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
467
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
468 vis_ld64_2(ref, 16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
469 vis_and(TMP6, MASK_fe, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
470 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
471
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
472 vis_ld64(ref[0], TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
473 vis_mul8x16(CONST_128, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
474 vis_and(TMP8, MASK_fe, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
475
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
476 vis_ld64_2(ref, 8, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
477 vis_mul8x16(CONST_128, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
478 vis_or(REF_0, REF_2, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
479
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
480 vis_ld64_2(ref, 16, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
481 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
482 vis_or(REF_4, REF_6, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
483
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Switch back to `off` before extracting REF_0 / REF_4 for the next row;
 * the previous `off + 1` setting would otherwise still be in effect. */
484 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
485
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
486 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
487
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
488 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
489
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
490 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
491 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
492 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
493 vis_faligndata(TMP2, TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
494 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
495 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
496 vis_src1(TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
497 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
498
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
499 vis_and(TMP6, MASK_7f, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
500
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
501 vis_and(TMP8, MASK_7f, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
502
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* First row of the pair: store both 8-byte halves and step dest. */
503 vis_psub16(TMP10, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
504 vis_st64(TMP6, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
505
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
506 vis_psub16(TMP12, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
507 vis_st64_2(TMP8, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
508 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
509
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
510 vis_xor(REF_0, REF_2, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
511
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
512 vis_xor(REF_4, REF_6, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
513
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
514 vis_and(TMP6, MASK_fe, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
515
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
516 vis_mul8x16(CONST_128, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
517 vis_and(TMP8, MASK_fe, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
518
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
519 vis_mul8x16(CONST_128, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
520 vis_or(REF_0, REF_2, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
521
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
522 vis_or(REF_4, REF_6, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
523
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Same view rebuild as above, now from the row held in TMP14/16/18. */
524 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
525
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
526 vis_faligndata(TMP14, TMP16, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
527
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
528 vis_faligndata(TMP16, TMP18, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
529
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
530 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
531 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
532 vis_faligndata(TMP14, TMP16, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
533 vis_faligndata(TMP16, TMP18, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
534 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
535 vis_src1(TMP16, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
536 vis_src1(TMP18, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
537 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
538
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
539 vis_and(TMP6, MASK_7f, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
540
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
541 vis_and(TMP8, MASK_7f, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
542
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Second row of the pair: store both 8-byte halves and step dest. */
543 vis_psub16(TMP10, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
544 vis_st64(TMP6, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
545
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
546 vis_psub16(TMP12, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
547 vis_st64_2(TMP8, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
548 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
549 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
550
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Epilogue: the last two rows. Only the final reference row is still
 * loaded; the row already held in REF_0..REF_6 is finished first. */
551 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
552 vis_xor(REF_0, REF_2, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
553
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
554 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
555 vis_xor(REF_4, REF_6, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
556
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
557 vis_ld64_2(ref, 16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
558 vis_and(TMP6, MASK_fe, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
559
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
560 vis_mul8x16(CONST_128, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
561 vis_and(TMP8, MASK_fe, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
562
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
563 vis_mul8x16(CONST_128, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
564 vis_or(REF_0, REF_2, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
565
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
566 vis_or(REF_4, REF_6, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
567
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
568 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
569
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
570 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
571
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
572 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
573
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
574 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
575 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
576 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
577 vis_faligndata(TMP2, TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
578 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
579 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
580 vis_src1(TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
581 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
582
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
583 vis_and(TMP6, MASK_7f, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
584
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
585 vis_and(TMP8, MASK_7f, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
586
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
587 vis_psub16(TMP10, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
588 vis_st64(TMP6, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
589
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
590 vis_psub16(TMP12, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
591 vis_st64_2(TMP8, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
592 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
593
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Final row: no further loads, just the arithmetic and the two stores. */
594 vis_xor(REF_0, REF_2, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
595
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
596 vis_xor(REF_4, REF_6, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
597
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
598 vis_and(TMP6, MASK_fe, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
599
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
600 vis_mul8x16(CONST_128, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
601 vis_and(TMP8, MASK_fe, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
602
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
603 vis_mul8x16(CONST_128, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
604 vis_or(REF_0, REF_2, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
605
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
606 vis_or(REF_4, REF_6, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
607
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
608 vis_and(TMP6, MASK_7f, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
609
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
610 vis_and(TMP8, MASK_7f, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
611
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
612 vis_psub16(TMP10, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
613 vis_st64(TMP6, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
614
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
615 vis_psub16(TMP12, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
616 vis_st64_2(TMP8, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
617 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
618
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * MC_put_x_8_vis: write an 8-byte-wide block to dest, where each output
 * row is built from two views of the same reference row: one extracted at
 * byte offset `off` (REF_0) and one at `off + 1` (REF_2).
 *
 *   dest   - destination rows; one 64-bit word is stored per row. dest is
 *            only written, never read.
 *   _ref   - reference rows; const is cast away so the pointer can be
 *            passed through vis_alignaddr() and advanced locally.
 *   stride - byte step between consecutive rows, applied to ref and dest.
 *   height - row count; two rows are produced per loop pass and the last
 *            two rows are produced after the loop.
 *
 * Per row the stored word is
 *     (a | b) - (mul8x16(CONST_128, (a ^ b) & MASK_fe) & MASK_7f)
 * with a the `off` view and b the `off + 1` view.
 * NOTE(review): this looks like horizontal half-pel interpolation (each
 * byte averaged with its right-hand neighbour, rounding up), assuming
 * MASK_fe/MASK_7f are 0xfe/0x7f per byte and the multiply by CONST_128 is
 * a per-byte shift right by one -- confirm against vis.h and the tables.
 *
 * NOTE(review): the loop counter is (height >> 1) - 1 and is tested with
 * --height, so height 2 or 3 enters the loop with 0 and decrements to a
 * negative value instead of stopping; callers apparently must pass an
 * even height >= 4 -- confirm.
 */
619 static void MC_put_x_8_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
620 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
621 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
622 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Low three address bits of the reference pointer, and that value plus one;
 * both are later handed to vis_alignaddr_g0() to switch between views. */
623 unsigned long off = (unsigned long) ref & 0x7;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
624 unsigned long off_plus_1 = off + 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
625
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
626 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
627
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Prologue: two 64-bit words of reference row 0 and the three constants
 * used by every row. */
628 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
629
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
630 vis_ld64(ref[8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
631
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
632 vis_ld64(constants_fe[0], MASK_fe);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
633
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
634 vis_ld64(constants_7f[0], MASK_7f);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
635
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
636 vis_ld64(constants128[0], CONST_128);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
637 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
638
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Build the `off + 1` view. When off is 7, off + 1 is 8, and the code copies
 * the following word (TMP2) unchanged instead of realigning.
 * NOTE(review): presumably because the alignment offset only has three
 * bits, so 8 could not be expressed -- confirm. */
639 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
640 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
641 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
642 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
643 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
644 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
645
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
646 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Two rows per pass; the final pair is finished after the loop, hence -1. */
647 height = (height >> 1) - 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
648
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Loop body: the next two reference rows are loaded (TMP0/TMP2 and
 * TMP8/TMP10) while the arithmetic for the row already held in
 * REF_0/REF_2 is in flight, so statement order matters here. */
649 do { /* 20 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
650 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
651 vis_xor(REF_0, REF_2, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
652
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
653 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
654 vis_and(TMP4, MASK_fe, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
655 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
656
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
657 vis_ld64(ref[0], TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
658 vis_or(REF_0, REF_2, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
659 vis_mul8x16(CONST_128, TMP4, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
660
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Switch back to `off` before extracting REF_0 for the next row; the
 * previous `off + 1` setting would otherwise still be in effect. */
661 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
662
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
663 vis_ld64_2(ref, 8, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
664 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
665 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
666
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
667 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
668 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
669 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
670 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
671 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
672 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
673
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
674 vis_and(TMP4, MASK_7f, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
675
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* First row of the pair: store and step dest. */
676 vis_psub16(TMP6, TMP4, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
677 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
678 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
679
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
680 vis_xor(REF_0, REF_2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
681
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
682 vis_and(TMP12, MASK_fe, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
683
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
684 vis_or(REF_0, REF_2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
685 vis_mul8x16(CONST_128, TMP12, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
686
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Same view rebuild as above, now from the row held in TMP8/TMP10. */
687 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
688 vis_faligndata(TMP8, TMP10, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
689 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
690 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
691 vis_faligndata(TMP8, TMP10, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
692 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
693 vis_src1(TMP10, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
694 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
695
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
696 vis_and(TMP12, MASK_7f, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
697
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Second row of the pair: store and step dest. */
698 vis_psub16(TMP14, TMP12, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
699 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
700 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
701 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
702
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Epilogue: the last two rows. Only the final reference row is still
 * loaded; the row already held in REF_0/REF_2 is finished first. */
703 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
704 vis_xor(REF_0, REF_2, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
705
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
706 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
707 vis_and(TMP4, MASK_fe, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
708
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
709 vis_or(REF_0, REF_2, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
710 vis_mul8x16(CONST_128, TMP4, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
711
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
712 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
713
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
714 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
715
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
716 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
717 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
718 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
719 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
720 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
721 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
722
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
723 vis_and(TMP4, MASK_7f, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
724
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
725 vis_psub16(TMP6, TMP4, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
726 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
727 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
728
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Final row: no further loads, just the arithmetic and the store. */
729 vis_xor(REF_0, REF_2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
730
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
731 vis_and(TMP12, MASK_fe, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
732
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
733 vis_or(REF_0, REF_2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
734 vis_mul8x16(CONST_128, TMP12, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
735
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
736 vis_and(TMP12, MASK_7f, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
737
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
738 vis_psub16(TMP14, TMP12, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
739 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* dest is a by-value parameter and is not used again, so this last
 * increment has no effect visible to the caller. */
740 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
741 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
742
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * MC_avg_x_16_vis: for each row, reads a span of the reference with three
 * vis_ld64 loads (ref[0], ref[8], ref[16]), reads the 16 bytes already at
 * dest (dest[0], dest[8]), combines them, and stores 16 bytes back to dest.
 *
 * dest   - destination rows; both read and overwritten.
 *          NOTE(review): accessed only through vis_ld64/vis_st64, so it
 *          presumably has to be 8-byte aligned - confirm against callers.
 * _ref   - reference rows. The low three address bits are kept in `off`
 *          and the pointer itself is replaced by the value returned from
 *          vis_alignaddr(). The const qualifier is cast away for the local
 *          copy; every vis_st64 in this function targets dest, not ref.
 * stride - added to both ref and dest after every row.
 * height - number of rows. The loop is do { } while (--height), so the
 *          body always runs once and height must be at least 1.
 *
 * NOTE(review): the function name and the constants used (CONST_3,
 * CONST_256/CONST_512, GSR scale factor 5) suggest the per-byte result is
 * (ref[x] + ref[x+1] + 2*dest[x] + 3) >> 2, i.e. a horizontal half-pel
 * interpolation averaged with dest - confirm against the VIS manual.
 */
743 static void MC_avg_x_16_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
744 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
745 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
746 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
747 unsigned long off = (unsigned long) ref & 0x7;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
748 unsigned long off_plus_1 = off + 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
749
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
750 vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
751
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
752 vis_ld64(constants3[0], CONST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
753 vis_fzero(ZERO);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
754 vis_ld64(constants256_512[0], CONST_256);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
755
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
756 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
757 do { /* 26 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
758 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
759
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
760 vis_ld64(ref[8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
761
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* The alignment is re-established from `off` on every iteration because
 * the branch below may switch it to off_plus_1. */
762 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
763
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
764 vis_ld64(ref[16], TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
765
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
766 vis_ld64(dest[0], DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
767 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
768
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
769 vis_ld64(dest[8], DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
770 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
771
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* REF_2/REF_6 are the same source words extracted with offset off + 1.
 * When off == 7 that offset would be 8, so vis_src1 is applied to the
 * following words (TMP2, TMP4) instead.
 * NOTE(review): presumably because the alignment offset is only three
 * bits wide - confirm. */
772 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
773 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
774 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
775 vis_faligndata(TMP2, TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
776 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
777 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
778 vis_src1(TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
779 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
780
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* First 8 output bytes: TMP0/TMP2 accumulate REF_0 + REF_2 + DST_0/DST_1
 * terms, CONST_3 is added into TMP8/TMP10, and those are packed into
 * DST_0/DST_1. The second half (REF_4, REF_6, DST_2/DST_3) is interleaved
 * with it and reuses TMP0..TMP6 once the first half has moved to TMP8/10. */
781 vis_mul8x16au(REF_0, CONST_256, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
782
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
783 vis_pmerge(ZERO, REF_2, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
784 vis_mul8x16au(REF_0_1, CONST_256, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
785
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
786 vis_pmerge(ZERO, REF_2_1, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
787
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
788 vis_padd16(TMP0, TMP4, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
789
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
790 vis_mul8x16al(DST_0, CONST_512, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
791 vis_padd16(TMP2, TMP6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
792
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
793 vis_mul8x16al(DST_1, CONST_512, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
794
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
795 vis_mul8x16au(REF_6, CONST_256, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
796
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
797 vis_padd16(TMP0, TMP4, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
798 vis_mul8x16au(REF_6_1, CONST_256, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
799
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
800 vis_padd16(TMP2, TMP6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
801 vis_mul8x16au(REF_4, CONST_256, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
802
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
803 vis_padd16(TMP0, CONST_3, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
804 vis_mul8x16au(REF_4_1, CONST_256, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
805
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
806 vis_padd16(TMP2, CONST_3, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
807 vis_pack16(TMP8, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
808
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
809 vis_pack16(TMP10, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
810 vis_padd16(TMP16, TMP12, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
811
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
812 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
813 vis_mul8x16al(DST_2, CONST_512, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
814 vis_padd16(TMP18, TMP14, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
815
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
816 vis_mul8x16al(DST_3, CONST_512, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
817 vis_padd16(TMP0, CONST_3, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
818
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
819 vis_padd16(TMP2, CONST_3, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
820
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
821 vis_padd16(TMP0, TMP4, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
822
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
823 vis_padd16(TMP2, TMP6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
824 vis_pack16(TMP0, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
825
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
826 vis_pack16(TMP2, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
827 vis_st64(DST_2, dest[8]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
828
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
829 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
830 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
831 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
832 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
833
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * MC_avg_x_8_vis: 8-byte-wide counterpart of the 16-wide routine. Each loop
 * iteration handles four rows: it reads two vis_ld64 words per reference
 * row (offsets 0 and 8), reads the 8 bytes already at dest for each row,
 * combines them, and stores 8 bytes back to each of the four dest rows.
 *
 * dest   - destination rows; both read and overwritten.
 *          NOTE(review): accessed only through 64-bit load/store wrappers,
 *          so it presumably has to be 8-byte aligned - confirm.
 * _ref   - reference rows. The low three address bits are kept in `off`
 *          and the pointer itself is replaced by the value returned from
 *          vis_alignaddr(). The const qualifier is cast away for the local
 *          copy; every vis_st64 in this function targets dest, not ref.
 * stride - added to ref and dest once per row (four times per iteration).
 * height - number of rows. It is shifted right by 2 before the loop and the
 *          loop is do { } while (--height), so height must be a non-zero
 *          multiple of 4: any remainder rows are not processed, and a value
 *          below 4 becomes 0 and would be decremented past zero.
 *
 * NOTE(review): the function name and the constants used (CONST_3,
 * CONST_256/CONST_512, GSR scale factor 5) suggest the per-byte result is
 * (ref[x] + ref[x+1] + 2*dest[x] + 3) >> 2 - confirm against the VIS manual.
 */
834 static void MC_avg_x_8_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
835 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
836 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
837 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
838 unsigned long off = (unsigned long) ref & 0x7;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
839 unsigned long off_plus_1 = off + 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
840 int stride_times_2 = stride << 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
841
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
842 vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
843
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
844 vis_ld64(constants3[0], CONST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
845 vis_fzero(ZERO);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
846 vis_ld64(constants256_512[0], CONST_256);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
847
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
848 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Four rows are consumed per iteration, so count iterations, not rows. */
849 height >>= 2;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
850 do { /* 47 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
851 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
852
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
853 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
854 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
855
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* The alignment is re-established from `off` on every iteration because
 * the branch below may switch it to off_plus_1. */
856 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
857
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
858 vis_ld64(ref[0], TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
859 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
860
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
861 vis_ld64_2(ref, 8, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
862 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
863
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
864 vis_ld64(ref[0], TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
865
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
866 vis_ld64_2(ref, 8, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
867 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
868 vis_faligndata(TMP4, TMP6, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
869
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
870 vis_ld64(ref[0], TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
871
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
872 vis_ld64_2(ref, 8, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
873 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
874 vis_faligndata(TMP8, TMP10, REF_S0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
875
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
876 vis_faligndata(TMP12, TMP14, REF_S4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
877
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* REF_2/REF_6/REF_S2/REF_S6 are the four rows extracted with offset
 * off + 1. When off == 7 that offset would be 8, so vis_src1 is applied to
 * each row's second word (TMP2, TMP6, TMP10, TMP14) instead.
 * NOTE(review): presumably because the alignment offset is only three
 * bits wide - confirm.
 * Both branches also load dest rows 0 and 1 into DST_0 and DST_2. */
878 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
879 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
880
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
881 vis_ld64(dest[0], DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
882 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
883
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
884 vis_ld64_2(dest, stride, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
885 vis_faligndata(TMP4, TMP6, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
886
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
887 vis_faligndata(TMP8, TMP10, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
888
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
889 vis_faligndata(TMP12, TMP14, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
890 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
891 vis_ld64(dest[0], DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
892 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
893
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
894 vis_ld64_2(dest, stride, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
895 vis_src1(TMP6, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
896
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
897 vis_src1(TMP10, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
898
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
899 vis_src1(TMP14, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
900 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
901
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Rows 0 and 1: row 0 accumulates in TMP0/TMP2 (REF_0, REF_2, DST_0/1),
 * row 1 in TMP8/TMP10 (REF_4, REF_6, DST_2/3); each gets CONST_3 added
 * before being packed back into DST_0/1 and DST_2/3. */
902 vis_pmerge(ZERO, REF_0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
903 vis_mul8x16au(REF_0_1, CONST_256, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
904
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
905 vis_pmerge(ZERO, REF_2, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
906 vis_mul8x16au(REF_2_1, CONST_256, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
907
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
908 vis_padd16(TMP0, CONST_3, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
909 vis_mul8x16al(DST_0, CONST_512, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
910
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
911 vis_padd16(TMP2, CONST_3, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
912 vis_mul8x16al(DST_1, CONST_512, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
913
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
914 vis_padd16(TMP0, TMP4, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
915 vis_mul8x16au(REF_4, CONST_256, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
916
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
917 vis_padd16(TMP2, TMP6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
918 vis_mul8x16au(REF_4_1, CONST_256, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
919
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
920 vis_padd16(TMP0, TMP16, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
921 vis_mul8x16au(REF_6, CONST_256, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
922
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
923 vis_padd16(TMP2, TMP18, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
924 vis_mul8x16au(REF_6_1, CONST_256, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
925
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
926 vis_padd16(TMP8, CONST_3, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
927 vis_mul8x16al(DST_2, CONST_512, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
928
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
929 vis_padd16(TMP8, TMP12, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
930 vis_mul8x16al(DST_3, CONST_512, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
931
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
932 vis_padd16(TMP10, TMP14, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
933 vis_pack16(TMP0, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
934
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
935 vis_pack16(TMP2, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
936 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
937 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
938 vis_padd16(TMP10, CONST_3, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
939
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* dest now points at row 1, so dest + stride is row 2 and
 * dest + stride_times_2 is row 3. Row 3's dest goes into TMP4 rather than
 * DST_2 because DST_2 is about to receive row 1's packed result. */
940 vis_ld64_2(dest, stride, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
941 vis_padd16(TMP8, TMP16, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
942
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
943 vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
944 vis_padd16(TMP10, TMP18, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
945 vis_pack16(TMP8, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
946
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
947 vis_pack16(TMP10, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
948 vis_st64(DST_2, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
949 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
950
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Rows 2 and 3: same accumulation using REF_S0/S2 (row 2) and REF_S4/S6
 * (row 3). REF_S2 is widened into TMP24, not TMP4, because TMP4/TMP5 still
 * hold row 3's dest bytes until they are consumed further down. */
951 vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
952 vis_pmerge(ZERO, REF_S0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
953
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
954 vis_pmerge(ZERO, REF_S2, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
955 vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
956
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
957 vis_padd16(TMP0, CONST_3, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
958 vis_mul8x16au(REF_S4, CONST_256, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
959
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
960 vis_padd16(TMP2, CONST_3, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
961 vis_mul8x16au(REF_S4_1, CONST_256, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
962
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
963 vis_padd16(TMP0, TMP24, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
964 vis_mul8x16au(REF_S6, CONST_256, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
965
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
966 vis_padd16(TMP2, TMP6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
967 vis_mul8x16au(REF_S6_1, CONST_256, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
968
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
969 vis_padd16(TMP8, CONST_3, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
970 vis_mul8x16al(DST_0, CONST_512, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
971
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
972 vis_padd16(TMP10, CONST_3, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
973 vis_mul8x16al(DST_1, CONST_512, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
974
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
975 vis_padd16(TMP8, TMP12, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
976 vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
977
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
978 vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
979 vis_padd16(TMP0, TMP16, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
980
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
981 vis_padd16(TMP2, TMP18, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
982 vis_pack16(TMP0, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
983
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
984 vis_padd16(TMP10, TMP14, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
985 vis_pack16(TMP2, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
986 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
987 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
988
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
989 vis_padd16(TMP8, TMP20, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
990
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
991 vis_padd16(TMP10, TMP22, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
992 vis_pack16(TMP8, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
993
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
994 vis_pack16(TMP10, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
995 vis_st64(DST_2, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
996 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
997 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
998 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
999
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1000 static void MC_put_y_16_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1001 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1002 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1003 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1004
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1005 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1006 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1007
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1008 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1009
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1010 vis_ld64_2(ref, 16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1011 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1012
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1013 vis_ld64(ref[0], TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1014 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1015
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1016 vis_ld64_2(ref, 8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1017 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1018
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1019 vis_ld64_2(ref, 16, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1020 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1021
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1022 vis_ld64(constants_fe[0], MASK_fe);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1023 vis_faligndata(TMP6, TMP8, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1024
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1025 vis_ld64(constants_7f[0], MASK_7f);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1026 vis_faligndata(TMP8, TMP10, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1027
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1028 vis_ld64(constants128[0], CONST_128);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1029 height = (height >> 1) - 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1030 do { /* 24 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1031 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1032 vis_xor(REF_0, REF_2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1033
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1034 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1035 vis_xor(REF_4, REF_6, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1036
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1037 vis_ld64_2(ref, 16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1038 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1039 vis_or(REF_0, REF_2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1040
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1041 vis_ld64(ref[0], TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1042 vis_or(REF_4, REF_6, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1043
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1044 vis_ld64_2(ref, 8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1045 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1046
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1047 vis_ld64_2(ref, 16, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1048 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1049 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1050
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1051 vis_and(TMP12, MASK_fe, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1052
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1053 vis_and(TMP16, MASK_fe, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1054 vis_mul8x16(CONST_128, TMP12, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1055
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1056 vis_mul8x16(CONST_128, TMP16, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1057 vis_xor(REF_0, REF_2, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1058
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1059 vis_xor(REF_4, REF_6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1060
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1061 vis_or(REF_0, REF_2, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1062
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1063 vis_and(TMP12, MASK_7f, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1064
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1065 vis_and(TMP16, MASK_7f, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1066
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1067 vis_psub16(TMP14, TMP12, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1068 vis_st64(TMP12, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1069
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1070 vis_psub16(TMP18, TMP16, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1071 vis_st64_2(TMP16, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1072 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1073
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1074 vis_or(REF_4, REF_6, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1075
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1076 vis_and(TMP0, MASK_fe, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1077
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1078 vis_and(TMP2, MASK_fe, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1079 vis_mul8x16(CONST_128, TMP0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1080
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1081 vis_faligndata(TMP6, TMP8, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1082 vis_mul8x16(CONST_128, TMP2, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1083
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1084 vis_faligndata(TMP8, TMP10, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1085
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1086 vis_and(TMP0, MASK_7f, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1087
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1088 vis_and(TMP2, MASK_7f, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1089
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1090 vis_psub16(TMP20, TMP0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1091 vis_st64(TMP0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1092
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1093 vis_psub16(TMP18, TMP2, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1094 vis_st64_2(TMP2, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1095 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1096 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1097
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1098 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1099 vis_xor(REF_0, REF_2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1100
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1101 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1102 vis_xor(REF_4, REF_6, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1103
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1104 vis_ld64_2(ref, 16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1105 vis_or(REF_0, REF_2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1106
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1107 vis_or(REF_4, REF_6, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1108
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1109 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1110
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1111 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1112
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1113 vis_and(TMP12, MASK_fe, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1114
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1115 vis_and(TMP16, MASK_fe, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1116 vis_mul8x16(CONST_128, TMP12, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1117
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1118 vis_mul8x16(CONST_128, TMP16, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1119 vis_xor(REF_0, REF_2, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1120
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1121 vis_xor(REF_4, REF_6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1122
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1123 vis_or(REF_0, REF_2, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1124
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1125 vis_and(TMP12, MASK_7f, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1126
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1127 vis_and(TMP16, MASK_7f, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1128
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1129 vis_psub16(TMP14, TMP12, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1130 vis_st64(TMP12, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1131
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1132 vis_psub16(TMP18, TMP16, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1133 vis_st64_2(TMP16, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1134 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1135
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1136 vis_or(REF_4, REF_6, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1137
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1138 vis_and(TMP0, MASK_fe, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1139
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1140 vis_and(TMP2, MASK_fe, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1141 vis_mul8x16(CONST_128, TMP0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1142
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1143 vis_mul8x16(CONST_128, TMP2, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1144
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1145 vis_and(TMP0, MASK_7f, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1146
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1147 vis_and(TMP2, MASK_7f, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1148
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1149 vis_psub16(TMP20, TMP0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1150 vis_st64(TMP0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1151
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1152 vis_psub16(TMP18, TMP2, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1153 vis_st64_2(TMP2, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1154 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1155
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * MC_put_y_8_vis: write `height` rows to dest, one vis_st64 per row, each
 * row computed from two consecutive reference rows (`stride` bytes apart).
 *
 * For reference rows A (REF_0) and B (REF_2) every output row is
 *     (A | B) - (vis_mul8x16(CONST_128, (A ^ B) & MASK_fe) & MASK_7f)
 * NOTE(review): presumably the per-byte rounded-up average (A + B + 1) >> 1,
 * with the multiply by CONST_128 acting as a shift right by one -- confirm
 * against the VIS macro definitions.
 *
 * dest   - destination rows; advanced by `stride` after each store.
 * _ref   - reference rows; passed through vis_alignaddr and read as two
 *          64-bit words per row (offsets 0 and 8) joined by vis_faligndata.
 *          height + 1 reference rows are read in total.
 * stride - row pitch in bytes, used for both dest and ref.
 * height - row count. The loop emits two rows per pass and the tail emits
 *          the last two. The do/while counter starts at (height >> 1) - 1,
 *          so for height < 4 it is already <= 0 on entry and `--height`
 *          does not stop at zero; callers must pass height >= 4.
 */
1156 static void MC_put_y_8_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1157 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1158 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1159 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1160
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Prologue: fetch the first two reference rows and the three constants. */
1161 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1162 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1163
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1164 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1165 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1166
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1167 vis_ld64(ref[0], TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1168
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1169 vis_ld64_2(ref, 8, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1170 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1171
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1172 vis_ld64(constants_fe[0], MASK_fe);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1173 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1174
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1175 vis_ld64(constants_7f[0], MASK_7f);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1176 vis_faligndata(TMP4, TMP6, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1177
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1178 vis_ld64(constants128[0], CONST_128);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Two rows per pass; the final pair is produced by the tail after the loop. */
1179 height = (height >> 1) - 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1180 do { /* 12 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1181 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1182 vis_xor(REF_0, REF_2, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1183
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1184 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1185 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1186 vis_and(TMP4, MASK_fe, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1187
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1188 vis_or(REF_0, REF_2, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1189 vis_mul8x16(CONST_128, TMP4, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1190
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* REF_0 is overwritten with the next row only after its xor/or were issued. */
1191 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1192 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1193
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1194 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1195 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1196 vis_xor(REF_0, REF_2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1197
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1198 vis_and(TMP4, MASK_7f, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1199
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1200 vis_and(TMP12, MASK_fe, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1201
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1202 vis_mul8x16(CONST_128, TMP12, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1203 vis_or(REF_0, REF_2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1204
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1205 vis_psub16(TMP6, TMP4, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1206 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1207 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1208
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1209 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1210
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1211 vis_and(TMP12, MASK_7f, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1212
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1213 vis_psub16(TMP14, TMP12, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1214 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1215 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1216 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1217
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Tail: last two output rows; only one further reference row is loaded. */
1218 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1219 vis_xor(REF_0, REF_2, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1220
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1221 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1222 vis_and(TMP4, MASK_fe, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1223
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1224 vis_or(REF_0, REF_2, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1225 vis_mul8x16(CONST_128, TMP4, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1226
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1227 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1228
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1229 vis_xor(REF_0, REF_2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1230
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1231 vis_and(TMP4, MASK_7f, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1232
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1233 vis_and(TMP12, MASK_fe, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1234
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1235 vis_mul8x16(CONST_128, TMP12, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1236 vis_or(REF_0, REF_2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1237
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1238 vis_psub16(TMP6, TMP4, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1239 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1240 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1241
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1242 vis_and(TMP12, MASK_7f, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1243
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1244 vis_psub16(TMP14, TMP12, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1245 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1246 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1247
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * MC_avg_y_16_vis: for each row, combine two consecutive reference rows
 * with the bytes already stored in dest and write the packed result back
 * to dest (two 64-bit stores per row, at offsets 0 and 8).
 *
 * Each output row is built as a 16-bit sum of: the widened reference row,
 * the widened following reference row, CONST_3, and the dest bytes run
 * through vis_mul8x16al(..., CONST_512); the sum is narrowed by vis_pack16.
 * NOTE(review): with the GSR scale factor set to 5 this looks like
 * (A + B + 2*D + 3) >> 2 per byte -- confirm against the VIS definitions.
 *
 * dest   - read and rewritten in place; advanced by `stride` per row.
 * _ref   - reference rows; passed through vis_alignaddr and read as three
 *          64-bit words per row (offsets 0, 8, 16). height + 1 rows are read.
 * stride - row pitch in bytes, used for both dest and ref.
 * height - row count; the do/while runs height >> 1 passes of two rows, so
 *          height must be >= 2 (a zero counter would not stop at `--height`).
 */
1248 static void MC_avg_y_16_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1249 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1250 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1251 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1252 int stride_8 = stride + 8;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1253 int stride_16 = stride + 16;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1254
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1255 vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1256
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1257 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1258
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Prologue: first reference row into REF_2 / REF_6, plus the constants. */
1259 vis_ld64(ref[ 0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1260 vis_fzero(ZERO);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1261
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1262 vis_ld64(ref[ 8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1263
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1264 vis_ld64(ref[16], TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1265
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1266 vis_ld64(constants3[0], CONST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1267 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1268
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1269 vis_ld64(constants256_512[0], CONST_256);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1270 vis_faligndata(TMP2, TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1271 height >>= 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1272
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1273 do { /* 31 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Widen the current row (REF_2/REF_6) while loading the next one. */
1274 vis_ld64_2(ref, stride, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1275 vis_pmerge(ZERO, REF_2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1276 vis_mul8x16au(REF_2_1, CONST_256, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1277
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1278 vis_ld64_2(ref, stride_8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1279 vis_pmerge(ZERO, REF_6, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1280 vis_mul8x16au(REF_6_1, CONST_256, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1281
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1282 vis_ld64_2(ref, stride_16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1283 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1284
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1285 vis_ld64(dest[0], DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1286 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1287
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1288 vis_ld64_2(dest, 8, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1289 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1290
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1291 vis_ld64_2(ref, stride, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1292 vis_pmerge(ZERO, REF_0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1293 vis_mul8x16au(REF_0_1, CONST_256, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1294
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1295 vis_ld64_2(ref, stride_8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1296 vis_pmerge(ZERO, REF_4, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1297
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1298 vis_ld64_2(ref, stride_16, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1299 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1300
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Second dest row is held in REF_S0 / REF_S2 (standing in for DST_4 / DST_6). */
1301 vis_ld64_2(dest, stride, REF_S0/*DST_4*/);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1302 vis_faligndata(TMP6, TMP8, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1303 vis_mul8x16au(REF_4_1, CONST_256, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1304
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1305 vis_ld64_2(dest, stride_8, REF_S2/*DST_6*/);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1306 vis_faligndata(TMP8, TMP10, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1307 vis_mul8x16al(DST_0, CONST_512, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1308
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1309 vis_padd16(TMP0, CONST_3, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1310 vis_mul8x16al(DST_1, CONST_512, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1311
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1312 vis_padd16(TMP2, CONST_3, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1313 vis_mul8x16al(DST_2, CONST_512, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1314
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1315 vis_padd16(TMP4, CONST_3, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1316 vis_mul8x16al(DST_3, CONST_512, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1317
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1318 vis_padd16(TMP6, CONST_3, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1319
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1320 vis_padd16(TMP12, TMP20, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1321 vis_mul8x16al(REF_S0, CONST_512, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1322
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1323 vis_padd16(TMP14, TMP22, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1324 vis_mul8x16al(REF_S0_1, CONST_512, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1325
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1326 vis_padd16(TMP16, TMP24, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1327 vis_mul8x16al(REF_S2, CONST_512, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1328
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1329 vis_padd16(TMP18, TMP26, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1330 vis_mul8x16al(REF_S2_1, CONST_512, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1331
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1332 vis_padd16(TMP12, TMP0, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1333 vis_mul8x16au(REF_2, CONST_256, TMP28);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1334
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1335 vis_padd16(TMP14, TMP2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1336 vis_mul8x16au(REF_2_1, CONST_256, TMP30);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1337
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1338 vis_padd16(TMP16, TMP4, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1339 vis_mul8x16au(REF_6, CONST_256, REF_S4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1340
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1341 vis_padd16(TMP18, TMP6, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1342 vis_mul8x16au(REF_6_1, CONST_256, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1343
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Pack and store the first row; the freed TMP12..TMP18 start the second row's sums. */
1344 vis_pack16(TMP12, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1345 vis_padd16(TMP28, TMP0, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1346
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1347 vis_pack16(TMP14, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1348 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1349 vis_padd16(TMP30, TMP2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1350
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1351 vis_pack16(TMP16, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1352 vis_padd16(REF_S4, TMP4, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1353
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1354 vis_pack16(TMP18, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1355 vis_st64_2(DST_2, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1356 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1357 vis_padd16(REF_S6, TMP6, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1358
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1359 vis_padd16(TMP12, TMP20, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1360
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1361 vis_padd16(TMP14, TMP22, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1362 vis_pack16(TMP12, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1363
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1364 vis_padd16(TMP16, TMP24, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1365 vis_pack16(TMP14, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1366 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1367
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1368 vis_padd16(TMP18, TMP26, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1369 vis_pack16(TMP16, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1370
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1371 vis_pack16(TMP18, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1372 vis_st64_2(DST_2, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1373 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1374 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1375 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1376
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * MC_avg_y_8_vis: for each row, combine two consecutive reference rows
 * with the bytes already stored in dest and write the packed result back
 * to dest (one 64-bit store per row).
 *
 * Each output row is a 16-bit sum of: the widened reference row, the
 * widened following reference row, CONST_3, and the dest bytes run through
 * vis_mul8x16al(..., CONST_512); the sum is narrowed by vis_pack16.
 * NOTE(review): with the GSR scale factor set to 5 this looks like
 * (A + B + 2*D + 3) >> 2 per byte -- confirm against the VIS definitions.
 *
 * dest   - read and rewritten in place; advanced by `stride` per row.
 * _ref   - reference rows; passed through vis_alignaddr and read as two
 *          64-bit words per row (offsets 0 and 8). height + 1 rows are read.
 * stride - row pitch in bytes, used for both dest and ref.
 * height - row count; the do/while runs height >> 1 passes of two rows, so
 *          height must be >= 2 (a zero counter would not stop at `--height`).
 */
1377 static void MC_avg_y_8_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1378 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1379 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1380 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1381 int stride_8 = stride + 8;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1382
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1383 vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1384
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1385 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1386
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Prologue: first reference row into REF_2, plus the constants. */
1387 vis_ld64(ref[ 0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1388 vis_fzero(ZERO);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1389
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1390 vis_ld64(ref[ 8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1391
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1392 vis_ld64(constants3[0], CONST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1393 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1394
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1395 vis_ld64(constants256_512[0], CONST_256);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1396
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1397 height >>= 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1398 do { /* 20 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Widen the current row (REF_2) while loading the next one. */
1399 vis_ld64_2(ref, stride, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1400 vis_pmerge(ZERO, REF_2, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1401 vis_mul8x16au(REF_2_1, CONST_256, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1402
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1403 vis_ld64_2(ref, stride_8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1404 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1405
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Both dest rows of this pass are loaded up front (DST_0 and DST_2). */
1406 vis_ld64(dest[0], DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1407
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1408 vis_ld64_2(dest, stride, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1409 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1410
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1411 vis_ld64_2(ref, stride, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1412 vis_mul8x16al(DST_0, CONST_512, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1413 vis_pmerge(ZERO, REF_0, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1414
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1415 vis_ld64_2(ref, stride_8, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1416 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1417 vis_mul8x16al(DST_1, CONST_512, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1418 vis_pmerge(ZERO, REF_0_1, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1419
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1420 vis_padd16(TMP12, CONST_3, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1421 vis_mul8x16al(DST_2, CONST_512, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1422
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1423 vis_padd16(TMP14, CONST_3, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1424 vis_mul8x16al(DST_3, CONST_512, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1425
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1426 vis_faligndata(TMP4, TMP6, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1427
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1428 vis_padd16(TMP8, TMP12, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1429
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1430 vis_padd16(TMP10, TMP14, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1431 vis_mul8x16au(REF_2, CONST_256, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1432
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1433 vis_padd16(TMP8, TMP16, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1434 vis_mul8x16au(REF_2_1, CONST_256, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1435
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1436 vis_padd16(TMP10, TMP18, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1437 vis_pack16(TMP0, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1438
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1439 vis_pack16(TMP2, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1440 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1441 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Second row: TMP12/TMP14 (middle row + CONST_3) are reused with the new REF_2. */
1442 vis_padd16(TMP12, TMP20, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1443
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1444 vis_padd16(TMP14, TMP22, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1445
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1446 vis_padd16(TMP12, TMP24, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1447
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1448 vis_padd16(TMP14, TMP26, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1449 vis_pack16(TMP0, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1450
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1451 vis_pack16(TMP2, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1452 vis_st64(DST_2, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1453 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1454 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1455 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1456
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1457 static void MC_put_xy_16_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1458 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1459 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1460 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1461 unsigned long off = (unsigned long) ref & 0x7;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1462 unsigned long off_plus_1 = off + 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1463 int stride_8 = stride + 8;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1464 int stride_16 = stride + 16;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1465
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1466 vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1467
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1468 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1469
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1470 vis_ld64(ref[ 0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1471 vis_fzero(ZERO);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1472
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1473 vis_ld64(ref[ 8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1474
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1475 vis_ld64(ref[16], TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1476
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1477 vis_ld64(constants2[0], CONST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1478 vis_faligndata(TMP0, TMP2, REF_S0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1479
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1480 vis_ld64(constants256_512[0], CONST_256);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1481 vis_faligndata(TMP2, TMP4, REF_S4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1482
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1483 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1484 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1485 vis_faligndata(TMP0, TMP2, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1486 vis_faligndata(TMP2, TMP4, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1487 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1488 vis_src1(TMP2, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1489 vis_src1(TMP4, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1490 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1491
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1492 height >>= 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1493 do {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1494 vis_ld64_2(ref, stride, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1495 vis_mul8x16au(REF_S0, CONST_256, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1496 vis_pmerge(ZERO, REF_S0_1, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1497
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1498 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1499
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1500 vis_ld64_2(ref, stride_8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1501 vis_mul8x16au(REF_S2, CONST_256, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1502 vis_pmerge(ZERO, REF_S2_1, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1503
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1504 vis_ld64_2(ref, stride_16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1505 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1506 vis_mul8x16au(REF_S4, CONST_256, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1507 vis_pmerge(ZERO, REF_S4_1, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1508
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1509 vis_ld64_2(ref, stride, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1510 vis_mul8x16au(REF_S6, CONST_256, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1511 vis_pmerge(ZERO, REF_S6_1, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1512
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1513 vis_ld64_2(ref, stride_8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1514 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1515
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1516 vis_ld64_2(ref, stride_16, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1517 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1518 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1519
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1520 vis_faligndata(TMP6, TMP8, REF_S0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1521
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1522 vis_faligndata(TMP8, TMP10, REF_S4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1523
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1524 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1525 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1526 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1527 vis_faligndata(TMP2, TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1528 vis_faligndata(TMP6, TMP8, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1529 vis_faligndata(TMP8, TMP10, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1530 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1531 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1532 vis_src1(TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1533 vis_src1(TMP8, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1534 vis_src1(TMP10, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1535 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1536
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1537 vis_mul8x16au(REF_0, CONST_256, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1538 vis_pmerge(ZERO, REF_0_1, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1539
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1540 vis_mul8x16au(REF_2, CONST_256, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1541 vis_pmerge(ZERO, REF_2_1, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1542
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1543 vis_padd16(TMP0, CONST_2, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1544 vis_mul8x16au(REF_4, CONST_256, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1545
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1546 vis_padd16(TMP2, CONST_2, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1547 vis_mul8x16au(REF_4_1, CONST_256, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1548
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1549 vis_padd16(TMP8, TMP4, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1550 vis_mul8x16au(REF_6, CONST_256, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1551
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1552 vis_padd16(TMP10, TMP6, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1553 vis_mul8x16au(REF_6_1, CONST_256, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1554
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1555 vis_padd16(TMP12, TMP8, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1556
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1557 vis_padd16(TMP14, TMP10, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1558
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1559 vis_padd16(TMP12, TMP16, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1560
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1561 vis_padd16(TMP14, TMP18, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1562 vis_pack16(TMP12, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1563
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1564 vis_pack16(TMP14, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1565 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1566 vis_padd16(TMP0, CONST_2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1567
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1568 vis_mul8x16au(REF_S0, CONST_256, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1569 vis_padd16(TMP2, CONST_2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1570
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1571 vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1572 vis_padd16(TMP12, TMP4, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1573
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1574 vis_mul8x16au(REF_S2, CONST_256, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1575 vis_padd16(TMP14, TMP6, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1576
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1577 vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1578 vis_padd16(TMP20, TMP12, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1579
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1580 vis_padd16(TMP22, TMP14, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1581
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1582 vis_padd16(TMP20, TMP24, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1583
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1584 vis_padd16(TMP22, TMP26, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1585 vis_pack16(TMP20, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1586
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1587 vis_pack16(TMP22, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1588 vis_st64_2(DST_2, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1589 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1590 vis_padd16(TMP0, TMP4, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1591
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1592 vis_mul8x16au(REF_S4, CONST_256, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1593 vis_padd16(TMP2, TMP6, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1594
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1595 vis_mul8x16au(REF_S4_1, CONST_256, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1596 vis_padd16(TMP24, TMP8, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1597
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1598 vis_padd16(TMP26, TMP10, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1599 vis_pack16(TMP24, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1600
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1601 vis_pack16(TMP26, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1602 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1603 vis_pmerge(ZERO, REF_S6, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1604
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1605 vis_pmerge(ZERO, REF_S6_1, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1606
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1607 vis_padd16(TMP0, TMP4, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1608
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1609 vis_padd16(TMP2, TMP6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1610
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1611 vis_padd16(TMP0, TMP12, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1612
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1613 vis_padd16(TMP2, TMP14, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1614 vis_pack16(TMP0, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1615
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1616 vis_pack16(TMP2, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1617 vis_st64_2(DST_2, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1618 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1619 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1620 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1621
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1622 static void MC_put_xy_8_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1623 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1624 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1625 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1626 unsigned long off = (unsigned long) ref & 0x7;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1627 unsigned long off_plus_1 = off + 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1628 int stride_8 = stride + 8;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1629
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1630 vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1631
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1632 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1633
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1634 vis_ld64(ref[ 0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1635 vis_fzero(ZERO);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1636
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1637 vis_ld64(ref[ 8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1638
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1639 vis_ld64(constants2[0], CONST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1640
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1641 vis_ld64(constants256_512[0], CONST_256);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1642 vis_faligndata(TMP0, TMP2, REF_S0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1643
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1644 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1645 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1646 vis_faligndata(TMP0, TMP2, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1647 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1648 vis_src1(TMP2, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1649 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1650
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1651 height >>= 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1652 do { /* 26 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1653 vis_ld64_2(ref, stride, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1654 vis_mul8x16au(REF_S0, CONST_256, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1655 vis_pmerge(ZERO, REF_S2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1656
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1657 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1658
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1659 vis_ld64_2(ref, stride_8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1660 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1661 vis_mul8x16au(REF_S0_1, CONST_256, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1662 vis_pmerge(ZERO, REF_S2_1, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1663
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1664 vis_ld64_2(ref, stride, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1665
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1666 vis_ld64_2(ref, stride_8, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1667 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1668 vis_faligndata(TMP0, TMP2, REF_S4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1669
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1670 vis_pmerge(ZERO, REF_S4, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1671
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1672 vis_pmerge(ZERO, REF_S4_1, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1673
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1674 vis_faligndata(TMP4, TMP6, REF_S0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1675
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1676 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1677 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1678 vis_faligndata(TMP0, TMP2, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1679 vis_faligndata(TMP4, TMP6, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1680 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1681 vis_src1(TMP2, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1682 vis_src1(TMP6, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1683 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1684
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1685 vis_padd16(TMP18, CONST_2, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1686 vis_mul8x16au(REF_S6, CONST_256, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1687
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1688 vis_padd16(TMP20, CONST_2, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1689 vis_mul8x16au(REF_S6_1, CONST_256, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1690
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1691 vis_mul8x16au(REF_S0, CONST_256, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1692 vis_pmerge(ZERO, REF_S0_1, TMP28);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1693
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1694 vis_mul8x16au(REF_S2, CONST_256, TMP30);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1695 vis_padd16(TMP18, TMP22, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1696
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1697 vis_mul8x16au(REF_S2_1, CONST_256, TMP32);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1698 vis_padd16(TMP20, TMP24, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1699
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1700 vis_padd16(TMP8, TMP18, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1701
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1702 vis_padd16(TMP10, TMP20, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1703
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1704 vis_padd16(TMP8, TMP12, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1705
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1706 vis_padd16(TMP10, TMP14, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1707 vis_pack16(TMP8, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1708
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1709 vis_pack16(TMP10, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1710 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1711 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1712 vis_padd16(TMP18, TMP26, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1713
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1714 vis_padd16(TMP20, TMP28, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1715
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1716 vis_padd16(TMP18, TMP30, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1717
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1718 vis_padd16(TMP20, TMP32, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1719 vis_pack16(TMP18, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1720
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1721 vis_pack16(TMP20, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1722 vis_st64(DST_2, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1723 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1724 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1725 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1726
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1727 static void MC_avg_xy_16_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1728 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1729 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1730 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1731 unsigned long off = (unsigned long) ref & 0x7;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1732 unsigned long off_plus_1 = off + 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1733 int stride_8 = stride + 8;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1734 int stride_16 = stride + 16;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1735
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1736 vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1737
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1738 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1739
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1740 vis_ld64(ref[ 0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1741 vis_fzero(ZERO);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1742
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1743 vis_ld64(ref[ 8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1744
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1745 vis_ld64(ref[16], TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1746
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1747 vis_ld64(constants6[0], CONST_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1748 vis_faligndata(TMP0, TMP2, REF_S0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1749
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1750 vis_ld64(constants256_1024[0], CONST_256);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1751 vis_faligndata(TMP2, TMP4, REF_S4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1752
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1753 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1754 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1755 vis_faligndata(TMP0, TMP2, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1756 vis_faligndata(TMP2, TMP4, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1757 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1758 vis_src1(TMP2, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1759 vis_src1(TMP4, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1760 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1761
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1762 height >>= 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1763 do { /* 55 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1764 vis_ld64_2(ref, stride, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1765 vis_mul8x16au(REF_S0, CONST_256, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1766 vis_pmerge(ZERO, REF_S0_1, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1767
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1768 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1769
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1770 vis_ld64_2(ref, stride_8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1771 vis_mul8x16au(REF_S2, CONST_256, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1772 vis_pmerge(ZERO, REF_S2_1, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1773
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1774 vis_ld64_2(ref, stride_16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1775 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1776 vis_mul8x16au(REF_S4, CONST_256, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1777 vis_pmerge(ZERO, REF_S4_1, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1778
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1779 vis_ld64_2(ref, stride, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1780 vis_mul8x16au(REF_S6, CONST_256, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1781 vis_pmerge(ZERO, REF_S6_1, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1782
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1783 vis_ld64_2(ref, stride_8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1784 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1785
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1786 vis_ld64_2(ref, stride_16, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1787 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1788 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1789
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1790 vis_ld64(dest[0], DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1791 vis_faligndata(TMP6, TMP8, REF_S0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1792
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1793 vis_ld64_2(dest, 8, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1794 vis_faligndata(TMP8, TMP10, REF_S4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1795
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1796 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1797 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1798 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1799 vis_faligndata(TMP2, TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1800 vis_faligndata(TMP6, TMP8, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1801 vis_faligndata(TMP8, TMP10, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1802 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1803 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1804 vis_src1(TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1805 vis_src1(TMP8, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1806 vis_src1(TMP10, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1807 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1808
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1809 vis_mul8x16al(DST_0, CONST_1024, TMP30);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1810 vis_pmerge(ZERO, REF_0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1811
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1812 vis_mul8x16al(DST_1, CONST_1024, TMP32);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1813 vis_pmerge(ZERO, REF_0_1, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1814
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1815 vis_mul8x16au(REF_2, CONST_256, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1816 vis_pmerge(ZERO, REF_2_1, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1817
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1818 vis_mul8x16al(DST_2, CONST_1024, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1819 vis_padd16(TMP0, CONST_6, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1820
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1821 vis_mul8x16al(DST_3, CONST_1024, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1822 vis_padd16(TMP2, CONST_6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1823
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1824 vis_padd16(TMP0, TMP4, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1825 vis_mul8x16au(REF_4, CONST_256, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1826
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1827 vis_padd16(TMP2, TMP6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1828 vis_mul8x16au(REF_4_1, CONST_256, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1829
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1830 vis_padd16(TMP12, TMP0, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1831 vis_mul8x16au(REF_6, CONST_256, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1832
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1833 vis_padd16(TMP14, TMP2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1834 vis_mul8x16au(REF_6_1, CONST_256, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1835
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1836 vis_padd16(TMP12, TMP16, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1837 vis_mul8x16au(REF_S0, CONST_256, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1838
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1839 vis_padd16(TMP14, TMP18, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1840 vis_mul8x16au(REF_S0_1, CONST_256, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1841
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1842 vis_padd16(TMP12, TMP30, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1843
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1844 vis_padd16(TMP14, TMP32, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1845 vis_pack16(TMP12, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1846
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1847 vis_pack16(TMP14, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1848 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1849 vis_padd16(TMP4, CONST_6, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1850
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1851 vis_ld64_2(dest, stride, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1852 vis_padd16(TMP6, CONST_6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1853 vis_mul8x16au(REF_S2, CONST_256, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1854
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1855 vis_padd16(TMP4, TMP8, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1856 vis_mul8x16au(REF_S2_1, CONST_256, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1857
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1858 vis_padd16(TMP6, TMP10, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1859
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1860 vis_padd16(TMP20, TMP4, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1861
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1862 vis_padd16(TMP22, TMP6, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1863
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1864 vis_padd16(TMP20, TMP24, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1865
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1866 vis_padd16(TMP22, TMP26, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1867
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1868 vis_padd16(TMP20, REF_0, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1869 vis_mul8x16au(REF_S4, CONST_256, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1870
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1871 vis_padd16(TMP22, REF_2, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1872 vis_pack16(TMP20, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1873
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1874 vis_pack16(TMP22, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1875 vis_st64_2(DST_2, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1876 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1877
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1878 vis_ld64_2(dest, 8, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1879 vis_mul8x16al(DST_0, CONST_1024, TMP30);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1880 vis_pmerge(ZERO, REF_S4_1, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1881
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1882 vis_mul8x16al(DST_1, CONST_1024, TMP32);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1883 vis_padd16(REF_4, TMP0, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1884
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1885 vis_mul8x16au(REF_S6, CONST_256, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1886 vis_padd16(REF_6, TMP2, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1887
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1888 vis_mul8x16au(REF_S6_1, CONST_256, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1889 vis_padd16(TMP8, TMP12, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1890
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1891 vis_padd16(TMP10, TMP14, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1892
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1893 vis_padd16(TMP8, TMP30, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1894
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1895 vis_padd16(TMP10, TMP32, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1896 vis_pack16(TMP8, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1897
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1898 vis_pack16(TMP10, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1899 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1900
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1901 vis_padd16(REF_0, TMP4, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1902
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1903 vis_mul8x16al(DST_2, CONST_1024, TMP30);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1904 vis_padd16(REF_2, TMP6, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1905
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1906 vis_mul8x16al(DST_3, CONST_1024, TMP32);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1907 vis_padd16(REF_0, REF_4, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1908
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1909 vis_padd16(REF_2, REF_6, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1910
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1911 vis_padd16(REF_0, TMP30, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1912
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1913 /* stall */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1914
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1915 vis_padd16(REF_2, TMP32, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1916 vis_pack16(REF_0, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1917
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1918 vis_pack16(REF_2, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1919 vis_st64_2(DST_2, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1920 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1921 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1922 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1923
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1924 static void MC_avg_xy_8_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1925 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1926 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1927 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1928 unsigned long off = (unsigned long) ref & 0x7;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1929 unsigned long off_plus_1 = off + 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1930 int stride_8 = stride + 8;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1931
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1932 vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1933
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1934 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1935
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1936 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1937 vis_fzero(ZERO);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1938
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1939 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1940
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1941 vis_ld64(constants6[0], CONST_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1942
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1943 vis_ld64(constants256_1024[0], CONST_256);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1944 vis_faligndata(TMP0, TMP2, REF_S0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1945
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1946 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1947 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1948 vis_faligndata(TMP0, TMP2, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1949 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1950 vis_src1(TMP2, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1951 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1952
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1953 height >>= 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1954 do { /* 31 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1955 vis_ld64_2(ref, stride, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1956 vis_mul8x16au(REF_S0, CONST_256, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1957 vis_pmerge(ZERO, REF_S0_1, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1958
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1959 vis_ld64_2(ref, stride_8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1960 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1961 vis_mul8x16au(REF_S2, CONST_256, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1962 vis_pmerge(ZERO, REF_S2_1, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1963
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1964 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1965
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1966 vis_ld64_2(ref, stride, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1967 vis_faligndata(TMP0, TMP2, REF_S4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1968
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1969 vis_ld64_2(ref, stride_8, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1970 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1971
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1972 vis_ld64(dest[0], DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1973 vis_faligndata(TMP4, TMP6, REF_S0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1974
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1975 vis_ld64_2(dest, stride, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1976
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1977 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1978 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1979 vis_faligndata(TMP0, TMP2, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1980 vis_faligndata(TMP4, TMP6, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1981 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1982 vis_src1(TMP2, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1983 vis_src1(TMP6, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1984 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1985
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1986 vis_mul8x16al(DST_0, CONST_1024, TMP30);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1987 vis_pmerge(ZERO, REF_S4, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1988
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1989 vis_mul8x16al(DST_1, CONST_1024, TMP32);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1990 vis_pmerge(ZERO, REF_S4_1, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1991
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1992 vis_mul8x16au(REF_S6, CONST_256, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1993 vis_pmerge(ZERO, REF_S6_1, TMP28);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1994
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1995 vis_mul8x16au(REF_S0, CONST_256, REF_S4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1996 vis_padd16(TMP22, CONST_6, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1997
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1998 vis_mul8x16au(REF_S0_1, CONST_256, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
1999 vis_padd16(TMP24, CONST_6, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2000
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2001 vis_mul8x16al(DST_2, CONST_1024, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2002 vis_padd16(TMP22, TMP26, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2003
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2004 vis_mul8x16al(DST_3, CONST_1024, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2005 vis_padd16(TMP24, TMP28, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2006
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2007 vis_mul8x16au(REF_S2, CONST_256, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2008 vis_padd16(TMP8, TMP22, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2009
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2010 vis_mul8x16au(REF_S2_1, CONST_256, TMP28);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2011 vis_padd16(TMP10, TMP24, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2012
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2013 vis_padd16(TMP8, TMP12, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2014
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2015 vis_padd16(TMP10, TMP14, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2016
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2017 vis_padd16(TMP8, TMP30, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2018
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2019 vis_padd16(TMP10, TMP32, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2020 vis_pack16(TMP8, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2021
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2022 vis_pack16(TMP10, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2023 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2024 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2025
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2026 vis_padd16(REF_S4, TMP22, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2027
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2028 vis_padd16(REF_S6, TMP24, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2029
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2030 vis_padd16(TMP12, TMP26, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2031
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2032 vis_padd16(TMP14, TMP28, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2033
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2034 vis_padd16(TMP12, REF_0, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2035
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2036 vis_padd16(TMP14, REF_2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2037 vis_pack16(TMP12, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2038
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2039 vis_pack16(TMP14, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2040 vis_st64(DST_2, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2041 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2042 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2043 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2044
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2045 /* End of rounding code */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2046
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2047 /* Start of no rounding code */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* The trick used in some of this file is the formula from the MMX
 * motion comp code, which is:
 *
 *      (x+y)>>1 == (x&y)+((x^y)>>1)
 *
 * i.e. the truncating (non-rounding) average of x and y.
 *
 * This allows us to average 8 bytes at a time in a 64-bit FPU reg.
 * We avoid overflows by masking before we do the shift, and we
 * implement the shift by multiplying by 1/2 using mul8x16.  So in
 * VIS this is (assume 'x' is in f0, 'y' is in f2, a repeating mask
 * of '0xfe' is in f4, a repeating mask of '0x7f' is in f6, and
 * the value 0x80808080 is in f8):
 *
 *      fxor            f0, f2, f10
 *      fand            f10, f4, f10
 *      fmul8x16        f8, f10, f10
 *      fand            f10, f6, f10
 *      fand            f0, f2, f12
 *      fpadd16         f12, f10, f10
 */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2067
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2068 static void MC_put_no_round_o_16_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2069 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2070 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2071 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2072
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2073 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2074 do { /* 5 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2075 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2076
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2077 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2078
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2079 vis_ld64_2(ref, 16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2080 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2081
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2082 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2083 vis_st64(REF_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2084
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2085 vis_faligndata(TMP2, TMP4, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2086 vis_st64_2(REF_2, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2087 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2088 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2089 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2090
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2091 static void MC_put_no_round_o_8_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2092 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2093 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2094 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2095
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2096 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2097 do { /* 4 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2098 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2099
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2100 vis_ld64(ref[8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2101 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2102
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2103 /* stall */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2104
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2105 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2106 vis_st64(REF_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2107 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2108 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2109 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2110
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2111
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2112 static void MC_avg_no_round_o_16_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2113 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2114 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2115 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2116 int stride_8 = stride + 8;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2117
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2118 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2119
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2120 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2121
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2122 vis_ld64(ref[8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2123
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2124 vis_ld64(ref[16], TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2125
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2126 vis_ld64(dest[0], DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2127
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2128 vis_ld64(dest[8], DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2129
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2130 vis_ld64(constants_fe[0], MASK_fe);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2131 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2132
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2133 vis_ld64(constants_7f[0], MASK_7f);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2134 vis_faligndata(TMP2, TMP4, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2135
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2136 vis_ld64(constants128[0], CONST_128);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2137
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2138 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2139 height = (height >> 1) - 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2140
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2141 do { /* 24 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2142 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2143 vis_xor(DST_0, REF_0, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2144
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2145 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2146 vis_and(TMP6, MASK_fe, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2147
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2148 vis_ld64_2(ref, 16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2149 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2150 vis_mul8x16(CONST_128, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2151 vis_xor(DST_2, REF_2, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2152
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2153 vis_and(TMP8, MASK_fe, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2154
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2155 vis_and(DST_0, REF_0, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2156 vis_ld64_2(dest, stride, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2157 vis_mul8x16(CONST_128, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2158
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2159 vis_and(DST_2, REF_2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2160 vis_ld64_2(dest, stride_8, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2161
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2162 vis_ld64(ref[0], TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2163 vis_and(TMP6, MASK_7f, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2164
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2165 vis_and(TMP8, MASK_7f, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2166
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2167 vis_padd16(TMP10, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2168 vis_st64(TMP6, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2169
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2170 vis_padd16(TMP12, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2171 vis_st64_2(TMP8, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2172
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2173 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2174 vis_ld64_2(ref, 8, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2175 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2176
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2177 vis_ld64_2(ref, 16, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2178 vis_faligndata(TMP2, TMP4, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2179 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2180
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2181 vis_xor(DST_0, REF_0, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2182
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2183 vis_and(TMP20, MASK_fe, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2184
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2185 vis_xor(DST_2, REF_2, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2186 vis_mul8x16(CONST_128, TMP20, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2187
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2188 vis_and(TMP22, MASK_fe, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2189
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2190 vis_and(DST_0, REF_0, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2191 vis_mul8x16(CONST_128, TMP22, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2192
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2193 vis_and(DST_2, REF_2, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2194
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2195 vis_ld64_2(dest, stride, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2196 vis_faligndata(TMP14, TMP16, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2197
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2198 vis_ld64_2(dest, stride_8, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2199 vis_faligndata(TMP16, TMP18, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2200
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2201 vis_and(TMP20, MASK_7f, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2202
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2203 vis_and(TMP22, MASK_7f, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2204
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2205 vis_padd16(TMP24, TMP20, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2206 vis_st64(TMP20, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2207
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2208 vis_padd16(TMP26, TMP22, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2209 vis_st64_2(TMP22, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2210 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2211 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2212
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2213 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2214 vis_xor(DST_0, REF_0, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2215
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2216 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2217 vis_and(TMP6, MASK_fe, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2218
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2219 vis_ld64_2(ref, 16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2220 vis_mul8x16(CONST_128, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2221 vis_xor(DST_2, REF_2, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2222
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2223 vis_and(TMP8, MASK_fe, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2224
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2225 vis_and(DST_0, REF_0, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2226 vis_ld64_2(dest, stride, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2227 vis_mul8x16(CONST_128, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2228
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2229 vis_and(DST_2, REF_2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2230 vis_ld64_2(dest, stride_8, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2231
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2232 vis_ld64(ref[0], TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2233 vis_and(TMP6, MASK_7f, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2234
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2235 vis_and(TMP8, MASK_7f, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2236
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2237 vis_padd16(TMP10, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2238 vis_st64(TMP6, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2239
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2240 vis_padd16(TMP12, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2241 vis_st64_2(TMP8, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2242
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2243 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2244 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2245
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2246 vis_faligndata(TMP2, TMP4, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2247
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2248 vis_xor(DST_0, REF_0, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2249
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2250 vis_and(TMP20, MASK_fe, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2251
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2252 vis_xor(DST_2, REF_2, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2253 vis_mul8x16(CONST_128, TMP20, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2254
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2255 vis_and(TMP22, MASK_fe, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2256
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2257 vis_and(DST_0, REF_0, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2258 vis_mul8x16(CONST_128, TMP22, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2259
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2260 vis_and(DST_2, REF_2, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2261
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2262 vis_and(TMP20, MASK_7f, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2263
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2264 vis_and(TMP22, MASK_7f, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2265
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2266 vis_padd16(TMP24, TMP20, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2267 vis_st64(TMP20, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2268
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2269 vis_padd16(TMP26, TMP22, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2270 vis_st64_2(TMP22, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2271 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2272
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * MC_avg_no_round_o_8_vis: combine an 8-byte-wide reference block with the
 * rows already in dest, one vis_st64 per row, two rows per loop iteration.
 *
 * dest   - destination rows; each row is loaded into DST_0, combined with
 *          the matching reference row and stored back to dest[0].
 * _ref   - reference rows; the pointer goes through vis_alignaddr and each
 *          row is rebuilt from the words at ref[0] and ref[8] with
 *          vis_faligndata (presumably to cope with an unaligned ref --
 *          TODO confirm against the vis_* macro definitions).
 * stride - amount added to both ref and dest to step to the next row.
 * height - row count; the loop runs (height >> 1) - 1 times and the code
 *          after the loop stores the last two rows.
 *
 * Per row the stored value is
 *   (DST & REF) + (mul8x16(CONST_128, (DST ^ REF) & MASK_fe) & MASK_7f)
 * which presumably is the per-byte truncating average
 * (a & b) + ((a ^ b) >> 1), with the multiply standing in for the shift;
 * the macros and constant tables are defined outside this chunk -- confirm.
 *
 * NOTE(review): for height < 4 the counter starts at zero or below, so
 * --height is non-zero after the first pass and the do/while does not stop
 * after the intended rows; callers presumably always pass an even
 * height >= 4 -- confirm.
 */
2273 static void MC_avg_no_round_o_8_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2274 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2275 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2276 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2277
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* All ref loads below use the pointer returned by vis_alignaddr. */
2278 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2279
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2280 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2281
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2282 vis_ld64(ref[8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2283
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2284 vis_ld64(dest[0], DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2285
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2286 vis_ld64(constants_fe[0], MASK_fe);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2287
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2288 vis_ld64(constants_7f[0], MASK_7f);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* REF_0 = first reference row, assembled from the two words just loaded. */
2289 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2290
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2291 vis_ld64(constants128[0], CONST_128);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2292
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Row 0 is already in DST_0/REF_0; step ref to row 1 before the loop. */
2293 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Two rows per iteration; the last two rows are handled after the loop. */
2294 height = (height >> 1) - 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2295
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2296 do { /* 12 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2297 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* First row of the pair: TMP4 takes the xor term, TMP6 the and term. */
2298 vis_xor(DST_0, REF_0, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2299
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2300 vis_ld64(ref[8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2301 vis_and(TMP4, MASK_fe, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2302
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2303 vis_and(DST_0, REF_0, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* DST_0 is reloaded for the following row before this row is stored;
   vis_ld64_2(dest, stride, ...) presumably reads at dest + stride. */
2304 vis_ld64_2(dest, stride, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2305 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2306 vis_mul8x16(CONST_128, TMP4, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2307
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* The next row's first word goes to TMP12 because TMP0 is reused as the
   xor scratch for the second row of the pair just below. */
2308 vis_ld64(ref[0], TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2309 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2310
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2311 vis_ld64(ref[8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Second row of the pair: xor term accumulates in TMP0. */
2312 vis_xor(DST_0, REF_0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2313 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2314
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2315 vis_and(TMP0, MASK_fe, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2316
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2317 vis_and(TMP4, MASK_7f, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2318
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Finish and store the first row of the pair. */
2319 vis_padd16(TMP6, TMP4, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2320 vis_st64(TMP4, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2321 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2322 vis_mul8x16(CONST_128, TMP0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2323
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2324 vis_and(DST_0, REF_0, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2325 vis_ld64_2(dest, stride, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2326
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* REF_0 for the next iteration's first row, from the words loaded above. */
2327 vis_faligndata(TMP12, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2328
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2329 vis_and(TMP0, MASK_7f, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2330
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Finish and store the second row of the pair. */
2331 vis_padd16(TMP6, TMP0, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2332 vis_st64(TMP4, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2333 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2334 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2335
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Tail: the same two-row sequence as the loop body, without the loads
   that would fetch ref and dest rows beyond the last pair. */
2336 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2337 vis_xor(DST_0, REF_0, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2338
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2339 vis_ld64(ref[8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2340 vis_and(TMP4, MASK_fe, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2341
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2342 vis_and(DST_0, REF_0, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2343 vis_ld64_2(dest, stride, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2344 vis_mul8x16(CONST_128, TMP4, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2345
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2346 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2347
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2348 vis_xor(DST_0, REF_0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2349
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2350 vis_and(TMP0, MASK_fe, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2351
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2352 vis_and(TMP4, MASK_7f, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2353
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2354 vis_padd16(TMP6, TMP4, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2355 vis_st64(TMP4, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2356 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2357 vis_mul8x16(CONST_128, TMP0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2358
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2359 vis_and(DST_0, REF_0, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2360
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2361 vis_and(TMP0, MASK_7f, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2362
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2363 vis_padd16(TMP6, TMP0, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2364 vis_st64(TMP4, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2365 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2366
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2367 static void MC_put_no_round_x_16_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2368 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2369 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2370 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2371 unsigned long off = (unsigned long) ref & 0x7;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2372 unsigned long off_plus_1 = off + 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2373
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2374 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2375
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2376 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2377
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2378 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2379
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2380 vis_ld64_2(ref, 16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2381
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2382 vis_ld64(constants_fe[0], MASK_fe);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2383
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2384 vis_ld64(constants_7f[0], MASK_7f);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2385 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2386
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2387 vis_ld64(constants128[0], CONST_128);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2388 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2389
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2390 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2391 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2392 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2393 vis_faligndata(TMP2, TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2394 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2395 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2396 vis_src1(TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2397 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2398
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2399 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2400 height = (height >> 1) - 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2401
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2402 do { /* 34 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2403 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2404 vis_xor(REF_0, REF_2, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2405
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2406 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2407 vis_xor(REF_4, REF_6, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2408
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2409 vis_ld64_2(ref, 16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2410 vis_and(TMP6, MASK_fe, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2411 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2412
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2413 vis_ld64(ref[0], TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2414 vis_mul8x16(CONST_128, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2415 vis_and(TMP8, MASK_fe, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2416
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2417 vis_ld64_2(ref, 8, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2418 vis_mul8x16(CONST_128, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2419 vis_and(REF_0, REF_2, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2420
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2421 vis_ld64_2(ref, 16, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2422 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2423 vis_and(REF_4, REF_6, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2424
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2425 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2426
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2427 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2428
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2429 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2430
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2431 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2432 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2433 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2434 vis_faligndata(TMP2, TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2435 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2436 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2437 vis_src1(TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2438 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2439
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2440 vis_and(TMP6, MASK_7f, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2441
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2442 vis_and(TMP8, MASK_7f, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2443
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2444 vis_padd16(TMP10, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2445 vis_st64(TMP6, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2446
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2447 vis_padd16(TMP12, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2448 vis_st64_2(TMP8, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2449 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2450
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2451 vis_xor(REF_0, REF_2, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2452
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2453 vis_xor(REF_4, REF_6, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2454
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2455 vis_and(TMP6, MASK_fe, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2456
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2457 vis_mul8x16(CONST_128, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2458 vis_and(TMP8, MASK_fe, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2459
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2460 vis_mul8x16(CONST_128, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2461 vis_and(REF_0, REF_2, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2462
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2463 vis_and(REF_4, REF_6, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2464
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2465 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2466
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2467 vis_faligndata(TMP14, TMP16, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2468
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2469 vis_faligndata(TMP16, TMP18, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2470
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2471 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2472 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2473 vis_faligndata(TMP14, TMP16, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2474 vis_faligndata(TMP16, TMP18, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2475 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2476 vis_src1(TMP16, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2477 vis_src1(TMP18, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2478 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2479
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2480 vis_and(TMP6, MASK_7f, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2481
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2482 vis_and(TMP8, MASK_7f, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2483
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2484 vis_padd16(TMP10, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2485 vis_st64(TMP6, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2486
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2487 vis_padd16(TMP12, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2488 vis_st64_2(TMP8, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2489 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2490 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2491
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2492 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2493 vis_xor(REF_0, REF_2, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2494
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2495 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2496 vis_xor(REF_4, REF_6, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2497
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2498 vis_ld64_2(ref, 16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2499 vis_and(TMP6, MASK_fe, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2500
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2501 vis_mul8x16(CONST_128, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2502 vis_and(TMP8, MASK_fe, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2503
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2504 vis_mul8x16(CONST_128, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2505 vis_and(REF_0, REF_2, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2506
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2507 vis_and(REF_4, REF_6, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2508
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2509 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2510
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2511 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2512
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2513 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2514
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2515 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2516 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2517 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2518 vis_faligndata(TMP2, TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2519 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2520 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2521 vis_src1(TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2522 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2523
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2524 vis_and(TMP6, MASK_7f, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2525
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2526 vis_and(TMP8, MASK_7f, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2527
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2528 vis_padd16(TMP10, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2529 vis_st64(TMP6, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2530
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2531 vis_padd16(TMP12, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2532 vis_st64_2(TMP8, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2533 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2534
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2535 vis_xor(REF_0, REF_2, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2536
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2537 vis_xor(REF_4, REF_6, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2538
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2539 vis_and(TMP6, MASK_fe, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2540
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2541 vis_mul8x16(CONST_128, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2542 vis_and(TMP8, MASK_fe, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2543
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2544 vis_mul8x16(CONST_128, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2545 vis_and(REF_0, REF_2, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2546
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2547 vis_and(REF_4, REF_6, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2548
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2549 vis_and(TMP6, MASK_7f, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2550
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2551 vis_and(TMP8, MASK_7f, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2552
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2553 vis_padd16(TMP10, TMP6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2554 vis_st64(TMP6, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2555
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2556 vis_padd16(TMP12, TMP8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2557 vis_st64_2(TMP8, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2558 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2559
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * MC_put_no_round_x_8_vis
 *
 * Stores one 64-bit word to dest[0] per row, advancing ref and dest by
 * stride for every row.  Each stored word is built from two registers
 * derived from the same source row, REF_0 (alignment `off`) and REF_2
 * (alignment `off_plus_1`), combined as
 *     (REF_0 & REF_2)
 *       + (vis_mul8x16(CONST_128, (REF_0 ^ REF_2) & MASK_fe) & MASK_7f)
 * NOTE(review): this looks like the truncating per-byte average of each
 * pixel with its right-hand neighbour -- confirm against the vis_* macro
 * definitions, which are outside this view.
 *
 * dest   - destination rows; written with vis_st64.
 * _ref   - source rows; its low three bits are saved in `off` and the
 *          pointer is then replaced by the result of vis_alignaddr().
 * stride - added to ref and to dest once per row.
 * height - row count.  The loop emits two rows per pass and runs
 *          (height >> 1) - 1 times as a do/while, then the tail emits
 *          two more rows, so height must be even and at least 4
 *          (height == 2 would make the counter 0 before the first
 *          --height; an odd height emits height - 1 rows).
 */
2560 static void MC_put_no_round_x_8_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2561 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2562 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2563 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2564 unsigned long off = (unsigned long) ref & 0x7;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2565 unsigned long off_plus_1 = off + 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2566
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* ref is overwritten here; its low three bits were saved in off above. */
2567 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2568
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2569 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2570
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2571 vis_ld64(ref[8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2572
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2573 vis_ld64(constants_fe[0], MASK_fe);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2574
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2575 vis_ld64(constants_7f[0], MASK_7f);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2576
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2577 vis_ld64(constants128[0], CONST_128);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2578 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2579
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* off == 7 makes off_plus_1 == 8; that case skips vis_faligndata and
 * takes REF_2 from TMP2 through vis_src1 instead. */
2580 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2581 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2582 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2583 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2584 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2585 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2586
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2587 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Number of two-row loop passes; the tail after the loop emits the
 * final two rows. */
2588 height = (height >> 1) - 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2589
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Each pass loads the next two rows (TMP0/TMP2, then TMP8/TMP10) while
 * finishing the row currently held in REF_0/REF_2. */
2590 do { /* 20 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2591 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2592 vis_xor(REF_0, REF_2, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2593
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2594 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2595 vis_and(TMP4, MASK_fe, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2596 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2597
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2598 vis_ld64(ref[0], TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2599 vis_and(REF_0, REF_2, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2600 vis_mul8x16(CONST_128, TMP4, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2601
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Back to alignment off before REF_0 is rebuilt; the previous step may
 * have left off_plus_1 in effect (presumably GSR state -- TODO confirm). */
2602 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2603
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2604 vis_ld64_2(ref, 8, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2605 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2606 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2607
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2608 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2609 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2610 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2611 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2612 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2613 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2614
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2615 vis_and(TMP4, MASK_7f, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2616
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2617 vis_padd16(TMP6, TMP4, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2618 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2619 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2620
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2621 vis_xor(REF_0, REF_2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2622
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2623 vis_and(TMP12, MASK_fe, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2624
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2625 vis_and(REF_0, REF_2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2626 vis_mul8x16(CONST_128, TMP12, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2627
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2628 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2629 vis_faligndata(TMP8, TMP10, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2630 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2631 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2632 vis_faligndata(TMP8, TMP10, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2633 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2634 vis_src1(TMP10, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2635 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2636
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2637 vis_and(TMP12, MASK_7f, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2638
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2639 vis_padd16(TMP14, TMP12, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2640 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2641 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2642 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2643
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Tail: one more row is loaded and the last two rows are stored. */
2644 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2645 vis_xor(REF_0, REF_2, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2646
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2647 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2648 vis_and(TMP4, MASK_fe, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2649
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2650 vis_and(REF_0, REF_2, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2651 vis_mul8x16(CONST_128, TMP4, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2652
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2653 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2654
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2655 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2656
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2657 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2658 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2659 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2660 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2661 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2662 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2663
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2664 vis_and(TMP4, MASK_7f, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2665
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2666 vis_padd16(TMP6, TMP4, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2667 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2668 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2669
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2670 vis_xor(REF_0, REF_2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2671
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2672 vis_and(TMP12, MASK_fe, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2673
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2674 vis_and(REF_0, REF_2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2675 vis_mul8x16(CONST_128, TMP12, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2676
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2677 vis_and(TMP12, MASK_7f, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2678
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2679 vis_padd16(TMP14, TMP12, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2680 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2681 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2682 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2683
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * MC_avg_no_round_x_16_vis
 *
 * For every row: loads the 16 bytes already at dest[0]/dest[8] and three
 * 64-bit words of the source row (ref[0], ref[8], ref[16]), combines
 * them and stores 16 bytes back to dest[0]/dest[8].
 * For each half of the row the value handed to vis_pack16 is the sum of
 *     the source register at alignment `off`        (REF_0 / REF_4),
 *     the source register at alignment `off_plus_1` (REF_2 / REF_6),
 *     the old dest register scaled with CONST_512   (DST_0..DST_3),
 *     CONST_3.
 * NOTE(review): the numeric effect of vis_mul8x16au/vis_mul8x16al,
 * vis_pmerge and vis_pack16 is defined by macros outside this view --
 * confirm before describing this as a specific averaging/rounding rule.
 *
 * dest   - read with vis_ld64 and written with vis_st64 at offsets 0, 8.
 * _ref   - source rows; its low three bits are saved in `off` and the
 *          pointer is then replaced by the result of vis_alignaddr().
 * stride - added to ref and to dest after every row.
 * height - row count; the do/while body runs before the first test, so
 *          it must be at least 1.
 */
2684 static void MC_avg_no_round_x_16_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2685 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2686 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2687 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2688 unsigned long off = (unsigned long) ref & 0x7;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2689 unsigned long off_plus_1 = off + 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2690
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Scale factor 5 goes into the GSR; presumably it is what the
 * vis_pack16 calls below consume -- TODO confirm. */
2691 vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2692
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2693 vis_ld64(constants3[0], CONST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2694 vis_fzero(ZERO);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* CONST_512 is used below but never loaded by name; presumably this
 * 64-bit load fills it as well -- TODO confirm against the register
 * definitions. */
2695 vis_ld64(constants256_512[0], CONST_256);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2696
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* ref is overwritten here; its low three bits were saved in off above. */
2697 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2698 do { /* 26 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2699 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2700
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2701 vis_ld64(ref[8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2702
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Alignment off is re-applied every row because the branch below
 * switches to off_plus_1. */
2703 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2704
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* TMP4 carries ref[16] only until REF_4/REF_6 are built; the
 * vis_pmerge further down appears to reuse it as scratch. */
2705 vis_ld64(ref[16], TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2706
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2707 vis_ld64(dest[0], DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2708 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2709
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2710 vis_ld64(dest[8], DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2711 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2712
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* off == 7 makes off_plus_1 == 8; that case skips vis_faligndata and
 * takes REF_2/REF_6 from TMP2/TMP4 through vis_src1 instead. */
2713 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2714 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2715 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2716 vis_faligndata(TMP2, TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2717 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2718 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2719 vis_src1(TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2720 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2721
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* First 8 bytes: REF_0/REF_2 and DST_0/DST_1 are accumulated in
 * TMP0/TMP2. */
2722 vis_mul8x16au(REF_0, CONST_256, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2723
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2724 vis_pmerge(ZERO, REF_2, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2725 vis_mul8x16au(REF_0_1, CONST_256, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2726
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2727 vis_pmerge(ZERO, REF_2_1, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2728
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2729 vis_padd16(TMP0, TMP4, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2730
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2731 vis_mul8x16al(DST_0, CONST_512, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2732 vis_padd16(TMP2, TMP6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2733
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2734 vis_mul8x16al(DST_1, CONST_512, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2735
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* From here the second 8 bytes (REF_4/REF_6 -> TMP12..TMP18) are
 * interleaved with finishing the first 8. */
2736 vis_mul8x16au(REF_6, CONST_256, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2737
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2738 vis_padd16(TMP0, TMP4, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2739 vis_mul8x16au(REF_6_1, CONST_256, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2740
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2741 vis_padd16(TMP2, TMP6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2742 vis_mul8x16au(REF_4, CONST_256, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2743
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2744 vis_padd16(TMP0, CONST_3, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2745 vis_mul8x16au(REF_4_1, CONST_256, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2746
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2747 vis_padd16(TMP2, CONST_3, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2748 vis_pack16(TMP8, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2749
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2750 vis_pack16(TMP10, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2751 vis_padd16(TMP16, TMP12, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2752
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2753 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2754 vis_mul8x16al(DST_2, CONST_512, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2755 vis_padd16(TMP18, TMP14, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2756
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2757 vis_mul8x16al(DST_3, CONST_512, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2758 vis_padd16(TMP0, CONST_3, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2759
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2760 vis_padd16(TMP2, CONST_3, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2761
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2762 vis_padd16(TMP0, TMP4, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2763
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2764 vis_padd16(TMP2, TMP6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2765 vis_pack16(TMP0, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2766
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2767 vis_pack16(TMP2, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2768 vis_st64(DST_2, dest[8]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2769
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2770 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2771 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2772 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2773 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2774
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2775 static void MC_avg_no_round_x_8_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2776 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2777 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2778 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2779 unsigned long off = (unsigned long) ref & 0x7;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2780 unsigned long off_plus_1 = off + 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2781 int stride_times_2 = stride << 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2782
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2783 vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2784
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2785 vis_ld64(constants3[0], CONST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2786 vis_fzero(ZERO);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2787 vis_ld64(constants256_512[0], CONST_256);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2788
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2789 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2790 height >>= 2;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2791 do { /* 47 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2792 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2793
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2794 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2795 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2796
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2797 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2798
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2799 vis_ld64(ref[0], TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2800 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2801
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2802 vis_ld64_2(ref, 8, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2803 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2804
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2805 vis_ld64(ref[0], TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2806
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2807 vis_ld64_2(ref, 8, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2808 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2809 vis_faligndata(TMP4, TMP6, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2810
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2811 vis_ld64(ref[0], TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2812
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2813 vis_ld64_2(ref, 8, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2814 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2815 vis_faligndata(TMP8, TMP10, REF_S0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2816
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2817 vis_faligndata(TMP12, TMP14, REF_S4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2818
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2819 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2820 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2821
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2822 vis_ld64(dest[0], DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2823 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2824
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2825 vis_ld64_2(dest, stride, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2826 vis_faligndata(TMP4, TMP6, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2827
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2828 vis_faligndata(TMP8, TMP10, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2829
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2830 vis_faligndata(TMP12, TMP14, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2831 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2832 vis_ld64(dest[0], DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2833 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2834
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2835 vis_ld64_2(dest, stride, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2836 vis_src1(TMP6, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2837
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2838 vis_src1(TMP10, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2839
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2840 vis_src1(TMP14, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2841 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2842
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2843 vis_pmerge(ZERO, REF_0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2844 vis_mul8x16au(REF_0_1, CONST_256, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2845
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2846 vis_pmerge(ZERO, REF_2, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2847 vis_mul8x16au(REF_2_1, CONST_256, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2848
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2849 vis_padd16(TMP0, CONST_3, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2850 vis_mul8x16al(DST_0, CONST_512, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2851
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2852 vis_padd16(TMP2, CONST_3, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2853 vis_mul8x16al(DST_1, CONST_512, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2854
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2855 vis_padd16(TMP0, TMP4, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2856 vis_mul8x16au(REF_4, CONST_256, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2857
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2858 vis_padd16(TMP2, TMP6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2859 vis_mul8x16au(REF_4_1, CONST_256, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2860
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2861 vis_padd16(TMP0, TMP16, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2862 vis_mul8x16au(REF_6, CONST_256, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2863
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2864 vis_padd16(TMP2, TMP18, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2865 vis_mul8x16au(REF_6_1, CONST_256, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2866
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2867 vis_padd16(TMP8, CONST_3, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2868 vis_mul8x16al(DST_2, CONST_512, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2869
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2870 vis_padd16(TMP8, TMP12, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2871 vis_mul8x16al(DST_3, CONST_512, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2872
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2873 vis_padd16(TMP10, TMP14, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2874 vis_pack16(TMP0, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2875
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2876 vis_pack16(TMP2, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2877 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2878 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2879 vis_padd16(TMP10, CONST_3, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2880
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2881 vis_ld64_2(dest, stride, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2882 vis_padd16(TMP8, TMP16, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2883
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2884 vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2885 vis_padd16(TMP10, TMP18, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2886 vis_pack16(TMP8, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2887
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2888 vis_pack16(TMP10, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2889 vis_st64(DST_2, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2890 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2891
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2892 vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2893 vis_pmerge(ZERO, REF_S0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2894
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2895 vis_pmerge(ZERO, REF_S2, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2896 vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2897
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2898 vis_padd16(TMP0, CONST_3, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2899 vis_mul8x16au(REF_S4, CONST_256, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2900
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2901 vis_padd16(TMP2, CONST_3, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2902 vis_mul8x16au(REF_S4_1, CONST_256, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2903
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2904 vis_padd16(TMP0, TMP24, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2905 vis_mul8x16au(REF_S6, CONST_256, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2906
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2907 vis_padd16(TMP2, TMP6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2908 vis_mul8x16au(REF_S6_1, CONST_256, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2909
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2910 vis_padd16(TMP8, CONST_3, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2911 vis_mul8x16al(DST_0, CONST_512, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2912
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2913 vis_padd16(TMP10, CONST_3, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2914 vis_mul8x16al(DST_1, CONST_512, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2915
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2916 vis_padd16(TMP8, TMP12, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2917 vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2918
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2919 vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2920 vis_padd16(TMP0, TMP16, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2921
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2922 vis_padd16(TMP2, TMP18, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2923 vis_pack16(TMP0, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2924
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2925 vis_padd16(TMP10, TMP14, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2926 vis_pack16(TMP2, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2927 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2928 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2929
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2930 vis_padd16(TMP8, TMP20, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2931
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2932 vis_padd16(TMP10, TMP22, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2933 vis_pack16(TMP8, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2934
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2935 vis_pack16(TMP10, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2936 vis_st64(DST_2, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2937 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2938 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2939 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2940
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2941 static void MC_put_no_round_y_16_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2942 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2943 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2944 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2945
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2946 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2947 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2948
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2949 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2950
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2951 vis_ld64_2(ref, 16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2952 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2953
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2954 vis_ld64(ref[0], TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2955 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2956
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2957 vis_ld64_2(ref, 8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2958 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2959
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2960 vis_ld64_2(ref, 16, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2961 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2962
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2963 vis_ld64(constants_fe[0], MASK_fe);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2964 vis_faligndata(TMP6, TMP8, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2965
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2966 vis_ld64(constants_7f[0], MASK_7f);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2967 vis_faligndata(TMP8, TMP10, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2968
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2969 vis_ld64(constants128[0], CONST_128);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2970 height = (height >> 1) - 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2971 do { /* 24 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2972 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2973 vis_xor(REF_0, REF_2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2974
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2975 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2976 vis_xor(REF_4, REF_6, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2977
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2978 vis_ld64_2(ref, 16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2979 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2980 vis_and(REF_0, REF_2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2981
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2982 vis_ld64(ref[0], TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2983 vis_and(REF_4, REF_6, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2984
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2985 vis_ld64_2(ref, 8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2986 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2987
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2988 vis_ld64_2(ref, 16, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2989 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2990 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2991
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2992 vis_and(TMP12, MASK_fe, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2993
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2994 vis_and(TMP16, MASK_fe, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2995 vis_mul8x16(CONST_128, TMP12, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2996
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2997 vis_mul8x16(CONST_128, TMP16, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2998 vis_xor(REF_0, REF_2, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
2999
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3000 vis_xor(REF_4, REF_6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3001
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3002 vis_and(REF_0, REF_2, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3003
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3004 vis_and(TMP12, MASK_7f, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3005
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3006 vis_and(TMP16, MASK_7f, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3007
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3008 vis_padd16(TMP14, TMP12, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3009 vis_st64(TMP12, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3010
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3011 vis_padd16(TMP18, TMP16, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3012 vis_st64_2(TMP16, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3013 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3014
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3015 vis_and(REF_4, REF_6, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3016
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3017 vis_and(TMP0, MASK_fe, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3018
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3019 vis_and(TMP2, MASK_fe, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3020 vis_mul8x16(CONST_128, TMP0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3021
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3022 vis_faligndata(TMP6, TMP8, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3023 vis_mul8x16(CONST_128, TMP2, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3024
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3025 vis_faligndata(TMP8, TMP10, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3026
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3027 vis_and(TMP0, MASK_7f, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3028
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3029 vis_and(TMP2, MASK_7f, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3030
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3031 vis_padd16(TMP20, TMP0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3032 vis_st64(TMP0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3033
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3034 vis_padd16(TMP18, TMP2, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3035 vis_st64_2(TMP2, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3036 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3037 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3038
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3039 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3040 vis_xor(REF_0, REF_2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3041
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3042 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3043 vis_xor(REF_4, REF_6, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3044
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3045 vis_ld64_2(ref, 16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3046 vis_and(REF_0, REF_2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3047
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3048 vis_and(REF_4, REF_6, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3049
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3050 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3051
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3052 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3053
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3054 vis_and(TMP12, MASK_fe, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3055
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3056 vis_and(TMP16, MASK_fe, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3057 vis_mul8x16(CONST_128, TMP12, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3058
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3059 vis_mul8x16(CONST_128, TMP16, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3060 vis_xor(REF_0, REF_2, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3061
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3062 vis_xor(REF_4, REF_6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3063
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3064 vis_and(REF_0, REF_2, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3065
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3066 vis_and(TMP12, MASK_7f, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3067
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3068 vis_and(TMP16, MASK_7f, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3069
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3070 vis_padd16(TMP14, TMP12, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3071 vis_st64(TMP12, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3072
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3073 vis_padd16(TMP18, TMP16, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3074 vis_st64_2(TMP16, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3075 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3076
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3077 vis_and(REF_4, REF_6, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3078
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3079 vis_and(TMP0, MASK_fe, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3080
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3081 vis_and(TMP2, MASK_fe, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3082 vis_mul8x16(CONST_128, TMP0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3083
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3084 vis_mul8x16(CONST_128, TMP2, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3085
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3086 vis_and(TMP0, MASK_7f, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3087
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3088 vis_and(TMP2, MASK_7f, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3089
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3090 vis_padd16(TMP20, TMP0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3091 vis_st64(TMP0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3092
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3093 vis_padd16(TMP18, TMP2, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3094 vis_st64_2(TMP2, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3095 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3096
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3097 static void MC_put_no_round_y_8_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3098 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3099 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3100 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3101
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3102 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3103 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3104
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3105 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3106 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3107
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3108 vis_ld64(ref[0], TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3109
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3110 vis_ld64_2(ref, 8, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3111 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3112
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3113 vis_ld64(constants_fe[0], MASK_fe);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3114 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3115
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3116 vis_ld64(constants_7f[0], MASK_7f);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3117 vis_faligndata(TMP4, TMP6, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3118
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3119 vis_ld64(constants128[0], CONST_128);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3120 height = (height >> 1) - 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3121 do { /* 12 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3122 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3123 vis_xor(REF_0, REF_2, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3124
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3125 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3126 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3127 vis_and(TMP4, MASK_fe, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3128
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3129 vis_and(REF_0, REF_2, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3130 vis_mul8x16(CONST_128, TMP4, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3131
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3132 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3133 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3134
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3135 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3136 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3137 vis_xor(REF_0, REF_2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3138
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3139 vis_and(TMP4, MASK_7f, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3140
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3141 vis_and(TMP12, MASK_fe, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3142
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3143 vis_mul8x16(CONST_128, TMP12, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3144 vis_and(REF_0, REF_2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3145
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3146 vis_padd16(TMP6, TMP4, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3147 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3148 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3149
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3150 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3151
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3152 vis_and(TMP12, MASK_7f, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3153
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3154 vis_padd16(TMP14, TMP12, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3155 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3156 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3157 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3158
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3159 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3160 vis_xor(REF_0, REF_2, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3161
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3162 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3163 vis_and(TMP4, MASK_fe, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3164
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3165 vis_and(REF_0, REF_2, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3166 vis_mul8x16(CONST_128, TMP4, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3167
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3168 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3169
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3170 vis_xor(REF_0, REF_2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3171
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3172 vis_and(TMP4, MASK_7f, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3173
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3174 vis_and(TMP12, MASK_fe, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3175
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3176 vis_mul8x16(CONST_128, TMP12, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3177 vis_and(REF_0, REF_2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3178
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3179 vis_padd16(TMP6, TMP4, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3180 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3181 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3182
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3183 vis_and(TMP12, MASK_7f, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3184
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3185 vis_padd16(TMP14, TMP12, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3186 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3187 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3188
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3189 static void MC_avg_no_round_y_16_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3190 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3191 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3192 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3193 int stride_8 = stride + 8;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3194 int stride_16 = stride + 16;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3195
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3196 vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3197
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3198 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3199
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3200 vis_ld64(ref[ 0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3201 vis_fzero(ZERO);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3202
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3203 vis_ld64(ref[ 8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3204
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3205 vis_ld64(ref[16], TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3206
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3207 vis_ld64(constants3[0], CONST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3208 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3209
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3210 vis_ld64(constants256_512[0], CONST_256);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3211 vis_faligndata(TMP2, TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3212 height >>= 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3213
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3214 do { /* 31 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3215 vis_ld64_2(ref, stride, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3216 vis_pmerge(ZERO, REF_2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3217 vis_mul8x16au(REF_2_1, CONST_256, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3218
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3219 vis_ld64_2(ref, stride_8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3220 vis_pmerge(ZERO, REF_6, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3221 vis_mul8x16au(REF_6_1, CONST_256, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3222
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3223 vis_ld64_2(ref, stride_16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3224 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3225
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3226 vis_ld64(dest[0], DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3227 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3228
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3229 vis_ld64_2(dest, 8, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3230 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3231
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3232 vis_ld64_2(ref, stride, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3233 vis_pmerge(ZERO, REF_0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3234 vis_mul8x16au(REF_0_1, CONST_256, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3235
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3236 vis_ld64_2(ref, stride_8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3237 vis_pmerge(ZERO, REF_4, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3238
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3239 vis_ld64_2(ref, stride_16, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3240 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3241
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3242 vis_ld64_2(dest, stride, REF_S0/*DST_4*/);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3243 vis_faligndata(TMP6, TMP8, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3244 vis_mul8x16au(REF_4_1, CONST_256, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3245
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3246 vis_ld64_2(dest, stride_8, REF_S2/*DST_6*/);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3247 vis_faligndata(TMP8, TMP10, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3248 vis_mul8x16al(DST_0, CONST_512, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3249
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3250 vis_padd16(TMP0, CONST_3, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3251 vis_mul8x16al(DST_1, CONST_512, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3252
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3253 vis_padd16(TMP2, CONST_3, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3254 vis_mul8x16al(DST_2, CONST_512, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3255
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3256 vis_padd16(TMP4, CONST_3, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3257 vis_mul8x16al(DST_3, CONST_512, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3258
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3259 vis_padd16(TMP6, CONST_3, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3260
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3261 vis_padd16(TMP12, TMP20, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3262 vis_mul8x16al(REF_S0, CONST_512, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3263
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3264 vis_padd16(TMP14, TMP22, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3265 vis_mul8x16al(REF_S0_1, CONST_512, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3266
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3267 vis_padd16(TMP16, TMP24, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3268 vis_mul8x16al(REF_S2, CONST_512, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3269
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3270 vis_padd16(TMP18, TMP26, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3271 vis_mul8x16al(REF_S2_1, CONST_512, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3272
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3273 vis_padd16(TMP12, TMP0, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3274 vis_mul8x16au(REF_2, CONST_256, TMP28);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3275
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3276 vis_padd16(TMP14, TMP2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3277 vis_mul8x16au(REF_2_1, CONST_256, TMP30);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3278
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3279 vis_padd16(TMP16, TMP4, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3280 vis_mul8x16au(REF_6, CONST_256, REF_S4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3281
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3282 vis_padd16(TMP18, TMP6, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3283 vis_mul8x16au(REF_6_1, CONST_256, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3284
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3285 vis_pack16(TMP12, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3286 vis_padd16(TMP28, TMP0, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3287
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3288 vis_pack16(TMP14, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3289 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3290 vis_padd16(TMP30, TMP2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3291
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3292 vis_pack16(TMP16, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3293 vis_padd16(REF_S4, TMP4, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3294
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3295 vis_pack16(TMP18, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3296 vis_st64_2(DST_2, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3297 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3298 vis_padd16(REF_S6, TMP6, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3299
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3300 vis_padd16(TMP12, TMP20, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3301
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3302 vis_padd16(TMP14, TMP22, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3303 vis_pack16(TMP12, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3304
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3305 vis_padd16(TMP16, TMP24, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3306 vis_pack16(TMP14, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3307 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3308
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3309 vis_padd16(TMP18, TMP26, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3310 vis_pack16(TMP16, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3311
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3312 vis_pack16(TMP18, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3313 vis_st64_2(DST_2, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3314 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3315 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3316 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3317
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * MC_avg_no_round_y_8_vis
 *
 * NOTE(review): going by the name this is the 8-pixel-wide, vertical
 * half-pel, "no rounding" motion-compensation kernel that blends its
 * prediction into dest -- confirm against the table that installs it.
 *
 * What the code below visibly does:
 *  - Reads ref[0] and ref[8] for the first row and merges them with
 *    vis_faligndata; every loop pass then loads two further rows the same
 *    way, so ref is read for height + 1 rows in total.
 *  - Every loop pass reads 8 bytes from two consecutive dest rows, adds
 *    them (scaled through CONST_512) to the sum of two vertically adjacent
 *    ref rows plus CONST_3, packs the result and stores 8 bytes back to
 *    each of those two dest rows.
 *  - Presumably this reduces to (above + below + 2*dst + 3) >> 2 per
 *    pixel; that depends on the VIS macro and constant definitions outside
 *    this chunk -- confirm.
 *
 * dest   - destination rows; both read and written, 8 bytes per row.
 * _ref   - source rows; only loaded from (const is cast away for the
 *          pointer arithmetic, no store to it appears here).
 * stride - byte distance between consecutive rows, used for both buffers.
 * height - number of rows. It is halved and the do/while body runs before
 *          the counter is tested, so it has to be even and at least 2;
 *          a smaller value starts the counter at 0 and `--height` then
 *          does not hit zero on the first pass.
 */
3318 static void MC_avg_no_round_y_8_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3319 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3320 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3321 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3322 int stride_8 = stride + 8; /* offset of the second 8-byte load of the next row */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3323
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* NOTE(review): scale factor 5 presumably makes vis_pack16 divide the sums by 4 -- confirm in vis.h */
3324 vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3325
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* NOTE(review): presumably rounds ref down to 8 bytes and records the byte offset used by vis_faligndata -- confirm */
3326 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3327
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3328 vis_ld64(ref[ 0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3329 vis_fzero(ZERO);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3330
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3331 vis_ld64(ref[ 8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3332
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3333 vis_ld64(constants3[0], CONST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3334 vis_faligndata(TMP0, TMP2, REF_2); /* first ref row; consumed at the top of the loop */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3335
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3336 vis_ld64(constants256_512[0], CONST_256);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3337
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3338 height >>= 1; /* each loop pass produces two output rows */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3339 do { /* 20 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3340 vis_ld64_2(ref, stride, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3341 vis_pmerge(ZERO, REF_2, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3342 vis_mul8x16au(REF_2_1, CONST_256, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3343
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3344 vis_ld64_2(ref, stride_8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3345 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3346
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3347 vis_ld64(dest[0], DST_0); /* existing dest, first row of this pass */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3348
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3349 vis_ld64_2(dest, stride, DST_2); /* existing dest, second row of this pass */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3350 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3351
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3352 vis_ld64_2(ref, stride, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3353 vis_mul8x16al(DST_0, CONST_512, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3354 vis_pmerge(ZERO, REF_0, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3355
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3356 vis_ld64_2(ref, stride_8, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3357 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3358 vis_mul8x16al(DST_1, CONST_512, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3359 vis_pmerge(ZERO, REF_0_1, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3360
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3361 vis_padd16(TMP12, CONST_3, TMP12); /* middle row + CONST_3; reused for both output rows */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3362 vis_mul8x16al(DST_2, CONST_512, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3363
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3364 vis_padd16(TMP14, CONST_3, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3365 vis_mul8x16al(DST_3, CONST_512, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3366
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3367 vis_faligndata(TMP4, TMP6, REF_2); /* last row loaded this pass; stays in REF_2 for the next pass */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3368
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3369 vis_padd16(TMP8, TMP12, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3370
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3371 vis_padd16(TMP10, TMP14, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3372 vis_mul8x16au(REF_2, CONST_256, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3373
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3374 vis_padd16(TMP8, TMP16, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3375 vis_mul8x16au(REF_2_1, CONST_256, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3376
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3377 vis_padd16(TMP10, TMP18, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3378 vis_pack16(TMP0, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3379
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3380 vis_pack16(TMP2, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3381 vis_st64(DST_0, dest[0]); /* first output row of this pass */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3382 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3383 vis_padd16(TMP12, TMP20, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3384
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3385 vis_padd16(TMP14, TMP22, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3386
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3387 vis_padd16(TMP12, TMP24, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3388
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3389 vis_padd16(TMP14, TMP26, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3390 vis_pack16(TMP0, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3391
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3392 vis_pack16(TMP2, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3393 vis_st64(DST_2, dest[0]); /* second output row of this pass */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3394 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3395 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3396 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3397
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * MC_put_no_round_xy_16_vis
 *
 * NOTE(review): going by the name this is the 16-pixel-wide, diagonal
 * (x and y) half-pel, "no rounding" motion-compensation kernel that
 * overwrites dest -- confirm against the table that installs it.
 *
 * What the code below visibly does:
 *  - Reads ref[0], ref[8] and ref[16] for the first row; every loop pass
 *    loads two further rows the same way, so ref is read for height + 1
 *    rows in total.
 *  - For each row it builds two views of the data with vis_faligndata:
 *    one at byte offset `off` and one at `off + 1` (the horizontally
 *    adjacent pixels).
 *  - Each output row is the packed sum of those two views from two
 *    vertically adjacent rows plus the constant loaded from constants1.
 *    Presumably this reduces to (a + b + c + d + 1) >> 2 per pixel; that
 *    depends on the VIS macro and constant definitions outside this
 *    chunk -- confirm.
 *  - dest is only stored to (16 bytes per row, as two 8-byte stores);
 *    no load from dest appears in this function.
 *
 * dest   - destination rows, written 16 bytes per row.
 * _ref   - source rows; only loaded from (const is cast away for the
 *          pointer arithmetic).
 * stride - byte distance between consecutive rows, used for both buffers.
 * height - number of rows. It is halved and the do/while body runs before
 *          the counter is tested, so it has to be even and at least 2.
 */
3398 static void MC_put_no_round_xy_16_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3399 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3400 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3401 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3402 unsigned long off = (unsigned long) ref & 0x7; /* byte offset of ref inside its 8-byte word */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3403 unsigned long off_plus_1 = off + 1; /* offset of the pixel one to the right */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3404 int stride_8 = stride + 8;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3405 int stride_16 = stride + 16;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3406
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* NOTE(review): scale factor 5 presumably makes vis_pack16 divide the sums by 4 -- confirm in vis.h */
3407 vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3408
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3409 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3410
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3411 vis_ld64(ref[ 0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3412 vis_fzero(ZERO);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3413
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3414 vis_ld64(ref[ 8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3415
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3416 vis_ld64(ref[16], TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3417
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3418 vis_ld64(constants1[0], CONST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3419 vis_faligndata(TMP0, TMP2, REF_S0); /* first row, offset off, left 8 pixels */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3420
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3421 vis_ld64(constants256_512[0], CONST_256);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3422 vis_faligndata(TMP2, TMP4, REF_S4); /* first row, offset off, right 8 pixels */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3423
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * Build the "one pixel to the right" view of the first row. When off is 7
 * the shifted data starts exactly at the next doubleword, so it is copied
 * with vis_src1 instead of realigning (presumably because off + 1 == 8
 * cannot be expressed as an alignment offset -- confirm).
 */
3424 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3425 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3426 vis_faligndata(TMP0, TMP2, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3427 vis_faligndata(TMP2, TMP4, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3428 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3429 vis_src1(TMP2, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3430 vis_src1(TMP4, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3431 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3432
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3433 height >>= 1; /* each loop pass produces two output rows */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3434 do {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3435 vis_ld64_2(ref, stride, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3436 vis_mul8x16au(REF_S0, CONST_256, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3437 vis_pmerge(ZERO, REF_S0_1, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3438
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* switch the alignment offset back to off before realigning the newly loaded rows */
3439 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3440
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3441 vis_ld64_2(ref, stride_8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3442 vis_mul8x16au(REF_S2, CONST_256, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3443 vis_pmerge(ZERO, REF_S2_1, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3444
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3445 vis_ld64_2(ref, stride_16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3446 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3447 vis_mul8x16au(REF_S4, CONST_256, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3448 vis_pmerge(ZERO, REF_S4_1, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3449
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3450 vis_ld64_2(ref, stride, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3451 vis_mul8x16au(REF_S6, CONST_256, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3452 vis_pmerge(ZERO, REF_S6_1, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3453
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3454 vis_ld64_2(ref, stride_8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3455 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3456
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3457 vis_ld64_2(ref, stride_16, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3458 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3459 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3460
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3461 vis_faligndata(TMP6, TMP8, REF_S0); /* REF_S* now hold the last row loaded; reused by the next pass */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3462
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3463 vis_faligndata(TMP8, TMP10, REF_S4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3464
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* same off + 1 handling as before the loop, now for both rows loaded in this pass */
3465 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3466 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3467 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3468 vis_faligndata(TMP2, TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3469 vis_faligndata(TMP6, TMP8, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3470 vis_faligndata(TMP8, TMP10, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3471 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3472 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3473 vis_src1(TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3474 vis_src1(TMP8, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3475 vis_src1(TMP10, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3476 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3477
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3478 vis_mul8x16au(REF_0, CONST_256, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3479 vis_pmerge(ZERO, REF_0_1, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3480
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3481 vis_mul8x16au(REF_2, CONST_256, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3482 vis_pmerge(ZERO, REF_2_1, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3483
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * NOTE(review): CONST_2 is used here although this function only loads
 * CONST_1 (from constants1); every other bias add below uses CONST_1.
 * This is only correct if CONST_2 names the same register as CONST_1 --
 * confirm against the register #defines at the top of the file.
 */
3484 vis_padd16(TMP0, CONST_2, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3485 vis_mul8x16au(REF_4, CONST_256, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3486
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3487 vis_padd16(TMP2, CONST_1, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3488 vis_mul8x16au(REF_4_1, CONST_256, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3489
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3490 vis_padd16(TMP8, TMP4, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3491 vis_mul8x16au(REF_6, CONST_256, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3492
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3493 vis_padd16(TMP10, TMP6, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3494 vis_mul8x16au(REF_6_1, CONST_256, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3495
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3496 vis_padd16(TMP12, TMP8, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3497
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3498 vis_padd16(TMP14, TMP10, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3499
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3500 vis_padd16(TMP12, TMP16, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3501
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3502 vis_padd16(TMP14, TMP18, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3503 vis_pack16(TMP12, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3504
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3505 vis_pack16(TMP14, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3506 vis_st64(DST_0, dest[0]); /* first output row, left 8 pixels */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3507 vis_padd16(TMP0, CONST_1, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3508
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3509 vis_mul8x16au(REF_S0, CONST_256, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3510 vis_padd16(TMP2, CONST_1, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3511
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3512 vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3513 vis_padd16(TMP12, TMP4, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3514
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3515 vis_mul8x16au(REF_S2, CONST_256, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3516 vis_padd16(TMP14, TMP6, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3517
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3518 vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3519 vis_padd16(TMP20, TMP12, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3520
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3521 vis_padd16(TMP22, TMP14, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3522
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3523 vis_padd16(TMP20, TMP24, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3524
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3525 vis_padd16(TMP22, TMP26, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3526 vis_pack16(TMP20, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3527
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3528 vis_pack16(TMP22, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3529 vis_st64_2(DST_2, dest, 8); /* first output row, right 8 pixels */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3530 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3531 vis_padd16(TMP0, TMP4, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3532
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3533 vis_mul8x16au(REF_S4, CONST_256, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3534 vis_padd16(TMP2, TMP6, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3535
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3536 vis_mul8x16au(REF_S4_1, CONST_256, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3537 vis_padd16(TMP24, TMP8, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3538
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3539 vis_padd16(TMP26, TMP10, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3540 vis_pack16(TMP24, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3541
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3542 vis_pack16(TMP26, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3543 vis_st64(DST_0, dest[0]); /* second output row, left 8 pixels */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3544 vis_pmerge(ZERO, REF_S6, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3545
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3546 vis_pmerge(ZERO, REF_S6_1, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3547
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3548 vis_padd16(TMP0, TMP4, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3549
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3550 vis_padd16(TMP2, TMP6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3551
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3552 vis_padd16(TMP0, TMP12, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3553
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3554 vis_padd16(TMP2, TMP14, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3555 vis_pack16(TMP0, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3556
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3557 vis_pack16(TMP2, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3558 vis_st64_2(DST_2, dest, 8); /* second output row, right 8 pixels */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3559 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3560 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3561 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3562
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3563 static void MC_put_no_round_xy_8_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3564 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3565 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3566 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3567 unsigned long off = (unsigned long) ref & 0x7;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3568 unsigned long off_plus_1 = off + 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3569 int stride_8 = stride + 8;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3570
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3571 vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3572
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3573 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3574
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3575 vis_ld64(ref[ 0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3576 vis_fzero(ZERO);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3577
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3578 vis_ld64(ref[ 8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3579
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3580 vis_ld64(constants1[0], CONST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3581
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3582 vis_ld64(constants256_512[0], CONST_256);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3583 vis_faligndata(TMP0, TMP2, REF_S0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3584
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3585 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3586 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3587 vis_faligndata(TMP0, TMP2, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3588 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3589 vis_src1(TMP2, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3590 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3591
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3592 height >>= 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3593 do { /* 26 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3594 vis_ld64_2(ref, stride, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3595 vis_mul8x16au(REF_S0, CONST_256, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3596 vis_pmerge(ZERO, REF_S2, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3597
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3598 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3599
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3600 vis_ld64_2(ref, stride_8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3601 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3602 vis_mul8x16au(REF_S0_1, CONST_256, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3603 vis_pmerge(ZERO, REF_S2_1, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3604
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3605 vis_ld64_2(ref, stride, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3606
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3607 vis_ld64_2(ref, stride_8, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3608 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3609 vis_faligndata(TMP0, TMP2, REF_S4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3610
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3611 vis_pmerge(ZERO, REF_S4, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3612
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3613 vis_pmerge(ZERO, REF_S4_1, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3614
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3615 vis_faligndata(TMP4, TMP6, REF_S0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3616
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3617 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3618 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3619 vis_faligndata(TMP0, TMP2, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3620 vis_faligndata(TMP4, TMP6, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3621 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3622 vis_src1(TMP2, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3623 vis_src1(TMP6, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3624 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3625
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3626 vis_padd16(TMP18, CONST_1, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3627 vis_mul8x16au(REF_S6, CONST_256, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3628
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3629 vis_padd16(TMP20, CONST_1, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3630 vis_mul8x16au(REF_S6_1, CONST_256, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3631
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3632 vis_mul8x16au(REF_S0, CONST_256, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3633 vis_pmerge(ZERO, REF_S0_1, TMP28);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3634
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3635 vis_mul8x16au(REF_S2, CONST_256, TMP30);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3636 vis_padd16(TMP18, TMP22, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3637
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3638 vis_mul8x16au(REF_S2_1, CONST_256, TMP32);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3639 vis_padd16(TMP20, TMP24, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3640
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3641 vis_padd16(TMP8, TMP18, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3642
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3643 vis_padd16(TMP10, TMP20, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3644
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3645 vis_padd16(TMP8, TMP12, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3646
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3647 vis_padd16(TMP10, TMP14, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3648 vis_pack16(TMP8, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3649
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3650 vis_pack16(TMP10, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3651 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3652 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3653 vis_padd16(TMP18, TMP26, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3654
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3655 vis_padd16(TMP20, TMP28, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3656
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3657 vis_padd16(TMP18, TMP30, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3658
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3659 vis_padd16(TMP20, TMP32, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3660 vis_pack16(TMP18, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3661
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3662 vis_pack16(TMP20, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3663 vis_st64(DST_2, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3664 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3665 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3666 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3667
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * MC_avg_no_round_xy_16_vis
 *
 * VIS kernel that rewrites a 16-byte-wide column of dest rows in place.
 * Every output row is built from the existing dest row plus two vertically
 * adjacent source rows, each of them taken at byte offsets off and off + 1.
 * NOTE(review): going by the name this is the avg / no-rounding / half-pel
 * x+y motion-compensation variant; the exact arithmetic depends on the VIS
 * macro semantics, which are defined outside this function -- confirm there.
 *
 * dest   - destination rows; each row is read (two 8-byte loads) and then
 *          overwritten (two 8-byte stores).
 * _ref   - source pixels; the low three address bits are kept in off and
 *          the pointer is passed through vis_alignaddr(), so it does not
 *          need to be 8-byte aligned. Three 8-byte loads are issued per
 *          source row, and one source row more than height is read (one
 *          row before the loop, then two per iteration).
 * stride - byte distance between consecutive rows; applied to both ref
 *          and dest.
 * height - number of rows to produce. It is halved before the loop and each
 *          do/while iteration emits two rows, so it is expected to be even
 *          and at least 2: with a smaller value --height would step past
 *          zero and the loop would not stop there.
 */
3668 static void MC_avg_no_round_xy_16_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3669 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3670 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3671 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* off: misalignment of the source pointer within an 8-byte word (0..7). */
3672 unsigned long off = (unsigned long) ref & 0x7;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* off_plus_1: alignment offset for the neighbour one byte to the right. */
3673 unsigned long off_plus_1 = off + 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Offsets of the second and third doubleword of the next source row. */
3674 int stride_8 = stride + 8;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3675 int stride_16 = stride + 16;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3676
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * Put 4 into the GSR scale-factor field.
 * NOTE(review): presumably this is what the vis_pack16 calls below use to
 * scale the 16-bit sums back to bytes -- confirm against the VIS manual.
 */
3677 vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3678
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* NOTE(review): presumably rounds ref down to 8 bytes and records off in the GSR -- confirm. */
3679 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3680
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * Prologue: fetch the first source row as three doublewords (TMP0, TMP2,
 * TMP4) and derive from it REF_S0 / REF_S4 (alignment offset off) and
 * REF_S2 / REF_S6 (alignment offset off + 1). The loop carries these four
 * registers from one iteration to the next.
 */
3681 vis_ld64(ref[ 0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3682 vis_fzero(ZERO);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3683
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3684 vis_ld64(ref[ 8], TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3685
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3686 vis_ld64(ref[16], TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3687
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* CONST_6 is added once to every sum of source pixels further down. */
3688 vis_ld64(constants6[0], CONST_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3689 vis_faligndata(TMP0, TMP2, REF_S0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3690
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * NOTE(review): CONST_1024, used below, is never loaded on its own in this
 * function; presumably it names part of the register filled by this load
 * from constants256_1024 -- confirm against the register macros.
 */
3691 vis_ld64(constants256_1024[0], CONST_256);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3692 vis_faligndata(TMP2, TMP4, REF_S4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3693
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * For off == 7 the shifted offset off + 1 would be 8, a whole doubleword,
 * so that case takes the following doubleword directly through vis_src1
 * instead of re-running the alignment.
 */
3694 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3695 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3696 vis_faligndata(TMP0, TMP2, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3697 vis_faligndata(TMP2, TMP4, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3698 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3699 vis_src1(TMP2, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3700 vis_src1(TMP4, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3701 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3702
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Two destination rows are produced per iteration, hence the halving. */
3703 height >>= 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3704 do { /* 55 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * Load the next two source rows (TMP0/TMP2/TMP4, then TMP6/TMP8/TMP10)
 * while the row carried in REF_S0..REF_S6 is widened into TMP12..TMP26.
 * Loads and arithmetic are interleaved by hand; statement order matters
 * because several registers are reused as scratch further down.
 */
3705 vis_ld64_2(ref, stride, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3706 vis_mul8x16au(REF_S0, CONST_256, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3707 vis_pmerge(ZERO, REF_S0_1, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3708
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Back to alignment offset off; it was left at off + 1 unless off is 7. */
3709 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3710
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3711 vis_ld64_2(ref, stride_8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3712 vis_mul8x16au(REF_S2, CONST_256, TMP16);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3713 vis_pmerge(ZERO, REF_S2_1, TMP18);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3714
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3715 vis_ld64_2(ref, stride_16, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3716 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3717 vis_mul8x16au(REF_S4, CONST_256, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3718 vis_pmerge(ZERO, REF_S4_1, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3719
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3720 vis_ld64_2(ref, stride, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3721 vis_mul8x16au(REF_S6, CONST_256, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3722 vis_pmerge(ZERO, REF_S6_1, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3723
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3724 vis_ld64_2(ref, stride_8, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3725 vis_faligndata(TMP0, TMP2, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3726
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3727 vis_ld64_2(ref, stride_16, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3728 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3729 vis_faligndata(TMP2, TMP4, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3730
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * Fetch the current destination row (DST_0: left 8 bytes, DST_2: right
 * 8 bytes); it enters the sums through the CONST_1024 products below.
 * REF_S0 / REF_S4 are refilled here from the second new row, which makes
 * them the carried row for the next iteration.
 */
3731 vis_ld64(dest[0], DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3732 vis_faligndata(TMP6, TMP8, REF_S0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3733
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3734 vis_ld64_2(dest, 8, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3735 vis_faligndata(TMP8, TMP10, REF_S4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3736
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Same off + 1 handling as in the prologue, now for both new rows. */
3737 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3738 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3739 vis_faligndata(TMP0, TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3740 vis_faligndata(TMP2, TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3741 vis_faligndata(TMP6, TMP8, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3742 vis_faligndata(TMP8, TMP10, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3743 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3744 vis_src1(TMP2, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3745 vis_src1(TMP4, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3746 vis_src1(TMP8, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3747 vis_src1(TMP10, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3748 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3749
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * First output row, left 8 bytes. TMP12 / TMP14 accumulate the carried row
 * (offsets off and off + 1), the first new row (REF_0, REF_2), CONST_6 and
 * the destination term TMP30 / TMP32. From here on TMP0..TMP10 and
 * REF_0..REF_6 no longer hold raw loads; they are reused as accumulators.
 */
3750 vis_mul8x16al(DST_0, CONST_1024, TMP30);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3751 vis_pmerge(ZERO, REF_0, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3752
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3753 vis_mul8x16al(DST_1, CONST_1024, TMP32);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3754 vis_pmerge(ZERO, REF_0_1, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3755
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3756 vis_mul8x16au(REF_2, CONST_256, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3757 vis_pmerge(ZERO, REF_2_1, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3758
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3759 vis_mul8x16al(DST_2, CONST_1024, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3760 vis_padd16(TMP0, CONST_6, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3761
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3762 vis_mul8x16al(DST_3, CONST_1024, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3763 vis_padd16(TMP2, CONST_6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3764
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3765 vis_padd16(TMP0, TMP4, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3766 vis_mul8x16au(REF_4, CONST_256, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3767
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3768 vis_padd16(TMP2, TMP6, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3769 vis_mul8x16au(REF_4_1, CONST_256, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3770
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3771 vis_padd16(TMP12, TMP0, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3772 vis_mul8x16au(REF_6, CONST_256, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3773
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3774 vis_padd16(TMP14, TMP2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3775 vis_mul8x16au(REF_6_1, CONST_256, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3776
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3777 vis_padd16(TMP12, TMP16, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3778 vis_mul8x16au(REF_S0, CONST_256, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3779
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3780 vis_padd16(TMP14, TMP18, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3781 vis_mul8x16au(REF_S0_1, CONST_256, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3782
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3783 vis_padd16(TMP12, TMP30, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3784
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3785 vis_padd16(TMP14, TMP32, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3786 vis_pack16(TMP12, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3787
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3788 vis_pack16(TMP14, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Store the left half of the first output row. */
3789 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3790 vis_padd16(TMP4, CONST_6, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3791
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* DST_0 is free again: preload the left half of the next destination row. */
3792 vis_ld64_2(dest, stride, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3793 vis_padd16(TMP6, CONST_6, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3794 vis_mul8x16au(REF_S2, CONST_256, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3795
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3796 vis_padd16(TMP4, TMP8, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3797 vis_mul8x16au(REF_S2_1, CONST_256, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3798
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3799 vis_padd16(TMP6, TMP10, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3800
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* First output row, right 8 bytes: accumulate into TMP20 / TMP22. */
3801 vis_padd16(TMP20, TMP4, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3802
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3803 vis_padd16(TMP22, TMP6, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3804
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3805 vis_padd16(TMP20, TMP24, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3806
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3807 vis_padd16(TMP22, TMP26, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3808
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* REF_0 / REF_2 hold the destination term of the right half at this point. */
3809 vis_padd16(TMP20, REF_0, TMP20);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3810 vis_mul8x16au(REF_S4, CONST_256, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3811
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3812 vis_padd16(TMP22, REF_2, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3813 vis_pack16(TMP20, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3814
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3815 vis_pack16(TMP22, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Store the right half of the first output row, then step to the next row. */
3816 vis_st64_2(DST_2, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3817 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3818
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/*
 * Second output row. It combines the first new row (whose partial sums,
 * CONST_6 included, are still in TMP0 / TMP2 and TMP4 / TMP6) with the
 * second new row now held in REF_S0..REF_S6, plus the destination row
 * loaded into DST_0 above and into DST_2 here.
 */
3819 vis_ld64_2(dest, 8, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3820 vis_mul8x16al(DST_0, CONST_1024, TMP30);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3821 vis_pmerge(ZERO, REF_S4_1, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3822
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3823 vis_mul8x16al(DST_1, CONST_1024, TMP32);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3824 vis_padd16(REF_4, TMP0, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3825
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3826 vis_mul8x16au(REF_S6, CONST_256, REF_4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3827 vis_padd16(REF_6, TMP2, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3828
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3829 vis_mul8x16au(REF_S6_1, CONST_256, REF_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3830 vis_padd16(TMP8, TMP12, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3831
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3832 vis_padd16(TMP10, TMP14, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3833
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3834 vis_padd16(TMP8, TMP30, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3835
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3836 vis_padd16(TMP10, TMP32, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3837 vis_pack16(TMP8, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3838
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3839 vis_pack16(TMP10, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Store the left half of the second output row. */
3840 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3841
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Right half of the second output row accumulates in REF_0 / REF_2. */
3842 vis_padd16(REF_0, TMP4, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3843
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3844 vis_mul8x16al(DST_2, CONST_1024, TMP30);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3845 vis_padd16(REF_2, TMP6, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3846
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3847 vis_mul8x16al(DST_3, CONST_1024, TMP32);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3848 vis_padd16(REF_0, REF_4, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3849
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3850 vis_padd16(REF_2, REF_6, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3851
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3852 vis_padd16(REF_0, TMP30, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3853
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3854 /* stall */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3855
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3856 vis_padd16(REF_2, TMP32, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3857 vis_pack16(REF_0, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3858
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3859 vis_pack16(REF_2, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* Store the right half of the second output row and advance dest. */
3860 vis_st64_2(DST_2, dest, 8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3861 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
/* height counts row pairs here; the body always runs at least once. */
3862 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3863 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3864
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3865 static void MC_avg_no_round_xy_8_vis (uint8_t * dest, const uint8_t * _ref,
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3866 const int stride, int height)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3867 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3868 uint8_t *ref = (uint8_t *) _ref;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3869 unsigned long off = (unsigned long) ref & 0x7;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3870 unsigned long off_plus_1 = off + 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3871 int stride_8 = stride + 8;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3872
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3873 vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3874
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3875 ref = vis_alignaddr(ref);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3876
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3877 vis_ld64(ref[0], TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3878 vis_fzero(ZERO);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3879
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3880 vis_ld64_2(ref, 8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3881
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3882 vis_ld64(constants6[0], CONST_6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3883
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3884 vis_ld64(constants256_1024[0], CONST_256);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3885 vis_faligndata(TMP0, TMP2, REF_S0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3886
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3887 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3888 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3889 vis_faligndata(TMP0, TMP2, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3890 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3891 vis_src1(TMP2, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3892 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3893
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3894 height >>= 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3895 do { /* 31 cycles */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3896 vis_ld64_2(ref, stride, TMP0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3897 vis_mul8x16au(REF_S0, CONST_256, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3898 vis_pmerge(ZERO, REF_S0_1, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3899
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3900 vis_ld64_2(ref, stride_8, TMP2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3901 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3902 vis_mul8x16au(REF_S2, CONST_256, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3903 vis_pmerge(ZERO, REF_S2_1, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3904
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3905 vis_alignaddr_g0((void *)off);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3906
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3907 vis_ld64_2(ref, stride, TMP4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3908 vis_faligndata(TMP0, TMP2, REF_S4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3909
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3910 vis_ld64_2(ref, stride_8, TMP6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3911 ref += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3912
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3913 vis_ld64(dest[0], DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3914 vis_faligndata(TMP4, TMP6, REF_S0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3915
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3916 vis_ld64_2(dest, stride, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3917
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3918 if (off != 0x7) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3919 vis_alignaddr_g0((void *)off_plus_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3920 vis_faligndata(TMP0, TMP2, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3921 vis_faligndata(TMP4, TMP6, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3922 } else {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3923 vis_src1(TMP2, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3924 vis_src1(TMP6, REF_S2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3925 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3926
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3927 vis_mul8x16al(DST_0, CONST_1024, TMP30);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3928 vis_pmerge(ZERO, REF_S4, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3929
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3930 vis_mul8x16al(DST_1, CONST_1024, TMP32);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3931 vis_pmerge(ZERO, REF_S4_1, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3932
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3933 vis_mul8x16au(REF_S6, CONST_256, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3934 vis_pmerge(ZERO, REF_S6_1, TMP28);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3935
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3936 vis_mul8x16au(REF_S0, CONST_256, REF_S4);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3937 vis_padd16(TMP22, CONST_6, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3938
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3939 vis_mul8x16au(REF_S0_1, CONST_256, REF_S6);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3940 vis_padd16(TMP24, CONST_6, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3941
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3942 vis_mul8x16al(DST_2, CONST_1024, REF_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3943 vis_padd16(TMP22, TMP26, TMP22);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3944
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3945 vis_mul8x16al(DST_3, CONST_1024, REF_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3946 vis_padd16(TMP24, TMP28, TMP24);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3947
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3948 vis_mul8x16au(REF_S2, CONST_256, TMP26);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3949 vis_padd16(TMP8, TMP22, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3950
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3951 vis_mul8x16au(REF_S2_1, CONST_256, TMP28);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3952 vis_padd16(TMP10, TMP24, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3953
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3954 vis_padd16(TMP8, TMP12, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3955
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3956 vis_padd16(TMP10, TMP14, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3957
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3958 vis_padd16(TMP8, TMP30, TMP8);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3959
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3960 vis_padd16(TMP10, TMP32, TMP10);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3961 vis_pack16(TMP8, DST_0);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3962
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3963 vis_pack16(TMP10, DST_1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3964 vis_st64(DST_0, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3965 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3966
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3967 vis_padd16(REF_S4, TMP22, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3968
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3969 vis_padd16(REF_S6, TMP24, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3970
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3971 vis_padd16(TMP12, TMP26, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3972
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3973 vis_padd16(TMP14, TMP28, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3974
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3975 vis_padd16(TMP12, REF_0, TMP12);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3976
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3977 vis_padd16(TMP14, REF_2, TMP14);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3978 vis_pack16(TMP12, DST_2);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3979
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3980 vis_pack16(TMP14, DST_3);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3981 vis_st64(DST_2, dest[0]);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3982 dest += stride;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3983 } while (--height);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3984 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3985
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3986 /* End of no rounding code */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3987
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3988 static sigjmp_buf jmpbuf;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3989 static volatile sig_atomic_t canjump = 0;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3990
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3991 static void sigill_handler (int sig)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3992 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3993 if (!canjump) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3994 signal (sig, SIG_DFL);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3995 raise (sig);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3996 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3997
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3998 canjump = 0;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
3999 siglongjmp (jmpbuf, 1);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4000 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4001
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4002 #define ACCEL_SPARC_VIS 1
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4003 #define ACCEL_SPARC_VIS2 2
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4004
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4005 static int vis_level ()
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4006 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4007 int accel = 0;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4008
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4009 signal (SIGILL, sigill_handler);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4010 if (sigsetjmp (jmpbuf, 1)) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4011 signal (SIGILL, SIG_DFL);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4012 return accel;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4013 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4014
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4015 canjump = 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4016
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4017 /* pdist %f0, %f0, %f0 */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4018 __asm__ __volatile__(".word\t0x81b007c0");
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4019
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4020 canjump = 0;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4021 accel |= ACCEL_SPARC_VIS;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4022
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4023 if (sigsetjmp (jmpbuf, 1)) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4024 signal (SIGILL, SIG_DFL);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4025 return accel;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4026 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4027
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4028 canjump = 1;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4029
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4030 /* edge8n %g0, %g0, %g0 */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4031 __asm__ __volatile__(".word\t0x81b00020");
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4032
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4033 canjump = 0;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4034 accel |= ACCEL_SPARC_VIS2;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4035
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4036 signal (SIGILL, SIG_DFL);
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4037
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4038 return accel;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4039 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4040
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4041 /* libavcodec initialization code */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4042 void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4043 {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4044 /* VIS specific optimisations */
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4045 int accel = vis_level ();
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4046
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4047 if (accel & ACCEL_SPARC_VIS) {
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4048 c->put_pixels_tab[0][0] = MC_put_o_16_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4049 c->put_pixels_tab[0][1] = MC_put_x_16_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4050 c->put_pixels_tab[0][2] = MC_put_y_16_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4051 c->put_pixels_tab[0][3] = MC_put_xy_16_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4052
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4053 c->put_pixels_tab[1][0] = MC_put_o_8_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4054 c->put_pixels_tab[1][1] = MC_put_x_8_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4055 c->put_pixels_tab[1][2] = MC_put_y_8_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4056 c->put_pixels_tab[1][3] = MC_put_xy_8_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4057
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4058 c->avg_pixels_tab[0][0] = MC_avg_o_16_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4059 c->avg_pixels_tab[0][1] = MC_avg_x_16_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4060 c->avg_pixels_tab[0][2] = MC_avg_y_16_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4061 c->avg_pixels_tab[0][3] = MC_avg_xy_16_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4062
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4063 c->avg_pixels_tab[1][0] = MC_avg_o_8_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4064 c->avg_pixels_tab[1][1] = MC_avg_x_8_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4065 c->avg_pixels_tab[1][2] = MC_avg_y_8_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4066 c->avg_pixels_tab[1][3] = MC_avg_xy_8_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4067
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4068 c->put_no_rnd_pixels_tab[0][0] = MC_put_no_round_o_16_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4069 c->put_no_rnd_pixels_tab[0][1] = MC_put_no_round_x_16_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4070 c->put_no_rnd_pixels_tab[0][2] = MC_put_no_round_y_16_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4071 c->put_no_rnd_pixels_tab[0][3] = MC_put_no_round_xy_16_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4072
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4073 c->put_no_rnd_pixels_tab[1][0] = MC_put_no_round_o_8_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4074 c->put_no_rnd_pixels_tab[1][1] = MC_put_no_round_x_8_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4075 c->put_no_rnd_pixels_tab[1][2] = MC_put_no_round_y_8_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4076 c->put_no_rnd_pixels_tab[1][3] = MC_put_no_round_xy_8_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4077
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4078 c->avg_no_rnd_pixels_tab[0][0] = MC_avg_no_round_o_16_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4079 c->avg_no_rnd_pixels_tab[0][1] = MC_avg_no_round_x_16_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4080 c->avg_no_rnd_pixels_tab[0][2] = MC_avg_no_round_y_16_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4081 c->avg_no_rnd_pixels_tab[0][3] = MC_avg_no_round_xy_16_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4082
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4083 c->avg_no_rnd_pixels_tab[1][0] = MC_avg_no_round_o_8_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4084 c->avg_no_rnd_pixels_tab[1][1] = MC_avg_no_round_x_8_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4085 c->avg_no_rnd_pixels_tab[1][2] = MC_avg_no_round_y_8_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4086 c->avg_no_rnd_pixels_tab[1][3] = MC_avg_no_round_xy_8_vis;
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4087 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4088 }
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4089
e8776388b02a [svn] - add ffmpeg
nenolod
parents:
diff changeset
4090 #endif /* !(ARCH_SPARC) */