|
808
|
1 /*
|
|
|
2 * Sun mediaLib optimized DSP utils
|
|
|
3 * Copyright (c) 2001 Fabrice Bellard.
|
|
|
4 *
|
|
|
5 * This file is part of FFmpeg.
|
|
|
6 *
|
|
|
7 * FFmpeg is free software; you can redistribute it and/or
|
|
|
8 * modify it under the terms of the GNU Lesser General Public
|
|
|
9 * License as published by the Free Software Foundation; either
|
|
|
10 * version 2.1 of the License, or (at your option) any later version.
|
|
|
11 *
|
|
|
12 * FFmpeg is distributed in the hope that it will be useful,
|
|
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
15 * Lesser General Public License for more details.
|
|
|
16 *
|
|
|
17 * You should have received a copy of the GNU Lesser General Public
|
|
|
18 * License along with FFmpeg; if not, write to the Free Software
|
|
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
20 */
|
|
|
21
|
|
|
22 #include "../dsputil.h"
|
|
|
23 #include "../mpegvideo.h"
|
|
|
24
|
|
|
25 #include <mlib_types.h>
|
|
|
26 #include <mlib_status.h>
|
|
|
27 #include <mlib_sys.h>
|
|
|
28 #include <mlib_algebra.h>
|
|
|
29 #include <mlib_video.h>
|
|
|
30
|
|
|
31 /* misc */
|
|
|
32
|
|
|
33 static void get_pixels_mlib(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
|
|
|
34 {
|
|
|
35 int i;
|
|
|
36
|
|
|
37 for (i=0;i<8;i++) {
|
|
|
38 mlib_VectorConvert_S16_U8_Mod((mlib_s16 *)block, (mlib_u8 *)pixels, 8);
|
|
|
39
|
|
|
40 pixels += line_size;
|
|
|
41 block += 8;
|
|
|
42 }
|
|
|
43 }
|
|
|
44
|
|
|
45 static void diff_pixels_mlib(DCTELEM *restrict block, const uint8_t *s1, const uint8_t *s2, int line_size)
|
|
|
46 {
|
|
|
47 int i;
|
|
|
48
|
|
|
49 for (i=0;i<8;i++) {
|
|
|
50 mlib_VectorSub_S16_U8_Mod((mlib_s16 *)block, (mlib_u8 *)s1, (mlib_u8 *)s2, 8);
|
|
|
51
|
|
|
52 s1 += line_size;
|
|
|
53 s2 += line_size;
|
|
|
54 block += 8;
|
|
|
55 }
|
|
|
56 }
|
|
|
57
|
|
|
58 static void add_pixels_clamped_mlib(const DCTELEM *block, uint8_t *pixels, int line_size)
|
|
|
59 {
|
|
|
60 mlib_VideoAddBlock_U8_S16(pixels, (mlib_s16 *)block, line_size);
|
|
|
61 }
|
|
|
62
|
|
|
63 /* put block, width 16 pixel, height 8/16 */
|
|
|
64
|
|
|
/*
 * Put a 16-pixel-wide block from ref into dest.
 *
 * Forwards to the mlib_VideoCopyRef_U8_U8_16xH routine matching `height`
 * (8 or 16).  Any other height is a caller error and trips assert(0).
 */
static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    if (height == 8) {
        mlib_VideoCopyRef_U8_U8_16x8(dest, (uint8_t *)ref, stride);
    } else if (height == 16) {
        mlib_VideoCopyRef_U8_U8_16x16(dest, (uint8_t *)ref, stride);
    } else {
        assert(0);
    }
}
|
|
|
81
|
|
|
/*
 * Put a 16-pixel-wide block using the mediaLib X-interpolation routines.
 *
 * Forwards to mlib_VideoInterpX_U8_U8_16xH for height 8 or 16; `stride` is
 * passed for both stride arguments.  Any other height trips assert(0).
 */
static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
                                  int stride, int height)
{
    if (height == 8) {
        mlib_VideoInterpX_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpX_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
    } else {
        assert(0);
    }
}
|
|
|
98
|
|
|
/*
 * Put a 16-pixel-wide block using the mediaLib Y-interpolation routines.
 *
 * Forwards to mlib_VideoInterpY_U8_U8_16xH for height 8 or 16; `stride` is
 * passed for both stride arguments.  Any other height trips assert(0).
 */
static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
                                  int stride, int height)
{
    if (height == 8) {
        mlib_VideoInterpY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
    } else {
        assert(0);
    }
}
|
|
|
115
|
|
|
/*
 * Put a 16-pixel-wide block using the mediaLib XY-interpolation routines.
 *
 * Forwards to mlib_VideoInterpXY_U8_U8_16xH for height 8 or 16; `stride` is
 * passed for both stride arguments.  Any other height trips assert(0).
 */
static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
                                  int stride, int height)
{
    if (height == 8) {
        mlib_VideoInterpXY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpXY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
    } else {
        assert(0);
    }
}
|
|
|
132
|
|
|
133 /* put block, width 8 pixel, height 4/8/16 */
|
|
|
134
|
|
|
/*
 * Put an 8-pixel-wide block from ref into dest.
 *
 * Forwards to the mlib_VideoCopyRef_U8_U8_8xH routine matching `height`
 * (4, 8 or 16).  Any other height is a caller error and trips assert(0).
 */
static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
                              int stride, int height)
{
    if (height == 4) {
        mlib_VideoCopyRef_U8_U8_8x4(dest, (uint8_t *)ref, stride);
    } else if (height == 8) {
        mlib_VideoCopyRef_U8_U8_8x8(dest, (uint8_t *)ref, stride);
    } else if (height == 16) {
        mlib_VideoCopyRef_U8_U8_8x16(dest, (uint8_t *)ref, stride);
    } else {
        assert(0);
    }
}
|
|
|
155
|
|
|
/*
 * Put an 8-pixel-wide block using the mediaLib X-interpolation routines.
 *
 * Forwards to mlib_VideoInterpX_U8_U8_8xH for height 4, 8 or 16; `stride`
 * is passed for both stride arguments.  Any other height trips assert(0).
 */
static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    if (height == 4) {
        mlib_VideoInterpX_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 8) {
        mlib_VideoInterpX_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpX_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
    } else {
        assert(0);
    }
}
|
|
|
176
|
|
|
/*
 * Put an 8-pixel-wide block using the mediaLib Y-interpolation routines.
 *
 * Forwards to mlib_VideoInterpY_U8_U8_8xH for height 4, 8 or 16; `stride`
 * is passed for both stride arguments.  Any other height trips assert(0).
 */
static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    if (height == 4) {
        mlib_VideoInterpY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 8) {
        mlib_VideoInterpY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
    } else {
        assert(0);
    }
}
|
|
|
197
|
|
|
/*
 * Put an 8-pixel-wide block using the mediaLib XY-interpolation routines.
 *
 * Forwards to mlib_VideoInterpXY_U8_U8_8xH for height 4, 8 or 16; `stride`
 * is passed for both stride arguments.  Any other height trips assert(0).
 */
static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    if (height == 4) {
        mlib_VideoInterpXY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 8) {
        mlib_VideoInterpXY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpXY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
    } else {
        assert(0);
    }
}
|
|
|
218
|
|
|
219 /* average block, width 16 pixel, height 8/16 */
|
|
|
220
|
|
|
/*
 * Average a 16-pixel-wide reference block into dest.
 *
 * Forwards to the mlib_VideoCopyRefAve_U8_U8_16xH routine matching `height`
 * (8 or 16).  Any other height is a caller error and trips assert(0).
 */
static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    if (height == 8) {
        mlib_VideoCopyRefAve_U8_U8_16x8(dest, (uint8_t *)ref, stride);
    } else if (height == 16) {
        mlib_VideoCopyRefAve_U8_U8_16x16(dest, (uint8_t *)ref, stride);
    } else {
        assert(0);
    }
}
|
|
|
237
|
|
|
/*
 * Average a 16-pixel-wide block into dest using the mediaLib
 * X-interpolation-with-average routines.
 *
 * Forwards to mlib_VideoInterpAveX_U8_U8_16xH for height 8 or 16; `stride`
 * is passed for both stride arguments.  Any other height trips assert(0).
 */
static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
                                  int stride, int height)
{
    if (height == 8) {
        mlib_VideoInterpAveX_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpAveX_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
    } else {
        assert(0);
    }
}
|
|
|
254
|
|
|
/*
 * Average a 16-pixel-wide block into dest using the mediaLib
 * Y-interpolation-with-average routines.
 *
 * Forwards to mlib_VideoInterpAveY_U8_U8_16xH for height 8 or 16; `stride`
 * is passed for both stride arguments.  Any other height trips assert(0).
 */
static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
                                  int stride, int height)
{
    if (height == 8) {
        mlib_VideoInterpAveY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpAveY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
    } else {
        assert(0);
    }
}
|
|
|
271
|
|
|
/*
 * Average a 16-pixel-wide block into dest using the mediaLib
 * XY-interpolation-with-average routines.
 *
 * Forwards to mlib_VideoInterpAveXY_U8_U8_16xH for height 8 or 16; `stride`
 * is passed for both stride arguments.  Any other height trips assert(0).
 */
static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
                                  int stride, int height)
{
    if (height == 8) {
        mlib_VideoInterpAveXY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpAveXY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
    } else {
        assert(0);
    }
}
|
|
|
288
|
|
|
289 /* average block, width 8 pixel, height 4/8/16 */
|
|
|
290
|
|
|
/*
 * Average an 8-pixel-wide reference block into dest.
 *
 * Forwards to the mlib_VideoCopyRefAve_U8_U8_8xH routine matching `height`
 * (4, 8 or 16).  Any other height is a caller error and trips assert(0).
 */
static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
                              int stride, int height)
{
    if (height == 4) {
        mlib_VideoCopyRefAve_U8_U8_8x4(dest, (uint8_t *)ref, stride);
    } else if (height == 8) {
        mlib_VideoCopyRefAve_U8_U8_8x8(dest, (uint8_t *)ref, stride);
    } else if (height == 16) {
        mlib_VideoCopyRefAve_U8_U8_8x16(dest, (uint8_t *)ref, stride);
    } else {
        assert(0);
    }
}
|
|
|
311
|
|
|
/*
 * Average an 8-pixel-wide block into dest using the mediaLib
 * X-interpolation-with-average routines.
 *
 * Forwards to mlib_VideoInterpAveX_U8_U8_8xH for height 4, 8 or 16;
 * `stride` is passed for both stride arguments.  Any other height trips
 * assert(0).
 */
static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    if (height == 4) {
        mlib_VideoInterpAveX_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 8) {
        mlib_VideoInterpAveX_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpAveX_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
    } else {
        assert(0);
    }
}
|
|
|
332
|
|
|
/*
 * Average an 8-pixel-wide block into dest using the mediaLib
 * Y-interpolation-with-average routines.
 *
 * Forwards to mlib_VideoInterpAveY_U8_U8_8xH for height 4, 8 or 16;
 * `stride` is passed for both stride arguments.  Any other height trips
 * assert(0).
 */
static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    if (height == 4) {
        mlib_VideoInterpAveY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 8) {
        mlib_VideoInterpAveY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpAveY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
    } else {
        assert(0);
    }
}
|
|
|
353
|
|
|
/*
 * Average an 8-pixel-wide block into dest using the mediaLib
 * XY-interpolation-with-average routines.
 *
 * Forwards to mlib_VideoInterpAveXY_U8_U8_8xH for height 4, 8 or 16;
 * `stride` is passed for both stride arguments.  Any other height trips
 * assert(0).
 */
static void avg_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    if (height == 4) {
        mlib_VideoInterpAveXY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 8) {
        mlib_VideoInterpAveXY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpAveXY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
    } else {
        assert(0);
    }
}
|
|
|
374
|
|
|
375 /* swap byte order of a buffer */
|
|
|
376
|
|
|
/*
 * Byte-swap a buffer of 32-bit words.
 *
 * dst - output buffer
 * src - input buffer
 * w   - element count handed to mlib_VectorReverseByteOrder_U32_U32
 */
static void bswap_buf_mlib(uint32_t *dst, uint32_t *src, int w)
{
    /* The mediaLib return value is intentionally not inspected. */
    (void)mlib_VectorReverseByteOrder_U32_U32(dst, src, w);
}
|
|
|
381
|
|
|
382 /* transformations */
|
|
|
383
|
|
|
384 static void ff_idct_put_mlib(uint8_t *dest, int line_size, DCTELEM *data)
|
|
|
385 {
|
|
|
386 int i;
|
|
|
387 uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
|
|
388
|
|
|
389 mlib_VideoIDCT8x8_S16_S16 (data, data);
|
|
|
390
|
|
|
391 for(i=0;i<8;i++) {
|
|
|
392 dest[0] = cm[data[0]];
|
|
|
393 dest[1] = cm[data[1]];
|
|
|
394 dest[2] = cm[data[2]];
|
|
|
395 dest[3] = cm[data[3]];
|
|
|
396 dest[4] = cm[data[4]];
|
|
|
397 dest[5] = cm[data[5]];
|
|
|
398 dest[6] = cm[data[6]];
|
|
|
399 dest[7] = cm[data[7]];
|
|
|
400
|
|
|
401 dest += line_size;
|
|
|
402 data += 8;
|
|
|
403 }
|
|
|
404 }
|
|
|
405
|
|
|
406 static void ff_idct_add_mlib(uint8_t *dest, int line_size, DCTELEM *data)
|
|
|
407 {
|
|
|
408 mlib_VideoIDCT8x8_S16_S16 (data, data);
|
|
|
409 mlib_VideoAddBlock_U8_S16(dest, (mlib_s16 *)data, line_size);
|
|
|
410 }
|
|
|
411
|
|
|
412 static void ff_idct_mlib(DCTELEM *data)
|
|
|
413 {
|
|
|
414 mlib_VideoIDCT8x8_S16_S16 (data, data);
|
|
|
415 }
|
|
|
416
|
|
|
417 static void ff_fdct_mlib(DCTELEM *data)
|
|
|
418 {
|
|
|
419 mlib_VideoDCT8x8_S16_S16 (data, data);
|
|
|
420 }
|
|
|
421
|
|
|
422 void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
|
|
|
423 {
|
|
|
424 c->get_pixels = get_pixels_mlib;
|
|
|
425 c->diff_pixels = diff_pixels_mlib;
|
|
|
426 c->add_pixels_clamped = add_pixels_clamped_mlib;
|
|
|
427
|
|
|
428 c->put_pixels_tab[0][0] = put_pixels16_mlib;
|
|
|
429 c->put_pixels_tab[0][1] = put_pixels16_x2_mlib;
|
|
|
430 c->put_pixels_tab[0][2] = put_pixels16_y2_mlib;
|
|
|
431 c->put_pixels_tab[0][3] = put_pixels16_xy2_mlib;
|
|
|
432 c->put_pixels_tab[1][0] = put_pixels8_mlib;
|
|
|
433 c->put_pixels_tab[1][1] = put_pixels8_x2_mlib;
|
|
|
434 c->put_pixels_tab[1][2] = put_pixels8_y2_mlib;
|
|
|
435 c->put_pixels_tab[1][3] = put_pixels8_xy2_mlib;
|
|
|
436
|
|
|
437 c->avg_pixels_tab[0][0] = avg_pixels16_mlib;
|
|
|
438 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mlib;
|
|
|
439 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mlib;
|
|
|
440 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mlib;
|
|
|
441 c->avg_pixels_tab[1][0] = avg_pixels8_mlib;
|
|
|
442 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mlib;
|
|
|
443 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mlib;
|
|
|
444 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mlib;
|
|
|
445
|
|
|
446 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mlib;
|
|
|
447 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mlib;
|
|
|
448
|
|
|
449 c->bswap_buf = bswap_buf_mlib;
|
|
|
450 }
|
|
|
451
|
|
|
452 void MPV_common_init_mlib(MpegEncContext *s)
|
|
|
453 {
|
|
|
454 if(s->avctx->dct_algo==FF_DCT_AUTO || s->avctx->dct_algo==FF_DCT_MLIB){
|
|
|
455 s->dsp.fdct = ff_fdct_mlib;
|
|
|
456 }
|
|
|
457
|
|
|
458 if(s->avctx->idct_algo==FF_IDCT_AUTO || s->avctx->idct_algo==FF_IDCT_MLIB){
|
|
|
459 s->dsp.idct_put= ff_idct_put_mlib;
|
|
|
460 s->dsp.idct_add= ff_idct_add_mlib;
|
|
|
461 s->dsp.idct = ff_idct_mlib;
|
|
|
462 s->dsp.idct_permutation_type= FF_NO_IDCT_PERM;
|
|
|
463 }
|
|
|
464 }
|