Mercurial > libavcodec.hg
annotate alpha/dsputil_alpha.c @ 513:fb670ca9f8eb libavcodec
Use updated motion compensation routines.
| author | mellum |
|---|---|
| date | Wed, 03 Jul 2002 01:09:44 +0000 |
| parents | fa4425cf6b31 |
| children | 70113647b50d |
| rev | line source |
|---|---|
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
1 /* |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
2 * Alpha optimized DSP utils |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org> |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
4 * |
| 429 | 5 * This library is free software; you can redistribute it and/or |
| 6 * modify it under the terms of the GNU Lesser General Public | |
| 7 * License as published by the Free Software Foundation; either | |
| 8 * version 2 of the License, or (at your option) any later version. | |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
9 * |
| 429 | 10 * This library is distributed in the hope that it will be useful, |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 429 | 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 * Lesser General Public License for more details. | |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
14 * |
| 429 | 15 * You should have received a copy of the GNU Lesser General Public |
| 16 * License along with this library; if not, write to the Free Software | |
| 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
18 */ |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
19 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
20 #include "asm.h" |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
21 #include "../dsputil.h" |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
22 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
23 void simple_idct_axp(DCTELEM *block); |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
24 |
|
511
fa4425cf6b31
Assembly version of put_pixels. This is currently the function that
mellum
parents:
509
diff
changeset
|
25 void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, |
|
fa4425cf6b31
Assembly version of put_pixels. This is currently the function that
mellum
parents:
509
diff
changeset
|
26 int line_size, int h); |
|
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
27 void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
28 int line_size); |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
29 void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
30 int line_size); |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
31 |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
32 #if 0 |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
33 /* These functions were the base for the optimized assembler routines, |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
34 and remain here for documentation purposes. */ |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
35 static void put_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels, |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
36 int line_size) |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
37 { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
38 int i = 8; |
|
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
39 uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */ |
|
505
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
40 |
|
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
41 ASM_ACCEPT_MVI; |
|
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
42 |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
43 do { |
|
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
44 uint64_t shorts0, shorts1; |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
45 |
|
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
46 shorts0 = ldq(block); |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
47 shorts0 = maxsw4(shorts0, 0); |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
48 shorts0 = minsw4(shorts0, clampmask); |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
49 stl(pkwb(shorts0), pixels); |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
50 |
|
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
51 shorts1 = ldq(block + 4); |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
52 shorts1 = maxsw4(shorts1, 0); |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
53 shorts1 = minsw4(shorts1, clampmask); |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
54 stl(pkwb(shorts1), pixels + 4); |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
55 |
|
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
56 pixels += line_size; |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
57 block += 8; |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
58 } while (--i); |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
59 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
60 |
|
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
61 void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels, |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
62 int line_size) |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
63 { |
|
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
64 int h = 8; |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
65 /* Keep this function a leaf function by generating the constants |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
66 manually (mainly for the hack value ;-). */ |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
67 uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */ |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
68 uint64_t signmask = zap(-1, 0x33); |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
69 signmask ^= signmask >> 1; /* 0x8000800080008000 */ |
|
505
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
70 |
|
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
71 ASM_ACCEPT_MVI; |
|
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
72 |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
73 do { |
|
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
74 uint64_t shorts0, pix0, signs0; |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
75 uint64_t shorts1, pix1, signs1; |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
76 |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
77 shorts0 = ldq(block); |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
78 shorts1 = ldq(block + 4); |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
79 |
|
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
80 pix0 = unpkbw(ldl(pixels)); |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
81 /* Signed subword add (MMX paddw). */ |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
82 signs0 = shorts0 & signmask; |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
83 shorts0 &= ~signmask; |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
84 shorts0 += pix0; |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
85 shorts0 ^= signs0; |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
86 /* Clamp. */ |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
87 shorts0 = maxsw4(shorts0, 0); |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
88 shorts0 = minsw4(shorts0, clampmask); |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
89 |
|
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
90 /* Next 4. */ |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
91 pix1 = unpkbw(ldl(pixels + 4)); |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
92 signs1 = shorts1 & signmask; |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
93 shorts1 &= ~signmask; |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
94 shorts1 += pix1; |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
95 shorts1 ^= signs1; |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
96 shorts1 = maxsw4(shorts1, 0); |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
97 shorts1 = minsw4(shorts1, clampmask); |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
98 |
|
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
99 stl(pkwb(shorts0), pixels); |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
100 stl(pkwb(shorts1), pixels + 4); |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
101 |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
102 pixels += line_size; |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
103 block += 8; |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
104 } while (--h); |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
105 } |
|
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
106 #endif |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
107 |
/*
 * Average of a and b computed independently in each byte, rounding down:
 * per byte the result is (a + b) >> 1.
 *
 * Uses a + b == 2 * (a & b) + (a ^ b).  The 0xfe mask clears the lowest bit
 * of every byte of (a ^ b) before the shift, so no bit moves into the byte
 * below.
 *
 * Assumes BYTE_VEC(v) (defined outside this file) replicates v into all
 * eight bytes of a 64-bit value - confirm in asm.h.
 */
static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
{
    const uint64_t common    = a & b;
    const uint64_t half_diff = ((a ^ b) & BYTE_VEC(0xfe)) >> 1;

    return common + half_diff;
}
| 112 | |
/*
 * Average of a and b computed independently in each byte, rounding up:
 * per byte the result is (a + b + 1) >> 1.
 *
 * Uses a + b == 2 * (a | b) - (a ^ b).  The 0xfe mask clears the lowest bit
 * of every byte of (a ^ b) before the shift, so no bit moves into the byte
 * below.
 *
 * Assumes BYTE_VEC(v) (defined outside this file) replicates v into all
 * eight bytes of a 64-bit value - confirm in asm.h.
 */
static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    const uint64_t either    = a | b;
    const uint64_t half_diff = ((a ^ b) & BYTE_VEC(0xfe)) >> 1;

    return either - half_diff;
}
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
117 |
/*
 * Rounded average of four values, computed independently in each byte:
 * per byte the result is (l1 + l2 + l3 + l4 + 2) >> 2.
 *
 * Each byte is split into its upper six bits and its lower two bits so that
 * neither partial sum can overflow a byte:
 *   - the upper parts are divided by four first and then summed;
 *   - the lower parts are summed together with the rounding constant 2
 *     (at most 14 per byte), divided by four, and masked with 0x03 to
 *     discard bits shifted in from the neighbouring byte.
 *
 * Assumes BYTE_VEC(v) (defined outside this file) replicates v into all
 * eight bytes of a 64-bit value - confirm in asm.h.
 */
static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
{
    const uint64_t low2 = BYTE_VEC(0x03);

    const uint64_t quarters = ((l1 & ~low2) >> 2)
                            + ((l2 & ~low2) >> 2)
                            + ((l3 & ~low2) >> 2)
                            + ((l4 & ~low2) >> 2);

    const uint64_t low_sum  = (l1 & low2)
                            + (l2 & low2)
                            + (l3 & low2)
                            + (l4 & low2)
                            + BYTE_VEC(0x02);

    return quarters + ((low_sum >> 2) & low2);
}
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
131 |
/*
 * Average of four values with the smaller rounding constant, computed
 * independently in each byte: per byte the result is
 * (l1 + l2 + l3 + l4 + 1) >> 2.
 *
 * Same splitting scheme as the rounding variant: the upper six bits of each
 * byte are divided by four and summed; the lower two bits are summed with
 * the constant 1, divided by four, and masked with 0x03 to discard bits
 * shifted in from the neighbouring byte.
 *
 * Assumes BYTE_VEC(v) (defined outside this file) replicates v into all
 * eight bytes of a 64-bit value - confirm in asm.h.
 */
static inline uint64_t avg4_no_rnd(uint64_t l1, uint64_t l2,
                                   uint64_t l3, uint64_t l4)
{
    const uint64_t low2 = BYTE_VEC(0x03);

    const uint64_t quarters = ((l1 & ~low2) >> 2)
                            + ((l2 & ~low2) >> 2)
                            + ((l3 & ~low2) >> 2)
                            + ((l4 & ~low2) >> 2);

    const uint64_t low_sum  = (l1 & low2)
                            + (l2 & low2)
                            + (l3 & low2)
                            + (l4 & low2)
                            + BYTE_VEC(0x01);

    return quarters + ((low_sum >> 2) & low2);
}
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
146 |
/*
 * Function body for the variants without interpolation: for every row, the
 * value fetched from `pixels` with LOAD is passed unchanged to STORE for
 * `block`.
 *
 * Relies on `pixels`, `block`, `line_size` and `h` being in scope at the
 * expansion site and modifies `pixels`, `block` and `h`.  The body runs
 * before h is tested, so h must be at least 1.
 */
#define OP(LOAD, STORE, INCR)                                               \
    do {                                                                    \
        uint64_t row = LOAD(pixels);                                        \
                                                                            \
        STORE(row, block);                                                  \
        pixels += line_size;                                                \
        block  += INCR;                                                     \
    } while (--h != 0)
| 153 | |
/*
 * Function body for the horizontally interpolated variants: every row is
 * averaged (AVG2) with a copy of itself moved by one byte, then passed to
 * STORE.
 *
 * The moved copy is the loaded word shifted right by 8 bits with pixels[8]
 * placed in the top byte, so nine bytes are read per row.  Assuming
 * little-endian byte order, this pairs each pixel with its right-hand
 * neighbour.
 *
 * Relies on `pixels`, `block`, `line_size`, `h` and AVG2 at the expansion
 * site; modifies `pixels`, `block` and `h`.  h must be at least 1.
 */
#define OP_X2(LOAD, STORE, INCR)                                            \
    do {                                                                    \
        uint64_t here  = LOAD(pixels);                                      \
        uint64_t right = (here >> 8) | ((uint64_t) pixels[8] << 56);        \
                                                                            \
        STORE(AVG2(here, right), block);                                    \
        pixels += line_size;                                                \
        block  += INCR;                                                     \
    } while (--h != 0)
| 164 | |
/*
 * Function body for the vertically interpolated variants: every output row
 * is AVG2 of one source row and the source row line_size bytes after it.
 *
 * The row loaded for the lower position is kept and reused as the upper row
 * of the next iteration, so h + 1 source rows are read in total.
 *
 * Relies on `pixels`, `block`, `line_size`, `h` and AVG2 at the expansion
 * site; modifies `pixels`, `block` and `h`.  h must be at least 1.
 */
#define OP_Y2(LOAD, STORE, INCR)                                            \
    do {                                                                    \
        uint64_t upper = LOAD(pixels);                                      \
                                                                            \
        do {                                                                \
            uint64_t lower;                                                 \
                                                                            \
            pixels += line_size;                                            \
            lower = LOAD(pixels);                                           \
            STORE(AVG2(upper, lower), block);                               \
            block += INCR;                                                  \
            upper = lower;                                                  \
        } while (--h != 0);                                                 \
    } while (0)
| 178 | |
/*
 * Function body for the variants interpolated in both directions: every
 * output row is AVG4 of a source row, the same row moved by one byte, and
 * the corresponding two words of the source row line_size bytes later.
 *
 * The moved copy is built as in the horizontal case (word >> 8 with
 * pixels[8] in the top byte), so nine bytes are read per source row.  The
 * pair computed for the lower row is reused as the upper pair of the next
 * iteration, so h + 1 source rows are read in total.
 *
 * Relies on `pixels`, `block`, `line_size`, `h` and AVG4 at the expansion
 * site; modifies `pixels`, `block` and `h`.  h must be at least 1.
 */
#define OP_XY2(LOAD, STORE, INCR)                                           \
    do {                                                                    \
        uint64_t up      = LOAD(pixels);                                    \
        uint64_t up_next = (up >> 8) | ((uint64_t) pixels[8] << 56);        \
                                                                            \
        do {                                                                \
            uint64_t down, down_next;                                       \
                                                                            \
            pixels += line_size;                                            \
            down      = LOAD(pixels);                                       \
            down_next = (down >> 8) | ((uint64_t) pixels[8] << 56);         \
                                                                            \
            STORE(AVG4(up, up_next, down, down_next), block);               \
                                                                            \
            block  += INCR;                                                 \
            up      = down;                                                 \
            up_next = down_next;                                            \
        } while (--h != 0);                                                 \
    } while (0)
| 198 | |
/*
 * Defines the static function OPNAME_pixelsSUFF_axp(block, pixels,
 * line_size, h) whose body is the loop macro OPKIND.
 *
 * The source address is tested once up front: when its low three bits are
 * all zero the loop is instantiated with ldq, otherwise with uldq.
 *
 * BTYPE  - element type of the destination pointer
 * OPNAME - name prefix of the generated function
 * SUFF   - name suffix placed after "_pixels" (may be empty)
 * OPKIND - loop macro supplying the body
 * STORE  - store macro forwarded to OPKIND
 * INCR   - per-row increment of `block`, forwarded to OPKIND
 */
#define MAKE_OP(BTYPE, OPNAME, SUFF, OPKIND, STORE, INCR)                   \
static void OPNAME ## _pixels ## SUFF ## _axp(BTYPE *block,                 \
                                              const uint8_t *pixels,        \
                                              int line_size, int h)         \
{                                                                           \
    if (((size_t) pixels & 0x7) == 0) {                                     \
        OPKIND(ldq, STORE, INCR);                                           \
    } else {                                                                \
        OPKIND(uldq, STORE, INCR);                                          \
    }                                                                       \
}
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
210 |
/*
 * Generates the four functions of one family via MAKE_OP:
 *   OPNAME_pixels_axp      - body OP
 *   OPNAME_pixels_x2_axp   - body OP_X2
 *   OPNAME_pixels_y2_axp   - body OP_Y2
 *   OPNAME_pixels_xy2_axp  - body OP_XY2
 * AVG2, AVG4 and the STORE macro are resolved where PIXOP is used, so they
 * must be defined as wanted before each use.
 */
#define PIXOP(BTYPE, OPNAME, STORE, INCR)                                   \
    MAKE_OP(BTYPE, OPNAME,     , OP,     STORE, INCR);                      \
    MAKE_OP(BTYPE, OPNAME, _x2,  OP_X2,  STORE, INCR);                      \
    MAKE_OP(BTYPE, OPNAME, _y2,  OP_Y2,  STORE, INCR);                      \
    MAKE_OP(BTYPE, OPNAME, _xy2, OP_XY2, STORE, INCR);
| 216 | |
| 217 /* Rounding primitives. */ | |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
218 #define AVG2 avg2 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
219 #define AVG4 avg4 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
220 #define STORE(l, b) stq(l, b) |
| 513 | 221 PIXOP(uint8_t, put, STORE, line_size); |
| 222 | |
| 223 #undef STORE | |
| 224 #define STORE(l, b) stq(AVG2(l, ldq(b)), b); | |
| 225 PIXOP(uint8_t, avg, STORE, line_size); | |
| 226 | |
| 227 /* Not rounding primitives. */ | |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
228 #undef AVG2 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
229 #undef AVG4 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
230 #undef STORE |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
231 #define AVG2 avg2_no_rnd |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
232 #define AVG4 avg4_no_rnd |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
233 #define STORE(l, b) stq(l, b) |
| 513 | 234 PIXOP(uint8_t, put_no_rnd, STORE, line_size); |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
235 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
236 #undef STORE |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
237 #define STORE(l, b) stq(AVG2(l, ldq(b)), b); |
| 513 | 238 PIXOP(uint8_t, avg_no_rnd, STORE, line_size); |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
239 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
240 void dsputil_init_alpha(void) |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
241 { |
|
511
fa4425cf6b31
Assembly version of put_pixels. This is currently the function that
mellum
parents:
509
diff
changeset
|
242 put_pixels_tab[0] = put_pixels_axp_asm; |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
243 put_pixels_tab[1] = put_pixels_x2_axp; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
244 put_pixels_tab[2] = put_pixels_y2_axp; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
245 put_pixels_tab[3] = put_pixels_xy2_axp; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
246 |
|
511
fa4425cf6b31
Assembly version of put_pixels. This is currently the function that
mellum
parents:
509
diff
changeset
|
247 put_no_rnd_pixels_tab[0] = put_pixels_axp_asm; |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
248 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_axp; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
249 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_axp; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
250 put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_axp; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
251 |
| 513 | 252 avg_pixels_tab[0] = avg_pixels_axp; |
| 253 avg_pixels_tab[1] = avg_pixels_x2_axp; | |
| 254 avg_pixels_tab[2] = avg_pixels_y2_axp; | |
| 255 avg_pixels_tab[3] = avg_pixels_xy2_axp; | |
| 256 | |
| 257 avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels_axp; | |
| 258 avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_axp; | |
| 259 avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_axp; | |
| 260 avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_axp; | |
| 261 | |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
262 /* amask clears all bits that correspond to present features. */ |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
263 if (amask(AMASK_MVI) == 0) { |
|
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
264 put_pixels_clamped = put_pixels_clamped_mvi_asm; |
|
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
265 add_pixels_clamped = add_pixels_clamped_mvi_asm; |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
266 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
267 } |
