Mercurial > libavcodec.hg
annotate dsputil.c @ 11864:7204cb7dd601 libavcodec
Quant changes only once per MB so move the corresponding scale factor assignment
out of the block decoding loop. Indeo4 doesn't use any scale table but the quant
level itself as scale. Therefore access scale table only if its pointer != NULL.
| author | maxim |
|---|---|
| date | Thu, 10 Jun 2010 17:31:12 +0000 |
| parents | eaf3ffb5df56 |
| children | f2007d7c3f1d |
| rev | line source |
|---|---|
| 0 | 1 /* |
| 2 * DSP utils | |
|
8629
04423b2f6e0b
cosmetics: Remove pointless period after copyright statement non-sentences.
diego
parents:
8627
diff
changeset
|
3 * Copyright (c) 2000, 2001 Fabrice Bellard |
|
1739
07a484280a82
copyright year update of the files i touched and remembered, things look annoyingly unmaintained otherwise
michael
parents:
1729
diff
changeset
|
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
| 0 | 5 * |
| 5214 | 6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> |
| 7 * | |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
8 * This file is part of FFmpeg. |
|
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
9 * |
|
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
10 * FFmpeg is free software; you can redistribute it and/or |
| 429 | 11 * modify it under the terms of the GNU Lesser General Public |
| 12 * License as published by the Free Software Foundation; either | |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
13 * version 2.1 of the License, or (at your option) any later version. |
| 0 | 14 * |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
15 * FFmpeg is distributed in the hope that it will be useful, |
| 0 | 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 429 | 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 18 * Lesser General Public License for more details. | |
| 0 | 19 * |
| 429 | 20 * You should have received a copy of the GNU Lesser General Public |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
21 * License along with FFmpeg; if not, write to the Free Software |
|
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3029
diff
changeset
|
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 0 | 23 */ |
| 2967 | 24 |
| 1106 | 25 /** |
|
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
11637
diff
changeset
|
26 * @file |
| 1106 | 27 * DSP utils |
| 28 */ | |
| 2967 | 29 |
| 0 | 30 #include "avcodec.h" |
| 31 #include "dsputil.h" | |
| 1092 | 32 #include "simple_idct.h" |
| 1557 | 33 #include "faandct.h" |
| 6407 | 34 #include "faanidct.h" |
|
8627
d6bab465b82c
moves mid_pred() into mathops.h (with arch specific code split by directory)
aurel
parents:
8596
diff
changeset
|
35 #include "mathops.h" |
|
10748
36611425fedb
Add required header #includes for mpegvideo.h and config.h.
diego
parents:
10644
diff
changeset
|
36 #include "mpegvideo.h" |
|
36611425fedb
Add required header #includes for mpegvideo.h and config.h.
diego
parents:
10644
diff
changeset
|
37 #include "config.h" |
|
11375
84963c795459
Move some prototypes from dsputil.c to reasonable header files
mru
parents:
11369
diff
changeset
|
38 #include "lpc.h" |
|
84963c795459
Move some prototypes from dsputil.c to reasonable header files
mru
parents:
11369
diff
changeset
|
39 #include "ac3dec.h" |
|
84963c795459
Move some prototypes from dsputil.c to reasonable header files
mru
parents:
11369
diff
changeset
|
40 #include "vorbis.h" |
|
84963c795459
Move some prototypes from dsputil.c to reasonable header files
mru
parents:
11369
diff
changeset
|
41 #include "png.h" |
| 676 | 42 |
/* Lookup table used by the *_clamped_c routines in this file; they index it
 * through (ff_cropTbl + MAX_NEG_CROP), so indices from -MAX_NEG_CROP up to
 * 255 + MAX_NEG_CROP are in range.
 * NOTE(review): zero-initialized here; presumably filled in by an init
 * routine outside this view -- confirm. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Lookup table used by pix_norm1_c and the sse*_c routines; they index it
 * through (ff_squareTbl + 256), so indices from -256 to 255 are in range.
 * NOTE(review): presumably holds i*i for each index i once initialized -- confirm. */
uint32_t ff_squareTbl[512] = {0, };
| 0 | 45 |
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
// (~0UL/255 is 0x0101...01, so the multiplication replicates the byte into
// every byte of an unsigned long)
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
| 6385 | 49 |
/* Zigzag scan order for an 8x8 block: entry i is the raster index
 * (row * 8 + column) of the i-th coefficient in scan order. */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
| 60 | |
/* Specific zigzag scan for 248 idct. NOTE that unlike the
   specification, we interleave the fields.
   Entry i is the raster index (row * 8 + column) of the i-th coefficient. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
| 73 | |
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
/* NOTE(review): only declared here (16-byte aligned); it is filled in
 * somewhere outside this view -- confirm where. */
DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
| 220 | 76 |
/* Alternate scan order that walks mostly along rows: entry i is the raster
 * index (row * 8 + column) of the i-th coefficient in scan order. */
const uint8_t ff_alternate_horizontal_scan[64] = {
    0,  1,   2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
| 87 | |
/* Alternate scan order that walks mostly along columns: entry i is the raster
 * index (row * 8 + column) of the i-th coefficient in scan order. */
const uint8_t ff_alternate_vertical_scan[64] = {
    0,  8,  16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
| 98 | |
/* a*inverse[b]>>32 == a/b for all 0<=a<=16909558 && 2<=b<=256
 * for a>16909558, is an overestimate by less than 1 part in 1<<24
 *
 * Entry b is 2^32/b rounded up for b >= 2 (e.g. entry 2 is 2^31, entry 256 is
 * 2^24); entry 1 is 2^32-1 and entry 0 is 0. */
const uint32_t ff_inverse[257]={
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
  16777216
};
| 136 | |
/* Input permutation for the simple_idct_mmx */
/* Entry i is the position coefficient i is moved to; the 64 entries are a
 * permutation of 0x00..0x3F. */
static const uint8_t simple_mmx_permutation[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
| 148 | |
|
6600
c3213c91124c
Add a new IDCT permutation, used in xvid_sse2 and possibly future similar IDCTs.
astrange
parents:
6450
diff
changeset
|
/* Permutation of the 8 positions within a row.
 * NOTE(review): presumably the row permutation for the xvid SSE2 IDCT, going
 * by the name; its user is outside this view -- confirm. */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
|
c3213c91124c
Add a new IDCT permutation, used in xvid_sse2 and possibly future similar IDCTs.
astrange
parents:
6450
diff
changeset
|
150 |
| 6438 | 151 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){ |
| 152 int i; | |
| 153 int end; | |
| 154 | |
| 155 st->scantable= src_scantable; | |
| 156 | |
| 157 for(i=0; i<64; i++){ | |
| 158 int j; | |
| 159 j = src_scantable[i]; | |
| 160 st->permutated[i] = permutation[j]; | |
| 8590 | 161 #if ARCH_PPC |
| 6438 | 162 st->inverse[j] = i; |
| 163 #endif | |
| 164 } | |
| 165 | |
| 166 end=-1; | |
| 167 for(i=0; i<64; i++){ | |
| 168 int j; | |
| 169 j = st->permutated[i]; | |
| 170 if(j>end) end=j; | |
| 171 st->raster_end[i]= end; | |
| 172 } | |
| 173 } | |
| 174 | |
/**
 * Sum of all samples of a 16x16 block.
 *
 * @param pix       top-left sample of the block
 * @param line_size distance in bytes between two vertically adjacent samples
 * @return the sum of the 256 samples
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
| 196 | |
| 1064 | 197 static int pix_norm1_c(uint8_t * pix, int line_size) |
| 612 | 198 { |
| 199 int s, i, j; | |
| 4179 | 200 uint32_t *sq = ff_squareTbl + 256; |
| 612 | 201 |
| 202 s = 0; | |
| 203 for (i = 0; i < 16; i++) { | |
| 2979 | 204 for (j = 0; j < 16; j += 8) { |
|
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
205 #if 0 |
| 2979 | 206 s += sq[pix[0]]; |
| 207 s += sq[pix[1]]; | |
| 208 s += sq[pix[2]]; | |
| 209 s += sq[pix[3]]; | |
| 210 s += sq[pix[4]]; | |
| 211 s += sq[pix[5]]; | |
| 212 s += sq[pix[6]]; | |
| 213 s += sq[pix[7]]; | |
|
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
214 #else |
|
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
215 #if LONG_MAX > 2147483647 |
| 2979 | 216 register uint64_t x=*(uint64_t*)pix; |
| 217 s += sq[x&0xff]; | |
| 218 s += sq[(x>>8)&0xff]; | |
| 219 s += sq[(x>>16)&0xff]; | |
| 220 s += sq[(x>>24)&0xff]; | |
|
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
221 s += sq[(x>>32)&0xff]; |
|
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
222 s += sq[(x>>40)&0xff]; |
|
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
223 s += sq[(x>>48)&0xff]; |
|
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
224 s += sq[(x>>56)&0xff]; |
|
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
225 #else |
| 2979 | 226 register uint32_t x=*(uint32_t*)pix; |
| 227 s += sq[x&0xff]; | |
| 228 s += sq[(x>>8)&0xff]; | |
| 229 s += sq[(x>>16)&0xff]; | |
| 230 s += sq[(x>>24)&0xff]; | |
|
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
231 x=*(uint32_t*)(pix+4); |
|
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
232 s += sq[x&0xff]; |
|
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
233 s += sq[(x>>8)&0xff]; |
|
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
234 s += sq[(x>>16)&0xff]; |
|
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
235 s += sq[(x>>24)&0xff]; |
|
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
236 #endif |
|
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
237 #endif |
| 2979 | 238 pix += 8; |
| 239 } | |
| 240 pix += line_size - 16; | |
| 612 | 241 } |
| 242 return s; | |
| 243 } | |
| 244 | |
/**
 * Byte-swap w 32-bit words from src into dst using bswap_32().
 * Words are processed in increasing index order; w <= 0 does nothing.
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int n;

    for (n = 0; n < w; n++)
        dst[n] = bswap_32(src[n]);
}
| 612 | 262 |
| 2184 | 263 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) |
| 264 { | |
| 265 int s, i; | |
| 4179 | 266 uint32_t *sq = ff_squareTbl + 256; |
| 2184 | 267 |
| 268 s = 0; | |
| 269 for (i = 0; i < h; i++) { | |
| 270 s += sq[pix1[0] - pix2[0]]; | |
| 271 s += sq[pix1[1] - pix2[1]]; | |
| 272 s += sq[pix1[2] - pix2[2]]; | |
| 273 s += sq[pix1[3] - pix2[3]]; | |
| 274 pix1 += line_size; | |
| 275 pix2 += line_size; | |
| 276 } | |
| 277 return s; | |
| 278 } | |
| 279 | |
| 1708 | 280 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) |
| 936 | 281 { |
| 282 int s, i; | |
| 4179 | 283 uint32_t *sq = ff_squareTbl + 256; |
| 936 | 284 |
| 285 s = 0; | |
| 1708 | 286 for (i = 0; i < h; i++) { |
| 936 | 287 s += sq[pix1[0] - pix2[0]]; |
| 288 s += sq[pix1[1] - pix2[1]]; | |
| 289 s += sq[pix1[2] - pix2[2]]; | |
| 290 s += sq[pix1[3] - pix2[3]]; | |
| 291 s += sq[pix1[4] - pix2[4]]; | |
| 292 s += sq[pix1[5] - pix2[5]]; | |
| 293 s += sq[pix1[6] - pix2[6]]; | |
| 294 s += sq[pix1[7] - pix2[7]]; | |
| 295 pix1 += line_size; | |
| 296 pix2 += line_size; | |
| 297 } | |
| 298 return s; | |
| 299 } | |
| 300 | |
| 1708 | 301 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
| 884 | 302 { |
|
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
303 int s, i; |
| 4179 | 304 uint32_t *sq = ff_squareTbl + 256; |
| 884 | 305 |
| 306 s = 0; | |
| 1708 | 307 for (i = 0; i < h; i++) { |
|
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
308 s += sq[pix1[ 0] - pix2[ 0]]; |
|
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
309 s += sq[pix1[ 1] - pix2[ 1]]; |
|
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
310 s += sq[pix1[ 2] - pix2[ 2]]; |
|
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
311 s += sq[pix1[ 3] - pix2[ 3]]; |
|
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
312 s += sq[pix1[ 4] - pix2[ 4]]; |
|
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
313 s += sq[pix1[ 5] - pix2[ 5]]; |
|
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
314 s += sq[pix1[ 6] - pix2[ 6]]; |
|
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
315 s += sq[pix1[ 7] - pix2[ 7]]; |
|
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
316 s += sq[pix1[ 8] - pix2[ 8]]; |
|
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
317 s += sq[pix1[ 9] - pix2[ 9]]; |
|
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
318 s += sq[pix1[10] - pix2[10]]; |
|
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
319 s += sq[pix1[11] - pix2[11]]; |
|
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
320 s += sq[pix1[12] - pix2[12]]; |
|
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
321 s += sq[pix1[13] - pix2[13]]; |
|
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
322 s += sq[pix1[14] - pix2[14]]; |
|
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
323 s += sq[pix1[15] - pix2[15]]; |
|
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
324 |
|
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
325 pix1 += line_size; |
|
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
326 pix2 += line_size; |
| 884 | 327 } |
| 328 return s; | |
| 329 } | |
| 330 | |
/**
 * Draw the edges of width 'w' around an image of size width x height by
 * replicating its border samples. The caller must provide w writable
 * rows/columns on every side of buf.
 *
 * @param buf    top-left sample of the image
 * @param wrap   distance in bytes between two vertically adjacent samples
 * @param width  image width in samples
 * @param height image height in samples
 * @param w      edge width in samples
 */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *const bottom = buf + (height - 1) * wrap;
    uint8_t *row;
    int n;

    /* top and bottom: repeat the first and the last image line */
    for (n = 1; n <= w; n++) {
        memcpy(buf    - n * wrap, buf,    width);
        memcpy(bottom + n * wrap, bottom, width);
    }

    /* left and right: repeat the first and the last sample of every line */
    for (n = 0, row = buf; n < height; n++, row += wrap) {
        memset(row - w,     row[0],         w);
        memset(row + width, row[width - 1], w);
    }

    /* corners: repeat the four corner samples */
    for (n = 1; n <= w; n++) {
        memset(buf    - n * wrap - w,     buf[0],            w); /* top left */
        memset(buf    - n * wrap + width, buf[width - 1],    w); /* top right */
        memset(bottom + n * wrap - w,     bottom[0],         w); /* bottom left */
        memset(bottom + n * wrap + width, bottom[width - 1], w); /* bottom right */
    }
}
| 359 | |
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int col, row;
    int first_row, first_col, last_row, last_col;

    /* a block lying completely outside the picture is moved so that it
     * overlaps the picture by exactly one row / column */
    if (src_y >= h) {
        src  += (h - 1 - src_y) * linesize;
        src_y = h - 1;
    } else if (src_y <= -block_h) {
        src  += (1 - block_h - src_y) * linesize;
        src_y = 1 - block_h;
    }
    if (src_x >= w) {
        src  += (w - 1 - src_x);
        src_x = w - 1;
    } else if (src_x <= -block_w) {
        src  += (1 - block_w - src_x);
        src_x = 1 - block_w;
    }

    /* part of the block that lies inside the picture: [first, last) */
    first_row = src_y < 0 ? -src_y : 0;
    first_col = src_x < 0 ? -src_x : 0;
    last_row  = h - src_y < block_h ? h - src_y : block_h;
    last_col  = w - src_x < block_w ? w - src_x : block_w;

    /* copy the existing part */
    for (row = first_row; row < last_row; row++)
        for (col = first_col; col < last_col; col++)
            buf[col + row * linesize] = src[col + row * linesize];

    /* top: repeat the first copied row upwards */
    for (row = 0; row < first_row; row++)
        for (col = first_col; col < last_col; col++)
            buf[col + row * linesize] = buf[col + first_row * linesize];

    /* bottom: repeat the last copied row downwards */
    for (row = last_row; row < block_h; row++)
        for (col = first_col; col < last_col; col++)
            buf[col + row * linesize] = buf[col + (last_row - 1) * linesize];

    for (row = 0; row < block_h; row++) {
        /* left: repeat the first valid column */
        for (col = 0; col < first_col; col++)
            buf[col + row * linesize] = buf[first_col + row * linesize];

        /* right: repeat the last valid column */
        for (col = last_col; col < block_w; col++)
            buf[col + row * linesize] = buf[last_col - 1 + row * linesize];
    }
}
| 430 | |
| 1064 | 431 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) |
| 0 | 432 { |
| 433 int i; | |
| 434 | |
| 435 /* read the pixels */ | |
| 436 for(i=0;i<8;i++) { | |
| 516 | 437 block[0] = pixels[0]; |
| 438 block[1] = pixels[1]; | |
| 439 block[2] = pixels[2]; | |
| 440 block[3] = pixels[3]; | |
| 441 block[4] = pixels[4]; | |
| 442 block[5] = pixels[5]; | |
| 443 block[6] = pixels[6]; | |
| 444 block[7] = pixels[7]; | |
| 445 pixels += line_size; | |
| 446 block += 8; | |
| 0 | 447 } |
| 448 } | |
| 449 | |
| 1064 | 450 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, |
| 2979 | 451 const uint8_t *s2, int stride){ |
| 324 | 452 int i; |
| 453 | |
| 454 /* read the pixels */ | |
| 455 for(i=0;i<8;i++) { | |
| 516 | 456 block[0] = s1[0] - s2[0]; |
| 457 block[1] = s1[1] - s2[1]; | |
| 458 block[2] = s1[2] - s2[2]; | |
| 459 block[3] = s1[3] - s2[3]; | |
| 460 block[4] = s1[4] - s2[4]; | |
| 461 block[5] = s1[5] - s2[5]; | |
| 462 block[6] = s1[6] - s2[6]; | |
| 463 block[7] = s1[7] - s2[7]; | |
| 324 | 464 s1 += stride; |
| 465 s2 += stride; | |
| 516 | 466 block += 8; |
| 324 | 467 } |
| 468 } | |
| 469 | |
| 470 | |
| 1064 | 471 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
| 2979 | 472 int line_size) |
| 0 | 473 { |
| 474 int i; | |
| 4176 | 475 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
| 2967 | 476 |
| 0 | 477 /* read the pixels */ |
| 478 for(i=0;i<8;i++) { | |
| 516 | 479 pixels[0] = cm[block[0]]; |
| 480 pixels[1] = cm[block[1]]; | |
| 481 pixels[2] = cm[block[2]]; | |
| 482 pixels[3] = cm[block[3]]; | |
| 483 pixels[4] = cm[block[4]]; | |
| 484 pixels[5] = cm[block[5]]; | |
| 485 pixels[6] = cm[block[6]]; | |
| 486 pixels[7] = cm[block[7]]; | |
| 487 | |
| 488 pixels += line_size; | |
| 489 block += 8; | |
| 0 | 490 } |
| 491 } | |
| 492 | |
| 2256 | 493 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, |
| 2979 | 494 int line_size) |
| 2256 | 495 { |
| 496 int i; | |
| 4176 | 497 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
| 2967 | 498 |
| 2256 | 499 /* read the pixels */ |
| 500 for(i=0;i<4;i++) { | |
| 501 pixels[0] = cm[block[0]]; | |
| 502 pixels[1] = cm[block[1]]; | |
| 503 pixels[2] = cm[block[2]]; | |
| 504 pixels[3] = cm[block[3]]; | |
| 505 | |
| 506 pixels += line_size; | |
| 507 block += 8; | |
| 508 } | |
| 509 } | |
| 510 | |
| 2257 | 511 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, |
| 2979 | 512 int line_size) |
| 2257 | 513 { |
| 514 int i; | |
| 4176 | 515 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
| 2967 | 516 |
| 2257 | 517 /* read the pixels */ |
| 518 for(i=0;i<2;i++) { | |
| 519 pixels[0] = cm[block[0]]; | |
| 520 pixels[1] = cm[block[1]]; | |
| 521 | |
| 522 pixels += line_size; | |
| 523 block += 8; | |
| 524 } | |
| 525 } | |
| 526 | |
| 2967 | 527 static void put_signed_pixels_clamped_c(const DCTELEM *block, |
|
1984
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
528 uint8_t *restrict pixels, |
|
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
529 int line_size) |
|
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
530 { |
|
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
531 int i, j; |
|
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
532 |
|
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
533 for (i = 0; i < 8; i++) { |
|
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
534 for (j = 0; j < 8; j++) { |
|
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
535 if (*block < -128) |
|
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
536 *pixels = 0; |
|
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
537 else if (*block > 127) |
|
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
538 *pixels = 255; |
|
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
539 else |
|
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
540 *pixels = (uint8_t)(*block + 128); |
|
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
541 block++; |
|
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
542 pixels++; |
|
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
543 } |
|
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
544 pixels += (line_size - 8); |
|
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
545 } |
|
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
546 } |
|
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
547 |
| 11231 | 548 static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
| 549 int line_size) | |
| 550 { | |
| 551 int i; | |
| 552 | |
| 553 /* read the pixels */ | |
| 554 for(i=0;i<8;i++) { | |
| 555 pixels[0] = block[0]; | |
| 556 pixels[1] = block[1]; | |
| 557 pixels[2] = block[2]; | |
| 558 pixels[3] = block[3]; | |
| 559 pixels[4] = block[4]; | |
| 560 pixels[5] = block[5]; | |
| 561 pixels[6] = block[6]; | |
| 562 pixels[7] = block[7]; | |
| 563 | |
| 564 pixels += line_size; | |
| 565 block += 8; | |
| 566 } | |
| 567 } | |
| 568 | |
| 1064 | 569 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
| 516 | 570 int line_size) |
| 0 | 571 { |
| 572 int i; | |
| 4176 | 573 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
| 2967 | 574 |
| 0 | 575 /* read the pixels */ |
| 576 for(i=0;i<8;i++) { | |
| 516 | 577 pixels[0] = cm[pixels[0] + block[0]]; |
| 578 pixels[1] = cm[pixels[1] + block[1]]; | |
| 579 pixels[2] = cm[pixels[2] + block[2]]; | |
| 580 pixels[3] = cm[pixels[3] + block[3]]; | |
| 581 pixels[4] = cm[pixels[4] + block[4]]; | |
| 582 pixels[5] = cm[pixels[5] + block[5]]; | |
| 583 pixels[6] = cm[pixels[6] + block[6]]; | |
| 584 pixels[7] = cm[pixels[7] + block[7]]; | |
| 585 pixels += line_size; | |
| 586 block += 8; | |
| 0 | 587 } |
| 588 } | |
| 2256 | 589 |
| 590 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, | |
| 591 int line_size) | |
| 592 { | |
| 593 int i; | |
| 4176 | 594 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
| 2967 | 595 |
| 2256 | 596 /* read the pixels */ |
| 597 for(i=0;i<4;i++) { | |
| 598 pixels[0] = cm[pixels[0] + block[0]]; | |
| 599 pixels[1] = cm[pixels[1] + block[1]]; | |
| 600 pixels[2] = cm[pixels[2] + block[2]]; | |
| 601 pixels[3] = cm[pixels[3] + block[3]]; | |
| 602 pixels += line_size; | |
| 603 block += 8; | |
| 604 } | |
| 605 } | |
| 2257 | 606 |
| 607 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, | |
| 608 int line_size) | |
| 609 { | |
| 610 int i; | |
| 4176 | 611 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
| 2967 | 612 |
| 2257 | 613 /* read the pixels */ |
| 614 for(i=0;i<2;i++) { | |
| 615 pixels[0] = cm[pixels[0] + block[0]]; | |
| 616 pixels[1] = cm[pixels[1] + block[1]]; | |
| 617 pixels += line_size; | |
| 618 block += 8; | |
| 619 } | |
| 620 } | |
| 2763 | 621 |
| 622 static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) | |
| 623 { | |
| 624 int i; | |
| 625 for(i=0;i<8;i++) { | |
| 626 pixels[0] += block[0]; | |
| 627 pixels[1] += block[1]; | |
| 628 pixels[2] += block[2]; | |
| 629 pixels[3] += block[3]; | |
| 630 pixels[4] += block[4]; | |
| 631 pixels[5] += block[5]; | |
| 632 pixels[6] += block[6]; | |
| 633 pixels[7] += block[7]; | |
| 634 pixels += line_size; | |
| 635 block += 8; | |
| 636 } | |
| 637 } | |
| 638 | |
| 639 static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) | |
| 640 { | |
| 641 int i; | |
| 642 for(i=0;i<4;i++) { | |
| 643 pixels[0] += block[0]; | |
| 644 pixels[1] += block[1]; | |
| 645 pixels[2] += block[2]; | |
| 646 pixels[3] += block[3]; | |
| 647 pixels += line_size; | |
| 648 block += 4; | |
| 649 } | |
| 650 } | |
| 651 | |
|
4988
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
652 static int sum_abs_dctelem_c(DCTELEM *block) |
|
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
653 { |
|
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
654 int sum=0, i; |
|
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
655 for(i=0; i<64; i++) |
|
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
656 sum+= FFABS(block[i]); |
|
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
657 return sum; |
|
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
658 } |
|
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
659 |
/**
 * Fill a 16-byte-wide, h-row-high area with a constant byte.
 *
 * @param block     first byte of the top row
 * @param value     byte written to every position
 * @param line_size distance in bytes between two rows
 * @param h         number of rows; nothing is written when h <= 0
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        memset(row, value, 16);
        row += line_size;
    }
}
| 669 | |
/**
 * Fill an 8-byte-wide, h-row-high area with a constant byte.
 *
 * @param block     first byte of the top row
 * @param value     byte written to every position
 * @param line_size distance in bytes between two rows
 * @param h         number of rows; nothing is written when h <= 0
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        memset(row, value, 8);
        row += line_size;
    }
}
| 679 | |
/**
 * Upscale an 8x8 block to 16x16 by duplicating every source pixel into a
 * 2x2 square of the destination.
 *
 * The destination is written with plain byte stores. This yields the same
 * bytes as storing src[i] * 0x0101 as a 16-bit value (both bytes of that
 * value equal src[i], whatever the endianness), but does not access the
 * uint8_t buffer through a uint16_t pointer, so it is free of strict-aliasing
 * and alignment problems even when dst or linesize is odd.
 *
 * @param src      64 source pixels, 8 per row
 * @param dst      top-left byte of the 16x16 destination area
 * @param linesize distance in bytes between two destination rows
 */
static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize)
{
    int i, j;

    for (j = 0; j < 8; j++) {
        /* every source row produces two identical destination rows */
        uint8_t *row0 = dst;
        uint8_t *row1 = dst + linesize;

        for (i = 0; i < 8; i++) {
            const uint8_t v = src[i];

            row0[2 * i] = row0[2 * i + 1] = v;
            row1[2 * i] = row1[2 * i + 1] = v;
        }

        src += 8;
        dst  = row1 + linesize;
    }
}
| 695 | |
| 385 | 696 #if 0 |
| 697 | |
| 698 #define PIXOP2(OPNAME, OP) \ | |
| 651 | 699 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
| 385 | 700 {\ |
| 701 int i;\ | |
| 702 for(i=0; i<h; i++){\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
703 OP(*((uint64_t*)block), AV_RN64(pixels));\ |
| 385 | 704 pixels+=line_size;\ |
| 705 block +=line_size;\ | |
| 706 }\ | |
| 707 }\ | |
| 708 \ | |
| 859 | 709 static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
| 385 | 710 {\ |
| 711 int i;\ | |
| 712 for(i=0; i<h; i++){\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
713 const uint64_t a= AV_RN64(pixels );\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
714 const uint64_t b= AV_RN64(pixels+1);\ |
| 385 | 715 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ |
| 716 pixels+=line_size;\ | |
| 717 block +=line_size;\ | |
| 718 }\ | |
| 719 }\ | |
| 720 \ | |
| 859 | 721 static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
| 385 | 722 {\ |
| 723 int i;\ | |
| 724 for(i=0; i<h; i++){\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
725 const uint64_t a= AV_RN64(pixels );\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
726 const uint64_t b= AV_RN64(pixels+1);\ |
| 385 | 727 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ |
| 728 pixels+=line_size;\ | |
| 729 block +=line_size;\ | |
| 730 }\ | |
| 731 }\ | |
| 732 \ | |
| 859 | 733 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
| 385 | 734 {\ |
| 735 int i;\ | |
| 736 for(i=0; i<h; i++){\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
737 const uint64_t a= AV_RN64(pixels );\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
738 const uint64_t b= AV_RN64(pixels+line_size);\ |
| 385 | 739 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ |
| 740 pixels+=line_size;\ | |
| 741 block +=line_size;\ | |
| 742 }\ | |
| 743 }\ | |
| 744 \ | |
| 859 | 745 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
| 385 | 746 {\ |
| 747 int i;\ | |
| 748 for(i=0; i<h; i++){\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
749 const uint64_t a= AV_RN64(pixels );\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
750 const uint64_t b= AV_RN64(pixels+line_size);\ |
| 385 | 751 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ |
| 752 pixels+=line_size;\ | |
| 753 block +=line_size;\ | |
| 754 }\ | |
| 755 }\ | |
| 756 \ | |
| 859 | 757 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
| 385 | 758 {\ |
| 759 int i;\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
760 const uint64_t a= AV_RN64(pixels );\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
761 const uint64_t b= AV_RN64(pixels+1);\ |
| 385 | 762 uint64_t l0= (a&0x0303030303030303ULL)\ |
| 763 + (b&0x0303030303030303ULL)\ | |
| 764 + 0x0202020202020202ULL;\ | |
| 765 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
| 766 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
| 767 uint64_t l1,h1;\ | |
| 768 \ | |
| 769 pixels+=line_size;\ | |
| 770 for(i=0; i<h; i+=2){\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
771 uint64_t a= AV_RN64(pixels );\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
772 uint64_t b= AV_RN64(pixels+1);\ |
| 385 | 773 l1= (a&0x0303030303030303ULL)\ |
| 774 + (b&0x0303030303030303ULL);\ | |
| 775 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
| 776 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
| 777 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
| 778 pixels+=line_size;\ | |
| 779 block +=line_size;\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
780 a= AV_RN64(pixels );\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
781 b= AV_RN64(pixels+1);\ |
| 385 | 782 l0= (a&0x0303030303030303ULL)\ |
| 783 + (b&0x0303030303030303ULL)\ | |
| 784 + 0x0202020202020202ULL;\ | |
| 785 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
| 786 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
| 787 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
| 788 pixels+=line_size;\ | |
| 789 block +=line_size;\ | |
| 790 }\ | |
| 791 }\ | |
| 792 \ | |
| 859 | 793 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
| 385 | 794 {\ |
| 795 int i;\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
796 const uint64_t a= AV_RN64(pixels );\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
797 const uint64_t b= AV_RN64(pixels+1);\ |
| 385 | 798 uint64_t l0= (a&0x0303030303030303ULL)\ |
| 799 + (b&0x0303030303030303ULL)\ | |
| 800 + 0x0101010101010101ULL;\ | |
| 801 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
| 802 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
| 803 uint64_t l1,h1;\ | |
| 804 \ | |
| 805 pixels+=line_size;\ | |
| 806 for(i=0; i<h; i+=2){\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
807 uint64_t a= AV_RN64(pixels );\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
808 uint64_t b= AV_RN64(pixels+1);\ |
| 385 | 809 l1= (a&0x0303030303030303ULL)\ |
| 810 + (b&0x0303030303030303ULL);\ | |
| 811 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
| 812 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
| 813 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
| 814 pixels+=line_size;\ | |
| 815 block +=line_size;\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
816 a= AV_RN64(pixels );\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
817 b= AV_RN64(pixels+1);\ |
| 385 | 818 l0= (a&0x0303030303030303ULL)\ |
| 819 + (b&0x0303030303030303ULL)\ | |
| 820 + 0x0101010101010101ULL;\ | |
| 821 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
| 822 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
| 823 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
| 824 pixels+=line_size;\ | |
| 825 block +=line_size;\ | |
| 826 }\ | |
| 827 }\ | |
| 828 \ | |
| 859 | 829 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\ |
| 830 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\ | |
| 831 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\ | |
| 832 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\ | |
| 833 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\ | |
| 834 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\ | |
| 835 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8) | |
| 385 | 836 |
| 837 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) | |
| 838 #else // end of the disabled 64 bit variant; the 32 bit variant follows | |
| 839 | |
| 840 #define PIXOP2(OPNAME, OP) \ | |
|
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
841 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
842 int i;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
843 for(i=0; i<h; i++){\ |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
844 OP(*((uint16_t*)(block )), AV_RN16(pixels ));\ |
|
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
845 pixels+=line_size;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
846 block +=line_size;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
847 }\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
848 }\ |
| 1168 | 849 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
| 850 int i;\ | |
| 851 for(i=0; i<h; i++){\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
852 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ |
| 1168 | 853 pixels+=line_size;\ |
| 854 block +=line_size;\ | |
| 855 }\ | |
| 856 }\ | |
| 859 | 857 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
| 385 | 858 int i;\ |
| 859 for(i=0; i<h; i++){\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
860 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
861 OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\ |
| 385 | 862 pixels+=line_size;\ |
| 863 block +=line_size;\ | |
| 864 }\ | |
| 865 }\ | |
| 859 | 866 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
| 867 OPNAME ## _pixels8_c(block, pixels, line_size, h);\ | |
| 651 | 868 }\ |
| 385 | 869 \ |
| 651 | 870 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
| 871 int src_stride1, int src_stride2, int h){\ | |
| 385 | 872 int i;\ |
| 873 for(i=0; i<h; i++){\ | |
| 651 | 874 uint32_t a,b;\ |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
875 a= AV_RN32(&src1[i*src_stride1 ]);\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
876 b= AV_RN32(&src2[i*src_stride2 ]);\ |
| 1264 | 877 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\ |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
878 a= AV_RN32(&src1[i*src_stride1+4]);\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
879 b= AV_RN32(&src2[i*src_stride2+4]);\ |
| 1264 | 880 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\ |
| 385 | 881 }\ |
| 882 }\ | |
| 883 \ | |
| 651 | 884 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
| 885 int src_stride1, int src_stride2, int h){\ | |
| 385 | 886 int i;\ |
| 887 for(i=0; i<h; i++){\ | |
| 651 | 888 uint32_t a,b;\ |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
889 a= AV_RN32(&src1[i*src_stride1 ]);\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
890 b= AV_RN32(&src2[i*src_stride2 ]);\ |
| 1264 | 891 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
892 a= AV_RN32(&src1[i*src_stride1+4]);\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
893 b= AV_RN32(&src2[i*src_stride2+4]);\ |
| 1264 | 894 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\ |
| 385 | 895 }\ |
| 896 }\ | |
| 897 \ | |
| 1168 | 898 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
| 899 int src_stride1, int src_stride2, int h){\ | |
| 900 int i;\ | |
| 901 for(i=0; i<h; i++){\ | |
| 902 uint32_t a,b;\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
903 a= AV_RN32(&src1[i*src_stride1 ]);\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
904 b= AV_RN32(&src2[i*src_stride2 ]);\ |
| 1264 | 905 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
| 1168 | 906 }\ |
| 907 }\ | |
| 908 \ | |
|
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
909 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
910 int src_stride1, int src_stride2, int h){\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
911 int i;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
912 for(i=0; i<h; i++){\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
913 uint32_t a,b;\ |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
914 a= AV_RN16(&src1[i*src_stride1 ]);\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
915 b= AV_RN16(&src2[i*src_stride2 ]);\ |
|
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
916 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
917 }\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
918 }\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
919 \ |
| 651 | 920 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
| 921 int src_stride1, int src_stride2, int h){\ | |
| 922 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | |
| 923 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | |
| 924 }\ | |
| 925 \ | |
| 926 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | |
| 927 int src_stride1, int src_stride2, int h){\ | |
| 928 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | |
| 929 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | |
| 930 }\ | |
| 931 \ | |
| 859 | 932 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
| 651 | 933 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
| 934 }\ | |
| 935 \ | |
| 859 | 936 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
| 651 | 937 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
| 938 }\ | |
| 939 \ | |
| 859 | 940 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
| 651 | 941 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
| 942 }\ | |
| 943 \ | |
| 859 | 944 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
| 651 | 945 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
| 385 | 946 }\ |
| 947 \ | |
| 11783 | 948 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ |
| 651 | 949 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
| 950 int i;\ | |
| 951 for(i=0; i<h; i++){\ | |
| 952 uint32_t a, b, c, d, l0, l1, h0, h1;\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
953 a= AV_RN32(&src1[i*src_stride1]);\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
954 b= AV_RN32(&src2[i*src_stride2]);\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
955 c= AV_RN32(&src3[i*src_stride3]);\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
956 d= AV_RN32(&src4[i*src_stride4]);\ |
| 651 | 957 l0= (a&0x03030303UL)\ |
| 958 + (b&0x03030303UL)\ | |
| 959 + 0x02020202UL;\ | |
| 960 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
| 961 + ((b&0xFCFCFCFCUL)>>2);\ | |
| 962 l1= (c&0x03030303UL)\ | |
| 963 + (d&0x03030303UL);\ | |
| 964 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
| 965 + ((d&0xFCFCFCFCUL)>>2);\ | |
| 966 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
967 a= AV_RN32(&src1[i*src_stride1+4]);\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
968 b= AV_RN32(&src2[i*src_stride2+4]);\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
969 c= AV_RN32(&src3[i*src_stride3+4]);\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
970 d= AV_RN32(&src4[i*src_stride4+4]);\ |
| 651 | 971 l0= (a&0x03030303UL)\ |
| 972 + (b&0x03030303UL)\ | |
| 973 + 0x02020202UL;\ | |
| 974 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
| 975 + ((b&0xFCFCFCFCUL)>>2);\ | |
| 976 l1= (c&0x03030303UL)\ | |
| 977 + (d&0x03030303UL);\ | |
| 978 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
| 979 + ((d&0xFCFCFCFCUL)>>2);\ | |
| 980 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
| 981 }\ | |
| 982 }\ | |
|
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
983 \ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
984 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
985 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
986 }\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
987 \ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
988 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
989 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
990 }\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
991 \ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
992 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
993 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
994 }\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
995 \ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
996 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
997 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
998 }\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
999 \ |
| 11783 | 1000 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ |
| 651 | 1001 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
| 385 | 1002 int i;\ |
| 1003 for(i=0; i<h; i++){\ | |
| 651 | 1004 uint32_t a, b, c, d, l0, l1, h0, h1;\ |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1005 a= AV_RN32(&src1[i*src_stride1]);\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1006 b= AV_RN32(&src2[i*src_stride2]);\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1007 c= AV_RN32(&src3[i*src_stride3]);\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1008 d= AV_RN32(&src4[i*src_stride4]);\ |
| 651 | 1009 l0= (a&0x03030303UL)\ |
| 1010 + (b&0x03030303UL)\ | |
| 1011 + 0x01010101UL;\ | |
| 1012 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
| 1013 + ((b&0xFCFCFCFCUL)>>2);\ | |
| 1014 l1= (c&0x03030303UL)\ | |
| 1015 + (d&0x03030303UL);\ | |
| 1016 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
| 1017 + ((d&0xFCFCFCFCUL)>>2);\ | |
| 1018 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1019 a= AV_RN32(&src1[i*src_stride1+4]);\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1020 b= AV_RN32(&src2[i*src_stride2+4]);\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1021 c= AV_RN32(&src3[i*src_stride3+4]);\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1022 d= AV_RN32(&src4[i*src_stride4+4]);\ |
| 651 | 1023 l0= (a&0x03030303UL)\ |
| 1024 + (b&0x03030303UL)\ | |
| 1025 + 0x01010101UL;\ | |
| 1026 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
| 1027 + ((b&0xFCFCFCFCUL)>>2);\ | |
| 1028 l1= (c&0x03030303UL)\ | |
| 1029 + (d&0x03030303UL);\ | |
| 1030 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
| 1031 + ((d&0xFCFCFCFCUL)>>2);\ | |
| 1032 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
| 385 | 1033 }\ |
| 1034 }\ | |
| 11783 | 1035 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ |
| 651 | 1036 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
| 1037 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
| 1038 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
| 1039 }\ | |
| 11783 | 1040 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ |
| 651 | 1041 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
| 1042 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
| 1043 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
| 1044 }\ | |
| 385 | 1045 \ |
|
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1046 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1047 {\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1048 int i, a0, b0, a1, b1;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1049 a0= pixels[0];\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1050 b0= pixels[1] + 2;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1051 a0 += b0;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1052 b0 += pixels[2];\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1053 \ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1054 pixels+=line_size;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1055 for(i=0; i<h; i+=2){\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1056 a1= pixels[0];\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1057 b1= pixels[1];\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1058 a1 += b1;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1059 b1 += pixels[2];\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1060 \ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1061 block[0]= (a1+a0)>>2; /* FIXME non put */\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1062 block[1]= (b1+b0)>>2;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1063 \ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1064 pixels+=line_size;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1065 block +=line_size;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1066 \ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1067 a0= pixels[0];\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1068 b0= pixels[1] + 2;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1069 a0 += b0;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1070 b0 += pixels[2];\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1071 \ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1072 block[0]= (a1+a0)>>2;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1073 block[1]= (b1+b0)>>2;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1074 pixels+=line_size;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1075 block +=line_size;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1076 }\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1077 }\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1078 \ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1079 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1080 {\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1081 int i;\ |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1082 const uint32_t a= AV_RN32(pixels );\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1083 const uint32_t b= AV_RN32(pixels+1);\ |
|
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1084 uint32_t l0= (a&0x03030303UL)\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1085 + (b&0x03030303UL)\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1086 + 0x02020202UL;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1087 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1088 + ((b&0xFCFCFCFCUL)>>2);\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1089 uint32_t l1,h1;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1090 \ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1091 pixels+=line_size;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1092 for(i=0; i<h; i+=2){\ |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1093 uint32_t a= AV_RN32(pixels );\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1094 uint32_t b= AV_RN32(pixels+1);\ |
|
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1095 l1= (a&0x03030303UL)\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1096 + (b&0x03030303UL);\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1097 h1= ((a&0xFCFCFCFCUL)>>2)\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1098 + ((b&0xFCFCFCFCUL)>>2);\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1099 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1100 pixels+=line_size;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1101 block +=line_size;\ |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1102 a= AV_RN32(pixels );\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1103 b= AV_RN32(pixels+1);\ |
|
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1104 l0= (a&0x03030303UL)\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1105 + (b&0x03030303UL)\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1106 + 0x02020202UL;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1107 h0= ((a&0xFCFCFCFCUL)>>2)\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1108 + ((b&0xFCFCFCFCUL)>>2);\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1109 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1110 pixels+=line_size;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1111 block +=line_size;\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1112 }\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1113 }\ |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1114 \ |
| 859 | 1115 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
| 385 | 1116 {\ |
| 1117 int j;\ | |
| 1118 for(j=0; j<2; j++){\ | |
| 1119 int i;\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1120 const uint32_t a= AV_RN32(pixels );\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1121 const uint32_t b= AV_RN32(pixels+1);\ |
| 385 | 1122 uint32_t l0= (a&0x03030303UL)\ |
| 1123 + (b&0x03030303UL)\ | |
| 1124 + 0x02020202UL;\ | |
| 1125 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
| 1126 + ((b&0xFCFCFCFCUL)>>2);\ | |
| 1127 uint32_t l1,h1;\ | |
| 1128 \ | |
| 1129 pixels+=line_size;\ | |
| 1130 for(i=0; i<h; i+=2){\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1131 uint32_t a= AV_RN32(pixels );\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1132 uint32_t b= AV_RN32(pixels+1);\ |
| 385 | 1133 l1= (a&0x03030303UL)\ |
| 1134 + (b&0x03030303UL);\ | |
| 1135 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
| 1136 + ((b&0xFCFCFCFCUL)>>2);\ | |
| 1137 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
| 1138 pixels+=line_size;\ | |
| 1139 block +=line_size;\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1140 a= AV_RN32(pixels );\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1141 b= AV_RN32(pixels+1);\ |
| 385 | 1142 l0= (a&0x03030303UL)\ |
| 1143 + (b&0x03030303UL)\ | |
| 1144 + 0x02020202UL;\ | |
| 1145 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
| 1146 + ((b&0xFCFCFCFCUL)>>2);\ | |
| 1147 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
| 1148 pixels+=line_size;\ | |
| 1149 block +=line_size;\ | |
| 1150 }\ | |
| 1151 pixels+=4-line_size*(h+1);\ | |
| 1152 block +=4-line_size*h;\ | |
| 1153 }\ | |
| 1154 }\ | |
| 1155 \ | |
| 859 | 1156 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
| 385 | 1157 {\ |
| 1158 int j;\ | |
| 1159 for(j=0; j<2; j++){\ | |
| 1160 int i;\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1161 const uint32_t a= AV_RN32(pixels );\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1162 const uint32_t b= AV_RN32(pixels+1);\ |
| 385 | 1163 uint32_t l0= (a&0x03030303UL)\ |
| 1164 + (b&0x03030303UL)\ | |
| 1165 + 0x01010101UL;\ | |
| 1166 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
| 1167 + ((b&0xFCFCFCFCUL)>>2);\ | |
| 1168 uint32_t l1,h1;\ | |
| 1169 \ | |
| 1170 pixels+=line_size;\ | |
| 1171 for(i=0; i<h; i+=2){\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1172 uint32_t a= AV_RN32(pixels );\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1173 uint32_t b= AV_RN32(pixels+1);\ |
| 385 | 1174 l1= (a&0x03030303UL)\ |
| 1175 + (b&0x03030303UL);\ | |
| 1176 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
| 1177 + ((b&0xFCFCFCFCUL)>>2);\ | |
| 1178 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
| 1179 pixels+=line_size;\ | |
| 1180 block +=line_size;\ | |
|
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1181 a= AV_RN32(pixels );\ |
|
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1182 b= AV_RN32(pixels+1);\ |
| 385 | 1183 l0= (a&0x03030303UL)\ |
| 1184 + (b&0x03030303UL)\ | |
| 1185 + 0x01010101UL;\ | |
| 1186 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
| 1187 + ((b&0xFCFCFCFCUL)>>2);\ | |
| 1188 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
| 1189 pixels+=line_size;\ | |
| 1190 block +=line_size;\ | |
| 1191 }\ | |
| 1192 pixels+=4-line_size*(h+1);\ | |
| 1193 block +=4-line_size*h;\ | |
| 1194 }\ | |
| 1195 }\ | |
| 1196 \ | |
| 859 | 1197 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\ |
| 1198 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\ | |
| 1199 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\ | |
| 1200 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\ | |
| 1201 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\ | |
| 1202 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\ | |
| 1203 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\ | |
| 1204 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\ | |
| 651 | 1205 |
| 1264 | 1206 #define op_avg(a, b) a = rnd_avg32(a, b) |
| 385 | 1207 #endif |
| 1208 #define op_put(a, b) a = b | |
| 1209 | |
| 1210 PIXOP2(avg, op_avg) | |
| 1211 PIXOP2(put, op_put) | |
| 1212 #undef op_avg | |
| 1213 #undef op_put | |
| 1214 | |
/* Rounded-up integer averages of two / four values.
 * Every argument is parenthesized so that expressions with low-precedence
 * operators (e.g. a|b, c ? x : y) are averaged as a whole.
 * Note: each argument is still evaluated exactly once, but avoid
 * side effects anyway since these are macros. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
| 1217 | |
/**
 * Single-stride adapter around put_no_rnd_pixels16_l2().
 *
 * Forwards to the three-stride variant, passing the same stride for the
 * destination and for both sources.
 *
 * @param dst    destination block
 * @param a      first source block
 * @param b      second source block
 * @param stride row distance in bytes, shared by dst, a and b
 * @param h      number of rows
 */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
{
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}
| 1221 | |
/**
 * Single-stride adapter around put_no_rnd_pixels8_l2().
 *
 * Forwards to the three-stride variant, passing the same stride for the
 * destination and for both sources.
 *
 * @param dst    destination block
 * @param a      first source block
 * @param b      second source block
 * @param stride row distance in bytes, shared by dst, a and b
 * @param h      number of rows
 */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
{
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
| 753 | 1225 |
/**
 * Bilinear interpolation of an 8-pixel-wide block at a fractional offset
 * given in 1/16 sample units.
 *
 * For every output sample the four neighbours (same row / next row,
 * same column / next column) are blended; each output row therefore reads
 * 9 samples from two consecutive source rows.
 *
 * @param dst     destination; 8 bytes are written per row
 * @param src     source; rows 0..h are read, 9 bytes each
 * @param stride  row distance in bytes, used for both dst and src
 * @param h       number of output rows
 * @param x16     horizontal weight of the right-hand neighbour (of 16)
 * @param y16     vertical weight of the lower neighbour (of 16)
 * @param rounder value added to the weighted sum before the >>8
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    /* The four weights always add up to 16*16 = 256, hence the >>8 below. */
    const int w_tl = (16 - x16) * (16 - y16);
    const int w_tr = (     x16) * (16 - y16);
    const int w_bl = (16 - x16) * (     y16);
    const int w_br = (     x16) * (     y16);
    int row, col;

    for (row = 0; row < h; row++) {
        /* Columns are processed left to right, one store per column. */
        for (col = 0; col < 8; col++) {
            dst[col] = (w_tl * src[col]          + w_tr * src[col + 1]
                      + w_bl * src[stride + col] + w_br * src[stride + col + 1]
                      + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
| 1248 | |
|
3248
7aa9f80e7954
mmx implementation of 3-point GMC. (5x faster than C)
lorenm
parents:
3245
diff
changeset
|
/**
 * Global motion compensation of an 8-pixel-wide block with an affine
 * source-coordinate mapping and bilinear interpolation.
 *
 * Source coordinates are tracked in fixed point: the low 16 bits are
 * dropped, the next `shift` bits are the interpolation fraction and the
 * remaining bits are the integer sample position.
 *
 * @param dst    destination; 8 bytes are written per row at dst[y*stride + x]
 * @param src    source plane
 * @param stride row distance in bytes, used for both dst and src
 * @param h      number of output rows
 * @param ox     source x coordinate of the first output sample
 * @param oy     source y coordinate of the first output sample
 * @param dxx    increment of the source x coordinate per output column
 * @param dxy    increment of the source x coordinate per output row
 * @param dyx    increment of the source y coordinate per output column
 * @param dyy    increment of the source y coordinate per output row
 * @param shift  number of fractional bits used for interpolation
 * @param r      value added to the weighted sum before the >>(2*shift)
 * @param width  source width in samples
 * @param height source height in samples
 *
 * NOTE(review): av_clip() is defined outside this block; it is assumed to
 * clamp its first argument to [min, max] - confirm against libavutil.
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int s = 1 << shift; /* weight corresponding to one full sample */
    int y;

    /* From here on these are the last valid column / row index. A position
     * strictly below them still has a right / lower neighbour to blend with. */
    width--;
    height--;

    for (y = 0; y < h; y++) {
        uint8_t *out = dst + y * stride;
        int vx = ox;
        int vy = oy;
        int x;

        for (x = 0; x < 8; x++) { //XXX FIXME optimize
            int src_x = vx >> 16;
            int src_y = vy >> 16;
            const int frac_x = src_x & (s - 1);
            const int frac_y = src_y & (s - 1);
            int index;

            src_x >>= shift;
            src_y >>= shift;

            /* The unsigned casts reject negative positions and positions at or
             * beyond the last index with a single comparison each. */
            if ((unsigned)src_x < width) {
                if ((unsigned)src_y < height) {
                    /* Inside in both directions: full 2x2 blend. */
                    index = src_x + src_y * stride;
                    out[x] = ( ( src[index           ] * (s - frac_x)
                               + src[index          + 1] * frac_x ) * (s - frac_y)
                             + ( src[index + stride   ] * (s - frac_x)
                               + src[index + stride + 1] * frac_x ) * frac_y
                             + r) >> (shift * 2);
                } else {
                    /* Row out of range: clamp it, blend horizontally only. */
                    index = src_x + av_clip(src_y, 0, height) * stride;
                    out[x] = ( ( src[index    ] * (s - frac_x)
                               + src[index + 1] * frac_x ) * s
                             + r) >> (shift * 2);
                }
            } else {
                if ((unsigned)src_y < height) {
                    /* Column out of range: clamp it, blend vertically only. */
                    index = av_clip(src_x, 0, width) + src_y * stride;
                    out[x] = ( ( src[index         ] * (s - frac_y)
                               + src[index + stride] * frac_y ) * s
                             + r) >> (shift * 2);
                } else {
                    /* Out of range in both directions: copy the clamped sample. */
                    index = av_clip(src_x, 0, width) + av_clip(src_y, 0, height) * stride;
                    out[x] = src[index];
                }
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
|
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1306 |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Third-pel motion compensation, offset (0,0): plain block copy.
 *
 * Dispatches on the block width to the matching put_pixelsN_c() routine.
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride row distance in bytes, shared by dst and src
 * @param width  block width; only 2, 4, 8 and 16 are handled
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_c (dst, src, stride, height); break;
    case 4: put_pixels4_c (dst, src, stride, height); break;
    case 8: put_pixels8_c (dst, src, stride, height); break;
    case 16:put_pixels16_c(dst, src, stride, height); break;
    default: break; /* any other width: nothing is written */
    }
}
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1315 |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Third-pel motion compensation, horizontal offset 1/3:
 * dst = (2*left + right) / 3, rounded.
 *
 * @param dst    destination block
 * @param src    source block; width+1 samples are read per row
 * @param stride row distance in bytes, shared by dst and src
 * @param width  number of output samples per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            /* 683/2048 approximates 1/3; the +1 provides rounding. */
            const int weighted = 2*src[col] + src[col+1] + 1;
            dst[col] = (683*weighted) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1326 |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Third-pel motion compensation, horizontal offset 2/3:
 * dst = (left + 2*right) / 3, rounded.
 *
 * @param dst    destination block
 * @param src    source block; width+1 samples are read per row
 * @param stride row distance in bytes, shared by dst and src
 * @param width  number of output samples per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            /* 683/2048 approximates 1/3; the +1 provides rounding. */
            const int weighted = src[col] + 2*src[col+1] + 1;
            dst[col] = (683*weighted) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
| 2967 | 1337 |
|
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Third-pel motion compensation, vertical offset 1/3:
 * dst = (2*upper + lower) / 3, rounded.
 *
 * @param dst    destination block
 * @param src    source block; height+1 rows are read
 * @param stride row distance in bytes, shared by dst and src
 * @param width  number of output samples per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            /* 683/2048 approximates 1/3; the +1 provides rounding. */
            const int weighted = 2*src[col] + src[col+stride] + 1;
            dst[col] = (683*weighted) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
| 2967 | 1348 |
|
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Third-pel motion compensation, offset (1/3, 1/3):
 * dst = (4*tl + 3*tr + 3*bl + 2*br) / 12, rounded, where tl/tr are the
 * current and next sample of this row and bl/br those of the next row.
 *
 * @param dst    destination block
 * @param src    source block; width+1 samples of height+1 rows are read
 * @param stride row distance in bytes, shared by dst and src
 * @param width  number of output samples per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            /* 2731/32768 approximates 1/12; the +6 provides rounding. */
            const int weighted = 4*src[col] + 3*src[col+1]
                               + 3*src[col+stride] + 2*src[col+stride+1] + 6;
            dst[col] = (2731*weighted) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1359 |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Writes a width x height block to dst where every output pixel is a
 * weighted mix of the 2x2 source neighbourhood with weights 3,2,4,3
 * (top-left, top-right, bottom-left, bottom-right).
 * The weights sum to 12; "* 2731 >> 15" approximates the division by 12
 * and the "+ 6" term rounds to nearest.
 * src must provide one extra column and one extra row beyond the block.
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int tl = src[col];
            const int tr = src[col + 1];
            const int bl = src[col + stride];
            const int br = src[col + stride + 1];
            dst[col] = (2731 * (3*tl + 2*tr + 4*bl + 3*br + 6)) >> 15;
        }
    }
}
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1370 |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Writes a width x height block to dst where every output pixel mixes the
 * source pixel (weight 1) with the pixel directly below it (weight 2).
 * "* 683 >> 11" approximates the division by 3 and the "+ 1" term rounds.
 * src must provide one extra row beyond the block.
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int upper = src[col];
            const int lower = src[col + stride];
            dst[col] = (683 * (upper + 2*lower + 1)) >> 11;
        }
    }
}
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1381 |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Writes a width x height block to dst where every output pixel is a
 * weighted mix of the 2x2 source neighbourhood with weights 3,4,2,3
 * (top-left, top-right, bottom-left, bottom-right).
 * The weights sum to 12; "* 2731 >> 15" approximates the division by 12
 * and the "+ 6" term rounds to nearest.
 * src must provide one extra column and one extra row beyond the block.
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int tl = src[col];
            const int tr = src[col + 1];
            const int bl = src[col + stride];
            const int br = src[col + stride + 1];
            dst[col] = (2731 * (3*tl + 4*tr + 2*bl + 3*br + 6)) >> 15;
        }
    }
}
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1392 |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Writes a width x height block to dst where every output pixel is a
 * weighted mix of the 2x2 source neighbourhood with weights 2,3,3,4
 * (top-left, top-right, bottom-left, bottom-right).
 * The weights sum to 12; "* 2731 >> 15" approximates the division by 12
 * and the "+ 6" term rounds to nearest.
 * src must provide one extra column and one extra row beyond the block.
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int tl = src[col];
            const int tr = src[col + 1];
            const int bl = src[col + stride];
            const int br = src[col + stride + 1];
            dst[col] = (2731 * (2*tl + 3*tr + 3*bl + 4*br + 6)) >> 15;
        }
    }
}
| 1319 | 1403 |
/**
 * Full-pel "avg" case: forwards to the fixed-width avg_pixelsN_c helper
 * matching width (2, 4, 8 or 16). Any other width is silently ignored and
 * dst is left untouched.
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_c(dst, src, stride, height);
}
| 1412 | |
/**
 * Averages (rounding up) the existing dst block with a horizontal
 * interpolation of src: weight 2 on the pixel itself, weight 1 on its right
 * neighbour. "* 683 >> 11" approximates the division by 3.
 * src must provide one extra column beyond the block.
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int interp = (683 * (2*src[col] + src[col + 1] + 1)) >> 11;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
| 1423 | |
/**
 * Averages (rounding up) the existing dst block with a horizontal
 * interpolation of src: weight 1 on the pixel itself, weight 2 on its right
 * neighbour. "* 683 >> 11" approximates the division by 3.
 * src must provide one extra column beyond the block.
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int interp = (683 * (src[col] + 2*src[col + 1] + 1)) >> 11;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
| 2967 | 1434 |
/**
 * Averages (rounding up) the existing dst block with a vertical
 * interpolation of src: weight 2 on the pixel itself, weight 1 on the pixel
 * directly below. "* 683 >> 11" approximates the division by 3.
 * src must provide one extra row beyond the block.
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int interp = (683 * (2*src[col] + src[col + stride] + 1)) >> 11;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
| 2967 | 1445 |
/**
 * Averages (rounding up) the existing dst block with a 2x2 interpolation of
 * src using weights 4,3,3,2 (top-left, top-right, bottom-left,
 * bottom-right). "* 2731 >> 15" approximates the division by 12.
 * src must provide one extra column and one extra row beyond the block.
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int tl = src[col];
            const int tr = src[col + 1];
            const int bl = src[col + stride];
            const int br = src[col + stride + 1];
            const int interp = (2731 * (4*tl + 3*tr + 3*bl + 2*br + 6)) >> 15;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
| 1456 | |
/**
 * Averages (rounding up) the existing dst block with a 2x2 interpolation of
 * src using weights 3,2,4,3 (top-left, top-right, bottom-left,
 * bottom-right). "* 2731 >> 15" approximates the division by 12.
 * src must provide one extra column and one extra row beyond the block.
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int tl = src[col];
            const int tr = src[col + 1];
            const int bl = src[col + stride];
            const int br = src[col + stride + 1];
            const int interp = (2731 * (3*tl + 2*tr + 4*bl + 3*br + 6)) >> 15;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
| 1467 | |
/**
 * Averages (rounding up) the existing dst block with a vertical
 * interpolation of src: weight 1 on the pixel itself, weight 2 on the pixel
 * directly below. "* 683 >> 11" approximates the division by 3.
 * src must provide one extra row beyond the block.
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int interp = (683 * (src[col] + 2*src[col + stride] + 1)) >> 11;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
| 1478 | |
/**
 * Averages (rounding up) the existing dst block with a 2x2 interpolation of
 * src using weights 3,4,2,3 (top-left, top-right, bottom-left,
 * bottom-right). "* 2731 >> 15" approximates the division by 12.
 * src must provide one extra column and one extra row beyond the block.
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int tl = src[col];
            const int tr = src[col + 1];
            const int bl = src[col + stride];
            const int br = src[col + stride + 1];
            const int interp = (2731 * (3*tl + 4*tr + 2*bl + 3*br + 6)) >> 15;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
| 1489 | |
/**
 * Averages (rounding up) the existing dst block with a 2x2 interpolation of
 * src using weights 2,3,3,4 (top-left, top-right, bottom-left,
 * bottom-right). "* 2731 >> 15" approximates the division by 12.
 * src must provide one extra column and one extra row beyond the block.
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int tl = src[col];
            const int tr = src[col + 1];
            const int bl = src[col + stride];
            const int br = src[col + stride + 1];
            const int interp = (2731 * (2*tl + 3*tr + 3*bl + 4*br + 6)) >> 15;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
|
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/*
 * Disabled code: TPEL_WIDTH(width) would generate fixed-width wrappers
 * (put_tpel_pixels<width>_mcXY_c) that forward to the generic
 * put_tpel_pixels_mcXY_c helpers with the width baked in.
 * Each wrapper body is a plain forwarding call; a leading "void" there
 * would turn the call into an (invalid) declaration and stop the macro
 * from compiling once enabled.
 */
#if 0
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1521 |
/**
 * Generates the C H.264 chroma motion-compensation functions
 * OPNAME##h264_chroma_mc{2,4,8}_c for block widths 2, 4 and 8.
 *
 * Parameters of every generated function:
 *   dst    destination block, rows are stride bytes apart
 *   src    source block, rows are stride bytes apart
 *   stride distance in bytes between rows of both src and dst
 *   h      number of rows to produce
 *   x, y   fractional offsets in eighths, each in 0..7 (asserted)
 *
 * The bilinear weights A,B,C,D (top-left, top-right, bottom-left,
 * bottom-right) always sum to 64; OP receives the unnormalised weighted
 * sum and is responsible for rounding and the >>6 normalisation.
 *
 * Three cases are distinguished so that only pixels with a non-zero weight
 * are ever read:
 *   - D != 0: both offsets non-zero, full 2x2 filter;
 *   - B+C != 0: exactly one offset non-zero, 2-tap filter along that axis
 *     (step selects the right or the lower neighbour);
 *   - otherwise (x == 0 && y == 0): only src itself is used, so no byte to
 *     the right of or below the block is touched.
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else if(B+C){\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        for(i=0; i<h; i++){\
            OP(dst[0], A*src[0]);\
            OP(dst[1], A*src[1]);\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else if(B+C){\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        for(i=0; i<h; i++){\
            OP(dst[0], A*src[0]);\
            OP(dst[1], A*src[1]);\
            OP(dst[2], A*src[2]);\
            OP(dst[3], A*src[3]);\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else if(B+C){\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        for(i=0; i<h; i++){\
            OP(dst[0], A*src[0]);\
            OP(dst[1], A*src[1]);\
            OP(dst[2], A*src[2]);\
            OP(dst[3], A*src[3]);\
            OP(dst[4], A*src[4]);\
            OP(dst[5], A*src[5]);\
            OP(dst[6], A*src[6]);\
            OP(dst[7], A*src[7]);\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}

/* b is the weighted sum scaled by 64: add 32 to round, then >>6 to normalise.
 * op_avg additionally averages (rounding up) with the value already in a. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
| 1630 | |
|
9439
ef3a7b711cc0
Rename put_no_rnd_h264_chroma* to reflect its usage in VC1 only
conrad
parents:
9437
diff
changeset
|
/**
 * 8-pixel-wide bilinear chroma interpolation ("no rounding" variant used
 * for VC-1): writes h rows to dst.
 *
 * x and y are the fractional offsets in eighths (0..7, asserted). The four
 * weights sum to 64; the bias is 32 - 4 = 28 instead of the usual 32, so
 * results are rounded slightly downwards before the >>6 normalisation.
 * Every row reads 9 pixels from the current source row and 9 from the row
 * below, regardless of x and y.
 */
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A = (8 - x) * (8 - y);
    const int B = x * (8 - y);
    const int C = (8 - x) * y;
    const int D = x * y;
    int row;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (row = 0; row < h; row++) {
        int col;
        for (col = 0; col < 8; col++) {
            dst[col] = (A*src[col] + B*src[col + 1] + C*src[stride + col] + D*src[stride + col + 1] + 32 - 4) >> 6;
        }
        dst += stride;
        src += stride;
    }
}
| 1654 | |
/**
 * 8-pixel-wide bilinear chroma interpolation ("no rounding" variant used
 * for VC-1) whose result is combined with the existing dst contents via
 * avg2().
 *
 * x and y are the fractional offsets in eighths (0..7, asserted). The four
 * weights sum to 64; the bias is 32 - 4 = 28 instead of the usual 32.
 * Every row reads 9 pixels from the current source row and 9 from the row
 * below, regardless of x and y.
 */
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A = (8 - x) * (8 - y);
    const int B = x * (8 - y);
    const int C = (8 - x) * y;
    const int D = x * y;
    int row;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (row = 0; row < h; row++) {
        int col;
        for (col = 0; col < 8; col++) {
            dst[col] = avg2(dst[col], ((A*src[col] + B*src[col + 1] + C*src[stride + col] + D*src[stride + col + 1] + 32 - 4) >> 6));
        }
        dst += stride;
        src += stride;
    }
}
| 1678 | |
/**
 * Generate the C MPEG-4 quarter-pel motion compensation functions for one
 * store operation.
 *
 * @param r      not referenced by the macro body; kept for existing call sites
 * @param OPNAME prefix pasted onto every generated function name
 * @param RND    infix pasted after "put" (put ## RND ## ...) to pick the
 *               helper used for intermediate, non-final passes
 * @param OP     statement macro OP(dst, sum) storing one filtered sample; it
 *               may refer to the local crop-table pointer named cm
 *
 * For N in {8, 16} this emits:
 *  - OPNAME mpeg4_qpelN_h_lowpass / _v_lowpass: 8-tap filters with weights
 *    20, -6, 3, -1 per symmetric pair, reading N+1 samples per line and
 *    folding the outer taps back into that range;
 *  - OPNAME qpelN_mcXY_c: the per-position functions, built from the filters
 *    and the pixelsN_l2 averaging helpers;
 *  - ff_ OPNAME qpelN_mcXY_old_c: non-static variants which combine four
 *    planes with pixelsN_l4 (two with pixelsN_l2 for mc12/mc32).
 */
#define QPEL_MC(r, OPNAME, RND, OP) \
/* Horizontal filter: h rows of 8 outputs, each row reads src[0..8]. */\
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)\
{\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* looked up by OP */\
    int rows_left;\
\
    for (rows_left = h; rows_left > 0; rows_left--) {\
        OP(dst[0], 20*(src[0]+src[1]) - 6*(src[0]+src[2]) + 3*(src[1]+src[3]) - (src[2]+src[4]));\
        OP(dst[1], 20*(src[1]+src[2]) - 6*(src[0]+src[3]) + 3*(src[0]+src[4]) - (src[1]+src[5]));\
        OP(dst[2], 20*(src[2]+src[3]) - 6*(src[1]+src[4]) + 3*(src[0]+src[5]) - (src[0]+src[6]));\
        OP(dst[3], 20*(src[3]+src[4]) - 6*(src[2]+src[5]) + 3*(src[1]+src[6]) - (src[0]+src[7]));\
        OP(dst[4], 20*(src[4]+src[5]) - 6*(src[3]+src[6]) + 3*(src[2]+src[7]) - (src[1]+src[8]));\
        OP(dst[5], 20*(src[5]+src[6]) - 6*(src[4]+src[7]) + 3*(src[3]+src[8]) - (src[2]+src[8]));\
        OP(dst[6], 20*(src[6]+src[7]) - 6*(src[5]+src[8]) + 3*(src[4]+src[8]) - (src[3]+src[7]));\
        OP(dst[7], 20*(src[7]+src[8]) - 6*(src[6]+src[8]) + 3*(src[5]+src[7]) - (src[4]+src[6]));\
        src += srcStride;\
        dst += dstStride;\
    }\
}\
\
/* Vertical filter: 8 columns of 8 outputs, each column reads 9 source rows. */\
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride)\
{\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* looked up by OP */\
    int col, k;\
\
    for (col = 0; col < 8; col++) {\
        int s[9];\
        /* fetch the whole column before anything is stored */\
        for (k = 0; k < 9; k++)\
            s[k] = src[k*srcStride];\
        OP(dst[0*dstStride], 20*(s[0]+s[1]) - 6*(s[0]+s[2]) + 3*(s[1]+s[3]) - (s[2]+s[4]));\
        OP(dst[1*dstStride], 20*(s[1]+s[2]) - 6*(s[0]+s[3]) + 3*(s[0]+s[4]) - (s[1]+s[5]));\
        OP(dst[2*dstStride], 20*(s[2]+s[3]) - 6*(s[1]+s[4]) + 3*(s[0]+s[5]) - (s[0]+s[6]));\
        OP(dst[3*dstStride], 20*(s[3]+s[4]) - 6*(s[2]+s[5]) + 3*(s[1]+s[6]) - (s[0]+s[7]));\
        OP(dst[4*dstStride], 20*(s[4]+s[5]) - 6*(s[3]+s[6]) + 3*(s[2]+s[7]) - (s[1]+s[8]));\
        OP(dst[5*dstStride], 20*(s[5]+s[6]) - 6*(s[4]+s[7]) + 3*(s[3]+s[8]) - (s[2]+s[8]));\
        OP(dst[6*dstStride], 20*(s[6]+s[7]) - 6*(s[5]+s[8]) + 3*(s[4]+s[8]) - (s[3]+s[7]));\
        OP(dst[7*dstStride], 20*(s[7]+s[8]) - 6*(s[6]+s[8]) + 3*(s[5]+s[7]) - (s[4]+s[6]));\
        src++;\
        dst++;\
    }\
}\
\
/* Horizontal filter: h rows of 16 outputs, each row reads src[0..16]. */\
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)\
{\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* looked up by OP */\
    int rows_left;\
\
    for (rows_left = h; rows_left > 0; rows_left--) {\
        OP(dst[ 0], 20*(src[ 0]+src[ 1]) - 6*(src[ 0]+src[ 2]) + 3*(src[ 1]+src[ 3]) - (src[ 2]+src[ 4]));\
        OP(dst[ 1], 20*(src[ 1]+src[ 2]) - 6*(src[ 0]+src[ 3]) + 3*(src[ 0]+src[ 4]) - (src[ 1]+src[ 5]));\
        OP(dst[ 2], 20*(src[ 2]+src[ 3]) - 6*(src[ 1]+src[ 4]) + 3*(src[ 0]+src[ 5]) - (src[ 0]+src[ 6]));\
        OP(dst[ 3], 20*(src[ 3]+src[ 4]) - 6*(src[ 2]+src[ 5]) + 3*(src[ 1]+src[ 6]) - (src[ 0]+src[ 7]));\
        OP(dst[ 4], 20*(src[ 4]+src[ 5]) - 6*(src[ 3]+src[ 6]) + 3*(src[ 2]+src[ 7]) - (src[ 1]+src[ 8]));\
        OP(dst[ 5], 20*(src[ 5]+src[ 6]) - 6*(src[ 4]+src[ 7]) + 3*(src[ 3]+src[ 8]) - (src[ 2]+src[ 9]));\
        OP(dst[ 6], 20*(src[ 6]+src[ 7]) - 6*(src[ 5]+src[ 8]) + 3*(src[ 4]+src[ 9]) - (src[ 3]+src[10]));\
        OP(dst[ 7], 20*(src[ 7]+src[ 8]) - 6*(src[ 6]+src[ 9]) + 3*(src[ 5]+src[10]) - (src[ 4]+src[11]));\
        OP(dst[ 8], 20*(src[ 8]+src[ 9]) - 6*(src[ 7]+src[10]) + 3*(src[ 6]+src[11]) - (src[ 5]+src[12]));\
        OP(dst[ 9], 20*(src[ 9]+src[10]) - 6*(src[ 8]+src[11]) + 3*(src[ 7]+src[12]) - (src[ 6]+src[13]));\
        OP(dst[10], 20*(src[10]+src[11]) - 6*(src[ 9]+src[12]) + 3*(src[ 8]+src[13]) - (src[ 7]+src[14]));\
        OP(dst[11], 20*(src[11]+src[12]) - 6*(src[10]+src[13]) + 3*(src[ 9]+src[14]) - (src[ 8]+src[15]));\
        OP(dst[12], 20*(src[12]+src[13]) - 6*(src[11]+src[14]) + 3*(src[10]+src[15]) - (src[ 9]+src[16]));\
        OP(dst[13], 20*(src[13]+src[14]) - 6*(src[12]+src[15]) + 3*(src[11]+src[16]) - (src[10]+src[16]));\
        OP(dst[14], 20*(src[14]+src[15]) - 6*(src[13]+src[16]) + 3*(src[12]+src[16]) - (src[11]+src[15]));\
        OP(dst[15], 20*(src[15]+src[16]) - 6*(src[14]+src[16]) + 3*(src[13]+src[15]) - (src[12]+src[14]));\
        src += srcStride;\
        dst += dstStride;\
    }\
}\
\
/* Vertical filter: 16 columns of 16 outputs, each column reads 17 source rows. */\
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride)\
{\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* looked up by OP */\
    int col, k;\
\
    for (col = 0; col < 16; col++) {\
        int s[17];\
        /* fetch the whole column before anything is stored */\
        for (k = 0; k < 17; k++)\
            s[k] = src[k*srcStride];\
        OP(dst[ 0*dstStride], 20*(s[ 0]+s[ 1]) - 6*(s[ 0]+s[ 2]) + 3*(s[ 1]+s[ 3]) - (s[ 2]+s[ 4]));\
        OP(dst[ 1*dstStride], 20*(s[ 1]+s[ 2]) - 6*(s[ 0]+s[ 3]) + 3*(s[ 0]+s[ 4]) - (s[ 1]+s[ 5]));\
        OP(dst[ 2*dstStride], 20*(s[ 2]+s[ 3]) - 6*(s[ 1]+s[ 4]) + 3*(s[ 0]+s[ 5]) - (s[ 0]+s[ 6]));\
        OP(dst[ 3*dstStride], 20*(s[ 3]+s[ 4]) - 6*(s[ 2]+s[ 5]) + 3*(s[ 1]+s[ 6]) - (s[ 0]+s[ 7]));\
        OP(dst[ 4*dstStride], 20*(s[ 4]+s[ 5]) - 6*(s[ 3]+s[ 6]) + 3*(s[ 2]+s[ 7]) - (s[ 1]+s[ 8]));\
        OP(dst[ 5*dstStride], 20*(s[ 5]+s[ 6]) - 6*(s[ 4]+s[ 7]) + 3*(s[ 3]+s[ 8]) - (s[ 2]+s[ 9]));\
        OP(dst[ 6*dstStride], 20*(s[ 6]+s[ 7]) - 6*(s[ 5]+s[ 8]) + 3*(s[ 4]+s[ 9]) - (s[ 3]+s[10]));\
        OP(dst[ 7*dstStride], 20*(s[ 7]+s[ 8]) - 6*(s[ 6]+s[ 9]) + 3*(s[ 5]+s[10]) - (s[ 4]+s[11]));\
        OP(dst[ 8*dstStride], 20*(s[ 8]+s[ 9]) - 6*(s[ 7]+s[10]) + 3*(s[ 6]+s[11]) - (s[ 5]+s[12]));\
        OP(dst[ 9*dstStride], 20*(s[ 9]+s[10]) - 6*(s[ 8]+s[11]) + 3*(s[ 7]+s[12]) - (s[ 6]+s[13]));\
        OP(dst[10*dstStride], 20*(s[10]+s[11]) - 6*(s[ 9]+s[12]) + 3*(s[ 8]+s[13]) - (s[ 7]+s[14]));\
        OP(dst[11*dstStride], 20*(s[11]+s[12]) - 6*(s[10]+s[13]) + 3*(s[ 9]+s[14]) - (s[ 8]+s[15]));\
        OP(dst[12*dstStride], 20*(s[12]+s[13]) - 6*(s[11]+s[14]) + 3*(s[10]+s[15]) - (s[ 9]+s[16]));\
        OP(dst[13*dstStride], 20*(s[13]+s[14]) - 6*(s[12]+s[15]) + 3*(s[11]+s[16]) - (s[10]+s[16]));\
        OP(dst[14*dstStride], 20*(s[14]+s[15]) - 6*(s[13]+s[16]) + 3*(s[12]+s[16]) - (s[11]+s[15]));\
        OP(dst[15*dstStride], 20*(s[15]+s[16]) - 6*(s[14]+s[16]) + 3*(s[13]+s[15]) - (s[12]+s[14]));\
        src++;\
        dst++;\
    }\
}\
\
/* ---- 8x8 positions: unfiltered, horizontal-only and vertical-only ---- */\
static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## pixels8_c(dst, src, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t filt[8*8];\
\
    put ## RND ## mpeg4_qpel8_h_lowpass(filt, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src, filt, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t filt[8*8];\
\
    put ## RND ## mpeg4_qpel8_h_lowpass(filt, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src + 1, filt, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[9*16];  /* 9 source rows copied at a pitch of 16 */\
    uint8_t filt[8*8];\
\
    copy_block9(ref, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(filt, ref, 8, 16);\
    OPNAME ## pixels8_l2(dst, ref, filt, stride, 16, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[9*16];\
\
    copy_block9(ref, src, 16, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, ref, stride, 16);\
}\
\
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[9*16];\
    uint8_t filt[8*8];\
\
    copy_block9(ref, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(filt, ref, 8, 16);\
    OPNAME ## pixels8_l2(dst, ref + 16, filt, stride, 16, 8, 8);\
}\
\
/* ---- 8x8 positions mixing both directions (plus exported _old forms) ---- */\
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[9*16];\
    uint8_t horz[9*8];\
    uint8_t vert[8*8];\
    uint8_t hv[8*8];\
\
    copy_block9(ref, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(horz, ref, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(vert, ref, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(hv, horz, 8, 8);\
    OPNAME ## pixels8_l4(dst, ref, horz, vert, hv, stride, 16, 8, 8, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[9*16];\
    uint8_t horz[9*8];\
    uint8_t hv[8*8];\
\
    copy_block9(ref, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(horz, ref, 8, 16, 9);\
    /* horz is averaged with the copied source in place (all 9 rows) */\
    put ## RND ## pixels8_l2(horz, horz, ref, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(hv, horz, 8, 8);\
    OPNAME ## pixels8_l2(dst, horz, hv, stride, 8, 8, 8);\
}\
\
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[9*16];\
    uint8_t horz[9*8];\
    uint8_t vert[8*8];\
    uint8_t hv[8*8];\
\
    copy_block9(ref, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(horz, ref, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(vert, ref + 1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(hv, horz, 8, 8);\
    OPNAME ## pixels8_l4(dst, ref + 1, horz, vert, hv, stride, 16, 8, 8, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[9*16];\
    uint8_t horz[9*8];\
    uint8_t hv[8*8];\
\
    copy_block9(ref, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(horz, ref, 8, 16, 9);\
    put ## RND ## pixels8_l2(horz, horz, ref + 1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(hv, horz, 8, 8);\
    OPNAME ## pixels8_l2(dst, horz, hv, stride, 8, 8, 8);\
}\
\
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[9*16];\
    uint8_t horz[9*8];\
    uint8_t vert[8*8];\
    uint8_t hv[8*8];\
\
    copy_block9(ref, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(horz, ref, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(vert, ref, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(hv, horz, 8, 8);\
    OPNAME ## pixels8_l4(dst, ref + 16, horz + 8, vert, hv, stride, 16, 8, 8, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[9*16];\
    uint8_t horz[9*8];\
    uint8_t hv[8*8];\
\
    copy_block9(ref, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(horz, ref, 8, 16, 9);\
    put ## RND ## pixels8_l2(horz, horz, ref, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(hv, horz, 8, 8);\
    OPNAME ## pixels8_l2(dst, horz + 8, hv, stride, 8, 8, 8);\
}\
\
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[9*16];\
    uint8_t horz[9*8];\
    uint8_t vert[8*8];\
    uint8_t hv[8*8];\
\
    copy_block9(ref, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(horz, ref, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(vert, ref + 1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(hv, horz, 8, 8);\
    OPNAME ## pixels8_l4(dst, ref + 17, horz + 8, vert, hv, stride, 16, 8, 8, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[9*16];\
    uint8_t horz[9*8];\
    uint8_t hv[8*8];\
\
    copy_block9(ref, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(horz, ref, 8, 16, 9);\
    put ## RND ## pixels8_l2(horz, horz, ref + 1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(hv, horz, 8, 8);\
    OPNAME ## pixels8_l2(dst, horz + 8, hv, stride, 8, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t horz[9*8];\
    uint8_t hv[8*8];\
\
    put ## RND ## mpeg4_qpel8_h_lowpass(horz, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(hv, horz, 8, 8);\
    OPNAME ## pixels8_l2(dst, horz, hv, stride, 8, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t horz[9*8];\
    uint8_t hv[8*8];\
\
    put ## RND ## mpeg4_qpel8_h_lowpass(horz, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(hv, horz, 8, 8);\
    OPNAME ## pixels8_l2(dst, horz + 8, hv, stride, 8, 8, 8);\
}\
\
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[9*16];\
    uint8_t horz[9*8];\
    uint8_t vert[8*8];\
    uint8_t hv[8*8];\
\
    copy_block9(ref, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(horz, ref, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(vert, ref, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(hv, horz, 8, 8);\
    OPNAME ## pixels8_l2(dst, vert, hv, stride, 8, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[9*16];\
    uint8_t horz[9*8];\
\
    copy_block9(ref, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(horz, ref, 8, 16, 9);\
    put ## RND ## pixels8_l2(horz, horz, ref, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, horz, stride, 8);\
}\
\
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[9*16];\
    uint8_t horz[9*8];\
    uint8_t vert[8*8];\
    uint8_t hv[8*8];\
\
    copy_block9(ref, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(horz, ref, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(vert, ref + 1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(hv, horz, 8, 8);\
    OPNAME ## pixels8_l2(dst, vert, hv, stride, 8, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[9*16];\
    uint8_t horz[9*8];\
\
    copy_block9(ref, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(horz, ref, 8, 16, 9);\
    put ## RND ## pixels8_l2(horz, horz, ref + 1, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, horz, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t horz[9*8];\
\
    put ## RND ## mpeg4_qpel8_h_lowpass(horz, src, 8, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, horz, stride, 8);\
}\
\
/* ---- 16x16 positions: unfiltered, horizontal-only and vertical-only ---- */\
static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## pixels16_c(dst, src, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t filt[16*16];\
\
    put ## RND ## mpeg4_qpel16_h_lowpass(filt, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src, filt, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t filt[16*16];\
\
    put ## RND ## mpeg4_qpel16_h_lowpass(filt, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src + 1, filt, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[17*24];  /* 17 source rows copied at a pitch of 24 */\
    uint8_t filt[16*16];\
\
    copy_block17(ref, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(filt, ref, 16, 24);\
    OPNAME ## pixels16_l2(dst, ref, filt, stride, 24, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[17*24];\
\
    copy_block17(ref, src, 24, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, ref, stride, 24);\
}\
\
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[17*24];\
    uint8_t filt[16*16];\
\
    copy_block17(ref, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(filt, ref, 16, 24);\
    OPNAME ## pixels16_l2(dst, ref + 24, filt, stride, 24, 16, 16);\
}\
\
/* ---- 16x16 positions mixing both directions (plus exported _old forms) ---- */\
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[17*24];\
    uint8_t horz[17*16];\
    uint8_t vert[16*16];\
    uint8_t hv[16*16];\
\
    copy_block17(ref, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(horz, ref, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(vert, ref, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(hv, horz, 16, 16);\
    OPNAME ## pixels16_l4(dst, ref, horz, vert, hv, stride, 24, 16, 16, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[17*24];\
    uint8_t horz[17*16];\
    uint8_t hv[16*16];\
\
    copy_block17(ref, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(horz, ref, 16, 24, 17);\
    /* horz is averaged with the copied source in place (all 17 rows) */\
    put ## RND ## pixels16_l2(horz, horz, ref, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(hv, horz, 16, 16);\
    OPNAME ## pixels16_l2(dst, horz, hv, stride, 16, 16, 16);\
}\
\
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[17*24];\
    uint8_t horz[17*16];\
    uint8_t vert[16*16];\
    uint8_t hv[16*16];\
\
    copy_block17(ref, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(horz, ref, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(vert, ref + 1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(hv, horz, 16, 16);\
    OPNAME ## pixels16_l4(dst, ref + 1, horz, vert, hv, stride, 24, 16, 16, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[17*24];\
    uint8_t horz[17*16];\
    uint8_t hv[16*16];\
\
    copy_block17(ref, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(horz, ref, 16, 24, 17);\
    put ## RND ## pixels16_l2(horz, horz, ref + 1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(hv, horz, 16, 16);\
    OPNAME ## pixels16_l2(dst, horz, hv, stride, 16, 16, 16);\
}\
\
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[17*24];\
    uint8_t horz[17*16];\
    uint8_t vert[16*16];\
    uint8_t hv[16*16];\
\
    copy_block17(ref, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(horz, ref, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(vert, ref, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(hv, horz, 16, 16);\
    OPNAME ## pixels16_l4(dst, ref + 24, horz + 16, vert, hv, stride, 24, 16, 16, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[17*24];\
    uint8_t horz[17*16];\
    uint8_t hv[16*16];\
\
    copy_block17(ref, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(horz, ref, 16, 24, 17);\
    put ## RND ## pixels16_l2(horz, horz, ref, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(hv, horz, 16, 16);\
    OPNAME ## pixels16_l2(dst, horz + 16, hv, stride, 16, 16, 16);\
}\
\
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[17*24];\
    uint8_t horz[17*16];\
    uint8_t vert[16*16];\
    uint8_t hv[16*16];\
\
    copy_block17(ref, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(horz, ref, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(vert, ref + 1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(hv, horz, 16, 16);\
    OPNAME ## pixels16_l4(dst, ref + 25, horz + 16, vert, hv, stride, 24, 16, 16, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[17*24];\
    uint8_t horz[17*16];\
    uint8_t hv[16*16];\
\
    copy_block17(ref, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(horz, ref, 16, 24, 17);\
    put ## RND ## pixels16_l2(horz, horz, ref + 1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(hv, horz, 16, 16);\
    OPNAME ## pixels16_l2(dst, horz + 16, hv, stride, 16, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t horz[17*16];\
    uint8_t hv[16*16];\
\
    put ## RND ## mpeg4_qpel16_h_lowpass(horz, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(hv, horz, 16, 16);\
    OPNAME ## pixels16_l2(dst, horz, hv, stride, 16, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t horz[17*16];\
    uint8_t hv[16*16];\
\
    put ## RND ## mpeg4_qpel16_h_lowpass(horz, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(hv, horz, 16, 16);\
    OPNAME ## pixels16_l2(dst, horz + 16, hv, stride, 16, 16, 16);\
}\
\
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[17*24];\
    uint8_t horz[17*16];\
    uint8_t vert[16*16];\
    uint8_t hv[16*16];\
\
    copy_block17(ref, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(horz, ref, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(vert, ref, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(hv, horz, 16, 16);\
    OPNAME ## pixels16_l2(dst, vert, hv, stride, 16, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[17*24];\
    uint8_t horz[17*16];\
\
    copy_block17(ref, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(horz, ref, 16, 24, 17);\
    put ## RND ## pixels16_l2(horz, horz, ref, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, horz, stride, 16);\
}\
\
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[17*24];\
    uint8_t horz[17*16];\
    uint8_t vert[16*16];\
    uint8_t hv[16*16];\
\
    copy_block17(ref, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(horz, ref, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(vert, ref + 1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(hv, horz, 16, 16);\
    OPNAME ## pixels16_l2(dst, vert, hv, stride, 16, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t ref[17*24];\
    uint8_t horz[17*16];\
\
    copy_block17(ref, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(horz, ref, 16, 24, 17);\
    put ## RND ## pixels16_l2(horz, horz, ref + 1, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, horz, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t horz[17*16];\
\
    put ## RND ## mpeg4_qpel16_h_lowpass(horz, src, 16, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, horz, stride, 16);\
}
| 255 | 2161 |
| 651 | 2162 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) |
| 2163 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1) | |
| 2164 #define op_put(a, b) a = cm[((b) + 16)>>5] | |
| 2165 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5] | |
| 2166 | |
| 2167 QPEL_MC(0, put_ , _ , op_put) | |
| 2168 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd) | |
| 2169 QPEL_MC(0, avg_ , _ , op_avg) | |
| 2170 //QPEL_MC(1, avg_no_rnd , _ , op_avg) | |
| 2171 #undef op_avg | |
| 2172 #undef op_avg_no_rnd | |
| 2173 #undef op_put | |
| 2174 #undef op_put_no_rnd | |
| 255 | 2175 |
| 1168 | 2176 #if 1 |
| 2177 #define H264_LOWPASS(OPNAME, OP, OP2) \ | |
| 5151 | 2178 static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
|
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2179 const int h=2;\ |
| 4176 | 2180 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
|
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2181 int i;\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2182 for(i=0; i<h; i++)\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2183 {\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2184 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2185 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2186 dst+=dstStride;\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2187 src+=srcStride;\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2188 }\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2189 }\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2190 \ |
| 5151 | 2191 static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
|
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2192 const int w=2;\ |
| 4176 | 2193 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
|
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2194 int i;\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2195 for(i=0; i<w; i++)\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2196 {\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2197 const int srcB= src[-2*srcStride];\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2198 const int srcA= src[-1*srcStride];\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2199 const int src0= src[0 *srcStride];\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2200 const int src1= src[1 *srcStride];\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2201 const int src2= src[2 *srcStride];\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2202 const int src3= src[3 *srcStride];\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2203 const int src4= src[4 *srcStride];\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2204 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2205 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2206 dst++;\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2207 src++;\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2208 }\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2209 }\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2210 \ |
| 5151 | 2211 static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ |
|
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2212 const int h=2;\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2213 const int w=2;\ |
| 4176 | 2214 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
|
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2215 int i;\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2216 src -= 2*srcStride;\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2217 for(i=0; i<h+5; i++)\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2218 {\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2219 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2220 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2221 tmp+=tmpStride;\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2222 src+=srcStride;\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2223 }\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2224 tmp -= tmpStride*(h+5-2);\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2225 for(i=0; i<w; i++)\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2226 {\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2227 const int tmpB= tmp[-2*tmpStride];\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2228 const int tmpA= tmp[-1*tmpStride];\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2229 const int tmp0= tmp[0 *tmpStride];\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2230 const int tmp1= tmp[1 *tmpStride];\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2231 const int tmp2= tmp[2 *tmpStride];\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2232 const int tmp3= tmp[3 *tmpStride];\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2233 const int tmp4= tmp[4 *tmpStride];\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2234 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2235 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2236 dst++;\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2237 tmp++;\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2238 }\ |
|
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2239 }\ |
| 1168 | 2240 static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
| 2241 const int h=4;\ | |
| 4176 | 2242 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
| 1168 | 2243 int i;\ |
| 2244 for(i=0; i<h; i++)\ | |
| 2245 {\ | |
| 2246 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ | |
| 2247 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ | |
| 2248 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\ | |
| 2249 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\ | |
| 2250 dst+=dstStride;\ | |
| 2251 src+=srcStride;\ | |
| 2252 }\ | |
| 2253 }\ | |
| 2254 \ | |
| 2255 static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
| 2256 const int w=4;\ | |
| 4176 | 2257 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
| 1168 | 2258 int i;\ |
| 2259 for(i=0; i<w; i++)\ | |
| 2260 {\ | |
| 2261 const int srcB= src[-2*srcStride];\ | |
| 2262 const int srcA= src[-1*srcStride];\ | |
| 2263 const int src0= src[0 *srcStride];\ | |
| 2264 const int src1= src[1 *srcStride];\ | |
| 2265 const int src2= src[2 *srcStride];\ | |
| 2266 const int src3= src[3 *srcStride];\ | |
| 2267 const int src4= src[4 *srcStride];\ | |
| 2268 const int src5= src[5 *srcStride];\ | |
| 2269 const int src6= src[6 *srcStride];\ | |
| 2270 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ | |
| 2271 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ | |
| 2272 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ | |
| 2273 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ | |
| 2274 dst++;\ | |
| 2275 src++;\ | |
| 2276 }\ | |
| 2277 }\ | |
| 2278 \ | |
| 2279 static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | |
| 2280 const int h=4;\ | |
| 2281 const int w=4;\ | |
| 4176 | 2282 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
| 1168 | 2283 int i;\ |
| 2284 src -= 2*srcStride;\ | |
| 2285 for(i=0; i<h+5; i++)\ | |
| 2286 {\ | |
| 2287 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ | |
| 2288 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ | |
| 2289 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\ | |
| 2290 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\ | |
| 2291 tmp+=tmpStride;\ | |
| 2292 src+=srcStride;\ | |
| 2293 }\ | |
| 2294 tmp -= tmpStride*(h+5-2);\ | |
| 2295 for(i=0; i<w; i++)\ | |
| 2296 {\ | |
| 2297 const int tmpB= tmp[-2*tmpStride];\ | |
| 2298 const int tmpA= tmp[-1*tmpStride];\ | |
| 2299 const int tmp0= tmp[0 *tmpStride];\ | |
| 2300 const int tmp1= tmp[1 *tmpStride];\ | |
| 2301 const int tmp2= tmp[2 *tmpStride];\ | |
| 2302 const int tmp3= tmp[3 *tmpStride];\ | |
| 2303 const int tmp4= tmp[4 *tmpStride];\ | |
| 2304 const int tmp5= tmp[5 *tmpStride];\ | |
| 2305 const int tmp6= tmp[6 *tmpStride];\ | |
| 2306 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ | |
| 2307 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | |
| 2308 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ | |
| 2309 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ | |
| 2310 dst++;\ | |
| 2311 tmp++;\ | |
| 2312 }\ | |
| 2313 }\ | |
| 2314 \ | |
| 2315 static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
| 2316 const int h=8;\ | |
| 4176 | 2317 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
| 1168 | 2318 int i;\ |
| 2319 for(i=0; i<h; i++)\ | |
| 2320 {\ | |
| 2321 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\ | |
| 2322 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\ | |
| 2323 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\ | |
| 2324 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\ | |
| 2325 OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\ | |
| 2326 OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\ | |
| 2327 OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\ | |
| 2328 OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\ | |
| 2329 dst+=dstStride;\ | |
| 2330 src+=srcStride;\ | |
| 2331 }\ | |
| 2332 }\ | |
| 2333 \ | |
| 2334 static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
| 2335 const int w=8;\ | |
| 4176 | 2336 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
| 1168 | 2337 int i;\ |
| 2338 for(i=0; i<w; i++)\ | |
| 2339 {\ | |
| 2340 const int srcB= src[-2*srcStride];\ | |
| 2341 const int srcA= src[-1*srcStride];\ | |
| 2342 const int src0= src[0 *srcStride];\ | |
| 2343 const int src1= src[1 *srcStride];\ | |
| 2344 const int src2= src[2 *srcStride];\ | |
| 2345 const int src3= src[3 *srcStride];\ | |
| 2346 const int src4= src[4 *srcStride];\ | |
| 2347 const int src5= src[5 *srcStride];\ | |
| 2348 const int src6= src[6 *srcStride];\ | |
| 2349 const int src7= src[7 *srcStride];\ | |
| 2350 const int src8= src[8 *srcStride];\ | |
| 2351 const int src9= src[9 *srcStride];\ | |
| 2352 const int src10=src[10*srcStride];\ | |
| 2353 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ | |
| 2354 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ | |
| 2355 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ | |
| 2356 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ | |
| 2357 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\ | |
| 2358 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\ | |
| 2359 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\ | |
| 2360 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\ | |
| 2361 dst++;\ | |
| 2362 src++;\ | |
| 2363 }\ | |
| 2364 }\ | |
| 2365 \ | |
| 2366 static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | |
| 2367 const int h=8;\ | |
| 2368 const int w=8;\ | |
| 4176 | 2369 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
| 1168 | 2370 int i;\ |
| 2371 src -= 2*srcStride;\ | |
| 2372 for(i=0; i<h+5; i++)\ | |
| 2373 {\ | |
| 2374 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\ | |
| 2375 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\ | |
| 2376 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\ | |
| 2377 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\ | |
| 2378 tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\ | |
| 2379 tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\ | |
| 2380 tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\ | |
| 2381 tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\ | |
| 2382 tmp+=tmpStride;\ | |
| 2383 src+=srcStride;\ | |
| 2384 }\ | |
| 2385 tmp -= tmpStride*(h+5-2);\ | |
| 2386 for(i=0; i<w; i++)\ | |
| 2387 {\ | |
| 2388 const int tmpB= tmp[-2*tmpStride];\ | |
| 2389 const int tmpA= tmp[-1*tmpStride];\ | |
| 2390 const int tmp0= tmp[0 *tmpStride];\ | |
| 2391 const int tmp1= tmp[1 *tmpStride];\ | |
| 2392 const int tmp2= tmp[2 *tmpStride];\ | |
| 2393 const int tmp3= tmp[3 *tmpStride];\ | |
| 2394 const int tmp4= tmp[4 *tmpStride];\ | |
| 2395 const int tmp5= tmp[5 *tmpStride];\ | |
| 2396 const int tmp6= tmp[6 *tmpStride];\ | |
| 2397 const int tmp7= tmp[7 *tmpStride];\ | |
| 2398 const int tmp8= tmp[8 *tmpStride];\ | |
| 2399 const int tmp9= tmp[9 *tmpStride];\ | |
| 2400 const int tmp10=tmp[10*tmpStride];\ | |
| 2401 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ | |
| 2402 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | |
| 2403 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ | |
| 2404 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ | |
| 2405 OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\ | |
| 2406 OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\ | |
| 2407 OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\ | |
| 2408 OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\ | |
| 2409 dst++;\ | |
| 2410 tmp++;\ | |
| 2411 }\ | |
| 2412 }\ | |
| 2413 \ | |
| 2414 static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
| 2415 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ | |
| 2416 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
| 2417 src += 8*srcStride;\ | |
| 2418 dst += 8*dstStride;\ | |
| 2419 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ | |
| 2420 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
| 2421 }\ | |
| 2422 \ | |
| 2423 static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
| 2424 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ | |
| 2425 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
| 2426 src += 8*srcStride;\ | |
| 2427 dst += 8*dstStride;\ | |
| 2428 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ | |
| 2429 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
| 2430 }\ | |
| 2431 \ | |
| 2432 static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | |
| 2433 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ | |
| 2434 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ | |
| 2435 src += 8*srcStride;\ | |
| 2436 dst += 8*dstStride;\ | |
| 2437 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ | |
| 2438 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ | |
| 2439 }\ | |
| 2440 | |
/*
 * H264_MC(OPNAME, SIZE): generate the 16 motion-compensation entry points
 * OPNAME##h264_qpel##SIZE##_mcXY_c for a SIZE x SIZE block.
 *
 * X selects the horizontal and Y the vertical sub-sample position:
 *   - mc00 forwards to the plain OPNAME##pixels##SIZE##_c routine;
 *   - mc20 / mc02 / mc22 write the h / v / hv lowpass result directly;
 *   - every other position builds one or two intermediate predictions and
 *     blends them with OPNAME##pixels##SIZE##_l2.
 *
 * Intermediate predictions are always produced with the put_ lowpass
 * variants, so the OPNAME operation is applied exactly once, on the final
 * write to dst.
 *
 * For vertical filtering the source is first copied into `full`, starting two
 * rows above src and passing SIZE+5 as the row count; `full_mid` (= full +
 * 2*SIZE) is therefore the row that corresponds to src, with row stride SIZE.
 *
 * The expansion relies on put_h264_qpel##SIZE##_{h,v,hv}_lowpass,
 * copy_block##SIZE and OPNAME##pixels##SIZE##{_c,_l2} being defined elsewhere
 * in this file.
 */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}
| 2577 | |
| 2578 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) | |
| 2579 //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7) | |
| 2580 #define op_put(a, b) a = cm[((b) + 16)>>5] | |
| 2581 #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1) | |
| 2582 #define op2_put(a, b) a = cm[((b) + 512)>>10] | |
| 2583 | |
| 2584 H264_LOWPASS(put_ , op_put, op2_put) | |
| 2585 H264_LOWPASS(avg_ , op_avg, op2_avg) | |
|
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2586 H264_MC(put_, 2) |
| 1168 | 2587 H264_MC(put_, 4) |
| 2588 H264_MC(put_, 8) | |
| 2589 H264_MC(put_, 16) | |
| 2590 H264_MC(avg_, 4) | |
| 2591 H264_MC(avg_, 8) | |
| 2592 H264_MC(avg_, 16) | |
| 2593 | |
| 2594 #undef op_avg | |
| 2595 #undef op_put | |
| 2596 #undef op2_avg | |
| 2597 #undef op2_put | |
| 2598 #endif | |
| 2599 | |
| 936 | 2600 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ |
| 4176 | 2601 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
| 936 | 2602 int i; |
| 2603 | |
| 2604 for(i=0; i<h; i++){ | |
| 2605 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4]; | |
| 2606 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4]; | |
| 2607 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4]; | |
| 2608 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4]; | |
| 2609 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4]; | |
| 2610 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; | |
| 2611 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; | |
| 2612 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; | |
| 2613 dst+=dstStride; | |
| 2967 | 2614 src+=srcStride; |
| 936 | 2615 } |
| 2616 } | |
| 2617 | |
| 8590 | 2618 #if CONFIG_CAVS_DECODER |
|
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2619 /* AVS specific */ |
|
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
/** CAVS full-sample position, 8x8: forwards to put_pixels8_c() with height 8. */
void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels8_c(dst, src, stride, 8);
}
|
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
/** CAVS full-sample position, 8x8: forwards to avg_pixels8_c() with height 8. */
void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels8_c(dst, src, stride, 8);
}
|
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
/** CAVS full-sample position, 16x16: forwards to put_pixels16_c() with height 16. */
void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels16_c(dst, src, stride, 16);
}
|
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
/** CAVS full-sample position, 16x16: forwards to avg_pixels16_c() with height 16. */
void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels16_c(dst, src, stride, 16);
}
| 3432 | 2632 #endif /* CONFIG_CAVS_DECODER */ |
|
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2633 |
|
9995
3141f69e3905
Do not check for both CONFIG_VC1_DECODER and CONFIG_WMV3_DECODER,
diego
parents:
9975
diff
changeset
|
2634 #if CONFIG_VC1_DECODER |
| 3526 | 2635 /* VC-1 specific */ |
|
11378
f46b68960464
Move some VC1 dsp prototypes to dsputil.h; they are defined in dsputil.c
mru
parents:
11376
diff
changeset
|
/**
 * VC-1 full-sample position: forwards to put_pixels8_c() with height 8.
 * The rnd argument is part of the signature but is not used here.
 */
void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
    put_pixels8_c(dst, src, stride, 8);
}
|
11378
f46b68960464
Move some VC1 dsp prototypes to dsputil.h; they are defined in dsputil.c
mru
parents:
11376
diff
changeset
|
/**
 * VC-1 full-sample position: forwards to avg_pixels8_c() with height 8.
 * The rnd argument is part of the signature but is not used here.
 */
void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
    avg_pixels8_c(dst, src, stride, 8);
}
|
9995
3141f69e3905
Do not check for both CONFIG_VC1_DECODER and CONFIG_WMV3_DECODER,
diego
parents:
9975
diff
changeset
|
2642 #endif /* CONFIG_VC1_DECODER */ |
| 3526 | 2643 |
| 8590 | 2644 #if CONFIG_RV40_DECODER |
/** RV40 (3,3) position, 16x16: forwards to put_pixels16_xy2_c() with height 16. */
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_c(dst, src, stride, 16);
}
/** RV40 (3,3) position, 16x16: forwards to avg_pixels16_xy2_c() with height 16. */
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_c(dst, src, stride, 16);
}
/** RV40 (3,3) position, 8x8: forwards to put_pixels8_xy2_c() with height 8. */
static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_c(dst, src, stride, 8);
}
/** RV40 (3,3) position, 8x8: forwards to avg_pixels8_xy2_c() with height 8. */
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_c(dst, src, stride, 8);
}
| 2657 #endif /* CONFIG_RV40_DECODER */ | |
| 2658 | |
| 936 | 2659 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){ |
| 4176 | 2660 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
| 936 | 2661 int i; |
| 2662 | |
| 2663 for(i=0; i<w; i++){ | |
| 2664 const int src_1= src[ -srcStride]; | |
| 2665 const int src0 = src[0 ]; | |
| 2666 const int src1 = src[ srcStride]; | |
| 2667 const int src2 = src[2*srcStride]; | |
| 2668 const int src3 = src[3*srcStride]; | |
| 2669 const int src4 = src[4*srcStride]; | |
| 2670 const int src5 = src[5*srcStride]; | |
| 2671 const int src6 = src[6*srcStride]; | |
| 2672 const int src7 = src[7*srcStride]; | |
| 2673 const int src8 = src[8*srcStride]; | |
| 2674 const int src9 = src[9*srcStride]; | |
| 2675 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; | |
| 2676 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; | |
| 2677 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; | |
| 2678 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; | |
| 2679 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; | |
| 2680 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; | |
| 2681 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; | |
| 2682 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; | |
| 2683 src++; | |
| 2684 dst++; | |
| 2685 } | |
| 2686 } | |
| 2687 | |
/** mspel 8x8, full-sample position: forwards to put_pixels8_c() with height 8. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}
| 2691 | |
/**
 * mspel 8x8, position (1,0): blends the source block with its horizontally
 * lowpass-filtered version via put_pixels8_l2().
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64]; /* 8x8 filtered block, row stride 8 */
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}
| 2697 | |
/** mspel 8x8, position (2,0): horizontal lowpass written straight to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
| 2701 | |
/**
 * mspel 8x8, position (3,0): blends the source block shifted one pixel right
 * (src+1) with the horizontally lowpass-filtered block via put_pixels8_l2().
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64]; /* 8x8 filtered block, row stride 8 */
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
}
| 2707 | |
/** mspel 8x8, position (0,2): vertical lowpass written straight to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
| 2711 | |
/**
 * mspel 8x8, position (1,2): blends the vertically filtered block (halfV)
 * with the horizontally-then-vertically filtered block (halfHV).
 *
 * halfH holds 11 rows of 8 pixels starting one row above src, so halfH+8 is
 * the row matching src and the vertical pass can read rows -1..9 around it.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];  /* 11 rows x 8 pixels */
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/**
 * mspel 8x8, position (3,2): like put_mspel8_mc12_c(), except that the
 * vertically filtered block (halfV) is taken one pixel to the right (src+1).
 *
 * halfH holds 11 rows of 8 pixels starting one row above src, so halfH+8 is
 * the row matching src and the vertical pass can read rows -1..9 around it.
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];  /* 11 rows x 8 pixels */
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/**
 * mspel 8x8, position (2,2): horizontal lowpass into a temporary, then
 * vertical lowpass of that temporary straight into dst.
 *
 * halfH holds 11 rows of 8 pixels starting one row above src, so halfH+8 is
 * the row matching src and the vertical pass can read rows -1..9 around it.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];  /* 11 rows x 8 pixels */
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
| 2735 | |
| 1644 | 2736 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){ |
|
10749
5cca4b6c459d
Get rid of pointless CONFIG_ANY_H263 preprocessor definition.
diego
parents:
10748
diff
changeset
|
2737 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { |
| 1644 | 2738 int x; |
| 2739 const int strength= ff_h263_loop_filter_strength[qscale]; | |
| 2967 | 2740 |
| 1644 | 2741 for(x=0; x<8; x++){ |
| 2742 int d1, d2, ad1; | |
| 2743 int p0= src[x-2*stride]; | |
| 2744 int p1= src[x-1*stride]; | |
| 2745 int p2= src[x+0*stride]; | |
| 2746 int p3= src[x+1*stride]; | |
| 2747 int d = (p0 - p3 + 4*(p2 - p1)) / 8; | |
| 2748 | |
| 2749 if (d<-2*strength) d1= 0; | |
| 2750 else if(d<- strength) d1=-2*strength - d; | |
| 2751 else if(d< strength) d1= d; | |
| 2752 else if(d< 2*strength) d1= 2*strength - d; | |
| 2753 else d1= 0; | |
| 2967 | 2754 |
| 1644 | 2755 p1 += d1; |
| 2756 p2 -= d1; | |
| 2757 if(p1&256) p1= ~(p1>>31); | |
| 2758 if(p2&256) p2= ~(p2>>31); | |
| 2967 | 2759 |
| 1644 | 2760 src[x-1*stride] = p1; |
| 2761 src[x+0*stride] = p2; | |
| 2762 | |
| 4001 | 2763 ad1= FFABS(d1)>>1; |
| 2967 | 2764 |
| 4594 | 2765 d2= av_clip((p0-p3)/4, -ad1, ad1); |
| 2967 | 2766 |
| 1644 | 2767 src[x-2*stride] = p0 - d2; |
| 2768 src[x+ stride] = p3 + d2; | |
| 2769 } | |
|
5394
e9a6215f4e3a
help some gcc version to optimize out those functions
aurel
parents:
5291
diff
changeset
|
2770 } |
| 1644 | 2771 } |
| 2772 | |
| 2773 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){ | |
|
10749
5cca4b6c459d
Get rid of pointless CONFIG_ANY_H263 preprocessor definition.
diego
parents:
10748
diff
changeset
|
2774 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { |
| 1644 | 2775 int y; |
| 2776 const int strength= ff_h263_loop_filter_strength[qscale]; | |
| 2967 | 2777 |
| 1644 | 2778 for(y=0; y<8; y++){ |
| 2779 int d1, d2, ad1; | |
| 2780 int p0= src[y*stride-2]; | |
| 2781 int p1= src[y*stride-1]; | |
| 2782 int p2= src[y*stride+0]; | |
| 2783 int p3= src[y*stride+1]; | |
| 2784 int d = (p0 - p3 + 4*(p2 - p1)) / 8; | |
| 2785 | |
| 2786 if (d<-2*strength) d1= 0; | |
| 2787 else if(d<- strength) d1=-2*strength - d; | |
| 2788 else if(d< strength) d1= d; | |
| 2789 else if(d< 2*strength) d1= 2*strength - d; | |
| 2790 else d1= 0; | |
| 2967 | 2791 |
| 1644 | 2792 p1 += d1; |
| 2793 p2 -= d1; | |
| 2794 if(p1&256) p1= ~(p1>>31); | |
| 2795 if(p2&256) p2= ~(p2>>31); | |
| 2967 | 2796 |
| 1644 | 2797 src[y*stride-1] = p1; |
| 2798 src[y*stride+0] = p2; | |
| 2799 | |
| 4001 | 2800 ad1= FFABS(d1)>>1; |
| 2967 | 2801 |
| 4594 | 2802 d2= av_clip((p0-p3)/4, -ad1, ad1); |
| 2967 | 2803 |
| 1644 | 2804 src[y*stride-2] = p0 - d2; |
| 2805 src[y*stride+1] = p3 + d2; | |
| 2806 } | |
|
5394
e9a6215f4e3a
help some gcc version to optimize out those functions
aurel
parents:
5291
diff
changeset
|
2807 } |
| 1644 | 2808 } |
| 936 | 2809 |
/**
 * H.261 loop filter on one 8x8 block, in place.
 * A separable (1,2,1) smoothing: a vertical pass into a scratch buffer
 * (rows 0 and 7 are only scaled by 4), then a horizontal pass back into src
 * (columns 0 and 7 take the rounded vertical result only).
 * @param src    top-left sample of the block
 * @param stride distance between rows of src
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int tmp[64];
    int row, col;

    /* vertical pass; src is only read here */
    for(row=0; row<8; row++){
        const uint8_t *in= src + row*stride;
        int *out= tmp + row*8;
        const int border= (row==0 || row==7);

        for(col=0; col<8; col++)
            out[col]= border ? 4*in[col]
                             : in[col-stride] + 2*in[col] + in[col+stride];
    }

    /* horizontal pass with rounding back to sample range */
    for(row=0; row<8; row++){
        const int *in= tmp + row*8;
        uint8_t *out= src + row*stride;

        out[0]= (in[0] + 2)>>2;
        out[7]= (in[7] + 2)>>2;
        for(col=1; col<7; col++)
            out[col]= (in[col-1] + 2*in[col] + in[col+1] + 8)>>4;
    }
}
|
b6f2add2511e
h261 decoder by (Maarten Daniels <maarten.daniels at student dot luc dot ac dot be>)
michael
parents:
1984
diff
changeset
|
2836 |
/**
 * Sum of absolute differences over a 16-wide, h-high area.
 * @param v         unused
 * @param pix1      first area
 * @param pix2      second area
 * @param line_size distance between rows, applied to both areas
 * @param h         number of rows
 * @return accumulated |pix1 - pix2|
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        for(col=0; col<16; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
| 2864 | |
/**
 * 16-wide sum of absolute differences between pix1 and the avg2() of each
 * pix2 sample with its right-hand neighbour (reads 17 samples per pix2 row).
 * @param v         unused
 * @param line_size distance between rows, applied to both areas
 * @param h         number of rows
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        for(col=0; col<16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
| 2892 | |
/**
 * 16-wide sum of absolute differences between pix1 and the avg2() of each
 * pix2 sample with the sample one row below it (reads h+1 rows of pix2).
 * @param v         unused
 * @param line_size distance between rows, applied to both areas
 * @param h         number of rows
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below= pix2 + line_size;
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        for(col=0; col<16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
| 2922 | |
/**
 * 16-wide sum of absolute differences between pix1 and the avg4() of each
 * 2x2 neighbourhood of pix2 (reads 17 samples per row and h+1 rows of pix2).
 * @param v         unused
 * @param line_size distance between rows, applied to both areas
 * @param h         number of rows
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below= pix2 + line_size;
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        for(col=0; col<16; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col+1], below[col], below[col+1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
| 2952 | |
/**
 * Sum of absolute differences over an 8-wide, h-high area.
 * @param v         unused
 * @param pix1      first area
 * @param pix2      second area
 * @param line_size distance between rows, applied to both areas
 * @param h         number of rows
 * @return accumulated |pix1 - pix2|
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        for(col=0; col<8; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
| 2972 | |
/**
 * 8-wide sum of absolute differences between pix1 and the avg2() of each
 * pix2 sample with its right-hand neighbour (reads 9 samples per pix2 row).
 * @param v         unused
 * @param line_size distance between rows, applied to both areas
 * @param h         number of rows
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        for(col=0; col<8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
| 2992 | |
/**
 * 8-wide sum of absolute differences between pix1 and the avg2() of each
 * pix2 sample with the sample one row below it (reads h+1 rows of pix2).
 * @param v         unused
 * @param line_size distance between rows, applied to both areas
 * @param h         number of rows
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below= pix2 + line_size;
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        for(col=0; col<8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
| 3014 | |
/**
 * 8-wide sum of absolute differences between pix1 and the avg4() of each
 * 2x2 neighbourhood of pix2 (reads 9 samples per row and h+1 rows of pix2).
 * @param v         unused
 * @param line_size distance between rows, applied to both areas
 * @param h         number of rows
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below= pix2 + line_size;
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        for(col=0; col<8; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col+1], below[col], below[col+1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
| 3036 | |
| 2834 | 3037 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){ |
| 3038 MpegEncContext *c = v; | |
|
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3039 int score1=0; |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3040 int score2=0; |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3041 int x,y; |
| 2066 | 3042 |
|
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3043 for(y=0; y<h; y++){ |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3044 for(x=0; x<16; x++){ |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3045 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3046 } |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3047 if(y+1<h){ |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3048 for(x=0; x<15; x++){ |
| 4001 | 3049 score2+= FFABS( s1[x ] - s1[x +stride] |
|
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3050 - s1[x+1] + s1[x+1+stride]) |
| 4001 | 3051 -FFABS( s2[x ] - s2[x +stride] |
|
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3052 - s2[x+1] + s2[x+1+stride]); |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3053 } |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3054 } |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3055 s1+= stride; |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3056 s2+= stride; |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3057 } |
| 2066 | 3058 |
| 4001 | 3059 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight; |
| 3060 else return score1 + FFABS(score2)*8; | |
|
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3061 } |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3062 |
| 2834 | 3063 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){ |
| 3064 MpegEncContext *c = v; | |
|
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3065 int score1=0; |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3066 int score2=0; |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3067 int x,y; |
| 2967 | 3068 |
|
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3069 for(y=0; y<h; y++){ |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3070 for(x=0; x<8; x++){ |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3071 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3072 } |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3073 if(y+1<h){ |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3074 for(x=0; x<7; x++){ |
| 4001 | 3075 score2+= FFABS( s1[x ] - s1[x +stride] |
|
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3076 - s1[x+1] + s1[x+1+stride]) |
| 4001 | 3077 -FFABS( s2[x ] - s2[x +stride] |
|
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3078 - s2[x+1] + s2[x+1+stride]); |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3079 } |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3080 } |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3081 s1+= stride; |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3082 s2+= stride; |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3083 } |
| 2967 | 3084 |
| 4001 | 3085 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight; |
| 3086 else return score1 + FFABS(score2)*8; | |
|
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3087 } |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3088 |
| 1784 | 3089 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){ |
| 3090 int i; | |
| 3091 unsigned int sum=0; | |
| 3092 | |
| 3093 for(i=0; i<8*8; i++){ | |
| 3094 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT)); | |
| 3095 int w= weight[i]; | |
| 3096 b>>= RECON_SHIFT; | |
| 3097 assert(-512<b && b<512); | |
| 3098 | |
| 3099 sum += (w*b)*(w*b)>>4; | |
| 3100 } | |
| 3101 return sum>>2; | |
| 3102 } | |
| 3103 | |
| 3104 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){ | |
| 3105 int i; | |
| 3106 | |
| 3107 for(i=0; i<8*8; i++){ | |
| 3108 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT); | |
| 2967 | 3109 } |
| 1784 | 3110 } |
| 3111 | |
| 1100 | 3112 /** |
| 3113 * permutes an 8x8 block. | |
| 1101 | 3114 * @param block the block which will be permuted according to the given permutation vector |
| 1100 | 3115 * @param permutation the permutation vector |
| 3116 * @param last the last non zero coefficient in scantable order, used to speed the permutation up | |
| 2967 | 3117 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not |
| 1101 | 3118 * (inverse) permutated to scantable order! |
| 1100 | 3119 */ |
| 1064 | 3120 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last) |
|
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
3121 { |
| 764 | 3122 int i; |
| 945 | 3123 DCTELEM temp[64]; |
| 2967 | 3124 |
| 764 | 3125 if(last<=0) return; |
| 5129 | 3126 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations |
|
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
3127 |
| 764 | 3128 for(i=0; i<=last; i++){ |
| 3129 const int j= scantable[i]; | |
| 3130 temp[j]= block[j]; | |
| 3131 block[j]=0; | |
| 3132 } | |
| 2967 | 3133 |
| 764 | 3134 for(i=0; i<=last; i++){ |
| 3135 const int j= scantable[i]; | |
| 3136 const int perm_j= permutation[j]; | |
| 3137 block[perm_j]= temp[j]; | |
| 3138 } | |
|
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
3139 } |
| 34 | 3140 |
/**
 * Comparison function that ignores all of its arguments and always
 * reports a score of 0.
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    (void)s;
    (void)a;
    (void)b;
    (void)stride;
    (void)h;
    return 0;
}
| 3144 | |
| 3145 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){ | |
| 3146 int i; | |
| 2967 | 3147 |
|
8976
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
3148 memset(cmp, 0, sizeof(void*)*6); |
|
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
3149 |
|
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
3150 for(i=0; i<6; i++){ |
| 1729 | 3151 switch(type&0xFF){ |
| 3152 case FF_CMP_SAD: | |
| 3153 cmp[i]= c->sad[i]; | |
| 3154 break; | |
| 3155 case FF_CMP_SATD: | |
| 3156 cmp[i]= c->hadamard8_diff[i]; | |
| 3157 break; | |
| 3158 case FF_CMP_SSE: | |
| 3159 cmp[i]= c->sse[i]; | |
| 3160 break; | |
| 3161 case FF_CMP_DCT: | |
| 3162 cmp[i]= c->dct_sad[i]; | |
| 3163 break; | |
|
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3164 case FF_CMP_DCT264: |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3165 cmp[i]= c->dct264_sad[i]; |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3166 break; |
| 2382 | 3167 case FF_CMP_DCTMAX: |
| 3168 cmp[i]= c->dct_max[i]; | |
| 3169 break; | |
| 1729 | 3170 case FF_CMP_PSNR: |
| 3171 cmp[i]= c->quant_psnr[i]; | |
| 3172 break; | |
| 3173 case FF_CMP_BIT: | |
| 3174 cmp[i]= c->bit[i]; | |
| 3175 break; | |
| 3176 case FF_CMP_RD: | |
| 3177 cmp[i]= c->rd[i]; | |
| 3178 break; | |
| 3179 case FF_CMP_VSAD: | |
| 3180 cmp[i]= c->vsad[i]; | |
| 3181 break; | |
| 3182 case FF_CMP_VSSE: | |
| 3183 cmp[i]= c->vsse[i]; | |
| 3184 break; | |
| 3185 case FF_CMP_ZERO: | |
| 3186 cmp[i]= zero_cmp; | |
| 3187 break; | |
|
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3188 case FF_CMP_NSSE: |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3189 cmp[i]= c->nsse[i]; |
|
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3190 break; |
| 11485 | 3191 #if CONFIG_DWT |
| 2184 | 3192 case FF_CMP_W53: |
| 3193 cmp[i]= c->w53[i]; | |
| 3194 break; | |
| 3195 case FF_CMP_W97: | |
| 3196 cmp[i]= c->w97[i]; | |
| 3197 break; | |
|
3373
b8996cc5ccae
Disable w53 and w97 cmp methods when snow encoder is disabled
gpoirier
parents:
3323
diff
changeset
|
3198 #endif |
| 1729 | 3199 default: |
| 3200 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n"); | |
| 3201 } | |
| 3202 } | |
| 3203 } | |
| 3204 | |
| 8288 | 3205 static void clear_block_c(DCTELEM *block) |
| 3206 { | |
| 3207 memset(block, 0, sizeof(DCTELEM)*64); | |
| 3208 } | |
| 3209 | |
| 1101 | 3210 /** |
| 3211 * memset(blocks, 0, sizeof(DCTELEM)*6*64) | |
| 3212 */ | |
|
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3213 static void clear_blocks_c(DCTELEM *blocks) |
| 296 | 3214 { |
| 3215 memset(blocks, 0, sizeof(DCTELEM)*6*64); | |
| 3216 } | |
| 3217 | |
| 866 | 3218 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){ |
| 6385 | 3219 long i; |
| 3220 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ | |
| 3221 long a = *(long*)(src+i); | |
| 3222 long b = *(long*)(dst+i); | |
| 3223 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80); | |
| 866 | 3224 } |
| 3225 for(; i<w; i++) | |
| 3226 dst[i+0] += src[i+0]; | |
| 3227 } | |
| 3228 | |
| 6384 | 3229 static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ |
| 6385 | 3230 long i; |
| 6384 | 3231 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ |
| 3232 long a = *(long*)(src1+i); | |
| 3233 long b = *(long*)(src2+i); | |
| 6385 | 3234 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80); |
| 6384 | 3235 } |
| 3236 for(; i<w; i++) | |
| 3237 dst[i] = src1[i]+src2[i]; | |
| 3238 } | |
| 3239 | |
| 866 | 3240 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ |
| 6385 | 3241 long i; |
| 8590 | 3242 #if !HAVE_FAST_UNALIGNED |
| 6385 | 3243 if((long)src2 & (sizeof(long)-1)){ |
| 6386 | 3244 for(i=0; i+7<w; i+=8){ |
| 3245 dst[i+0] = src1[i+0]-src2[i+0]; | |
| 3246 dst[i+1] = src1[i+1]-src2[i+1]; | |
| 3247 dst[i+2] = src1[i+2]-src2[i+2]; | |
| 3248 dst[i+3] = src1[i+3]-src2[i+3]; | |
| 3249 dst[i+4] = src1[i+4]-src2[i+4]; | |
| 3250 dst[i+5] = src1[i+5]-src2[i+5]; | |
| 3251 dst[i+6] = src1[i+6]-src2[i+6]; | |
| 3252 dst[i+7] = src1[i+7]-src2[i+7]; | |
| 3253 } | |
| 6385 | 3254 }else |
| 3255 #endif | |
| 3256 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ | |
| 3257 long a = *(long*)(src1+i); | |
| 3258 long b = *(long*)(src2+i); | |
| 3259 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80); | |
| 3260 } | |
| 866 | 3261 for(; i<w; i++) |
| 3262 dst[i+0] = src1[i+0]-src2[i+0]; | |
| 3263 } | |
| 3264 | |
/**
 * Reconstructs a row from median-prediction residuals.
 * Each output byte is mid_pred(left, top, (left + top - top_left) & 0xFF)
 * plus the residual, truncated to 8 bits, where top is src1[i].
 * @param dst      reconstructed row
 * @param src1     row used as the "top" neighbour
 * @param diff     residuals
 * @param w        number of bytes
 * @param left     in: left neighbour of the first byte; out: last output byte
 * @param left_top in: top-left neighbour of the first byte; out: last src1 byte
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    uint8_t cur_left    = *left;
    uint8_t cur_top_left= *left_top;
    int pos;

    for(pos=0; pos<w; pos++){
        const int top = src1[pos];
        const int pred= mid_pred(cur_left, top, (cur_left + top - cur_top_left)&0xFF);

        cur_left    = pred + diff[pos];   /* wraps to 8 bits on assignment */
        cur_top_left= top;
        dst[pos]    = cur_left;
    }

    *left    = cur_left;
    *left_top= cur_top_left;
}
| 3281 | |
/**
 * Produces median-prediction residuals for a row.
 * Each output byte is src2[i] minus
 * mid_pred(left, top, (left + top - top_left) & 0xFF), where top is src1[i]
 * and left is the previous src2 byte.
 * @param dst      residuals
 * @param src1     row used as the "top" neighbour
 * @param src2     row being predicted
 * @param w        number of bytes
 * @param left     in: left neighbour of the first byte; out: last src2 byte
 * @param left_top in: top-left neighbour of the first byte; out: last src1 byte
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    uint8_t cur_left    = *left;
    uint8_t cur_top_left= *left_top;
    int pos;

    for(pos=0; pos<w; pos++){
        const int top = src1[pos];
        const int pred= mid_pred(cur_left, top, (cur_left + top - cur_top_left)&0xFF);

        cur_top_left= top;
        cur_left    = src2[pos];
        dst[pos]    = cur_left - pred;
    }

    *left    = cur_left;
    *left_top= cur_top_left;
}
| 3299 | |
|
10420
442ab0c41eae
Huffyuv: Add missing const to src pointers in dsputil functions.
astrange
parents:
10370
diff
changeset
|
/**
 * Running sum ("left prediction") over a row of bytes.
 * dst[i] receives the low 8 bits of acc after src[0..i] have been added.
 * @param dst output row
 * @param src residuals
 * @param w   number of bytes
 * @param acc starting accumulator value
 * @return the accumulator after the last byte (not truncated to 8 bits)
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int pos;

    for(pos=0; pos<w; pos++){
        acc += src[pos];
        dst[pos]= acc;
    }

    return acc;
}
| 3318 | |
/* Byte offsets of the four channels inside a 32-bit pixel. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/**
 * Per-channel running sum ("left prediction") over a row of 4-byte pixels.
 * Each output channel byte is the low 8 bits of that channel's accumulator
 * after the corresponding src byte has been added.
 * @param dst   output pixels, 4 bytes each
 * @param src   residual pixels, 4 bytes each
 * @param w     number of pixels
 * @param red   in: starting red accumulator; out: value after the last pixel
 * @param green likewise for green
 * @param blue  likewise for blue
 * @param alpha likewise for alpha
 */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int acc_r= *red;
    int acc_g= *green;
    int acc_b= *blue;
    int acc_a= *alpha;
    int px;

    for(px=0; px<w; px++){
        const uint8_t *in = src + 4*px;
        uint8_t       *out= dst + 4*px;

        acc_b += in[B];
        acc_g += in[G];
        acc_r += in[R];
        acc_a += in[A];

        out[B]= acc_b;
        out[G]= acc_g;
        out[R]= acc_r;
        out[A]= acc_a;
    }

    *red  = acc_r;
    *green= acc_g;
    *blue = acc_b;
    *alpha= acc_a;
}
#undef B
#undef G
#undef R
#undef A
| 10370 | 3359 |
/**
 * Butterfly helpers for the Hadamard transforms below.
 * BUTTERFLY2 stores the sum and difference of two inputs into two outputs,
 * BUTTERFLY1 replaces two lvalues by their sum and difference in place, and
 * BUTTERFLYA yields |x+y| + |x-y| without storing anything.
 * The statement macros are wrapped in do { } while (0) so they act as a
 * single statement (safe under if/else); they must be followed by ';'.
 */
#define BUTTERFLY2(o1,o2,i1,i2) \
do {\
    (o1)= (i1)+(i2);\
    (o2)= (i1)-(i2);\
} while (0)

#define BUTTERFLY1(x,y) \
do {\
    const int bf_a_= (x);\
    const int bf_b_= (y);\
    (x)= bf_a_+bf_b_;\
    (y)= bf_a_-bf_b_;\
} while (0)

#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
| 936 | 3374 |
/**
 * Sum of absolute 8x8 Hadamard-transformed differences (src - dst).
 * The transform is run on each row, then on each column; the last column
 * stage is folded into the absolute-value accumulation.
 * @param s      unused
 * @param dst    first 8x8 area
 * @param src    second 8x8 area
 * @param stride distance between rows, applied to both areas
 * @param h      must be 8 (asserted)
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int temp[64];
    int sum=0;
    int i;

    assert(h==8);

    /* row pass on the pixel differences */
    for(i=0; i<8; i++){
        int *const row= temp + 8*i;
        const uint8_t *const sp= src + stride*i;
        const uint8_t *const dp= dst + stride*i;

        BUTTERFLY2(row[0], row[1], sp[0]-dp[0], sp[1]-dp[1]);
        BUTTERFLY2(row[2], row[3], sp[2]-dp[2], sp[3]-dp[3]);
        BUTTERFLY2(row[4], row[5], sp[4]-dp[4], sp[5]-dp[5]);
        BUTTERFLY2(row[6], row[7], sp[6]-dp[6], sp[7]-dp[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* column pass; the final stage only contributes its magnitudes */
    for(i=0; i<8; i++){
        int *const col= temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
              +BUTTERFLYA(col[8*1], col[8*5])
              +BUTTERFLYA(col[8*2], col[8*6])
              +BUTTERFLYA(col[8*3], col[8*7]);
    }
    return sum;
}
| 3426 | |
/**
 * Sum of absolute 8x8 Hadamard coefficients of src itself, with the
 * magnitude of temp[0]+temp[32] (the sum of all samples, noted as "-mean"
 * in the original code) subtracted at the end.
 * @param s      unused
 * @param src    8x8 area to transform
 * @param dummy  unused
 * @param stride distance between rows of src
 * @param h      must be 8 (asserted)
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int temp[64];
    int sum=0;
    int i;

    assert(h==8);

    /* row pass on the raw samples */
    for(i=0; i<8; i++){
        int *const row= temp + 8*i;
        const uint8_t *const sp= src + stride*i;

        BUTTERFLY2(row[0], row[1], sp[0], sp[1]);
        BUTTERFLY2(row[2], row[3], sp[2], sp[3]);
        BUTTERFLY2(row[4], row[5], sp[4], sp[5]);
        BUTTERFLY2(row[6], row[7], sp[6], sp[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* column pass; the final stage only contributes its magnitudes */
    for(i=0; i<8; i++){
        int *const col= temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
              +BUTTERFLYA(col[8*1], col[8*5])
              +BUTTERFLYA(col[8*2], col[8*6])
              +BUTTERFLYA(col[8*3], col[8*7]);
    }

    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}
| 3474 | |
| 1708 | 3475 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
| 936 | 3476 MpegEncContext * const s= (MpegEncContext *)c; |
| 11195 | 3477 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
| 2967 | 3478 |
| 1708 | 3479 assert(h==8); |
| 936 | 3480 |
| 3481 s->dsp.diff_pixels(temp, src1, src2, stride); | |
| 1092 | 3482 s->dsp.fdct(temp); |
|
4988
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
3483 return s->dsp.sum_abs_dctelem(temp); |
| 936 | 3484 } |
| 3485 | |
| 8590 | 3486 #if CONFIG_GPL |
|
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3487 #define DCT8_1D {\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3488 const int s07 = SRC(0) + SRC(7);\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3489 const int s16 = SRC(1) + SRC(6);\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3490 const int s25 = SRC(2) + SRC(5);\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3491 const int s34 = SRC(3) + SRC(4);\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3492 const int a0 = s07 + s34;\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3493 const int a1 = s16 + s25;\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3494 const int a2 = s07 - s34;\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3495 const int a3 = s16 - s25;\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3496 const int d07 = SRC(0) - SRC(7);\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3497 const int d16 = SRC(1) - SRC(6);\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3498 const int d25 = SRC(2) - SRC(5);\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3499 const int d34 = SRC(3) - SRC(4);\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3500 const int a4 = d16 + d25 + (d07 + (d07>>1));\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3501 const int a5 = d07 - d34 - (d25 + (d25>>1));\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3502 const int a6 = d07 + d34 - (d16 + (d16>>1));\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3503 const int a7 = d16 - d25 + (d34 + (d34>>1));\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3504 DST(0, a0 + a1 ) ;\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3505 DST(1, a4 + (a7>>2)) ;\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3506 DST(2, a2 + (a3>>1)) ;\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3507 DST(3, a5 + (a6>>2)) ;\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3508 DST(4, a0 - a1 ) ;\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3509 DST(5, a6 - (a5>>2)) ;\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3510 DST(6, (a2>>1) - a3 ) ;\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3511 DST(7, (a4>>2) - a7 ) ;\ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3512 } |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3513 |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3514 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3515 MpegEncContext * const s= (MpegEncContext *)c; |
| 5256 | 3516 DCTELEM dct[8][8]; |
|
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3517 int i; |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3518 int sum=0; |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3519 |
| 5256 | 3520 s->dsp.diff_pixels(dct[0], src1, src2, stride); |
|
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3521 |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3522 #define SRC(x) dct[i][x] |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3523 #define DST(x,v) dct[i][x]= v |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3524 for( i = 0; i < 8; i++ ) |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3525 DCT8_1D |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3526 #undef SRC |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3527 #undef DST |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3528 |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3529 #define SRC(x) dct[x][i] |
| 4001 | 3530 #define DST(x,v) sum += FFABS(v) |
|
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3531 for( i = 0; i < 8; i++ ) |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3532 DCT8_1D |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3533 #undef SRC |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3534 #undef DST |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3535 return sum; |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3536 } |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3537 #endif |
|
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3538 |
| 2382 | 3539 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
| 3540 MpegEncContext * const s= (MpegEncContext *)c; | |
| 11195 | 3541 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
| 2382 | 3542 int sum=0, i; |
| 2967 | 3543 |
| 2382 | 3544 assert(h==8); |
| 3545 | |
| 3546 s->dsp.diff_pixels(temp, src1, src2, stride); | |
| 3547 s->dsp.fdct(temp); | |
| 3548 | |
| 3549 for(i=0; i<64; i++) | |
| 4001 | 3550 sum= FFMAX(sum, FFABS(temp[i])); |
| 2967 | 3551 |
| 2382 | 3552 return sum; |
| 3553 } | |
| 3554 | |
| 1708 | 3555 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
| 936 | 3556 MpegEncContext * const s= (MpegEncContext *)c; |
| 11195 | 3557 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]); |
| 11193 | 3558 DCTELEM * const bak = temp+64; |
| 936 | 3559 int sum=0, i; |
| 3560 | |
| 1708 | 3561 assert(h==8); |
| 936 | 3562 s->mb_intra=0; |
| 2967 | 3563 |
| 936 | 3564 s->dsp.diff_pixels(temp, src1, src2, stride); |
| 2967 | 3565 |
| 936 | 3566 memcpy(bak, temp, 64*sizeof(DCTELEM)); |
| 2967 | 3567 |
| 1013 | 3568 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); |
| 1689 | 3569 s->dct_unquantize_inter(s, temp, 0, s->qscale); |
| 6001 | 3570 ff_simple_idct(temp); //FIXME |
| 2967 | 3571 |
| 936 | 3572 for(i=0; i<64; i++) |
| 3573 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); | |
| 2967 | 3574 |
| 936 | 3575 return sum; |
| 3576 } | |
| 3577 | |
| 1708 | 3578 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
| 1007 | 3579 MpegEncContext * const s= (MpegEncContext *)c; |
| 1064 | 3580 const uint8_t *scantable= s->intra_scantable.permutated; |
| 11195 | 3581 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
| 3582 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]); | |
| 3583 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]); | |
| 6719 | 3584 int i, last, run, bits, level, distortion, start_i; |
| 1007 | 3585 const int esc_length= s->ac_esc_length; |
| 3586 uint8_t * length; | |
| 3587 uint8_t * last_length; | |
| 2967 | 3588 |
| 1708 | 3589 assert(h==8); |
| 3590 | |
| 10068 | 3591 copy_block8(lsrc1, src1, 8, stride, 8); |
| 3592 copy_block8(lsrc2, src2, 8, stride, 8); | |
| 3593 | |
| 3594 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8); | |
| 1007 | 3595 |
| 1013 | 3596 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); |
| 3597 | |
| 3598 bits=0; | |
| 2967 | 3599 |
| 1013 | 3600 if (s->mb_intra) { |
| 2967 | 3601 start_i = 1; |
| 1013 | 3602 length = s->intra_ac_vlc_length; |
| 3603 last_length= s->intra_ac_vlc_last_length; | |
| 3604 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma | |
| 3605 } else { | |
| 3606 start_i = 0; | |
| 3607 length = s->inter_ac_vlc_length; | |
| 3608 last_length= s->inter_ac_vlc_last_length; | |
| 3609 } | |
| 2967 | 3610 |
| 1013 | 3611 if(last>=start_i){ |
| 1007 | 3612 run=0; |
| 3613 for(i=start_i; i<last; i++){ | |
| 3614 int j= scantable[i]; | |
| 3615 level= temp[j]; | |
| 2967 | 3616 |
| 1007 | 3617 if(level){ |
| 3618 level+=64; | |
| 3619 if((level&(~127)) == 0){ | |
| 3620 bits+= length[UNI_AC_ENC_INDEX(run, level)]; | |
| 3621 }else | |
| 3622 bits+= esc_length; | |
| 3623 run=0; | |
| 3624 }else | |
| 3625 run++; | |
| 3626 } | |
| 3627 i= scantable[last]; | |
| 2967 | 3628 |
| 1011 | 3629 level= temp[i] + 64; |
| 3630 | |
| 3631 assert(level - 64); | |
| 2967 | 3632 |
| 1007 | 3633 if((level&(~127)) == 0){ |
| 3634 bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; | |
| 3635 }else | |
| 3636 bits+= esc_length; | |
| 2967 | 3637 |
| 1013 | 3638 } |
| 3639 | |
| 3640 if(last>=0){ | |
| 1689 | 3641 if(s->mb_intra) |
| 3642 s->dct_unquantize_intra(s, temp, 0, s->qscale); | |
| 3643 else | |
| 3644 s->dct_unquantize_inter(s, temp, 0, s->qscale); | |
| 1007 | 3645 } |
| 2967 | 3646 |
| 10068 | 3647 s->dsp.idct_add(lsrc2, 8, temp); |
| 3648 | |
| 3649 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8); | |
| 6719 | 3650 |
| 3651 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7); | |
| 1007 | 3652 } |
| 3653 | |
| 1708 | 3654 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
| 1007 | 3655 MpegEncContext * const s= (MpegEncContext *)c; |
| 1064 | 3656 const uint8_t *scantable= s->intra_scantable.permutated; |
| 11195 | 3657 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
| 1007 | 3658 int i, last, run, bits, level, start_i; |
| 3659 const int esc_length= s->ac_esc_length; | |
| 3660 uint8_t * length; | |
| 3661 uint8_t * last_length; | |
| 1708 | 3662 |
| 3663 assert(h==8); | |
| 2967 | 3664 |
| 1013 | 3665 s->dsp.diff_pixels(temp, src1, src2, stride); |
| 1007 | 3666 |
| 1013 | 3667 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); |
| 3668 | |
| 3669 bits=0; | |
| 2967 | 3670 |
| 1007 | 3671 if (s->mb_intra) { |
| 2967 | 3672 start_i = 1; |
| 1007 | 3673 length = s->intra_ac_vlc_length; |
| 3674 last_length= s->intra_ac_vlc_last_length; | |
| 1013 | 3675 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma |
| 1007 | 3676 } else { |
| 3677 start_i = 0; | |
| 3678 length = s->inter_ac_vlc_length; | |
| 3679 last_length= s->inter_ac_vlc_last_length; | |
| 3680 } | |
| 2967 | 3681 |
| 1013 | 3682 if(last>=start_i){ |
| 1007 | 3683 run=0; |
| 3684 for(i=start_i; i<last; i++){ | |
| 3685 int j= scantable[i]; | |
| 3686 level= temp[j]; | |
| 2967 | 3687 |
| 1007 | 3688 if(level){ |
| 3689 level+=64; | |
| 3690 if((level&(~127)) == 0){ | |
| 3691 bits+= length[UNI_AC_ENC_INDEX(run, level)]; | |
| 3692 }else | |
| 3693 bits+= esc_length; | |
| 3694 run=0; | |
| 3695 }else | |
| 3696 run++; | |
| 3697 } | |
| 3698 i= scantable[last]; | |
| 2967 | 3699 |
| 1013 | 3700 level= temp[i] + 64; |
| 2967 | 3701 |
| 1013 | 3702 assert(level - 64); |
| 2967 | 3703 |
| 1007 | 3704 if((level&(~127)) == 0){ |
| 3705 bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; | |
| 3706 }else | |
| 3707 bits+= esc_length; | |
| 3708 } | |
| 3709 | |
| 3710 return bits; | |
| 3711 } | |
| 3712 | |
/**
 * Generates vsad_intra<size>_c(): the sum, over rows 1..h-1 and the first
 * <size> columns, of the absolute difference between each sample and the
 * sample directly below it. Only instantiated for widths 8 and 16.
 * The c and dummy parameters of the generated functions are unused.
 */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    const uint8_t *cur = s; \
    int total = 0; \
    int row, col; \
    \
    for (row = 1; row < h; row++) { \
        const uint8_t *below = cur + stride; \
        for (col = 0; col < size; col++) \
            total += FFABS(cur[col] - below[col]); \
        cur = below; \
    } \
    \
    return total; \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
| 1729 | 3730 |
/**
 * Vertical-gradient SAD between two 16-sample-wide blocks.
 *
 * For every pair of vertically adjacent rows, accumulates
 * |(s1 - s2)[row] - (s1 - s2)[row + 1]| over the 16 columns.
 *
 * @param c      unused
 * @param stride distance in bytes between rows, shared by s1 and s2
 * @param h      number of rows; h - 1 row pairs are visited
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            const int d = s1[col] - s2[col] - s1[col + stride] + s2[col + stride];
            total += FFABS(d);
        }
        s1 += stride;
        s2 += stride;
    }

    return total;
}
| 3745 | |
/** Square of an expression; the argument is evaluated twice. */
#define SQ(a) ((a)*(a))

/**
 * Generates vsse_intra<size>_c(): the sum, over rows 1..h-1 and the first
 * <size> columns, of the squared difference between each sample and the
 * sample directly below it. Only instantiated for widths 8 and 16.
 * The c and dummy parameters of the generated functions are unused.
 */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    const uint8_t *cur = s; \
    int total = 0; \
    int row, col; \
    \
    for (row = 1; row < h; row++) { \
        const uint8_t *below = cur + stride; \
        for (col = 0; col < size; col++) \
            total += SQ(cur[col] - below[col]); \
        cur = below; \
    } \
    \
    return total; \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
| 1729 | 3764 |
/**
 * Vertical-gradient SSE between two 16-sample-wide blocks.
 *
 * For every pair of vertically adjacent rows, accumulates the square of
 * (s1 - s2)[row] - (s1 - s2)[row + 1] over the 16 columns.
 *
 * @param c      unused
 * @param stride distance in bytes between rows, shared by s1 and s2
 * @param h      number of rows; h - 1 row pairs are visited
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            const int d = s1[col] - s2[col] - s1[col + stride] + s2[col + stride];
            total += SQ(d);
        }
        s1 += stride;
        s2 += stride;
    }

    return total;
}
| 3779 | |
/**
 * Sum of squared differences between an int8_t and an int16_t vector.
 *
 * @param pix1 first vector, size elements
 * @param pix2 second vector, size elements
 * @param size number of elements to compare
 * @return sum over all elements of (pix1[i] - pix2[i])^2
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size)
{
    int total = 0;
    int k = 0;

    while (k < size) {
        const int d = pix1[k] - pix2[k];
        total += d*d;
        k++;
    }
    return total;
}
| 3788 | |
|
6056
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3789 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c) |
|
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3790 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c) |
|
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3791 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c) |
| 8590 | 3792 #if CONFIG_GPL |
|
6056
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3793 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c) |
| 3013 | 3794 #endif |
|
6056
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3795 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c) |
|
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3796 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) |
|
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3797 WRAPPER8_16_SQ(rd8x8_c, rd16_c) |
|
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3798 WRAPPER8_16_SQ(bit8x8_c, bit16_c) |
| 936 | 3799 |
|
3568
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
/**
 * In-place element-wise product: dst[i] = dst[i] * src[i] for i < len.
 */
static void vector_fmul_c(float *dst, const float *src, int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = dst[k] * src[k];
        k++;
    }
}
|
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
3805 |
|
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
/**
 * Element-wise product of src0 with src1 read back to front:
 * dst[i] = src0[i] * src1[len - 1 - i] for i < len.
 */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const int mirrored = len - 1 - k;
        dst[k] = src0[k] * src1[mirrored];
    }
}
|
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
3812 |
|
10300
4d1b9ca628fc
Drop unused args from vector_fmul_add_add, simpify code, and rename
mru
parents:
10219
diff
changeset
|
/**
 * Element-wise multiply-add: dst[i] = src0[i] * src1[i] + src2[i] for i < len.
 */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src0[k] * src1[k] + src2[k];
        k++;
    }
}
|
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
3818 |
/**
 * Windowed combination of two len-element inputs into 2*len outputs.
 *
 * For k = 0 .. len-1, with m = 2*len - 1 - k:
 *   dst[k] = src0[k]*win[m] - src1[len-1-k]*win[k] + add_bias
 *   dst[m] = src0[k]*win[k] + src1[len-1-k]*win[m] + add_bias
 *
 * @param dst      output, 2*len elements
 * @param src0     first input, len elements
 * @param src1     second input, len elements (read back to front)
 * @param win      window, 2*len elements
 * @param add_bias constant added to every output
 * @param len      number of elements in each input
 */
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const int   m  = 2*len - 1 - k;     /* mirrored position */
        const float s0 = src0[k];
        const float s1 = src1[len - 1 - k];
        const float wi = win[k];
        const float wj = win[m];

        dst[k] = s0*wj - s1*wi + add_bias;
        dst[m] = s0*wi + s1*wj + add_bias;
    }
}
| 3833 | |
/**
 * Scale a vector by a constant: dst[i] = src[i] * mul for i < len.
 */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src[k] * mul;
        k++;
    }
}
| 3841 | |
/**
 * Multiply src by a sequence of 2-element vectors and by a scalar.
 *
 * Each pair of output elements uses the next pointer from sv:
 * dst[i+n] = src[i+n] * sv[i/2][n] * mul for n = 0, 1.
 * Elements are processed two at a time while i < len.
 */
static void vector_fmul_sv_scalar_2_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int k;

    for (k = 0; k < len; k += 2) {
        const float *vec = *sv++;

        dst[k    ] = src[k    ] * vec[0] * mul;
        dst[k + 1] = src[k + 1] * vec[1] * mul;
    }
}
| 3851 | |
/**
 * Multiply src by a sequence of 4-element vectors and by a scalar.
 *
 * Each group of four output elements uses the next pointer from sv:
 * dst[i+n] = src[i+n] * sv[i/4][n] * mul for n = 0..3.
 * Elements are processed four at a time while i < len.
 */
static void vector_fmul_sv_scalar_4_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int k;

    for (k = 0; k < len; k += 4) {
        const float *vec = *sv++;

        dst[k    ] = src[k    ] * vec[0] * mul;
        dst[k + 1] = src[k + 1] * vec[1] * mul;
        dst[k + 2] = src[k + 2] * vec[2] * mul;
        dst[k + 3] = src[k + 3] * vec[3] * mul;
    }
}
| 3863 | |
/**
 * Expand a sequence of 2-element vectors into dst, scaled by mul:
 * dst[i+n] = sv[i/2][n] * mul for n = 0, 1.
 * Elements are written two at a time while i < len.
 */
static void sv_fmul_scalar_2_c(float *dst, const float **sv, float mul,
                               int len)
{
    int k;

    for (k = 0; k < len; k += 2) {
        const float *vec = *sv++;

        dst[k    ] = vec[0] * mul;
        dst[k + 1] = vec[1] * mul;
    }
}
| 3873 | |
/**
 * Expand a sequence of 4-element vectors into dst, scaled by mul:
 * dst[i+n] = sv[i/4][n] * mul for n = 0..3.
 * Elements are written four at a time while i < len.
 */
static void sv_fmul_scalar_4_c(float *dst, const float **sv, float mul,
                               int len)
{
    int k;

    for (k = 0; k < len; k += 4) {
        const float *vec = *sv++;

        dst[k    ] = vec[0] * mul;
        dst[k + 1] = vec[1] * mul;
        dst[k + 2] = vec[2] * mul;
        dst[k + 3] = vec[3] * mul;
    }
}
| 3885 | |
/**
 * In-place sum/difference of two vectors:
 * v1[i] becomes v1[i] + v2[i] and v2[i] becomes v1[i] - v2[i]
 * (both computed from the values held before the call).
 * The two vectors are declared restrict, i.e. must not overlap.
 */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const float a = v1[k];
        const float b = v2[k];

        v1[k] = a + b;
        v2[k] = a - b;
    }
}
| 3896 | |
/**
 * Dot product of two float vectors, accumulated in a float in index order.
 *
 * @return sum of v1[i] * v2[i] for i < len; 0 when len <= 0
 */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0;
    int k = 0;

    while (k < len) {
        acc += v1[k] * v2[k];
        k++;
    }

    return acc;
}
| 3907 | |
/**
 * Convert integers to float and scale them: dst[i] = src[i] * mul for i < len.
 */
static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src[k] * mul;
        k++;
    }
}
| 3913 | |
|
10104
0fa3d21b317e
SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents:
10094
diff
changeset
|
/**
 * Clip one value using unsigned comparisons on raw 32-bit patterns.
 *
 * vector_clipf_c_opposite_sign() calls this with the bit patterns of
 * floats, for a range whose lower bound is negative and whose upper bound
 * is positive.
 *
 * @param a        pattern to clip
 * @param mini     pattern of the lower bound; returned when a > mini
 *                 as unsigned integers
 * @param maxi     pattern of the upper bound
 * @param maxisign maxi with bit 31 flipped; maxi is returned when a with
 *                 bit 31 flipped exceeds this value
 * @return mini, maxi or a unchanged
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    /* 1U, not 1: shifting a signed 1 into bit 31 is undefined behaviour */
    const uint32_t sign_bit = 1U << 31;

    if (a > mini)
        return mini;
    else if ((a ^ sign_bit) > maxisign)
        return maxi;
    else
        return a;
}
|
0fa3d21b317e
SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents:
10094
diff
changeset
|
3922 |
/**
 * Clip a float vector to [*min, *max] with integer comparisons on the
 * floats' bit patterns (see clipf_c_one()).
 *
 * vector_clipf_c() only takes this path when *min < 0 and *max > 0.
 * Elements are processed in groups of 8 while i < len, so a len that is
 * not a multiple of 8 touches elements past len.
 *
 * The bit patterns are obtained through a union rather than by casting
 * float pointers to uint32_t pointers, and the sign mask is built from an
 * unsigned constant, so the function no longer depends on
 * undefined behaviour.
 *
 * @param dst output vector
 * @param src input vector (may be the same as dst)
 * @param min pointer to the lower bound
 * @param max pointer to the upper bound
 * @param len number of elements
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    union { float f; uint32_t u; } lo, hi, v;
    uint32_t mini, maxi, maxisign;
    int i, k;

    lo.f     = *min;
    hi.f     = *max;
    mini     = lo.u;
    maxi     = hi.u;
    maxisign = maxi ^ (1U << 31);

    for (i = 0; i < len; i += 8) {
        for (k = 0; k < 8; k++) {
            v.f        = src[i + k];
            v.u        = clipf_c_one(v.u, mini, maxi, maxisign);
            dst[i + k] = v.f;
        }
    }
}
/**
 * Clip every element of src to [min, max] and store the result in dst.
 *
 * When min is negative and max is positive the work is delegated to
 * vector_clipf_c_opposite_sign(); otherwise av_clipf() is applied to each
 * element. Elements are processed in groups of 8 while i < len, so a len
 * that is not a multiple of 8 touches elements past len.
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int base, k;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (base = 0; base < len; base += 8) {
        for (k = 0; k < 8; k++)
            dst[base + k] = av_clipf(src[base + k], min, max);
    }
}
|
0fa3d21b317e
SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents:
10094
diff
changeset
|
3958 |
| 7261 | 3959 static av_always_inline int float_to_int16_one(const float *src){ |
| 3960 int_fast32_t tmp = *(const int32_t*)src; | |
| 3961 if(tmp & 0xf0000){ | |
| 3962 tmp = (0x43c0ffff - tmp)>>31; | |
| 3963 // is this faster on some gcc/cpu combinations? | |
| 3964 // if(tmp > 0x43c0ffff) tmp = 0xFFFF; | |
| 3965 // else tmp = 0; | |
| 3966 } | |
| 3967 return tmp - 0x8000; | |
| 3968 } | |
| 3969 | |
/**
 * Convert len floats to int16_t samples with float_to_int16_one().
 *
 * The loop index has the same type as len (long); an int index could not
 * reach a len above INT_MAX.
 *
 * @param dst output, len elements
 * @param src input, len elements
 * @param len number of samples
 */
void ff_float_to_int16_c(int16_t *dst, const float *src, long len){
    long i;

    for (i = 0; i < len; i++)
        dst[i] = float_to_int16_one(src + i);
}
| 3975 | |
|
7286
e267f2519248
float_to_int16_interleave: change src to an array of pointers instead of assuming it's contiguous.
lorenm
parents:
7263
diff
changeset
|
/**
 * Convert planar float samples to interleaved int16_t.
 *
 * Sample i of channel c is read from src[c][i], converted with
 * float_to_int16_one() and stored at dst[i*channels + c].
 *
 * @param dst      output buffer, must hold at least len*channels elements
 * @param src      array of channels pointers, each to at least len samples
 * @param len      number of samples per channel
 * @param channels number of channels
 */
void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels){
    /* i and j are long like len: j runs up to len*channels and must not
     * overflow an int before the loop terminates */
    long i, j;
    int c;
    if(channels==2){
        /* stereo: handle both channels in a single pass */
        for(i=0; i<len; i++){
            dst[2*i]   = float_to_int16_one(src[0]+i);
            dst[2*i+1] = float_to_int16_one(src[1]+i);
        }
    }else{
        /* generic case: one pass per channel, stepping dst by channels */
        for(c=0; c<channels; c++)
            for(i=0, j=c; i<len; i++, j+=channels)
                dst[j] = float_to_int16_one(src[c]+i);
    }
}
|
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
3989 |
|
7203
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
6719
diff
changeset
|
/**
 * Scalar product of two int16_t vectors, with every individual product
 * shifted right by shift before it is accumulated.
 *
 * @param v1    first vector, order elements
 * @param v2    second vector, order elements
 * @param order number of elements to process
 * @param shift right shift applied to each product
 * @return the accumulated sum
 */
static int32_t scalarproduct_int16_c(int16_t * v1, int16_t * v2, int order, int shift)
{
    int acc = 0;
    int k;

    for (k = 0; order != 0; order--, k++)
        acc += (v1[k] * v2[k]) >> shift;

    return acc;
}
|
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
6719
diff
changeset
|
3999 |
/**
 * Compute the scalar product of v1 and v2 and, in the same pass, update v1
 * in place with v1[i] += mul * v3[i].
 *
 * Each product uses the value of v1[i] from before the update.
 *
 * @param v1    first vector, read and then modified, order elements
 * @param v2    second vector, order elements
 * @param v3    vector that is scaled by mul and added to v1, order elements
 * @param order number of elements to process
 * @param mul   multiplier applied to v3
 * @return the scalar product of the original v1 with v2
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, int16_t *v2, int16_t *v3, int order, int mul)
{
    int dot = 0;
    int k;

    for (k = 0; order != 0; order--, k++) {
        dot   += v1[k] * v2[k];
        v1[k] += mul * v3[k];
    }
    return dot;
}
| 4009 | |
/* Fixed-point IDCT constants, scaled by 2048. */
#define W0 2048
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */

/**
 * In-place 1-D transform of one row of 8 consecutive coefficients.
 * All inputs are read before any output is written.
 */
static void wmv2_idct_row(short * b)
{
    /* step 1: terms built from the odd-indexed inputs ... */
    const int o1 = W1 * b[1] + W7 * b[7];
    const int o7 = W7 * b[1] - W1 * b[7];
    const int o5 = W5 * b[5] + W3 * b[3];
    const int o3 = W3 * b[5] - W5 * b[3];
    /* ... and from the even-indexed inputs */
    const int e2 = W2 * b[2] + W6 * b[6];
    const int e6 = W6 * b[2] - W2 * b[6];
    const int e0 = W0 * b[0] + W0 * b[4];
    const int e4 = W0 * b[0] - W0 * b[4];
    /* step 2: 181/256 is approximately 1/sqrt(2) */
    const int r1 = (181 * (o1 - o5 + o7 - o3) + 128) >> 8;
    const int r2 = (181 * (o1 - o5 - o7 + o3) + 128) >> 8;
    /* rounding term for the final >>8 */
    const int rnd = 1 << 7;

    /* step 3 */
    b[0] = (e0 + e2 + o1 + o5 + rnd) >> 8;
    b[1] = (e4 + e6 + r1      + rnd) >> 8;
    b[2] = (e4 - e6 + r2      + rnd) >> 8;
    b[3] = (e0 - e2 + o7 + o3 + rnd) >> 8;
    b[4] = (e0 - e2 - o7 - o3 + rnd) >> 8;
    b[5] = (e4 - e6 - r2      + rnd) >> 8;
    b[6] = (e4 + e6 - r1      + rnd) >> 8;
    b[7] = (e0 + e2 - o1 - o5 + rnd) >> 8;
}

/**
 * In-place 1-D transform of one column: the 8 coefficients are b[0], b[8],
 * ..., b[56]. All inputs are read before any output is written.
 */
static void wmv2_idct_col(short * b)
{
    /* step 1, with extended precision: intermediate terms are only scaled
     * down by 3 bits here, the remaining scaling happens in step 3 */
    const int o1 = (W1 * b[8*1] + W7 * b[8*7] + 4) >> 3;
    const int o7 = (W7 * b[8*1] - W1 * b[8*7] + 4) >> 3;
    const int o5 = (W5 * b[8*5] + W3 * b[8*3] + 4) >> 3;
    const int o3 = (W3 * b[8*5] - W5 * b[8*3] + 4) >> 3;
    const int e2 = (W2 * b[8*2] + W6 * b[8*6] + 4) >> 3;
    const int e6 = (W6 * b[8*2] - W2 * b[8*6] + 4) >> 3;
    /* no rounding term is added for the W0 terms */
    const int e0 = (W0 * b[8*0] + W0 * b[8*4]) >> 3;
    const int e4 = (W0 * b[8*0] - W0 * b[8*4]) >> 3;
    /* step 2 */
    const int r1 = (181 * (o1 - o5 + o7 - o3) + 128) >> 8;
    const int r2 = (181 * (o1 - o5 - o7 + o3) + 128) >> 8;
    /* rounding term for the final >>14 */
    const int rnd = 1 << 13;

    /* step 3 */
    b[8*0] = (e0 + e2 + o1 + o5 + rnd) >> 14;
    b[8*1] = (e4 + e6 + r1      + rnd) >> 14;
    b[8*2] = (e4 - e6 + r2      + rnd) >> 14;
    b[8*3] = (e0 - e2 + o7 + o3 + rnd) >> 14;

    b[8*4] = (e0 - e2 - o7 - o3 + rnd) >> 14;
    b[8*5] = (e4 - e6 - r2      + rnd) >> 14;
    b[8*6] = (e4 + e6 - r1      + rnd) >> 14;
    b[8*7] = (e0 + e2 - o1 - o5 + rnd) >> 14;
}

/**
 * In-place 8x8 inverse DCT: the row transform is applied to each of the
 * 8 rows, then the column transform to each of the 8 columns.
 *
 * @param block 64 coefficients, stored row by row
 */
void ff_wmv2_idct_c(short * block){
    int row, col;

    for (row = 0; row < 8; row++)
        wmv2_idct_row(block + 8 * row);

    for (col = 0; col < 8; col++)
        wmv2_idct_col(block + col);
}
| 1092 | 4082 /* XXX: those functions should be suppressed ASAP when all IDCTs are |
| 4083 converted */ | |
| 5887 | 4084 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block) |
| 4085 { | |
| 4086 ff_wmv2_idct_c(block); | |
| 4087 put_pixels_clamped_c(block, dest, line_size); | |
| 4088 } | |
| 4089 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block) | |
| 4090 { | |
| 4091 ff_wmv2_idct_c(block); | |
| 4092 add_pixels_clamped_c(block, dest, line_size); | |
| 4093 } | |
| 1092 | 4094 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block) |
| 4095 { | |
| 4096 j_rev_dct (block); | |
| 4097 put_pixels_clamped_c(block, dest, line_size); | |
| 4098 } | |
| 4099 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block) | |
| 4100 { | |
| 4101 j_rev_dct (block); | |
| 4102 add_pixels_clamped_c(block, dest, line_size); | |
| 4103 } | |
| 4104 | |
| 2256 | 4105 static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block) |
| 4106 { | |
| 4107 j_rev_dct4 (block); | |
| 4108 put_pixels_clamped4_c(block, dest, line_size); | |
| 4109 } | |
| 4110 static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block) | |
| 4111 { | |
| 4112 j_rev_dct4 (block); | |
| 4113 add_pixels_clamped4_c(block, dest, line_size); | |
| 4114 } | |
| 4115 | |
| 2257 | 4116 static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block) |
| 4117 { | |
| 4118 j_rev_dct2 (block); | |
| 4119 put_pixels_clamped2_c(block, dest, line_size); | |
| 4120 } | |
| 4121 static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block) | |
| 4122 { | |
| 4123 j_rev_dct2 (block); | |
| 4124 add_pixels_clamped2_c(block, dest, line_size); | |
| 4125 } | |
| 4126 | |
| 2259 | 4127 static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block) |
| 4128 { | |
| 4176 | 4129 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
| 2259 | 4130 |
| 4131 dest[0] = cm[(block[0] + 4)>>3]; | |
| 4132 } | |
| 4133 static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block) | |
| 4134 { | |
| 4176 | 4135 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
| 2259 | 4136 |
| 4137 dest[0] = cm[dest[0] + ((block[0] + 4)>>3)]; | |
| 4138 } | |
| 4139 | |
| 5143 | 4140 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; } |
|
3215
06f98047ff26
prefetch pixels for future motion compensation. 2-5% faster h264.
lorenm
parents:
3199
diff
changeset
|
4141 |
| 1201 | 4142 /* init static data */ |
| 10867 | 4143 av_cold void dsputil_static_init(void) |
| 0 | 4144 { |
| 751 | 4145 int i; |
| 0 | 4146 |
| 4176 | 4147 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i; |
| 1201 | 4148 for(i=0;i<MAX_NEG_CROP;i++) { |
| 4176 | 4149 ff_cropTbl[i] = 0; |
| 4150 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255; | |
| 1201 | 4151 } |
| 2967 | 4152 |
| 1201 | 4153 for(i=0;i<512;i++) { |
| 4179 | 4154 ff_squareTbl[i] = (i - 256) * (i - 256); |
| 1201 | 4155 } |
| 2967 | 4156 |
| 4197 | 4157 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; |
| 1201 | 4158 } |
| 0 | 4159 |
|
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4160 int ff_check_alignment(void){ |
|
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4161 static int did_fail=0; |
| 11369 | 4162 DECLARE_ALIGNED(16, int, aligned); |
|
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4163 |
| 9259 | 4164 if((intptr_t)&aligned & 15){ |
|
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4165 if(!did_fail){ |
| 8590 | 4166 #if HAVE_MMX || HAVE_ALTIVEC |
|
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4167 av_log(NULL, AV_LOG_ERROR, |
| 4292 | 4168 "Compiler did not align stack variables. Libavcodec has been miscompiled\n" |
| 4169 "and may be very slow or crash. This is not a bug in libavcodec,\n" | |
|
5542
b0a566346fb1
Add attribute that forces alignment of stack to functions that need it.
ramiro
parents:
5520
diff
changeset
|
4170 "but in the compiler. You may try recompiling using gcc >= 4.2.\n" |
|
b0a566346fb1
Add attribute that forces alignment of stack to functions that need it.
ramiro
parents:
5520
diff
changeset
|
4171 "Do not report crashes to FFmpeg developers.\n"); |
|
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4172 #endif |
|
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4173 did_fail=1; |
|
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4174 } |
|
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4175 return -1; |
|
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4176 } |
|
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4177 return 0; |
|
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4178 } |
| 861 | 4179 |
| 10867 | 4180 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) |
| 1201 | 4181 { |
| 4182 int i; | |
| 0 | 4183 |
|
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4184 ff_check_alignment(); |
|
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4185 |
| 8590 | 4186 #if CONFIG_ENCODERS |
| 1567 | 4187 if(avctx->dct_algo==FF_DCT_FASTINT) { |
| 1092 | 4188 c->fdct = fdct_ifast; |
| 2979 | 4189 c->fdct248 = fdct_ifast248; |
| 2967 | 4190 } |
| 1567 | 4191 else if(avctx->dct_algo==FF_DCT_FAAN) { |
| 1557 | 4192 c->fdct = ff_faandct; |
| 2979 | 4193 c->fdct248 = ff_faandct248; |
| 2967 | 4194 } |
| 1567 | 4195 else { |
| 1092 | 4196 c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default |
| 2979 | 4197 c->fdct248 = ff_fdct248_islow; |
| 1567 | 4198 } |
| 1092 | 4199 #endif //CONFIG_ENCODERS |
| 4200 | |
| 2256 | 4201 if(avctx->lowres==1){ |
|
8596
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4202 if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !CONFIG_H264_DECODER){ |
|
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
4203 c->idct_put= ff_jref_idct4_put; |
|
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
4204 c->idct_add= ff_jref_idct4_add; |
|
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
4205 }else{ |
|
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
4206 c->idct_put= ff_h264_lowres_idct_put_c; |
|
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
4207 c->idct_add= ff_h264_lowres_idct_add_c; |
|
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
4208 } |
| 2256 | 4209 c->idct = j_rev_dct4; |
| 1092 | 4210 c->idct_permutation_type= FF_NO_IDCT_PERM; |
| 2257 | 4211 }else if(avctx->lowres==2){ |
| 4212 c->idct_put= ff_jref_idct2_put; | |
| 4213 c->idct_add= ff_jref_idct2_add; | |
| 4214 c->idct = j_rev_dct2; | |
| 4215 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
| 2259 | 4216 }else if(avctx->lowres==3){ |
| 4217 c->idct_put= ff_jref_idct1_put; | |
| 4218 c->idct_add= ff_jref_idct1_add; | |
| 4219 c->idct = j_rev_dct1; | |
| 4220 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
| 2256 | 4221 }else{ |
| 4222 if(avctx->idct_algo==FF_IDCT_INT){ | |
| 4223 c->idct_put= ff_jref_idct_put; | |
| 4224 c->idct_add= ff_jref_idct_add; | |
| 4225 c->idct = j_rev_dct; | |
| 4226 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; | |
|
9975
d6d7e8d4a04d
Do not redundantly check for both CONFIG_THEORA_DECODER and CONFIG_VP3_DECODER.
diego
parents:
9586
diff
changeset
|
4227 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) && |
| 5007 | 4228 avctx->idct_algo==FF_IDCT_VP3){ |
| 2693 | 4229 c->idct_put= ff_vp3_idct_put_c; |
| 4230 c->idct_add= ff_vp3_idct_add_c; | |
| 4231 c->idct = ff_vp3_idct_c; | |
| 4232 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
| 5887 | 4233 }else if(avctx->idct_algo==FF_IDCT_WMV2){ |
| 4234 c->idct_put= ff_wmv2_idct_put_c; | |
| 4235 c->idct_add= ff_wmv2_idct_add_c; | |
| 4236 c->idct = ff_wmv2_idct_c; | |
| 4237 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
| 6407 | 4238 }else if(avctx->idct_algo==FF_IDCT_FAAN){ |
| 4239 c->idct_put= ff_faanidct_put; | |
| 4240 c->idct_add= ff_faanidct_add; | |
| 4241 c->idct = ff_faanidct; | |
| 4242 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
|
8596
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4243 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) { |
| 8120 | 4244 c->idct_put= ff_ea_idct_put_c; |
| 4245 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
| 11231 | 4246 }else if(CONFIG_BINK_DECODER && avctx->idct_algo==FF_IDCT_BINK) { |
| 4247 c->idct = ff_bink_idct_c; | |
| 4248 c->idct_add = ff_bink_idct_add_c; | |
| 4249 c->idct_put = ff_bink_idct_put_c; | |
| 4250 c->idct_permutation_type = FF_NO_IDCT_PERM; | |
| 2256 | 4251 }else{ //accurate/default |
| 6001 | 4252 c->idct_put= ff_simple_idct_put; |
| 4253 c->idct_add= ff_simple_idct_add; | |
| 4254 c->idct = ff_simple_idct; | |
| 2256 | 4255 c->idct_permutation_type= FF_NO_IDCT_PERM; |
| 4256 } | |
| 1092 | 4257 } |
| 4258 | |
|
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4259 c->get_pixels = get_pixels_c; |
|
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4260 c->diff_pixels = diff_pixels_c; |
|
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4261 c->put_pixels_clamped = put_pixels_clamped_c; |
|
1984
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
4262 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c; |
| 11231 | 4263 c->put_pixels_nonclamped = put_pixels_nonclamped_c; |
|
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4264 c->add_pixels_clamped = add_pixels_clamped_c; |
| 2763 | 4265 c->add_pixels8 = add_pixels8_c; |
| 4266 c->add_pixels4 = add_pixels4_c; | |
|
4988
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
4267 c->sum_abs_dctelem = sum_abs_dctelem_c; |
|
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4268 c->gmc1 = gmc1_c; |
|
3248
7aa9f80e7954
mmx implementation of 3-point GMC. (5x faster than C)
lorenm
parents:
3245
diff
changeset
|
4269 c->gmc = ff_gmc_c; |
| 8288 | 4270 c->clear_block = clear_block_c; |
|
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4271 c->clear_blocks = clear_blocks_c; |
|
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4272 c->pix_sum = pix_sum_c; |
|
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4273 c->pix_norm1 = pix_norm1_c; |
|
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4274 |
| 11231 | 4275 c->fill_block_tab[0] = fill_block16_c; |
| 4276 c->fill_block_tab[1] = fill_block8_c; | |
| 4277 c->scale_block = scale_block_c; | |
| 4278 | |
| 859 | 4279 /* TODO [0] 16 [1] 8 */ |
| 1708 | 4280 c->pix_abs[0][0] = pix_abs16_c; |
| 4281 c->pix_abs[0][1] = pix_abs16_x2_c; | |
| 4282 c->pix_abs[0][2] = pix_abs16_y2_c; | |
| 4283 c->pix_abs[0][3] = pix_abs16_xy2_c; | |
| 4284 c->pix_abs[1][0] = pix_abs8_c; | |
| 4285 c->pix_abs[1][1] = pix_abs8_x2_c; | |
| 4286 c->pix_abs[1][2] = pix_abs8_y2_c; | |
| 4287 c->pix_abs[1][3] = pix_abs8_xy2_c; | |
|
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4288 |
| 859 | 4289 #define dspfunc(PFX, IDX, NUM) \ |
| 4290 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \ | |
| 4291 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \ | |
| 4292 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \ | |
| 4293 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c | |
|
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4294 |
| 859 | 4295 dspfunc(put, 0, 16); |
| 4296 dspfunc(put_no_rnd, 0, 16); | |
| 4297 dspfunc(put, 1, 8); | |
| 4298 dspfunc(put_no_rnd, 1, 8); | |
|
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4299 dspfunc(put, 2, 4); |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4300 dspfunc(put, 3, 2); |
| 0 | 4301 |
| 859 | 4302 dspfunc(avg, 0, 16); |
| 4303 dspfunc(avg_no_rnd, 0, 16); | |
| 4304 dspfunc(avg, 1, 8); | |
| 4305 dspfunc(avg_no_rnd, 1, 8); | |
| 1319 | 4306 dspfunc(avg, 2, 4); |
| 4307 dspfunc(avg, 3, 2); | |
| 859 | 4308 #undef dspfunc |
| 857 | 4309 |
| 1864 | 4310 c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c; |
| 4311 c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c; | |
| 4312 | |
|
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4313 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c; |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4314 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c; |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4315 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c; |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4316 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c; |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4317 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c; |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4318 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c; |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4319 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c; |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4320 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c; |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4321 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c; |
|
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4322 |
| 1319 | 4323 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c; |
| 4324 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c; | |
| 4325 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c; | |
| 4326 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c; | |
| 4327 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c; | |
| 4328 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c; | |
| 4329 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c; | |
| 4330 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c; | |
| 4331 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c; | |
| 4332 | |
| 859 | 4333 #define dspfunc(PFX, IDX, NUM) \ |
| 4334 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \ | |
| 4335 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \ | |
| 4336 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \ | |
| 4337 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \ | |
| 4338 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \ | |
| 4339 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \ | |
| 4340 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \ | |
| 4341 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \ | |
| 4342 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \ | |
| 4343 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \ | |
| 4344 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \ | |
| 4345 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \ | |
| 4346 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \ | |
| 4347 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \ | |
| 4348 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \ | |
| 4349 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c | |
| 857 | 4350 |
| 859 | 4351 dspfunc(put_qpel, 0, 16); |
| 4352 dspfunc(put_no_rnd_qpel, 0, 16); | |
| 4353 | |
| 4354 dspfunc(avg_qpel, 0, 16); | |
| 4355 /* dspfunc(avg_no_rnd_qpel, 0, 16); */ | |
| 857 | 4356 |
| 859 | 4357 dspfunc(put_qpel, 1, 8); |
| 4358 dspfunc(put_no_rnd_qpel, 1, 8); | |
| 4359 | |
| 4360 dspfunc(avg_qpel, 1, 8); | |
| 4361 /* dspfunc(avg_no_rnd_qpel, 1, 8); */ | |
| 1168 | 4362 |
| 4363 dspfunc(put_h264_qpel, 0, 16); | |
| 4364 dspfunc(put_h264_qpel, 1, 8); | |
| 4365 dspfunc(put_h264_qpel, 2, 4); | |
|
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
4366 dspfunc(put_h264_qpel, 3, 2); |
| 1168 | 4367 dspfunc(avg_h264_qpel, 0, 16); |
| 4368 dspfunc(avg_h264_qpel, 1, 8); | |
| 4369 dspfunc(avg_h264_qpel, 2, 4); | |
| 4370 | |
| 859 | 4371 #undef dspfunc |
| 1168 | 4372 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c; |
| 4373 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c; | |
| 4374 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c; | |
| 4375 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c; | |
| 4376 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c; | |
| 4377 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c; | |
|
9439
ef3a7b711cc0
Rename put_no_rnd_h264_chroma* to reflect its usage in VC1 only
conrad
parents:
9437
diff
changeset
|
4378 c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_no_rnd_vc1_chroma_mc8_c; |
| 9440 | 4379 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_no_rnd_vc1_chroma_mc8_c; |
| 857 | 4380 |
| 6437 | 4381 c->draw_edges = draw_edges_c; |
| 4382 | |
| 8590 | 4383 #if CONFIG_CAVS_DECODER |
|
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
4384 ff_cavsdsp_init(c,avctx); |
| 3432 | 4385 #endif |
| 9585 | 4386 |
| 4387 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER | |
| 4388 ff_mlp_init(c, avctx); | |
| 4389 #endif | |
|
9995
3141f69e3905
Do not check for both CONFIG_VC1_DECODER and CONFIG_WMV3_DECODER,
diego
parents:
9975
diff
changeset
|
4390 #if CONFIG_VC1_DECODER |
| 3526 | 4391 ff_vc1dsp_init(c,avctx); |
| 4392 #endif | |
|
9995
3141f69e3905
Do not check for both CONFIG_VC1_DECODER and CONFIG_WMV3_DECODER,
diego
parents:
9975
diff
changeset
|
4393 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER |
| 5887 | 4394 ff_intrax8dsp_init(c,avctx); |
| 4395 #endif | |
| 8590 | 4396 #if CONFIG_RV30_DECODER |
| 8410 | 4397 ff_rv30dsp_init(c,avctx); |
| 4398 #endif | |
| 8590 | 4399 #if CONFIG_RV40_DECODER |
| 8232 | 4400 ff_rv40dsp_init(c,avctx); |
| 4401 c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c; | |
| 4402 c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c; | |
| 4403 c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c; | |
| 4404 c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c; | |
| 4405 #endif | |
|
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
4406 |
| 936 | 4407 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c; |
| 4408 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c; | |
| 4409 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c; | |
| 4410 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c; | |
| 4411 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c; | |
| 4412 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c; | |
| 4413 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; | |
| 4414 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; | |
| 2967 | 4415 |
| 1708 | 4416 #define SET_CMP_FUNC(name) \ |
| 4417 c->name[0]= name ## 16_c;\ | |
| 4418 c->name[1]= name ## 8x8_c; | |
| 2967 | 4419 |
| 1708 | 4420 SET_CMP_FUNC(hadamard8_diff) |
| 1729 | 4421 c->hadamard8_diff[4]= hadamard8_intra16_c; |
| 8978 | 4422 c->hadamard8_diff[5]= hadamard8_intra8x8_c; |
| 1708 | 4423 SET_CMP_FUNC(dct_sad) |
| 2382 | 4424 SET_CMP_FUNC(dct_max) |
| 8590 | 4425 #if CONFIG_GPL |
|
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
4426 SET_CMP_FUNC(dct264_sad) |
| 3013 | 4427 #endif |
| 1708 | 4428 c->sad[0]= pix_abs16_c; |
| 4429 c->sad[1]= pix_abs8_c; | |
| 4430 c->sse[0]= sse16_c; | |
| 4431 c->sse[1]= sse8_c; | |
| 2184 | 4432 c->sse[2]= sse4_c; |
| 1708 | 4433 SET_CMP_FUNC(quant_psnr) |
| 4434 SET_CMP_FUNC(rd) | |
| 4435 SET_CMP_FUNC(bit) | |
| 1729 | 4436 c->vsad[0]= vsad16_c; |
| 4437 c->vsad[4]= vsad_intra16_c; | |
| 8978 | 4438 c->vsad[5]= vsad_intra8_c; |
| 1729 | 4439 c->vsse[0]= vsse16_c; |
| 4440 c->vsse[4]= vsse_intra16_c; | |
| 8978 | 4441 c->vsse[5]= vsse_intra8_c; |
|
2065
9e4bebc39ade
noise preserving sum of squares comparison function
michael
parents:
2045
diff
changeset
|
4442 c->nsse[0]= nsse16_c; |
|
9e4bebc39ade
noise preserving sum of squares comparison function
michael
parents:
2045
diff
changeset
|
4443 c->nsse[1]= nsse8_c; |
| 11485 | 4444 #if CONFIG_DWT |
| 4445 ff_dsputil_init_dwt(c); | |
|
3373
b8996cc5ccae
Disable w53 and w97 cmp methods when snow encoder is disabled
gpoirier
parents:
3323
diff
changeset
|
4446 #endif |
| 2184 | 4447 |
| 4749 | 4448 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c; |
| 4449 | |
| 866 | 4450 c->add_bytes= add_bytes_c; |
| 6384 | 4451 c->add_bytes_l2= add_bytes_l2_c; |
| 866 | 4452 c->diff_bytes= diff_bytes_c; |
| 8760 | 4453 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c; |
| 1527 | 4454 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; |
| 10370 | 4455 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c; |
| 4456 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c; | |
| 1273 | 4457 c->bswap_buf= bswap_buf; |
| 8590 | 4458 #if CONFIG_PNG_DECODER |
| 6384 | 4459 c->add_png_paeth_prediction= ff_add_png_paeth_prediction; |
| 4460 #endif | |
| 2633 | 4461 |
|
10749
5cca4b6c459d
Get rid of pointless CONFIG_ANY_H263 preprocessor definition.
diego
parents:
10748
diff
changeset
|
4462 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { |
| 5278 | 4463 c->h263_h_loop_filter= h263_h_loop_filter_c; |
| 4464 c->h263_v_loop_filter= h263_v_loop_filter_c; | |
|
5277
7b3fcb7c61ce
Avoid linking with h263.c functions when the relevant codecs
aurel
parents:
5256
diff
changeset
|
4465 } |
| 2967 | 4466 |
|
9975
d6d7e8d4a04d
Do not redundantly check for both CONFIG_THEORA_DECODER and CONFIG_VP3_DECODER.
diego
parents:
9586
diff
changeset
|
4467 if (CONFIG_VP3_DECODER) { |
| 7995 | 4468 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c; |
| 4469 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c; | |
| 11637 | 4470 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c; |
| 7995 | 4471 } |
|
8785
bee83b3f9a6b
move vp6_filter_diag4() to a new vp6dsp.c file and use it through dsputil
aurel
parents:
8760
diff
changeset
|
4472 if (CONFIG_VP6_DECODER) { |
|
bee83b3f9a6b
move vp6_filter_diag4() to a new vp6dsp.c file and use it through dsputil
aurel
parents:
8760
diff
changeset
|
4473 c->vp6_filter_diag4= ff_vp6_filter_diag4_c; |
|
bee83b3f9a6b
move vp6_filter_diag4() to a new vp6dsp.c file and use it through dsputil
aurel
parents:
8760
diff
changeset
|
4474 } |
| 7995 | 4475 |
| 2045 | 4476 c->h261_loop_filter= h261_loop_filter_c; |
| 2967 | 4477 |
| 1784 | 4478 c->try_8x8basis= try_8x8basis_c; |
| 4479 c->add_8x8basis= add_8x8basis_c; | |
| 866 | 4480 |
| 8590 | 4481 #if CONFIG_VORBIS_DECODER |
|
3536
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
4482 c->vorbis_inverse_coupling = vorbis_inverse_coupling; |
|
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
4483 #endif |
| 8590 | 4484 #if CONFIG_AC3_DECODER |
| 7563 | 4485 c->ac3_downmix = ff_ac3_downmix_c; |
| 4486 #endif | |
|
10429
289dd8daf4ee
add CONFIG_LPC to the build system for lpc dsputil functions. fixes build
jbr
parents:
10424
diff
changeset
|
4487 #if CONFIG_LPC |
|
10424
94595d0e617c
Move autocorrelation function from flacenc.c to lpc.c. Also rename the
jbr
parents:
10421
diff
changeset
|
4488 c->lpc_compute_autocorr = ff_lpc_compute_autocorr; |
|
10429
289dd8daf4ee
add CONFIG_LPC to the build system for lpc dsputil functions. fixes build
jbr
parents:
10424
diff
changeset
|
4489 #endif |
|
3568
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
4490 c->vector_fmul = vector_fmul_c; |
|
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
4491 c->vector_fmul_reverse = vector_fmul_reverse_c; |
|
10300
4d1b9ca628fc
Drop unused args from vector_fmul_add_add, simplify code, and rename
mru
parents:
10219
diff
changeset
|
4492 c->vector_fmul_add = vector_fmul_add_c; |
| 7261 | 4493 c->vector_fmul_window = ff_vector_fmul_window_c; |
| 7564 | 4494 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c; |
|
10104
0fa3d21b317e
SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents:
10094
diff
changeset
|
4495 c->vector_clipf = vector_clipf_c; |
|
3568
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
4496 c->float_to_int16 = ff_float_to_int16_c; |
| 7261 | 4497 c->float_to_int16_interleave = ff_float_to_int16_interleave_c; |
|
7203
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
6719
diff
changeset
|
4498 c->scalarproduct_int16 = scalarproduct_int16_c; |
| 10644 | 4499 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c; |
| 10219 | 4500 c->scalarproduct_float = scalarproduct_float_c; |
| 4501 c->butterflies_float = butterflies_float_c; | |
| 4502 c->vector_fmul_scalar = vector_fmul_scalar_c; | |
| 4503 | |
| 4504 c->vector_fmul_sv_scalar[0] = vector_fmul_sv_scalar_2_c; | |
| 4505 c->vector_fmul_sv_scalar[1] = vector_fmul_sv_scalar_4_c; | |
| 4506 | |
| 4507 c->sv_fmul_scalar[0] = sv_fmul_scalar_2_c; | |
| 4508 c->sv_fmul_scalar[1] = sv_fmul_scalar_4_c; | |
|
3536
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
4509 |
| 3245 | 4510 c->shrink[0]= ff_img_copy_plane; |
| 4511 c->shrink[1]= ff_shrink22; | |
| 4512 c->shrink[2]= ff_shrink44; | |
| 4513 c->shrink[3]= ff_shrink88; | |
| 4514 | |
|
3215
06f98047ff26
prefetch pixels for future motion compensation. 2-5% faster h264.
lorenm
parents:
3199
diff
changeset
|
4515 c->prefetch= just_return; |
|
06f98047ff26
prefetch pixels for future motion compensation. 2-5% faster h264.
lorenm
parents:
3199
diff
changeset
|
4516 |
|
3807
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4517 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab)); |
|
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4518 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab)); |
|
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4519 |
|
8596
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4520 if (HAVE_MMX) dsputil_init_mmx (c, avctx); |
|
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4521 if (ARCH_ARM) dsputil_init_arm (c, avctx); |
|
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4522 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx); |
|
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4523 if (HAVE_VIS) dsputil_init_vis (c, avctx); |
|
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4524 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx); |
|
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4525 if (ARCH_PPC) dsputil_init_ppc (c, avctx); |
|
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4526 if (HAVE_MMI) dsputil_init_mmi (c, avctx); |
|
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4527 if (ARCH_SH4) dsputil_init_sh4 (c, avctx); |
|
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4528 if (ARCH_BFIN) dsputil_init_bfin (c, avctx); |
| 1092 | 4529 |
|
3807
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4530 for(i=0; i<64; i++){ |
|
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4531 if(!c->put_2tap_qpel_pixels_tab[0][i]) |
|
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4532 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i]; |
|
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4533 if(!c->avg_2tap_qpel_pixels_tab[0][i]) |
|
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4534 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i]; |
|
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4535 } |
|
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4536 |
| 1092 | 4537 switch(c->idct_permutation_type){ |
| 4538 case FF_NO_IDCT_PERM: | |
| 4539 for(i=0; i<64; i++) | |
| 4540 c->idct_permutation[i]= i; | |
| 4541 break; | |
| 4542 case FF_LIBMPEG2_IDCT_PERM: | |
| 4543 for(i=0; i<64; i++) | |
| 4544 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); | |
| 4545 break; | |
| 4546 case FF_SIMPLE_IDCT_PERM: | |
| 4547 for(i=0; i<64; i++) | |
| 4548 c->idct_permutation[i]= simple_mmx_permutation[i]; | |
| 4549 break; | |
| 4550 case FF_TRANSPOSE_IDCT_PERM: | |
| 4551 for(i=0; i<64; i++) | |
| 4552 c->idct_permutation[i]= ((i&7)<<3) | (i>>3); | |
| 4553 break; | |
|
2696
9699d325049d
porting the mmx&sse2 (sse2 untested) vp3 idcts to the lavc idct API
michael
parents:
2693
diff
changeset
|
4554 case FF_PARTTRANS_IDCT_PERM: |
|
9699d325049d
porting the mmx&sse2 (sse2 untested) vp3 idcts to the lavc idct API
michael
parents:
2693
diff
changeset
|
4555 for(i=0; i<64; i++) |
|
9699d325049d
porting the mmx&sse2 (sse2 untested) vp3 idcts to the lavc idct API
michael
parents:
2693
diff
changeset
|
4556 c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3); |
|
9699d325049d
porting the mmx&sse2 (sse2 untested) vp3 idcts to the lavc idct API
michael
parents:
2693
diff
changeset
|
4557 break; |
|
6600
c3213c91124c
Add a new IDCT permutation, used in xvid_sse2 and possibly future similar IDCTs.
astrange
parents:
6450
diff
changeset
|
4558 case FF_SSE2_IDCT_PERM: |
|
c3213c91124c
Add a new IDCT permutation, used in xvid_sse2 and possibly future similar IDCTs.
astrange
parents:
6450
diff
changeset
|
4559 for(i=0; i<64; i++) |
|
c3213c91124c
Add a new IDCT permutation, used in xvid_sse2 and possibly future similar IDCTs.
astrange
parents:
6450
diff
changeset
|
4560 c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7]; |
|
c3213c91124c
Add a new IDCT permutation, used in xvid_sse2 and possibly future similar IDCTs.
astrange
parents:
6450
diff
changeset
|
4561 break; |
| 1092 | 4562 default: |
|
1598
932d306bf1dc
av_log() patch by (Michel Bardiaux <mbardiaux at peaktime dot be>)
michael
parents:
1571
diff
changeset
|
4563 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n"); |
| 1092 | 4564 } |
| 0 | 4565 } |
|
252
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
4566 |
