Mercurial > libavcodec.hg
annotate 4xm.c @ 3559:c02459cd0d31 libavcodec
slightly faster ff_imdct_calc_3dn2() on amd64. (gcc added a bunch of useless movsxd)
| author | lorenm |
|---|---|
| date | Tue, 08 Aug 2006 21:47:11 +0000 |
| parents | 68721b62a528 |
| children | 1843a85123b7 |
| rev | line source |
|---|---|
| 1293 | 1 /* |
| 2 * 4XM codec | |
| 3 * Copyright (c) 2003 Michael Niedermayer | |
| 4 * | |
| 5 * This library is free software; you can redistribute it and/or | |
| 6 * modify it under the terms of the GNU Lesser General Public | |
| 7 * License as published by the Free Software Foundation; either | |
| 8 * version 2 of the License, or (at your option) any later version. | |
| 9 * | |
| 10 * This library is distributed in the hope that it will be useful, | |
| 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 13 * Lesser General Public License for more details. | |
| 14 * | |
| 15 * You should have received a copy of the GNU Lesser General Public | |
| 16 * License along with this library; if not, write to the Free Software | |
|
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2967
diff
changeset
|
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 1293 | 18 */ |
| 2967 | 19 |
| 1293 | 20 /** |
| 21 * @file 4xm.c | |
| 22 * 4XM codec. | |
| 23 */ | |
| 2967 | 24 |
| 1293 | 25 #include "avcodec.h" |
| 26 #include "dsputil.h" | |
| 27 #include "mpegvideo.h" | |
| 28 | |
| 29 //#undef NDEBUG | |
| 30 //#include <assert.h> | |
| 31 | |
| 32 #define BLOCK_TYPE_VLC_BITS 5 | |
| 33 #define ACDC_VLC_BITS 9 | |
| 34 | |
| 35 #define CFRAME_BUFFER_COUNT 100 | |
| 36 | |
/**
 * Code table for the P-frame block-type VLCs, one row per block-shape class.
 * Every entry is a {code, length} pair; init_vlcs() hands the first seven
 * entries of each row to init_vlc(). Entries with length 0 presumably mark
 * block types that cannot occur for that shape.
 */
static const uint8_t block_type_tab[4][8][2] = {
    {   /* {8,4,2}x{8,4,2} */
        {  0, 1 }, {  2, 2 }, {  6, 3 }, { 14, 4 }, { 30, 5 }, { 31, 5 }, {  0, 0 }
    },
    {   /* {8,4}x1 */
        {  0, 1 }, {  0, 0 }, {  2, 2 }, {  6, 3 }, { 14, 4 }, { 15, 4 }, {  0, 0 }
    },
    {   /* 1x{8,4} */
        {  0, 1 }, {  2, 2 }, {  0, 0 }, {  6, 3 }, { 14, 4 }, { 15, 4 }, {  0, 0 }
    },
    {   /* 1x2, 2x1 */
        {  0, 1 }, {  0, 0 }, {  0, 0 }, {  2, 2 }, {  6, 3 }, { 14, 4 }, { 15, 4 }
    }
};
| 48 | |
/**
 * Maps [log2h][log2w] of a block to the row of block_type_tab /
 * block_type_vlc used to decode its type. The [0][0] slot holds the value
 * that -1 converts to in a uint8_t (255).
 */
static const uint8_t size2index[4][4] = {
    { (uint8_t)-1, 3, 1, 1 },
    {           3, 0, 0, 0 },
    {           2, 0, 0, 0 },
    {           2, 0, 0, 0 },
};
| 55 | |
/**
 * Motion vector table indexed by the byte read from the bytestream.
 * Each entry is {horizontal, vertical}; init_mv() converts the pairs into
 * pixel offsets for the current line size.
 */
static const int8_t mv[256][2] = {
    {  0,  0}, {  0, -1}, { -1,  0}, {  1,  0}, {  0,  1}, { -1, -1}, {  1, -1}, { -1,  1},
    {  1,  1}, {  0, -2}, { -2,  0}, {  2,  0}, {  0,  2}, { -1, -2}, {  1, -2}, { -2, -1},
    {  2, -1}, { -2,  1}, {  2,  1}, { -1,  2}, {  1,  2}, { -2, -2}, {  2, -2}, { -2,  2},
    {  2,  2}, {  0, -3}, { -3,  0}, {  3,  0}, {  0,  3}, { -1, -3}, {  1, -3}, { -3, -1},
    {  3, -1}, { -3,  1}, {  3,  1}, { -1,  3}, {  1,  3}, { -2, -3}, {  2, -3}, { -3, -2},
    {  3, -2}, { -3,  2}, {  3,  2}, { -2,  3}, {  2,  3}, {  0, -4}, { -4,  0}, {  4,  0},
    {  0,  4}, { -1, -4}, {  1, -4}, { -4, -1}, {  4, -1}, {  4,  1}, { -1,  4}, {  1,  4},
    { -3, -3}, { -3,  3}, {  3,  3}, { -2, -4}, { -4, -2}, {  4, -2}, { -4,  2}, { -2,  4},
    {  2,  4}, { -3, -4}, {  3, -4}, {  4, -3}, { -5,  0}, { -4,  3}, { -3,  4}, {  3,  4},
    { -1, -5}, { -5, -1}, { -5,  1}, { -1,  5}, { -2, -5}, {  2, -5}, {  5, -2}, {  5,  2},
    { -4, -4}, { -4,  4}, { -3, -5}, { -5, -3}, { -5,  3}, {  3,  5}, { -6,  0}, {  0,  6},
    { -6, -1}, { -6,  1}, {  1,  6}, {  2, -6}, { -6,  2}, {  2,  6}, { -5, -4}, {  5,  4},
    {  4,  5}, { -6, -3}, {  6,  3}, { -7,  0}, { -1, -7}, {  5, -5}, { -7,  1}, { -1,  7},
    {  4, -6}, {  6,  4}, { -2, -7}, { -7,  2}, { -3, -7}, {  7, -3}, {  3,  7}, {  6, -5},
    {  0, -8}, { -1, -8}, { -7, -4}, { -8,  1}, {  4,  7}, {  2, -8}, { -2,  8}, {  6,  6},
    { -8,  3}, {  5, -7}, { -5,  7}, {  8, -4}, {  0, -9}, { -9, -1}, {  1,  9}, {  7, -6},
    { -7,  6}, { -5, -8}, { -5,  8}, { -9,  3}, {  9, -4}, {  7, -7}, {  8, -6}, {  6,  8},
    { 10,  1}, {-10,  2}, {  9, -5}, { 10, -3}, { -8, -7}, {-10, -4}, {  6, -9}, {-11,  0},
    { 11,  1}, {-11, -2}, { -2, 11}, {  7, -9}, { -7,  9}, { 10,  6}, { -4, 11}, {  8, -9},
    {  8,  9}, {  5, 11}, {  7,-10}, { 12, -3}, { 11,  6}, { -9, -9}, {  8, 10}, {  5, 12},
    {-11,  7}, { 13,  2}, {  6,-12}, { 10,  9}, {-11,  8}, { -7, 12}, {  0, 14}, { 14, -2},
    { -9, 11}, { -6, 13}, {-14, -4}, { -5,-14}, {  5, 14}, {-15, -1}, {-14, -6}, {  3,-15},
    { 11,-11}, { -7, 14}, { -5, 15}, {  8,-14}, { 15,  6}, {  3, 16}, {  7,-15}, {-16,  5},
    {  0, 17}, {-16, -6}, {-10, 14}, {-16,  7}, { 12, 13}, {-16,  8}, {-17,  6}, {-18,  3},
    { -7, 17}, { 15, 11}, { 16, 10}, {  2,-19}, {  3,-19}, {-11,-16}, {-18,  8}, {-19, -6},
    {  2,-20}, {-17,-11}, {-10,-18}, {  8, 19}, {-21, -1}, {-20,  7}, { -4, 21}, { 21,  5},
    { 15, 16}, {  2,-22}, {-10,-20}, {-22,  5}, { 20,-11}, { -7,-22}, {-12, 20}, { 23, -5},
    { 13,-20}, { 24, -2}, {-15, 19}, {-11, 22}, { 16, 19}, { 23,-10}, {-18,-18}, { -9,-24},
    { 24,-10}, { -3, 26}, {-23, 13}, {-18,-20}, { 17, 21}, { -4, 27}, { 27,  6}, {  1,-28},
    {-11, 26}, {-17,-23}, {  7, 28}, { 11,-27}, { 29,  5}, {-23,-19}, {-28,-11}, {-21, 22},
    {-30,  7}, {-17, 26}, {-27, 16}, { 13, 29}, { 19,-26}, { 10,-31}, {-14,-30}, { 20,-27},
    {-29, 18}, {-16,-31}, {-28,-22}, { 21,-30}, {-25, 28}, { 26,-29}, { 25,-32}, {-32,-32}
};
| 90 | |
/**
 * Dequantisation factors applied to the decoded coefficients.
 * This is simply the scaled-down element-wise product of the standard JPEG
 * quantizer table and the AAN premultiplication table.
 */
static const uint8_t dequant_table[64] = {
    16, 15, 13, 19, 24, 31, 28, 17,
    17, 23, 25, 31, 36, 63, 45, 21,
    18, 24, 27, 37, 52, 59, 49, 20,
    16, 28, 34, 40, 60, 80, 51, 20,
    18, 31, 48, 66, 68, 86, 56, 21,
    19, 38, 56, 59, 64, 64, 48, 20,
    27, 48, 55, 55, 56, 51, 35, 15,
    20, 35, 34, 32, 31, 22, 15,  8
};
| 102 | |
| 103 static VLC block_type_vlc[4]; | |
| 104 | |
| 105 | |
/**
 * Reassembly buffer for one frame that arrives split over several "cfrm"
 * chunks.
 */
typedef struct CFrameBuffer{
    /* Capacity of data. Unsigned because its address is handed to
     * av_fast_realloc(), which this file otherwise calls with an
     * unsigned int size field (see bitstream_buffer_size). */
    unsigned int allocated_size;
    int size;       ///< number of payload bytes collected so far
    int id;         ///< id of the frame being collected, 0 when the slot is idle
    uint8_t *data;  ///< collected payload, owned by this slot
}CFrameBuffer;
| 112 | |
| 113 typedef struct FourXContext{ | |
| 114 AVCodecContext *avctx; | |
| 115 DSPContext dsp; | |
| 116 AVFrame current_picture, last_picture; | |
| 117 GetBitContext pre_gb; ///< ac/dc prefix | |
| 118 GetBitContext gb; | |
| 119 uint8_t *bytestream; | |
| 120 uint16_t *wordstream; | |
| 121 int mv[256]; | |
| 122 VLC pre_vlc; | |
| 123 int last_dc; | |
| 3089 | 124 DECLARE_ALIGNED_8(DCTELEM, block[6][64]); |
| 1293 | 125 uint8_t *bitstream_buffer; |
|
3066
04b924f8f5a5
warning fixes by Luca Abeni, lucabe72 ##@## email ##.## it
diego
parents:
3036
diff
changeset
|
126 unsigned int bitstream_buffer_size; |
| 1293 | 127 CFrameBuffer cfrm[CFRAME_BUFFER_COUNT]; |
| 128 } FourXContext; | |
| 129 | |
| 130 | |
| 131 #define FIX_1_082392200 70936 | |
| 132 #define FIX_1_414213562 92682 | |
| 133 #define FIX_1_847759065 121095 | |
| 134 #define FIX_2_613125930 171254 | |
| 135 | |
| 136 #define MULTIPLY(var,const) (((var)*(const)) >> 16) | |
| 137 | |
| 138 static void idct(DCTELEM block[64]){ | |
| 139 int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; | |
| 140 int tmp10, tmp11, tmp12, tmp13; | |
| 141 int z5, z10, z11, z12, z13; | |
| 142 int i; | |
| 143 int temp[64]; | |
| 2967 | 144 |
| 1293 | 145 for(i=0; i<8; i++){ |
| 146 tmp10 = block[8*0 + i] + block[8*4 + i]; | |
| 147 tmp11 = block[8*0 + i] - block[8*4 + i]; | |
| 148 | |
| 149 tmp13 = block[8*2 + i] + block[8*6 + i]; | |
| 150 tmp12 = MULTIPLY(block[8*2 + i] - block[8*6 + i], FIX_1_414213562) - tmp13; | |
| 151 | |
| 152 tmp0 = tmp10 + tmp13; | |
| 153 tmp3 = tmp10 - tmp13; | |
| 154 tmp1 = tmp11 + tmp12; | |
| 155 tmp2 = tmp11 - tmp12; | |
| 2967 | 156 |
| 1293 | 157 z13 = block[8*5 + i] + block[8*3 + i]; |
| 158 z10 = block[8*5 + i] - block[8*3 + i]; | |
| 159 z11 = block[8*1 + i] + block[8*7 + i]; | |
| 160 z12 = block[8*1 + i] - block[8*7 + i]; | |
| 161 | |
| 162 tmp7 = z11 + z13; | |
| 163 tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); | |
| 164 | |
| 165 z5 = MULTIPLY(z10 + z12, FIX_1_847759065); | |
| 166 tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; | |
| 167 tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; | |
| 168 | |
| 169 tmp6 = tmp12 - tmp7; | |
| 170 tmp5 = tmp11 - tmp6; | |
| 171 tmp4 = tmp10 + tmp5; | |
| 172 | |
| 173 temp[8*0 + i] = tmp0 + tmp7; | |
| 174 temp[8*7 + i] = tmp0 - tmp7; | |
| 175 temp[8*1 + i] = tmp1 + tmp6; | |
| 176 temp[8*6 + i] = tmp1 - tmp6; | |
| 177 temp[8*2 + i] = tmp2 + tmp5; | |
| 178 temp[8*5 + i] = tmp2 - tmp5; | |
| 179 temp[8*4 + i] = tmp3 + tmp4; | |
| 180 temp[8*3 + i] = tmp3 - tmp4; | |
| 181 } | |
| 2967 | 182 |
| 1293 | 183 for(i=0; i<8*8; i+=8){ |
| 184 tmp10 = temp[0 + i] + temp[4 + i]; | |
| 185 tmp11 = temp[0 + i] - temp[4 + i]; | |
| 186 | |
| 187 tmp13 = temp[2 + i] + temp[6 + i]; | |
| 188 tmp12 = MULTIPLY(temp[2 + i] - temp[6 + i], FIX_1_414213562) - tmp13; | |
| 189 | |
| 190 tmp0 = tmp10 + tmp13; | |
| 191 tmp3 = tmp10 - tmp13; | |
| 192 tmp1 = tmp11 + tmp12; | |
| 193 tmp2 = tmp11 - tmp12; | |
| 194 | |
| 195 z13 = temp[5 + i] + temp[3 + i]; | |
| 196 z10 = temp[5 + i] - temp[3 + i]; | |
| 197 z11 = temp[1 + i] + temp[7 + i]; | |
| 198 z12 = temp[1 + i] - temp[7 + i]; | |
| 199 | |
| 200 tmp7 = z11 + z13; | |
| 201 tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); | |
| 202 | |
| 203 z5 = MULTIPLY(z10 + z12, FIX_1_847759065); | |
| 204 tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; | |
| 205 tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; | |
| 206 | |
| 207 tmp6 = tmp12 - tmp7; | |
| 208 tmp5 = tmp11 - tmp6; | |
| 209 tmp4 = tmp10 + tmp5; | |
| 210 | |
| 211 block[0 + i] = (tmp0 + tmp7)>>6; | |
| 212 block[7 + i] = (tmp0 - tmp7)>>6; | |
| 213 block[1 + i] = (tmp1 + tmp6)>>6; | |
| 214 block[6 + i] = (tmp1 - tmp6)>>6; | |
| 215 block[2 + i] = (tmp2 + tmp5)>>6; | |
| 216 block[5 + i] = (tmp2 - tmp5)>>6; | |
| 217 block[4 + i] = (tmp3 + tmp4)>>6; | |
| 218 block[3 + i] = (tmp3 - tmp4)>>6; | |
| 219 } | |
| 220 } | |
| 221 | |
| 222 static void init_vlcs(FourXContext *f){ | |
| 223 int i; | |
| 224 | |
|
2370
26560d4fdb1f
Memory leak fix patch by (Burkhard Plaum <plaum >at< ipf.uni-stuttgart )dot( de>)
michael
parents:
2028
diff
changeset
|
225 for(i=0; i<4; i++){ |
| 2967 | 226 init_vlc(&block_type_vlc[i], BLOCK_TYPE_VLC_BITS, 7, |
|
2370
26560d4fdb1f
Memory leak fix patch by (Burkhard Plaum <plaum >at< ipf.uni-stuttgart )dot( de>)
michael
parents:
2028
diff
changeset
|
227 &block_type_tab[i][0][1], 2, 1, |
|
26560d4fdb1f
Memory leak fix patch by (Burkhard Plaum <plaum >at< ipf.uni-stuttgart )dot( de>)
michael
parents:
2028
diff
changeset
|
228 &block_type_tab[i][0][0], 2, 1, 1); |
| 1293 | 229 } |
| 230 } | |
| 231 | |
| 232 static void init_mv(FourXContext *f){ | |
| 233 int i; | |
| 234 | |
| 235 for(i=0; i<256; i++){ | |
| 236 f->mv[i] = mv[i][0] + mv[i][1]*f->current_picture.linesize[0]/2; | |
| 237 } | |
| 238 } | |
| 239 | |
/**
 * Motion compensation with DC offset: dst = scale*src + dc for a block of
 * (1<<log2w) x h 16-bit pixels.
 *
 * Blocks wider than one pixel are processed two pixels at a time as 32-bit
 * words, so a carry out of one pixel can reach its neighbour, exactly as the
 * word-wide arithmetic always did here.
 *
 * @param dst    destination, top-left pixel of the block
 * @param src    source, top-left pixel of the block
 * @param log2w  log2 of the block width, 0..3
 * @param h      number of rows
 * @param stride distance between rows, in pixels, for both dst and src
 * @param scale  factor applied to the source; src is not advanced when it is 0
 * @param dc     value added to every pixel
 */
static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w, int h, int stride, int scale, int dc){
    /* Replicate dc into both halves of a word. Done in unsigned arithmetic:
     * the signed product overflows for dc >= 0x8000. */
    const uint32_t dc2 = (uint32_t)dc * 0x10001u;
    const uint32_t mul = (uint32_t)scale;
    int words;
    int i, k;

    switch(log2w){
    case 0:
        for(i=0; i<h; i++){
            dst[0] = (uint16_t)(mul*src[0] + dc2);
            if(scale) src += stride;
            dst += stride;
        }
        return;
    case 1: words = 1; break;
    case 2: words = 2; break;
    case 3: words = 4; break;
    default:
        assert(0);
        return;
    }

    for(i=0; i<h; i++){
        for(k=0; k<words; k++){
            uint32_t pair;

            /* memcpy instead of a uint32_t* cast: src may sit on an odd pixel
             * and uint16_t storage must not be accessed as uint32_t */
            memcpy(&pair, src + 2*k, sizeof(pair));
            pair = mul*pair + dc2;
            memcpy(dst + 2*k, &pair, sizeof(pair));
        }
        if(scale) src += stride;
        dst += stride;
    }
}
| 280 | |
| 281 static void decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src, int log2w, int log2h, int stride){ | |
| 282 const int index= size2index[log2h][log2w]; | |
| 283 const int h= 1<<log2h; | |
| 284 int code= get_vlc2(&f->gb, block_type_vlc[index].table, BLOCK_TYPE_VLC_BITS, 1); | |
| 2967 | 285 |
| 1293 | 286 assert(code>=0 && code<=6); |
| 287 | |
| 288 if(code == 0){ | |
| 289 src += f->mv[ *f->bytestream++ ]; | |
| 290 mcdc(dst, src, log2w, h, stride, 1, 0); | |
| 291 }else if(code == 1){ | |
| 292 log2h--; | |
| 293 decode_p_block(f, dst , src , log2w, log2h, stride); | |
| 294 decode_p_block(f, dst + (stride<<log2h), src + (stride<<log2h), log2w, log2h, stride); | |
| 295 }else if(code == 2){ | |
| 296 log2w--; | |
| 297 decode_p_block(f, dst , src , log2w, log2h, stride); | |
| 298 decode_p_block(f, dst + (1<<log2w), src + (1<<log2w), log2w, log2h, stride); | |
| 299 }else if(code == 4){ | |
| 300 src += f->mv[ *f->bytestream++ ]; | |
| 301 mcdc(dst, src, log2w, h, stride, 1, le2me_16(*f->wordstream++)); | |
| 302 }else if(code == 5){ | |
| 303 mcdc(dst, src, log2w, h, stride, 0, le2me_16(*f->wordstream++)); | |
| 304 }else if(code == 6){ | |
| 305 if(log2w){ | |
| 306 dst[0] = le2me_16(*f->wordstream++); | |
| 307 dst[1] = le2me_16(*f->wordstream++); | |
| 308 }else{ | |
| 309 dst[0 ] = le2me_16(*f->wordstream++); | |
| 310 dst[stride] = le2me_16(*f->wordstream++); | |
| 311 } | |
| 312 } | |
| 313 } | |
| 314 | |
/**
 * Reads a 32-bit little-endian value.
 * The value is assembled byte by byte, so p needs no particular alignment
 * and the result does not depend on the host byte order.
 * @param p address of the four bytes to read
 * @return the value, converted to int
 */
static int get32(void *p){
    const uint8_t *b = p;
    const uint32_t v =  (uint32_t)b[0]
                     | ((uint32_t)b[1] <<  8)
                     | ((uint32_t)b[2] << 16)
                     | ((uint32_t)b[3] << 24);

    return (int)v;
}
| 318 | |
| 319 static int decode_p_frame(FourXContext *f, uint8_t *buf, int length){ | |
| 320 int x, y; | |
| 321 const int width= f->avctx->width; | |
| 322 const int height= f->avctx->height; | |
| 323 uint16_t *src= (uint16_t*)f->last_picture.data[0]; | |
| 324 uint16_t *dst= (uint16_t*)f->current_picture.data[0]; | |
| 325 const int stride= f->current_picture.linesize[0]>>1; | |
| 2422 | 326 const unsigned int bitstream_size= get32(buf+8); |
| 327 const unsigned int bytestream_size= get32(buf+16); | |
| 328 const unsigned int wordstream_size= get32(buf+12); | |
| 2967 | 329 |
| 2422 | 330 if(bitstream_size+ bytestream_size+ wordstream_size + 20 != length |
| 331 || bitstream_size > (1<<26) | |
| 332 || bytestream_size > (1<<26) | |
| 333 || wordstream_size > (1<<26) | |
| 334 ){ | |
| 2967 | 335 av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n", bitstream_size, bytestream_size, wordstream_size, |
| 1293 | 336 bitstream_size+ bytestream_size+ wordstream_size - length); |
| 2422 | 337 return -1; |
| 338 } | |
| 2967 | 339 |
| 1293 | 340 f->bitstream_buffer= av_fast_realloc(f->bitstream_buffer, &f->bitstream_buffer_size, bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE); |
| 341 f->dsp.bswap_buf((uint32_t*)f->bitstream_buffer, (uint32_t*)(buf + 20), bitstream_size/4); | |
| 342 init_get_bits(&f->gb, f->bitstream_buffer, 8*bitstream_size); | |
| 343 | |
| 344 f->wordstream= (uint16_t*)(buf + 20 + bitstream_size); | |
| 345 f->bytestream= buf + 20 + bitstream_size + wordstream_size; | |
| 2967 | 346 |
| 1293 | 347 init_mv(f); |
| 2967 | 348 |
| 1293 | 349 for(y=0; y<height; y+=8){ |
| 350 for(x=0; x<width; x+=8){ | |
| 351 decode_p_block(f, dst + x, src + x, 3, 3, stride); | |
| 352 } | |
| 2967 | 353 src += 8*stride; |
| 354 dst += 8*stride; | |
| 1293 | 355 } |
| 2967 | 356 |
| 1293 | 357 if(bitstream_size != (get_bits_count(&f->gb)+31)/32*4) |
| 2967 | 358 av_log(f->avctx, AV_LOG_ERROR, " %d %td %td bytes left\n", |
| 359 bitstream_size - (get_bits_count(&f->gb)+31)/32*4, | |
| 1293 | 360 bytestream_size - (f->bytestream - (buf + 20 + bitstream_size + wordstream_size)), |
| 361 wordstream_size - (((uint8_t*)f->wordstream) - (buf + 20 + bitstream_size)) | |
| 362 ); | |
| 2967 | 363 |
| 1293 | 364 return 0; |
| 365 } | |
| 366 | |
| 367 /** | |
| 368 * decode block and dequantize. | |
| 369 * Note this is allmost identical to mjpeg | |
| 370 */ | |
| 371 static int decode_i_block(FourXContext *f, DCTELEM *block){ | |
| 372 int code, i, j, level, val; | |
| 373 | |
| 374 /* DC coef */ | |
| 375 val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3); | |
| 376 if (val>>4){ | |
|
1598
932d306bf1dc
av_log() patch by (Michel Bardiaux <mbardiaux at peaktime dot be>)
michael
parents:
1514
diff
changeset
|
377 av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n"); |
| 1293 | 378 } |
| 379 | |
| 380 if(val) | |
| 381 val = get_xbits(&f->gb, val); | |
| 382 | |
| 383 val = val * dequant_table[0] + f->last_dc; | |
| 384 f->last_dc = | |
| 385 block[0] = val; | |
| 386 /* AC coefs */ | |
| 387 i = 1; | |
| 388 for(;;) { | |
| 389 code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3); | |
| 2967 | 390 |
| 1293 | 391 /* EOB */ |
| 392 if (code == 0) | |
| 393 break; | |
| 394 if (code == 0xf0) { | |
| 395 i += 16; | |
| 396 } else { | |
| 397 level = get_xbits(&f->gb, code & 0xf); | |
| 398 i += code >> 4; | |
| 399 if (i >= 64) { | |
|
1598
932d306bf1dc
av_log() patch by (Michel Bardiaux <mbardiaux at peaktime dot be>)
michael
parents:
1514
diff
changeset
|
400 av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i); |
| 1293 | 401 return 0; |
| 402 } | |
| 403 | |
| 404 j= ff_zigzag_direct[i]; | |
| 405 block[j] = level * dequant_table[j]; | |
| 406 i++; | |
| 407 if (i >= 64) | |
| 408 break; | |
| 409 } | |
| 410 } | |
| 411 | |
| 412 return 0; | |
| 413 } | |
| 414 | |
| 415 static inline void idct_put(FourXContext *f, int x, int y){ | |
| 416 DCTELEM (*block)[64]= f->block; | |
| 417 int stride= f->current_picture.linesize[0]>>1; | |
| 418 int i; | |
| 419 uint16_t *dst = ((uint16_t*)f->current_picture.data[0]) + y * stride + x; | |
| 2967 | 420 |
| 1295 | 421 for(i=0; i<4; i++){ |
| 422 block[i][0] += 0x80*8*8; | |
| 423 idct(block[i]); | |
| 424 } | |
| 1293 | 425 |
| 1295 | 426 if(!(f->avctx->flags&CODEC_FLAG_GRAY)){ |
| 427 for(i=4; i<6; i++) idct(block[i]); | |
| 428 } | |
| 1293 | 429 |
| 1514 | 430 /* Note transform is: |
| 431 y= ( 1b + 4g + 2r)/14 | |
| 432 cb=( 3b - 2g - 1r)/14 | |
| 433 cr=(-1b - 4g + 5r)/14 | |
| 2967 | 434 */ |
| 1293 | 435 for(y=0; y<8; y++){ |
| 436 for(x=0; x<8; x++){ | |
| 437 DCTELEM *temp= block[(x>>2) + 2*(y>>2)] + 2*(x&3) + 2*8*(y&3); //FIXME optimize | |
| 1295 | 438 int cb= block[4][x + 8*y]; |
| 439 int cr= block[5][x + 8*y]; | |
| 1293 | 440 int cg= (cb + cr)>>1; |
| 441 int y; | |
| 2967 | 442 |
| 1295 | 443 cb+=cb; |
| 2967 | 444 |
| 1293 | 445 y = temp[0]; |
| 1295 | 446 dst[0 ]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8); |
| 1293 | 447 y = temp[1]; |
| 1295 | 448 dst[1 ]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8); |
| 1293 | 449 y = temp[8]; |
| 1295 | 450 dst[ stride]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8); |
| 1293 | 451 y = temp[9]; |
| 1295 | 452 dst[1+stride]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8); |
| 1293 | 453 dst += 2; |
| 454 } | |
| 455 dst += 2*stride - 2*8; | |
| 456 } | |
| 457 } | |
| 458 | |
| 459 static int decode_i_mb(FourXContext *f){ | |
| 460 int i; | |
| 2967 | 461 |
| 1293 | 462 f->dsp.clear_blocks(f->block[0]); |
| 2967 | 463 |
| 1293 | 464 for(i=0; i<6; i++){ |
| 465 if(decode_i_block(f, f->block[i]) < 0) | |
| 466 return -1; | |
| 467 } | |
| 2967 | 468 |
| 1293 | 469 return 0; |
| 470 } | |
| 471 | |
| 472 static uint8_t *read_huffman_tables(FourXContext *f, uint8_t * const buf){ | |
| 473 int frequency[512]; | |
| 474 uint8_t flag[512]; | |
| 475 int up[512]; | |
| 476 uint8_t len_tab[257]; | |
| 477 int bits_tab[257]; | |
| 478 int start, end; | |
| 479 uint8_t *ptr= buf; | |
| 480 int j; | |
| 2967 | 481 |
| 1293 | 482 memset(frequency, 0, sizeof(frequency)); |
| 483 memset(up, -1, sizeof(up)); | |
| 484 | |
| 485 start= *ptr++; | |
| 486 end= *ptr++; | |
| 487 for(;;){ | |
| 488 int i; | |
| 2967 | 489 |
| 1293 | 490 for(i=start; i<=end; i++){ |
| 491 frequency[i]= *ptr++; | |
| 492 // printf("%d %d %d\n", start, end, frequency[i]); | |
| 493 } | |
| 494 start= *ptr++; | |
| 495 if(start==0) break; | |
| 2967 | 496 |
| 1293 | 497 end= *ptr++; |
| 498 } | |
| 499 frequency[256]=1; | |
| 500 | |
| 2967 | 501 while((ptr - buf)&3) ptr++; // 4byte align |
| 1293 | 502 |
| 503 // for(j=0; j<16; j++) | |
| 504 // printf("%2X", ptr[j]); | |
| 2967 | 505 |
| 1293 | 506 for(j=257; j<512; j++){ |
| 1294 | 507 int min_freq[2]= {256*256, 256*256}; |
| 508 int smallest[2]= {0, 0}; | |
| 1293 | 509 int i; |
| 510 for(i=0; i<j; i++){ | |
| 511 if(frequency[i] == 0) continue; | |
| 1294 | 512 if(frequency[i] < min_freq[1]){ |
| 513 if(frequency[i] < min_freq[0]){ | |
| 514 min_freq[1]= min_freq[0]; smallest[1]= smallest[0]; | |
| 515 min_freq[0]= frequency[i];smallest[0]= i; | |
| 516 }else{ | |
| 517 min_freq[1]= frequency[i];smallest[1]= i; | |
| 518 } | |
| 1293 | 519 } |
| 520 } | |
| 1294 | 521 if(min_freq[1] == 256*256) break; |
| 2967 | 522 |
| 1294 | 523 frequency[j]= min_freq[0] + min_freq[1]; |
| 1293 | 524 flag[ smallest[0] ]= 0; |
| 525 flag[ smallest[1] ]= 1; | |
| 2967 | 526 up[ smallest[0] ]= |
| 1293 | 527 up[ smallest[1] ]= j; |
| 528 frequency[ smallest[0] ]= frequency[ smallest[1] ]= 0; | |
| 529 } | |
| 530 | |
| 531 for(j=0; j<257; j++){ | |
| 532 int node; | |
| 533 int len=0; | |
| 534 int bits=0; | |
| 535 | |
| 536 for(node= j; up[node] != -1; node= up[node]){ | |
| 537 bits += flag[node]<<len; | |
| 538 len++; | |
|
1598
932d306bf1dc
av_log() patch by (Michel Bardiaux <mbardiaux at peaktime dot be>)
michael
parents:
1514
diff
changeset
|
539 if(len > 31) av_log(f->avctx, AV_LOG_ERROR, "vlc length overflow\n"); //can this happen at all ? |
| 1293 | 540 } |
| 2967 | 541 |
| 1293 | 542 bits_tab[j]= bits; |
| 543 len_tab[j]= len; | |
| 544 } | |
| 2967 | 545 |
| 546 init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257, | |
| 1293 | 547 len_tab , 1, 1, |
|
2370
26560d4fdb1f
Memory leak fix patch by (Burkhard Plaum <plaum >at< ipf.uni-stuttgart )dot( de>)
michael
parents:
2028
diff
changeset
|
548 bits_tab, 4, 4, 0); |
| 2967 | 549 |
| 1293 | 550 return ptr; |
| 551 } | |
| 552 | |
| 553 static int decode_i_frame(FourXContext *f, uint8_t *buf, int length){ | |
| 554 int x, y; | |
| 555 const int width= f->avctx->width; | |
| 556 const int height= f->avctx->height; | |
| 557 uint16_t *dst= (uint16_t*)f->current_picture.data[0]; | |
| 558 const int stride= f->current_picture.linesize[0]>>1; | |
| 2422 | 559 const unsigned int bitstream_size= get32(buf); |
| 1478 | 560 const int token_count __attribute__((unused)) = get32(buf + bitstream_size + 8); |
| 2422 | 561 unsigned int prestream_size= 4*get32(buf + bitstream_size + 4); |
| 1293 | 562 uint8_t *prestream= buf + bitstream_size + 12; |
| 2967 | 563 |
| 2422 | 564 if(prestream_size + bitstream_size + 12 != length |
| 565 || bitstream_size > (1<<26) | |
| 566 || prestream_size > (1<<26)){ | |
|
2628
511e3afc43e1
Ministry of English Composition, reporting for duty (and the word is "skipped", not "skiped"; "skiped" would rhyme with "hyped")
melanson
parents:
2453
diff
changeset
|
567 av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n", prestream_size, bitstream_size, length); |
| 2422 | 568 return -1; |
| 569 } | |
| 2967 | 570 |
| 1293 | 571 prestream= read_huffman_tables(f, prestream); |
| 572 | |
| 573 init_get_bits(&f->gb, buf + 4, 8*bitstream_size); | |
| 574 | |
| 575 prestream_size= length + buf - prestream; | |
| 1294 | 576 |
| 1293 | 577 f->bitstream_buffer= av_fast_realloc(f->bitstream_buffer, &f->bitstream_buffer_size, prestream_size + FF_INPUT_BUFFER_PADDING_SIZE); |
| 578 f->dsp.bswap_buf((uint32_t*)f->bitstream_buffer, (uint32_t*)prestream, prestream_size/4); | |
| 579 init_get_bits(&f->pre_gb, f->bitstream_buffer, 8*prestream_size); | |
| 580 | |
| 581 f->last_dc= 0*128*8*8; | |
| 2967 | 582 |
| 1293 | 583 for(y=0; y<height; y+=16){ |
| 584 for(x=0; x<width; x+=16){ | |
| 585 if(decode_i_mb(f) < 0) | |
| 586 return -1; | |
| 587 | |
| 588 idct_put(f, x, y); | |
| 589 } | |
| 2967 | 590 dst += 16*stride; |
| 1293 | 591 } |
| 592 | |
| 593 if(get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256) | |
|
2628
511e3afc43e1
Ministry of English Composition, reporting for duty (and the word is "skipped", not "skiped"; "skiped" would rhyme with "hyped")
melanson
parents:
2453
diff
changeset
|
594 av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n"); |
| 2967 | 595 |
| 1293 | 596 return 0; |
| 597 } | |
| 598 | |
| 2967 | 599 static int decode_frame(AVCodecContext *avctx, |
| 1293 | 600 void *data, int *data_size, |
| 601 uint8_t *buf, int buf_size) | |
| 602 { | |
| 603 FourXContext * const f = avctx->priv_data; | |
| 604 AVFrame *picture = data; | |
| 605 AVFrame *p, temp; | |
| 606 int i, frame_4cc, frame_size; | |
| 607 | |
| 608 frame_4cc= get32(buf); | |
|
3303
68721b62a528
sanity checks, some might have been exploitable ...
michael
parents:
3089
diff
changeset
|
609 if(buf_size != get32(buf+4)+8 || buf_size < 20){ |
|
2628
511e3afc43e1
Ministry of English Composition, reporting for duty (and the word is "skipped", not "skiped"; "skiped" would rhyme with "hyped")
melanson
parents:
2453
diff
changeset
|
610 av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d\n", buf_size, get32(buf+4)); |
| 1293 | 611 } |
| 612 | |
| 613 if(frame_4cc == ff_get_fourcc("cfrm")){ | |
| 614 int free_index=-1; | |
| 615 const int data_size= buf_size - 20; | |
| 616 const int id= get32(buf+12); | |
| 617 const int whole_size= get32(buf+16); | |
| 618 CFrameBuffer *cfrm; | |
| 619 | |
| 620 for(i=0; i<CFRAME_BUFFER_COUNT; i++){ | |
| 621 if(f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number) | |
|
1598
932d306bf1dc
av_log() patch by (Michel Bardiaux <mbardiaux at peaktime dot be>)
michael
parents:
1514
diff
changeset
|
622 av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n", f->cfrm[i].id); |
| 1293 | 623 } |
| 2967 | 624 |
| 1293 | 625 for(i=0; i<CFRAME_BUFFER_COUNT; i++){ |
| 626 if(f->cfrm[i].id == id) break; | |
| 627 if(f->cfrm[i].size == 0 ) free_index= i; | |
| 628 } | |
| 629 | |
| 630 if(i>=CFRAME_BUFFER_COUNT){ | |
| 631 i= free_index; | |
| 632 f->cfrm[i].id= id; | |
| 633 } | |
| 634 cfrm= &f->cfrm[i]; | |
| 2967 | 635 |
| 1293 | 636 cfrm->data= av_fast_realloc(cfrm->data, &cfrm->allocated_size, cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE); |
|
3303
68721b62a528
sanity checks, some might have been exploitable ...
michael
parents:
3089
diff
changeset
|
637 if(!cfrm->data){ //explicit check needed as memcpy below might not catch a NULL |
|
68721b62a528
sanity checks, some might have been exploitable ...
michael
parents:
3089
diff
changeset
|
638 av_log(f->avctx, AV_LOG_ERROR, "realloc falure"); |
|
68721b62a528
sanity checks, some might have been exploitable ...
michael
parents:
3089
diff
changeset
|
639 return -1; |
|
68721b62a528
sanity checks, some might have been exploitable ...
michael
parents:
3089
diff
changeset
|
640 } |
| 2967 | 641 |
| 1293 | 642 memcpy(cfrm->data + cfrm->size, buf+20, data_size); |
| 643 cfrm->size += data_size; | |
| 2967 | 644 |
| 1293 | 645 if(cfrm->size >= whole_size){ |
| 646 buf= cfrm->data; | |
| 647 frame_size= cfrm->size; | |
| 2967 | 648 |
| 1293 | 649 if(id != avctx->frame_number){ |
|
2628
511e3afc43e1
Ministry of English Composition, reporting for duty (and the word is "skipped", not "skiped"; "skiped" would rhyme with "hyped")
melanson
parents:
2453
diff
changeset
|
650 av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n", id, avctx->frame_number); |
| 1293 | 651 } |
| 2967 | 652 |
| 1293 | 653 cfrm->size= cfrm->id= 0; |
| 654 frame_4cc= ff_get_fourcc("pfrm"); | |
| 655 }else | |
| 656 return buf_size; | |
| 657 }else{ | |
| 658 buf= buf + 12; | |
| 659 frame_size= buf_size - 12; | |
| 2967 | 660 } |
| 1293 | 661 |
| 662 temp= f->current_picture; | |
| 663 f->current_picture= f->last_picture; | |
| 664 f->last_picture= temp; | |
| 665 | |
| 666 p= &f->current_picture; | |
| 667 avctx->coded_frame= p; | |
| 668 | |
| 669 avctx->flags |= CODEC_FLAG_EMU_EDGE; // alternatively we would have to use our own buffer management | |
| 670 | |
| 671 if(p->data[0]) | |
| 672 avctx->release_buffer(avctx, p); | |
| 673 | |
| 674 p->reference= 1; | |
| 675 if(avctx->get_buffer(avctx, p) < 0){ | |
|
1598
932d306bf1dc
av_log() patch by (Michel Bardiaux <mbardiaux at peaktime dot be>)
michael
parents:
1514
diff
changeset
|
676 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); |
| 1293 | 677 return -1; |
| 678 } | |
| 679 | |
| 680 if(frame_4cc == ff_get_fourcc("ifrm")){ | |
| 681 p->pict_type= I_TYPE; | |
| 682 if(decode_i_frame(f, buf, frame_size) < 0) | |
| 683 return -1; | |
| 684 }else if(frame_4cc == ff_get_fourcc("pfrm")){ | |
| 685 p->pict_type= P_TYPE; | |
| 686 if(decode_p_frame(f, buf, frame_size) < 0) | |
| 687 return -1; | |
| 688 }else if(frame_4cc == ff_get_fourcc("snd_")){ | |
|
1598
932d306bf1dc
av_log() patch by (Michel Bardiaux <mbardiaux at peaktime dot be>)
michael
parents:
1514
diff
changeset
|
689 av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n", buf_size); |
| 1293 | 690 }else{ |
|
1598
932d306bf1dc
av_log() patch by (Michel Bardiaux <mbardiaux at peaktime dot be>)
michael
parents:
1514
diff
changeset
|
691 av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n", buf_size); |
| 1293 | 692 } |
| 693 | |
| 694 #if 0 | |
| 695 for(i=0; i<20; i++){ | |
| 696 printf("%2X %c ", buf[i], clip(buf[i],16,126)); | |
| 697 } | |
| 698 #endif | |
| 699 | |
| 700 p->key_frame= p->pict_type == I_TYPE; | |
| 701 | |
| 702 *picture= *p; | |
| 703 *data_size = sizeof(AVPicture); | |
| 704 | |
| 705 emms_c(); | |
| 2967 | 706 |
| 1293 | 707 return buf_size; |
| 708 } | |
| 709 | |
| 710 | |
| 711 static void common_init(AVCodecContext *avctx){ | |
| 712 FourXContext * const f = avctx->priv_data; | |
| 713 | |
| 714 dsputil_init(&f->dsp, avctx); | |
| 715 | |
| 716 f->avctx= avctx; | |
| 717 } | |
| 718 | |
| 719 static int decode_init(AVCodecContext *avctx){ | |
| 720 FourXContext * const f = avctx->priv_data; | |
| 2967 | 721 |
| 1293 | 722 common_init(avctx); |
| 723 init_vlcs(f); | |
| 724 | |
| 725 avctx->pix_fmt= PIX_FMT_RGB565; | |
| 726 | |
| 727 return 0; | |
| 728 } | |
| 729 | |
| 730 | |
| 731 static int decode_end(AVCodecContext *avctx){ | |
| 732 FourXContext * const f = avctx->priv_data; | |
| 733 int i; | |
| 734 | |
| 735 av_freep(&f->bitstream_buffer); | |
| 736 f->bitstream_buffer_size=0; | |
| 737 for(i=0; i<CFRAME_BUFFER_COUNT; i++){ | |
| 738 av_freep(&f->cfrm[i].data); | |
| 739 f->cfrm[i].allocated_size= 0; | |
| 740 } | |
| 741 free_vlc(&f->pre_vlc); | |
| 2967 | 742 |
| 1293 | 743 return 0; |
| 744 } | |
| 745 | |
| 746 AVCodec fourxm_decoder = { | |
| 747 "4xm", | |
| 748 CODEC_TYPE_VIDEO, | |
| 749 CODEC_ID_4XM, | |
| 750 sizeof(FourXContext), | |
| 751 decode_init, | |
| 752 NULL, | |
| 753 decode_end, | |
| 754 decode_frame, | |
| 755 /*CODEC_CAP_DR1,*/ | |
| 756 }; | |
| 757 |
