Mercurial > libavcodec.hg
annotate eatgq.c @ 12510:ef2f2db5b7be libavcodec
Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
code directly also and remove loop setup. 20% faster in function, 0.8% overall.
See "[PATCH] unroll loop in h264_idct_add8_sse2()" thread on ML.
| author | rbultje |
|---|---|
| date | Fri, 24 Sep 2010 14:05:45 +0000 |
| parents | 7dd2a45249a9 |
| children |
| rev | line source |
|---|---|
| 8119 | 1 /* |
| 2 * Electronic Arts TGQ Video Decoder | |
| 3 * Copyright (c) 2007-2008 Peter Ross <pross@xvid.org> | |
| 4 * | |
| 5 * This file is part of FFmpeg. | |
| 6 * | |
| 7 * FFmpeg is free software; you can redistribute it and/or | |
| 8 * modify it under the terms of the GNU Lesser General Public | |
| 9 * License as published by the Free Software Foundation; either | |
| 10 * version 2.1 of the License, or (at your option) any later version. | |
| 11 * | |
| 12 * FFmpeg is distributed in the hope that it will be useful, | |
| 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 15 * Lesser General Public License for more details. | |
| 16 * | |
| 17 * You should have received a copy of the GNU Lesser General Public | |
| 18 * License along with FFmpeg; if not, write to the Free Software | |
| 19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
| 20 */ | |
| 21 | |
| 22 /** | |
|
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
11560
diff
changeset
|
23 * @file |
| 8119 | 24 * Electronic Arts TGQ Video Decoder |
| 25 * @author Peter Ross <pross@xvid.org> | |
| 26 * | |
| 27 * Technical details here: | |
| 28 * http://wiki.multimedia.cx/index.php?title=Electronic_Arts_TGQ | |
| 29 */ | |
| 30 | |
| 31 #include "avcodec.h" | |
| 32 #define ALT_BITSTREAM_READER_LE | |
| 9428 | 33 #include "get_bits.h" |
| 8119 | 34 #include "bytestream.h" |
| 35 #include "dsputil.h" | |
|
8218
03054192daac
Move aanscales tables to their own file; fixes compilation without encoders.
diego
parents:
8119
diff
changeset
|
36 #include "aandcttab.h" |
| 8119 | 37 |
| 38 typedef struct TgqContext { | |
| 39 AVCodecContext *avctx; | |
| 40 DSPContext dsp; | |
| 41 AVFrame frame; | |
| 42 int width,height; | |
| 43 ScanTable scantable; | |
| 44 int qtable[64]; | |
| 11369 | 45 DECLARE_ALIGNED(16, DCTELEM, block)[6][64]; |
| 8119 | 46 } TgqContext; |
| 47 | |
| 48 static av_cold int tgq_decode_init(AVCodecContext *avctx){ | |
| 49 TgqContext *s = avctx->priv_data; | |
| 50 s->avctx = avctx; | |
| 51 if(avctx->idct_algo==FF_IDCT_AUTO) | |
| 52 avctx->idct_algo=FF_IDCT_EA; | |
| 53 dsputil_init(&s->dsp, avctx); | |
| 54 ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct); | |
| 55 avctx->time_base = (AVRational){1, 15}; | |
| 56 avctx->pix_fmt = PIX_FMT_YUV420P; | |
| 57 return 0; | |
| 58 } | |
| 59 | |
| 60 static void tgq_decode_block(TgqContext *s, DCTELEM block[64], GetBitContext *gb){ | |
| 61 uint8_t *perm = s->scantable.permutated; | |
| 62 int i,j,value; | |
| 63 block[0] = get_sbits(gb,8) * s->qtable[0]; | |
| 64 for(i=1; i<64; ) { | |
| 65 switch(show_bits(gb,3)) { | |
| 66 case 4: | |
| 67 block[perm[i++]] = 0; | |
| 68 case 0: | |
| 69 block[perm[i++]] = 0; | |
| 70 skip_bits(gb,3); | |
| 71 break; | |
| 72 case 5: | |
| 73 case 1: | |
| 74 skip_bits(gb,2); | |
| 75 value = get_bits(gb,6); | |
| 76 for(j=0; j<value; j++) | |
| 77 block[perm[i++]] = 0; | |
| 78 break; | |
| 79 case 6: | |
| 80 skip_bits(gb,3); | |
| 81 block[perm[i]] = -s->qtable[perm[i]]; | |
| 82 i++; | |
| 83 break; | |
| 84 case 2: | |
| 85 skip_bits(gb,3); | |
| 86 block[perm[i]] = s->qtable[perm[i]]; | |
| 87 i++; | |
| 88 break; | |
| 89 case 7: // 111b | |
| 90 case 3: // 011b | |
| 91 skip_bits(gb,2); | |
| 92 if (show_bits(gb,6)==0x3F) { | |
| 93 skip_bits(gb, 6); | |
| 94 block[perm[i]] = get_sbits(gb,8)*s->qtable[perm[i]]; | |
| 95 }else{ | |
| 96 block[perm[i]] = get_sbits(gb,6)*s->qtable[perm[i]]; | |
| 97 } | |
| 98 i++; | |
| 99 break; | |
| 100 } | |
| 101 } | |
| 102 block[0] += 128<<4; | |
| 103 } | |
| 104 | |
| 105 static void tgq_idct_put_mb(TgqContext *s, DCTELEM (*block)[64], int mb_x, int mb_y){ | |
| 106 int linesize= s->frame.linesize[0]; | |
| 107 uint8_t *dest_y = s->frame.data[0] + (mb_y * 16* linesize ) + mb_x * 16; | |
| 108 uint8_t *dest_cb = s->frame.data[1] + (mb_y * 8 * s->frame.linesize[1]) + mb_x * 8; | |
| 109 uint8_t *dest_cr = s->frame.data[2] + (mb_y * 8 * s->frame.linesize[2]) + mb_x * 8; | |
| 110 | |
| 111 s->dsp.idct_put(dest_y , linesize, block[0]); | |
| 112 s->dsp.idct_put(dest_y + 8, linesize, block[1]); | |
| 113 s->dsp.idct_put(dest_y + 8*linesize , linesize, block[2]); | |
| 114 s->dsp.idct_put(dest_y + 8*linesize + 8, linesize, block[3]); | |
| 115 if(!(s->avctx->flags&CODEC_FLAG_GRAY)){ | |
| 116 s->dsp.idct_put(dest_cb, s->frame.linesize[1], block[4]); | |
| 117 s->dsp.idct_put(dest_cr, s->frame.linesize[2], block[5]); | |
| 118 } | |
| 119 } | |
| 120 | |
| 121 static inline void tgq_dconly(TgqContext *s, unsigned char *dst, int dst_stride, int dc){ | |
| 122 int level = av_clip_uint8((dc*s->qtable[0] + 2056)>>4); | |
| 123 int j; | |
| 124 for(j=0;j<8;j++) | |
| 125 memset(dst+j*dst_stride, level, 8); | |
| 126 } | |
| 127 | |
| 128 static void tgq_idct_put_mb_dconly(TgqContext *s, int mb_x, int mb_y, const int8_t *dc) | |
| 129 { | |
| 130 int linesize= s->frame.linesize[0]; | |
| 131 uint8_t *dest_y = s->frame.data[0] + (mb_y * 16* linesize ) + mb_x * 16; | |
| 132 uint8_t *dest_cb = s->frame.data[1] + (mb_y * 8 * s->frame.linesize[1]) + mb_x * 8; | |
| 133 uint8_t *dest_cr = s->frame.data[2] + (mb_y * 8 * s->frame.linesize[2]) + mb_x * 8; | |
| 134 tgq_dconly(s,dest_y , linesize, dc[0]); | |
| 135 tgq_dconly(s,dest_y + 8, linesize, dc[1]); | |
| 136 tgq_dconly(s,dest_y + 8*linesize , linesize, dc[2]); | |
| 137 tgq_dconly(s,dest_y + 8*linesize + 8, linesize, dc[3]); | |
| 138 if(!(s->avctx->flags&CODEC_FLAG_GRAY)) { | |
| 139 tgq_dconly(s,dest_cb, s->frame.linesize[1], dc[4]); | |
| 140 tgq_dconly(s,dest_cr, s->frame.linesize[2], dc[5]); | |
| 141 } | |
| 142 } | |
| 143 | |
|
9453
b5bc4764ff7a
Change tgq_decode_mb function arguments from using int8_t to uint8_t, this does
reimar
parents:
9428
diff
changeset
|
144 static void tgq_decode_mb(TgqContext *s, int mb_y, int mb_x, const uint8_t **bs, const uint8_t *buf_end){ |
| 8119 | 145 int mode; |
| 146 int i; | |
| 147 int8_t dc[6]; | |
| 148 | |
|
9453
b5bc4764ff7a
Change tgq_decode_mb function arguments from using int8_t to uint8_t, this does
reimar
parents:
9428
diff
changeset
|
149 mode = bytestream_get_byte(bs); |
| 8119 | 150 if (mode>buf_end-*bs) { |
| 151 av_log(s->avctx, AV_LOG_ERROR, "truncated macroblock\n"); | |
| 152 return; | |
| 153 } | |
| 154 | |
| 155 if (mode>12) { | |
| 156 GetBitContext gb; | |
| 157 init_get_bits(&gb, *bs, mode*8); | |
| 158 for(i=0; i<6; i++) | |
|
9685
ab8b3b2e4d49
Move eatgq blocks array from the stack to the codec context and make sure
reimar
parents:
9453
diff
changeset
|
159 tgq_decode_block(s, s->block[i], &gb); |
|
ab8b3b2e4d49
Move eatgq blocks array from the stack to the codec context and make sure
reimar
parents:
9453
diff
changeset
|
160 tgq_idct_put_mb(s, s->block, mb_x, mb_y); |
| 8119 | 161 }else{ |
| 162 if (mode==3) { | |
| 163 memset(dc, (*bs)[0], 4); | |
| 164 dc[4] = (*bs)[1]; | |
| 165 dc[5] = (*bs)[2]; | |
| 166 }else if (mode==6) { | |
| 167 memcpy(dc, *bs, 6); | |
| 168 }else if (mode==12) { | |
| 169 for(i=0; i<6; i++) | |
| 170 dc[i] = (*bs)[i*2]; | |
| 171 }else{ | |
| 172 av_log(s->avctx, AV_LOG_ERROR, "unsupported mb mode %i\n", mode); | |
| 173 } | |
| 174 tgq_idct_put_mb_dconly(s, mb_x, mb_y, dc); | |
| 175 } | |
| 176 *bs += mode; | |
| 177 } | |
| 178 | |
| 179 static void tgq_calculate_qtable(TgqContext *s, int quant){ | |
| 180 int i,j; | |
| 181 const int a = (14*(100-quant))/100 + 1; | |
| 182 const int b = (11*(100-quant))/100 + 4; | |
| 183 for(j=0;j<8;j++) | |
| 184 for(i=0;i<8;i++) | |
| 185 if (s->avctx->idct_algo==FF_IDCT_EA) | |
| 186 s->qtable[j*8+i] = ((a*(j+i)/(7+7) + b)*ff_inv_aanscales[j*8+i])>>(14-4); | |
| 187 else | |
| 188 s->qtable[j*8+i] = (a*(j+i)/(7+7) + b)<<3; | |
| 189 } | |
| 190 | |
| 191 static int tgq_decode_frame(AVCodecContext *avctx, | |
| 192 void *data, int *data_size, | |
|
9355
54bc8a2727b0
Implement avcodec_decode_video2(), _audio3() and _subtitle2() which takes an
rbultje
parents:
9083
diff
changeset
|
193 AVPacket *avpkt){ |
|
54bc8a2727b0
Implement avcodec_decode_video2(), _audio3() and _subtitle2() which takes an
rbultje
parents:
9083
diff
changeset
|
194 const uint8_t *buf = avpkt->data; |
|
54bc8a2727b0
Implement avcodec_decode_video2(), _audio3() and _subtitle2() which takes an
rbultje
parents:
9083
diff
changeset
|
195 int buf_size = avpkt->size; |
| 8119 | 196 const uint8_t *buf_start = buf; |
| 197 const uint8_t *buf_end = buf + buf_size; | |
| 198 TgqContext *s = avctx->priv_data; | |
| 199 int x,y; | |
| 200 | |
| 201 int big_endian = AV_RL32(&buf[4]) > 0x000FFFFF; | |
| 202 buf += 8; | |
| 203 | |
| 204 if(8>buf_end-buf) { | |
| 205 av_log(avctx, AV_LOG_WARNING, "truncated header\n"); | |
| 206 return -1; | |
| 207 } | |
| 208 s->width = big_endian ? AV_RB16(&buf[0]) : AV_RL16(&buf[0]); | |
| 209 s->height = big_endian ? AV_RB16(&buf[2]) : AV_RL16(&buf[2]); | |
| 210 | |
| 211 if (s->avctx->width!=s->width || s->avctx->height!=s->height) { | |
| 212 avcodec_set_dimensions(s->avctx, s->width, s->height); | |
| 213 if (s->frame.data[0]) | |
| 214 avctx->release_buffer(avctx, &s->frame); | |
| 215 } | |
| 216 tgq_calculate_qtable(s, buf[4]); | |
| 217 buf += 8; | |
| 218 | |
| 219 if (!s->frame.data[0]) { | |
| 220 s->frame.key_frame = 1; | |
| 221 s->frame.pict_type = FF_I_TYPE; | |
| 222 s->frame.buffer_hints = FF_BUFFER_HINTS_VALID; | |
| 223 if (avctx->get_buffer(avctx, &s->frame)) { | |
| 224 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |
| 225 return -1; | |
| 226 } | |
| 227 } | |
| 228 | |
| 229 for (y=0; y<(avctx->height+15)/16; y++) | |
| 230 for (x=0; x<(avctx->width+15)/16; x++) | |
|
9453
b5bc4764ff7a
Change tgq_decode_mb function arguments from using int8_t to uint8_t, this does
reimar
parents:
9428
diff
changeset
|
231 tgq_decode_mb(s, y, x, &buf, buf_end); |
| 8119 | 232 |
| 233 *data_size = sizeof(AVFrame); | |
| 234 *(AVFrame*)data = s->frame; | |
| 235 | |
| 236 return buf-buf_start; | |
| 237 } | |
| 238 | |
| 239 static av_cold int tgq_decode_end(AVCodecContext *avctx){ | |
| 240 TgqContext *s = avctx->priv_data; | |
| 241 if (s->frame.data[0]) | |
| 242 s->avctx->release_buffer(avctx, &s->frame); | |
| 243 return 0; | |
| 244 } | |
| 245 | |
| 246 AVCodec eatgq_decoder = { | |
| 247 "eatgq", | |
|
11560
8a4984c5cacc
Define AVMediaType enum, and use it instead of enum CodecType, which
stefano
parents:
11369
diff
changeset
|
248 AVMEDIA_TYPE_VIDEO, |
| 8119 | 249 CODEC_ID_TGQ, |
| 250 sizeof(TgqContext), | |
| 251 tgq_decode_init, | |
| 252 NULL, | |
| 253 tgq_decode_end, | |
| 254 tgq_decode_frame, | |
| 255 CODEC_CAP_DR1, | |
|
9083
bf274494b66e
Change a bunch of codec long_names to be more consistent and descriptive.
diego
parents:
8718
diff
changeset
|
256 .long_name = NULL_IF_CONFIG_SMALL("Electronic Arts TGQ video"), |
| 8119 | 257 }; |
