Mercurial > libavcodec.hg
annotate h264.h @ 6320:ffb2a7b80d6d libavcodec
ff_h264_idct8_add_sse2.
compared to mmx, 217->126 cycles on core2, 262->220 on k8.
| author | lorenm |
|---|---|
| date | Sun, 03 Feb 2008 07:05:11 +0000 |
| parents | 1d83e9c34641 |
| children | ecbe9565dcae |
| rev | line source |
|---|---|
| 4975 | 1 /* |
| 2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder | |
| 3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> | |
| 4 * | |
| 5 * This file is part of FFmpeg. | |
| 6 * | |
| 7 * FFmpeg is free software; you can redistribute it and/or | |
| 8 * modify it under the terms of the GNU Lesser General Public | |
| 9 * License as published by the Free Software Foundation; either | |
| 10 * version 2.1 of the License, or (at your option) any later version. | |
| 11 * | |
| 12 * FFmpeg is distributed in the hope that it will be useful, | |
| 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 15 * Lesser General Public License for more details. | |
| 16 * | |
| 17 * You should have received a copy of the GNU Lesser General Public | |
| 18 * License along with FFmpeg; if not, write to the Free Software | |
| 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
| 20 */ | |
| 21 | |
| 22 /** | |
| 23 * @file h264.h | |
| 24 * H.264 / AVC / MPEG4 part10 codec. | |
| 25 * @author Michael Niedermayer <michaelni@gmx.at> | |
| 26 */ | |
| 27 | |
|
5830
1d83e9c34641
Add FFMPEG_ prefix to all multiple inclusion guards.
diego
parents:
5801
diff
changeset
|
28 #ifndef FFMPEG_H264_H |
|
1d83e9c34641
Add FFMPEG_ prefix to all multiple inclusion guards.
diego
parents:
5801
diff
changeset
|
29 #define FFMPEG_H264_H |
| 4975 | 30 |
| 31 #include "dsputil.h" | |
| 32 #include "cabac.h" | |
| 33 #include "mpegvideo.h" | |
|
5638
4a26dc4ca11d
Move H.264 intra prediction functions into their own context
kostya
parents:
5231
diff
changeset
|
34 #include "h264pred.h" |
| 4975 | 35 |
| 36 #define interlaced_dct interlaced_dct_is_a_bad_name | |
| 5129 | 37 #define mb_intra mb_intra_is_not_initialized_see_mb_type |
| 4975 | 38 |
| 39 #define LUMA_DC_BLOCK_INDEX 25 | |
| 40 #define CHROMA_DC_BLOCK_INDEX 26 | |
| 41 | |
| 42 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8 | |
| 43 #define COEFF_TOKEN_VLC_BITS 8 | |
| 44 #define TOTAL_ZEROS_VLC_BITS 9 | |
| 45 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3 | |
| 46 #define RUN_VLC_BITS 3 | |
| 47 #define RUN7_VLC_BITS 6 | |
| 48 | |
| 49 #define MAX_SPS_COUNT 32 | |
| 50 #define MAX_PPS_COUNT 256 | |
| 51 | |
| 52 #define MAX_MMCO_COUNT 66 | |
| 53 | |
| 54 /* Compiling in interlaced support reduces the speed | |
| 55 * of progressive decoding by about 2%. */ | |
| 56 #define ALLOW_INTERLACE | |
| 57 | |
| 58 #ifdef ALLOW_INTERLACE | |
| 59 #define MB_MBAFF h->mb_mbaff | |
| 60 #define MB_FIELD h->mb_field_decoding_flag | |
| 61 #define FRAME_MBAFF h->mb_aff_frame | |
| 5801 | 62 #define FIELD_PICTURE (s->picture_structure != PICT_FRAME) |
| 4975 | 63 #else |
| 64 #define MB_MBAFF 0 | |
| 65 #define MB_FIELD 0 | |
| 66 #define FRAME_MBAFF 0 | |
| 5767 | 67 #define FIELD_PICTURE 0 |
| 4975 | 68 #undef IS_INTERLACED |
| 69 #define IS_INTERLACED(mb_type) 0 | |
| 70 #endif | |
|
5781
0b3aa6f4c313
Modifies macroblock addressing and current macroblock y-position for field decoding.
andoma
parents:
5772
diff
changeset
|
71 #define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE) |
| 4975 | 72 |
| 73 /** | |
| 74 * Sequence parameter set | |
| 75 */ | |
| 76 typedef struct SPS{ | |
| 77 | |
| 78 int profile_idc; | |
| 79 int level_idc; | |
| 80 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag | |
| 81 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4 | |
| 82 int poc_type; ///< pic_order_cnt_type | |
| 83 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4 | |
| 84 int delta_pic_order_always_zero_flag; | |
| 85 int offset_for_non_ref_pic; | |
| 86 int offset_for_top_to_bottom_field; | |
| 87 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle | |
| 88 int ref_frame_count; ///< num_ref_frames | |
| 89 int gaps_in_frame_num_allowed_flag; | |
|
5755
b45894d869da
Cosmetic preparations for h264/PAFF implementation.
andoma
parents:
5642
diff
changeset
|
90 int mb_width; ///< pic_width_in_mbs_minus1 + 1 |
|
b45894d869da
Cosmetic preparations for h264/PAFF implementation.
andoma
parents:
5642
diff
changeset
|
91 int mb_height; ///< pic_height_in_map_units_minus1 + 1 |
| 4975 | 92 int frame_mbs_only_flag; |
| 93 int mb_aff; ///< mb_adaptive_frame_field_flag | |
| 94 int direct_8x8_inference_flag; | |
| 95 int crop; ///< frame_cropping_flag | |
| 96 int crop_left; ///< frame_cropping_rect_left_offset | |
| 97 int crop_right; ///< frame_cropping_rect_right_offset | |
| 98 int crop_top; ///< frame_cropping_rect_top_offset | |
| 99 int crop_bottom; ///< frame_cropping_rect_bottom_offset | |
| 100 int vui_parameters_present_flag; | |
| 101 AVRational sar; | |
| 102 int timing_info_present_flag; | |
| 103 uint32_t num_units_in_tick; | |
| 104 uint32_t time_scale; | |
| 105 int fixed_frame_rate_flag; | |
| 106 short offset_for_ref_frame[256]; //FIXME allocate dynamically? | |
| 107 int bitstream_restriction_flag; | |
| 108 int num_reorder_frames; | |
| 109 int scaling_matrix_present; | |
| 110 uint8_t scaling_matrix4[6][16]; | |
| 111 uint8_t scaling_matrix8[2][64]; | |
| 112 }SPS; | |
| 113 | |
| 114 /** | |
| 115 * Picture parameter set | |
| 116 */ | |
| 117 typedef struct PPS{ | |
| 118 unsigned int sps_id; | |
| 119 int cabac; ///< entropy_coding_mode_flag | |
| 120 int pic_order_present; ///< pic_order_present_flag | |
| 121 int slice_group_count; ///< num_slice_groups_minus1 + 1 | |
| 122 int mb_slice_group_map_type; | |
| 123 unsigned int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1 | |
| 124 int weighted_pred; ///< weighted_pred_flag | |
| 125 int weighted_bipred_idc; | |
| 126 int init_qp; ///< pic_init_qp_minus26 + 26 | |
| 127 int init_qs; ///< pic_init_qs_minus26 + 26 | |
|
5231
07a97575d0c4
Add support for streams with different chroma_qp_index_offset
gpoirier
parents:
5226
diff
changeset
|
128 int chroma_qp_index_offset[2]; |
| 4975 | 129 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag |
| 130 int constrained_intra_pred; ///< constrained_intra_pred_flag | |
| 131 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag | |
| 132 int transform_8x8_mode; ///< transform_8x8_mode_flag | |
| 133 uint8_t scaling_matrix4[6][16]; | |
| 134 uint8_t scaling_matrix8[2][64]; | |
|
5231
07a97575d0c4
Add support for streams with different chroma_qp_index_offset
gpoirier
parents:
5226
diff
changeset
|
135 uint8_t chroma_qp_table[2][256]; ///< pre-scaled (with chroma_qp_index_offset) version of qp_table |
|
07a97575d0c4
Add support for streams with different chroma_qp_index_offset
gpoirier
parents:
5226
diff
changeset
|
136 int chroma_qp_diff; |
| 4975 | 137 }PPS; |
| 138 | |
| 139 /** | |
| 140 * Memory management control operation opcode. | |
| 141 */ | |
| 142 typedef enum MMCOOpcode{ | |
| 143 MMCO_END=0, | |
| 144 MMCO_SHORT2UNUSED, | |
| 145 MMCO_LONG2UNUSED, | |
| 146 MMCO_SHORT2LONG, | |
| 147 MMCO_SET_MAX_LONG, | |
| 148 MMCO_RESET, | |
| 149 MMCO_LONG, | |
| 150 } MMCOOpcode; | |
| 151 | |
| 152 /** | |
| 153 * Memory management control operation. | |
| 154 */ | |
| 155 typedef struct MMCO{ | |
| 156 MMCOOpcode opcode; | |
|
5756
db5a041fd77c
Rename MMCO stuff to prepare for h264/PAFF implementation.
andoma
parents:
5755
diff
changeset
|
157 int short_pic_num; ///< pic_num without wrapping (pic_num & max_pic_num) |
|
db5a041fd77c
Rename MMCO stuff to prepare for h264/PAFF implementation.
andoma
parents:
5755
diff
changeset
|
158 int long_arg; ///< index, pic_num, or num long refs depending on opcode |
| 4975 | 159 } MMCO; |
| 160 | |
| 161 /** | |
| 162 * H264Context | |
| 163 */ | |
| 164 typedef struct H264Context{ | |
| 165 MpegEncContext s; | |
| 166 int nal_ref_idc; | |
| 167 int nal_unit_type; | |
| 5174 | 168 uint8_t *rbsp_buffer[2]; |
| 169 unsigned int rbsp_buffer_size[2]; | |
| 4975 | 170 |
| 171 /** | |
| 172 * Used to parse AVC variant of h264 | |
| 173 */ | |
| 174 int is_avc; ///< this flag is != 0 if codec is avc1 | |
| 175 int got_avcC; ///< flag used to parse avcC data only once | |
| 176 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4) | |
| 177 | |
|
5231
07a97575d0c4
Add support for streams with different chroma_qp_index_offset
gpoirier
parents:
5226
diff
changeset
|
178 int chroma_qp[2]; //QPc |
| 4975 | 179 |
| 180 int prev_mb_skipped; | |
| 181 int next_mb_skipped; | |
| 182 | |
| 183 //prediction stuff | |
| 184 int chroma_pred_mode; | |
| 185 int intra16x16_pred_mode; | |
| 186 | |
| 187 int top_mb_xy; | |
| 188 int left_mb_xy[2]; | |
| 189 | |
| 190 int8_t intra4x4_pred_mode_cache[5*8]; | |
| 191 int8_t (*intra4x4_pred_mode)[8]; | |
|
5638
4a26dc4ca11d
Move H.264 intra prediction functions into their own context
kostya
parents:
5231
diff
changeset
|
192 H264PredContext hpc; |
| 4975 | 193 unsigned int topleft_samples_available; |
| 194 unsigned int top_samples_available; | |
| 195 unsigned int topright_samples_available; | |
| 196 unsigned int left_samples_available; | |
| 197 uint8_t (*top_borders[2])[16+2*8]; | |
| 198 uint8_t left_border[2*(17+2*9)]; | |
| 199 | |
| 200 /** | |
| 201 * non zero coeff count cache. | |
| 202 * is 64 if not available. | |
| 203 */ | |
| 204 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]); | |
| 205 uint8_t (*non_zero_count)[16]; | |
| 206 | |
| 207 /** | |
| 208 * Motion vector cache. | |
| 209 */ | |
| 210 DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]); | |
| 211 DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]); | |
| 212 #define LIST_NOT_USED -1 //FIXME rename? | |
| 213 #define PART_NOT_AVAILABLE -2 | |
| 214 | |
| 215 /** | |
| 216 * is 1 if the specific list MV&references are set to 0,0,-2. | |
| 217 */ | |
| 218 int mv_cache_clean[2]; | |
| 219 | |
| 220 /** | |
| 221 * number of neighbors (top and/or left) that used 8x8 dct | |
| 222 */ | |
| 223 int neighbor_transform_size; | |
| 224 | |
| 225 /** | |
| 226 * block_offset[ 0..23] for frame macroblocks | |
| 227 * block_offset[24..47] for field macroblocks | |
| 228 */ | |
| 229 int block_offset[2*(16+8)]; | |
| 230 | |
| 231 uint32_t *mb2b_xy; //FIXME are these 4 a good idea? | |
| 232 uint32_t *mb2b8_xy; | |
| 233 int b_stride; //FIXME use s->b4_stride | |
| 234 int b8_stride; | |
| 235 | |
| 236 int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff | |
| 237 int mb_uvlinesize; | |
| 238 | |
| 239 int emu_edge_width; | |
| 240 int emu_edge_height; | |
| 241 | |
| 242 int halfpel_flag; | |
| 243 int thirdpel_flag; | |
| 244 | |
| 245 int unknown_svq3_flag; | |
| 246 int next_slice_index; | |
| 247 | |
| 5079 | 248 SPS *sps_buffers[MAX_SPS_COUNT]; |
| 4975 | 249 SPS sps; ///< current sps |
| 250 | |
| 5079 | 251 PPS *pps_buffers[MAX_PPS_COUNT]; |
| 4975 | 252 /** |
| 253 * current pps | |
| 254 */ | |
| 255 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that? | |
| 256 | |
| 257 uint32_t dequant4_buffer[6][52][16]; | |
| 258 uint32_t dequant8_buffer[2][52][64]; | |
| 259 uint32_t (*dequant4_coeff[6])[16]; | |
| 260 uint32_t (*dequant8_coeff[2])[64]; | |
| 261 int dequant_coeff_pps; ///< reinit tables when pps changes | |
| 262 | |
| 263 int slice_num; | |
| 264 uint8_t *slice_table_base; | |
| 265 uint8_t *slice_table; ///< slice_table_base + 2*mb_stride + 1 | |
| 266 int slice_type; | |
| 267 int slice_type_fixed; | |
| 268 | |
| 269 //interlacing specific flags | |
| 270 int mb_aff_frame; | |
| 271 int mb_field_decoding_flag; | |
| 272 int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag | |
| 273 | |
| 274 unsigned int sub_mb_type[4]; | |
| 275 | |
| 276 //POC stuff | |
| 277 int poc_lsb; | |
| 278 int poc_msb; | |
| 279 int delta_poc_bottom; | |
| 280 int delta_poc[2]; | |
| 281 int frame_num; | |
| 282 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0 | |
| 283 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0 | |
| 284 int frame_num_offset; ///< for POC type 2 | |
| 285 int prev_frame_num_offset; ///< for POC type 2 | |
| 286 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2 | |
| 287 | |
| 288 /** | |
|
5772
65b71bd21a4d
Fix h->curr_pic_num for field pictures. Necessary for proper PAFF support.
andoma
parents:
5767
diff
changeset
|
289 * frame_num for frames or 2*frame_num+1 for field pics. |
| 4975 | 290 */ |
| 291 int curr_pic_num; | |
| 292 | |
| 293 /** | |
| 294 * max_frame_num or 2*max_frame_num for field pics. | |
| 295 */ | |
| 296 int max_pic_num; | |
| 297 | |
| 298 //Weighted pred stuff | |
| 299 int use_weight; | |
| 300 int use_weight_chroma; | |
| 301 int luma_log2_weight_denom; | |
| 302 int chroma_log2_weight_denom; | |
| 303 int luma_weight[2][48]; | |
| 304 int luma_offset[2][48]; | |
| 305 int chroma_weight[2][48][2]; | |
| 306 int chroma_offset[2][48][2]; | |
| 307 int implicit_weight[48][48]; | |
| 308 | |
| 309 //deblock | |
| 310 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0 | |
| 311 int slice_alpha_c0_offset; | |
| 312 int slice_beta_offset; | |
| 313 | |
| 314 int redundant_pic_count; | |
| 315 | |
| 316 int direct_spatial_mv_pred; | |
| 317 int dist_scale_factor[16]; | |
| 318 int dist_scale_factor_field[32]; | |
| 319 int map_col_to_list0[2][16]; | |
| 320 int map_col_to_list0_field[2][32]; | |
| 321 | |
| 322 /** | |
| 323 * num_ref_idx_l0/1_active_minus1 + 1 | |
| 324 */ | |
| 325 unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode | |
| 326 unsigned int list_count; | |
| 327 Picture *short_ref[32]; | |
| 328 Picture *long_ref[32]; | |
|
5755
b45894d869da
Cosmetic preparations for h264/PAFF implementation.
andoma
parents:
5642
diff
changeset
|
329 Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture |
|
b45894d869da
Cosmetic preparations for h264/PAFF implementation.
andoma
parents:
5642
diff
changeset
|
330 Picture ref_list[2][48]; /**< 0..15: frame refs, 16..47: mbaff field refs. |
|
b45894d869da
Cosmetic preparations for h264/PAFF implementation.
andoma
parents:
5642
diff
changeset
|
331 Reordered version of default_ref_list |
|
b45894d869da
Cosmetic preparations for h264/PAFF implementation.
andoma
parents:
5642
diff
changeset
|
332 according to picture reordering in slice header */ |
| 4975 | 333 Picture *delayed_pic[18]; //FIXME size? |
| 334 Picture *delayed_output_pic; | |
| 335 | |
| 336 /** | |
| 337 * memory management control operations buffer. | |
| 338 */ | |
| 339 MMCO mmco[MAX_MMCO_COUNT]; | |
| 340 int mmco_index; | |
| 341 | |
| 342 int long_ref_count; ///< number of actual long term references | |
| 343 int short_ref_count; ///< number of actual short term references | |
| 344 | |
| 345 //data partitioning | |
| 346 GetBitContext intra_gb; | |
| 347 GetBitContext inter_gb; | |
| 348 GetBitContext *intra_gb_ptr; | |
| 349 GetBitContext *inter_gb_ptr; | |
| 350 | |
| 6320 | 351 DECLARE_ALIGNED_16(DCTELEM, mb[16*24]); |
| 4975 | 352 DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t, we can either check that i is not too large or ensure that there is some unused space after mb |
| 353 | |
| 354 /** | |
| 355 * Cabac | |
| 356 */ | |
| 357 CABACContext cabac; | |
| 358 uint8_t cabac_state[460]; | |
| 359 int cabac_init_idc; | |
| 360 | |
| 361 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */ | |
| 362 uint16_t *cbp_table; | |
| 363 int cbp; | |
| 364 int top_cbp; | |
| 365 int left_cbp; | |
| 366 /* chroma_pred_mode for i4x4 or i16x16, else 0 */ | |
| 367 uint8_t *chroma_pred_mode_table; | |
| 368 int last_qscale_diff; | |
| 369 int16_t (*mvd_table[2])[2]; | |
| 370 DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]); | |
| 371 uint8_t *direct_table; | |
| 372 uint8_t direct_cache[5*8]; | |
| 373 | |
| 374 uint8_t zigzag_scan[16]; | |
| 375 uint8_t zigzag_scan8x8[64]; | |
| 376 uint8_t zigzag_scan8x8_cavlc[64]; | |
| 377 uint8_t field_scan[16]; | |
| 378 uint8_t field_scan8x8[64]; | |
| 379 uint8_t field_scan8x8_cavlc[64]; | |
| 380 const uint8_t *zigzag_scan_q0; | |
| 381 const uint8_t *zigzag_scan8x8_q0; | |
| 382 const uint8_t *zigzag_scan8x8_cavlc_q0; | |
| 383 const uint8_t *field_scan_q0; | |
| 384 const uint8_t *field_scan8x8_q0; | |
| 385 const uint8_t *field_scan8x8_cavlc_q0; | |
| 386 | |
| 387 int x264_build; | |
| 5642 | 388 |
| 389 /** | |
| 390 * @defgroup multithreading Members for slice based multithreading | |
| 391 * @{ | |
| 392 */ | |
| 393 struct H264Context *thread_context[MAX_THREADS]; | |
| 394 | |
| 395 /** | |
| 396 * current slice number, used to initialize slice_num of each thread/context | |
| 397 */ | |
| 398 int current_slice; | |
| 399 | |
| 400 /** | |
| 401 * Max number of threads / contexts. | |
| 402 * This is equal to AVCodecContext.thread_count unless | |
| 403 * multithreaded decoding is impossible, in which case it is | |
| 404 * reduced to 1. | |
| 405 */ | |
| 406 int max_contexts; | |
| 407 | |
| 408 /** | |
| 409 * 1 if the single thread fallback warning has already been | |
| 410 * displayed, 0 otherwise. | |
| 411 */ | |
| 412 int single_decode_warning; | |
| 413 | |
| 414 int last_slice_type; | |
| 415 /** @} */ | |
| 416 | |
| 4975 | 417 }H264Context; |
| 418 | |
|
5830
1d83e9c34641
Add FFMPEG_ prefix to all multiple inclusion guards.
diego
parents:
5801
diff
changeset
|
419 #endif /* FFMPEG_H264_H */ |
