Mercurial > libavcodec.hg
comparison bitstream.c @ 11547:aba20ba60384 libavcodec
optimize init_vlc().
Reduce worst case time from O(N^2) to O(N*log(N)).
Speedup average case by a factor of 10 in ffv2 (total decoding speed +4-25%),
factor of 1.3 in ffvhuff (total +0.5%),
factor of 1.8 in indeo5 (total +1%),
factor of 1.1 in mjpeg (total +0.1%).
| author | lorenm |
|---|---|
| date | Mon, 29 Mar 2010 02:50:23 +0000 |
| parents | 2d49996fe7d1 |
| children | e023a196a73e |
comparison
equal
deleted
inserted
replaced
| 11546:1d81cd330928 | 11547:aba20ba60384 |
|---|---|
| 1 /* | 1 /* |
| 2 * Common bit i/o utils | 2 * Common bit i/o utils |
| 3 * Copyright (c) 2000, 2001 Fabrice Bellard | 3 * Copyright (c) 2000, 2001 Fabrice Bellard |
| 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
| 5 * Copyright (c) 2010 Loren Merritt | |
| 5 * | 6 * |
| 6 * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at> | 7 * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at> |
| 7 * | 8 * |
| 8 * This file is part of FFmpeg. | 9 * This file is part of FFmpeg. |
| 9 * | 10 * |
| 114 return -1; | 115 return -1; |
| 115 } | 116 } |
| 116 return index; | 117 return index; |
| 117 } | 118 } |
| 118 | 119 |
| 119 static int build_table(VLC *vlc, int table_nb_bits, | 120 static av_always_inline uint32_t bitswap_32(uint32_t x) { |
| 120 int nb_codes, | 121 return av_reverse[x&0xFF]<<24 |
| 121 const void *bits, int bits_wrap, int bits_size, | 122 | av_reverse[(x>>8)&0xFF]<<16 |
| 122 const void *codes, int codes_wrap, int codes_size, | 123 | av_reverse[(x>>16)&0xFF]<<8 |
| 123 const void *symbols, int symbols_wrap, int symbols_size, | 124 | av_reverse[x>>24]; |
| 124 uint32_t code_prefix, int n_prefix, int flags) | 125 } |
| 125 { | 126 |
| 126 int i, j, k, n, table_size, table_index, nb, n1, index, code_prefix2, symbol; | 127 typedef struct { |
| 128 uint8_t bits; | |
| 129 uint16_t symbol; | |
| 130 /** codeword, with the first bit-to-be-read in the msb | |
| 131 * (even if intended for a little-endian bitstream reader) */ | |
| 132 uint32_t code; | |
| 133 } VLCcode; | |
| 134 | |
| 135 static int compare_vlcspec(const void *a, const void *b) | |
| 136 { | |
| 137 const VLCcode *sa=a, *sb=b; | |
| 138 return (sa->code >> 1) - (sb->code >> 1); | |
| 139 } | |
| 140 | |
| 141 /** | |
| 142 * Build VLC decoding tables suitable for use with get_vlc(). | |
| 143 * | |
| 144 * @param vlc the context to be initted | |
| 145 * | |
| 146 * @param table_nb_bits max length of vlc codes to store directly in this table | |
| 147 * (Longer codes are delegated to subtables.) | |
| 148 * | |
| 149 * @param nb_codes number of elements in codes[] | |
| 150 * | |
| 151 * @param codes descriptions of the vlc codes | |
| 152 * These must be ordered such that codes going into the same subtable are contiguous. | |
| 153 * Sorting by VLCcode.code is sufficient, though not necessary. | |
| 154 */ | |
| 155 static int build_table(VLC *vlc, int table_nb_bits, int nb_codes, | |
| 156 VLCcode *codes, int flags) | |
| 157 { | |
| 158 int table_size, table_index, index, code_prefix, symbol, subtable_bits; | |
| 159 int i, j, k, n, nb, inc; | |
| 127 uint32_t code; | 160 uint32_t code; |
| 128 VLC_TYPE (*table)[2]; | 161 VLC_TYPE (*table)[2]; |
| 129 | 162 |
| 130 table_size = 1 << table_nb_bits; | 163 table_size = 1 << table_nb_bits; |
| 131 table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC); | 164 table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC); |
| 132 #ifdef DEBUG_VLC | 165 #ifdef DEBUG_VLC |
| 133 av_log(NULL,AV_LOG_DEBUG,"new table index=%d size=%d code_prefix=%x n=%d\n", | 166 av_log(NULL,AV_LOG_DEBUG,"new table index=%d size=%d\n", |
| 134 table_index, table_size, code_prefix, n_prefix); | 167 table_index, table_size); |
| 135 #endif | 168 #endif |
| 136 if (table_index < 0) | 169 if (table_index < 0) |
| 137 return -1; | 170 return -1; |
| 138 table = &vlc->table[table_index]; | 171 table = &vlc->table[table_index]; |
| 139 | 172 |
| 142 table[i][0] = -1; //codes | 175 table[i][0] = -1; //codes |
| 143 } | 176 } |
| 144 | 177 |
| 145 /* first pass: map codes and compute auxillary table sizes */ | 178 /* first pass: map codes and compute auxillary table sizes */ |
| 146 for(i=0;i<nb_codes;i++) { | 179 for(i=0;i<nb_codes;i++) { |
| 147 GET_DATA(n, bits, i, bits_wrap, bits_size); | 180 n = codes[i].bits; |
| 148 GET_DATA(code, codes, i, codes_wrap, codes_size); | 181 code = codes[i].code; |
| 149 /* we accept tables with holes */ | 182 symbol = codes[i].symbol; |
| 150 if (n <= 0) | |
| 151 continue; | |
| 152 if (!symbols) | |
| 153 symbol = i; | |
| 154 else | |
| 155 GET_DATA(symbol, symbols, i, symbols_wrap, symbols_size); | |
| 156 #if defined(DEBUG_VLC) && 0 | 183 #if defined(DEBUG_VLC) && 0 |
| 157 av_log(NULL,AV_LOG_DEBUG,"i=%d n=%d code=0x%x\n", i, n, code); | 184 av_log(NULL,AV_LOG_DEBUG,"i=%d n=%d code=0x%x\n", i, n, code); |
| 158 #endif | 185 #endif |
| 159 /* if code matches the prefix, it is in the table */ | |
| 160 n -= n_prefix; | |
| 161 if (n > 0) { | |
| 162 if(flags & INIT_VLC_LE) | |
| 163 code_prefix2= code & (n_prefix>=32 ? 0xffffffff : (1 << n_prefix)-1); | |
| 164 else | |
| 165 code_prefix2= code >> n; | |
| 166 if (code_prefix2 == code_prefix) { | |
| 167 if (n <= table_nb_bits) { | 186 if (n <= table_nb_bits) { |
| 168 /* no need to add another table */ | 187 /* no need to add another table */ |
| 169 j = (code << (table_nb_bits - n)) & (table_size - 1); | 188 j = code >> (32 - table_nb_bits); |
| 170 nb = 1 << (table_nb_bits - n); | 189 nb = 1 << (table_nb_bits - n); |
| 190 inc = 1; | |
| 191 if (flags & INIT_VLC_LE) { | |
| 192 j = bitswap_32(code); | |
| 193 inc = 1 << n; | |
| 194 } | |
| 171 for(k=0;k<nb;k++) { | 195 for(k=0;k<nb;k++) { |
| 172 if(flags & INIT_VLC_LE) | |
| 173 j = (code >> n_prefix) + (k<<n); | |
| 174 #ifdef DEBUG_VLC | 196 #ifdef DEBUG_VLC |
| 175 av_log(NULL, AV_LOG_DEBUG, "%4x: code=%d n=%d\n", | 197 av_log(NULL, AV_LOG_DEBUG, "%4x: code=%d n=%d\n", |
| 176 j, i, n); | 198 j, i, n); |
| 177 #endif | 199 #endif |
| 178 if (table[j][1] /*bits*/ != 0) { | 200 if (table[j][1] /*bits*/ != 0) { |
| 179 av_log(NULL, AV_LOG_ERROR, "incorrect codes\n"); | 201 av_log(NULL, AV_LOG_ERROR, "incorrect codes\n"); |
| 180 return -1; | 202 return -1; |
| 181 } | 203 } |
| 182 table[j][1] = n; //bits | 204 table[j][1] = n; //bits |
| 183 table[j][0] = symbol; | 205 table[j][0] = symbol; |
| 184 j++; | 206 j += inc; |
| 185 } | 207 } |
| 186 } else { | 208 } else { |
| 209 /* fill auxiliary table recursively */ | |
| 187 n -= table_nb_bits; | 210 n -= table_nb_bits; |
| 188 j = (code >> ((flags & INIT_VLC_LE) ? n_prefix : n)) & ((1 << table_nb_bits) - 1); | 211 code_prefix = code >> (32 - table_nb_bits); |
| 212 subtable_bits = n; | |
| 213 codes[i].bits = n; | |
| 214 codes[i].code = code << table_nb_bits; | |
| 215 for (k = i+1; k < nb_codes; k++) { | |
| 216 n = codes[k].bits - table_nb_bits; | |
| 217 if (n <= 0) | |
| 218 break; | |
| 219 code = codes[k].code; | |
| 220 if (code >> (32 - table_nb_bits) != code_prefix) | |
| 221 break; | |
| 222 codes[k].bits = n; | |
| 223 codes[k].code = code << table_nb_bits; | |
| 224 subtable_bits = FFMAX(subtable_bits, n); | |
| 225 } | |
| 226 subtable_bits = FFMIN(subtable_bits, table_nb_bits); | |
| 227 j = (flags & INIT_VLC_LE) ? bitswap_32(code_prefix) >> (32 - table_nb_bits) : code_prefix; | |
| 228 table[j][1] = -subtable_bits; | |
| 189 #ifdef DEBUG_VLC | 229 #ifdef DEBUG_VLC |
| 190 av_log(NULL,AV_LOG_DEBUG,"%4x: n=%d (subtable)\n", | 230 av_log(NULL,AV_LOG_DEBUG,"%4x: n=%d (subtable)\n", |
| 191 j, n); | 231 j, codes[i].bits + table_nb_bits); |
| 192 #endif | 232 #endif |
| 193 /* compute table size */ | 233 index = build_table(vlc, subtable_bits, k-i, codes+i, flags); |
| 194 n1 = -table[j][1]; //bits | 234 if (index < 0) |
| 195 if (n > n1) | 235 return -1; |
| 196 n1 = n; | 236 /* note: realloc has been done, so reload tables */ |
| 197 table[j][1] = -n1; //bits | 237 table = &vlc->table[table_index]; |
| 238 table[j][0] = index; //code | |
| 239 i = k-1; | |
| 198 } | 240 } |
| 199 } | |
| 200 } | |
| 201 } | |
| 202 | |
| 203 /* second pass : fill auxillary tables recursively */ | |
| 204 for(i=0;i<table_size;i++) { | |
| 205 n = table[i][1]; //bits | |
| 206 if (n < 0) { | |
| 207 n = -n; | |
| 208 if (n > table_nb_bits) { | |
| 209 n = table_nb_bits; | |
| 210 table[i][1] = -n; //bits | |
| 211 } | |
| 212 index = build_table(vlc, n, nb_codes, | |
| 213 bits, bits_wrap, bits_size, | |
| 214 codes, codes_wrap, codes_size, | |
| 215 symbols, symbols_wrap, symbols_size, | |
| 216 (flags & INIT_VLC_LE) ? (code_prefix | (i << n_prefix)) : ((code_prefix << table_nb_bits) | i), | |
| 217 n_prefix + table_nb_bits, flags); | |
| 218 if (index < 0) | |
| 219 return -1; | |
| 220 /* note: realloc has been done, so reload tables */ | |
| 221 table = &vlc->table[table_index]; | |
| 222 table[i][0] = index; //code | |
| 223 } | |
| 224 } | 241 } |
| 225 return table_index; | 242 return table_index; |
| 226 } | 243 } |
| 227 | 244 |
| 228 | 245 |
| 256 const void *bits, int bits_wrap, int bits_size, | 273 const void *bits, int bits_wrap, int bits_size, |
| 257 const void *codes, int codes_wrap, int codes_size, | 274 const void *codes, int codes_wrap, int codes_size, |
| 258 const void *symbols, int symbols_wrap, int symbols_size, | 275 const void *symbols, int symbols_wrap, int symbols_size, |
| 259 int flags) | 276 int flags) |
| 260 { | 277 { |
| 278 VLCcode buf[nb_codes]; | |
| 279 int i, j; | |
| 280 | |
| 261 vlc->bits = nb_bits; | 281 vlc->bits = nb_bits; |
| 262 if(flags & INIT_VLC_USE_NEW_STATIC){ | 282 if(flags & INIT_VLC_USE_NEW_STATIC){ |
| 263 if(vlc->table_size && vlc->table_size == vlc->table_allocated){ | 283 if(vlc->table_size && vlc->table_size == vlc->table_allocated){ |
| 264 return 0; | 284 return 0; |
| 265 }else if(vlc->table_size){ | 285 }else if(vlc->table_size){ |
| 273 | 293 |
| 274 #ifdef DEBUG_VLC | 294 #ifdef DEBUG_VLC |
| 275 av_log(NULL,AV_LOG_DEBUG,"build table nb_codes=%d\n", nb_codes); | 295 av_log(NULL,AV_LOG_DEBUG,"build table nb_codes=%d\n", nb_codes); |
| 276 #endif | 296 #endif |
| 277 | 297 |
| 278 if (build_table(vlc, nb_bits, nb_codes, | 298 assert(symbols_size <= 2 || !symbols); |
| 279 bits, bits_wrap, bits_size, | 299 j = 0; |
| 280 codes, codes_wrap, codes_size, | 300 #define COPY(condition)\ |
| 281 symbols, symbols_wrap, symbols_size, | 301 for (i = 0; i < nb_codes; i++) {\ |
| 282 0, 0, flags) < 0) { | 302 GET_DATA(buf[j].bits, bits, i, bits_wrap, bits_size);\ |
| 303 if (!(condition))\ | |
| 304 continue;\ | |
| 305 GET_DATA(buf[j].code, codes, i, codes_wrap, codes_size);\ | |
| 306 if (flags & INIT_VLC_LE)\ | |
| 307 buf[j].code = bitswap_32(buf[j].code);\ | |
| 308 else\ | |
| 309 buf[j].code <<= 32 - buf[j].bits;\ | |
| 310 if (symbols)\ | |
| 311 GET_DATA(buf[j].symbol, symbols, i, symbols_wrap, symbols_size)\ | |
| 312 else\ | |
| 313 buf[j].symbol = i;\ | |
| 314 j++;\ | |
| 315 } | |
| 316 COPY(buf[j].bits > nb_bits); | |
| 317 // qsort is the slowest part of init_vlc, and could probably be improved or avoided | |
| 318 qsort(buf, j, sizeof(VLCcode), compare_vlcspec); | |
| 319 COPY(buf[j].bits && buf[j].bits <= nb_bits); | |
| 320 nb_codes = j; | |
| 321 | |
| 322 if (build_table(vlc, nb_bits, nb_codes, buf, flags) < 0) { | |
| 283 av_freep(&vlc->table); | 323 av_freep(&vlc->table); |
| 284 return -1; | 324 return -1; |
| 285 } | 325 } |
| 286 if((flags & INIT_VLC_USE_NEW_STATIC) && vlc->table_size != vlc->table_allocated) | 326 if((flags & INIT_VLC_USE_NEW_STATIC) && vlc->table_size != vlc->table_allocated) |
| 287 av_log(NULL, AV_LOG_ERROR, "needed %d had %d\n", vlc->table_size, vlc->table_allocated); | 327 av_log(NULL, AV_LOG_ERROR, "needed %d had %d\n", vlc->table_size, vlc->table_allocated); |
