|
808
|
1 /*
|
|
|
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
|
|
|
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
|
|
|
4 *
|
|
|
5 * This file is part of FFmpeg.
|
|
|
6 *
|
|
|
7 * FFmpeg is free software; you can redistribute it and/or
|
|
|
8 * modify it under the terms of the GNU Lesser General Public
|
|
|
9 * License as published by the Free Software Foundation; either
|
|
|
10 * version 2.1 of the License, or (at your option) any later version.
|
|
|
11 *
|
|
|
12 * FFmpeg is distributed in the hope that it will be useful,
|
|
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
15 * Lesser General Public License for more details.
|
|
|
16 *
|
|
|
17 * You should have received a copy of the GNU Lesser General Public
|
|
|
18 * License along with FFmpeg; if not, write to the Free Software
|
|
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
20 *
|
|
|
21 */
|
|
|
22
|
|
|
23 /**
|
|
|
24 * @file cabac.h
|
|
|
25 * Context Adaptive Binary Arithmetic Coder.
|
|
|
26 */
|
|
|
27
|
|
|
28
|
|
|
29 //#undef NDEBUG
|
|
|
30 #include <assert.h>
|
|
|
31
|
|
|
32 #define CABAC_BITS 16
|
|
|
33 #define CABAC_MASK ((1<<CABAC_BITS)-1)
|
|
|
34 #define BRANCHLESS_CABAC_DECODER 1
|
|
|
35 //#define ARCH_X86_DISABLED 1
|
|
|
36
|
|
|
/**
 * Shared state of the CABAC encoder and decoder defined in this header.
 * NOTE(review): the x86 inline asm in this file reaches fields through hard-coded byte
 * offsets (LOW "0", RANGE "4", BYTESTART "12", BYTE "16", BYTEEND "20"); that presumably
 * assumes 4-byte int and pointers and STRICT_LIMITS undefined, so do not reorder or
 * insert fields without updating those macros -- confirm.
 */
37 typedef struct CABACContext{
|
|
|
38 int low; /* coder low value; the decoder compares it against range<<17 */
|
|
|
39 int range; /* current interval width; renormalised while it is below 0x100 */
|
|
|
40 int outstanding_count; /* encoder: pending bits emitted by put_cabac_bit() as the inverse of the next written bit */
|
|
|
41 #ifdef STRICT_LIMITS
|
|
|
42 int symCount; /* incremented once per encoded symbol by the put_cabac*() functions */
|
|
|
43 #endif
|
|
|
44 const uint8_t *bytestream_start; /* subtracted from bytestream in get_cabac_terminate() */
|
|
|
45 const uint8_t *bytestream; /* decoder read position, advanced by refill()/refill2() */
|
|
|
46 const uint8_t *bytestream_end; /* not read by the C code visible in this header; presumably end of input -- confirm */
|
|
|
47 PutBitContext pb; /* bit writer used by the encoder through put_bits() */
|
|
|
48 }CABACContext;
|
|
|
49
|
|
|
/* Lookup tables and init entry points; all are defined outside this header. */
/* Combined state transition table; get_cabac_inline() indexes it as (ff_h264_mlps_state+128)[s]. */
50 extern uint8_t ff_h264_mlps_state[4*64];
|
|
|
51 extern uint8_t ff_h264_lps_range[4*2*64]; ///< rangeTabLPS
|
|
|
52 extern uint8_t ff_h264_mps_state[2*64]; ///< transIdxMPS
|
|
|
53 extern uint8_t ff_h264_lps_state[2*64]; ///< transIdxLPS
|
|
|
/* Renormalisation shift looked up by range / RangeLPS in the decoder; refill2() also indexes it. */
54 extern const uint8_t ff_h264_norm_shift[512];
|
|
|
55
|
|
|
56
|
|
|
/* NOTE(review): presumably attaches the output buffer buf (buf_size bytes) to c for encoding -- confirm in the .c file. */
57 void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size);
|
|
|
/* NOTE(review): presumably points c at the input buffer buf (buf_size bytes) for decoding -- confirm in the .c file. */
58 void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
|
|
|
/* NOTE(review): presumably fills the state/range tables declared above -- confirm in the .c file. */
59 void ff_init_cabac_states(CABACContext *c);
|
|
|
60
|
|
|
61
|
|
|
62 static inline void put_cabac_bit(CABACContext *c, int b){
|
|
|
63 put_bits(&c->pb, 1, b);
|
|
|
64 for(;c->outstanding_count; c->outstanding_count--){
|
|
|
65 put_bits(&c->pb, 1, 1-b);
|
|
|
66 }
|
|
|
67 }
|
|
|
68
|
|
|
69 static inline void renorm_cabac_encoder(CABACContext *c){
|
|
|
70 while(c->range < 0x100){
|
|
|
71 //FIXME optimize
|
|
|
72 if(c->low<0x100){
|
|
|
73 put_cabac_bit(c, 0);
|
|
|
74 }else if(c->low<0x200){
|
|
|
75 c->outstanding_count++;
|
|
|
76 c->low -= 0x100;
|
|
|
77 }else{
|
|
|
78 put_cabac_bit(c, 1);
|
|
|
79 c->low -= 0x200;
|
|
|
80 }
|
|
|
81
|
|
|
82 c->range+= c->range;
|
|
|
83 c->low += c->low;
|
|
|
84 }
|
|
|
85 }
|
|
|
86
|
|
|
87 static void put_cabac(CABACContext *c, uint8_t * const state, int bit){
|
|
|
88 int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + *state];
|
|
|
89
|
|
|
90 if(bit == ((*state)&1)){
|
|
|
91 c->range -= RangeLPS;
|
|
|
92 *state= ff_h264_mps_state[*state];
|
|
|
93 }else{
|
|
|
94 c->low += c->range - RangeLPS;
|
|
|
95 c->range = RangeLPS;
|
|
|
96 *state= ff_h264_lps_state[*state];
|
|
|
97 }
|
|
|
98
|
|
|
99 renorm_cabac_encoder(c);
|
|
|
100
|
|
|
101 #ifdef STRICT_LIMITS
|
|
|
102 c->symCount++;
|
|
|
103 #endif
|
|
|
104 }
|
|
|
105
|
|
|
106 static void put_cabac_static(CABACContext *c, int RangeLPS, int bit){
|
|
|
107 assert(c->range > RangeLPS);
|
|
|
108
|
|
|
109 if(!bit){
|
|
|
110 c->range -= RangeLPS;
|
|
|
111 }else{
|
|
|
112 c->low += c->range - RangeLPS;
|
|
|
113 c->range = RangeLPS;
|
|
|
114 }
|
|
|
115
|
|
|
116 renorm_cabac_encoder(c);
|
|
|
117
|
|
|
118 #ifdef STRICT_LIMITS
|
|
|
119 c->symCount++;
|
|
|
120 #endif
|
|
|
121 }
|
|
|
122
|
|
|
123 /**
|
|
|
124 * @param bit 0 -> write zero bit, !=0 write one bit
|
|
|
125 */
|
|
|
126 static void put_cabac_bypass(CABACContext *c, int bit){
|
|
|
127 c->low += c->low;
|
|
|
128
|
|
|
129 if(bit){
|
|
|
130 c->low += c->range;
|
|
|
131 }
|
|
|
132 //FIXME optimize
|
|
|
133 if(c->low<0x200){
|
|
|
134 put_cabac_bit(c, 0);
|
|
|
135 }else if(c->low<0x400){
|
|
|
136 c->outstanding_count++;
|
|
|
137 c->low -= 0x200;
|
|
|
138 }else{
|
|
|
139 put_cabac_bit(c, 1);
|
|
|
140 c->low -= 0x400;
|
|
|
141 }
|
|
|
142
|
|
|
143 #ifdef STRICT_LIMITS
|
|
|
144 c->symCount++;
|
|
|
145 #endif
|
|
|
146 }
|
|
|
147
|
|
|
148 /**
|
|
|
149 *
|
|
|
150 * @return the number of bytes written
|
|
|
151 */
|
|
|
152 static int put_cabac_terminate(CABACContext *c, int bit){
|
|
|
153 c->range -= 2;
|
|
|
154
|
|
|
155 if(!bit){
|
|
|
156 renorm_cabac_encoder(c);
|
|
|
157 }else{
|
|
|
158 c->low += c->range;
|
|
|
159 c->range= 2;
|
|
|
160
|
|
|
161 renorm_cabac_encoder(c);
|
|
|
162
|
|
|
163 assert(c->low <= 0x1FF);
|
|
|
164 put_cabac_bit(c, c->low>>9);
|
|
|
165 put_bits(&c->pb, 2, ((c->low>>7)&3)|1);
|
|
|
166
|
|
|
167 flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong
|
|
|
168 }
|
|
|
169
|
|
|
170 #ifdef STRICT_LIMITS
|
|
|
171 c->symCount++;
|
|
|
172 #endif
|
|
|
173
|
|
|
174 return (put_bits_count(&c->pb)+7)>>3;
|
|
|
175 }
|
|
|
176
|
|
|
177 /**
|
|
|
178 * put (truncated) unary binarization.
|
|
|
179 */
|
|
|
180 static void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max, int max_index, int truncated){
|
|
|
181 int i;
|
|
|
182
|
|
|
183 assert(v <= max);
|
|
|
184
|
|
|
185 #if 1
|
|
|
186 for(i=0; i<v; i++){
|
|
|
187 put_cabac(c, state, 1);
|
|
|
188 if(i < max_index) state++;
|
|
|
189 }
|
|
|
190 if(truncated==0 || v<max)
|
|
|
191 put_cabac(c, state, 0);
|
|
|
192 #else
|
|
|
193 if(v <= max_index){
|
|
|
194 for(i=0; i<v; i++){
|
|
|
195 put_cabac(c, state+i, 1);
|
|
|
196 }
|
|
|
197 if(truncated==0 || v<max)
|
|
|
198 put_cabac(c, state+i, 0);
|
|
|
199 }else{
|
|
|
200 for(i=0; i<=max_index; i++){
|
|
|
201 put_cabac(c, state+i, 1);
|
|
|
202 }
|
|
|
203 for(; i<v; i++){
|
|
|
204 put_cabac(c, state+max_index, 1);
|
|
|
205 }
|
|
|
206 if(truncated==0 || v<max)
|
|
|
207 put_cabac(c, state+max_index, 0);
|
|
|
208 }
|
|
|
209 #endif
|
|
|
210 }
|
|
|
211
|
|
|
212 /**
|
|
|
213 * put unary exp golomb k-th order binarization.
|
|
|
214 */
|
|
|
215 static void put_cabac_ueg(CABACContext *c, uint8_t * state, int v, int max, int is_signed, int k, int max_index){
|
|
|
216 int i;
|
|
|
217
|
|
|
218 if(v==0)
|
|
|
219 put_cabac(c, state, 0);
|
|
|
220 else{
|
|
|
221 const int sign= v < 0;
|
|
|
222
|
|
|
223 if(is_signed) v= FFABS(v);
|
|
|
224
|
|
|
225 if(v<max){
|
|
|
226 for(i=0; i<v; i++){
|
|
|
227 put_cabac(c, state, 1);
|
|
|
228 if(i < max_index) state++;
|
|
|
229 }
|
|
|
230
|
|
|
231 put_cabac(c, state, 0);
|
|
|
232 }else{
|
|
|
233 int m= 1<<k;
|
|
|
234
|
|
|
235 for(i=0; i<max; i++){
|
|
|
236 put_cabac(c, state, 1);
|
|
|
237 if(i < max_index) state++;
|
|
|
238 }
|
|
|
239
|
|
|
240 v -= max;
|
|
|
241 while(v >= m){ //FIXME optimize
|
|
|
242 put_cabac_bypass(c, 1);
|
|
|
243 v-= m;
|
|
|
244 m+= m;
|
|
|
245 }
|
|
|
246 put_cabac_bypass(c, 0);
|
|
|
247 while(m>>=1){
|
|
|
248 put_cabac_bypass(c, v&m);
|
|
|
249 }
|
|
|
250 }
|
|
|
251
|
|
|
252 if(is_signed)
|
|
|
253 put_cabac_bypass(c, sign);
|
|
|
254 }
|
|
|
255 }
|
|
|
256
|
|
|
257 static void refill(CABACContext *c){
|
|
|
258 #if CABAC_BITS == 16
|
|
|
259 c->low+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
|
|
|
260 #else
|
|
|
261 c->low+= c->bytestream[0]<<1;
|
|
|
262 #endif
|
|
|
263 c->low -= CABAC_MASK;
|
|
|
264 c->bytestream+= CABAC_BITS/8;
|
|
|
265 }
|
|
|
266
|
|
|
267 static void refill2(CABACContext *c){
|
|
|
268 int i, x;
|
|
|
269
|
|
|
270 x= c->low ^ (c->low-1);
|
|
|
271 i= 7 - ff_h264_norm_shift[x>>(CABAC_BITS-1)];
|
|
|
272
|
|
|
273 x= -CABAC_MASK;
|
|
|
274
|
|
|
275 #if CABAC_BITS == 16
|
|
|
276 x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
|
|
|
277 #else
|
|
|
278 x+= c->bytestream[0]<<1;
|
|
|
279 #endif
|
|
|
280
|
|
|
281 c->low += x<<i;
|
|
|
282 c->bytestream+= CABAC_BITS/8;
|
|
|
283 }
|
|
|
284
|
|
|
285 static inline void renorm_cabac_decoder(CABACContext *c){
|
|
|
286 while(c->range < 0x100){
|
|
|
287 c->range+= c->range;
|
|
|
288 c->low+= c->low;
|
|
|
289 if(!(c->low & CABAC_MASK))
|
|
|
290 refill(c);
|
|
|
291 }
|
|
|
292 }
|
|
|
293
|
|
|
294 static inline void renorm_cabac_decoder_once(CABACContext *c){
|
|
|
295 #ifdef ARCH_X86_DISABLED
|
|
|
296 int temp;
|
|
|
297 #if 0
|
|
|
298 //P3:683 athlon:475
|
|
|
299 asm(
|
|
|
300 "lea -0x100(%0), %2 \n\t"
|
|
|
301 "shr $31, %2 \n\t" //FIXME 31->63 for x86-64
|
|
|
302 "shl %%cl, %0 \n\t"
|
|
|
303 "shl %%cl, %1 \n\t"
|
|
|
304 : "+r"(c->range), "+r"(c->low), "+c"(temp)
|
|
|
305 );
|
|
|
306 #elif 0
|
|
|
307 //P3:680 athlon:474
|
|
|
308 asm(
|
|
|
309 "cmp $0x100, %0 \n\t"
|
|
|
310 "setb %%cl \n\t" //FIXME 31->63 for x86-64
|
|
|
311 "shl %%cl, %0 \n\t"
|
|
|
312 "shl %%cl, %1 \n\t"
|
|
|
313 : "+r"(c->range), "+r"(c->low), "+c"(temp)
|
|
|
314 );
|
|
|
315 #elif 1
|
|
|
316 int temp2;
|
|
|
317 //P3:665 athlon:517
|
|
|
318 asm(
|
|
|
319 "lea -0x100(%0), %%eax \n\t"
|
|
|
320 "cdq \n\t"
|
|
|
321 "mov %0, %%eax \n\t"
|
|
|
322 "and %%edx, %0 \n\t"
|
|
|
323 "and %1, %%edx \n\t"
|
|
|
324 "add %%eax, %0 \n\t"
|
|
|
325 "add %%edx, %1 \n\t"
|
|
|
326 : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
|
|
|
327 );
|
|
|
328 #elif 0
|
|
|
329 int temp2;
|
|
|
330 //P3:673 athlon:509
|
|
|
331 asm(
|
|
|
332 "cmp $0x100, %0 \n\t"
|
|
|
333 "sbb %%edx, %%edx \n\t"
|
|
|
334 "mov %0, %%eax \n\t"
|
|
|
335 "and %%edx, %0 \n\t"
|
|
|
336 "and %1, %%edx \n\t"
|
|
|
337 "add %%eax, %0 \n\t"
|
|
|
338 "add %%edx, %1 \n\t"
|
|
|
339 : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
|
|
|
340 );
|
|
|
341 #else
|
|
|
342 int temp2;
|
|
|
343 //P3:677 athlon:511
|
|
|
344 asm(
|
|
|
345 "cmp $0x100, %0 \n\t"
|
|
|
346 "lea (%0, %0), %%eax \n\t"
|
|
|
347 "lea (%1, %1), %%edx \n\t"
|
|
|
348 "cmovb %%eax, %0 \n\t"
|
|
|
349 "cmovb %%edx, %1 \n\t"
|
|
|
350 : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
|
|
|
351 );
|
|
|
352 #endif
|
|
|
353 #else
|
|
|
354 //P3:675 athlon:476
|
|
|
355 int shift= (uint32_t)(c->range - 0x100)>>31;
|
|
|
356 c->range<<= shift;
|
|
|
357 c->low <<= shift;
|
|
|
358 #endif
|
|
|
359 if(!(c->low & CABAC_MASK))
|
|
|
360 refill(c);
|
|
|
361 }
|
|
|
362
|
|
|
/**
 * Decode one context-coded bit and update the context state.
 *
 * On ARCH_X86 builds that are not (PIC and __GNUC__) this is done by inline asm;
 * every other build uses the C code in the final #else branch. Both come in a
 * branching and a branchless (BRANCHLESS_CABAC_DECODER) form.
 *
 * The string macros LOW, RANGE, BYTESTART, BYTE and BYTEEND defined here are used
 * by the asm as byte displacements from the CABACContext pointer, and together with
 * BRANCHLESS_GET_CABAC they are reused by asm functions further down the file.
 * NOTE(review): BRANCHLESS_GET_CABAC is only defined in the x86 non-PIC,
 * BRANCHLESS_CABAC_DECODER branch, while later users test ARCH_X86 alone -- confirm.
 *
 * @param c     decoder context
 * @param state context state byte, replaced by its successor state
 * @return the decoded bit (0 or 1)
 */
363 static int always_inline get_cabac_inline(CABACContext *c, uint8_t * const state){
|
|
|
364 //FIXME gcc generates duplicate load/stores for c->low and c->range
|
|
|
365 #define LOW "0"
|
|
|
366 #define RANGE "4"
|
|
|
367 #define BYTESTART "12"
|
|
|
368 #define BYTE "16"
|
|
|
369 #define BYTEEND "20"
|
|
|
370 #if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
|
|
|
371 int bit;
|
|
|
372
|
|
|
373 #ifndef BRANCHLESS_CABAC_DECODER
|
|
|
374 asm volatile(
|
|
|
375 "movzbl (%1), %0 \n\t"
|
|
|
376 "movl "RANGE "(%2), %%ebx \n\t"
|
|
|
377 "movl "RANGE "(%2), %%edx \n\t"
|
|
|
378 "andl $0xC0, %%ebx \n\t"
|
|
|
379 "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%ebx, 2), %%esi\n\t"
|
|
|
380 "movl "LOW "(%2), %%ebx \n\t"
|
|
|
381 //eax:state ebx:low, edx:range, esi:RangeLPS
|
|
|
382 "subl %%esi, %%edx \n\t"
|
|
|
383 "movl %%edx, %%ecx \n\t"
|
|
|
384 "shll $17, %%ecx \n\t"
|
|
|
385 "cmpl %%ecx, %%ebx \n\t"
|
|
|
386 " ja 1f \n\t"
|
|
|
387
|
|
|
388 #if 1
|
|
|
389 //athlon:4067 P3:4110
|
|
|
390 "lea -0x100(%%edx), %%ecx \n\t"
|
|
|
391 "shr $31, %%ecx \n\t"
|
|
|
392 "shl %%cl, %%edx \n\t"
|
|
|
393 "shl %%cl, %%ebx \n\t"
|
|
|
394 #else
|
|
|
395 //athlon:4057 P3:4130
|
|
|
396 "cmp $0x100, %%edx \n\t" //FIXME avoidable
|
|
|
397 "setb %%cl \n\t"
|
|
|
398 "shl %%cl, %%edx \n\t"
|
|
|
399 "shl %%cl, %%ebx \n\t"
|
|
|
400 #endif
|
|
|
401 "movzbl "MANGLE(ff_h264_mps_state)"(%0), %%ecx \n\t"
|
|
|
402 "movb %%cl, (%1) \n\t"
|
|
|
403 //eax:state ebx:low, edx:range, esi:RangeLPS
|
|
|
404 "test %%bx, %%bx \n\t"
|
|
|
405 " jnz 2f \n\t"
|
|
|
406 "movl "BYTE "(%2), %%esi \n\t"
|
|
|
407 "subl $0xFFFF, %%ebx \n\t"
|
|
|
408 "movzwl (%%esi), %%ecx \n\t"
|
|
|
409 "bswap %%ecx \n\t"
|
|
|
410 "shrl $15, %%ecx \n\t"
|
|
|
411 "addl $2, %%esi \n\t"
|
|
|
412 "addl %%ecx, %%ebx \n\t"
|
|
|
413 "movl %%esi, "BYTE "(%2) \n\t"
|
|
|
414 "jmp 2f \n\t"
|
|
|
415 "1: \n\t"
|
|
|
416 //eax:state ebx:low, edx:range, esi:RangeLPS
|
|
|
417 "subl %%ecx, %%ebx \n\t"
|
|
|
418 "movl %%esi, %%edx \n\t"
|
|
|
419 "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
|
|
|
420 "shll %%cl, %%ebx \n\t"
|
|
|
421 "shll %%cl, %%edx \n\t"
|
|
|
422 "movzbl "MANGLE(ff_h264_lps_state)"(%0), %%ecx \n\t"
|
|
|
423 "movb %%cl, (%1) \n\t"
|
|
|
424 "addl $1, %0 \n\t"
|
|
|
425 "test %%bx, %%bx \n\t"
|
|
|
426 " jnz 2f \n\t"
|
|
|
427
|
|
|
428 "movl "BYTE "(%2), %%ecx \n\t"
|
|
|
429 "movzwl (%%ecx), %%esi \n\t"
|
|
|
430 "bswap %%esi \n\t"
|
|
|
431 "shrl $15, %%esi \n\t"
|
|
|
432 "subl $0xFFFF, %%esi \n\t"
|
|
|
433 "addl $2, %%ecx \n\t"
|
|
|
434 "movl %%ecx, "BYTE "(%2) \n\t"
|
|
|
435
|
|
|
436 "leal -1(%%ebx), %%ecx \n\t"
|
|
|
437 "xorl %%ebx, %%ecx \n\t"
|
|
|
438 "shrl $15, %%ecx \n\t"
|
|
|
439 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
|
|
|
440 "neg %%ecx \n\t"
|
|
|
441 "add $7, %%ecx \n\t"
|
|
|
442
|
|
|
443 "shll %%cl , %%esi \n\t"
|
|
|
444 "addl %%esi, %%ebx \n\t"
|
|
|
445 "2: \n\t"
|
|
|
446 "movl %%edx, "RANGE "(%2) \n\t"
|
|
|
447 "movl %%ebx, "LOW "(%2) \n\t"
|
|
|
448 :"=&a"(bit) //FIXME this is fragile gcc either runs out of registers or misscompiles it (for example if "+a"(bit) or "+m"(*state) is used
|
|
|
449 :"r"(state), "r"(c)
|
|
|
450 : "%ecx", "%ebx", "%edx", "%esi", "memory"
|
|
|
451 );
|
|
|
452 bit&=1;
|
|
|
453 #else /* BRANCHLESS_CABAC_DECODER */
|
|
|
454
|
|
|
455
|
|
|
456 #if defined CMOV_IS_FAST
|
|
|
457 #define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
|
|
|
458 "mov "tmp" , %%ecx \n\t"\
|
|
|
459 "shl $17 , "tmp" \n\t"\
|
|
|
460 "cmp "low" , "tmp" \n\t"\
|
|
|
461 "cmova %%ecx , "range" \n\t"\
|
|
|
462 "sbb %%ecx , %%ecx \n\t"\
|
|
|
463 "and %%ecx , "tmp" \n\t"\
|
|
|
464 "sub "tmp" , "low" \n\t"\
|
|
|
465 "xor %%ecx , "ret" \n\t"
|
|
|
466 #else /* CMOV_IS_FAST */
|
|
|
467 #define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
|
|
|
468 "mov "tmp" , %%ecx \n\t"\
|
|
|
469 "shl $17 , "tmp" \n\t"\
|
|
|
470 "sub "low" , "tmp" \n\t"\
|
|
|
471 "sar $31 , "tmp" \n\t" /*lps_mask*/\
|
|
|
472 "sub %%ecx , "range" \n\t" /*RangeLPS - range*/\
|
|
|
473 "and "tmp" , "range" \n\t" /*(RangeLPS - range)&lps_mask*/\
|
|
|
474 "add %%ecx , "range" \n\t" /*new range*/\
|
|
|
475 "shl $17 , %%ecx \n\t"\
|
|
|
476 "and "tmp" , %%ecx \n\t"\
|
|
|
477 "sub %%ecx , "low" \n\t"\
|
|
|
478 "xor "tmp" , "ret" \n\t"
|
|
|
479 #endif /* CMOV_IS_FAST */
|
|
|
480
|
|
|
481
|
|
|
482 #define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
|
|
|
483 "movzbl "statep" , "ret" \n\t"\
|
|
|
484 "mov "range" , "tmp" \n\t"\
|
|
|
485 "and $0xC0 , "range" \n\t"\
|
|
|
486 "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
|
|
|
487 "sub "range" , "tmp" \n\t"\
|
|
|
488 BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
|
|
|
489 "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\
|
|
|
490 "shl %%cl , "range" \n\t"\
|
|
|
491 "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\
|
|
|
492 "mov "tmpbyte" , "statep" \n\t"\
|
|
|
493 "shl %%cl , "low" \n\t"\
|
|
|
494 "test "lowword" , "lowword" \n\t"\
|
|
|
495 " jnz 1f \n\t"\
|
|
|
496 "mov "BYTE"("cabac"), %%ecx \n\t"\
|
|
|
497 "movzwl (%%ecx) , "tmp" \n\t"\
|
|
|
498 "bswap "tmp" \n\t"\
|
|
|
499 "shr $15 , "tmp" \n\t"\
|
|
|
500 "sub $0xFFFF , "tmp" \n\t"\
|
|
|
501 "add $2 , %%ecx \n\t"\
|
|
|
502 "mov %%ecx , "BYTE "("cabac") \n\t"\
|
|
|
503 "lea -1("low") , %%ecx \n\t"\
|
|
|
504 "xor "low" , %%ecx \n\t"\
|
|
|
505 "shr $15 , %%ecx \n\t"\
|
|
|
506 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\
|
|
|
507 "neg %%ecx \n\t"\
|
|
|
508 "add $7 , %%ecx \n\t"\
|
|
|
509 "shl %%cl , "tmp" \n\t"\
|
|
|
510 "add "tmp" , "low" \n\t"\
|
|
|
511 "1: \n\t"
|
|
|
512
|
|
|
513 asm volatile(
|
|
|
514 "movl "RANGE "(%2), %%esi \n\t"
|
|
|
515 "movl "LOW "(%2), %%ebx \n\t"
|
|
|
516 BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
|
|
|
517 "movl %%esi, "RANGE "(%2) \n\t"
|
|
|
518 "movl %%ebx, "LOW "(%2) \n\t"
|
|
|
519
|
|
|
520 :"=&a"(bit)
|
|
|
521 :"r"(state), "r"(c)
|
|
|
522 : "%ecx", "%ebx", "%edx", "%esi", "memory"
|
|
|
523 );
|
|
|
524 bit&=1;
|
|
|
525 #endif /* BRANCHLESS_CABAC_DECODER */
|
|
|
526 #else /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
|
|
|
/* Portable C path: RangeLPS comes from ff_h264_lps_range indexed by bits 6-7 of range and the state */
527 int s = *state;
|
|
|
528 int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + s];
|
|
|
529 int bit, lps_mask attribute_unused;
|
|
|
530
|
|
|
531 c->range -= RangeLPS;
|
|
|
532 #ifndef BRANCHLESS_CABAC_DECODER
|
|
|
533 if(c->low < (c->range<<17)){
|
|
|
534 bit= s&1;
|
|
|
535 *state= ff_h264_mps_state[s];
|
|
|
536 renorm_cabac_decoder_once(c);
|
|
|
537 }else{
|
|
|
538 bit= ff_h264_norm_shift[RangeLPS];
|
|
|
539 c->low -= (c->range<<17);
|
|
|
540 *state= ff_h264_lps_state[s];
|
|
|
541 c->range = RangeLPS<<bit;
|
|
|
542 c->low <<= bit;
|
|
|
543 bit= (s&1)^1;
|
|
|
544
|
|
|
545 if(!(c->low & 0xFFFF)){
|
|
|
546 refill2(c);
|
|
|
547 }
|
|
|
548 }
|
|
|
549 #else /* BRANCHLESS_CABAC_DECODER */
|
|
|
/* lps_mask: intended to be all ones on the LPS path and zero on the MPS path; this relies on >> of a negative int being an arithmetic shift */
550 lps_mask= ((c->range<<17) - c->low)>>31;
|
|
|
551
|
|
|
552 c->low -= (c->range<<17) & lps_mask;
|
|
|
553 c->range += (RangeLPS - c->range) & lps_mask;
|
|
|
554
|
|
|
555 s^=lps_mask;
|
|
|
556 *state= (ff_h264_mlps_state+128)[s];
|
|
|
557 bit= s&1;
|
|
|
558
|
|
|
/* lps_mask is reused below as the renormalisation shift count */
559 lps_mask= ff_h264_norm_shift[c->range];
|
|
|
560 c->range<<= lps_mask;
|
|
|
561 c->low <<= lps_mask;
|
|
|
562 if(!(c->low & CABAC_MASK))
|
|
|
563 refill2(c);
|
|
|
564 #endif /* BRANCHLESS_CABAC_DECODER */
|
|
|
565 #endif /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
|
|
|
566 return bit;
|
|
|
567 }
|
|
|
568
|
|
|
569 static int __attribute((noinline)) get_cabac_noinline(CABACContext *c, uint8_t * const state){
|
|
|
570 return get_cabac_inline(c,state);
|
|
|
571 }
|
|
|
572
|
|
|
573 static int get_cabac(CABACContext *c, uint8_t * const state){
|
|
|
574 return get_cabac_inline(c,state);
|
|
|
575 }
|
|
|
576
|
|
|
577 static int get_cabac_bypass(CABACContext *c){
|
|
|
578 #if 0 //not faster
|
|
|
579 int bit;
|
|
|
580 asm volatile(
|
|
|
581 "movl "RANGE "(%1), %%ebx \n\t"
|
|
|
582 "movl "LOW "(%1), %%eax \n\t"
|
|
|
583 "shl $17, %%ebx \n\t"
|
|
|
584 "add %%eax, %%eax \n\t"
|
|
|
585 "sub %%ebx, %%eax \n\t"
|
|
|
586 "cdq \n\t"
|
|
|
587 "and %%edx, %%ebx \n\t"
|
|
|
588 "add %%ebx, %%eax \n\t"
|
|
|
589 "test %%ax, %%ax \n\t"
|
|
|
590 " jnz 1f \n\t"
|
|
|
591 "movl "BYTE "(%1), %%ebx \n\t"
|
|
|
592 "subl $0xFFFF, %%eax \n\t"
|
|
|
593 "movzwl (%%ebx), %%ecx \n\t"
|
|
|
594 "bswap %%ecx \n\t"
|
|
|
595 "shrl $15, %%ecx \n\t"
|
|
|
596 "addl $2, %%ebx \n\t"
|
|
|
597 "addl %%ecx, %%eax \n\t"
|
|
|
598 "movl %%ebx, "BYTE "(%1) \n\t"
|
|
|
599 "1: \n\t"
|
|
|
600 "movl %%eax, "LOW "(%1) \n\t"
|
|
|
601
|
|
|
602 :"=&d"(bit)
|
|
|
603 :"r"(c)
|
|
|
604 : "%eax", "%ebx", "%ecx", "memory"
|
|
|
605 );
|
|
|
606 return bit+1;
|
|
|
607 #else
|
|
|
608 int range;
|
|
|
609 c->low += c->low;
|
|
|
610
|
|
|
611 if(!(c->low & CABAC_MASK))
|
|
|
612 refill(c);
|
|
|
613
|
|
|
614 range= c->range<<17;
|
|
|
615 if(c->low < range){
|
|
|
616 return 0;
|
|
|
617 }else{
|
|
|
618 c->low -= range;
|
|
|
619 return 1;
|
|
|
620 }
|
|
|
621 #endif
|
|
|
622 }
|
|
|
623
|
|
|
624
|
|
|
/**
 * Decode one bypass bit and apply it to val as a sign.
 * In the C path below, mask is all ones when low, after doubling and subtracting
 * range<<17, is negative (bypass bit 0); the subtraction is then undone and -val is
 * returned. Otherwise val is returned unchanged. This relies on >> of a negative int
 * being an arithmetic shift.
 * ARCH_X86 builds use inline asm instead; it addresses the context through the
 * LOW/RANGE/BYTE offset macros that are defined inside get_cabac_inline().
 * @param val value whose sign is selected by the decoded bit
 * @return val or -val
 */
625 static always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
|
|
|
626 #ifdef ARCH_X86
|
|
|
627 asm volatile(
|
|
|
628 "movl "RANGE "(%1), %%ebx \n\t"
|
|
|
629 "movl "LOW "(%1), %%eax \n\t"
|
|
|
630 "shl $17, %%ebx \n\t"
|
|
|
631 "add %%eax, %%eax \n\t"
|
|
|
632 "sub %%ebx, %%eax \n\t"
|
|
|
633 "cdq \n\t"
|
|
|
634 "and %%edx, %%ebx \n\t"
|
|
|
635 "add %%ebx, %%eax \n\t"
|
|
|
636 "xor %%edx, %%ecx \n\t"
|
|
|
637 "sub %%edx, %%ecx \n\t"
|
|
|
638 "test %%ax, %%ax \n\t"
|
|
|
639 " jnz 1f \n\t"
|
|
|
640 "movl "BYTE "(%1), %%ebx \n\t"
|
|
|
641 "subl $0xFFFF, %%eax \n\t"
|
|
|
642 "movzwl (%%ebx), %%edx \n\t"
|
|
|
643 "bswap %%edx \n\t"
|
|
|
644 "shrl $15, %%edx \n\t"
|
|
|
645 "addl $2, %%ebx \n\t"
|
|
|
646 "addl %%edx, %%eax \n\t"
|
|
|
647 "movl %%ebx, "BYTE "(%1) \n\t"
|
|
|
648 "1: \n\t"
|
|
|
649 "movl %%eax, "LOW "(%1) \n\t"
|
|
|
650
|
|
|
651 :"+c"(val)
|
|
|
652 :"r"(c)
|
|
|
653 : "%eax", "%ebx", "%edx", "memory"
|
|
|
654 );
|
|
|
655 return val;
|
|
|
656 #else
|
|
|
657 int range, mask;
|
|
|
658 c->low += c->low;
|
|
|
659
|
|
|
660 if(!(c->low & CABAC_MASK))
|
|
|
661 refill(c);
|
|
|
662
|
|
|
663 range= c->range<<17;
|
|
|
664 c->low -= range;
|
|
|
/* mask is -1 when low went negative, 0 otherwise */
665 mask= c->low >> 31;
|
|
|
666 range &= mask;
|
|
|
667 c->low += range;
|
|
|
/* (val^mask)-mask negates val when mask is -1 and leaves it unchanged when mask is 0 */
668 return (val^mask)-mask;
|
|
|
669 #endif
|
|
|
670 }
|
|
|
671
|
|
|
672 //FIXME the x86 code from this file should be moved into i386/h264 or cabac something.c/h (note: I'll kill you if you move my code away from under my fingers before I am finished with it!)
|
|
|
673 //FIXME use some macros to avoid duplicating get_cabac (can't be done yet as that would make optimization work hard)
|
|
|
674 #ifdef ARCH_X86
|
|
|
/**
 * x86 asm loop that decodes the significance map of a coefficient block.
 * For each context byte from significant_coeff_ctx_base up to (not including)
 * base + max_coeff - 1 it decodes one bin with BRANCHLESS_GET_CABAC. If that bin is
 * set, a second bin is decoded with the context 61 bytes further on (presumably the
 * "last coefficient" flag -- confirm against the caller) and the current position
 * (context pointer minus base) is stored through index. A set second bin ends the
 * loop; otherwise index advances by one int. When the loop runs to the end, the final
 * position is stored as well.
 * NOTE(review): pointers are cast to int, so this only works where pointers fit in
 * 32 bits -- confirm ARCH_X86 implies that here.
 * @param index output array receiving the positions found
 * @return number of positions written to index
 */
675 static int decode_significance_x86(CABACContext *c, int max_coeff, uint8_t *significant_coeff_ctx_base, int *index){
|
|
|
676 void *end= significant_coeff_ctx_base + max_coeff - 1;
|
|
|
677 int minusstart= -(int)significant_coeff_ctx_base;
|
|
|
678 int minusindex= 4-(int)index;
|
|
|
679 int coeff_count;
|
|
|
680 asm volatile(
|
|
|
681 "movl "RANGE "(%3), %%esi \n\t"
|
|
|
682 "movl "LOW "(%3), %%ebx \n\t"
|
|
|
683
|
|
|
684 "2: \n\t"
|
|
|
685
|
|
|
686 BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
|
|
|
687
|
|
|
688 "test $1, %%edx \n\t"
|
|
|
689 " jz 3f \n\t"
|
|
|
690
|
|
|
691 BRANCHLESS_GET_CABAC("%%edx", "%3", "61(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
|
|
|
692
|
|
|
693 "movl %2, %%eax \n\t"
|
|
|
694 "movl %4, %%ecx \n\t"
|
|
|
695 "addl %1, %%ecx \n\t"
|
|
|
696 "movl %%ecx, (%%eax) \n\t"
|
|
|
697
|
|
|
698 "test $1, %%edx \n\t"
|
|
|
699 " jnz 4f \n\t"
|
|
|
700
|
|
|
701 "addl $4, %%eax \n\t"
|
|
|
702 "movl %%eax, %2 \n\t"
|
|
|
703
|
|
|
704 "3: \n\t"
|
|
|
705 "addl $1, %1 \n\t"
|
|
|
706 "cmpl %5, %1 \n\t"
|
|
|
707 " jb 2b \n\t"
|
|
|
708 "movl %2, %%eax \n\t"
|
|
|
709 "movl %4, %%ecx \n\t"
|
|
|
710 "addl %1, %%ecx \n\t"
|
|
|
711 "movl %%ecx, (%%eax) \n\t"
|
|
|
712 "4: \n\t"
|
|
|
713 "addl %6, %%eax \n\t"
|
|
|
714 "shr $2, %%eax \n\t"
|
|
|
715
|
|
|
716 "movl %%esi, "RANGE "(%3) \n\t"
|
|
|
717 "movl %%ebx, "LOW "(%3) \n\t"
|
|
|
718 :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index)\
|
|
|
719 :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex)\
|
|
|
720 : "%ecx", "%ebx", "%edx", "%esi", "memory"\
|
|
|
721 );
|
|
|
722 return coeff_count;
|
|
|
723 }
|
|
|
724
|
|
|
/**
 * x86 asm loop that decodes the significance map for the 8x8 case.
 * The local variable last holds the current position, starting at 0. For each position
 * below 63 one bin is decoded with the context at significant_coeff_ctx_base +
 * sig_off[position]. If it is set, a second bin is decoded with the context at
 * significant_coeff_ctx_base + last_coeff_flag_offset_8x8[position] + 15 and the
 * position is stored through index. A set second bin ends the loop; otherwise index
 * advances by one int. When the loop runs to the end, the final position (63) is
 * stored as well.
 * NOTE(review): index is cast to int, so this only works where pointers fit in
 * 32 bits -- confirm ARCH_X86 implies that here.
 * @param sig_off per-position context offsets for the first bin
 * @param index   output array receiving the positions found
 * @return number of positions written to index
 */
725 static int decode_significance_8x8_x86(CABACContext *c, uint8_t *significant_coeff_ctx_base, int *index, uint8_t *sig_off){
|
|
|
726 int minusindex= 4-(int)index;
|
|
|
727 int coeff_count;
|
|
|
728 int last=0;
|
|
|
729 asm volatile(
|
|
|
730 "movl "RANGE "(%3), %%esi \n\t"
|
|
|
731 "movl "LOW "(%3), %%ebx \n\t"
|
|
|
732
|
|
|
733 "mov %1, %%edi \n\t"
|
|
|
734 "2: \n\t"
|
|
|
735
|
|
|
736 "mov %6, %%eax \n\t"
|
|
|
737 "movzbl (%%eax, %%edi), %%edi \n\t"
|
|
|
738 "add %5, %%edi \n\t"
|
|
|
739
|
|
|
740 BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%edi)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
|
|
|
741
|
|
|
742 "mov %1, %%edi \n\t"
|
|
|
743 "test $1, %%edx \n\t"
|
|
|
744 " jz 3f \n\t"
|
|
|
745
|
|
|
746 "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t"
|
|
|
747 "add %5, %%edi \n\t"
|
|
|
748
|
|
|
749 BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%edi)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
|
|
|
750
|
|
|
751 "movl %2, %%eax \n\t"
|
|
|
752 "mov %1, %%edi \n\t"
|
|
|
753 "movl %%edi, (%%eax) \n\t"
|
|
|
754
|
|
|
755 "test $1, %%edx \n\t"
|
|
|
756 " jnz 4f \n\t"
|
|
|
757
|
|
|
758 "addl $4, %%eax \n\t"
|
|
|
759 "movl %%eax, %2 \n\t"
|
|
|
760
|
|
|
761 "3: \n\t"
|
|
|
762 "addl $1, %%edi \n\t"
|
|
|
763 "mov %%edi, %1 \n\t"
|
|
|
764 "cmpl $63, %%edi \n\t"
|
|
|
765 " jb 2b \n\t"
|
|
|
766 "movl %2, %%eax \n\t"
|
|
|
767 "movl %%edi, (%%eax) \n\t"
|
|
|
768 "4: \n\t"
|
|
|
769 "addl %4, %%eax \n\t"
|
|
|
770 "shr $2, %%eax \n\t"
|
|
|
771
|
|
|
772 "movl %%esi, "RANGE "(%3) \n\t"
|
|
|
773 "movl %%ebx, "LOW "(%3) \n\t"
|
|
|
774 :"=&a"(coeff_count),"+m"(last), "+m"(index)\
|
|
|
775 :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off)\
|
|
|
776 : "%ecx", "%ebx", "%edx", "%esi", "%edi", "memory"\
|
|
|
777 );
|
|
|
778 return coeff_count;
|
|
|
779 }
|
|
|
780 #endif
|
|
|
781
|
|
|
782 /**
|
|
|
783 *
|
|
|
784 * @return the number of bytes read or 0 if no end
|
|
|
785 */
|
|
|
786 static int get_cabac_terminate(CABACContext *c){
|
|
|
787 c->range -= 2;
|
|
|
788 if(c->low < c->range<<17){
|
|
|
789 renorm_cabac_decoder_once(c);
|
|
|
790 return 0;
|
|
|
791 }else{
|
|
|
792 return c->bytestream - c->bytestream_start;
|
|
|
793 }
|
|
|
794 }
|
|
|
795
|
|
|
796 /**
|
|
|
797 * get (truncated) unnary binarization.
|
|
|
798 */
|
|
|
799 static int get_cabac_u(CABACContext *c, uint8_t * state, int max, int max_index, int truncated){
|
|
|
800 int i;
|
|
|
801
|
|
|
802 for(i=0; i<max; i++){
|
|
|
803 if(get_cabac(c, state)==0)
|
|
|
804 return i;
|
|
|
805
|
|
|
806 if(i< max_index) state++;
|
|
|
807 }
|
|
|
808
|
|
|
809 return truncated ? max : -1;
|
|
|
810 }
|
|
|
811
|
|
|
812 /**
|
|
|
813 * get unary exp golomb k-th order binarization.
|
|
|
814 */
|
|
|
815 static int get_cabac_ueg(CABACContext *c, uint8_t * state, int max, int is_signed, int k, int max_index){
|
|
|
816 int i, v;
|
|
|
817 int m= 1<<k;
|
|
|
818
|
|
|
819 if(get_cabac(c, state)==0)
|
|
|
820 return 0;
|
|
|
821
|
|
|
822 if(0 < max_index) state++;
|
|
|
823
|
|
|
824 for(i=1; i<max; i++){
|
|
|
825 if(get_cabac(c, state)==0){
|
|
|
826 if(is_signed && get_cabac_bypass(c)){
|
|
|
827 return -i;
|
|
|
828 }else
|
|
|
829 return i;
|
|
|
830 }
|
|
|
831
|
|
|
832 if(i < max_index) state++;
|
|
|
833 }
|
|
|
834
|
|
|
835 while(get_cabac_bypass(c)){
|
|
|
836 i+= m;
|
|
|
837 m+= m;
|
|
|
838 }
|
|
|
839
|
|
|
840 v=0;
|
|
|
841 while(m>>=1){
|
|
|
842 v+= v + get_cabac_bypass(c);
|
|
|
843 }
|
|
|
844 i += v;
|
|
|
845
|
|
|
846 if(is_signed && get_cabac_bypass(c)){
|
|
|
847 return -i;
|
|
|
848 }else
|
|
|
849 return i;
|
|
|
850 }
|