Mercurial > libavcodec.hg
comparison simple_idct.c @ 1008:fb6cbb8a04a3 libavcodec
fixing DCTELEM != short
| author | michaelni |
|---|---|
| date | Wed, 15 Jan 2003 19:21:21 +0000 |
| parents | caa77cd960c0 |
| children | b32afefe7d33 |
comparison
equal
deleted
inserted
replaced
| 1007:b2cf2a1d9a51 | 1008:fb6cbb8a04a3 |
|---|---|
| 65 /* signed 16x16 -> 32 multiply */ | 65 /* signed 16x16 -> 32 multiply */ |
| 66 #define MUL16(rt, ra, rb) rt = (ra) * (rb) | 66 #define MUL16(rt, ra, rb) rt = (ra) * (rb) |
| 67 | 67 |
| 68 #endif | 68 #endif |
| 69 | 69 |
| 70 static inline void idctRowCondDC (int16_t * row) | 70 static inline void idctRowCondDC (DCTELEM * row) |
| 71 { | 71 { |
| 72 int a0, a1, a2, a3, b0, b1, b2, b3; | 72 int a0, a1, a2, a3, b0, b1, b2, b3; |
| 73 #ifdef FAST_64BIT | 73 #ifdef FAST_64BIT |
| 74 uint64_t temp; | 74 uint64_t temp; |
| 75 #else | 75 #else |
| 80 #ifdef WORDS_BIGENDIAN | 80 #ifdef WORDS_BIGENDIAN |
| 81 #define ROW0_MASK 0xffff000000000000LL | 81 #define ROW0_MASK 0xffff000000000000LL |
| 82 #else | 82 #else |
| 83 #define ROW0_MASK 0xffffLL | 83 #define ROW0_MASK 0xffffLL |
| 84 #endif | 84 #endif |
| 85 if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) | | 85 if(sizeof(DCTELEM)==2){ |
| 86 ((uint64_t *)row)[1]) == 0) { | 86 if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) | |
| 87 temp = (row[0] << 3) & 0xffff; | 87 ((uint64_t *)row)[1]) == 0) { |
| 88 temp += temp << 16; | 88 temp = (row[0] << 3) & 0xffff; |
| 89 temp += temp << 32; | 89 temp += temp << 16; |
| 90 ((uint64_t *)row)[0] = temp; | 90 temp += temp << 32; |
| 91 ((uint64_t *)row)[1] = temp; | 91 ((uint64_t *)row)[0] = temp; |
| 92 return; | 92 ((uint64_t *)row)[1] = temp; |
| 93 } | 93 return; |
| 94 } | |
| 95 }else{ | |
| 96 if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) { | |
| 97 row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3; | |
| 98 return; | |
| 99 } | |
| 100 } | |
| 94 #else | 101 #else |
| 95 if (!(((uint32_t*)row)[1] | | 102 if(sizeof(DCTELEM)==2){ |
| 96 ((uint32_t*)row)[2] | | 103 if (!(((uint32_t*)row)[1] | |
| 97 ((uint32_t*)row)[3] | | 104 ((uint32_t*)row)[2] | |
| 98 row[1])) { | 105 ((uint32_t*)row)[3] | |
| 99 temp = (row[0] << 3) & 0xffff; | 106 row[1])) { |
| 100 temp += temp << 16; | 107 temp = (row[0] << 3) & 0xffff; |
| 101 ((uint32_t*)row)[0]=((uint32_t*)row)[1] = | 108 temp += temp << 16; |
| 102 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; | 109 ((uint32_t*)row)[0]=((uint32_t*)row)[1] = |
| 103 return; | 110 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; |
| 104 } | 111 return; |
| 112 } | |
| 113 }else{ | |
| 114 if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) { | |
| 115 row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3; | |
| 116 return; | |
| 117 } | |
| 118 } | |
| 105 #endif | 119 #endif |
| 106 | 120 |
| 107 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); | 121 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); |
| 108 a1 = a0; | 122 a1 = a0; |
| 109 a2 = a0; | 123 a2 = a0; |
| 157 row[3] = (a3 + b3) >> ROW_SHIFT; | 171 row[3] = (a3 + b3) >> ROW_SHIFT; |
| 158 row[4] = (a3 - b3) >> ROW_SHIFT; | 172 row[4] = (a3 - b3) >> ROW_SHIFT; |
| 159 } | 173 } |
| 160 | 174 |
| 161 static inline void idctSparseColPut (UINT8 *dest, int line_size, | 175 static inline void idctSparseColPut (UINT8 *dest, int line_size, |
| 162 int16_t * col) | 176 DCTELEM * col) |
| 163 { | 177 { |
| 164 int a0, a1, a2, a3, b0, b1, b2, b3; | 178 int a0, a1, a2, a3, b0, b1, b2, b3; |
| 165 UINT8 *cm = cropTbl + MAX_NEG_CROP; | 179 UINT8 *cm = cropTbl + MAX_NEG_CROP; |
| 166 | 180 |
| 167 /* XXX: I did that only to give same values as previous code */ | 181 /* XXX: I did that only to give same values as previous code */ |
| 229 dest += line_size; | 243 dest += line_size; |
| 230 dest[0] = cm[(a0 - b0) >> COL_SHIFT]; | 244 dest[0] = cm[(a0 - b0) >> COL_SHIFT]; |
| 231 } | 245 } |
| 232 | 246 |
| 233 static inline void idctSparseColAdd (UINT8 *dest, int line_size, | 247 static inline void idctSparseColAdd (UINT8 *dest, int line_size, |
| 234 int16_t * col) | 248 DCTELEM * col) |
| 235 { | 249 { |
| 236 int a0, a1, a2, a3, b0, b1, b2, b3; | 250 int a0, a1, a2, a3, b0, b1, b2, b3; |
| 237 UINT8 *cm = cropTbl + MAX_NEG_CROP; | 251 UINT8 *cm = cropTbl + MAX_NEG_CROP; |
| 238 | 252 |
| 239 /* XXX: I did that only to give same values as previous code */ | 253 /* XXX: I did that only to give same values as previous code */ |
| 300 dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)]; | 314 dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)]; |
| 301 dest += line_size; | 315 dest += line_size; |
| 302 dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)]; | 316 dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)]; |
| 303 } | 317 } |
| 304 | 318 |
| 305 static inline void idctSparseCol (int16_t * col) | 319 static inline void idctSparseCol (DCTELEM * col) |
| 306 { | 320 { |
| 307 int a0, a1, a2, a3, b0, b1, b2, b3; | 321 int a0, a1, a2, a3, b0, b1, b2, b3; |
| 308 | 322 |
| 309 /* XXX: I did that only to give same values as previous code */ | 323 /* XXX: I did that only to give same values as previous code */ |
| 310 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); | 324 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
| 363 col[40] = ((a2 - b2) >> COL_SHIFT); | 377 col[40] = ((a2 - b2) >> COL_SHIFT); |
| 364 col[48] = ((a1 - b1) >> COL_SHIFT); | 378 col[48] = ((a1 - b1) >> COL_SHIFT); |
| 365 col[56] = ((a0 - b0) >> COL_SHIFT); | 379 col[56] = ((a0 - b0) >> COL_SHIFT); |
| 366 } | 380 } |
| 367 | 381 |
| 368 void simple_idct_put(UINT8 *dest, int line_size, INT16 *block) | 382 void simple_idct_put(UINT8 *dest, int line_size, DCTELEM *block) |
| 369 { | 383 { |
| 370 int i; | 384 int i; |
| 371 for(i=0; i<8; i++) | 385 for(i=0; i<8; i++) |
| 372 idctRowCondDC(block + i*8); | 386 idctRowCondDC(block + i*8); |
| 373 | 387 |
| 374 for(i=0; i<8; i++) | 388 for(i=0; i<8; i++) |
| 375 idctSparseColPut(dest + i, line_size, block + i); | 389 idctSparseColPut(dest + i, line_size, block + i); |
| 376 } | 390 } |
| 377 | 391 |
| 378 void simple_idct_add(UINT8 *dest, int line_size, INT16 *block) | 392 void simple_idct_add(UINT8 *dest, int line_size, DCTELEM *block) |
| 379 { | 393 { |
| 380 int i; | 394 int i; |
| 381 for(i=0; i<8; i++) | 395 for(i=0; i<8; i++) |
| 382 idctRowCondDC(block + i*8); | 396 idctRowCondDC(block + i*8); |
| 383 | 397 |
| 384 for(i=0; i<8; i++) | 398 for(i=0; i<8; i++) |
| 385 idctSparseColAdd(dest + i, line_size, block + i); | 399 idctSparseColAdd(dest + i, line_size, block + i); |
| 386 } | 400 } |
| 387 | 401 |
| 388 void simple_idct(INT16 *block) | 402 void simple_idct(DCTELEM *block) |
| 389 { | 403 { |
| 390 int i; | 404 int i; |
| 391 for(i=0; i<8; i++) | 405 for(i=0; i<8; i++) |
| 392 idctRowCondDC(block + i*8); | 406 idctRowCondDC(block + i*8); |
| 393 | 407 |
| 404 | 418 |
| 405 /* row idct is multiplied by 16 * sqrt(2.0), col idct4 is normalized, | 419 /* row idct is multiplied by 16 * sqrt(2.0), col idct4 is normalized, |
| 406 and the butterfly must be multiplied by 0.5 * sqrt(2.0) */ | 420 and the butterfly must be multiplied by 0.5 * sqrt(2.0) */ |
| 407 #define C_SHIFT (4+1+12) | 421 #define C_SHIFT (4+1+12) |
| 408 | 422 |
| 409 static inline void idct4col(UINT8 *dest, int line_size, const INT16 *col) | 423 static inline void idct4col(UINT8 *dest, int line_size, const DCTELEM *col) |
| 410 { | 424 { |
| 411 int c0, c1, c2, c3, a0, a1, a2, a3; | 425 int c0, c1, c2, c3, a0, a1, a2, a3; |
| 412 const UINT8 *cm = cropTbl + MAX_NEG_CROP; | 426 const UINT8 *cm = cropTbl + MAX_NEG_CROP; |
| 413 | 427 |
| 414 a0 = col[8*0]; | 428 a0 = col[8*0]; |
| 441 to the pixels before clamping to avoid systematic error | 455 to the pixels before clamping to avoid systematic error |
| 442 (1024*sqrt(2)) offset would be needed otherwise. */ | 456 (1024*sqrt(2)) offset would be needed otherwise. */ |
| 443 /* XXX: I think a 1.0/sqrt(2) normalization should be needed to | 457 /* XXX: I think a 1.0/sqrt(2) normalization should be needed to |
| 444 compensate the extra butterfly stage - I don't have the full DV | 458 compensate the extra butterfly stage - I don't have the full DV |
| 445 specification */ | 459 specification */ |
| 446 void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block) | 460 void simple_idct248_put(UINT8 *dest, int line_size, DCTELEM *block) |
| 447 { | 461 { |
| 448 int i; | 462 int i; |
| 449 INT16 *ptr; | 463 DCTELEM *ptr; |
| 450 | 464 |
| 451 /* butterfly */ | 465 /* butterfly */ |
| 452 ptr = block; | 466 ptr = block; |
| 453 for(i=0;i<4;i++) { | 467 for(i=0;i<4;i++) { |
| 454 BF(0); | 468 BF(0); |
| 484 #define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5)) | 498 #define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5)) |
| 485 #define C1 C_FIX(0.6532814824) | 499 #define C1 C_FIX(0.6532814824) |
| 486 #define C2 C_FIX(0.2705980501) | 500 #define C2 C_FIX(0.2705980501) |
| 487 #define C3 C_FIX(0.5) | 501 #define C3 C_FIX(0.5) |
| 488 #define C_SHIFT (4+1+12) | 502 #define C_SHIFT (4+1+12) |
| 489 static inline void idct4col_add(UINT8 *dest, int line_size, const INT16 *col) | 503 static inline void idct4col_add(UINT8 *dest, int line_size, const DCTELEM *col) |
| 490 { | 504 { |
| 491 int c0, c1, c2, c3, a0, a1, a2, a3; | 505 int c0, c1, c2, c3, a0, a1, a2, a3; |
| 492 const UINT8 *cm = cropTbl + MAX_NEG_CROP; | 506 const UINT8 *cm = cropTbl + MAX_NEG_CROP; |
| 493 | 507 |
| 494 a0 = col[8*0]; | 508 a0 = col[8*0]; |
| 512 #define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5)) | 526 #define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5)) |
| 513 #define R1 R_FIX(0.6532814824) | 527 #define R1 R_FIX(0.6532814824) |
| 514 #define R2 R_FIX(0.2705980501) | 528 #define R2 R_FIX(0.2705980501) |
| 515 #define R3 R_FIX(0.5) | 529 #define R3 R_FIX(0.5) |
| 516 #define R_SHIFT 11 | 530 #define R_SHIFT 11 |
| 517 static inline void idct4row(INT16 *row) | 531 static inline void idct4row(DCTELEM *row) |
| 518 { | 532 { |
| 519 int c0, c1, c2, c3, a0, a1, a2, a3; | 533 int c0, c1, c2, c3, a0, a1, a2, a3; |
| 520 const UINT8 *cm = cropTbl + MAX_NEG_CROP; | 534 const UINT8 *cm = cropTbl + MAX_NEG_CROP; |
| 521 | 535 |
| 522 a0 = row[0]; | 536 a0 = row[0]; |
| 531 row[1]= (c2 + c3) >> R_SHIFT; | 545 row[1]= (c2 + c3) >> R_SHIFT; |
| 532 row[2]= (c2 - c3) >> R_SHIFT; | 546 row[2]= (c2 - c3) >> R_SHIFT; |
| 533 row[3]= (c0 - c1) >> R_SHIFT; | 547 row[3]= (c0 - c1) >> R_SHIFT; |
| 534 } | 548 } |
| 535 | 549 |
| 536 void simple_idct84_add(UINT8 *dest, int line_size, INT16 *block) | 550 void simple_idct84_add(UINT8 *dest, int line_size, DCTELEM *block) |
| 537 { | 551 { |
| 538 int i; | 552 int i; |
| 539 | 553 |
| 540 /* IDCT8 on each line */ | 554 /* IDCT8 on each line */ |
| 541 for(i=0; i<4; i++) { | 555 for(i=0; i<4; i++) { |
| 546 for(i=0;i<8;i++) { | 560 for(i=0;i<8;i++) { |
| 547 idct4col_add(dest + i, line_size, block + i); | 561 idct4col_add(dest + i, line_size, block + i); |
| 548 } | 562 } |
| 549 } | 563 } |
| 550 | 564 |
| 551 void simple_idct48_add(UINT8 *dest, int line_size, INT16 *block) | 565 void simple_idct48_add(UINT8 *dest, int line_size, DCTELEM *block) |
| 552 { | 566 { |
| 553 int i; | 567 int i; |
| 554 | 568 |
| 555 /* IDCT4 on each line */ | 569 /* IDCT4 on each line */ |
| 556 for(i=0; i<8; i++) { | 570 for(i=0; i<8; i++) { |
