Mercurial > libavcodec.hg
comparison simple_idct.c @ 479:40ffce2cb6ef libavcodec
added inlined put/add functions
| author | bellard |
|---|---|
| date | Wed, 05 Jun 2002 18:46:25 +0000 |
| parents | ec13b0a726c3 |
| children | 2bf17a142cf4 |
comparison
equal
deleted
inserted
replaced
| 478:055d9ac1584d | 479:40ffce2cb6ef |
|---|---|
| 20 /* | 20 /* |
| 21 based upon some outcommented c code from mpeg2dec (idct_mmx.c | 21 based upon some outcommented c code from mpeg2dec (idct_mmx.c |
| 22 written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>) | 22 written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>) |
| 23 */ | 23 */ |
| 24 #include "avcodec.h" | 24 #include "avcodec.h" |
| 25 | 25 #include "dsputil.h" |
| 26 #include "simple_idct.h" | 26 #include "simple_idct.h" |
| 27 | 27 |
| 28 #if 0 | 28 #if 0 |
| 29 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ | 29 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ |
| 30 #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ | 30 #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ |
| 259 row[3] = (a3 + b3) >> ROW_SHIFT; | 259 row[3] = (a3 + b3) >> ROW_SHIFT; |
| 260 row[4] = (a3 - b3) >> ROW_SHIFT; | 260 row[4] = (a3 - b3) >> ROW_SHIFT; |
| 261 } | 261 } |
| 262 #endif /* not ARCH_ALPHA */ | 262 #endif /* not ARCH_ALPHA */ |
| 263 | 263 |
| 264 static inline void idctSparseCol (int16_t * col) | 264 static inline void idctSparseColPut (UINT8 *dest, int line_size, |
| 265 int16_t * col) | |
| 265 { | 266 { |
| 266 int a0, a1, a2, a3, b0, b1, b2, b3; | 267 int a0, a1, a2, a3, b0, b1, b2, b3; |
| 268 UINT8 *cm = cropTbl + MAX_NEG_CROP; | |
| 267 | 269 |
| 268 /* XXX: I did that only to give same values as previous code */ | 270 /* XXX: I did that only to give same values as previous code */ |
| 269 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); | 271 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
| 270 a1 = a0; | 272 a1 = a0; |
| 271 a2 = a0; | 273 a2 = a0; |
| 312 MAC16(b1, - W5, col[8*7]); | 314 MAC16(b1, - W5, col[8*7]); |
| 313 MAC16(b2, + W3, col[8*7]); | 315 MAC16(b2, + W3, col[8*7]); |
| 314 MAC16(b3, - W1, col[8*7]); | 316 MAC16(b3, - W1, col[8*7]); |
| 315 } | 317 } |
| 316 | 318 |
| 317 col[8*0] = (a0 + b0) >> COL_SHIFT; | 319 dest[0] = cm[(a0 + b0) >> COL_SHIFT]; |
| 318 col[8*7] = (a0 - b0) >> COL_SHIFT; | 320 dest += line_size; |
| 319 col[8*1] = (a1 + b1) >> COL_SHIFT; | 321 dest[0] = cm[(a1 + b1) >> COL_SHIFT]; |
| 320 col[8*6] = (a1 - b1) >> COL_SHIFT; | 322 dest += line_size; |
| 321 col[8*2] = (a2 + b2) >> COL_SHIFT; | 323 dest[0] = cm[(a2 + b2) >> COL_SHIFT]; |
| 322 col[8*5] = (a2 - b2) >> COL_SHIFT; | 324 dest += line_size; |
| 323 col[8*3] = (a3 + b3) >> COL_SHIFT; | 325 dest[0] = cm[(a3 + b3) >> COL_SHIFT]; |
| 324 col[8*4] = (a3 - b3) >> COL_SHIFT; | 326 dest += line_size; |
| 327 dest[0] = cm[(a3 - b3) >> COL_SHIFT]; | |
| 328 dest += line_size; | |
| 329 dest[0] = cm[(a2 - b2) >> COL_SHIFT]; | |
| 330 dest += line_size; | |
| 331 dest[0] = cm[(a1 - b1) >> COL_SHIFT]; | |
| 332 dest += line_size; | |
| 333 dest[0] = cm[(a0 - b0) >> COL_SHIFT]; | |
| 334 } | |
| 335 | |
| 336 static inline void idctSparseColAdd (UINT8 *dest, int line_size, | |
| 337 int16_t * col) | |
| 338 { | |
| 339 int a0, a1, a2, a3, b0, b1, b2, b3; | |
| 340 UINT8 *cm = cropTbl + MAX_NEG_CROP; | |
| 341 | |
| 342 /* XXX: I did that only to give same values as previous code */ | |
| 343 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); | |
| 344 a1 = a0; | |
| 345 a2 = a0; | |
| 346 a3 = a0; | |
| 347 | |
| 348 a0 += + W2*col[8*2]; | |
| 349 a1 += + W6*col[8*2]; | |
| 350 a2 += - W6*col[8*2]; | |
| 351 a3 += - W2*col[8*2]; | |
| 352 | |
| 353 MUL16(b0, W1, col[8*1]); | |
| 354 MUL16(b1, W3, col[8*1]); | |
| 355 MUL16(b2, W5, col[8*1]); | |
| 356 MUL16(b3, W7, col[8*1]); | |
| 357 | |
| 358 MAC16(b0, + W3, col[8*3]); | |
| 359 MAC16(b1, - W7, col[8*3]); | |
| 360 MAC16(b2, - W1, col[8*3]); | |
| 361 MAC16(b3, - W5, col[8*3]); | |
| 362 | |
| 363 if(col[8*4]){ | |
| 364 a0 += + W4*col[8*4]; | |
| 365 a1 += - W4*col[8*4]; | |
| 366 a2 += - W4*col[8*4]; | |
| 367 a3 += + W4*col[8*4]; | |
| 368 } | |
| 369 | |
| 370 if (col[8*5]) { | |
| 371 MAC16(b0, + W5, col[8*5]); | |
| 372 MAC16(b1, - W1, col[8*5]); | |
| 373 MAC16(b2, + W7, col[8*5]); | |
| 374 MAC16(b3, + W3, col[8*5]); | |
| 375 } | |
| 376 | |
| 377 if(col[8*6]){ | |
| 378 a0 += + W6*col[8*6]; | |
| 379 a1 += - W2*col[8*6]; | |
| 380 a2 += + W2*col[8*6]; | |
| 381 a3 += - W6*col[8*6]; | |
| 382 } | |
| 383 | |
| 384 if (col[8*7]) { | |
| 385 MAC16(b0, + W7, col[8*7]); | |
| 386 MAC16(b1, - W5, col[8*7]); | |
| 387 MAC16(b2, + W3, col[8*7]); | |
| 388 MAC16(b3, - W1, col[8*7]); | |
| 389 } | |
| 390 | |
| 391 dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)]; | |
| 392 dest += line_size; | |
| 393 dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)]; | |
| 394 dest += line_size; | |
| 395 dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)]; | |
| 396 dest += line_size; | |
| 397 dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)]; | |
| 398 dest += line_size; | |
| 399 dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)]; | |
| 400 dest += line_size; | |
| 401 dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)]; | |
| 402 dest += line_size; | |
| 403 dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)]; | |
| 404 dest += line_size; | |
| 405 dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)]; | |
| 325 } | 406 } |
| 326 | 407 |
| 327 #ifdef ARCH_ALPHA | 408 #ifdef ARCH_ALPHA |
| 328 /* If all rows but the first one are zero after row transformation, | 409 /* If all rows but the first one are zero after row transformation, |
| 329 all rows will be identical after column transformation. */ | 410 all rows will be identical after column transformation. */ |
| 387 for (i = 0; i < 8; i++) | 468 for (i = 0; i < 8; i++) |
| 388 idctSparseCol(block + i); | 469 idctSparseCol(block + i); |
| 389 } | 470 } |
| 390 } | 471 } |
| 391 | 472 |
| 392 #else | 473 /* XXX: suppress this mess */ |
| 393 | 474 void simple_idct_put(UINT8 *dest, int line_size, DCTELEM *block) |
| 394 void simple_idct (short *block) | 475 { |
| 476 simple_idct(block); | |
| 477 put_pixels_clamped(block, dest, line_size); | |
| 478 } | |
| 479 | |
| 480 void simple_idct_add(UINT8 *dest, int line_size, DCTELEM *block) | |
| 481 { | |
| 482 simple_idct(block); | |
| 483 add_pixels_clamped(block, dest, line_size); | |
| 484 } | |
| 485 | |
| 486 #else | |
| 487 | |
| 488 void simple_idct_put(UINT8 *dest, int line_size, INT16 *block) | |
| 395 { | 489 { |
| 396 int i; | 490 int i; |
| 397 for(i=0; i<8; i++) | 491 for(i=0; i<8; i++) |
| 398 idctRowCondDC(block + i*8); | 492 idctRowCondDC(block + i*8); |
| 399 | 493 |
| 400 for(i=0; i<8; i++) | 494 for(i=0; i<8; i++) |
| 401 idctSparseCol(block + i); | 495 idctSparseColPut(dest + i, line_size, block + i); |
| 496 } | |
| 497 | |
| 498 void simple_idct_add(UINT8 *dest, int line_size, INT16 *block) | |
| 499 { | |
| 500 int i; | |
| 501 for(i=0; i<8; i++) | |
| 502 idctRowCondDC(block + i*8); | |
| 503 | |
| 504 for(i=0; i<8; i++) | |
| 505 idctSparseColAdd(dest + i, line_size, block + i); | |
| 402 } | 506 } |
| 403 | 507 |
| 404 #endif | 508 #endif |
| 405 | 509 |
| 406 #undef COL_SHIFT | 510 #undef COL_SHIFT |
