Mercurial > libavcodec.hg
comparison libpostproc/postprocess.c @ 2036:6a6c678517b3 libavcodec
altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
| author | michael |
|---|---|
| date | Wed, 26 May 2004 20:15:15 +0000 |
| parents | 4225c131a2eb |
| children | 98d8283534bb |
comparison
equal
deleted
inserted
replaced
| 2035:e1b69326ae36 | 2036:6a6c678517b3 |
|---|---|
| 1 /* | 1 /* |
| 2 Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at) | 2 Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at) |
| 3 | |
| 4 AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org> | |
| 3 | 5 |
| 4 This program is free software; you can redistribute it and/or modify | 6 This program is free software; you can redistribute it and/or modify |
| 5 it under the terms of the GNU General Public License as published by | 7 it under the terms of the GNU General Public License as published by |
| 6 the Free Software Foundation; either version 2 of the License, or | 8 the Free Software Foundation; either version 2 of the License, or |
| 7 (at your option) any later version. | 9 (at your option) any later version. |
| 20 * @file postprocess.c | 22 * @file postprocess.c |
| 21 * postprocessing. | 23 * postprocessing. |
| 22 */ | 24 */ |
| 23 | 25 |
| 24 /* | 26 /* |
| 25 C MMX MMX2 3DNow | 27 C MMX MMX2 3DNow AltiVec |
| 26 isVertDC Ec Ec | 28 isVertDC Ec Ec Ec |
| 27 isVertMinMaxOk Ec Ec | 29 isVertMinMaxOk Ec Ec Ec |
| 28 doVertLowPass E e e | 30 doVertLowPass E e e Ec |
| 29 doVertDefFilter Ec Ec e e | 31 doVertDefFilter Ec Ec e e Ec |
| 30 isHorizDC Ec Ec | 32 isHorizDC Ec Ec |
| 31 isHorizMinMaxOk a E | 33 isHorizMinMaxOk a E |
| 32 doHorizLowPass E e e | 34 doHorizLowPass E e e |
| 33 doHorizDefFilter Ec Ec e e | 35 doHorizDefFilter Ec Ec e e |
| 34 deRing E e e* | 36 deRing E e e* Ecp |
| 35 Vertical RKAlgo1 E a a | 37 Vertical RKAlgo1 E a a |
| 36 Horizontal RKAlgo1 a a | 38 Horizontal RKAlgo1 a a |
| 37 Vertical X1# a E E | 39 Vertical X1# a E E |
| 38 Horizontal X1# a E E | 40 Horizontal X1# a E E |
| 39 LinIpolDeinterlace e E E* | 41 LinIpolDeinterlace e E E* |
| 46 # more or less self-invented filters, so the exactness isn't too meaningful | 48 # more or less self-invented filters, so the exactness isn't too meaningful |
| 47 E = Exact implementation | 49 E = Exact implementation |
| 48 e = almost exact implementation (slightly different rounding, ...) | 50 e = almost exact implementation (slightly different rounding, ...) |
| 49 a = alternative / approximate impl | 51 a = alternative / approximate impl |
| 50 c = checked against the other implementations (-vo md5) | 52 c = checked against the other implementations (-vo md5) |
| 53 p = partially optimized, still some work to do | |
| 51 */ | 54 */ |
| 52 | 55 |
| 53 /* | 56 /* |
| 54 TODO: | 57 TODO: |
| 55 reduce the time wasted on the mem transfer | 58 reduce the time wasted on the mem transfer |
| 192 // The horizontal Functions exist only in C cuz the MMX code is faster with vertical filters and transposing | 195 // The horizontal Functions exist only in C cuz the MMX code is faster with vertical filters and transposing |
| 193 | 196 |
| 194 /** | 197 /** |
| 195 * Check if the given 8x8 Block is mostly "flat" | 198 * Check if the given 8x8 Block is mostly "flat" |
| 196 */ | 199 */ |
| 197 static inline int isHorizDC(uint8_t src[], int stride, PPContext *c) | 200 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c) |
| 198 { | 201 { |
| 199 int numEq= 0; | 202 int numEq= 0; |
| 200 int y; | 203 int y; |
| 201 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; | 204 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; |
| 202 const int dcThreshold= dcOffset*2 + 1; | 205 const int dcThreshold= dcOffset*2 + 1; |
| 238 src+= stride; | 241 src+= stride; |
| 239 } | 242 } |
| 240 return numEq > c->ppMode.flatnessThreshold; | 243 return numEq > c->ppMode.flatnessThreshold; |
| 241 } | 244 } |
| 242 | 245 |
| 243 static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP) | 246 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP) |
| 244 { | 247 { |
| 245 int i; | 248 int i; |
| 246 #if 1 | 249 #if 1 |
| 247 for(i=0; i<2; i++){ | 250 for(i=0; i<2; i++){ |
| 248 if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0; | 251 if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0; |
| 302 } | 305 } |
| 303 return 1; | 306 return 1; |
| 304 #endif | 307 #endif |
| 305 } | 308 } |
| 306 | 309 |
| 310 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){ | |
| 311 if( isHorizDC_C(src, stride, c) ){ | |
| 312 if( isHorizMinMaxOk_C(src, stride, c->QP) ) | |
| 313 return 1; | |
| 314 else | |
| 315 return 0; | |
| 316 }else{ | |
| 317 return 2; | |
| 318 } | |
| 319 } | |
| 320 | |
| 307 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ | 321 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ |
| 308 if( isVertDC_C(src, stride, c) ){ | 322 if( isVertDC_C(src, stride, c) ){ |
| 309 if( isVertMinMaxOk_C(src, stride, c->QP) ) | 323 if( isVertMinMaxOk_C(src, stride, c->QP) ) |
| 310 return 1; | 324 return 1; |
| 311 else | 325 else |
| 313 }else{ | 327 }else{ |
| 314 return 2; | 328 return 2; |
| 315 } | 329 } |
| 316 } | 330 } |
| 317 | 331 |
| 318 static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP) | 332 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c) |
| 319 { | 333 { |
| 320 int y; | 334 int y; |
| 321 for(y=0; y<BLOCK_SIZE; y++) | 335 for(y=0; y<BLOCK_SIZE; y++) |
| 322 { | 336 { |
| 323 const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]); | 337 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]); |
| 324 | 338 |
| 325 if(ABS(middleEnergy) < 8*QP) | 339 if(ABS(middleEnergy) < 8*c->QP) |
| 326 { | 340 { |
| 327 const int q=(dst[3] - dst[4])/2; | 341 const int q=(dst[3] - dst[4])/2; |
| 328 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]); | 342 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]); |
| 329 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]); | 343 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]); |
| 330 | 344 |
| 354 | 368 |
| 355 /** | 369 /** |
| 356 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) | 370 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) |
| 357 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) | 371 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) |
| 358 */ | 372 */ |
| 359 static inline void doHorizLowPass(uint8_t dst[], int stride, int QP) | 373 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) |
| 360 { | 374 { |
| 361 | 375 |
| 362 int y; | 376 int y; |
| 363 for(y=0; y<BLOCK_SIZE; y++) | 377 for(y=0; y<BLOCK_SIZE; y++) |
| 364 { | 378 { |
| 365 const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0]; | 379 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0]; |
| 366 const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7]; | 380 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7]; |
| 367 | 381 |
| 368 int sums[9]; | 382 int sums[9]; |
| 369 sums[0] = first + dst[0]; | 383 sums[0] = first + dst[0]; |
| 370 sums[1] = dst[0] + dst[1]; | 384 sums[1] = dst[0] + dst[1]; |
| 371 sums[2] = dst[1] + dst[2]; | 385 sums[2] = dst[1] + dst[2]; |
| 460 //Plain C versions | 474 //Plain C versions |
| 461 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) | 475 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) |
| 462 #define COMPILE_C | 476 #define COMPILE_C |
| 463 #endif | 477 #endif |
| 464 | 478 |
| 479 #ifdef ARCH_POWERPC | |
| 480 #ifdef HAVE_ALTIVEC | |
| 481 #define COMPILE_ALTIVEC | |
| 482 #ifndef CONFIG_DARWIN | |
| 483 #warning "################################################################################" | |
| 484 #warning "WARNING: No gcc available as of today (2004-05-25) seems to be able to compile properly some of the code under non-Darwin PPC OSes. Some functions result in wrong results, while others simply won't compile (gcc explodes after allocating 1GiB+)." | |
| 485 #warning "################################################################################" | |
| 486 #endif //CONFIG_DARWIN | |
| 487 #endif //HAVE_ALTIVEC | |
| 488 #endif //ARCH_POWERPC | |
| 489 | |
| 465 #ifdef ARCH_X86 | 490 #ifdef ARCH_X86 |
| 466 | 491 |
| 467 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) | 492 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) |
| 468 #define COMPILE_MMX | 493 #define COMPILE_MMX |
| 469 #endif | 494 #endif |
| 478 #endif //ARCH_X86 | 503 #endif //ARCH_X86 |
| 479 | 504 |
| 480 #undef HAVE_MMX | 505 #undef HAVE_MMX |
| 481 #undef HAVE_MMX2 | 506 #undef HAVE_MMX2 |
| 482 #undef HAVE_3DNOW | 507 #undef HAVE_3DNOW |
| 508 #undef HAVE_ALTIVEC | |
| 483 #undef ARCH_X86 | 509 #undef ARCH_X86 |
| 484 | 510 |
| 485 #ifdef COMPILE_C | 511 #ifdef COMPILE_C |
| 486 #undef HAVE_MMX | 512 #undef HAVE_MMX |
| 487 #undef HAVE_MMX2 | 513 #undef HAVE_MMX2 |
| 488 #undef HAVE_3DNOW | 514 #undef HAVE_3DNOW |
| 489 #undef ARCH_X86 | 515 #undef ARCH_X86 |
| 490 #define RENAME(a) a ## _C | 516 #define RENAME(a) a ## _C |
| 491 #include "postprocess_template.c" | 517 #include "postprocess_template.c" |
| 492 #endif | 518 #endif |
| 519 | |
| 520 #ifdef ARCH_POWERPC | |
| 521 #ifdef COMPILE_ALTIVEC | |
| 522 #undef RENAME | |
| 523 #define HAVE_ALTIVEC | |
| 524 #define RENAME(a) a ## _altivec | |
| 525 #include "postprocess_altivec_template.c" | |
| 526 #include "postprocess_template.c" | |
| 527 #endif | |
| 528 #endif //ARCH_POWERPC | |
| 493 | 529 |
| 494 //MMX versions | 530 //MMX versions |
| 495 #ifdef COMPILE_MMX | 531 #ifdef COMPILE_MMX |
| 496 #undef RENAME | 532 #undef RENAME |
| 497 #define HAVE_MMX | 533 #define HAVE_MMX |
| 546 else if(c->cpuCaps & PP_CPU_CAPS_MMX) | 582 else if(c->cpuCaps & PP_CPU_CAPS_MMX) |
| 547 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 583 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 548 else | 584 else |
| 549 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 585 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 550 #else | 586 #else |
| 587 #ifdef ARCH_POWERPC | |
| 588 #ifdef HAVE_ALTIVEC | |
| 589 else if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) | |
| 590 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | |
| 591 else | |
| 592 #endif | |
| 593 #endif | |
| 551 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 594 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 552 #endif | 595 #endif |
| 553 #else //RUNTIME_CPUDETECT | 596 #else //RUNTIME_CPUDETECT |
| 554 #ifdef HAVE_MMX2 | 597 #ifdef HAVE_MMX2 |
| 555 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 598 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 556 #elif defined (HAVE_3DNOW) | 599 #elif defined (HAVE_3DNOW) |
| 557 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 600 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 558 #elif defined (HAVE_MMX) | 601 #elif defined (HAVE_MMX) |
| 559 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 602 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 603 #elif defined (HAVE_ALTIVEC) | |
| 604 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | |
| 560 #else | 605 #else |
| 561 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 606 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 562 #endif | 607 #endif |
| 563 #endif //!RUNTIME_CPUDETECT | 608 #endif //!RUNTIME_CPUDETECT |
| 564 } | 609 } |
