comparison libpostproc/postprocess.c @ 2036:6a6c678517b3 libavcodec

altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
author michael
date Wed, 26 May 2004 20:15:15 +0000
parents 4225c131a2eb
children 98d8283534bb
comparison
equal deleted inserted replaced
2035:e1b69326ae36 2036:6a6c678517b3
1 /* 1 /*
2 Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at) 2 Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
4 AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
3 5
4 This program is free software; you can redistribute it and/or modify 6 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by 7 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or 8 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version. 9 (at your option) any later version.
20 * @file postprocess.c 22 * @file postprocess.c
21 * postprocessing. 23 * postprocessing.
22 */ 24 */
23 25
24 /* 26 /*
25 C MMX MMX2 3DNow 27 C MMX MMX2 3DNow AltiVec
26 isVertDC Ec Ec 28 isVertDC Ec Ec Ec
27 isVertMinMaxOk Ec Ec 29 isVertMinMaxOk Ec Ec Ec
28 doVertLowPass E e e 30 doVertLowPass E e e Ec
29 doVertDefFilter Ec Ec e e 31 doVertDefFilter Ec Ec e e Ec
30 isHorizDC Ec Ec 32 isHorizDC Ec Ec
31 isHorizMinMaxOk a E 33 isHorizMinMaxOk a E
32 doHorizLowPass E e e 34 doHorizLowPass E e e
33 doHorizDefFilter Ec Ec e e 35 doHorizDefFilter Ec Ec e e
34 deRing E e e* 36 deRing E e e* Ecp
35 Vertical RKAlgo1 E a a 37 Vertical RKAlgo1 E a a
36 Horizontal RKAlgo1 a a 38 Horizontal RKAlgo1 a a
37 Vertical X1# a E E 39 Vertical X1# a E E
38 Horizontal X1# a E E 40 Horizontal X1# a E E
39 LinIpolDeinterlace e E E* 41 LinIpolDeinterlace e E E*
46 # more or less self-invented filters, so the exactness isn't too meaningful 48 # more or less self-invented filters, so the exactness isn't too meaningful
47 E = Exact implementation 49 E = Exact implementation
48 e = almost exact implementation (slightly different rounding,...) 50 e = almost exact implementation (slightly different rounding,...)
49 a = alternative / approximate impl 51 a = alternative / approximate impl
50 c = checked against the other implementations (-vo md5) 52 c = checked against the other implementations (-vo md5)
53 p = partially optimized, still some work to do
51 */ 54 */
52 55
53 /* 56 /*
54 TODO: 57 TODO:
55 reduce the time wasted on the mem transfer 58 reduce the time wasted on the mem transfer
192 // The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing 195 // The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
193 196
194 /** 197 /**
195 * Check if the given 8x8 Block is mostly "flat" 198 * Check if the given 8x8 Block is mostly "flat"
196 */ 199 */
197 static inline int isHorizDC(uint8_t src[], int stride, PPContext *c) 200 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
198 { 201 {
199 int numEq= 0; 202 int numEq= 0;
200 int y; 203 int y;
201 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; 204 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
202 const int dcThreshold= dcOffset*2 + 1; 205 const int dcThreshold= dcOffset*2 + 1;
238 src+= stride; 241 src+= stride;
239 } 242 }
240 return numEq > c->ppMode.flatnessThreshold; 243 return numEq > c->ppMode.flatnessThreshold;
241 } 244 }
242 245
243 static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP) 246 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
244 { 247 {
245 int i; 248 int i;
246 #if 1 249 #if 1
247 for(i=0; i<2; i++){ 250 for(i=0; i<2; i++){
248 if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0; 251 if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
302 } 305 }
303 return 1; 306 return 1;
304 #endif 307 #endif
305 } 308 }
306 309
310 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
311 if( isHorizDC_C(src, stride, c) ){
312 if( isHorizMinMaxOk_C(src, stride, c->QP) )
313 return 1;
314 else
315 return 0;
316 }else{
317 return 2;
318 }
319 }
320
307 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ 321 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
308 if( isVertDC_C(src, stride, c) ){ 322 if( isVertDC_C(src, stride, c) ){
309 if( isVertMinMaxOk_C(src, stride, c->QP) ) 323 if( isVertMinMaxOk_C(src, stride, c->QP) )
310 return 1; 324 return 1;
311 else 325 else
313 }else{ 327 }else{
314 return 2; 328 return 2;
315 } 329 }
316 } 330 }
317 331
318 static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP) 332 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
319 { 333 {
320 int y; 334 int y;
321 for(y=0; y<BLOCK_SIZE; y++) 335 for(y=0; y<BLOCK_SIZE; y++)
322 { 336 {
323 const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]); 337 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
324 338
325 if(ABS(middleEnergy) < 8*QP) 339 if(ABS(middleEnergy) < 8*c->QP)
326 { 340 {
327 const int q=(dst[3] - dst[4])/2; 341 const int q=(dst[3] - dst[4])/2;
328 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]); 342 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
329 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]); 343 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
330 344
354 368
355 /** 369 /**
356 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) 370 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
357 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) 371 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
358 */ 372 */
359 static inline void doHorizLowPass(uint8_t dst[], int stride, int QP) 373 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
360 { 374 {
361 375
362 int y; 376 int y;
363 for(y=0; y<BLOCK_SIZE; y++) 377 for(y=0; y<BLOCK_SIZE; y++)
364 { 378 {
365 const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0]; 379 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
366 const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7]; 380 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
367 381
368 int sums[9]; 382 int sums[9];
369 sums[0] = first + dst[0]; 383 sums[0] = first + dst[0];
370 sums[1] = dst[0] + dst[1]; 384 sums[1] = dst[0] + dst[1];
371 sums[2] = dst[1] + dst[2]; 385 sums[2] = dst[1] + dst[2];
460 //Plain C versions 474 //Plain C versions
461 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) 475 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
462 #define COMPILE_C 476 #define COMPILE_C
463 #endif 477 #endif
464 478
479 #ifdef ARCH_POWERPC
480 #ifdef HAVE_ALTIVEC
481 #define COMPILE_ALTIVEC
482 #ifndef CONFIG_DARWIN
483 #warning "################################################################################"
484 #warning "WARNING: No gcc available as of today (2004-05-25) seems to be able to compile properly some of the code under non-Darwin PPC OSes. Some functions result in wrong results, while others simply won't compile (gcc explodes after allocating 1GiB+)."
485 #warning "################################################################################"
486 #endif //CONFIG_DARWIN
487 #endif //HAVE_ALTIVEC
488 #endif //ARCH_POWERPC
489
465 #ifdef ARCH_X86 490 #ifdef ARCH_X86
466 491
467 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) 492 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
468 #define COMPILE_MMX 493 #define COMPILE_MMX
469 #endif 494 #endif
478 #endif //ARCH_X86 503 #endif //ARCH_X86
479 504
480 #undef HAVE_MMX 505 #undef HAVE_MMX
481 #undef HAVE_MMX2 506 #undef HAVE_MMX2
482 #undef HAVE_3DNOW 507 #undef HAVE_3DNOW
508 #undef HAVE_ALTIVEC
483 #undef ARCH_X86 509 #undef ARCH_X86
484 510
485 #ifdef COMPILE_C 511 #ifdef COMPILE_C
486 #undef HAVE_MMX 512 #undef HAVE_MMX
487 #undef HAVE_MMX2 513 #undef HAVE_MMX2
488 #undef HAVE_3DNOW 514 #undef HAVE_3DNOW
489 #undef ARCH_X86 515 #undef ARCH_X86
490 #define RENAME(a) a ## _C 516 #define RENAME(a) a ## _C
491 #include "postprocess_template.c" 517 #include "postprocess_template.c"
492 #endif 518 #endif
519
520 #ifdef ARCH_POWERPC
521 #ifdef COMPILE_ALTIVEC
522 #undef RENAME
523 #define HAVE_ALTIVEC
524 #define RENAME(a) a ## _altivec
525 #include "postprocess_altivec_template.c"
526 #include "postprocess_template.c"
527 #endif
528 #endif //ARCH_POWERPC
493 529
494 //MMX versions 530 //MMX versions
495 #ifdef COMPILE_MMX 531 #ifdef COMPILE_MMX
496 #undef RENAME 532 #undef RENAME
497 #define HAVE_MMX 533 #define HAVE_MMX
546 else if(c->cpuCaps & PP_CPU_CAPS_MMX) 582 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
547 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 583 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
548 else 584 else
549 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 585 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
550 #else 586 #else
587 #ifdef ARCH_POWERPC
588 #ifdef HAVE_ALTIVEC
589 else if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
590 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
591 else
592 #endif
593 #endif
551 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 594 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
552 #endif 595 #endif
553 #else //RUNTIME_CPUDETECT 596 #else //RUNTIME_CPUDETECT
554 #ifdef HAVE_MMX2 597 #ifdef HAVE_MMX2
555 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 598 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
556 #elif defined (HAVE_3DNOW) 599 #elif defined (HAVE_3DNOW)
557 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 600 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
558 #elif defined (HAVE_MMX) 601 #elif defined (HAVE_MMX)
559 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 602 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
603 #elif defined (HAVE_ALTIVEC)
604 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
560 #else 605 #else
561 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 606 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
562 #endif 607 #endif
563 #endif //!RUNTIME_CPUDETECT 608 #endif //!RUNTIME_CPUDETECT
564 } 609 }