Mercurial > libavcodec.hg
comparison libpostproc/postprocess_template.c @ 112:a2c063b6ecf9 libavcodec
fixed a bug in the tmp buffer
fixed the color range for yuv
fixed the width %8!=0 bug (another 1% speed loss)
| author | michael |
|---|---|
| date | Fri, 19 Oct 2001 13:41:38 +0000 |
| parents | 8e4c5a16c9fc |
| children | 3e0dcdb6b340 |
comparison
equal
deleted
inserted
replaced
| 111:8e4c5a16c9fc | 112:a2c063b6ecf9 |
|---|---|
| 120 static uint64_t temp2=0; | 120 static uint64_t temp2=0; |
| 121 static uint64_t temp3=0; | 121 static uint64_t temp3=0; |
| 122 static uint64_t temp4=0; | 122 static uint64_t temp4=0; |
| 123 static uint64_t temp5=0; | 123 static uint64_t temp5=0; |
| 124 static uint64_t pQPb=0; | 124 static uint64_t pQPb=0; |
| 125 static uint8_t tempBlock[16*16]; | 125 static uint8_t tempBlock[16*16]; //used so the horizontal code gets aligned data |
| 126 | 126 |
| 127 int hFlatnessThreshold= 56 - 16; | 127 int hFlatnessThreshold= 56 - 16; |
| 128 int vFlatnessThreshold= 56 - 16; | 128 int vFlatnessThreshold= 56 - 16; |
| 129 | 129 |
| 130 //amount of "black" u r willing to loose to get a brightness corrected picture | 130 //amount of "black" u r willing to loose to get a brightness corrected picture |
| 131 double maxClippedThreshold= 0.01; | 131 double maxClippedThreshold= 0.01; |
| 132 | 132 |
| 133 int maxAllowedY=255; | 133 int maxAllowedY=255; |
| 134 //FIXME can never make a movie's black brighter (anyone needs that?) | 134 //FIXME can never make a movie's black brighter (anyone needs that?) |
| 135 int minAllowedY=0; | 135 int minAllowedY=16; |
| 136 | 136 |
| 137 #ifdef TIMING | 137 #ifdef TIMING |
| 138 static inline long long rdtsc() | 138 static inline long long rdtsc() |
| 139 { | 139 { |
| 140 long long l; | 140 long long l; |
| 2396 | 2396 |
| 2397 /* Temporary buffers for handling the last row(s) */ | 2397 /* Temporary buffers for handling the last row(s) */ |
| 2398 static uint8_t *tempDst= NULL; | 2398 static uint8_t *tempDst= NULL; |
| 2399 static uint8_t *tempSrc= NULL; | 2399 static uint8_t *tempSrc= NULL; |
| 2400 | 2400 |
| 2401 /* Temporary buffers for handling the last block */ | |
| 2402 static uint8_t *tempDstBlock= NULL; | |
| 2403 static uint8_t *tempSrcBlock= NULL; | |
| 2404 | |
| 2405 uint8_t *dstBlockPtrBackup; | |
| 2406 uint8_t *srcBlockPtrBackup; | |
| 2407 | |
| 2401 #ifdef TIMING | 2408 #ifdef TIMING |
| 2402 long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0; | 2409 long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0; |
| 2403 sumTime= rdtsc(); | 2410 sumTime= rdtsc(); |
| 2404 #endif | 2411 #endif |
| 2405 | 2412 |
| 2406 if(tempDst==NULL) | 2413 if(tempDst==NULL) |
| 2407 { | 2414 { |
| 2408 tempDst= (uint8_t*)memalign(8, 1024*24); | 2415 tempDst= (uint8_t*)memalign(8, 1024*24); |
| 2409 tempSrc= (uint8_t*)memalign(8, 1024*24); | 2416 tempSrc= (uint8_t*)memalign(8, 1024*24); |
| 2417 tempDstBlock= (uint8_t*)memalign(8, 1024*24); | |
| 2418 tempSrcBlock= (uint8_t*)memalign(8, 1024*24); | |
| 2410 } | 2419 } |
| 2411 | 2420 |
| 2412 if(!yHistogram) | 2421 if(!yHistogram) |
| 2413 { | 2422 { |
| 2414 int i; | 2423 int i; |
| 2415 yHistogram= (uint64_t*)malloc(8*256); | 2424 yHistogram= (uint64_t*)malloc(8*256); |
| 2416 for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256; | 2425 for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256; |
| 2426 | |
| 2427 if(mode & FULL_Y_RANGE) | |
| 2428 { | |
| 2429 maxAllowedY=255; | |
| 2430 minAllowedY=0; | |
| 2431 } | |
| 2417 } | 2432 } |
| 2418 | 2433 |
| 2419 if(!isColor) | 2434 if(!isColor) |
| 2420 { | 2435 { |
| 2421 uint64_t sum= 0; | 2436 uint64_t sum= 0; |
| 2503 memcpy(tempDst, dstBlock, dstStride*MIN(height-y, 5) ); | 2518 memcpy(tempDst, dstBlock, dstStride*MIN(height-y, 5) ); |
| 2504 dstBlock= tempDst; | 2519 dstBlock= tempDst; |
| 2505 srcBlock= tempSrc; | 2520 srcBlock= tempSrc; |
| 2506 } | 2521 } |
| 2507 | 2522 |
| 2523 // From this point on it is guranteed that we can read and write 16 lines downward | |
| 2508 // finish 1 block before the next otherwise we'll might have a problem | 2524 // finish 1 block before the next otherwise we'll might have a problem |
| 2509 // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing | 2525 // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing |
| 2510 for(x=0; x<width; x+=BLOCK_SIZE) | 2526 for(x=0; x<width; x+=BLOCK_SIZE) |
| 2511 { | 2527 { |
| 2512 const int stride= dstStride; | 2528 const int stride= dstStride; |
| 2542 prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32); | 2558 prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32); |
| 2543 */ | 2559 */ |
| 2544 #endif | 2560 #endif |
| 2545 | 2561 |
| 2546 if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++; | 2562 if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++; |
| 2563 | |
| 2564 //can we mess with a 8x16 block, if not use a temp buffer, yes again | |
| 2565 if(x+7 >= width) | |
| 2566 { | |
| 2567 int i; | |
| 2568 dstBlockPtrBackup= dstBlock; | |
| 2569 srcBlockPtrBackup= srcBlock; | |
| 2570 | |
| 2571 for(i=0;i<BLOCK_SIZE*2; i++) | |
| 2572 { | |
| 2573 memcpy(tempSrcBlock+i*srcStride, srcBlock+i*srcStride, width-x); | |
| 2574 memcpy(tempDstBlock+i*dstStride, dstBlock+i*dstStride, width-x); | |
| 2575 } | |
| 2576 | |
| 2577 dstBlock= tempDstBlock; | |
| 2578 srcBlock= tempSrcBlock; | |
| 2579 } | |
| 2547 | 2580 |
| 2548 blockCopy(dstBlock + dstStride*5, dstStride, | 2581 blockCopy(dstBlock + dstStride*5, dstStride, |
| 2549 srcBlock + srcStride*5, srcStride, 8, mode & LEVEL_FIX); | 2582 srcBlock + srcStride*5, srcStride, 8, mode & LEVEL_FIX); |
| 2550 | 2583 |
| 2551 if(mode & LINEAR_IPOL_DEINT_FILTER) | 2584 if(mode & LINEAR_IPOL_DEINT_FILTER) |
| 2591 T0=T1; | 2624 T0=T1; |
| 2592 #endif | 2625 #endif |
| 2593 } | 2626 } |
| 2594 | 2627 |
| 2595 /* check if we have a previous block to deblock it with dstBlock */ | 2628 /* check if we have a previous block to deblock it with dstBlock */ |
| 2596 if(x - 8 >= 0 && x<width) | 2629 if(x - 8 >= 0) |
| 2597 { | 2630 { |
| 2598 #ifdef MORE_TIMING | 2631 #ifdef MORE_TIMING |
| 2599 T0= rdtsc(); | 2632 T0= rdtsc(); |
| 2600 #endif | 2633 #endif |
| 2601 if(mode & H_DEBLOCK) | 2634 if(mode & H_DEBLOCK) |
| 2622 } | 2655 } |
| 2623 else if(y!=0) | 2656 else if(y!=0) |
| 2624 dering(dstBlock - stride*9 + width-9, stride, QP); | 2657 dering(dstBlock - stride*9 + width-9, stride, QP); |
| 2625 //FIXME dering filter will not be applied to last block (bottom right) | 2658 //FIXME dering filter will not be applied to last block (bottom right) |
| 2626 | 2659 |
| 2660 /* did we use a tmp-block buffer */ | |
| 2661 if(x+7 >= width) | |
| 2662 { | |
| 2663 int i; | |
| 2664 dstBlock= dstBlockPtrBackup; | |
| 2665 srcBlock= srcBlockPtrBackup; | |
| 2666 | |
| 2667 for(i=0;i<BLOCK_SIZE*2; i++) | |
| 2668 { | |
| 2669 memcpy(dstBlock+i*dstStride, tempDstBlock+i*dstStride, width-x); | |
| 2670 } | |
| 2671 } | |
| 2672 | |
| 2627 dstBlock+=8; | 2673 dstBlock+=8; |
| 2628 srcBlock+=8; | 2674 srcBlock+=8; |
| 2629 } | 2675 } |
| 2630 | 2676 |
| 2631 /* did we use a tmp buffer */ | 2677 /* did we use a tmp buffer */ |
| 2632 if(y+15 > height) | 2678 if(y+15 >= height) |
| 2633 { | 2679 { |
| 2634 uint8_t *dstBlock= &(dst[y*dstStride]); | 2680 uint8_t *dstBlock= &(dst[y*dstStride]); |
| 2635 memcpy(dstBlock, tempDst, dstStride*(height-y) ); | 2681 memcpy(dstBlock, tempDst, dstStride*(height-y) ); |
| 2636 } | 2682 } |
| 2637 } | 2683 } |
