Mercurial > libavcodec.hg
comparison libpostproc/postprocess_template.c @ 118:3dd1950ac98d libavcodec
brightness / contrast fix/copy optimizations +2% speedup
| author | michael |
|---|---|
| date | Tue, 23 Oct 2001 01:46:50 +0000 |
| parents | a02f3088b0cf |
| children | b2f0e40866b1 |
comparison
equal
deleted
inserted
replaced
| 117:a02f3088b0cf | 118:3dd1950ac98d |
|---|---|
| 2476 if(use_old_pp) return odivx_modes[quality]; | 2476 if(use_old_pp) return odivx_modes[quality]; |
| 2477 #endif | 2477 #endif |
| 2478 return modes[quality]; | 2478 return modes[quality]; |
| 2479 } | 2479 } |
| 2480 | 2480 |
| 2481 //} // extern "C" | |
| 2482 | |
| 2483 /** | 2481 /** |
| 2484 * Copies a block from src to dst and fixes the blacklevel | 2482 * Copies a block from src to dst and fixes the blacklevel |
| 2485 * numLines must be a multiple of 4 | 2483 * numLines must be a multiple of 4 |
| 2486 * levelFix == 0 -> don't touch the brightness & contrast | 2484 * levelFix == 0 -> don't touch the brightness & contrast |
| 2487 */ | 2485 */ |
| 2491 int i; | 2489 int i; |
| 2492 if(levelFix) | 2490 if(levelFix) |
| 2493 { | 2491 { |
| 2494 #ifdef HAVE_MMX | 2492 #ifdef HAVE_MMX |
| 2495 asm volatile( | 2493 asm volatile( |
| 2496 "movl %4, %%eax \n\t" | |
| 2497 "movl %%eax, temp0\n\t" | |
| 2498 "pushl %0 \n\t" | 2494 "pushl %0 \n\t" |
| 2499 "pushl %1 \n\t" | 2495 "pushl %1 \n\t" |
| 2500 "leal (%2,%2), %%eax \n\t" | 2496 "leal (%2,%2), %%eax \n\t" |
| 2501 "leal (%3,%3), %%ebx \n\t" | 2497 "leal (%3,%3), %%ebx \n\t" |
| 2502 "movq packedYOffset, %%mm2 \n\t" | 2498 "movq packedYOffset, %%mm2 \n\t" |
| 2503 "movq packedYScale, %%mm3 \n\t" | 2499 "movq packedYScale, %%mm3 \n\t" |
| 2504 "pxor %%mm4, %%mm4 \n\t" | 2500 "pxor %%mm4, %%mm4 \n\t" |
| 2505 | 2501 |
| 2506 #define SCALED_CPY \ | 2502 #define SCALED_CPY \ |
| 2507 "movq (%0), %%mm0 \n\t"\ | 2503 "movq (%0), %%mm0 \n\t"\ |
| 2508 "movq (%0,%2), %%mm1 \n\t"\ | 2504 "movq (%0), %%mm5 \n\t"\ |
| 2509 "movq %%mm0, %%mm5 \n\t"\ | |
| 2510 "punpcklbw %%mm4, %%mm0 \n\t"\ | 2505 "punpcklbw %%mm4, %%mm0 \n\t"\ |
| 2511 "punpckhbw %%mm4, %%mm5 \n\t"\ | 2506 "punpckhbw %%mm4, %%mm5 \n\t"\ |
| 2512 "psubw %%mm2, %%mm0 \n\t"\ | 2507 "psubw %%mm2, %%mm0 \n\t"\ |
| 2513 "psubw %%mm2, %%mm5 \n\t"\ | 2508 "psubw %%mm2, %%mm5 \n\t"\ |
| 2509 "movq (%0,%2), %%mm1 \n\t"\ | |
| 2514 "psllw $6, %%mm0 \n\t"\ | 2510 "psllw $6, %%mm0 \n\t"\ |
| 2515 "psllw $6, %%mm5 \n\t"\ | 2511 "psllw $6, %%mm5 \n\t"\ |
| 2516 "pmulhw %%mm3, %%mm0 \n\t"\ | 2512 "pmulhw %%mm3, %%mm0 \n\t"\ |
| 2513 "movq (%0,%2), %%mm6 \n\t"\ | |
| 2517 "pmulhw %%mm3, %%mm5 \n\t"\ | 2514 "pmulhw %%mm3, %%mm5 \n\t"\ |
| 2515 "punpcklbw %%mm4, %%mm1 \n\t"\ | |
| 2516 "punpckhbw %%mm4, %%mm6 \n\t"\ | |
| 2517 "psubw %%mm2, %%mm1 \n\t"\ | |
| 2518 "psubw %%mm2, %%mm6 \n\t"\ | |
| 2519 "psllw $6, %%mm1 \n\t"\ | |
| 2520 "psllw $6, %%mm6 \n\t"\ | |
| 2521 "pmulhw %%mm3, %%mm1 \n\t"\ | |
| 2522 "pmulhw %%mm3, %%mm6 \n\t"\ | |
| 2523 "addl %%eax, %0 \n\t"\ | |
| 2518 "packuswb %%mm5, %%mm0 \n\t"\ | 2524 "packuswb %%mm5, %%mm0 \n\t"\ |
| 2525 "packuswb %%mm6, %%mm1 \n\t"\ | |
| 2519 "movq %%mm0, (%1) \n\t"\ | 2526 "movq %%mm0, (%1) \n\t"\ |
| 2520 "movq %%mm1, %%mm5 \n\t"\ | |
| 2521 "punpcklbw %%mm4, %%mm1 \n\t"\ | |
| 2522 "punpckhbw %%mm4, %%mm5 \n\t"\ | |
| 2523 "psubw %%mm2, %%mm1 \n\t"\ | |
| 2524 "psubw %%mm2, %%mm5 \n\t"\ | |
| 2525 "psllw $6, %%mm1 \n\t"\ | |
| 2526 "psllw $6, %%mm5 \n\t"\ | |
| 2527 "pmulhw %%mm3, %%mm1 \n\t"\ | |
| 2528 "pmulhw %%mm3, %%mm5 \n\t"\ | |
| 2529 "packuswb %%mm5, %%mm1 \n\t"\ | |
| 2530 "movq %%mm1, (%1, %3) \n\t"\ | 2527 "movq %%mm1, (%1, %3) \n\t"\ |
| 2531 | 2528 |
| 2532 "1: \n\t" | |
| 2533 SCALED_CPY | 2529 SCALED_CPY |
| 2534 "addl %%eax, %0 \n\t" | |
| 2535 "addl %%ebx, %1 \n\t" | 2530 "addl %%ebx, %1 \n\t" |
| 2536 SCALED_CPY | 2531 SCALED_CPY |
| 2537 "addl %%eax, %0 \n\t" | |
| 2538 "addl %%ebx, %1 \n\t" | 2532 "addl %%ebx, %1 \n\t" |
| 2539 "decl temp0 \n\t" | 2533 SCALED_CPY |
| 2540 "jnz 1b \n\t" | 2534 "addl %%ebx, %1 \n\t" |
| 2535 SCALED_CPY | |
| 2541 | 2536 |
| 2542 "popl %1 \n\t" | 2537 "popl %1 \n\t" |
| 2543 "popl %0 \n\t" | 2538 "popl %0 \n\t" |
| 2544 : : "r" (src), | 2539 : : "r" (src), |
| 2545 "r" (dst), | 2540 "r" (dst), |
| 2546 "r" (srcStride), | 2541 "r" (srcStride), |
| 2547 "r" (dstStride), | 2542 "r" (dstStride) |
| 2548 "m" (numLines>>2) | |
| 2549 : "%eax", "%ebx" | 2543 : "%eax", "%ebx" |
| 2550 ); | 2544 ); |
| 2551 #else | 2545 #else |
| 2552 for(i=0; i<numLines; i++) | 2546 for(i=0; i<numLines; i++) |
| 2553 memcpy( &(dst[dstStride*i]), | 2547 memcpy( &(dst[dstStride*i]), |
