Mercurial > libpostproc.hg
comparison postprocess_template.c @ 129:c78bbb57f0b9 libpostproc
Remove disabled code cruft.
| author | diego |
|---|---|
| date | Wed, 19 Aug 2009 08:15:32 +0000 |
| parents | 1500ae6cf66c |
| children | 51571e34b760 |
Comparison legend: equal · deleted · inserted · replaced
| 128:1afabe715e63 | 129:c78bbb57f0b9 |
|---|---|
| 346 src++; | 346 src++; |
| 347 } | 347 } |
| 348 #endif //HAVE_MMX2 || HAVE_AMD3DNOW | 348 #endif //HAVE_MMX2 || HAVE_AMD3DNOW |
| 349 } | 349 } |
| 350 #endif //HAVE_ALTIVEC | 350 #endif //HAVE_ALTIVEC |
| 351 | |
| 352 #if 0 | |
| 353 /** | |
| 354 * Experimental implementation of the filter (Algorithm 1) described in a paper from Ramkishor & Karandikar | |
| 355 * values are correctly clipped (MMX2) | |
| 356 * values are wraparound (C) | |
| 357 * Conclusion: It is fast, but introduces ugly horizontal patterns | |
| 358 * if there is a continuous gradient. | |
| 359 0 8 16 24 | |
| 360 x = 8 | |
| 361 x/2 = 4 | |
| 362 x/8 = 1 | |
| 363 1 12 12 23 | |
| 364 */ | |
| 365 static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP) | |
| 366 { | |
| 367 #if HAVE_MMX2 || HAVE_AMD3DNOW | |
| 368 src+= stride*3; | |
| 369 // FIXME rounding | |
| 370 __asm__ volatile( | |
| 371 "pxor %%mm7, %%mm7 \n\t" // 0 | |
| 372 "movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE | |
| 373 "leal (%0, %1), %%"REG_a" \n\t" | |
| 374 "leal (%%"REG_a", %1, 4), %%"REG_c" \n\t" | |
| 375 // 0 1 2 3 4 5 6 7 8 9 | |
| 376 // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1 | |
| 377 "movq "MANGLE(pQPb)", %%mm0 \n\t" // QP,..., QP | |
| 378 "movq %%mm0, %%mm1 \n\t" // QP,..., QP | |
| 379 "paddusb "MANGLE(b02)", %%mm0 \n\t" | |
| 380 "psrlw $2, %%mm0 \n\t" | |
| 381 "pand "MANGLE(b3F)", %%mm0 \n\t" // QP/4,..., QP/4 | |
| 382 "paddusb %%mm1, %%mm0 \n\t" // QP*1.25 ... | |
| 383 "movq (%0, %1, 4), %%mm2 \n\t" // line 4 | |
| 384 "movq (%%"REG_c"), %%mm3 \n\t" // line 5 | |
| 385 "movq %%mm2, %%mm4 \n\t" // line 4 | |
| 386 "pcmpeqb %%mm5, %%mm5 \n\t" // -1 | |
| 387 "pxor %%mm2, %%mm5 \n\t" // -line 4 - 1 | |
| 388 PAVGB(%%mm3, %%mm5) | |
| 389 "paddb %%mm6, %%mm5 \n\t" // (l5-l4)/2 | |
| 390 "psubusb %%mm3, %%mm4 \n\t" | |
| 391 "psubusb %%mm2, %%mm3 \n\t" | |
| 392 "por %%mm3, %%mm4 \n\t" // |l4 - l5| | |
| 393 "psubusb %%mm0, %%mm4 \n\t" | |
| 394 "pcmpeqb %%mm7, %%mm4 \n\t" | |
| 395 "pand %%mm4, %%mm5 \n\t" // d/2 | |
| 396 | |
| 397 // "paddb %%mm6, %%mm2 \n\t" // line 4 + 0x80 | |
| 398 "paddb %%mm5, %%mm2 \n\t" | |
| 399 // "psubb %%mm6, %%mm2 \n\t" | |
| 400 "movq %%mm2, (%0,%1, 4) \n\t" | |
| 401 | |
| 402 "movq (%%"REG_c"), %%mm2 \n\t" | |
| 403 // "paddb %%mm6, %%mm2 \n\t" // line 5 + 0x80 | |
| 404 "psubb %%mm5, %%mm2 \n\t" | |
| 405 // "psubb %%mm6, %%mm2 \n\t" | |
| 406 "movq %%mm2, (%%"REG_c") \n\t" | |
| 407 | |
| 408 "paddb %%mm6, %%mm5 \n\t" | |
| 409 "psrlw $2, %%mm5 \n\t" | |
| 410 "pand "MANGLE(b3F)", %%mm5 \n\t" | |
| 411 "psubb "MANGLE(b20)", %%mm5 \n\t" // (l5-l4)/8 | |
| 412 | |
| 413 "movq (%%"REG_a", %1, 2), %%mm2 \n\t" | |
| 414 "paddb %%mm6, %%mm2 \n\t" // line 3 + 0x80 | |
| 415 "paddsb %%mm5, %%mm2 \n\t" | |
| 416 "psubb %%mm6, %%mm2 \n\t" | |
| 417 "movq %%mm2, (%%"REG_a", %1, 2) \n\t" | |
| 418 | |
| 419 "movq (%%"REG_c", %1), %%mm2 \n\t" | |
| 420 "paddb %%mm6, %%mm2 \n\t" // line 6 + 0x80 | |
| 421 "psubsb %%mm5, %%mm2 \n\t" | |
| 422 "psubb %%mm6, %%mm2 \n\t" | |
| 423 "movq %%mm2, (%%"REG_c", %1) \n\t" | |
| 424 | |
| 425 : | |
| 426 : "r" (src), "r" ((x86_reg)stride) | |
| 427 : "%"REG_a, "%"REG_c | |
| 428 ); | |
| 429 #else //HAVE_MMX2 || HAVE_AMD3DNOW | |
| 430 const int l1= stride; | |
| 431 const int l2= stride + l1; | |
| 432 const int l3= stride + l2; | |
| 433 const int l4= stride + l3; | |
| 434 const int l5= stride + l4; | |
| 435 const int l6= stride + l5; | |
| 436 // const int l7= stride + l6; | |
| 437 // const int l8= stride + l7; | |
| 438 // const int l9= stride + l8; | |
| 439 int x; | |
| 440 const int QP15= QP + (QP>>2); | |
| 441 src+= stride*3; | |
| 442 for(x=0; x<BLOCK_SIZE; x++){ | |
| 443 const int v = (src[x+l5] - src[x+l4]); | |
| 444 if(FFABS(v) < QP15){ | |
| 445 src[x+l3] +=v>>3; | |
| 446 src[x+l4] +=v>>1; | |
| 447 src[x+l5] -=v>>1; | |
| 448 src[x+l6] -=v>>3; | |
| 449 } | |
| 450 } | |
| 451 | |
| 452 #endif //HAVE_MMX2 || HAVE_AMD3DNOW | |
| 453 } | |
| 454 #endif //0 | |
| 455 | 351 |
| 456 /** | 352 /** |
| 457 * Experimental Filter 1 | 353 * Experimental Filter 1 |
| 458 * will not damage linear gradients | 354 * will not damage linear gradients |
| 459 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter | 355 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter |
