comparison libpostproc/postprocess_template.c @ 210:c2b6d68a0671 libavcodec

mangle for win32 in postproc
author atmos4
date Sat, 19 Jan 2002 05:14:46 +0000
parents 3ccd74a91074
children f1074f0d4969
comparison
equal deleted inserted replaced
209:c0d8ecae7ac5 210:c2b6d68a0671
58 asm volatile( 58 asm volatile(
59 "leal (%1, %2), %%eax \n\t" 59 "leal (%1, %2), %%eax \n\t"
60 "leal (%%eax, %2, 4), %%ebx \n\t" 60 "leal (%%eax, %2, 4), %%ebx \n\t"
61 // 0 1 2 3 4 5 6 7 8 9 61 // 0 1 2 3 4 5 6 7 8 9
62 // %1 eax eax+%2 eax+2%2 %1+4%2 ebx ebx+%2 ebx+2%2 %1+8%2 ebx+4%2 62 // %1 eax eax+%2 eax+2%2 %1+4%2 ebx ebx+%2 ebx+2%2 %1+8%2 ebx+4%2
63 "movq mmxDCOffset, %%mm7 \n\t" // mm7 = 0x7F 63 "movq "MANGLE(mmxDCOffset)", %%mm7 \n\t" // mm7 = 0x7F
64 "movq mmxDCThreshold, %%mm6 \n\t" // mm6 = 0x7D 64 "movq "MANGLE(mmxDCThreshold)", %%mm6 \n\t" // mm6 = 0x7D
65 "movq (%1), %%mm0 \n\t" 65 "movq (%1), %%mm0 \n\t"
66 "movq (%%eax), %%mm1 \n\t" 66 "movq (%%eax), %%mm1 \n\t"
67 "psubb %%mm1, %%mm0 \n\t" // mm0 = difference 67 "psubb %%mm1, %%mm0 \n\t" // mm0 = difference
68 "paddb %%mm7, %%mm0 \n\t" 68 "paddb %%mm7, %%mm0 \n\t"
69 "pcmpgtb %%mm6, %%mm0 \n\t" 69 "pcmpgtb %%mm6, %%mm0 \n\t"
169 "movq %%mm0, %%mm2 \n\t" 169 "movq %%mm0, %%mm2 \n\t"
170 "psubusb %%mm1, %%mm0 \n\t" 170 "psubusb %%mm1, %%mm0 \n\t"
171 "psubusb %%mm2, %%mm1 \n\t" 171 "psubusb %%mm2, %%mm1 \n\t"
172 "por %%mm1, %%mm0 \n\t" // ABS Diff 172 "por %%mm1, %%mm0 \n\t" // ABS Diff
173 173
174 "movq pQPb, %%mm7 \n\t" // QP,..., QP 174 "movq "MANGLE(pQPb)", %%mm7 \n\t" // QP,..., QP
175 "paddusb %%mm7, %%mm7 \n\t" // 2QP ... 2QP 175 "paddusb %%mm7, %%mm7 \n\t" // 2QP ... 2QP
176 "psubusb %%mm7, %%mm0 \n\t" // Diff <= 2QP -> 0 176 "psubusb %%mm7, %%mm0 \n\t" // Diff <= 2QP -> 0
177 "pcmpeqd b00, %%mm0 \n\t" 177 "pcmpeqd "MANGLE(b00)", %%mm0 \n\t"
178 "psrlq $16, %%mm0 \n\t" 178 "psrlq $16, %%mm0 \n\t"
179 "pcmpeqd bFF, %%mm0 \n\t" 179 "pcmpeqd "MANGLE(bFF)", %%mm0 \n\t"
180 // "movd %%mm0, (%1, %2, 4)\n\t" 180 // "movd %%mm0, (%1, %2, 4)\n\t"
181 "movd %%mm0, %0 \n\t" 181 "movd %%mm0, %0 \n\t"
182 : "=r" (isOk) 182 : "=r" (isOk)
183 : "r" (src), "r" (stride) 183 : "r" (src), "r" (stride)
184 ); 184 );
217 static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP) 217 static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
218 { 218 {
219 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 219 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
220 src+= stride*3; 220 src+= stride*3;
221 asm volatile( //"movv %0 %1 %2\n\t" 221 asm volatile( //"movv %0 %1 %2\n\t"
222 "movq pQPb, %%mm0 \n\t" // QP,..., QP 222 "movq "MANGLE(pQPb)", %%mm0 \n\t" // QP,..., QP
223 223
224 "movq (%0), %%mm6 \n\t" 224 "movq (%0), %%mm6 \n\t"
225 "movq (%0, %1), %%mm5 \n\t" 225 "movq (%0, %1), %%mm5 \n\t"
226 "movq %%mm5, %%mm1 \n\t" 226 "movq %%mm5, %%mm1 \n\t"
227 "movq %%mm6, %%mm2 \n\t" 227 "movq %%mm6, %%mm2 \n\t"
228 "psubusb %%mm6, %%mm5 \n\t" 228 "psubusb %%mm6, %%mm5 \n\t"
229 "psubusb %%mm1, %%mm2 \n\t" 229 "psubusb %%mm1, %%mm2 \n\t"
230 "por %%mm5, %%mm2 \n\t" // ABS Diff of lines 230 "por %%mm5, %%mm2 \n\t" // ABS Diff of lines
231 "psubusb %%mm0, %%mm2 \n\t" // diff <= QP -> 0 231 "psubusb %%mm0, %%mm2 \n\t" // diff <= QP -> 0
232 "pcmpeqb b00, %%mm2 \n\t" // diff <= QP -> FF 232 "pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // diff <= QP -> FF
233 233
234 "pand %%mm2, %%mm6 \n\t" 234 "pand %%mm2, %%mm6 \n\t"
235 "pandn %%mm1, %%mm2 \n\t" 235 "pandn %%mm1, %%mm2 \n\t"
236 "por %%mm2, %%mm6 \n\t"// First Line to Filter 236 "por %%mm2, %%mm6 \n\t"// First Line to Filter
237 237
245 "movq %%mm7, %%mm2 \n\t" 245 "movq %%mm7, %%mm2 \n\t"
246 "psubusb %%mm7, %%mm5 \n\t" 246 "psubusb %%mm7, %%mm5 \n\t"
247 "psubusb %%mm1, %%mm2 \n\t" 247 "psubusb %%mm1, %%mm2 \n\t"
248 "por %%mm5, %%mm2 \n\t" // ABS Diff of lines 248 "por %%mm5, %%mm2 \n\t" // ABS Diff of lines
249 "psubusb %%mm0, %%mm2 \n\t" // diff <= QP -> 0 249 "psubusb %%mm0, %%mm2 \n\t" // diff <= QP -> 0
250 "pcmpeqb b00, %%mm2 \n\t" // diff <= QP -> FF 250 "pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // diff <= QP -> FF
251 251
252 "pand %%mm2, %%mm7 \n\t" 252 "pand %%mm2, %%mm7 \n\t"
253 "pandn %%mm1, %%mm2 \n\t" 253 "pandn %%mm1, %%mm2 \n\t"
254 "por %%mm2, %%mm7 \n\t" // First Line to Filter 254 "por %%mm2, %%mm7 \n\t" // First Line to Filter
255 255
401 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 401 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
402 src+= stride*3; 402 src+= stride*3;
403 // FIXME rounding 403 // FIXME rounding
404 asm volatile( 404 asm volatile(
405 "pxor %%mm7, %%mm7 \n\t" // 0 405 "pxor %%mm7, %%mm7 \n\t" // 0
406 "movq b80, %%mm6 \n\t" // MIN_SIGNED_BYTE 406 "movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE
407 "leal (%0, %1), %%eax \n\t" 407 "leal (%0, %1), %%eax \n\t"
408 "leal (%%eax, %1, 4), %%ebx \n\t" 408 "leal (%%eax, %1, 4), %%ebx \n\t"
409 // 0 1 2 3 4 5 6 7 8 9 409 // 0 1 2 3 4 5 6 7 8 9
410 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 410 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
411 "movq pQPb, %%mm0 \n\t" // QP,..., QP 411 "movq "MANGLE(pQPb)", %%mm0 \n\t" // QP,..., QP
412 "movq %%mm0, %%mm1 \n\t" // QP,..., QP 412 "movq %%mm0, %%mm1 \n\t" // QP,..., QP
413 "paddusb b02, %%mm0 \n\t" 413 "paddusb "MANGLE(b02)", %%mm0 \n\t"
414 "psrlw $2, %%mm0 \n\t" 414 "psrlw $2, %%mm0 \n\t"
415 "pand b3F, %%mm0 \n\t" // QP/4,..., QP/4 415 "pand "MANGLE(b3F)", %%mm0 \n\t" // QP/4,..., QP/4
416 "paddusb %%mm1, %%mm0 \n\t" // QP*1.25 ... 416 "paddusb %%mm1, %%mm0 \n\t" // QP*1.25 ...
417 "movq (%0, %1, 4), %%mm2 \n\t" // line 4 417 "movq (%0, %1, 4), %%mm2 \n\t" // line 4
418 "movq (%%ebx), %%mm3 \n\t" // line 5 418 "movq (%%ebx), %%mm3 \n\t" // line 5
419 "movq %%mm2, %%mm4 \n\t" // line 4 419 "movq %%mm2, %%mm4 \n\t" // line 4
420 "pcmpeqb %%mm5, %%mm5 \n\t" // -1 420 "pcmpeqb %%mm5, %%mm5 \n\t" // -1
439 // "psubb %%mm6, %%mm2 \n\t" 439 // "psubb %%mm6, %%mm2 \n\t"
440 "movq %%mm2, (%%ebx) \n\t" 440 "movq %%mm2, (%%ebx) \n\t"
441 441
442 "paddb %%mm6, %%mm5 \n\t" 442 "paddb %%mm6, %%mm5 \n\t"
443 "psrlw $2, %%mm5 \n\t" 443 "psrlw $2, %%mm5 \n\t"
444 "pand b3F, %%mm5 \n\t" 444 "pand "MANGLE(b3F)", %%mm5 \n\t"
445 "psubb b20, %%mm5 \n\t" // (l5-l4)/8 445 "psubb "MANGLE(b20)", %%mm5 \n\t" // (l5-l4)/8
446 446
447 "movq (%%eax, %1, 2), %%mm2 \n\t" 447 "movq (%%eax, %1, 2), %%mm2 \n\t"
448 "paddb %%mm6, %%mm2 \n\t" // line 3 + 0x80 448 "paddb %%mm6, %%mm2 \n\t" // line 3 + 0x80
449 "paddsb %%mm5, %%mm2 \n\t" 449 "paddsb %%mm5, %%mm2 \n\t"
450 "psubb %%mm6, %%mm2 \n\t" 450 "psubb %%mm6, %%mm2 \n\t"
501 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 501 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
502 src+= stride*3; 502 src+= stride*3;
503 503
504 asm volatile( 504 asm volatile(
505 "pxor %%mm7, %%mm7 \n\t" // 0 505 "pxor %%mm7, %%mm7 \n\t" // 0
506 // "movq b80, %%mm6 \n\t" // MIN_SIGNED_BYTE 506 // "movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE
507 "leal (%0, %1), %%eax \n\t" 507 "leal (%0, %1), %%eax \n\t"
508 "leal (%%eax, %1, 4), %%ebx \n\t" 508 "leal (%%eax, %1, 4), %%ebx \n\t"
509 // 0 1 2 3 4 5 6 7 8 9 509 // 0 1 2 3 4 5 6 7 8 9
510 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 510 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
511 "movq (%%eax, %1, 2), %%mm0 \n\t" // line 3 511 "movq (%%eax, %1, 2), %%mm0 \n\t" // line 3
527 "pcmpeqb %%mm7, %%mm2 \n\t" // (l4 - l5) <= 0 ? -1 : 0 527 "pcmpeqb %%mm7, %%mm2 \n\t" // (l4 - l5) <= 0 ? -1 : 0
528 "psubusb %%mm1, %%mm5 \n\t" 528 "psubusb %%mm1, %%mm5 \n\t"
529 "por %%mm5, %%mm4 \n\t" // |l4 - l5| 529 "por %%mm5, %%mm4 \n\t" // |l4 - l5|
530 "psubusb %%mm0, %%mm4 \n\t" //d = MAX(0, |l4-l5| - (|l2-l3| + |l5-l6|)/2) 530 "psubusb %%mm0, %%mm4 \n\t" //d = MAX(0, |l4-l5| - (|l2-l3| + |l5-l6|)/2)
531 "movq %%mm4, %%mm3 \n\t" // d 531 "movq %%mm4, %%mm3 \n\t" // d
532 "psubusb pQPb, %%mm4 \n\t" 532 "psubusb "MANGLE(pQPb)", %%mm4 \n\t"
533 "pcmpeqb %%mm7, %%mm4 \n\t" // d <= QP ? -1 : 0 533 "pcmpeqb %%mm7, %%mm4 \n\t" // d <= QP ? -1 : 0
534 "psubusb b01, %%mm3 \n\t" 534 "psubusb "MANGLE(b01)", %%mm3 \n\t"
535 "pand %%mm4, %%mm3 \n\t" // d <= QP ? d : 0 535 "pand %%mm4, %%mm3 \n\t" // d <= QP ? d : 0
536 536
537 PAVGB(%%mm7, %%mm3) // d/2 537 PAVGB(%%mm7, %%mm3) // d/2
538 "movq %%mm3, %%mm1 \n\t" // d/2 538 "movq %%mm3, %%mm1 \n\t" // d/2
539 PAVGB(%%mm7, %%mm3) // d/4 539 PAVGB(%%mm7, %%mm3) // d/4
738 "por %%mm6, %%mm2 \n\t" // ~|2l4 - 5l5 + 5l6 - 2l7|/8 738 "por %%mm6, %%mm2 \n\t" // ~|2l4 - 5l5 + 5l6 - 2l7|/8
739 // mm0= SIGN(menergy), mm1= |lenergy|/8, mm2= |renergy|/8, mm3= |menergy|/8, mm7=0 739 // mm0= SIGN(menergy), mm1= |lenergy|/8, mm2= |renergy|/8, mm3= |menergy|/8, mm7=0
740 740
741 741
742 PMINUB(%%mm2, %%mm1, %%mm4) // MIN(|lenergy|,|renergy|)/8 742 PMINUB(%%mm2, %%mm1, %%mm4) // MIN(|lenergy|,|renergy|)/8
743 "movq pQPb, %%mm4 \n\t" // QP //FIXME QP+1 ? 743 "movq "MANGLE(pQPb)", %%mm4 \n\t" // QP //FIXME QP+1 ?
744 "paddusb b01, %%mm4 \n\t" 744 "paddusb "MANGLE(b01)", %%mm4 \n\t"
745 "pcmpgtb %%mm3, %%mm4 \n\t" // |menergy|/8 < QP 745 "pcmpgtb %%mm3, %%mm4 \n\t" // |menergy|/8 < QP
746 "psubusb %%mm1, %%mm3 \n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8 746 "psubusb %%mm1, %%mm3 \n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8
747 "pand %%mm4, %%mm3 \n\t" 747 "pand %%mm4, %%mm3 \n\t"
748 748
749 "movq %%mm3, %%mm1 \n\t" 749 "movq %%mm3, %%mm1 \n\t"
750 // "psubusb b01, %%mm3 \n\t" 750 // "psubusb "MANGLE(b01)", %%mm3 \n\t"
751 PAVGB(%%mm7, %%mm3) 751 PAVGB(%%mm7, %%mm3)
752 PAVGB(%%mm7, %%mm3) 752 PAVGB(%%mm7, %%mm3)
753 "paddusb %%mm1, %%mm3 \n\t" 753 "paddusb %%mm1, %%mm3 \n\t"
754 // "paddusb b01, %%mm3 \n\t" 754 // "paddusb "MANGLE(b01)", %%mm3 \n\t"
755 755
756 "movq (%%eax, %1, 2), %%mm6 \n\t" //l3 756 "movq (%%eax, %1, 2), %%mm6 \n\t" //l3
757 "movq (%0, %1, 4), %%mm5 \n\t" //l4 757 "movq (%0, %1, 4), %%mm5 \n\t" //l4
758 "movq (%0, %1, 4), %%mm4 \n\t" //l4 758 "movq (%0, %1, 4), %%mm4 \n\t" //l4
759 "psubusb %%mm6, %%mm5 \n\t" 759 "psubusb %%mm6, %%mm5 \n\t"
762 "pcmpeqb %%mm7, %%mm6 \n\t" // SIGN(l3-l4) 762 "pcmpeqb %%mm7, %%mm6 \n\t" // SIGN(l3-l4)
763 "pxor %%mm6, %%mm0 \n\t" 763 "pxor %%mm6, %%mm0 \n\t"
764 "pand %%mm0, %%mm3 \n\t" 764 "pand %%mm0, %%mm3 \n\t"
765 PMINUB(%%mm5, %%mm3, %%mm0) 765 PMINUB(%%mm5, %%mm3, %%mm0)
766 766
767 "psubusb b01, %%mm3 \n\t" 767 "psubusb "MANGLE(b01)", %%mm3 \n\t"
768 PAVGB(%%mm7, %%mm3) 768 PAVGB(%%mm7, %%mm3)
769 769
770 "movq (%%eax, %1, 2), %%mm0 \n\t" 770 "movq (%%eax, %1, 2), %%mm0 \n\t"
771 "movq (%0, %1, 4), %%mm2 \n\t" 771 "movq (%0, %1, 4), %%mm2 \n\t"
772 "pxor %%mm6, %%mm0 \n\t" 772 "pxor %%mm6, %%mm0 \n\t"
794 794
795 "movq (%%eax, %1, 4), %%mm2 \n\t" // l5 795 "movq (%%eax, %1, 4), %%mm2 \n\t" // l5
796 "movq (%%eax, %1), %%mm3 \n\t" // l2 796 "movq (%%eax, %1), %%mm3 \n\t" // l2
797 "pxor %%mm6, %%mm2 \n\t" // -l5-1 797 "pxor %%mm6, %%mm2 \n\t" // -l5-1
798 "movq %%mm2, %%mm5 \n\t" // -l5-1 798 "movq %%mm2, %%mm5 \n\t" // -l5-1
799 "movq b80, %%mm4 \n\t" // 128 799 "movq "MANGLE(b80)", %%mm4 \n\t" // 128
800 "leal (%%eax, %1, 4), %%ebx \n\t" 800 "leal (%%eax, %1, 4), %%ebx \n\t"
801 PAVGB(%%mm3, %%mm2) // (l2-l5+256)/2 801 PAVGB(%%mm3, %%mm2) // (l2-l5+256)/2
802 PAVGB(%%mm0, %%mm4) // ~(l4-l3)/4 + 128 802 PAVGB(%%mm0, %%mm4) // ~(l4-l3)/4 + 128
803 PAVGB(%%mm2, %%mm4) // ~(l2-l5)/4 +(l4-l3)/8 + 128 803 PAVGB(%%mm2, %%mm4) // ~(l2-l5)/4 +(l4-l3)/8 + 128
804 PAVGB(%%mm0, %%mm4) // ~(l2-l5)/8 +5(l4-l3)/16 + 128 804 PAVGB(%%mm0, %%mm4) // ~(l2-l5)/8 +5(l4-l3)/16 + 128
806 806
807 "movq (%%eax), %%mm2 \n\t" // l1 807 "movq (%%eax), %%mm2 \n\t" // l1
808 "pxor %%mm6, %%mm2 \n\t" // -l1-1 808 "pxor %%mm6, %%mm2 \n\t" // -l1-1
809 PAVGB(%%mm3, %%mm2) // (l2-l1+256)/2 809 PAVGB(%%mm3, %%mm2) // (l2-l1+256)/2
810 PAVGB((%0), %%mm1) // (l0-l3+256)/2 810 PAVGB((%0), %%mm1) // (l0-l3+256)/2
811 "movq b80, %%mm3 \n\t" // 128 811 "movq "MANGLE(b80)", %%mm3 \n\t" // 128
812 PAVGB(%%mm2, %%mm3) // ~(l2-l1)/4 + 128 812 PAVGB(%%mm2, %%mm3) // ~(l2-l1)/4 + 128
813 PAVGB(%%mm1, %%mm3) // ~(l0-l3)/4 +(l2-l1)/8 + 128 813 PAVGB(%%mm1, %%mm3) // ~(l0-l3)/4 +(l2-l1)/8 + 128
814 PAVGB(%%mm2, %%mm3) // ~(l0-l3)/8 +5(l2-l1)/16 + 128 814 PAVGB(%%mm2, %%mm3) // ~(l0-l3)/8 +5(l2-l1)/16 + 128
815 // mm0=128-q, mm3=lenergy/16 + 128, mm4= menergy/16 + 128, mm5= -l5-1 815 // mm0=128-q, mm3=lenergy/16 + 128, mm4= menergy/16 + 128, mm5= -l5-1
816 816
817 PAVGB((%%ebx, %1), %%mm5) // (l6-l5+256)/2 817 PAVGB((%%ebx, %1), %%mm5) // (l6-l5+256)/2
818 "movq (%%ebx, %1, 2), %%mm1 \n\t" // l7 818 "movq (%%ebx, %1, 2), %%mm1 \n\t" // l7
819 "pxor %%mm6, %%mm1 \n\t" // -l7-1 819 "pxor %%mm6, %%mm1 \n\t" // -l7-1
820 PAVGB((%0, %1, 4), %%mm1) // (l4-l7+256)/2 820 PAVGB((%0, %1, 4), %%mm1) // (l4-l7+256)/2
821 "movq b80, %%mm2 \n\t" // 128 821 "movq "MANGLE(b80)", %%mm2 \n\t" // 128
822 PAVGB(%%mm5, %%mm2) // ~(l6-l5)/4 + 128 822 PAVGB(%%mm5, %%mm2) // ~(l6-l5)/4 + 128
823 PAVGB(%%mm1, %%mm2) // ~(l4-l7)/4 +(l6-l5)/8 + 128 823 PAVGB(%%mm1, %%mm2) // ~(l4-l7)/4 +(l6-l5)/8 + 128
824 PAVGB(%%mm5, %%mm2) // ~(l4-l7)/8 +5(l6-l5)/16 + 128 824 PAVGB(%%mm5, %%mm2) // ~(l4-l7)/8 +5(l6-l5)/16 + 128
825 // mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128 825 // mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128
826 826
827 "movq b00, %%mm1 \n\t" // 0 827 "movq "MANGLE(b00)", %%mm1 \n\t" // 0
828 "movq b00, %%mm5 \n\t" // 0 828 "movq "MANGLE(b00)", %%mm5 \n\t" // 0
829 "psubb %%mm2, %%mm1 \n\t" // 128 - renergy/16 829 "psubb %%mm2, %%mm1 \n\t" // 128 - renergy/16
830 "psubb %%mm3, %%mm5 \n\t" // 128 - lenergy/16 830 "psubb %%mm3, %%mm5 \n\t" // 128 - lenergy/16
831 PMAXUB(%%mm1, %%mm2) // 128 + |renergy/16| 831 PMAXUB(%%mm1, %%mm2) // 128 + |renergy/16|
832 PMAXUB(%%mm5, %%mm3) // 128 + |lenergy/16| 832 PMAXUB(%%mm5, %%mm3) // 128 + |lenergy/16|
833 PMINUB(%%mm2, %%mm3, %%mm1) // 128 + MIN(|lenergy|,|renergy|)/16 833 PMINUB(%%mm2, %%mm3, %%mm1) // 128 + MIN(|lenergy|,|renergy|)/16
834 834
835 // mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128 835 // mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128
836 836
837 "movq b00, %%mm7 \n\t" // 0 837 "movq "MANGLE(b00)", %%mm7 \n\t" // 0
838 "movq pQPb, %%mm2 \n\t" // QP 838 "movq "MANGLE(pQPb)", %%mm2 \n\t" // QP
839 PAVGB(%%mm6, %%mm2) // 128 + QP/2 839 PAVGB(%%mm6, %%mm2) // 128 + QP/2
840 "psubb %%mm6, %%mm2 \n\t" 840 "psubb %%mm6, %%mm2 \n\t"
841 841
842 "movq %%mm4, %%mm1 \n\t" 842 "movq %%mm4, %%mm1 \n\t"
843 "pcmpgtb %%mm7, %%mm1 \n\t" // SIGN(menergy) 843 "pcmpgtb %%mm7, %%mm1 \n\t" // SIGN(menergy)
846 "pcmpgtb %%mm4, %%mm2 \n\t" // |menergy|/16 < QP/2 846 "pcmpgtb %%mm4, %%mm2 \n\t" // |menergy|/16 < QP/2
847 "psubusb %%mm3, %%mm4 \n\t" //d=|menergy|/16 - MIN(|lenergy|,|renergy|)/16 847 "psubusb %%mm3, %%mm4 \n\t" //d=|menergy|/16 - MIN(|lenergy|,|renergy|)/16
848 // mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16 848 // mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16
849 849
850 "movq %%mm4, %%mm3 \n\t" // d 850 "movq %%mm4, %%mm3 \n\t" // d
851 "psubusb b01, %%mm4 \n\t" 851 "psubusb "MANGLE(b01)", %%mm4 \n\t"
852 PAVGB(%%mm7, %%mm4) // d/32 852 PAVGB(%%mm7, %%mm4) // d/32
853 PAVGB(%%mm7, %%mm4) // (d + 32)/64 853 PAVGB(%%mm7, %%mm4) // (d + 32)/64
854 "paddb %%mm3, %%mm4 \n\t" // 5d/64 854 "paddb %%mm3, %%mm4 \n\t" // 5d/64
855 "pand %%mm2, %%mm4 \n\t" 855 "pand %%mm2, %%mm4 \n\t"
856 856
857 "movq b80, %%mm5 \n\t" // 128 857 "movq "MANGLE(b80)", %%mm5 \n\t" // 128
858 "psubb %%mm0, %%mm5 \n\t" // q 858 "psubb %%mm0, %%mm5 \n\t" // q
859 "paddsb %%mm6, %%mm5 \n\t" // fix bad rounding 859 "paddsb %%mm6, %%mm5 \n\t" // fix bad rounding
860 "pcmpgtb %%mm5, %%mm7 \n\t" // SIGN(q) 860 "pcmpgtb %%mm5, %%mm7 \n\t" // SIGN(q)
861 "pxor %%mm7, %%mm5 \n\t" 861 "pxor %%mm7, %%mm5 \n\t"
862 862
989 989
990 "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - L3 990 "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - L3
991 "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - H3 991 "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - H3
992 "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 992 "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
993 "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 993 "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
994 "movq %%mm0, temp0 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 994 "movq %%mm0, "MANGLE(temp0)" \n\t" // 2L0 - 5L1 + 5L2 - 2L3
995 "movq %%mm1, temp1 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 995 "movq %%mm1, "MANGLE(temp1)" \n\t" // 2H0 - 5H1 + 5H2 - 2H3
996 996
997 "movq (%0, %1, 4), %%mm0 \n\t" 997 "movq (%0, %1, 4), %%mm0 \n\t"
998 "movq %%mm0, %%mm1 \n\t" 998 "movq %%mm0, %%mm1 \n\t"
999 "punpcklbw %%mm7, %%mm0 \n\t" // L4 999 "punpcklbw %%mm7, %%mm0 \n\t" // L4
1000 "punpckhbw %%mm7, %%mm1 \n\t" // H4 1000 "punpckhbw %%mm7, %%mm1 \n\t" // H4
1001 1001
1002 "psubw %%mm0, %%mm2 \n\t" // L3 - L4 1002 "psubw %%mm0, %%mm2 \n\t" // L3 - L4
1003 "psubw %%mm1, %%mm3 \n\t" // H3 - H4 1003 "psubw %%mm1, %%mm3 \n\t" // H3 - H4
1004 "movq %%mm2, temp2 \n\t" // L3 - L4 1004 "movq %%mm2, "MANGLE(temp2)" \n\t" // L3 - L4
1005 "movq %%mm3, temp3 \n\t" // H3 - H4 1005 "movq %%mm3, "MANGLE(temp3)" \n\t" // H3 - H4
1006 "paddw %%mm4, %%mm4 \n\t" // 2L2 1006 "paddw %%mm4, %%mm4 \n\t" // 2L2
1007 "paddw %%mm5, %%mm5 \n\t" // 2H2 1007 "paddw %%mm5, %%mm5 \n\t" // 2H2
1008 "psubw %%mm2, %%mm4 \n\t" // 2L2 - L3 + L4 1008 "psubw %%mm2, %%mm4 \n\t" // 2L2 - L3 + L4
1009 "psubw %%mm3, %%mm5 \n\t" // 2H2 - H3 + H4 1009 "psubw %%mm3, %%mm5 \n\t" // 2H2 - H3 + H4
1010 1010
1047 "paddw %%mm2, %%mm2 \n\t" // 2L7 1047 "paddw %%mm2, %%mm2 \n\t" // 2L7
1048 "paddw %%mm3, %%mm3 \n\t" // 2H7 1048 "paddw %%mm3, %%mm3 \n\t" // 2H7
1049 "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7 1049 "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7
1050 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7 1050 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7
1051 1051
1052 "movq temp0, %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 1052 "movq "MANGLE(temp0)", %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
1053 "movq temp1, %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 1053 "movq "MANGLE(temp1)", %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
1054 1054
1055 #ifdef HAVE_MMX2 1055 #ifdef HAVE_MMX2
1056 "movq %%mm7, %%mm6 \n\t" // 0 1056 "movq %%mm7, %%mm6 \n\t" // 0
1057 "psubw %%mm0, %%mm6 \n\t" 1057 "psubw %%mm0, %%mm6 \n\t"
1058 "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| 1058 "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
1136 //FIXME if *5/64 is supposed to be /13 then we should use 5041 instead of 5120 1136 //FIXME if *5/64 is supposed to be /13 then we should use 5041 instead of 5120
1137 "pmulhw %%mm2, %%mm4 \n\t" // hd/13 1137 "pmulhw %%mm2, %%mm4 \n\t" // hd/13
1138 "pmulhw %%mm2, %%mm5 \n\t" // ld/13 1138 "pmulhw %%mm2, %%mm5 \n\t" // ld/13
1139 */ 1139 */
1140 1140
1141 "movq temp2, %%mm0 \n\t" // L3 - L4 1141 "movq "MANGLE(temp2)", %%mm0 \n\t" // L3 - L4
1142 "movq temp3, %%mm1 \n\t" // H3 - H4 1142 "movq "MANGLE(temp3)", %%mm1 \n\t" // H3 - H4
1143 1143
1144 "pxor %%mm2, %%mm2 \n\t" 1144 "pxor %%mm2, %%mm2 \n\t"
1145 "pxor %%mm3, %%mm3 \n\t" 1145 "pxor %%mm3, %%mm3 \n\t"
1146 1146
1147 "pcmpgtw %%mm0, %%mm2 \n\t" // sign (L3-L4) 1147 "pcmpgtw %%mm0, %%mm2 \n\t" // sign (L3-L4)
1233 1233
1234 static inline void RENAME(dering)(uint8_t src[], int stride, int QP) 1234 static inline void RENAME(dering)(uint8_t src[], int stride, int QP)
1235 { 1235 {
1236 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1236 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1237 asm volatile( 1237 asm volatile(
1238 "movq pQPb, %%mm0 \n\t" 1238 "movq "MANGLE(pQPb)", %%mm0 \n\t"
1239 "paddusb %%mm0, %%mm0 \n\t" 1239 "paddusb %%mm0, %%mm0 \n\t"
1240 "movq %%mm0, pQPb2 \n\t" 1240 "movq %%mm0, "MANGLE(pQPb2)" \n\t"
1241 1241
1242 "leal (%0, %1), %%eax \n\t" 1242 "leal (%0, %1), %%eax \n\t"
1243 "leal (%%eax, %1, 4), %%ebx \n\t" 1243 "leal (%%eax, %1, 4), %%ebx \n\t"
1244 // 0 1 2 3 4 5 6 7 8 9 1244 // 0 1 2 3 4 5 6 7 8 9
1245 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 1245 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
1317 "paddb %%mm4, %%mm6 \n\t" 1317 "paddb %%mm4, %%mm6 \n\t"
1318 #endif 1318 #endif
1319 "movq %%mm6, %%mm0 \n\t" // max 1319 "movq %%mm6, %%mm0 \n\t" // max
1320 "psubb %%mm7, %%mm6 \n\t" // max - min 1320 "psubb %%mm7, %%mm6 \n\t" // max - min
1321 "movd %%mm6, %%ecx \n\t" 1321 "movd %%mm6, %%ecx \n\t"
1322 "cmpb deringThreshold, %%cl \n\t" 1322 "cmpb "MANGLE(deringThreshold)", %%cl \n\t"
1323 " jb 1f \n\t" 1323 " jb 1f \n\t"
1324 PAVGB(%%mm0, %%mm7) // a=(max + min)/2 1324 PAVGB(%%mm0, %%mm7) // a=(max + min)/2
1325 "punpcklbw %%mm7, %%mm7 \n\t" 1325 "punpcklbw %%mm7, %%mm7 \n\t"
1326 "punpcklbw %%mm7, %%mm7 \n\t" 1326 "punpcklbw %%mm7, %%mm7 \n\t"
1327 "punpcklbw %%mm7, %%mm7 \n\t" 1327 "punpcklbw %%mm7, %%mm7 \n\t"
1328 "movq %%mm7, temp0 \n\t" 1328 "movq %%mm7, "MANGLE(temp0)" \n\t"
1329 1329
1330 "movq (%0), %%mm0 \n\t" // L10 1330 "movq (%0), %%mm0 \n\t" // L10
1331 "movq %%mm0, %%mm1 \n\t" // L10 1331 "movq %%mm0, %%mm1 \n\t" // L10
1332 "movq %%mm0, %%mm2 \n\t" // L10 1332 "movq %%mm0, %%mm2 \n\t" // L10
1333 "psllq $8, %%mm1 \n\t" 1333 "psllq $8, %%mm1 \n\t"
1342 PAVGB(%%mm2, %%mm1) // (L20 + L00)/2 1342 PAVGB(%%mm2, %%mm1) // (L20 + L00)/2
1343 PAVGB(%%mm0, %%mm1) // (L20 + L00 + 2L10)/4 1343 PAVGB(%%mm0, %%mm1) // (L20 + L00 + 2L10)/4
1344 "psubusb %%mm7, %%mm0 \n\t" 1344 "psubusb %%mm7, %%mm0 \n\t"
1345 "psubusb %%mm7, %%mm2 \n\t" 1345 "psubusb %%mm7, %%mm2 \n\t"
1346 "psubusb %%mm7, %%mm3 \n\t" 1346 "psubusb %%mm7, %%mm3 \n\t"
1347 "pcmpeqb b00, %%mm0 \n\t" // L10 > a ? 0 : -1 1347 "pcmpeqb "MANGLE(b00)", %%mm0 \n\t" // L10 > a ? 0 : -1
1348 "pcmpeqb b00, %%mm2 \n\t" // L20 > a ? 0 : -1 1348 "pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // L20 > a ? 0 : -1
1349 "pcmpeqb b00, %%mm3 \n\t" // L00 > a ? 0 : -1 1349 "pcmpeqb "MANGLE(b00)", %%mm3 \n\t" // L00 > a ? 0 : -1
1350 "paddb %%mm2, %%mm0 \n\t" 1350 "paddb %%mm2, %%mm0 \n\t"
1351 "paddb %%mm3, %%mm0 \n\t" 1351 "paddb %%mm3, %%mm0 \n\t"
1352 1352
1353 "movq (%%eax), %%mm2 \n\t" // L11 1353 "movq (%%eax), %%mm2 \n\t" // L11
1354 "movq %%mm2, %%mm3 \n\t" // L11 1354 "movq %%mm2, %%mm3 \n\t" // L11
1365 PAVGB(%%mm4, %%mm3) // (L21 + L01)/2 1365 PAVGB(%%mm4, %%mm3) // (L21 + L01)/2
1366 PAVGB(%%mm2, %%mm3) // (L21 + L01 + 2L11)/4 1366 PAVGB(%%mm2, %%mm3) // (L21 + L01 + 2L11)/4
1367 "psubusb %%mm7, %%mm2 \n\t" 1367 "psubusb %%mm7, %%mm2 \n\t"
1368 "psubusb %%mm7, %%mm4 \n\t" 1368 "psubusb %%mm7, %%mm4 \n\t"
1369 "psubusb %%mm7, %%mm5 \n\t" 1369 "psubusb %%mm7, %%mm5 \n\t"
1370 "pcmpeqb b00, %%mm2 \n\t" // L11 > a ? 0 : -1 1370 "pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // L11 > a ? 0 : -1
1371 "pcmpeqb b00, %%mm4 \n\t" // L21 > a ? 0 : -1 1371 "pcmpeqb "MANGLE(b00)", %%mm4 \n\t" // L21 > a ? 0 : -1
1372 "pcmpeqb b00, %%mm5 \n\t" // L01 > a ? 0 : -1 1372 "pcmpeqb "MANGLE(b00)", %%mm5 \n\t" // L01 > a ? 0 : -1
1373 "paddb %%mm4, %%mm2 \n\t" 1373 "paddb %%mm4, %%mm2 \n\t"
1374 "paddb %%mm5, %%mm2 \n\t" 1374 "paddb %%mm5, %%mm2 \n\t"
1375 // 0, 2, 3, 1 1375 // 0, 2, 3, 1
1376 #define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \ 1376 #define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
1377 "movq " #src ", " #sx " \n\t" /* src[0] */\ 1377 "movq " #src ", " #sx " \n\t" /* src[0] */\
1387 "por " #t1 ", " #t0 " \n\t" /* src[+1] */\ 1387 "por " #t1 ", " #t0 " \n\t" /* src[+1] */\
1388 "movq " #lx ", " #t1 " \n\t" /* src[-1] */\ 1388 "movq " #lx ", " #t1 " \n\t" /* src[-1] */\
1389 PAVGB(t0, lx) /* (src[-1] + src[+1])/2 */\ 1389 PAVGB(t0, lx) /* (src[-1] + src[+1])/2 */\
1390 PAVGB(sx, lx) /* (src[-1] + 2src[0] + src[+1])/4 */\ 1390 PAVGB(sx, lx) /* (src[-1] + 2src[0] + src[+1])/4 */\
1391 PAVGB(lx, pplx) \ 1391 PAVGB(lx, pplx) \
1392 "movq " #lx ", temp1 \n\t"\ 1392 "movq " #lx ", "MANGLE(temp1)" \n\t"\
1393 "movq temp0, " #lx " \n\t"\ 1393 "movq "MANGLE(temp0)", " #lx " \n\t"\
1394 "psubusb " #lx ", " #t1 " \n\t"\ 1394 "psubusb " #lx ", " #t1 " \n\t"\
1395 "psubusb " #lx ", " #t0 " \n\t"\ 1395 "psubusb " #lx ", " #t0 " \n\t"\
1396 "psubusb " #lx ", " #sx " \n\t"\ 1396 "psubusb " #lx ", " #sx " \n\t"\
1397 "movq b00, " #lx " \n\t"\ 1397 "movq "MANGLE(b00)", " #lx " \n\t"\
1398 "pcmpeqb " #lx ", " #t1 " \n\t" /* src[-1] > a ? 0 : -1*/\ 1398 "pcmpeqb " #lx ", " #t1 " \n\t" /* src[-1] > a ? 0 : -1*/\
1399 "pcmpeqb " #lx ", " #t0 " \n\t" /* src[+1] > a ? 0 : -1*/\ 1399 "pcmpeqb " #lx ", " #t0 " \n\t" /* src[+1] > a ? 0 : -1*/\
1400 "pcmpeqb " #lx ", " #sx " \n\t" /* src[0] > a ? 0 : -1*/\ 1400 "pcmpeqb " #lx ", " #sx " \n\t" /* src[0] > a ? 0 : -1*/\
1401 "paddb " #t1 ", " #t0 " \n\t"\ 1401 "paddb " #t1 ", " #t0 " \n\t"\
1402 "paddb " #t0 ", " #sx " \n\t"\ 1402 "paddb " #t0 ", " #sx " \n\t"\
1403 \ 1403 \
1404 PAVGB(plx, pplx) /* filtered */\ 1404 PAVGB(plx, pplx) /* filtered */\
1405 "movq " #dst ", " #t0 " \n\t" /* dst */\ 1405 "movq " #dst ", " #t0 " \n\t" /* dst */\
1406 "movq " #t0 ", " #t1 " \n\t" /* dst */\ 1406 "movq " #t0 ", " #t1 " \n\t" /* dst */\
1407 "psubusb pQPb2, " #t0 " \n\t"\ 1407 "psubusb "MANGLE(pQPb2)", " #t0 " \n\t"\
1408 "paddusb pQPb2, " #t1 " \n\t"\ 1408 "paddusb "MANGLE(pQPb2)", " #t1 " \n\t"\
1409 PMAXUB(t0, pplx)\ 1409 PMAXUB(t0, pplx)\
1410 PMINUB(t1, pplx, t0)\ 1410 PMINUB(t1, pplx, t0)\
1411 "paddb " #sx ", " #ppsx " \n\t"\ 1411 "paddb " #sx ", " #ppsx " \n\t"\
1412 "paddb " #psx ", " #ppsx " \n\t"\ 1412 "paddb " #psx ", " #ppsx " \n\t"\
1413 "#paddb b02, " #ppsx " \n\t"\ 1413 "#paddb "MANGLE(b02)", " #ppsx " \n\t"\
1414 "pand b08, " #ppsx " \n\t"\ 1414 "pand "MANGLE(b08)", " #ppsx " \n\t"\
1415 "pcmpeqb " #lx ", " #ppsx " \n\t"\ 1415 "pcmpeqb " #lx ", " #ppsx " \n\t"\
1416 "pand " #ppsx ", " #pplx " \n\t"\ 1416 "pand " #ppsx ", " #pplx " \n\t"\
1417 "pandn " #dst ", " #ppsx " \n\t"\ 1417 "pandn " #dst ", " #ppsx " \n\t"\
1418 "por " #pplx ", " #ppsx " \n\t"\ 1418 "por " #pplx ", " #ppsx " \n\t"\
1419 "movq " #ppsx ", " #dst " \n\t"\ 1419 "movq " #ppsx ", " #dst " \n\t"\
1420 "movq temp1, " #lx " \n\t" 1420 "movq "MANGLE(temp1)", " #lx " \n\t"
1421 1421
1422 /* 1422 /*
1423 0000000 1423 0000000
1424 1111111 1424 1111111
1425 1425
2080 "paddw %%mm5, %%mm6 \n\t" 2080 "paddw %%mm5, %%mm6 \n\t"
2081 "paddw %%mm7, %%mm6 \n\t" 2081 "paddw %%mm7, %%mm6 \n\t"
2082 "paddw %%mm6, %%mm0 \n\t" 2082 "paddw %%mm6, %%mm0 \n\t"
2083 #elif defined (FAST_L2_DIFF) 2083 #elif defined (FAST_L2_DIFF)
2084 "pcmpeqb %%mm7, %%mm7 \n\t" 2084 "pcmpeqb %%mm7, %%mm7 \n\t"
2085 "movq b80, %%mm6 \n\t" 2085 "movq "MANGLE(b80)", %%mm6 \n\t"
2086 "pxor %%mm0, %%mm0 \n\t" 2086 "pxor %%mm0, %%mm0 \n\t"
2087 #define L2_DIFF_CORE(a, b)\ 2087 #define L2_DIFF_CORE(a, b)\
2088 "movq " #a ", %%mm5 \n\t"\ 2088 "movq " #a ", %%mm5 \n\t"\
2089 "movq " #b ", %%mm2 \n\t"\ 2089 "movq " #b ", %%mm2 \n\t"\
2090 "pxor %%mm7, %%mm2 \n\t"\ 2090 "pxor %%mm7, %%mm2 \n\t"\
2150 "addl 1024(%%ebx), %%ecx \n\t" 2150 "addl 1024(%%ebx), %%ecx \n\t"
2151 "shrl $3, %%ecx \n\t" 2151 "shrl $3, %%ecx \n\t"
2152 "movl %%ecx, (%%ebx) \n\t" 2152 "movl %%ecx, (%%ebx) \n\t"
2153 "leal (%%eax, %2, 2), %%ebx \n\t" // 5*stride 2153 "leal (%%eax, %2, 2), %%ebx \n\t" // 5*stride
2154 2154
2155 // "movl %3, %%ecx \n\t" 2155 // "movl %3, %%ecx \n\t"
2156 // "movl %%ecx, test \n\t" 2156 // "movl %%ecx, test \n\t"
2157 // "jmp 4f \n\t" 2157 // "jmp 4f \n\t"
2158 "cmpl 4+maxTmpNoise, %%ecx \n\t" 2158 "cmpl 4+"MANGLE(maxTmpNoise)", %%ecx \n\t"
2159 " jb 2f \n\t" 2159 " jb 2f \n\t"
2160 "cmpl 8+maxTmpNoise, %%ecx \n\t" 2160 "cmpl 8+"MANGLE(maxTmpNoise)", %%ecx \n\t"
2161 " jb 1f \n\t" 2161 " jb 1f \n\t"
2162 2162
2163 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride 2163 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride
2164 "movq (%0), %%mm0 \n\t" // L0 2164 "movq (%0), %%mm0 \n\t" // L0
2165 "movq (%0, %2), %%mm1 \n\t" // L1 2165 "movq (%0, %2), %%mm1 \n\t" // L1
2214 "movq %%mm6, (%0, %%eax, 2) \n\t" // L6 2214 "movq %%mm6, (%0, %%eax, 2) \n\t" // L6
2215 "movq %%mm7, (%0, %%ecx) \n\t" // L7 2215 "movq %%mm7, (%0, %%ecx) \n\t" // L7
2216 "jmp 4f \n\t" 2216 "jmp 4f \n\t"
2217 2217
2218 "2: \n\t" 2218 "2: \n\t"
2219 "cmpl maxTmpNoise, %%ecx \n\t" 2219 "cmpl "MANGLE(maxTmpNoise)", %%ecx \n\t"
2220 " jb 3f \n\t" 2220 " jb 3f \n\t"
2221 2221
2222 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride 2222 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride
2223 "movq (%0), %%mm0 \n\t" // L0 2223 "movq (%0), %%mm0 \n\t" // L0
2224 "movq (%0, %2), %%mm1 \n\t" // L1 2224 "movq (%0, %2), %%mm1 \n\t" // L1
2459 { 2459 {
2460 #ifdef HAVE_MMX 2460 #ifdef HAVE_MMX
2461 asm volatile( 2461 asm volatile(
2462 "leal (%0,%2), %%eax \n\t" 2462 "leal (%0,%2), %%eax \n\t"
2463 "leal (%1,%3), %%ebx \n\t" 2463 "leal (%1,%3), %%ebx \n\t"
2464 "movq packedYOffset, %%mm2 \n\t" 2464 "movq "MANGLE(packedYOffset)", %%mm2\n\t"
2465 "movq packedYScale, %%mm3 \n\t" 2465 "movq "MANGLE(packedYScale)", %%mm3\n\t"
2466 "pxor %%mm4, %%mm4 \n\t" 2466 "pxor %%mm4, %%mm4 \n\t"
2467 #ifdef HAVE_MMX2 2467 #ifdef HAVE_MMX2
2468 #define SCALED_CPY(src1, src2, dst1, dst2) \ 2468 #define SCALED_CPY(src1, src2, dst1, dst2) \
2469 "movq " #src1 ", %%mm0 \n\t"\ 2469 "movq " #src1 ", %%mm0 \n\t"\
2470 "movq " #src1 ", %%mm5 \n\t"\ 2470 "movq " #src1 ", %%mm5 \n\t"\
2882 asm volatile( 2882 asm volatile(
2883 "movd %0, %%mm7 \n\t" 2883 "movd %0, %%mm7 \n\t"
2884 "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP 2884 "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
2885 "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP 2885 "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
2886 "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP 2886 "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP
2887 "movq %%mm7, pQPb \n\t" 2887 "movq %%mm7, "MANGLE(pQPb)" \n\t"
2888 : : "r" (QP) 2888 : : "r" (QP)
2889 ); 2889 );
2890 #endif 2890 #endif
2891 2891
2892 #ifdef MORE_TIMING 2892 #ifdef MORE_TIMING