comparison postprocess_template.c @ 113:bf8f52662dc3 libpostproc

Replace long with x86_reg in postprocess_template.c, as in all other x86 assembler code files; only libpostproc had been forgotten.
author reimar
date Sun, 02 Nov 2008 18:59:44 +0000
parents d4d919ebc31c
children bdd1788fb53b
comparing 112:d4d919ebc31c (parent) with 113:bf8f52662dc3
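The change itself is mechanical: every (long) cast on a stride/step inline-asm operand becomes (x86_reg). The point of x86_reg is that it always matches the width of a general-purpose register, whereas long stays 32 bits on LLP64 targets such as 64-bit Windows, so a "r" ((long)stride) operand would be substituted as a 32-bit register and no longer match the 64-bit addressing the asm templates use on x86_64. A minimal sketch of the idea follows; the typedef mirrors the one FFmpeg's x86 headers provide, and touch_row is a hypothetical helper written only to show the cast, not code from this file.

#include <stdint.h>

/* Register-width integer type for inline-asm "r" operands (sketch of the
 * FFmpeg typedef; the compiler macros below are the usual predefined ones). */
#if defined(__x86_64__) || defined(_M_X64)
typedef int64_t x86_reg;            /* GPRs are 64 bits wide on x86_64 */
#else
typedef int32_t x86_reg;            /* GPRs are 32 bits wide on x86_32 */
#endif

/* Hypothetical helper showing the pattern this commit fixes: the stride
 * operand handed to GCC-style inline asm is cast to x86_reg, not long,
 * so it fills a whole register even where long is only 32 bits. */
static void touch_row(uint8_t *src, int stride)
{
#if defined(__GNUC__) && defined(__x86_64__)
    __asm__ volatile(
        "movzbl (%0, %1), %%eax     \n\t"   /* load the byte at src + stride */
        "movb   %%al, (%0, %1)      \n\t"   /* store it back unchanged       */
        :
        : "r" (src), "r" ((x86_reg)stride)
        : "%eax", "memory"
    );
#else
    (void)src; (void)stride;                /* other compilers/arches: no-op */
#endif
}

Casting an int stride to x86_reg sign-extends it to the full register on every x86 target, which is what the addressing modes in the hunks below (e.g. (%0, %1, 4)) rely on.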
156 "packssdw %%mm4, %%mm4 \n\t" 156 "packssdw %%mm4, %%mm4 \n\t"
157 "movd %%mm0, %0 \n\t" 157 "movd %%mm0, %0 \n\t"
158 "movd %%mm4, %1 \n\t" 158 "movd %%mm4, %1 \n\t"
159 159
160 : "=r" (numEq), "=r" (dcOk) 160 : "=r" (numEq), "=r" (dcOk)
161 : "r" (src), "r" ((long)stride), "m" (c->pQPb) 161 : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
162 : "%"REG_a 162 : "%"REG_a
163 ); 163 );
164 164
165 numEq= (-numEq) &0xFF; 165 numEq= (-numEq) &0xFF;
166 if(numEq > c->ppMode.flatnessThreshold){ 166 if(numEq > c->ppMode.flatnessThreshold){
301 PAVGB(%%mm0, %%mm5) // 112246 /16 301 PAVGB(%%mm0, %%mm5) // 112246 /16
302 "movq %%mm5, (%%"REG_a", %1, 4) \n\t" // X 302 "movq %%mm5, (%%"REG_a", %1, 4) \n\t" // X
303 "sub %1, %0 \n\t" 303 "sub %1, %0 \n\t"
304 304
305 : 305 :
306 : "r" (src), "r" ((long)stride), "m" (c->pQPb) 306 : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
307 : "%"REG_a, "%"REG_c 307 : "%"REG_a, "%"REG_c
308 ); 308 );
309 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 309 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
310 const int l1= stride; 310 const int l1= stride;
311 const int l2= stride + l1; 311 const int l2= stride + l1;
421 "psubsb %%mm5, %%mm2 \n\t" 421 "psubsb %%mm5, %%mm2 \n\t"
422 "psubb %%mm6, %%mm2 \n\t" 422 "psubb %%mm6, %%mm2 \n\t"
423 "movq %%mm2, (%%"REG_c", %1) \n\t" 423 "movq %%mm2, (%%"REG_c", %1) \n\t"
424 424
425 : 425 :
426 : "r" (src), "r" ((long)stride) 426 : "r" (src), "r" ((x86_reg)stride)
427 : "%"REG_a, "%"REG_c 427 : "%"REG_a, "%"REG_c
428 ); 428 );
429 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 429 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
430 const int l1= stride; 430 const int l1= stride;
431 const int l2= stride + l1; 431 const int l2= stride + l1;
543 "paddusb %%mm1, %%mm0 \n\t" 543 "paddusb %%mm1, %%mm0 \n\t"
544 "pxor %%mm2, %%mm0 \n\t" 544 "pxor %%mm2, %%mm0 \n\t"
545 "movq %%mm0, (%%"REG_c", %1, 2) \n\t" // line 7 545 "movq %%mm0, (%%"REG_c", %1, 2) \n\t" // line 7
546 546
547 : 547 :
548 : "r" (src), "r" ((long)stride), "m" (co->pQPb) 548 : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb)
549 : "%"REG_a, "%"REG_c 549 : "%"REG_a, "%"REG_c
550 ); 550 );
551 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 551 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
552 552
553 const int l1= stride; 553 const int l1= stride;
808 "pxor %%mm1, %%mm2 \n\t" 808 "pxor %%mm1, %%mm2 \n\t"
809 "movq %%mm0, (%%"REG_a", %1, 2) \n\t" 809 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
810 "movq %%mm2, (%0, %1, 4) \n\t" 810 "movq %%mm2, (%0, %1, 4) \n\t"
811 811
812 : 812 :
813 : "r" (src), "r" ((long)stride), "m" (c->pQPb) 813 : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
814 : "%"REG_a, "%"REG_c 814 : "%"REG_a, "%"REG_c
815 ); 815 );
816 816
817 /* 817 /*
818 { 818 {
1096 "movq (%0, %1), %%mm0 \n\t" 1096 "movq (%0, %1), %%mm0 \n\t"
1097 "psubb %%mm4, %%mm0 \n\t" 1097 "psubb %%mm4, %%mm0 \n\t"
1098 "movq %%mm0, (%0, %1) \n\t" 1098 "movq %%mm0, (%0, %1) \n\t"
1099 1099
1100 : "+r" (src) 1100 : "+r" (src)
1101 : "r" ((long)stride), "m" (c->pQPb) 1101 : "r" ((x86_reg)stride), "m" (c->pQPb)
1102 : "%"REG_a, "%"REG_c 1102 : "%"REG_a, "%"REG_c
1103 ); 1103 );
1104 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1104 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1105 const int l1= stride; 1105 const int l1= stride;
1106 const int l2= stride + l1; 1106 const int l2= stride + l1;
1365 DERING_CORE((%%REGd, %1) ,(%%REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7) 1365 DERING_CORE((%%REGd, %1) ,(%%REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
1366 DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7) 1366 DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
1367 DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) 1367 DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
1368 1368
1369 "1: \n\t" 1369 "1: \n\t"
1370 : : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2) 1370 : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2)
1371 : "%"REG_a, "%"REG_d, "%"REG_c 1371 : "%"REG_a, "%"REG_d, "%"REG_c
1372 ); 1372 );
1373 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1373 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1374 int y; 1374 int y;
1375 int min=255; 1375 int min=255;
1519 "movq %%mm0, (%%"REG_c") \n\t" 1519 "movq %%mm0, (%%"REG_c") \n\t"
1520 "movq (%0, %1, 8), %%mm0 \n\t" 1520 "movq (%0, %1, 8), %%mm0 \n\t"
1521 PAVGB(%%mm0, %%mm1) 1521 PAVGB(%%mm0, %%mm1)
1522 "movq %%mm1, (%%"REG_c", %1, 2) \n\t" 1522 "movq %%mm1, (%%"REG_c", %1, 2) \n\t"
1523 1523
1524 : : "r" (src), "r" ((long)stride) 1524 : : "r" (src), "r" ((x86_reg)stride)
1525 : "%"REG_a, "%"REG_c 1525 : "%"REG_a, "%"REG_c
1526 ); 1526 );
1527 #else 1527 #else
1528 int a, b, x; 1528 int a, b, x;
1529 src+= 4*stride; 1529 src+= 4*stride;
1589 DEINT_CUBIC((%0) , (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd, %1)) 1589 DEINT_CUBIC((%0) , (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd, %1))
1590 DEINT_CUBIC((%%REGa, %1), (%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%0, %1, 8)) 1590 DEINT_CUBIC((%%REGa, %1), (%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%0, %1, 8))
1591 DEINT_CUBIC((%0, %1, 4) , (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGc)) 1591 DEINT_CUBIC((%0, %1, 4) , (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGc))
1592 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, %1, 2)) 1592 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, %1, 2))
1593 1593
1594 : : "r" (src), "r" ((long)stride) 1594 : : "r" (src), "r" ((x86_reg)stride)
1595 : "%"REG_a, "%"REG_d, "%"REG_c 1595 : "%"REG_a, "%"REG_d, "%"REG_c
1596 ); 1596 );
1597 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1597 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1598 int x; 1598 int x;
1599 src+= stride*3; 1599 src+= stride*3;
1660 DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) ) 1660 DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) )
1661 DEINT_FF((%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%%REGd, %1, 2)) 1661 DEINT_FF((%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%%REGd, %1, 2))
1662 DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) 1662 DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
1663 1663
1664 "movq %%mm0, (%2) \n\t" 1664 "movq %%mm0, (%2) \n\t"
1665 : : "r" (src), "r" ((long)stride), "r"(tmp) 1665 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp)
1666 : "%"REG_a, "%"REG_d 1666 : "%"REG_a, "%"REG_d
1667 ); 1667 );
1668 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1668 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1669 int x; 1669 int x;
1670 src+= stride*4; 1670 src+= stride*4;
1750 DEINT_L5(%%mm0, %%mm1, (%%REGd, %1) , (%%REGd, %1, 2), (%0, %1, 8) ) 1750 DEINT_L5(%%mm0, %%mm1, (%%REGd, %1) , (%%REGd, %1, 2), (%0, %1, 8) )
1751 DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) 1751 DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
1752 1752
1753 "movq %%mm0, (%2) \n\t" 1753 "movq %%mm0, (%2) \n\t"
1754 "movq %%mm1, (%3) \n\t" 1754 "movq %%mm1, (%3) \n\t"
1755 : : "r" (src), "r" ((long)stride), "r"(tmp), "r"(tmp2) 1755 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2)
1756 : "%"REG_a, "%"REG_d 1756 : "%"REG_a, "%"REG_d
1757 ); 1757 );
1758 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1758 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1759 int x; 1759 int x;
1760 src+= stride*4; 1760 src+= stride*4;
1838 PAVGB(%%mm0, %%mm2) // L7+L9 1838 PAVGB(%%mm0, %%mm2) // L7+L9
1839 PAVGB(%%mm1, %%mm2) // 2L8 + L7 + L9 1839 PAVGB(%%mm1, %%mm2) // 2L8 + L7 + L9
1840 "movq %%mm2, (%%"REG_d", %1, 2) \n\t" 1840 "movq %%mm2, (%%"REG_d", %1, 2) \n\t"
1841 "movq %%mm1, (%2) \n\t" 1841 "movq %%mm1, (%2) \n\t"
1842 1842
1843 : : "r" (src), "r" ((long)stride), "r" (tmp) 1843 : : "r" (src), "r" ((x86_reg)stride), "r" (tmp)
1844 : "%"REG_a, "%"REG_d 1844 : "%"REG_a, "%"REG_d
1845 ); 1845 );
1846 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1846 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1847 int a, b, c, x; 1847 int a, b, c, x;
1848 src+= 4*stride; 1848 src+= 4*stride;
1942 "pmaxub %%mm1, %%mm0 \n\t" // 1942 "pmaxub %%mm1, %%mm0 \n\t" //
1943 "pminub %%mm0, %%mm2 \n\t" 1943 "pminub %%mm0, %%mm2 \n\t"
1944 "movq %%mm2, (%%"REG_d", %1, 2) \n\t" 1944 "movq %%mm2, (%%"REG_d", %1, 2) \n\t"
1945 1945
1946 1946
1947 : : "r" (src), "r" ((long)stride) 1947 : : "r" (src), "r" ((x86_reg)stride)
1948 : "%"REG_a, "%"REG_d 1948 : "%"REG_a, "%"REG_d
1949 ); 1949 );
1950 1950
1951 #else // MMX without MMX2 1951 #else // MMX without MMX2
1952 __asm__ volatile( 1952 __asm__ volatile(
1984 MEDIAN((%0) , (%%REGa) , (%%REGa, %1)) 1984 MEDIAN((%0) , (%%REGa) , (%%REGa, %1))
1985 MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4)) 1985 MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4))
1986 MEDIAN((%0, %1, 4) , (%%REGd) , (%%REGd, %1)) 1986 MEDIAN((%0, %1, 4) , (%%REGd) , (%%REGd, %1))
1987 MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) 1987 MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
1988 1988
1989 : : "r" (src), "r" ((long)stride) 1989 : : "r" (src), "r" ((x86_reg)stride)
1990 : "%"REG_a, "%"REG_d 1990 : "%"REG_a, "%"REG_d
1991 ); 1991 );
1992 #endif //HAVE_MMX2 1992 #endif //HAVE_MMX2
1993 #else //HAVE_MMX 1993 #else //HAVE_MMX
1994 int x, y; 1994 int x, y;
2091 "movd %%mm1, 100(%3) \n\t" 2091 "movd %%mm1, 100(%3) \n\t"
2092 "psrlq $32, %%mm1 \n\t" 2092 "psrlq $32, %%mm1 \n\t"
2093 "movd %%mm1, 116(%3) \n\t" 2093 "movd %%mm1, 116(%3) \n\t"
2094 2094
2095 2095
2096 :: "r" (src), "r" ((long)srcStride), "r" (dst1), "r" (dst2) 2096 :: "r" (src), "r" ((x86_reg)srcStride), "r" (dst1), "r" (dst2)
2097 : "%"REG_a 2097 : "%"REG_a
2098 ); 2098 );
2099 } 2099 }
2100 2100
2101 /** 2101 /**
2171 "movd %%mm2, 4(%%"REG_d") \n\t" 2171 "movd %%mm2, 4(%%"REG_d") \n\t"
2172 "movd %%mm1, 4(%%"REG_d", %1) \n\t" 2172 "movd %%mm1, 4(%%"REG_d", %1) \n\t"
2173 "psrlq $32, %%mm1 \n\t" 2173 "psrlq $32, %%mm1 \n\t"
2174 "movd %%mm1, 4(%%"REG_d", %1, 2) \n\t" 2174 "movd %%mm1, 4(%%"REG_d", %1, 2) \n\t"
2175 2175
2176 :: "r" (dst), "r" ((long)dstStride), "r" (src) 2176 :: "r" (dst), "r" ((x86_reg)dstStride), "r" (src)
2177 : "%"REG_a, "%"REG_d 2177 : "%"REG_a, "%"REG_d
2178 ); 2178 );
2179 } 2179 }
2180 #endif //HAVE_MMX 2180 #endif //HAVE_MMX
2181 //static long test=0; 2181 //static long test=0;
2474 "movq %%mm2, (%0, %%"REG_a", 2) \n\t" // L6 2474 "movq %%mm2, (%0, %%"REG_a", 2) \n\t" // L6
2475 "movq %%mm3, (%0, %%"REG_c") \n\t" // L7 2475 "movq %%mm3, (%0, %%"REG_c") \n\t" // L7
2476 2476
2477 "4: \n\t" 2477 "4: \n\t"
2478 2478
2479 :: "r" (src), "r" (tempBlurred), "r"((long)stride), "m" (tempBlurredPast) 2479 :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast)
2480 : "%"REG_a, "%"REG_d, "%"REG_c, "memory" 2480 : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
2481 ); 2481 );
2482 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 2482 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
2483 { 2483 {
2484 int y; 2484 int y;
2674 "psubb %%mm0, %%mm6 \n\t" 2674 "psubb %%mm0, %%mm6 \n\t"
2675 "pcmpgtb %%mm7, %%mm6 \n\t" 2675 "pcmpgtb %%mm7, %%mm6 \n\t"
2676 "movq %%mm6, %0 \n\t" 2676 "movq %%mm6, %0 \n\t"
2677 2677
2678 : "=m" (eq_mask), "=m" (dc_mask) 2678 : "=m" (eq_mask), "=m" (dc_mask)
2679 : "r" (src), "r" ((long)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold) 2679 : "r" (src), "r" ((x86_reg)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold)
2680 : "%"REG_a 2680 : "%"REG_a
2681 ); 2681 );
2682 2682
2683 both_masks = dc_mask & eq_mask; 2683 both_masks = dc_mask & eq_mask;
2684 2684
2685 if(both_masks){ 2685 if(both_masks){
2686 long offset= -8*step; 2686 x86_reg offset= -8*step;
2687 int64_t *temp_sums= sums; 2687 int64_t *temp_sums= sums;
2688 2688
2689 __asm__ volatile( 2689 __asm__ volatile(
2690 "movq %2, %%mm0 \n\t" // QP,..., QP 2690 "movq %2, %%mm0 \n\t" // QP,..., QP
2691 "pxor %%mm4, %%mm4 \n\t" 2691 "pxor %%mm4, %%mm4 \n\t"
2818 "movq %%mm1, 152(%3) \n\t" 2818 "movq %%mm1, 152(%3) \n\t"
2819 2819
2820 "mov %4, %0 \n\t" //FIXME 2820 "mov %4, %0 \n\t" //FIXME
2821 2821
2822 : "+&r"(src) 2822 : "+&r"(src)
2823 : "r" ((long)step), "m" (c->pQPb), "r"(sums), "g"(src) 2823 : "r" ((x86_reg)step), "m" (c->pQPb), "r"(sums), "g"(src)
2824 ); 2824 );
2825 2825
2826 src+= step; // src points to begin of the 8x8 Block 2826 src+= step; // src points to begin of the 8x8 Block
2827 2827
2828 __asm__ volatile( 2828 __asm__ volatile(
2855 "add $16, %1 \n\t" 2855 "add $16, %1 \n\t"
2856 "add %2, %0 \n\t" 2856 "add %2, %0 \n\t"
2857 " js 1b \n\t" 2857 " js 1b \n\t"
2858 2858
2859 : "+r"(offset), "+r"(temp_sums) 2859 : "+r"(offset), "+r"(temp_sums)
2860 : "r" ((long)step), "r"(src - offset), "m"(both_masks) 2860 : "r" ((x86_reg)step), "r"(src - offset), "m"(both_masks)
2861 ); 2861 );
2862 }else 2862 }else
2863 src+= step; // src points to begin of the 8x8 Block 2863 src+= step; // src points to begin of the 8x8 Block
2864 2864
2865 if(eq_mask != -1LL){ 2865 if(eq_mask != -1LL){
3090 "movq (%0, %1), %%mm0 \n\t" 3090 "movq (%0, %1), %%mm0 \n\t"
3091 "psubb %%mm1, %%mm0 \n\t" 3091 "psubb %%mm1, %%mm0 \n\t"
3092 "movq %%mm0, (%0, %1) \n\t" 3092 "movq %%mm0, (%0, %1) \n\t"
3093 3093
3094 : "+r" (temp_src) 3094 : "+r" (temp_src)
3095 : "r" ((long)step), "m" (c->pQPb), "m"(eq_mask) 3095 : "r" ((x86_reg)step), "m" (c->pQPb), "m"(eq_mask)
3096 : "%"REG_a, "%"REG_c 3096 : "%"REG_a, "%"REG_c
3097 ); 3097 );
3098 } 3098 }
3099 /*if(step==16){ 3099 /*if(step==16){
3100 STOP_TIMER("step16") 3100 STOP_TIMER("step16")
3191 3191
3192 : "=&a" (packedOffsetAndScale) 3192 : "=&a" (packedOffsetAndScale)
3193 : "0" (packedOffsetAndScale), 3193 : "0" (packedOffsetAndScale),
3194 "r"(src), 3194 "r"(src),
3195 "r"(dst), 3195 "r"(dst),
3196 "r" ((long)srcStride), 3196 "r" ((x86_reg)srcStride),
3197 "r" ((long)dstStride) 3197 "r" ((x86_reg)dstStride)
3198 : "%"REG_d 3198 : "%"REG_d
3199 ); 3199 );
3200 #else //HAVE_MMX 3200 #else //HAVE_MMX
3201 for(i=0; i<8; i++) 3201 for(i=0; i<8; i++)
3202 memcpy( &(dst[dstStride*i]), 3202 memcpy( &(dst[dstStride*i]),
3224 "lea (%%"REG_d",%3,4), %%"REG_d" \n\t" 3224 "lea (%%"REG_d",%3,4), %%"REG_d" \n\t"
3225 SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2)) 3225 SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
3226 3226
3227 : : "r" (src), 3227 : : "r" (src),
3228 "r" (dst), 3228 "r" (dst),
3229 "r" ((long)srcStride), 3229 "r" ((x86_reg)srcStride),
3230 "r" ((long)dstStride) 3230 "r" ((x86_reg)dstStride)
3231 : "%"REG_a, "%"REG_d 3231 : "%"REG_a, "%"REG_d
3232 ); 3232 );
3233 #else //HAVE_MMX 3233 #else //HAVE_MMX
3234 for(i=0; i<8; i++) 3234 for(i=0; i<8; i++)
3235 memcpy( &(dst[dstStride*i]), 3235 memcpy( &(dst[dstStride*i]),
3249 "add %1, %0 \n\t" 3249 "add %1, %0 \n\t"
3250 "movq %%mm0, (%0) \n\t" 3250 "movq %%mm0, (%0) \n\t"
3251 "movq %%mm0, (%0, %1) \n\t" 3251 "movq %%mm0, (%0, %1) \n\t"
3252 "movq %%mm0, (%0, %1, 2) \n\t" 3252 "movq %%mm0, (%0, %1, 2) \n\t"
3253 : "+r" (src) 3253 : "+r" (src)
3254 : "r" ((long)-stride) 3254 : "r" ((x86_reg)-stride)
3255 ); 3255 );
3256 #else 3256 #else
3257 int i; 3257 int i;
3258 uint8_t *p=src; 3258 uint8_t *p=src;
3259 for(i=0; i<3; i++){ 3259 for(i=0; i<3; i++){
3404 "prefetcht0 32(%%"REG_d", %2) \n\t" 3404 "prefetcht0 32(%%"REG_d", %2) \n\t"
3405 "add %1, %%"REG_a" \n\t" 3405 "add %1, %%"REG_a" \n\t"
3406 "add %3, %%"REG_d" \n\t" 3406 "add %3, %%"REG_d" \n\t"
3407 "prefetchnta 32(%%"REG_a", %0) \n\t" 3407 "prefetchnta 32(%%"REG_a", %0) \n\t"
3408 "prefetcht0 32(%%"REG_d", %2) \n\t" 3408 "prefetcht0 32(%%"REG_d", %2) \n\t"
3409 :: "r" (srcBlock), "r" ((long)srcStride), "r" (dstBlock), "r" ((long)dstStride), 3409 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
3410 "g" ((long)x), "g" ((long)copyAhead) 3410 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
3411 : "%"REG_a, "%"REG_d 3411 : "%"REG_a, "%"REG_d
3412 ); 3412 );
3413 3413
3414 #elif defined(HAVE_3DNOW) 3414 #elif defined(HAVE_3DNOW)
3415 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ... 3415 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ...
3540 "prefetcht0 32(%%"REG_d", %2) \n\t" 3540 "prefetcht0 32(%%"REG_d", %2) \n\t"
3541 "add %1, %%"REG_a" \n\t" 3541 "add %1, %%"REG_a" \n\t"
3542 "add %3, %%"REG_d" \n\t" 3542 "add %3, %%"REG_d" \n\t"
3543 "prefetchnta 32(%%"REG_a", %0) \n\t" 3543 "prefetchnta 32(%%"REG_a", %0) \n\t"
3544 "prefetcht0 32(%%"REG_d", %2) \n\t" 3544 "prefetcht0 32(%%"REG_d", %2) \n\t"
3545 :: "r" (srcBlock), "r" ((long)srcStride), "r" (dstBlock), "r" ((long)dstStride), 3545 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
3546 "g" ((long)x), "g" ((long)copyAhead) 3546 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
3547 : "%"REG_a, "%"REG_d 3547 : "%"REG_a, "%"REG_d
3548 ); 3548 );
3549 3549
3550 #elif defined(HAVE_3DNOW) 3550 #elif defined(HAVE_3DNOW)
3551 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ... 3551 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ...