Mercurial > libpostproc.hg
comparison postprocess_template.c @ 113:bf8f52662dc3 libpostproc
Replace long with x86_reg in postprocess_template.c, as in all other
x86 assembler code files; only libpostproc had been forgotten.
| author | reimar |
|---|---|
| date | Sun, 02 Nov 2008 18:59:44 +0000 |
| parents | d4d919ebc31c |
| children | bdd1788fb53b |
comparison
equal
deleted
inserted
replaced
| 112:d4d919ebc31c | 113:bf8f52662dc3 |
|---|---|
| 156 "packssdw %%mm4, %%mm4 \n\t" | 156 "packssdw %%mm4, %%mm4 \n\t" |
| 157 "movd %%mm0, %0 \n\t" | 157 "movd %%mm0, %0 \n\t" |
| 158 "movd %%mm4, %1 \n\t" | 158 "movd %%mm4, %1 \n\t" |
| 159 | 159 |
| 160 : "=r" (numEq), "=r" (dcOk) | 160 : "=r" (numEq), "=r" (dcOk) |
| 161 : "r" (src), "r" ((long)stride), "m" (c->pQPb) | 161 : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) |
| 162 : "%"REG_a | 162 : "%"REG_a |
| 163 ); | 163 ); |
| 164 | 164 |
| 165 numEq= (-numEq) &0xFF; | 165 numEq= (-numEq) &0xFF; |
| 166 if(numEq > c->ppMode.flatnessThreshold){ | 166 if(numEq > c->ppMode.flatnessThreshold){ |
| 301 PAVGB(%%mm0, %%mm5) // 112246 /16 | 301 PAVGB(%%mm0, %%mm5) // 112246 /16 |
| 302 "movq %%mm5, (%%"REG_a", %1, 4) \n\t" // X | 302 "movq %%mm5, (%%"REG_a", %1, 4) \n\t" // X |
| 303 "sub %1, %0 \n\t" | 303 "sub %1, %0 \n\t" |
| 304 | 304 |
| 305 : | 305 : |
| 306 : "r" (src), "r" ((long)stride), "m" (c->pQPb) | 306 : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) |
| 307 : "%"REG_a, "%"REG_c | 307 : "%"REG_a, "%"REG_c |
| 308 ); | 308 ); |
| 309 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 309 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 310 const int l1= stride; | 310 const int l1= stride; |
| 311 const int l2= stride + l1; | 311 const int l2= stride + l1; |
| 421 "psubsb %%mm5, %%mm2 \n\t" | 421 "psubsb %%mm5, %%mm2 \n\t" |
| 422 "psubb %%mm6, %%mm2 \n\t" | 422 "psubb %%mm6, %%mm2 \n\t" |
| 423 "movq %%mm2, (%%"REG_c", %1) \n\t" | 423 "movq %%mm2, (%%"REG_c", %1) \n\t" |
| 424 | 424 |
| 425 : | 425 : |
| 426 : "r" (src), "r" ((long)stride) | 426 : "r" (src), "r" ((x86_reg)stride) |
| 427 : "%"REG_a, "%"REG_c | 427 : "%"REG_a, "%"REG_c |
| 428 ); | 428 ); |
| 429 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 429 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 430 const int l1= stride; | 430 const int l1= stride; |
| 431 const int l2= stride + l1; | 431 const int l2= stride + l1; |
| 543 "paddusb %%mm1, %%mm0 \n\t" | 543 "paddusb %%mm1, %%mm0 \n\t" |
| 544 "pxor %%mm2, %%mm0 \n\t" | 544 "pxor %%mm2, %%mm0 \n\t" |
| 545 "movq %%mm0, (%%"REG_c", %1, 2) \n\t" // line 7 | 545 "movq %%mm0, (%%"REG_c", %1, 2) \n\t" // line 7 |
| 546 | 546 |
| 547 : | 547 : |
| 548 : "r" (src), "r" ((long)stride), "m" (co->pQPb) | 548 : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb) |
| 549 : "%"REG_a, "%"REG_c | 549 : "%"REG_a, "%"REG_c |
| 550 ); | 550 ); |
| 551 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 551 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 552 | 552 |
| 553 const int l1= stride; | 553 const int l1= stride; |
| 808 "pxor %%mm1, %%mm2 \n\t" | 808 "pxor %%mm1, %%mm2 \n\t" |
| 809 "movq %%mm0, (%%"REG_a", %1, 2) \n\t" | 809 "movq %%mm0, (%%"REG_a", %1, 2) \n\t" |
| 810 "movq %%mm2, (%0, %1, 4) \n\t" | 810 "movq %%mm2, (%0, %1, 4) \n\t" |
| 811 | 811 |
| 812 : | 812 : |
| 813 : "r" (src), "r" ((long)stride), "m" (c->pQPb) | 813 : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) |
| 814 : "%"REG_a, "%"REG_c | 814 : "%"REG_a, "%"REG_c |
| 815 ); | 815 ); |
| 816 | 816 |
| 817 /* | 817 /* |
| 818 { | 818 { |
| 1096 "movq (%0, %1), %%mm0 \n\t" | 1096 "movq (%0, %1), %%mm0 \n\t" |
| 1097 "psubb %%mm4, %%mm0 \n\t" | 1097 "psubb %%mm4, %%mm0 \n\t" |
| 1098 "movq %%mm0, (%0, %1) \n\t" | 1098 "movq %%mm0, (%0, %1) \n\t" |
| 1099 | 1099 |
| 1100 : "+r" (src) | 1100 : "+r" (src) |
| 1101 : "r" ((long)stride), "m" (c->pQPb) | 1101 : "r" ((x86_reg)stride), "m" (c->pQPb) |
| 1102 : "%"REG_a, "%"REG_c | 1102 : "%"REG_a, "%"REG_c |
| 1103 ); | 1103 ); |
| 1104 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1104 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1105 const int l1= stride; | 1105 const int l1= stride; |
| 1106 const int l2= stride + l1; | 1106 const int l2= stride + l1; |
| 1365 DERING_CORE((%%REGd, %1) ,(%%REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7) | 1365 DERING_CORE((%%REGd, %1) ,(%%REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7) |
| 1366 DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7) | 1366 DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7) |
| 1367 DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) | 1367 DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) |
| 1368 | 1368 |
| 1369 "1: \n\t" | 1369 "1: \n\t" |
| 1370 : : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2) | 1370 : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2) |
| 1371 : "%"REG_a, "%"REG_d, "%"REG_c | 1371 : "%"REG_a, "%"REG_d, "%"REG_c |
| 1372 ); | 1372 ); |
| 1373 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1373 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1374 int y; | 1374 int y; |
| 1375 int min=255; | 1375 int min=255; |
| 1519 "movq %%mm0, (%%"REG_c") \n\t" | 1519 "movq %%mm0, (%%"REG_c") \n\t" |
| 1520 "movq (%0, %1, 8), %%mm0 \n\t" | 1520 "movq (%0, %1, 8), %%mm0 \n\t" |
| 1521 PAVGB(%%mm0, %%mm1) | 1521 PAVGB(%%mm0, %%mm1) |
| 1522 "movq %%mm1, (%%"REG_c", %1, 2) \n\t" | 1522 "movq %%mm1, (%%"REG_c", %1, 2) \n\t" |
| 1523 | 1523 |
| 1524 : : "r" (src), "r" ((long)stride) | 1524 : : "r" (src), "r" ((x86_reg)stride) |
| 1525 : "%"REG_a, "%"REG_c | 1525 : "%"REG_a, "%"REG_c |
| 1526 ); | 1526 ); |
| 1527 #else | 1527 #else |
| 1528 int a, b, x; | 1528 int a, b, x; |
| 1529 src+= 4*stride; | 1529 src+= 4*stride; |
| 1589 DEINT_CUBIC((%0) , (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd, %1)) | 1589 DEINT_CUBIC((%0) , (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd, %1)) |
| 1590 DEINT_CUBIC((%%REGa, %1), (%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%0, %1, 8)) | 1590 DEINT_CUBIC((%%REGa, %1), (%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%0, %1, 8)) |
| 1591 DEINT_CUBIC((%0, %1, 4) , (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGc)) | 1591 DEINT_CUBIC((%0, %1, 4) , (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGc)) |
| 1592 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, %1, 2)) | 1592 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, %1, 2)) |
| 1593 | 1593 |
| 1594 : : "r" (src), "r" ((long)stride) | 1594 : : "r" (src), "r" ((x86_reg)stride) |
| 1595 : "%"REG_a, "%"REG_d, "%"REG_c | 1595 : "%"REG_a, "%"REG_d, "%"REG_c |
| 1596 ); | 1596 ); |
| 1597 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1597 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1598 int x; | 1598 int x; |
| 1599 src+= stride*3; | 1599 src+= stride*3; |
| 1660 DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) ) | 1660 DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) ) |
| 1661 DEINT_FF((%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%%REGd, %1, 2)) | 1661 DEINT_FF((%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%%REGd, %1, 2)) |
| 1662 DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) | 1662 DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) |
| 1663 | 1663 |
| 1664 "movq %%mm0, (%2) \n\t" | 1664 "movq %%mm0, (%2) \n\t" |
| 1665 : : "r" (src), "r" ((long)stride), "r"(tmp) | 1665 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp) |
| 1666 : "%"REG_a, "%"REG_d | 1666 : "%"REG_a, "%"REG_d |
| 1667 ); | 1667 ); |
| 1668 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1668 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1669 int x; | 1669 int x; |
| 1670 src+= stride*4; | 1670 src+= stride*4; |
| 1750 DEINT_L5(%%mm0, %%mm1, (%%REGd, %1) , (%%REGd, %1, 2), (%0, %1, 8) ) | 1750 DEINT_L5(%%mm0, %%mm1, (%%REGd, %1) , (%%REGd, %1, 2), (%0, %1, 8) ) |
| 1751 DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) | 1751 DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) |
| 1752 | 1752 |
| 1753 "movq %%mm0, (%2) \n\t" | 1753 "movq %%mm0, (%2) \n\t" |
| 1754 "movq %%mm1, (%3) \n\t" | 1754 "movq %%mm1, (%3) \n\t" |
| 1755 : : "r" (src), "r" ((long)stride), "r"(tmp), "r"(tmp2) | 1755 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2) |
| 1756 : "%"REG_a, "%"REG_d | 1756 : "%"REG_a, "%"REG_d |
| 1757 ); | 1757 ); |
| 1758 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1758 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1759 int x; | 1759 int x; |
| 1760 src+= stride*4; | 1760 src+= stride*4; |
| 1838 PAVGB(%%mm0, %%mm2) // L7+L9 | 1838 PAVGB(%%mm0, %%mm2) // L7+L9 |
| 1839 PAVGB(%%mm1, %%mm2) // 2L8 + L7 + L9 | 1839 PAVGB(%%mm1, %%mm2) // 2L8 + L7 + L9 |
| 1840 "movq %%mm2, (%%"REG_d", %1, 2) \n\t" | 1840 "movq %%mm2, (%%"REG_d", %1, 2) \n\t" |
| 1841 "movq %%mm1, (%2) \n\t" | 1841 "movq %%mm1, (%2) \n\t" |
| 1842 | 1842 |
| 1843 : : "r" (src), "r" ((long)stride), "r" (tmp) | 1843 : : "r" (src), "r" ((x86_reg)stride), "r" (tmp) |
| 1844 : "%"REG_a, "%"REG_d | 1844 : "%"REG_a, "%"REG_d |
| 1845 ); | 1845 ); |
| 1846 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1846 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1847 int a, b, c, x; | 1847 int a, b, c, x; |
| 1848 src+= 4*stride; | 1848 src+= 4*stride; |
| 1942 "pmaxub %%mm1, %%mm0 \n\t" // | 1942 "pmaxub %%mm1, %%mm0 \n\t" // |
| 1943 "pminub %%mm0, %%mm2 \n\t" | 1943 "pminub %%mm0, %%mm2 \n\t" |
| 1944 "movq %%mm2, (%%"REG_d", %1, 2) \n\t" | 1944 "movq %%mm2, (%%"REG_d", %1, 2) \n\t" |
| 1945 | 1945 |
| 1946 | 1946 |
| 1947 : : "r" (src), "r" ((long)stride) | 1947 : : "r" (src), "r" ((x86_reg)stride) |
| 1948 : "%"REG_a, "%"REG_d | 1948 : "%"REG_a, "%"REG_d |
| 1949 ); | 1949 ); |
| 1950 | 1950 |
| 1951 #else // MMX without MMX2 | 1951 #else // MMX without MMX2 |
| 1952 __asm__ volatile( | 1952 __asm__ volatile( |
| 1984 MEDIAN((%0) , (%%REGa) , (%%REGa, %1)) | 1984 MEDIAN((%0) , (%%REGa) , (%%REGa, %1)) |
| 1985 MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4)) | 1985 MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4)) |
| 1986 MEDIAN((%0, %1, 4) , (%%REGd) , (%%REGd, %1)) | 1986 MEDIAN((%0, %1, 4) , (%%REGd) , (%%REGd, %1)) |
| 1987 MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) | 1987 MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) |
| 1988 | 1988 |
| 1989 : : "r" (src), "r" ((long)stride) | 1989 : : "r" (src), "r" ((x86_reg)stride) |
| 1990 : "%"REG_a, "%"REG_d | 1990 : "%"REG_a, "%"REG_d |
| 1991 ); | 1991 ); |
| 1992 #endif //HAVE_MMX2 | 1992 #endif //HAVE_MMX2 |
| 1993 #else //HAVE_MMX | 1993 #else //HAVE_MMX |
| 1994 int x, y; | 1994 int x, y; |
| 2091 "movd %%mm1, 100(%3) \n\t" | 2091 "movd %%mm1, 100(%3) \n\t" |
| 2092 "psrlq $32, %%mm1 \n\t" | 2092 "psrlq $32, %%mm1 \n\t" |
| 2093 "movd %%mm1, 116(%3) \n\t" | 2093 "movd %%mm1, 116(%3) \n\t" |
| 2094 | 2094 |
| 2095 | 2095 |
| 2096 :: "r" (src), "r" ((long)srcStride), "r" (dst1), "r" (dst2) | 2096 :: "r" (src), "r" ((x86_reg)srcStride), "r" (dst1), "r" (dst2) |
| 2097 : "%"REG_a | 2097 : "%"REG_a |
| 2098 ); | 2098 ); |
| 2099 } | 2099 } |
| 2100 | 2100 |
| 2101 /** | 2101 /** |
| 2171 "movd %%mm2, 4(%%"REG_d") \n\t" | 2171 "movd %%mm2, 4(%%"REG_d") \n\t" |
| 2172 "movd %%mm1, 4(%%"REG_d", %1) \n\t" | 2172 "movd %%mm1, 4(%%"REG_d", %1) \n\t" |
| 2173 "psrlq $32, %%mm1 \n\t" | 2173 "psrlq $32, %%mm1 \n\t" |
| 2174 "movd %%mm1, 4(%%"REG_d", %1, 2) \n\t" | 2174 "movd %%mm1, 4(%%"REG_d", %1, 2) \n\t" |
| 2175 | 2175 |
| 2176 :: "r" (dst), "r" ((long)dstStride), "r" (src) | 2176 :: "r" (dst), "r" ((x86_reg)dstStride), "r" (src) |
| 2177 : "%"REG_a, "%"REG_d | 2177 : "%"REG_a, "%"REG_d |
| 2178 ); | 2178 ); |
| 2179 } | 2179 } |
| 2180 #endif //HAVE_MMX | 2180 #endif //HAVE_MMX |
| 2181 //static long test=0; | 2181 //static long test=0; |
| 2474 "movq %%mm2, (%0, %%"REG_a", 2) \n\t" // L6 | 2474 "movq %%mm2, (%0, %%"REG_a", 2) \n\t" // L6 |
| 2475 "movq %%mm3, (%0, %%"REG_c") \n\t" // L7 | 2475 "movq %%mm3, (%0, %%"REG_c") \n\t" // L7 |
| 2476 | 2476 |
| 2477 "4: \n\t" | 2477 "4: \n\t" |
| 2478 | 2478 |
| 2479 :: "r" (src), "r" (tempBlurred), "r"((long)stride), "m" (tempBlurredPast) | 2479 :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast) |
| 2480 : "%"REG_a, "%"REG_d, "%"REG_c, "memory" | 2480 : "%"REG_a, "%"REG_d, "%"REG_c, "memory" |
| 2481 ); | 2481 ); |
| 2482 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 2482 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 2483 { | 2483 { |
| 2484 int y; | 2484 int y; |
| 2674 "psubb %%mm0, %%mm6 \n\t" | 2674 "psubb %%mm0, %%mm6 \n\t" |
| 2675 "pcmpgtb %%mm7, %%mm6 \n\t" | 2675 "pcmpgtb %%mm7, %%mm6 \n\t" |
| 2676 "movq %%mm6, %0 \n\t" | 2676 "movq %%mm6, %0 \n\t" |
| 2677 | 2677 |
| 2678 : "=m" (eq_mask), "=m" (dc_mask) | 2678 : "=m" (eq_mask), "=m" (dc_mask) |
| 2679 : "r" (src), "r" ((long)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold) | 2679 : "r" (src), "r" ((x86_reg)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold) |
| 2680 : "%"REG_a | 2680 : "%"REG_a |
| 2681 ); | 2681 ); |
| 2682 | 2682 |
| 2683 both_masks = dc_mask & eq_mask; | 2683 both_masks = dc_mask & eq_mask; |
| 2684 | 2684 |
| 2685 if(both_masks){ | 2685 if(both_masks){ |
| 2686 long offset= -8*step; | 2686 x86_reg offset= -8*step; |
| 2687 int64_t *temp_sums= sums; | 2687 int64_t *temp_sums= sums; |
| 2688 | 2688 |
| 2689 __asm__ volatile( | 2689 __asm__ volatile( |
| 2690 "movq %2, %%mm0 \n\t" // QP,..., QP | 2690 "movq %2, %%mm0 \n\t" // QP,..., QP |
| 2691 "pxor %%mm4, %%mm4 \n\t" | 2691 "pxor %%mm4, %%mm4 \n\t" |
| 2818 "movq %%mm1, 152(%3) \n\t" | 2818 "movq %%mm1, 152(%3) \n\t" |
| 2819 | 2819 |
| 2820 "mov %4, %0 \n\t" //FIXME | 2820 "mov %4, %0 \n\t" //FIXME |
| 2821 | 2821 |
| 2822 : "+&r"(src) | 2822 : "+&r"(src) |
| 2823 : "r" ((long)step), "m" (c->pQPb), "r"(sums), "g"(src) | 2823 : "r" ((x86_reg)step), "m" (c->pQPb), "r"(sums), "g"(src) |
| 2824 ); | 2824 ); |
| 2825 | 2825 |
| 2826 src+= step; // src points to begin of the 8x8 Block | 2826 src+= step; // src points to begin of the 8x8 Block |
| 2827 | 2827 |
| 2828 __asm__ volatile( | 2828 __asm__ volatile( |
| 2855 "add $16, %1 \n\t" | 2855 "add $16, %1 \n\t" |
| 2856 "add %2, %0 \n\t" | 2856 "add %2, %0 \n\t" |
| 2857 " js 1b \n\t" | 2857 " js 1b \n\t" |
| 2858 | 2858 |
| 2859 : "+r"(offset), "+r"(temp_sums) | 2859 : "+r"(offset), "+r"(temp_sums) |
| 2860 : "r" ((long)step), "r"(src - offset), "m"(both_masks) | 2860 : "r" ((x86_reg)step), "r"(src - offset), "m"(both_masks) |
| 2861 ); | 2861 ); |
| 2862 }else | 2862 }else |
| 2863 src+= step; // src points to begin of the 8x8 Block | 2863 src+= step; // src points to begin of the 8x8 Block |
| 2864 | 2864 |
| 2865 if(eq_mask != -1LL){ | 2865 if(eq_mask != -1LL){ |
| 3090 "movq (%0, %1), %%mm0 \n\t" | 3090 "movq (%0, %1), %%mm0 \n\t" |
| 3091 "psubb %%mm1, %%mm0 \n\t" | 3091 "psubb %%mm1, %%mm0 \n\t" |
| 3092 "movq %%mm0, (%0, %1) \n\t" | 3092 "movq %%mm0, (%0, %1) \n\t" |
| 3093 | 3093 |
| 3094 : "+r" (temp_src) | 3094 : "+r" (temp_src) |
| 3095 : "r" ((long)step), "m" (c->pQPb), "m"(eq_mask) | 3095 : "r" ((x86_reg)step), "m" (c->pQPb), "m"(eq_mask) |
| 3096 : "%"REG_a, "%"REG_c | 3096 : "%"REG_a, "%"REG_c |
| 3097 ); | 3097 ); |
| 3098 } | 3098 } |
| 3099 /*if(step==16){ | 3099 /*if(step==16){ |
| 3100 STOP_TIMER("step16") | 3100 STOP_TIMER("step16") |
| 3191 | 3191 |
| 3192 : "=&a" (packedOffsetAndScale) | 3192 : "=&a" (packedOffsetAndScale) |
| 3193 : "0" (packedOffsetAndScale), | 3193 : "0" (packedOffsetAndScale), |
| 3194 "r"(src), | 3194 "r"(src), |
| 3195 "r"(dst), | 3195 "r"(dst), |
| 3196 "r" ((long)srcStride), | 3196 "r" ((x86_reg)srcStride), |
| 3197 "r" ((long)dstStride) | 3197 "r" ((x86_reg)dstStride) |
| 3198 : "%"REG_d | 3198 : "%"REG_d |
| 3199 ); | 3199 ); |
| 3200 #else //HAVE_MMX | 3200 #else //HAVE_MMX |
| 3201 for(i=0; i<8; i++) | 3201 for(i=0; i<8; i++) |
| 3202 memcpy( &(dst[dstStride*i]), | 3202 memcpy( &(dst[dstStride*i]), |
| 3224 "lea (%%"REG_d",%3,4), %%"REG_d" \n\t" | 3224 "lea (%%"REG_d",%3,4), %%"REG_d" \n\t" |
| 3225 SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2)) | 3225 SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2)) |
| 3226 | 3226 |
| 3227 : : "r" (src), | 3227 : : "r" (src), |
| 3228 "r" (dst), | 3228 "r" (dst), |
| 3229 "r" ((long)srcStride), | 3229 "r" ((x86_reg)srcStride), |
| 3230 "r" ((long)dstStride) | 3230 "r" ((x86_reg)dstStride) |
| 3231 : "%"REG_a, "%"REG_d | 3231 : "%"REG_a, "%"REG_d |
| 3232 ); | 3232 ); |
| 3233 #else //HAVE_MMX | 3233 #else //HAVE_MMX |
| 3234 for(i=0; i<8; i++) | 3234 for(i=0; i<8; i++) |
| 3235 memcpy( &(dst[dstStride*i]), | 3235 memcpy( &(dst[dstStride*i]), |
| 3249 "add %1, %0 \n\t" | 3249 "add %1, %0 \n\t" |
| 3250 "movq %%mm0, (%0) \n\t" | 3250 "movq %%mm0, (%0) \n\t" |
| 3251 "movq %%mm0, (%0, %1) \n\t" | 3251 "movq %%mm0, (%0, %1) \n\t" |
| 3252 "movq %%mm0, (%0, %1, 2) \n\t" | 3252 "movq %%mm0, (%0, %1, 2) \n\t" |
| 3253 : "+r" (src) | 3253 : "+r" (src) |
| 3254 : "r" ((long)-stride) | 3254 : "r" ((x86_reg)-stride) |
| 3255 ); | 3255 ); |
| 3256 #else | 3256 #else |
| 3257 int i; | 3257 int i; |
| 3258 uint8_t *p=src; | 3258 uint8_t *p=src; |
| 3259 for(i=0; i<3; i++){ | 3259 for(i=0; i<3; i++){ |
| 3404 "prefetcht0 32(%%"REG_d", %2) \n\t" | 3404 "prefetcht0 32(%%"REG_d", %2) \n\t" |
| 3405 "add %1, %%"REG_a" \n\t" | 3405 "add %1, %%"REG_a" \n\t" |
| 3406 "add %3, %%"REG_d" \n\t" | 3406 "add %3, %%"REG_d" \n\t" |
| 3407 "prefetchnta 32(%%"REG_a", %0) \n\t" | 3407 "prefetchnta 32(%%"REG_a", %0) \n\t" |
| 3408 "prefetcht0 32(%%"REG_d", %2) \n\t" | 3408 "prefetcht0 32(%%"REG_d", %2) \n\t" |
| 3409 :: "r" (srcBlock), "r" ((long)srcStride), "r" (dstBlock), "r" ((long)dstStride), | 3409 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride), |
| 3410 "g" ((long)x), "g" ((long)copyAhead) | 3410 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead) |
| 3411 : "%"REG_a, "%"REG_d | 3411 : "%"REG_a, "%"REG_d |
| 3412 ); | 3412 ); |
| 3413 | 3413 |
| 3414 #elif defined(HAVE_3DNOW) | 3414 #elif defined(HAVE_3DNOW) |
| 3415 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ... | 3415 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ... |
| 3540 "prefetcht0 32(%%"REG_d", %2) \n\t" | 3540 "prefetcht0 32(%%"REG_d", %2) \n\t" |
| 3541 "add %1, %%"REG_a" \n\t" | 3541 "add %1, %%"REG_a" \n\t" |
| 3542 "add %3, %%"REG_d" \n\t" | 3542 "add %3, %%"REG_d" \n\t" |
| 3543 "prefetchnta 32(%%"REG_a", %0) \n\t" | 3543 "prefetchnta 32(%%"REG_a", %0) \n\t" |
| 3544 "prefetcht0 32(%%"REG_d", %2) \n\t" | 3544 "prefetcht0 32(%%"REG_d", %2) \n\t" |
| 3545 :: "r" (srcBlock), "r" ((long)srcStride), "r" (dstBlock), "r" ((long)dstStride), | 3545 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride), |
| 3546 "g" ((long)x), "g" ((long)copyAhead) | 3546 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead) |
| 3547 : "%"REG_a, "%"REG_d | 3547 : "%"REG_a, "%"REG_d |
| 3548 ); | 3548 ); |
| 3549 | 3549 |
| 3550 #elif defined(HAVE_3DNOW) | 3550 #elif defined(HAVE_3DNOW) |
| 3551 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ... | 3551 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ... |
