Mercurial > libavcodec.hg
comparison libpostproc/postprocess.c @ 106:389391a6d0bf libavcodec
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
added deinterlace filters (linear interpolate, linear blend, median)
minor cleanups (removed some outcommented stuff)
| author | michael |
|---|---|
| date | Mon, 15 Oct 2001 03:01:08 +0000 |
| parents | a2f94bfb5793 |
| children | bd163e13a0fb |
comparison
equal
deleted
inserted
replaced
| 105:a2f94bfb5793 | 106:389391a6d0bf |
|---|---|
| 15 along with this program; if not, write to the Free Software | 15 along with this program; if not, write to the Free Software |
| 16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 17 */ | 17 */ |
| 18 | 18 |
| 19 /* | 19 /* |
| 20 C MMX MMX2 3DNow* | 20 C MMX MMX2 3DNow |
| 21 isVertDC Ec Ec | 21 isVertDC Ec Ec |
| 22 isVertMinMaxOk Ec Ec | 22 isVertMinMaxOk Ec Ec |
| 23 doVertLowPass E e e* | 23 doVertLowPass E e e |
| 24 doVertDefFilter Ec Ec Ec | 24 doVertDefFilter Ec Ec Ec |
| 25 isHorizDC Ec Ec | 25 isHorizDC Ec Ec |
| 26 isHorizMinMaxOk a | 26 isHorizMinMaxOk a |
| 27 doHorizLowPass E a a* | 27 doHorizLowPass E a a |
| 28 doHorizDefFilter E ac ac | 28 doHorizDefFilter E ac ac |
| 29 deRing | 29 deRing |
| 30 Vertical RKAlgo1 E a a* | 30 Vertical RKAlgo1 E a a |
| 31 Vertical X1 a E E* | 31 Vertical X1 a E E |
| 32 Horizontal X1 a E E* | 32 Horizontal X1 a E E |
| 33 LinIpolDeinterlace a E E* | |
| 34 LinBlendDeinterlace a E E* | |
| 35 MedianDeinterlace a E | |
| 33 | 36 |
| 34 | 37 |
| 35 * i dont have a 3dnow CPU -> its untested | 38 * i dont have a 3dnow CPU -> its untested |
| 36 E = Exact implementation | 39 E = Exact implementation |
| 37 e = almost exact implementation | 40 e = almost exact implementation |
| 53 make the mainloop more flexible (variable number of blocks at once | 56 make the mainloop more flexible (variable number of blocks at once |
| 54 (the if/else stuff per block is slowing things down) | 57 (the if/else stuff per block is slowing things down) |
| 55 compare the quality & speed of all filters | 58 compare the quality & speed of all filters |
| 56 implement a few simple deinterlacing filters | 59 implement a few simple deinterlacing filters |
| 57 split this huge file | 60 split this huge file |
| 61 fix warnings (unused vars, ...) | |
| 58 ... | 62 ... |
| 59 | 63 |
| 60 Notes: | 64 Notes: |
| 61 | 65 |
| 62 */ | 66 */ |
| 63 | 67 |
| 64 /* | 68 /* |
| 65 Changelog: use the CVS log | 69 Changelog: use the CVS log |
| 70 rewrote the horizontal lowpass filter to fix a bug which caused a blocky look | |
| 71 added deinterlace filters (linear interpolate, linear blend, median) | |
| 72 minor cleanups (removed some outcommented stuff) | |
| 66 0.1.3 | 73 0.1.3 |
| 67 bugfixes: last 3 lines not brightness/contrast corrected | 74 bugfixes: last 3 lines not brightness/contrast corrected |
| 68 brightness statistics messed up with initial black pic | 75 brightness statistics messed up with initial black pic |
| 69 changed initial values of the brightness statistics | 76 changed initial values of the brightness statistics |
| 70 C++ -> C conversion | 77 C++ -> C conversion |
| 192 //FIXME? |255-0| = 1 (shouldnt be a problem ...) | 199 //FIXME? |255-0| = 1 (shouldnt be a problem ...) |
| 193 /** | 200 /** |
| 194 * Check if the middle 8x8 Block in the given 8x10 block is flat | 201 * Check if the middle 8x8 Block in the given 8x10 block is flat |
| 195 */ | 202 */ |
| 196 static inline int isVertDC(uint8_t src[], int stride){ | 203 static inline int isVertDC(uint8_t src[], int stride){ |
| 197 // return true; | |
| 198 int numEq= 0; | 204 int numEq= 0; |
| 199 int y; | 205 int y; |
| 200 src+= stride; // src points to begin of the 8x8 Block | 206 src+= stride; // src points to begin of the 8x8 Block |
| 201 #ifdef HAVE_MMX | 207 #ifdef HAVE_MMX |
| 202 asm volatile( | 208 asm volatile( |
| 203 // "int $3 \n\t" | |
| 204 "pushl %1\n\t" | 209 "pushl %1\n\t" |
| 205 "movq b7E, %%mm7 \n\t" // mm7 = 0x7F | 210 "movq b7E, %%mm7 \n\t" // mm7 = 0x7F |
| 206 "movq b7C, %%mm6 \n\t" // mm6 = 0x7D | 211 "movq b7C, %%mm6 \n\t" // mm6 = 0x7D |
| 207 "movq (%1), %%mm0 \n\t" | 212 "movq (%1), %%mm0 \n\t" |
| 208 "addl %2, %1 \n\t" | 213 "addl %2, %1 \n\t" |
| 1575 } | 1580 } |
| 1576 #endif | 1581 #endif |
| 1577 } | 1582 } |
| 1578 | 1583 |
| 1579 /** | 1584 /** |
| 1580 * Do a horizontal low pass filter on the 8x8 block | 1585 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) |
| 1581 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) | 1586 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) |
| 1582 * using approximately the 7-Tap Filter (1,2,3,4,3,2,1)/16 (MMX2/3DNOW version) | 1587 * using the 7-Tap Filter (2,2,2,4,2,2,2)/16 (MMX2/3DNOW version) |
| 1583 */ | 1588 */ |
| 1584 static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) | 1589 static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) |
| 1585 { | 1590 { |
| 1586 //return; | 1591 //return; |
| 1587 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1592 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1633 "punpcklbw %%mm2, %%mm2 \n\t"\ | 1638 "punpcklbw %%mm2, %%mm2 \n\t"\ |
| 1634 "movq %%mm2, (%0) \n\t"\ | 1639 "movq %%mm2, (%0) \n\t"\ |
| 1635 */ | 1640 */ |
| 1636 // approximately a 7-Tap Filter with Vector (1,2,3,4,3,2,1)/16 | 1641 // approximately a 7-Tap Filter with Vector (1,2,3,4,3,2,1)/16 |
| 1637 /* | 1642 /* |
| 1638 31 | |
| 1639 121 | |
| 1640 121 | |
| 1641 121 | |
| 1642 121 | |
| 1643 121 | |
| 1644 121 | |
| 1645 13 | |
| 1646 Implemented Exact 7-Tap | 1643 Implemented Exact 7-Tap |
| 1647 9421 A321 | 1644 9421 A321 |
| 1648 36421 64321 | 1645 36421 64321 |
| 1649 334321 = | 1646 334321 = |
| 1650 1234321 = | 1647 1234321 = |
| 1652 123433 = | 1649 123433 = |
| 1653 12463 12346 | 1650 12463 12346 |
| 1654 1249 123A | 1651 1249 123A |
| 1655 | 1652 |
| 1656 */ | 1653 */ |
| 1654 | |
| 1657 #ifdef HAVE_MMX2 | 1655 #ifdef HAVE_MMX2 |
| 1658 #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ | 1656 #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ |
| 1659 "movq %%mm0, %%mm1 \n\t"\ | 1657 "movq %%mm0, %%mm1 \n\t"\ |
| 1660 "movq %%mm0, %%mm2 \n\t"\ | 1658 "movq %%mm0, %%mm2 \n\t"\ |
| 1661 "movq %%mm0, %%mm3 \n\t"\ | 1659 "movq %%mm0, %%mm3 \n\t"\ |
| 1678 "movd %%mm0, 4(%0) \n\t" | 1676 "movd %%mm0, 4(%0) \n\t" |
| 1679 #else | 1677 #else |
| 1680 #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ | 1678 #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ |
| 1681 "movq %%mm0, %%mm1 \n\t"\ | 1679 "movq %%mm0, %%mm1 \n\t"\ |
| 1682 "movq %%mm0, %%mm2 \n\t"\ | 1680 "movq %%mm0, %%mm2 \n\t"\ |
| 1683 "movq %%mm0, %%mm3 \n\t"\ | 1681 "movd -4(%0), %%mm3 \n\t" /*0001000*/\ |
| 1684 "movq %%mm0, %%mm4 \n\t"\ | 1682 "movd 8(%0), %%mm4 \n\t" /*0001000*/\ |
| 1685 "psllq $8, %%mm1 \n\t"\ | 1683 "psllq $8, %%mm1 \n\t"\ |
| 1686 "psrlq $8, %%mm2 \n\t"\ | 1684 "psrlq $8, %%mm2 \n\t"\ |
| 1687 "pand bm00000001, %%mm3 \n\t"\ | 1685 "psrlq $24, %%mm3 \n\t"\ |
| 1688 "pand bm10000000, %%mm4 \n\t"\ | 1686 "psllq $56, %%mm4 \n\t"\ |
| 1689 "por %%mm3, %%mm1 \n\t"\ | 1687 "por %%mm3, %%mm1 \n\t"\ |
| 1690 "por %%mm4, %%mm2 \n\t"\ | 1688 "por %%mm4, %%mm2 \n\t"\ |
| 1691 PAVGB(%%mm2, %%mm1)\ | 1689 PAVGB(%%mm2, %%mm1)\ |
| 1692 PAVGB(%%mm1, %%mm0)\ | 1690 PAVGB(%%mm1, %%mm0)\ |
| 1693 \ | 1691 \ |
| 1706 "movd %%mm0, (%0) \n\t"\ | 1704 "movd %%mm0, (%0) \n\t"\ |
| 1707 "psrlq $32, %%mm0 \n\t"\ | 1705 "psrlq $32, %%mm0 \n\t"\ |
| 1708 "movd %%mm0, 4(%0) \n\t" | 1706 "movd %%mm0, 4(%0) \n\t" |
| 1709 #endif | 1707 #endif |
| 1710 | 1708 |
| 1711 #define HLP(i) HLP3(i) | 1709 /* uses the 7-Tap Filter: 1112111 */ |
| 1710 #define NEW_HLP(i)\ | |
| 1711 "movq " #i "(%%eax), %%mm0 \n\t"\ | |
| 1712 "movq %%mm0, %%mm1 \n\t"\ | |
| 1713 "movq %%mm0, %%mm2 \n\t"\ | |
| 1714 "movd -4(%0), %%mm3 \n\t" /*0001000*/\ | |
| 1715 "movd 8(%0), %%mm4 \n\t" /*0001000*/\ | |
| 1716 "psllq $8, %%mm1 \n\t"\ | |
| 1717 "psrlq $8, %%mm2 \n\t"\ | |
| 1718 "psrlq $24, %%mm3 \n\t"\ | |
| 1719 "psllq $56, %%mm4 \n\t"\ | |
| 1720 "por %%mm3, %%mm1 \n\t"\ | |
| 1721 "por %%mm4, %%mm2 \n\t"\ | |
| 1722 "movq %%mm1, %%mm5 \n\t"\ | |
| 1723 PAVGB(%%mm2, %%mm1)\ | |
| 1724 PAVGB(%%mm1, %%mm0)\ | |
| 1725 "psllq $8, %%mm5 \n\t"\ | |
| 1726 "psrlq $8, %%mm2 \n\t"\ | |
| 1727 "por %%mm3, %%mm5 \n\t"\ | |
| 1728 "por %%mm4, %%mm2 \n\t"\ | |
| 1729 "movq %%mm5, %%mm1 \n\t"\ | |
| 1730 PAVGB(%%mm2, %%mm5)\ | |
| 1731 "psllq $8, %%mm1 \n\t"\ | |
| 1732 "psrlq $8, %%mm2 \n\t"\ | |
| 1733 "por %%mm3, %%mm1 \n\t"\ | |
| 1734 "por %%mm4, %%mm2 \n\t"\ | |
| 1735 PAVGB(%%mm2, %%mm1)\ | |
| 1736 PAVGB(%%mm1, %%mm5)\ | |
| 1737 PAVGB(%%mm5, %%mm0)\ | |
| 1738 "movd %%mm0, (%0) \n\t"\ | |
| 1739 "psrlq $32, %%mm0 \n\t"\ | |
| 1740 "movd %%mm0, 4(%0) \n\t" | |
| 1741 | |
| 1742 /* uses the 9-Tap Filter: 112242211 */ | |
| 1743 #define NEW_HLP2(i)\ | |
| 1744 "movq " #i "(%%eax), %%mm0 \n\t" /*0001000*/\ | |
| 1745 "movq %%mm0, %%mm1 \n\t" /*0001000*/\ | |
| 1746 "movq %%mm0, %%mm2 \n\t" /*0001000*/\ | |
| 1747 "movd -4(%0), %%mm3 \n\t" /*0001000*/\ | |
| 1748 "movd 8(%0), %%mm4 \n\t" /*0001000*/\ | |
| 1749 "psllq $8, %%mm1 \n\t"\ | |
| 1750 "psrlq $8, %%mm2 \n\t"\ | |
| 1751 "psrlq $24, %%mm3 \n\t"\ | |
| 1752 "psllq $56, %%mm4 \n\t"\ | |
| 1753 "por %%mm3, %%mm1 \n\t" /*0010000*/\ | |
| 1754 "por %%mm4, %%mm2 \n\t" /*0000100*/\ | |
| 1755 "movq %%mm1, %%mm5 \n\t" /*0010000*/\ | |
| 1756 PAVGB(%%mm2, %%mm1) /*0010100*/\ | |
| 1757 PAVGB(%%mm1, %%mm0) /*0012100*/\ | |
| 1758 "psllq $8, %%mm5 \n\t"\ | |
| 1759 "psrlq $8, %%mm2 \n\t"\ | |
| 1760 "por %%mm3, %%mm5 \n\t" /*0100000*/\ | |
| 1761 "por %%mm4, %%mm2 \n\t" /*0000010*/\ | |
| 1762 "movq %%mm5, %%mm1 \n\t" /*0100000*/\ | |
| 1763 PAVGB(%%mm2, %%mm5) /*0100010*/\ | |
| 1764 "psllq $8, %%mm1 \n\t"\ | |
| 1765 "psrlq $8, %%mm2 \n\t"\ | |
| 1766 "por %%mm3, %%mm1 \n\t" /*1000000*/\ | |
| 1767 "por %%mm4, %%mm2 \n\t" /*0000001*/\ | |
| 1768 "movq %%mm1, %%mm6 \n\t" /*1000000*/\ | |
| 1769 PAVGB(%%mm2, %%mm1) /*1000001*/\ | |
| 1770 "psllq $8, %%mm6 \n\t"\ | |
| 1771 "psrlq $8, %%mm2 \n\t"\ | |
| 1772 "por %%mm3, %%mm6 \n\t"/*100000000*/\ | |
| 1773 "por %%mm4, %%mm2 \n\t"/*000000001*/\ | |
| 1774 PAVGB(%%mm2, %%mm6) /*100000001*/\ | |
| 1775 PAVGB(%%mm6, %%mm1) /*110000011*/\ | |
| 1776 PAVGB(%%mm1, %%mm5) /*112000211*/\ | |
| 1777 PAVGB(%%mm5, %%mm0) /*112242211*/\ | |
| 1778 "movd %%mm0, (%0) \n\t"\ | |
| 1779 "psrlq $32, %%mm0 \n\t"\ | |
| 1780 "movd %%mm0, 4(%0) \n\t" | |
| 1781 | |
| 1782 #define HLP(i) NEW_HLP(i) | |
| 1712 | 1783 |
| 1713 HLP(0) | 1784 HLP(0) |
| 1714 "addl %1, %0 \n\t" | 1785 "addl %1, %0 \n\t" |
| 1715 HLP(8) | 1786 HLP(8) |
| 1716 "addl %1, %0 \n\t" | 1787 "addl %1, %0 \n\t" |
| 1826 | 1897 |
| 1827 //FIXME | 1898 //FIXME |
| 1828 #endif | 1899 #endif |
| 1829 } | 1900 } |
| 1830 | 1901 |
| 1902 /** | |
| 1903 * Deinterlaces the given block | |
| 1904 * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block | |
| 1905 */ | |
| 1906 static inline void deInterlaceInterpolateLinear(uint8_t src[], int stride) | |
| 1907 { | |
| 1908 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
| 1909 asm volatile( | |
| 1910 "leal (%0, %1), %%eax \n\t" | |
| 1911 "leal (%%eax, %1, 4), %%ebx \n\t" | |
| 1912 // 0 1 2 3 4 5 6 7 8 9 | |
| 1913 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 | |
| 1914 | |
| 1915 "movq (%0), %%mm0 \n\t" | |
| 1916 "movq (%%eax, %1), %%mm1 \n\t" | |
| 1917 PAVGB(%%mm1, %%mm0)\ | |
| 1918 "movq %%mm0, (%%eax) \n\t" | |
| 1919 "movq (%0, %1, 4), %%mm0 \n\t" | |
| 1920 PAVGB(%%mm0, %%mm1)\ | |
| 1921 "movq %%mm1, (%%eax, %1, 2) \n\t" | |
| 1922 "movq (%%ebx, %1), %%mm1 \n\t" | |
| 1923 PAVGB(%%mm1, %%mm0)\ | |
| 1924 "movq %%mm0, (%%ebx) \n\t" | |
| 1925 "movq (%0, %1, 8), %%mm0 \n\t" | |
| 1926 PAVGB(%%mm0, %%mm1)\ | |
| 1927 "movq %%mm1, (%%ebx, %1, 2) \n\t" | |
| 1928 | |
| 1929 : : "r" (src), "r" (stride) | |
| 1930 : "%eax", "%ebx" | |
| 1931 ); | |
| 1932 #else | |
| 1933 int x; | |
| 1934 for(x=0; x<8; x++) | |
| 1935 { | |
| 1936 src[stride] = (src[0] + src[stride*2])>>1; | |
| 1937 src[stride*3] = (src[stride*2] + src[stride*4])>>1; | |
| 1938 src[stride*5] = (src[stride*4] + src[stride*6])>>1; | |
| 1939 src[stride*7] = (src[stride*6] + src[stride*8])>>1; | |
| 1940 src++; | |
| 1941 } | |
| 1942 #endif | |
| 1943 } | |
| 1944 | |
| 1945 /** | |
| 1946 * Deinterlaces the given block | |
| 1947 * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block | |
| 1948 */ | |
| 1949 static inline void deInterlaceInterpolateLinearLastRow(uint8_t src[], int stride) | |
| 1950 { | |
| 1951 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
| 1952 asm volatile( | |
| 1953 "leal (%0, %1), %%eax \n\t" | |
| 1954 "leal (%%eax, %1, 4), %%ebx \n\t" | |
| 1955 // 0 1 2 3 4 5 6 7 8 9 | |
| 1956 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 | |
| 1957 | |
| 1958 "movq (%0), %%mm0 \n\t" | |
| 1959 "movq (%%eax, %1), %%mm1 \n\t" | |
| 1960 PAVGB(%%mm1, %%mm0)\ | |
| 1961 "movq %%mm0, (%%eax) \n\t" | |
| 1962 "movq (%0, %1, 4), %%mm0 \n\t" | |
| 1963 PAVGB(%%mm0, %%mm1)\ | |
| 1964 "movq %%mm1, (%%eax, %1, 2) \n\t" | |
| 1965 "movq (%%ebx, %1), %%mm1 \n\t" | |
| 1966 PAVGB(%%mm1, %%mm0)\ | |
| 1967 "movq %%mm0, (%%ebx) \n\t" | |
| 1968 "movq %%mm1, (%%ebx, %1, 2) \n\t" | |
| 1969 | |
| 1970 | |
| 1971 : : "r" (src), "r" (stride) | |
| 1972 : "%eax", "%ebx" | |
| 1973 ); | |
| 1974 #else | |
| 1975 int x; | |
| 1976 for(x=0; x<8; x++) | |
| 1977 { | |
| 1978 src[stride] = (src[0] + src[stride*2])>>1; | |
| 1979 src[stride*3] = (src[stride*2] + src[stride*4])>>1; | |
| 1980 src[stride*5] = (src[stride*4] + src[stride*6])>>1; | |
| 1981 src[stride*7] = src[stride*6]; | |
| 1982 src++; | |
| 1983 } | |
| 1984 #endif | |
| 1985 } | |
| 1986 | |
| 1987 /** | |
| 1988 * Deinterlaces the given block | |
| 1989 * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block | |
| 1990 * will shift the image up by 1 line (FIXME if this is a problem) | |
| 1991 */ | |
| 1992 static inline void deInterlaceBlendLinear(uint8_t src[], int stride) | |
| 1993 { | |
| 1994 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
| 1995 asm volatile( | |
| 1996 "leal (%0, %1), %%eax \n\t" | |
| 1997 "leal (%%eax, %1, 4), %%ebx \n\t" | |
| 1998 // 0 1 2 3 4 5 6 7 8 9 | |
| 1999 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 | |
| 2000 | |
| 2001 "movq (%0), %%mm0 \n\t" // L0 | |
| 2002 "movq (%%eax, %1), %%mm1 \n\t" // L2 | |
| 2003 PAVGB(%%mm1, %%mm0) // L0+L2 | |
| 2004 "movq (%%eax), %%mm2 \n\t" // L1 | |
| 2005 PAVGB(%%mm2, %%mm0) | |
| 2006 "movq %%mm0, (%0) \n\t" | |
| 2007 "movq (%%eax, %1, 2), %%mm0 \n\t" // L3 | |
| 2008 PAVGB(%%mm0, %%mm2) // L1+L3 | |
| 2009 PAVGB(%%mm1, %%mm2) // 2L2 + L1 + L3 | |
| 2010 "movq %%mm2, (%%eax) \n\t" | |
| 2011 "movq (%0, %1, 4), %%mm2 \n\t" // L4 | |
| 2012 PAVGB(%%mm2, %%mm1) // L2+L4 | |
| 2013 PAVGB(%%mm0, %%mm1) // 2L3 + L2 + L4 | |
| 2014 "movq %%mm1, (%%eax, %1) \n\t" | |
| 2015 "movq (%%ebx), %%mm1 \n\t" // L5 | |
| 2016 PAVGB(%%mm1, %%mm0) // L3+L5 | |
| 2017 PAVGB(%%mm2, %%mm0) // 2L4 + L3 + L5 | |
| 2018 "movq %%mm0, (%%eax, %1, 2) \n\t" | |
| 2019 "movq (%%ebx, %1), %%mm0 \n\t" // L6 | |
| 2020 PAVGB(%%mm0, %%mm2) // L4+L6 | |
| 2021 PAVGB(%%mm1, %%mm2) // 2L5 + L4 + L6 | |
| 2022 "movq %%mm2, (%0, %1, 4) \n\t" | |
| 2023 "movq (%%ebx, %1, 2), %%mm2 \n\t" // L7 | |
| 2024 PAVGB(%%mm2, %%mm1) // L5+L7 | |
| 2025 PAVGB(%%mm0, %%mm1) // 2L6 + L5 + L7 | |
| 2026 "movq %%mm1, (%%ebx) \n\t" | |
| 2027 "movq (%0, %1, 8), %%mm1 \n\t" // L8 | |
| 2028 PAVGB(%%mm1, %%mm0) // L6+L8 | |
| 2029 PAVGB(%%mm2, %%mm0) // 2L7 + L6 + L8 | |
| 2030 "movq %%mm0, (%%ebx, %1) \n\t" | |
| 2031 "movq (%%ebx, %1, 4), %%mm0 \n\t" // L9 | |
| 2032 PAVGB(%%mm0, %%mm2) // L7+L9 | |
| 2033 PAVGB(%%mm1, %%mm2) // 2L8 + L7 + L9 | |
| 2034 "movq %%mm2, (%%ebx, %1, 2) \n\t" | |
| 2035 | |
| 2036 | |
| 2037 : : "r" (src), "r" (stride) | |
| 2038 : "%eax", "%ebx" | |
| 2039 ); | |
| 2040 #else | |
| 2041 int x; | |
| 2042 for(x=0; x<8; x++) | |
| 2043 { | |
| 2044 src[0 ] = (src[0 ] + 2*src[stride ] + src[stride*2])>>2; | |
| 2045 src[stride ] = (src[stride ] + 2*src[stride*2] + src[stride*3])>>2; | |
| 2046 src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2; | |
| 2047 src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2; | |
| 2048 src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2; | |
| 2049 src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2; | |
| 2050 src[stride*6] = (src[stride*6] + 2*src[stride*7] + src[stride*8])>>2; | |
| 2051 src[stride*7] = (src[stride*7] + 2*src[stride*8] + src[stride*9])>>2; | |
| 2052 src++; | |
| 2053 } | |
| 2054 #endif | |
| 2055 } | |
| 2056 | |
| 2057 /** | |
| 2058 * Deinterlaces the given block | |
| 2059 * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block | |
| 2060 * will shift the image up by 1 line (FIXME if this is a problem) | |
| 2061 */ | |
| 2062 static inline void deInterlaceBlendLinearLastRow(uint8_t src[], int stride) | |
| 2063 { | |
| 2064 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
| 2065 asm volatile( | |
| 2066 "leal (%0, %1), %%eax \n\t" | |
| 2067 "leal (%%eax, %1, 4), %%ebx \n\t" | |
| 2068 // 0 1 2 3 4 5 6 7 8 9 | |
| 2069 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 | |
| 2070 | |
| 2071 "movq (%0), %%mm0 \n\t" // L0 | |
| 2072 "movq (%%eax, %1), %%mm1 \n\t" // L2 | |
| 2073 PAVGB(%%mm1, %%mm0) // L0+L2 | |
| 2074 "movq (%%eax), %%mm2 \n\t" // L1 | |
| 2075 PAVGB(%%mm2, %%mm0) | |
| 2076 "movq %%mm0, (%0) \n\t" | |
| 2077 "movq (%%eax, %1, 2), %%mm0 \n\t" // L3 | |
| 2078 PAVGB(%%mm0, %%mm2) // L1+L3 | |
| 2079 PAVGB(%%mm1, %%mm2) // 2L2 + L1 + L3 | |
| 2080 "movq %%mm2, (%%eax) \n\t" | |
| 2081 "movq (%0, %1, 4), %%mm2 \n\t" // L4 | |
| 2082 PAVGB(%%mm2, %%mm1) // L2+L4 | |
| 2083 PAVGB(%%mm0, %%mm1) // 2L3 + L2 + L4 | |
| 2084 "movq %%mm1, (%%eax, %1) \n\t" | |
| 2085 "movq (%%ebx), %%mm1 \n\t" // L5 | |
| 2086 PAVGB(%%mm1, %%mm0) // L3+L5 | |
| 2087 PAVGB(%%mm2, %%mm0) // 2L4 + L3 + L5 | |
| 2088 "movq %%mm0, (%%eax, %1, 2) \n\t" | |
| 2089 "movq (%%ebx, %1), %%mm0 \n\t" // L6 | |
| 2090 PAVGB(%%mm0, %%mm2) // L4+L6 | |
| 2091 PAVGB(%%mm1, %%mm2) // 2L5 + L4 + L6 | |
| 2092 "movq %%mm2, (%0, %1, 4) \n\t" | |
| 2093 "movq (%%ebx, %1, 2), %%mm2 \n\t" // L7 | |
| 2094 PAVGB(%%mm2, %%mm1) // L5+L7 | |
| 2095 PAVGB(%%mm0, %%mm1) // 2L6 + L5 + L7 | |
| 2096 "movq %%mm1, (%%ebx) \n\t" | |
| 2097 PAVGB(%%mm2, %%mm0) // L7 + L8 | |
| 2098 "movq %%mm0, (%%ebx, %1) \n\t" | |
| 2099 "movq %%mm0, (%%ebx, %1, 2) \n\t" | |
| 2100 | |
| 2101 : : "r" (src), "r" (stride) | |
| 2102 : "%eax", "%ebx" | |
| 2103 ); | |
| 2104 #else | |
| 2105 int x; | |
| 2106 for(x=0; x<8; x++) | |
| 2107 { | |
| 2108 src[0 ] = (src[0 ] + 2*src[stride ] + src[stride*2])>>2; | |
| 2109 src[stride ] = (src[stride ] + 2*src[stride*2] + src[stride*3])>>2; | |
| 2110 src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2; | |
| 2111 src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2; | |
| 2112 src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2; | |
| 2113 src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2; | |
| 2114 src[stride*6] = (src[stride*6] + src[stride*7])>>1; | |
| 2115 src[stride*7] = src[stride*6]; | |
| 2116 src++; | |
| 2117 } | |
| 2118 #endif | |
| 2119 } | |
| 2120 | |
| 2121 /** | |
| 2122 * Deinterlaces the given block | |
| 2123 * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block | |
| 2124 */ | |
| 2125 static inline void deInterlaceMedian(uint8_t src[], int stride) | |
| 2126 { | |
| 2127 #if defined (HAVE_MMX2) | |
| 2128 asm volatile( | |
| 2129 "leal (%0, %1), %%eax \n\t" | |
| 2130 "leal (%%eax, %1, 4), %%ebx \n\t" | |
| 2131 // 0 1 2 3 4 5 6 7 8 9 | |
| 2132 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 | |
| 2133 | |
| 2134 "movq (%0), %%mm0 \n\t" // | |
| 2135 "movq (%%eax, %1), %%mm2 \n\t" // | |
| 2136 "movq (%%eax), %%mm1 \n\t" // | |
| 2137 "movq %%mm0, %%mm3 \n\t" | |
| 2138 "pmaxub %%mm1, %%mm0 \n\t" // | |
| 2139 "pminub %%mm3, %%mm1 \n\t" // | |
| 2140 "pmaxub %%mm2, %%mm1 \n\t" // | |
| 2141 "pminub %%mm1, %%mm0 \n\t" | |
| 2142 "movq %%mm0, (%%eax) \n\t" | |
| 2143 | |
| 2144 "movq (%0, %1, 4), %%mm0 \n\t" // | |
| 2145 "movq (%%eax, %1, 2), %%mm1 \n\t" // | |
| 2146 "movq %%mm2, %%mm3 \n\t" | |
| 2147 "pmaxub %%mm1, %%mm2 \n\t" // | |
| 2148 "pminub %%mm3, %%mm1 \n\t" // | |
| 2149 "pmaxub %%mm0, %%mm1 \n\t" // | |
| 2150 "pminub %%mm1, %%mm2 \n\t" | |
| 2151 "movq %%mm2, (%%eax, %1, 2) \n\t" | |
| 2152 | |
| 2153 "movq (%%ebx), %%mm2 \n\t" // | |
| 2154 "movq (%%ebx, %1), %%mm1 \n\t" // | |
| 2155 "movq %%mm2, %%mm3 \n\t" | |
| 2156 "pmaxub %%mm0, %%mm2 \n\t" // | |
| 2157 "pminub %%mm3, %%mm0 \n\t" // | |
| 2158 "pmaxub %%mm1, %%mm0 \n\t" // | |
| 2159 "pminub %%mm0, %%mm2 \n\t" | |
| 2160 "movq %%mm2, (%%ebx) \n\t" | |
| 2161 | |
| 2162 "movq (%%ebx, %1, 2), %%mm2 \n\t" // | |
| 2163 "movq (%0, %1, 8), %%mm0 \n\t" // | |
| 2164 "movq %%mm2, %%mm3 \n\t" | |
| 2165 "pmaxub %%mm0, %%mm2 \n\t" // | |
| 2166 "pminub %%mm3, %%mm0 \n\t" // | |
| 2167 "pmaxub %%mm1, %%mm0 \n\t" // | |
| 2168 "pminub %%mm0, %%mm2 \n\t" | |
| 2169 "movq %%mm2, (%%ebx, %1, 2) \n\t" | |
| 2170 | |
| 2171 | |
| 2172 : : "r" (src), "r" (stride) | |
| 2173 : "%eax", "%ebx" | |
| 2174 ); | |
| 2175 #else | |
| 2176 //FIXME | |
| 2177 int x; | |
| 2178 for(x=0; x<8; x++) | |
| 2179 { | |
| 2180 src[0 ] = (src[0 ] + 2*src[stride ] + src[stride*2])>>2; | |
| 2181 src[stride ] = (src[stride ] + 2*src[stride*2] + src[stride*3])>>2; | |
| 2182 src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2; | |
| 2183 src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2; | |
| 2184 src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2; | |
| 2185 src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2; | |
| 2186 src[stride*6] = (src[stride*6] + 2*src[stride*7] + src[stride*8])>>2; | |
| 2187 src[stride*7] = (src[stride*7] + 2*src[stride*8] + src[stride*9])>>2; | |
| 2188 src++; | |
| 2189 } | |
| 2190 #endif | |
| 2191 } | |
| 2192 | |
| 2193 /** | |
| 2194 * Deinterlaces the given block | |
| 2195 * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block | |
| 2196 * will shift the image up by 1 line (FIXME if this is a problem) | |
| 2197 */ | |
| 2198 static inline void deInterlaceMedianLastRow(uint8_t src[], int stride) | |
| 2199 { | |
| 2200 #if defined (HAVE_MMX2) | |
| 2201 asm volatile( | |
| 2202 "leal (%0, %1), %%eax \n\t" | |
| 2203 "leal (%%eax, %1, 4), %%ebx \n\t" | |
| 2204 // 0 1 2 3 4 5 6 7 8 9 | |
| 2205 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 | |
| 2206 | |
| 2207 "movq (%0), %%mm0 \n\t" // | |
| 2208 "movq (%%eax, %1), %%mm2 \n\t" // | |
| 2209 "movq (%%eax), %%mm1 \n\t" // | |
| 2210 "movq %%mm0, %%mm3 \n\t" | |
| 2211 "pmaxub %%mm1, %%mm0 \n\t" // | |
| 2212 "pminub %%mm3, %%mm1 \n\t" // | |
| 2213 "pmaxub %%mm2, %%mm1 \n\t" // | |
| 2214 "pminub %%mm1, %%mm0 \n\t" | |
| 2215 "movq %%mm0, (%%eax) \n\t" | |
| 2216 | |
| 2217 "movq (%0, %1, 4), %%mm0 \n\t" // | |
| 2218 "movq (%%eax, %1, 2), %%mm1 \n\t" // | |
| 2219 "movq %%mm2, %%mm3 \n\t" | |
| 2220 "pmaxub %%mm1, %%mm2 \n\t" // | |
| 2221 "pminub %%mm3, %%mm1 \n\t" // | |
| 2222 "pmaxub %%mm0, %%mm1 \n\t" // | |
| 2223 "pminub %%mm1, %%mm2 \n\t" | |
| 2224 "movq %%mm2, (%%eax, %1, 2) \n\t" | |
| 2225 | |
| 2226 "movq (%%ebx), %%mm2 \n\t" // | |
| 2227 "movq (%%ebx, %1), %%mm1 \n\t" // | |
| 2228 "movq %%mm2, %%mm3 \n\t" | |
| 2229 "pmaxub %%mm0, %%mm2 \n\t" // | |
| 2230 "pminub %%mm3, %%mm0 \n\t" // | |
| 2231 "pmaxub %%mm1, %%mm0 \n\t" // | |
| 2232 "pminub %%mm0, %%mm2 \n\t" | |
| 2233 "movq %%mm2, (%%ebx) \n\t" | |
| 2234 | |
| 2235 "movq %%mm1, (%%ebx, %1, 2) \n\t" | |
| 2236 | |
| 2237 : : "r" (src), "r" (stride) | |
| 2238 : "%eax", "%ebx" | |
| 2239 ); | |
| 2240 #else | |
| 2241 //FIXME | |
| 2242 int x; | |
| 2243 for(x=0; x<8; x++) | |
| 2244 { | |
| 2245 src[0 ] = (src[0 ] + 2*src[stride ] + src[stride*2])>>2; | |
| 2246 src[stride ] = (src[stride ] + 2*src[stride*2] + src[stride*3])>>2; | |
| 2247 src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2; | |
| 2248 src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2; | |
| 2249 src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2; | |
| 2250 src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2; | |
| 2251 src[stride*6] = (src[stride*6] + src[stride*7])>>1; | |
| 2252 src[stride*7] = src[stride*6]; | |
| 2253 src++; | |
| 2254 } | |
| 2255 #endif | |
| 2256 } | |
| 2257 | |
| 2258 | |
| 1831 #ifdef HAVE_ODIVX_POSTPROCESS | 2259 #ifdef HAVE_ODIVX_POSTPROCESS |
| 1832 #include "../opendivx/postprocess.h" | 2260 #include "../opendivx/postprocess.h" |
| 1833 int use_old_pp=0; | 2261 int use_old_pp=0; |
| 1834 #endif | 2262 #endif |
| 1835 | 2263 |
| 1839 /** | 2267 /** |
| 1840 * ... | 2268 * ... |
| 1841 * the mode value is interpreted as a quality value if its negative, its range is then (-1 ... -63) | 2269 * the mode value is interpreted as a quality value if its negative, its range is then (-1 ... -63) |
| 1842 * -63 is best quality -1 is worst | 2270 * -63 is best quality -1 is worst |
| 1843 */ | 2271 */ |
| 1844 //extern "C"{ | |
| 1845 void postprocess(unsigned char * src[], int src_stride, | 2272 void postprocess(unsigned char * src[], int src_stride, |
| 1846 unsigned char * dst[], int dst_stride, | 2273 unsigned char * dst[], int dst_stride, |
| 1847 int horizontal_size, int vertical_size, | 2274 int horizontal_size, int vertical_size, |
| 1848 QP_STORE_T *QP_store, int QP_stride, | 2275 QP_STORE_T *QP_store, int QP_stride, |
| 1849 int mode) | 2276 int mode) |
| 2194 if(!isColor) yHistogram[ srcBlock[0] ]++; | 2621 if(!isColor) yHistogram[ srcBlock[0] ]++; |
| 2195 | 2622 |
| 2196 blockCopy(vertBlock + dstStride*2, dstStride, | 2623 blockCopy(vertBlock + dstStride*2, dstStride, |
| 2197 vertSrcBlock + srcStride*2, srcStride, 8, mode & LEVEL_FIX); | 2624 vertSrcBlock + srcStride*2, srcStride, 8, mode & LEVEL_FIX); |
| 2198 | 2625 |
| 2626 if(mode & LINEAR_IPOL_DEINT_FILTER) | |
| 2627 deInterlaceInterpolateLinear(dstBlock, dstStride); | |
| 2628 else if(mode & LINEAR_BLEND_DEINT_FILTER) | |
| 2629 deInterlaceBlendLinear(dstBlock, dstStride); | |
| 2630 else if(mode & MEDIAN_DEINT_FILTER) | |
| 2631 deInterlaceMedian(dstBlock, dstStride); | |
| 2632 /* else if(mode & CUBIC_IPOL_DEINT_FILTER) | |
| 2633 deInterlaceInterpolateCubic(dstBlock, dstStride); | |
| 2634 else if(mode & CUBIC_BLEND_DEINT_FILTER) | |
| 2635 deInterlaceBlendCubic(dstBlock, dstStride); | |
| 2636 */ | |
| 2199 | 2637 |
| 2200 #ifdef MORE_TIMEING | 2638 #ifdef MORE_TIMEING |
| 2201 T1= rdtsc(); | 2639 T1= rdtsc(); |
| 2202 memcpyTime+= T1-T0; | 2640 memcpyTime+= T1-T0; |
| 2203 T0=T1; | 2641 T0=T1; |
| 2224 vertTime+= T1-T0; | 2662 vertTime+= T1-T0; |
| 2225 T0=T1; | 2663 T0=T1; |
| 2226 #endif | 2664 #endif |
| 2227 } | 2665 } |
| 2228 else | 2666 else |
| 2667 { | |
| 2229 blockCopy(vertBlock + dstStride*1, dstStride, | 2668 blockCopy(vertBlock + dstStride*1, dstStride, |
| 2230 vertSrcBlock + srcStride*1, srcStride, 4, mode & LEVEL_FIX); | 2669 vertSrcBlock + srcStride*1, srcStride, 4, mode & LEVEL_FIX); |
| 2231 | 2670 |
| 2671 if(mode & LINEAR_IPOL_DEINT_FILTER) | |
| 2672 deInterlaceInterpolateLinearLastRow(dstBlock, dstStride); | |
| 2673 else if(mode & LINEAR_BLEND_DEINT_FILTER) | |
| 2674 deInterlaceBlendLinearLastRow(dstBlock, dstStride); | |
| 2675 else if(mode & MEDIAN_DEINT_FILTER) | |
| 2676 deInterlaceMedianLastRow(dstBlock, dstStride); | |
| 2677 /* else if(mode & CUBIC_IPOL_DEINT_FILTER) | |
| 2678 deInterlaceInterpolateCubicLastRow(dstBlock, dstStride); | |
| 2679 else if(mode & CUBIC_BLEND_DEINT_FILTER) | |
| 2680 deInterlaceBlendCubicLastRow(dstBlock, dstStride); | |
| 2681 */ | |
| 2682 } | |
| 2232 | 2683 |
| 2233 if(x - 8 >= 0 && x<width) | 2684 if(x - 8 >= 0 && x<width) |
| 2234 { | 2685 { |
| 2235 #ifdef MORE_TIMEING | 2686 #ifdef MORE_TIMEING |
| 2236 T0= rdtsc(); | 2687 T0= rdtsc(); |
