Mercurial > libavcodec.hg
comparison libpostproc/postprocess_template.c @ 1157:57fe9c4e0c6e libavcodec
fixing clipping of C deinterlacers
5-tap lowpass deinterlacer
| author | michaelni |
|---|---|
| date | Sat, 29 Mar 2003 13:31:12 +0000 |
| parents | 3644e555a20a |
| children | 71d890b5c13b |
comparison
equal
deleted
inserted
replaced
| 1156:48efa413ac81 | 1157:57fe9c4e0c6e |
|---|---|
| 1579 * Deinterlaces the given block by cubic interpolating every second line. | 1579 * Deinterlaces the given block by cubic interpolating every second line. |
| 1580 * will be called for every 8x8 block and can read & write from line 4-15 | 1580 * will be called for every 8x8 block and can read & write from line 4-15 |
| 1581 * lines 0-3 have been passed through the deblock / dering filters allready, but can be read too | 1581 * lines 0-3 have been passed through the deblock / dering filters allready, but can be read too |
| 1582 * lines 4-12 will be read into the deblocking filter and should be deinterlaced | 1582 * lines 4-12 will be read into the deblocking filter and should be deinterlaced |
| 1583 * this filter will read lines 3-15 and write 7-13 | 1583 * this filter will read lines 3-15 and write 7-13 |
| 1584 * no cliping in C version | |
| 1585 */ | 1584 */ |
| 1586 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride) | 1585 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride) |
| 1587 { | 1586 { |
| 1588 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1587 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1589 src+= stride*3; | 1588 src+= stride*3; |
| 1629 #else | 1628 #else |
| 1630 int x; | 1629 int x; |
| 1631 src+= stride*3; | 1630 src+= stride*3; |
| 1632 for(x=0; x<8; x++) | 1631 for(x=0; x<8; x++) |
| 1633 { | 1632 { |
| 1634 src[stride*3] = (-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4; | 1633 src[stride*3] = CLIP((-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4); |
| 1635 src[stride*5] = (-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4; | 1634 src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4); |
| 1636 src[stride*7] = (-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4; | 1635 src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4); |
| 1637 src[stride*9] = (-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4; | 1636 src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4); |
| 1638 src++; | 1637 src++; |
| 1639 } | 1638 } |
| 1640 #endif | 1639 #endif |
| 1641 } | 1640 } |
| 1642 | 1641 |
| 1644 * Deinterlaces the given block by filtering every second line with a (-1 4 2 4 -1) filter. | 1643 * Deinterlaces the given block by filtering every second line with a (-1 4 2 4 -1) filter. |
| 1645 * will be called for every 8x8 block and can read & write from line 4-15 | 1644 * will be called for every 8x8 block and can read & write from line 4-15 |
| 1646 * lines 0-3 have been passed through the deblock / dering filters allready, but can be read too | 1645 * lines 0-3 have been passed through the deblock / dering filters allready, but can be read too |
| 1647 * lines 4-12 will be read into the deblocking filter and should be deinterlaced | 1646 * lines 4-12 will be read into the deblocking filter and should be deinterlaced |
| 1648 * this filter will read lines 4-13 and write 5-11 | 1647 * this filter will read lines 4-13 and write 5-11 |
| 1649 * no cliping in C version | |
| 1650 */ | 1648 */ |
| 1651 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp) | 1649 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp) |
| 1652 { | 1650 { |
| 1653 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1651 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1654 src+= stride*4; | 1652 src+= stride*4; |
| 1703 for(x=0; x<8; x++) | 1701 for(x=0; x<8; x++) |
| 1704 { | 1702 { |
| 1705 int t1= tmp[x]; | 1703 int t1= tmp[x]; |
| 1706 int t2= src[stride*1]; | 1704 int t2= src[stride*1]; |
| 1707 | 1705 |
| 1708 src[stride*1]= (-t1 + 4*src[stride*0] + 2*t2 + 4*src[stride*2] - src[stride*3] + 4)>>3; | 1706 src[stride*1]= CLIP((-t1 + 4*src[stride*0] + 2*t2 + 4*src[stride*2] - src[stride*3] + 4)>>3); |
| 1709 t1= src[stride*4]; | 1707 t1= src[stride*4]; |
| 1710 src[stride*3]= (-t2 + 4*src[stride*2] + 2*t1 + 4*src[stride*4] - src[stride*5] + 4)>>3; | 1708 src[stride*3]= CLIP((-t2 + 4*src[stride*2] + 2*t1 + 4*src[stride*4] - src[stride*5] + 4)>>3); |
| 1711 t2= src[stride*6]; | 1709 t2= src[stride*6]; |
| 1712 src[stride*5]= (-t1 + 4*src[stride*4] + 2*t2 + 4*src[stride*6] - src[stride*7] + 4)>>3; | 1710 src[stride*5]= CLIP((-t1 + 4*src[stride*4] + 2*t2 + 4*src[stride*6] - src[stride*7] + 4)>>3); |
| 1713 t1= src[stride*8]; | 1711 t1= src[stride*8]; |
| 1714 src[stride*7]= (-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3; | 1712 src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3); |
| 1715 tmp[x]= t1; | 1713 tmp[x]= t1; |
| 1714 | |
| 1715 src++; | |
| 1716 } | |
| 1717 #endif | |
| 1718 } | |
| 1719 | |
| 1720 /** | |
| 1721 * Deinterlaces the given block by filtering every line with a (-1 2 6 2 -1) filter. | |
| 1722 * will be called for every 8x8 block and can read & write from line 4-15 | |
| 1723 * lines 0-3 have been passed through the deblock / dering filters allready, but can be read too | |
| 1724 * lines 4-12 will be read into the deblocking filter and should be deinterlaced | |
| 1725 * this filter will read lines 4-13 and write 4-11 | |
| 1726 */ | |
| 1727 static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2) | |
| 1728 { | |
| 1729 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
| 1730 src+= stride*4; | |
| 1731 asm volatile( | |
| 1732 "leal (%0, %1), %%eax \n\t" | |
| 1733 "leal (%%eax, %1, 4), %%edx \n\t" | |
| 1734 "pxor %%mm7, %%mm7 \n\t" | |
| 1735 "movq (%2), %%mm0 \n\t" | |
| 1736 "movq (%3), %%mm1 \n\t" | |
| 1737 // 0 1 2 3 4 5 6 7 8 9 10 | |
| 1738 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 ecx | |
| 1739 | |
| 1740 #define DEINT_L5(t1,t2,a,b,c)\ | |
| 1741 "movq " #a ", %%mm2 \n\t"\ | |
| 1742 "movq " #b ", %%mm3 \n\t"\ | |
| 1743 "movq " #c ", %%mm4 \n\t"\ | |
| 1744 PAVGB(t2, %%mm3) \ | |
| 1745 PAVGB(t1, %%mm4) \ | |
| 1746 "movq %%mm2, %%mm5 \n\t"\ | |
| 1747 "movq %%mm2, " #t1 " \n\t"\ | |
| 1748 "punpcklbw %%mm7, %%mm2 \n\t"\ | |
| 1749 "punpckhbw %%mm7, %%mm5 \n\t"\ | |
| 1750 "movq %%mm2, %%mm6 \n\t"\ | |
| 1751 "paddw %%mm2, %%mm2 \n\t"\ | |
| 1752 "paddw %%mm6, %%mm2 \n\t"\ | |
| 1753 "movq %%mm5, %%mm6 \n\t"\ | |
| 1754 "paddw %%mm5, %%mm5 \n\t"\ | |
| 1755 "paddw %%mm6, %%mm5 \n\t"\ | |
| 1756 "movq %%mm3, %%mm6 \n\t"\ | |
| 1757 "punpcklbw %%mm7, %%mm3 \n\t"\ | |
| 1758 "punpckhbw %%mm7, %%mm6 \n\t"\ | |
| 1759 "paddw %%mm3, %%mm3 \n\t"\ | |
| 1760 "paddw %%mm6, %%mm6 \n\t"\ | |
| 1761 "paddw %%mm3, %%mm2 \n\t"\ | |
| 1762 "paddw %%mm6, %%mm5 \n\t"\ | |
| 1763 "movq %%mm4, %%mm6 \n\t"\ | |
| 1764 "punpcklbw %%mm7, %%mm4 \n\t"\ | |
| 1765 "punpckhbw %%mm7, %%mm6 \n\t"\ | |
| 1766 "psubw %%mm4, %%mm2 \n\t"\ | |
| 1767 "psubw %%mm6, %%mm5 \n\t"\ | |
| 1768 "psraw $2, %%mm2 \n\t"\ | |
| 1769 "psraw $2, %%mm5 \n\t"\ | |
| 1770 "packuswb %%mm5, %%mm2 \n\t"\ | |
| 1771 "movq %%mm2, " #a " \n\t"\ | |
| 1772 | |
| 1773 DEINT_L5(%%mm0, %%mm1, (%0) , (%%eax) , (%%eax, %1) ) | |
| 1774 DEINT_L5(%%mm1, %%mm0, (%%eax) , (%%eax, %1) , (%%eax, %1, 2)) | |
| 1775 DEINT_L5(%%mm0, %%mm1, (%%eax, %1) , (%%eax, %1, 2), (%0, %1, 4) ) | |
| 1776 DEINT_L5(%%mm1, %%mm0, (%%eax, %1, 2), (%0, %1, 4) , (%%edx) ) | |
| 1777 DEINT_L5(%%mm0, %%mm1, (%0, %1, 4) , (%%edx) , (%%edx, %1) ) | |
| 1778 DEINT_L5(%%mm1, %%mm0, (%%edx) , (%%edx, %1) , (%%edx, %1, 2)) | |
| 1779 DEINT_L5(%%mm0, %%mm1, (%%edx, %1) , (%%edx, %1, 2), (%0, %1, 8) ) | |
| 1780 DEINT_L5(%%mm1, %%mm0, (%%edx, %1, 2), (%0, %1, 8) , (%%edx, %1, 4)) | |
| 1781 | |
| 1782 "movq %%mm0, (%2) \n\t" | |
| 1783 "movq %%mm1, (%3) \n\t" | |
| 1784 : : "r" (src), "r" (stride), "r"(tmp), "r"(tmp2) | |
| 1785 : "%eax", "%edx" | |
| 1786 ); | |
| 1787 #else | |
| 1788 int x; | |
| 1789 src+= stride*4; | |
| 1790 for(x=0; x<8; x++) | |
| 1791 { | |
| 1792 int t1= tmp[x]; | |
| 1793 int t2= tmp2[x]; | |
| 1794 int t3= src[0]; | |
| 1795 | |
| 1796 src[stride*0]= CLIP((-(t1 + src[stride*2]) + 2*(t2 + src[stride*1]) + 6*t3 + 4)>>3); | |
| 1797 t1= src[stride*1]; | |
| 1798 src[stride*1]= CLIP((-(t2 + src[stride*3]) + 2*(t3 + src[stride*2]) + 6*t1 + 4)>>3); | |
| 1799 t2= src[stride*2]; | |
| 1800 src[stride*2]= CLIP((-(t3 + src[stride*4]) + 2*(t1 + src[stride*3]) + 6*t2 + 4)>>3); | |
| 1801 t3= src[stride*3]; | |
| 1802 src[stride*3]= CLIP((-(t1 + src[stride*5]) + 2*(t2 + src[stride*4]) + 6*t3 + 4)>>3); | |
| 1803 t1= src[stride*4]; | |
| 1804 src[stride*4]= CLIP((-(t2 + src[stride*6]) + 2*(t3 + src[stride*5]) + 6*t1 + 4)>>3); | |
| 1805 t2= src[stride*5]; | |
| 1806 src[stride*5]= CLIP((-(t3 + src[stride*7]) + 2*(t1 + src[stride*6]) + 6*t2 + 4)>>3); | |
| 1807 t3= src[stride*6]; | |
| 1808 src[stride*6]= CLIP((-(t1 + src[stride*8]) + 2*(t2 + src[stride*7]) + 6*t3 + 4)>>3); | |
| 1809 t1= src[stride*7]; | |
| 1810 src[stride*7]= CLIP((-(t2 + src[stride*9]) + 2*(t3 + src[stride*8]) + 6*t1 + 4)>>3); | |
| 1811 | |
| 1812 tmp[x]= t3; | |
| 1813 tmp2[x]= t1; | |
| 1716 | 1814 |
| 1717 src++; | 1815 src++; |
| 1718 } | 1816 } |
| 1719 #endif | 1817 #endif |
| 1720 } | 1818 } |
| 2694 } | 2792 } |
| 2695 #endif | 2793 #endif |
| 2696 | 2794 |
| 2697 if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16; | 2795 if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16; |
| 2698 else if( (mode & LINEAR_BLEND_DEINT_FILTER) | 2796 else if( (mode & LINEAR_BLEND_DEINT_FILTER) |
| 2699 || (mode & FFMPEG_DEINT_FILTER)) copyAhead=14; | 2797 || (mode & FFMPEG_DEINT_FILTER) |
| 2798 || (mode & LOWPASS5_DEINT_FILTER)) copyAhead=14; | |
| 2700 else if( (mode & V_DEBLOCK) | 2799 else if( (mode & V_DEBLOCK) |
| 2701 || (mode & LINEAR_IPOL_DEINT_FILTER) | 2800 || (mode & LINEAR_IPOL_DEINT_FILTER) |
| 2702 || (mode & MEDIAN_DEINT_FILTER)) copyAhead=13; | 2801 || (mode & MEDIAN_DEINT_FILTER)) copyAhead=13; |
| 2703 else if(mode & V_X1_FILTER) copyAhead=11; | 2802 else if(mode & V_X1_FILTER) copyAhead=11; |
| 2704 // else if(mode & V_RK1_FILTER) copyAhead=10; | 2803 // else if(mode & V_RK1_FILTER) copyAhead=10; |
| 2830 RENAME(deInterlaceMedian)(dstBlock, dstStride); | 2929 RENAME(deInterlaceMedian)(dstBlock, dstStride); |
| 2831 else if(mode & CUBIC_IPOL_DEINT_FILTER) | 2930 else if(mode & CUBIC_IPOL_DEINT_FILTER) |
| 2832 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride); | 2931 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride); |
| 2833 else if(mode & FFMPEG_DEINT_FILTER) | 2932 else if(mode & FFMPEG_DEINT_FILTER) |
| 2834 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x); | 2933 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x); |
| 2934 else if(mode & LOWPASS5_DEINT_FILTER) | |
| 2935 RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x); | |
| 2835 /* else if(mode & CUBIC_BLEND_DEINT_FILTER) | 2936 /* else if(mode & CUBIC_BLEND_DEINT_FILTER) |
| 2836 RENAME(deInterlaceBlendCubic)(dstBlock, dstStride); | 2937 RENAME(deInterlaceBlendCubic)(dstBlock, dstStride); |
| 2837 */ | 2938 */ |
| 2838 dstBlock+=8; | 2939 dstBlock+=8; |
| 2839 srcBlock+=8; | 2940 srcBlock+=8; |
| 2972 RENAME(deInterlaceMedian)(dstBlock, dstStride); | 3073 RENAME(deInterlaceMedian)(dstBlock, dstStride); |
| 2973 else if(mode & CUBIC_IPOL_DEINT_FILTER) | 3074 else if(mode & CUBIC_IPOL_DEINT_FILTER) |
| 2974 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride); | 3075 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride); |
| 2975 else if(mode & FFMPEG_DEINT_FILTER) | 3076 else if(mode & FFMPEG_DEINT_FILTER) |
| 2976 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x); | 3077 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x); |
| 3078 else if(mode & LOWPASS5_DEINT_FILTER) | |
| 3079 RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x); | |
| 2977 /* else if(mode & CUBIC_BLEND_DEINT_FILTER) | 3080 /* else if(mode & CUBIC_BLEND_DEINT_FILTER) |
| 2978 RENAME(deInterlaceBlendCubic)(dstBlock, dstStride); | 3081 RENAME(deInterlaceBlendCubic)(dstBlock, dstStride); |
| 2979 */ | 3082 */ |
| 2980 | 3083 |
| 2981 /* only deblock if we have 2 blocks */ | 3084 /* only deblock if we have 2 blocks */ |
