comparison libpostproc/postprocess_template.c @ 1157:57fe9c4e0c6e libavcodec

fixing clipping of C deinterlacers; 5-tap lowpass deinterlacer
author michaelni
date Sat, 29 Mar 2003 13:31:12 +0000
parents 3644e555a20a
children 71d890b5c13b
comparison
equal deleted inserted replaced
1156:48efa413ac81 1157:57fe9c4e0c6e
1579 * Deinterlaces the given block by cubic interpolating every second line. 1579 * Deinterlaces the given block by cubic interpolating every second line.
1580 * will be called for every 8x8 block and can read & write from line 4-15 1580 * will be called for every 8x8 block and can read & write from line 4-15
1581 * lines 0-3 have been passed through the deblock / dering filters already, but can be read too 1581 * lines 0-3 have been passed through the deblock / dering filters already, but can be read too
1582 * lines 4-12 will be read into the deblocking filter and should be deinterlaced 1582 * lines 4-12 will be read into the deblocking filter and should be deinterlaced
1583 * this filter will read lines 3-15 and write 7-13 1583 * this filter will read lines 3-15 and write 7-13
1584 * no clipping in C version
1585 */ 1584 */
1586 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride) 1585 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
1587 { 1586 {
1588 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1587 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1589 src+= stride*3; 1588 src+= stride*3;
1629 #else 1628 #else
1630 int x; 1629 int x;
1631 src+= stride*3; 1630 src+= stride*3;
1632 for(x=0; x<8; x++) 1631 for(x=0; x<8; x++)
1633 { 1632 {
1634 src[stride*3] = (-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4; 1633 src[stride*3] = CLIP((-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4);
1635 src[stride*5] = (-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4; 1634 src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4);
1636 src[stride*7] = (-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4; 1635 src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4);
1637 src[stride*9] = (-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4; 1636 src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
1638 src++; 1637 src++;
1639 } 1638 }
1640 #endif 1639 #endif
1641 } 1640 }
1642 1641
1644 * Deinterlaces the given block by filtering every second line with a (-1 4 2 4 -1) filter. 1643 * Deinterlaces the given block by filtering every second line with a (-1 4 2 4 -1) filter.
1645 * will be called for every 8x8 block and can read & write from line 4-15 1644 * will be called for every 8x8 block and can read & write from line 4-15
1646 * lines 0-3 have been passed through the deblock / dering filters already, but can be read too 1645 * lines 0-3 have been passed through the deblock / dering filters already, but can be read too
1647 * lines 4-12 will be read into the deblocking filter and should be deinterlaced 1646 * lines 4-12 will be read into the deblocking filter and should be deinterlaced
1648 * this filter will read lines 4-13 and write 5-11 1647 * this filter will read lines 4-13 and write 5-11
1649 * no clipping in C version
1650 */ 1648 */
1651 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp) 1649 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
1652 { 1650 {
1653 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1651 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1654 src+= stride*4; 1652 src+= stride*4;
1703 for(x=0; x<8; x++) 1701 for(x=0; x<8; x++)
1704 { 1702 {
1705 int t1= tmp[x]; 1703 int t1= tmp[x];
1706 int t2= src[stride*1]; 1704 int t2= src[stride*1];
1707 1705
1708 src[stride*1]= (-t1 + 4*src[stride*0] + 2*t2 + 4*src[stride*2] - src[stride*3] + 4)>>3; 1706 src[stride*1]= CLIP((-t1 + 4*src[stride*0] + 2*t2 + 4*src[stride*2] - src[stride*3] + 4)>>3);
1709 t1= src[stride*4]; 1707 t1= src[stride*4];
1710 src[stride*3]= (-t2 + 4*src[stride*2] + 2*t1 + 4*src[stride*4] - src[stride*5] + 4)>>3; 1708 src[stride*3]= CLIP((-t2 + 4*src[stride*2] + 2*t1 + 4*src[stride*4] - src[stride*5] + 4)>>3);
1711 t2= src[stride*6]; 1709 t2= src[stride*6];
1712 src[stride*5]= (-t1 + 4*src[stride*4] + 2*t2 + 4*src[stride*6] - src[stride*7] + 4)>>3; 1710 src[stride*5]= CLIP((-t1 + 4*src[stride*4] + 2*t2 + 4*src[stride*6] - src[stride*7] + 4)>>3);
1713 t1= src[stride*8]; 1711 t1= src[stride*8];
1714 src[stride*7]= (-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3; 1712 src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3);
1715 tmp[x]= t1; 1713 tmp[x]= t1;
1714
1715 src++;
1716 }
1717 #endif
1718 }
1719
1720 /**
1721 * Deinterlaces the given block by filtering every line with a (-1 2 6 2 -1) filter.
1722 * will be called for every 8x8 block and can read & write from line 4-15
1723 * lines 0-3 have been passed through the deblock / dering filters already, but can be read too
1724 * lines 4-12 will be read into the deblocking filter and should be deinterlaced
1725 * this filter will read lines 4-13 and write 4-11
1726 */
1727 static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
1728 {
1729 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1730 src+= stride*4;
1731 asm volatile(
1732 "leal (%0, %1), %%eax \n\t"
1733 "leal (%%eax, %1, 4), %%edx \n\t"
1734 "pxor %%mm7, %%mm7 \n\t"
1735 "movq (%2), %%mm0 \n\t"
1736 "movq (%3), %%mm1 \n\t"
1737 // 0 1 2 3 4 5 6 7 8 9 10
1738 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 ecx
1739
1740 #define DEINT_L5(t1,t2,a,b,c)\
1741 "movq " #a ", %%mm2 \n\t"\
1742 "movq " #b ", %%mm3 \n\t"\
1743 "movq " #c ", %%mm4 \n\t"\
1744 PAVGB(t2, %%mm3) \
1745 PAVGB(t1, %%mm4) \
1746 "movq %%mm2, %%mm5 \n\t"\
1747 "movq %%mm2, " #t1 " \n\t"\
1748 "punpcklbw %%mm7, %%mm2 \n\t"\
1749 "punpckhbw %%mm7, %%mm5 \n\t"\
1750 "movq %%mm2, %%mm6 \n\t"\
1751 "paddw %%mm2, %%mm2 \n\t"\
1752 "paddw %%mm6, %%mm2 \n\t"\
1753 "movq %%mm5, %%mm6 \n\t"\
1754 "paddw %%mm5, %%mm5 \n\t"\
1755 "paddw %%mm6, %%mm5 \n\t"\
1756 "movq %%mm3, %%mm6 \n\t"\
1757 "punpcklbw %%mm7, %%mm3 \n\t"\
1758 "punpckhbw %%mm7, %%mm6 \n\t"\
1759 "paddw %%mm3, %%mm3 \n\t"\
1760 "paddw %%mm6, %%mm6 \n\t"\
1761 "paddw %%mm3, %%mm2 \n\t"\
1762 "paddw %%mm6, %%mm5 \n\t"\
1763 "movq %%mm4, %%mm6 \n\t"\
1764 "punpcklbw %%mm7, %%mm4 \n\t"\
1765 "punpckhbw %%mm7, %%mm6 \n\t"\
1766 "psubw %%mm4, %%mm2 \n\t"\
1767 "psubw %%mm6, %%mm5 \n\t"\
1768 "psraw $2, %%mm2 \n\t"\
1769 "psraw $2, %%mm5 \n\t"\
1770 "packuswb %%mm5, %%mm2 \n\t"\
1771 "movq %%mm2, " #a " \n\t"\
1772
1773 DEINT_L5(%%mm0, %%mm1, (%0) , (%%eax) , (%%eax, %1) )
1774 DEINT_L5(%%mm1, %%mm0, (%%eax) , (%%eax, %1) , (%%eax, %1, 2))
1775 DEINT_L5(%%mm0, %%mm1, (%%eax, %1) , (%%eax, %1, 2), (%0, %1, 4) )
1776 DEINT_L5(%%mm1, %%mm0, (%%eax, %1, 2), (%0, %1, 4) , (%%edx) )
1777 DEINT_L5(%%mm0, %%mm1, (%0, %1, 4) , (%%edx) , (%%edx, %1) )
1778 DEINT_L5(%%mm1, %%mm0, (%%edx) , (%%edx, %1) , (%%edx, %1, 2))
1779 DEINT_L5(%%mm0, %%mm1, (%%edx, %1) , (%%edx, %1, 2), (%0, %1, 8) )
1780 DEINT_L5(%%mm1, %%mm0, (%%edx, %1, 2), (%0, %1, 8) , (%%edx, %1, 4))
1781
1782 "movq %%mm0, (%2) \n\t"
1783 "movq %%mm1, (%3) \n\t"
1784 : : "r" (src), "r" (stride), "r"(tmp), "r"(tmp2)
1785 : "%eax", "%edx"
1786 );
1787 #else
1788 int x;
1789 src+= stride*4;
1790 for(x=0; x<8; x++)
1791 {
1792 int t1= tmp[x];
1793 int t2= tmp2[x];
1794 int t3= src[0];
1795
1796 src[stride*0]= CLIP((-(t1 + src[stride*2]) + 2*(t2 + src[stride*1]) + 6*t3 + 4)>>3);
1797 t1= src[stride*1];
1798 src[stride*1]= CLIP((-(t2 + src[stride*3]) + 2*(t3 + src[stride*2]) + 6*t1 + 4)>>3);
1799 t2= src[stride*2];
1800 src[stride*2]= CLIP((-(t3 + src[stride*4]) + 2*(t1 + src[stride*3]) + 6*t2 + 4)>>3);
1801 t3= src[stride*3];
1802 src[stride*3]= CLIP((-(t1 + src[stride*5]) + 2*(t2 + src[stride*4]) + 6*t3 + 4)>>3);
1803 t1= src[stride*4];
1804 src[stride*4]= CLIP((-(t2 + src[stride*6]) + 2*(t3 + src[stride*5]) + 6*t1 + 4)>>3);
1805 t2= src[stride*5];
1806 src[stride*5]= CLIP((-(t3 + src[stride*7]) + 2*(t1 + src[stride*6]) + 6*t2 + 4)>>3);
1807 t3= src[stride*6];
1808 src[stride*6]= CLIP((-(t1 + src[stride*8]) + 2*(t2 + src[stride*7]) + 6*t3 + 4)>>3);
1809 t1= src[stride*7];
1810 src[stride*7]= CLIP((-(t2 + src[stride*9]) + 2*(t3 + src[stride*8]) + 6*t1 + 4)>>3);
1811
1812 tmp[x]= t3;
1813 tmp2[x]= t1;
1716 1814
1717 src++; 1815 src++;
1718 } 1816 }
1719 #endif 1817 #endif
1720 } 1818 }
2694 } 2792 }
2695 #endif 2793 #endif
2696 2794
2697 if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16; 2795 if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16;
2698 else if( (mode & LINEAR_BLEND_DEINT_FILTER) 2796 else if( (mode & LINEAR_BLEND_DEINT_FILTER)
2699 || (mode & FFMPEG_DEINT_FILTER)) copyAhead=14; 2797 || (mode & FFMPEG_DEINT_FILTER)
2798 || (mode & LOWPASS5_DEINT_FILTER)) copyAhead=14;
2700 else if( (mode & V_DEBLOCK) 2799 else if( (mode & V_DEBLOCK)
2701 || (mode & LINEAR_IPOL_DEINT_FILTER) 2800 || (mode & LINEAR_IPOL_DEINT_FILTER)
2702 || (mode & MEDIAN_DEINT_FILTER)) copyAhead=13; 2801 || (mode & MEDIAN_DEINT_FILTER)) copyAhead=13;
2703 else if(mode & V_X1_FILTER) copyAhead=11; 2802 else if(mode & V_X1_FILTER) copyAhead=11;
2704 // else if(mode & V_RK1_FILTER) copyAhead=10; 2803 // else if(mode & V_RK1_FILTER) copyAhead=10;
2830 RENAME(deInterlaceMedian)(dstBlock, dstStride); 2929 RENAME(deInterlaceMedian)(dstBlock, dstStride);
2831 else if(mode & CUBIC_IPOL_DEINT_FILTER) 2930 else if(mode & CUBIC_IPOL_DEINT_FILTER)
2832 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride); 2931 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
2833 else if(mode & FFMPEG_DEINT_FILTER) 2932 else if(mode & FFMPEG_DEINT_FILTER)
2834 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x); 2933 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
2934 else if(mode & LOWPASS5_DEINT_FILTER)
2935 RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
2835 /* else if(mode & CUBIC_BLEND_DEINT_FILTER) 2936 /* else if(mode & CUBIC_BLEND_DEINT_FILTER)
2836 RENAME(deInterlaceBlendCubic)(dstBlock, dstStride); 2937 RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
2837 */ 2938 */
2838 dstBlock+=8; 2939 dstBlock+=8;
2839 srcBlock+=8; 2940 srcBlock+=8;
2972 RENAME(deInterlaceMedian)(dstBlock, dstStride); 3073 RENAME(deInterlaceMedian)(dstBlock, dstStride);
2973 else if(mode & CUBIC_IPOL_DEINT_FILTER) 3074 else if(mode & CUBIC_IPOL_DEINT_FILTER)
2974 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride); 3075 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
2975 else if(mode & FFMPEG_DEINT_FILTER) 3076 else if(mode & FFMPEG_DEINT_FILTER)
2976 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x); 3077 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
3078 else if(mode & LOWPASS5_DEINT_FILTER)
3079 RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
2977 /* else if(mode & CUBIC_BLEND_DEINT_FILTER) 3080 /* else if(mode & CUBIC_BLEND_DEINT_FILTER)
2978 RENAME(deInterlaceBlendCubic)(dstBlock, dstStride); 3081 RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
2979 */ 3082 */
2980 3083
2981 /* only deblock if we have 2 blocks */ 3084 /* only deblock if we have 2 blocks */