comparison libpostproc/postprocess_template.c @ 1581:d2fc92d02bf7 libavcodec

linear blend 1 line shift fix
author michael
date Tue, 28 Oct 2003 09:23:26 +0000
parents cf65e69400ec
children ea5200a9f730
comparison
equal deleted inserted replaced
1580:628bf341e099 1581:d2fc92d02bf7
1803 /** 1803 /**
1804 * Deinterlaces the given block by filtering all lines with a (1 2 1) filter. 1804 * Deinterlaces the given block by filtering all lines with a (1 2 1) filter.
1805 * will be called for every 8x8 block and can read & write from line 4-15 1805 * will be called for every 8x8 block and can read & write from line 4-15
1806 * lines 0-3 have been passed through the deblock / dering filters already, but can be read too 1806 * lines 0-3 have been passed through the deblock / dering filters already, but can be read too
1807 * lines 4-12 will be read into the deblocking filter and should be deinterlaced 1807 * lines 4-12 will be read into the deblocking filter and should be deinterlaced
1808 * will shift the image up by 1 line (FIXME if this is a problem)
1809 * this filter will read lines 4-13 and write 4-11 1808 * this filter will read lines 4-12 (plus the line saved in tmp) and write 4-11
1810 */ 1809 */
1811 static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride) 1810 static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
1812 { 1811 {
1813 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1812 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1814 src+= 4*stride; 1813 src+= 4*stride;
1815 asm volatile( 1814 asm volatile(
1816 "leal (%0, %1), %%eax \n\t" 1815 "leal (%0, %1), %%eax \n\t"
1817 "leal (%%eax, %1, 4), %%edx \n\t" 1816 "leal (%%eax, %1, 4), %%edx \n\t"
1818 // 0 1 2 3 4 5 6 7 8 9 1817 // 0 1 2 3 4 5 6 7 8 9
1819 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 1818 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1
1820 1819
1821 "movq (%0), %%mm0 \n\t" // L0 1820 "movq (%2), %%mm0 \n\t" // L0
1822 "movq (%%eax, %1), %%mm1 \n\t" // L2 1821 "movq (%%eax), %%mm1 \n\t" // L2
1823 PAVGB(%%mm1, %%mm0) // L0+L2 1822 PAVGB(%%mm1, %%mm0) // L0+L2
1824 "movq (%%eax), %%mm2 \n\t" // L1 1823 "movq (%0), %%mm2 \n\t" // L1
1825 PAVGB(%%mm2, %%mm0) 1824 PAVGB(%%mm2, %%mm0)
1826 "movq %%mm0, (%0) \n\t" 1825 "movq %%mm0, (%0) \n\t"
1827 "movq (%%eax, %1, 2), %%mm0 \n\t" // L3 1826 "movq (%%eax, %1), %%mm0 \n\t" // L3
1828 PAVGB(%%mm0, %%mm2) // L1+L3 1827 PAVGB(%%mm0, %%mm2) // L1+L3
1829 PAVGB(%%mm1, %%mm2) // 2L2 + L1 + L3 1828 PAVGB(%%mm1, %%mm2) // 2L2 + L1 + L3
1830 "movq %%mm2, (%%eax) \n\t" 1829 "movq %%mm2, (%%eax) \n\t"
1831 "movq (%0, %1, 4), %%mm2 \n\t" // L4 1830 "movq (%%eax, %1, 2), %%mm2 \n\t" // L4
1832 PAVGB(%%mm2, %%mm1) // L2+L4 1831 PAVGB(%%mm2, %%mm1) // L2+L4
1833 PAVGB(%%mm0, %%mm1) // 2L3 + L2 + L4 1832 PAVGB(%%mm0, %%mm1) // 2L3 + L2 + L4
1834 "movq %%mm1, (%%eax, %1) \n\t" 1833 "movq %%mm1, (%%eax, %1) \n\t"
1835 "movq (%%edx), %%mm1 \n\t" // L5 1834 "movq (%0, %1, 4), %%mm1 \n\t" // L5
1836 PAVGB(%%mm1, %%mm0) // L3+L5 1835 PAVGB(%%mm1, %%mm0) // L3+L5
1837 PAVGB(%%mm2, %%mm0) // 2L4 + L3 + L5 1836 PAVGB(%%mm2, %%mm0) // 2L4 + L3 + L5
1838 "movq %%mm0, (%%eax, %1, 2) \n\t" 1837 "movq %%mm0, (%%eax, %1, 2) \n\t"
1839 "movq (%%edx, %1), %%mm0 \n\t" // L6 1838 "movq (%%edx), %%mm0 \n\t" // L6
1840 PAVGB(%%mm0, %%mm2) // L4+L6 1839 PAVGB(%%mm0, %%mm2) // L4+L6
1841 PAVGB(%%mm1, %%mm2) // 2L5 + L4 + L6 1840 PAVGB(%%mm1, %%mm2) // 2L5 + L4 + L6
1842 "movq %%mm2, (%0, %1, 4) \n\t" 1841 "movq %%mm2, (%0, %1, 4) \n\t"
1843 "movq (%%edx, %1, 2), %%mm2 \n\t" // L7 1842 "movq (%%edx, %1), %%mm2 \n\t" // L7
1844 PAVGB(%%mm2, %%mm1) // L5+L7 1843 PAVGB(%%mm2, %%mm1) // L5+L7
1845 PAVGB(%%mm0, %%mm1) // 2L6 + L5 + L7 1844 PAVGB(%%mm0, %%mm1) // 2L6 + L5 + L7
1846 "movq %%mm1, (%%edx) \n\t" 1845 "movq %%mm1, (%%edx) \n\t"
1847 "movq (%0, %1, 8), %%mm1 \n\t" // L8 1846 "movq (%%edx, %1, 2), %%mm1 \n\t" // L8
1848 PAVGB(%%mm1, %%mm0) // L6+L8 1847 PAVGB(%%mm1, %%mm0) // L6+L8
1849 PAVGB(%%mm2, %%mm0) // 2L7 + L6 + L8 1848 PAVGB(%%mm2, %%mm0) // 2L7 + L6 + L8
1850 "movq %%mm0, (%%edx, %1) \n\t" 1849 "movq %%mm0, (%%edx, %1) \n\t"
1851 "movq (%%edx, %1, 4), %%mm0 \n\t" // L9 1850 "movq (%0, %1, 8), %%mm0 \n\t" // L9
1852 PAVGB(%%mm0, %%mm2) // L7+L9 1851 PAVGB(%%mm0, %%mm2) // L7+L9
1853 PAVGB(%%mm1, %%mm2) // 2L8 + L7 + L9 1852 PAVGB(%%mm1, %%mm2) // 2L8 + L7 + L9
1854 "movq %%mm2, (%%edx, %1, 2) \n\t" 1853 "movq %%mm2, (%%edx, %1, 2) \n\t"
1855 1854 "movq %%mm1, (%2) \n\t"
1856 1855
1857 : : "r" (src), "r" (stride) 1856 : : "r" (src), "r" (stride), "r" (tmp)
1858 : "%eax", "%edx" 1857 : "%eax", "%edx"
1859 ); 1858 );
1860 #else 1859 #else
1861 int a, b, c, x; 1860 int a, b, c, x;
1862 src+= 4*stride; 1861 src+= 4*stride;
1863 1862
1864 for(x=0; x<2; x++){ 1863 for(x=0; x<2; x++){
1865 a= *(uint32_t*)&src[stride*0]; 1864 a= *(uint32_t*)&tmp[stride*0];
1866 b= *(uint32_t*)&src[stride*1]; 1865 b= *(uint32_t*)&src[stride*0];
1867 c= *(uint32_t*)&src[stride*2]; 1866 c= *(uint32_t*)&src[stride*1];
1868 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1); 1867 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
1869 *(uint32_t*)&src[stride*0]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1); 1868 *(uint32_t*)&src[stride*0]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
1870 1869
1871 a= *(uint32_t*)&src[stride*3]; 1870 a= *(uint32_t*)&src[stride*2];
1872 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1); 1871 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
1873 *(uint32_t*)&src[stride*1]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1); 1872 *(uint32_t*)&src[stride*1]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
1874 1873
1875 b= *(uint32_t*)&src[stride*4]; 1874 b= *(uint32_t*)&src[stride*3];
1876 c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1); 1875 c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
1877 *(uint32_t*)&src[stride*2]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1); 1876 *(uint32_t*)&src[stride*2]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
1878 1877
1879 c= *(uint32_t*)&src[stride*5]; 1878 c= *(uint32_t*)&src[stride*4];
1880 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1); 1879 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
1881 *(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1); 1880 *(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
1882 1881
1883 a= *(uint32_t*)&src[stride*6]; 1882 a= *(uint32_t*)&src[stride*5];
1884 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1); 1883 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
1885 *(uint32_t*)&src[stride*4]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1); 1884 *(uint32_t*)&src[stride*4]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
1886 1885
1887 b= *(uint32_t*)&src[stride*7]; 1886 b= *(uint32_t*)&src[stride*6];
1888 c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1); 1887 c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
1889 *(uint32_t*)&src[stride*5]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1); 1888 *(uint32_t*)&src[stride*5]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
1890 1889
1891 c= *(uint32_t*)&src[stride*8]; 1890 c= *(uint32_t*)&src[stride*7];
1892 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1); 1891 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
1893 *(uint32_t*)&src[stride*6]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1); 1892 *(uint32_t*)&src[stride*6]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
1894 1893
1895 a= *(uint32_t*)&src[stride*9]; 1894 a= *(uint32_t*)&src[stride*8];
1896 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1); 1895 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
1897 *(uint32_t*)&src[stride*7]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1); 1896 *(uint32_t*)&src[stride*7]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
1898 1897
1898 *(uint32_t*)&tmp[stride*0]= c;
1899 src += 4; 1899 src += 4;
1900 tmp += 4;
1900 } 1901 }
1901 #endif 1902 #endif
1902 } 1903 }
1903 1904
1904 /** 1905 /**
2931 RENAME(duplicate)(dstBlock + dstStride*8, dstStride); 2932 RENAME(duplicate)(dstBlock + dstStride*8, dstStride);
2932 2933
2933 if(mode & LINEAR_IPOL_DEINT_FILTER) 2934 if(mode & LINEAR_IPOL_DEINT_FILTER)
2934 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride); 2935 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
2935 else if(mode & LINEAR_BLEND_DEINT_FILTER) 2936 else if(mode & LINEAR_BLEND_DEINT_FILTER)
2936 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride); 2937 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
2937 else if(mode & MEDIAN_DEINT_FILTER) 2938 else if(mode & MEDIAN_DEINT_FILTER)
2938 RENAME(deInterlaceMedian)(dstBlock, dstStride); 2939 RENAME(deInterlaceMedian)(dstBlock, dstStride);
2939 else if(mode & CUBIC_IPOL_DEINT_FILTER) 2940 else if(mode & CUBIC_IPOL_DEINT_FILTER)
2940 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride); 2941 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
2941 else if(mode & FFMPEG_DEINT_FILTER) 2942 else if(mode & FFMPEG_DEINT_FILTER)
3075 srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset); 3076 srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
3076 3077
3077 if(mode & LINEAR_IPOL_DEINT_FILTER) 3078 if(mode & LINEAR_IPOL_DEINT_FILTER)
3078 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride); 3079 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
3079 else if(mode & LINEAR_BLEND_DEINT_FILTER) 3080 else if(mode & LINEAR_BLEND_DEINT_FILTER)
3080 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride); 3081 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
3081 else if(mode & MEDIAN_DEINT_FILTER) 3082 else if(mode & MEDIAN_DEINT_FILTER)
3082 RENAME(deInterlaceMedian)(dstBlock, dstStride); 3083 RENAME(deInterlaceMedian)(dstBlock, dstStride);
3083 else if(mode & CUBIC_IPOL_DEINT_FILTER) 3084 else if(mode & CUBIC_IPOL_DEINT_FILTER)
3084 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride); 3085 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
3085 else if(mode & FFMPEG_DEINT_FILTER) 3086 else if(mode & FFMPEG_DEINT_FILTER)