comparison dsputil.c @ 936:caa77cd960c0 libavcodec

qpel encoding; 4mv+b frames encoding finally fixed; chroma ME; 5 comparison functions for ME; b frame encoding speedup; wmv2 codec (unfinished); user specified diamond size for EPZS
author michaelni
date Fri, 27 Dec 2002 23:51:46 +0000
parents 8ae1e4c24e91
children 463f7260b155
comparison
equal deleted inserted replaced
935:c9bbd35064b6 936:caa77cd960c0
18 * 18 *
19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> 19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
20 */ 20 */
21 #include "avcodec.h" 21 #include "avcodec.h"
22 #include "dsputil.h" 22 #include "dsputil.h"
23 #include "mpegvideo.h"
23 24
24 int ff_bit_exact=0; 25 int ff_bit_exact=0;
25 26
26 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; 27 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
27 UINT32 squareTbl[512]; 28 UINT32 squareTbl[512];
142 } 143 }
143 return s; 144 return s;
144 } 145 }
145 146
146 147
147 static int pix_norm_c(UINT8 * pix1, UINT8 * pix2, int line_size) 148 static int sse8_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size)
149 {
150 int s, i;
151 UINT32 *sq = squareTbl + 256;
152
153 s = 0;
154 for (i = 0; i < 8; i++) {
155 s += sq[pix1[0] - pix2[0]];
156 s += sq[pix1[1] - pix2[1]];
157 s += sq[pix1[2] - pix2[2]];
158 s += sq[pix1[3] - pix2[3]];
159 s += sq[pix1[4] - pix2[4]];
160 s += sq[pix1[5] - pix2[5]];
161 s += sq[pix1[6] - pix2[6]];
162 s += sq[pix1[7] - pix2[7]];
163 pix1 += line_size;
164 pix2 += line_size;
165 }
166 return s;
167 }
168
169 static int sse16_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size)
148 { 170 {
149 int s, i, j; 171 int s, i, j;
150 UINT32 *sq = squareTbl + 256; 172 UINT32 *sq = squareTbl + 256;
151 173
152 s = 0; 174 s = 0;
1139 #undef op_avg 1161 #undef op_avg
1140 #undef op_avg_no_rnd 1162 #undef op_avg_no_rnd
1141 #undef op_put 1163 #undef op_put
1142 #undef op_put_no_rnd 1164 #undef op_put_no_rnd
1143 1165
1144 static int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) 1166 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1167 uint8_t *cm = cropTbl + MAX_NEG_CROP;
1168 int i;
1169
1170 for(i=0; i<h; i++){
1171 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1172 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1173 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1174 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1175 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1176 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1177 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1178 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1179 dst+=dstStride;
1180 src+=srcStride;
1181 }
1182 }
1183
1184 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1185 uint8_t *cm = cropTbl + MAX_NEG_CROP;
1186 int i;
1187
1188 for(i=0; i<w; i++){
1189 const int src_1= src[ -srcStride];
1190 const int src0 = src[0 ];
1191 const int src1 = src[ srcStride];
1192 const int src2 = src[2*srcStride];
1193 const int src3 = src[3*srcStride];
1194 const int src4 = src[4*srcStride];
1195 const int src5 = src[5*srcStride];
1196 const int src6 = src[6*srcStride];
1197 const int src7 = src[7*srcStride];
1198 const int src8 = src[8*srcStride];
1199 const int src9 = src[9*srcStride];
1200 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1201 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1202 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1203 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1204 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1205 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1206 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1207 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
1208 src++;
1209 dst++;
1210 }
1211 }
1212
1213 static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
1214 put_pixels8_c(dst, src, stride, 8);
1215 }
1216
1217 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
1218 uint8_t half[64];
1219 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1220 put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
1221 }
1222
1223 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
1224 wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
1225 }
1226
1227 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
1228 uint8_t half[64];
1229 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1230 put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
1231 }
1232
1233 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
1234 wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
1235 }
1236
1237 static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
1238 uint8_t halfH[88];
1239 uint8_t halfV[64];
1240 uint8_t halfHV[64];
1241 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1242 wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
1243 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1244 put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
1245 }
1246 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
1247 uint8_t halfH[88];
1248 uint8_t halfV[64];
1249 uint8_t halfHV[64];
1250 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1251 wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
1252 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1253 put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
1254 }
1255 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
1256 uint8_t halfH[88];
1257 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1258 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
1259 }
1260
1261
1262 static inline int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1145 { 1263 {
1146 int s, i; 1264 int s, i;
1147 1265
1148 s = 0; 1266 s = 0;
1149 for(i=0;i<16;i++) { 1267 for(i=0;i<16;i++) {
1255 pix3 += line_size; 1373 pix3 += line_size;
1256 } 1374 }
1257 return s; 1375 return s;
1258 } 1376 }
1259 1377
1260 static int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) 1378 static inline int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1261 { 1379 {
1262 int s, i; 1380 int s, i;
1263 1381
1264 s = 0; 1382 s = 0;
1265 for(i=0;i<8;i++) { 1383 for(i=0;i<8;i++) {
1339 pix3 += line_size; 1457 pix3 += line_size;
1340 } 1458 }
1341 return s; 1459 return s;
1342 } 1460 }
1343 1461
1462 static int sad16x16_c(void *s, uint8_t *a, uint8_t *b, int stride){
1463 return pix_abs16x16_c(a,b,stride);
1464 }
1465
1466 static int sad8x8_c(void *s, uint8_t *a, uint8_t *b, int stride){
1467 return pix_abs8x8_c(a,b,stride);
1468 }
1469
1344 void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last) 1470 void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last)
1345 { 1471 {
1346 int i; 1472 int i;
1347 INT16 temp[64]; 1473 INT16 temp[64];
1348 1474
1397 } 1523 }
1398 for(; i<w; i++) 1524 for(; i<w; i++)
1399 dst[i+0] = src1[i+0]-src2[i+0]; 1525 dst[i+0] = src1[i+0]-src2[i+0];
1400 } 1526 }
1401 1527
1528 #define BUTTERFLY2(o1,o2,i1,i2) \
1529 o1= (i1)+(i2);\
1530 o2= (i1)-(i2);
1531
1532 #define BUTTERFLY1(x,y) \
1533 {\
1534 int a,b;\
1535 a= x;\
1536 b= y;\
1537 x= a+b;\
1538 y= a-b;\
1539 }
1540
1541 #define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))
1542
1543 static int hadamard8_diff_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride){
1544 int i;
1545 int temp[64];
1546 int sum=0;
1547
1548 for(i=0; i<8; i++){
1549 //FIXME try pointer walks
1550 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
1551 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
1552 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
1553 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
1554
1555 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
1556 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
1557 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
1558 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
1559
1560 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
1561 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
1562 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
1563 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
1564 }
1565
1566 for(i=0; i<8; i++){
1567 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
1568 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
1569 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
1570 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
1571
1572 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
1573 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
1574 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
1575 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
1576
1577 sum +=
1578 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
1579 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
1580 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
1581 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
1582 }
1583 #if 0
1584 static int maxi=0;
1585 if(sum>maxi){
1586 maxi=sum;
1587 printf("MAX:%d\n", maxi);
1588 }
1589 #endif
1590 return sum;
1591 }
1592
1593 static int hadamard8_abs_c(uint8_t *src, int stride, int mean){
1594 int i;
1595 int temp[64];
1596 int sum=0;
1597 //FIXME OOOPS ignore 0 term instead of mean mess
1598 for(i=0; i<8; i++){
1599 //FIXME try pointer walks
1600 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-mean,src[stride*i+1]-mean);
1601 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-mean,src[stride*i+3]-mean);
1602 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-mean,src[stride*i+5]-mean);
1603 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-mean,src[stride*i+7]-mean);
1604
1605 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
1606 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
1607 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
1608 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
1609
1610 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
1611 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
1612 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
1613 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
1614 }
1615
1616 for(i=0; i<8; i++){
1617 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
1618 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
1619 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
1620 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
1621
1622 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
1623 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
1624 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
1625 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
1626
1627 sum +=
1628 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
1629 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
1630 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
1631 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
1632 }
1633
1634 return sum;
1635 }
1636
1637 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
1638 MpegEncContext * const s= (MpegEncContext *)c;
1639 DCTELEM temp[64];
1640 int sum=0, i;
1641
1642 s->dsp.diff_pixels(temp, src1, src2, stride);
1643 s->fdct(temp);
1644
1645 for(i=0; i<64; i++)
1646 sum+= ABS(temp[i]);
1647
1648 return sum;
1649 }
1650
1651 void simple_idct(INT16 *block); //FIXME
1652
1653 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
1654 MpegEncContext * const s= (MpegEncContext *)c;
1655 DCTELEM temp[64], bak[64];
1656 int sum=0, i;
1657
1658 s->mb_intra=0;
1659
1660 s->dsp.diff_pixels(temp, src1, src2, stride);
1661
1662 memcpy(bak, temp, 64*sizeof(DCTELEM));
1663
1664 s->dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
1665 s->dct_unquantize(s, temp, 0, s->qscale);
1666 simple_idct(temp); //FIXME
1667
1668 for(i=0; i<64; i++)
1669 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
1670
1671 return sum;
1672 }
1673
1674 WARPER88_1616(hadamard8_diff_c, hadamard8_diff16_c)
1675 WARPER88_1616(dct_sad8x8_c, dct_sad16x16_c)
1676 WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c)
1677
1402 void dsputil_init(DSPContext* c, unsigned mask) 1678 void dsputil_init(DSPContext* c, unsigned mask)
1403 { 1679 {
1404 static int init_done = 0; 1680 static int init_done = 0;
1405 int i; 1681 int i;
1406 1682
1427 c->gmc1 = gmc1_c; 1703 c->gmc1 = gmc1_c;
1428 c->gmc = gmc_c; 1704 c->gmc = gmc_c;
1429 c->clear_blocks = clear_blocks_c; 1705 c->clear_blocks = clear_blocks_c;
1430 c->pix_sum = pix_sum_c; 1706 c->pix_sum = pix_sum_c;
1431 c->pix_norm1 = pix_norm1_c; 1707 c->pix_norm1 = pix_norm1_c;
1432 c->pix_norm = pix_norm_c; 1708 c->sse[0]= sse16_c;
1709 c->sse[1]= sse8_c;
1433 1710
1434 /* TODO [0] 16 [1] 8 */ 1711 /* TODO [0] 16 [1] 8 */
1435 c->pix_abs16x16 = pix_abs16x16_c; 1712 c->pix_abs16x16 = pix_abs16x16_c;
1436 c->pix_abs16x16_x2 = pix_abs16x16_x2_c; 1713 c->pix_abs16x16_x2 = pix_abs16x16_x2_c;
1437 c->pix_abs16x16_y2 = pix_abs16x16_y2_c; 1714 c->pix_abs16x16_y2 = pix_abs16x16_y2_c;
1487 1764
1488 dspfunc(avg_qpel, 1, 8); 1765 dspfunc(avg_qpel, 1, 8);
1489 /* dspfunc(avg_no_rnd_qpel, 1, 8); */ 1766 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
1490 #undef dspfunc 1767 #undef dspfunc
1491 1768
1769 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
1770 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
1771 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
1772 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
1773 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
1774 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
1775 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
1776 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
1777
1778 c->hadamard8_diff[0]= hadamard8_diff16_c;
1779 c->hadamard8_diff[1]= hadamard8_diff_c;
1780 c->hadamard8_abs = hadamard8_abs_c;
1781
1782 c->dct_sad[0]= dct_sad16x16_c;
1783 c->dct_sad[1]= dct_sad8x8_c;
1784
1785 c->sad[0]= sad16x16_c;
1786 c->sad[1]= sad8x8_c;
1787
1788 c->quant_psnr[0]= quant_psnr16x16_c;
1789 c->quant_psnr[1]= quant_psnr8x8_c;
1790
1492 c->add_bytes= add_bytes_c; 1791 c->add_bytes= add_bytes_c;
1493 c->diff_bytes= diff_bytes_c; 1792 c->diff_bytes= diff_bytes_c;
1494 1793
1495 #ifdef HAVE_MMX 1794 #ifdef HAVE_MMX
1496 dsputil_init_mmx(c, mask); 1795 dsputil_init_mmx(c, mask);
1514 dsputil_init_ppc(c, mask); 1813 dsputil_init_ppc(c, mask);
1515 #endif 1814 #endif
1516 #ifdef HAVE_MMI 1815 #ifdef HAVE_MMI
1517 dsputil_init_mmi(c, mask); 1816 dsputil_init_mmi(c, mask);
1518 #endif 1817 #endif
1519
1520 } 1818 }
1521 1819
1522 /* remove any non bit exact operation (testing purpose) */ 1820 /* remove any non bit exact operation (testing purpose) */
1523 void avcodec_set_bit_exact(void) 1821 void avcodec_set_bit_exact(void)
1524 { 1822 {