Mercurial > libavcodec.hg
comparison dsputil.c @ 706:e65798d228ea libavcodec
idct permutation cleanup, idct can be selected per context now
fixing some thread-unsafe code
| author | michaelni |
|---|---|
| date | Sun, 29 Sep 2002 22:44:22 +0000 |
| parents | efcbfbd18864 |
| children | cbe316f082bc |
comparison
equal
deleted
inserted
replaced
| 705:107a56aa74f5 | 706:e65798d228ea |
|---|---|
| 18 * | 18 * |
| 19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> | 19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> |
| 20 */ | 20 */ |
| 21 #include "avcodec.h" | 21 #include "avcodec.h" |
| 22 #include "dsputil.h" | 22 #include "dsputil.h" |
| 23 #include "simple_idct.h" | 23 |
| 24 | |
| 25 void (*ff_idct)(DCTELEM *block); | |
| 26 void (*ff_idct_put)(UINT8 *dest, int line_size, DCTELEM *block); | |
| 27 void (*ff_idct_add)(UINT8 *dest, int line_size, DCTELEM *block); | |
| 28 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); | 24 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); |
| 29 void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); | 25 void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); |
| 30 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | 26 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); |
| 31 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | 27 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); |
| 32 void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); | 28 void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); |
| 47 int ff_bit_exact=0; | 43 int ff_bit_exact=0; |
| 48 | 44 |
| 49 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; | 45 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; |
| 50 UINT32 squareTbl[512]; | 46 UINT32 squareTbl[512]; |
| 51 | 47 |
| 52 extern INT16 ff_mpeg1_default_intra_matrix[64]; | 48 const UINT8 ff_zigzag_direct[64] = { |
| 53 extern INT16 ff_mpeg1_default_non_intra_matrix[64]; | 49 0, 1, 8, 16, 9, 2, 3, 10, |
| 54 extern INT16 ff_mpeg4_default_intra_matrix[64]; | 50 17, 24, 32, 25, 18, 11, 4, 5, |
| 55 extern INT16 ff_mpeg4_default_non_intra_matrix[64]; | |
| 56 | |
| 57 UINT8 zigzag_direct[64] = { | |
| 58 0, 1, 8, 16, 9, 2, 3, 10, | |
| 59 17, 24, 32, 25, 18, 11, 4, 5, | |
| 60 12, 19, 26, 33, 40, 48, 41, 34, | 51 12, 19, 26, 33, 40, 48, 41, 34, |
| 61 27, 20, 13, 6, 7, 14, 21, 28, | 52 27, 20, 13, 6, 7, 14, 21, 28, |
| 62 35, 42, 49, 56, 57, 50, 43, 36, | 53 35, 42, 49, 56, 57, 50, 43, 36, |
| 63 29, 22, 15, 23, 30, 37, 44, 51, | 54 29, 22, 15, 23, 30, 37, 44, 51, |
| 64 58, 59, 52, 45, 38, 31, 39, 46, | 55 58, 59, 52, 45, 38, 31, 39, 46, |
| 65 53, 60, 61, 54, 47, 55, 62, 63 | 56 53, 60, 61, 54, 47, 55, 62, 63 |
| 66 }; | 57 }; |
| 67 | 58 |
| 68 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ | 59 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ |
| 69 UINT16 __align8 inv_zigzag_direct16[64]; | 60 UINT16 __align8 inv_zigzag_direct16[64]; |
| 70 | 61 |
| 71 /* not permutated zigzag_direct for MMX quantizer */ | 62 const UINT8 ff_alternate_horizontal_scan[64] = { |
| 72 UINT8 zigzag_direct_noperm[64]; | 63 0, 1, 2, 3, 8, 9, 16, 17, |
| 73 | |
| 74 UINT8 ff_alternate_horizontal_scan[64] = { | |
| 75 0, 1, 2, 3, 8, 9, 16, 17, | |
| 76 10, 11, 4, 5, 6, 7, 15, 14, | 64 10, 11, 4, 5, 6, 7, 15, 14, |
| 77 13, 12, 19, 18, 24, 25, 32, 33, | 65 13, 12, 19, 18, 24, 25, 32, 33, |
| 78 26, 27, 20, 21, 22, 23, 28, 29, | 66 26, 27, 20, 21, 22, 23, 28, 29, |
| 79 30, 31, 34, 35, 40, 41, 48, 49, | 67 30, 31, 34, 35, 40, 41, 48, 49, |
| 80 42, 43, 36, 37, 38, 39, 44, 45, | 68 42, 43, 36, 37, 38, 39, 44, 45, |
| 81 46, 47, 50, 51, 56, 57, 58, 59, | 69 46, 47, 50, 51, 56, 57, 58, 59, |
| 82 52, 53, 54, 55, 60, 61, 62, 63, | 70 52, 53, 54, 55, 60, 61, 62, 63, |
| 83 }; | 71 }; |
| 84 | 72 |
| 85 UINT8 ff_alternate_vertical_scan[64] = { | 73 const UINT8 ff_alternate_vertical_scan[64] = { |
| 86 0, 8, 16, 24, 1, 9, 2, 10, | 74 0, 8, 16, 24, 1, 9, 2, 10, |
| 87 17, 25, 32, 40, 48, 56, 57, 49, | 75 17, 25, 32, 40, 48, 56, 57, 49, |
| 88 41, 33, 26, 18, 3, 11, 4, 12, | 76 41, 33, 26, 18, 3, 11, 4, 12, |
| 89 19, 27, 34, 42, 50, 58, 35, 43, | 77 19, 27, 34, 42, 50, 58, 35, 43, |
| 90 51, 59, 20, 28, 5, 13, 6, 14, | 78 51, 59, 20, 28, 5, 13, 6, 14, |
| 91 21, 29, 36, 44, 52, 60, 37, 45, | 79 21, 29, 36, 44, 52, 60, 37, 45, |
| 92 53, 61, 22, 30, 7, 15, 23, 31, | 80 53, 61, 22, 30, 7, 15, 23, 31, |
| 93 38, 46, 54, 62, 39, 47, 55, 63, | 81 38, 46, 54, 62, 39, 47, 55, 63, |
| 94 }; | 82 }; |
| 95 | |
| 96 #ifdef SIMPLE_IDCT | |
| 97 | |
| 98 /* Input permutation for the simple_idct_mmx */ | |
| 99 static UINT8 simple_mmx_permutation[64]={ | |
| 100 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | |
| 101 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |
| 102 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |
| 103 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |
| 104 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |
| 105 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |
| 106 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |
| 107 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |
| 108 }; | |
| 109 #endif | |
| 110 | 83 |
| 111 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ | 84 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ |
| 112 UINT32 inverse[256]={ | 85 UINT32 inverse[256]={ |
| 113 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, | 86 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, |
| 114 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, | 87 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, |
| 141 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, | 114 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, |
| 142 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, | 115 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, |
| 143 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, | 116 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, |
| 144 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, | 117 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, |
| 145 }; | 118 }; |
| 146 | |
| 147 /* used to skip zeros at the end */ | |
| 148 UINT8 zigzag_end[64]; | |
| 149 | |
| 150 UINT8 permutation[64]; | |
| 151 //UINT8 invPermutation[64]; | |
| 152 | |
| 153 static void build_zigzag_end(void) | |
| 154 { | |
| 155 int lastIndex; | |
| 156 int lastIndexAfterPerm=0; | |
| 157 for(lastIndex=0; lastIndex<64; lastIndex++) | |
| 158 { | |
| 159 if(zigzag_direct[lastIndex] > lastIndexAfterPerm) | |
| 160 lastIndexAfterPerm= zigzag_direct[lastIndex]; | |
| 161 zigzag_end[lastIndex]= lastIndexAfterPerm + 1; | |
| 162 } | |
| 163 } | |
| 164 | 119 |
| 165 int pix_sum_c(UINT8 * pix, int line_size) | 120 int pix_sum_c(UINT8 * pix, int line_size) |
| 166 { | 121 { |
| 167 int s, i, j; | 122 int s, i, j; |
| 168 | 123 |
| 1538 return s; | 1493 return s; |
| 1539 } | 1494 } |
| 1540 | 1495 |
| 1541 /* permute block according so that it corresponds to the MMX idct | 1496 /* permute block according so that it corresponds to the MMX idct |
| 1542 order */ | 1497 order */ |
| 1543 #ifdef SIMPLE_IDCT | 1498 void block_permute(INT16 *block, UINT8 *permutation) |
| 1544 /* general permutation, but perhaps slightly slower */ | |
| 1545 void block_permute(INT16 *block) | |
| 1546 { | 1499 { |
| 1547 int i; | 1500 int i; |
| 1548 INT16 temp[64]; | 1501 INT16 temp[64]; |
| 1549 | 1502 |
| 1550 for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i]; | 1503 for(i=0; i<64; i++) temp[ permutation[i] ] = block[i]; |
| 1551 | 1504 |
| 1552 for(i=0; i<64; i++) block[i] = temp[i]; | 1505 for(i=0; i<64; i++) block[i] = temp[i]; |
| 1553 } | 1506 } |
| 1554 #else | |
| 1555 | |
| 1556 void block_permute(INT16 *block) | |
| 1557 { | |
| 1558 int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; | |
| 1559 int i; | |
| 1560 | |
| 1561 for(i=0;i<8;i++) { | |
| 1562 tmp1 = block[1]; | |
| 1563 tmp2 = block[2]; | |
| 1564 tmp3 = block[3]; | |
| 1565 tmp4 = block[4]; | |
| 1566 tmp5 = block[5]; | |
| 1567 tmp6 = block[6]; | |
| 1568 block[1] = tmp2; | |
| 1569 block[2] = tmp4; | |
| 1570 block[3] = tmp6; | |
| 1571 block[4] = tmp1; | |
| 1572 block[5] = tmp3; | |
| 1573 block[6] = tmp5; | |
| 1574 block += 8; | |
| 1575 } | |
| 1576 } | |
| 1577 #endif | |
| 1578 | 1507 |
| 1579 void clear_blocks_c(DCTELEM *blocks) | 1508 void clear_blocks_c(DCTELEM *blocks) |
| 1580 { | 1509 { |
| 1581 memset(blocks, 0, sizeof(DCTELEM)*6*64); | 1510 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
| 1582 } | 1511 } |
| 1583 | 1512 |
| 1584 /* XXX: those functions should be suppressed ASAP when all IDCTs are | |
| 1585 converted */ | |
| 1586 void gen_idct_put(UINT8 *dest, int line_size, DCTELEM *block) | |
| 1587 { | |
| 1588 ff_idct (block); | |
| 1589 put_pixels_clamped(block, dest, line_size); | |
| 1590 } | |
| 1591 | |
| 1592 void gen_idct_add(UINT8 *dest, int line_size, DCTELEM *block) | |
| 1593 { | |
| 1594 ff_idct (block); | |
| 1595 add_pixels_clamped(block, dest, line_size); | |
| 1596 } | |
| 1597 | |
| 1598 void dsputil_init(void) | 1513 void dsputil_init(void) |
| 1599 { | 1514 { |
| 1600 int i, j; | 1515 int i, j; |
| 1601 int use_permuted_idct; | |
| 1602 | 1516 |
| 1603 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; | 1517 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; |
| 1604 for(i=0;i<MAX_NEG_CROP;i++) { | 1518 for(i=0;i<MAX_NEG_CROP;i++) { |
| 1605 cropTbl[i] = 0; | 1519 cropTbl[i] = 0; |
| 1606 cropTbl[i + MAX_NEG_CROP + 256] = 255; | 1520 cropTbl[i + MAX_NEG_CROP + 256] = 255; |
| 1608 | 1522 |
| 1609 for(i=0;i<512;i++) { | 1523 for(i=0;i<512;i++) { |
| 1610 squareTbl[i] = (i - 256) * (i - 256); | 1524 squareTbl[i] = (i - 256) * (i - 256); |
| 1611 } | 1525 } |
| 1612 | 1526 |
| 1613 #ifdef SIMPLE_IDCT | |
| 1614 ff_idct = NULL; | |
| 1615 #else | |
| 1616 ff_idct = j_rev_dct; | |
| 1617 #endif | |
| 1618 get_pixels = get_pixels_c; | 1527 get_pixels = get_pixels_c; |
| 1619 diff_pixels = diff_pixels_c; | 1528 diff_pixels = diff_pixels_c; |
| 1620 put_pixels_clamped = put_pixels_clamped_c; | 1529 put_pixels_clamped = put_pixels_clamped_c; |
| 1621 add_pixels_clamped = add_pixels_clamped_c; | 1530 add_pixels_clamped = add_pixels_clamped_c; |
| 1622 gmc1= gmc1_c; | 1531 gmc1= gmc1_c; |
| 1631 pix_abs8x8 = pix_abs8x8_c; | 1540 pix_abs8x8 = pix_abs8x8_c; |
| 1632 pix_abs8x8_x2 = pix_abs8x8_x2_c; | 1541 pix_abs8x8_x2 = pix_abs8x8_x2_c; |
| 1633 pix_abs8x8_y2 = pix_abs8x8_y2_c; | 1542 pix_abs8x8_y2 = pix_abs8x8_y2_c; |
| 1634 pix_abs8x8_xy2 = pix_abs8x8_xy2_c; | 1543 pix_abs8x8_xy2 = pix_abs8x8_xy2_c; |
| 1635 | 1544 |
| 1636 use_permuted_idct = 1; | |
| 1637 | |
| 1638 #ifdef HAVE_MMX | 1545 #ifdef HAVE_MMX |
| 1639 dsputil_init_mmx(); | 1546 dsputil_init_mmx(); |
| 1640 #endif | 1547 #endif |
| 1641 #ifdef ARCH_ARMV4L | 1548 #ifdef ARCH_ARMV4L |
| 1642 dsputil_init_armv4l(); | 1549 dsputil_init_armv4l(); |
| 1643 #endif | 1550 #endif |
| 1644 #ifdef HAVE_MLIB | 1551 #ifdef HAVE_MLIB |
| 1645 dsputil_init_mlib(); | 1552 dsputil_init_mlib(); |
| 1646 use_permuted_idct = 0; | |
| 1647 #endif | 1553 #endif |
| 1648 #ifdef ARCH_ALPHA | 1554 #ifdef ARCH_ALPHA |
| 1649 dsputil_init_alpha(); | 1555 dsputil_init_alpha(); |
| 1650 use_permuted_idct = 0; | |
| 1651 #endif | 1556 #endif |
| 1652 #ifdef ARCH_POWERPC | 1557 #ifdef ARCH_POWERPC |
| 1653 dsputil_init_ppc(); | 1558 dsputil_init_ppc(); |
| 1654 #endif | 1559 #endif |
| 1655 #ifdef HAVE_MMI | 1560 #ifdef HAVE_MMI |
| 1656 dsputil_init_mmi(); | 1561 dsputil_init_mmi(); |
| 1657 use_permuted_idct = 0; | |
| 1658 #endif | 1562 #endif |
| 1659 | 1563 |
| 1660 #ifdef SIMPLE_IDCT | 1564 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; |
| 1661 if (ff_idct == NULL) { | |
| 1662 ff_idct_put = simple_idct_put; | |
| 1663 ff_idct_add = simple_idct_add; | |
| 1664 use_permuted_idct=0; | |
| 1665 } | |
| 1666 #endif | |
| 1667 if(ff_idct != NULL) { | |
| 1668 ff_idct_put = gen_idct_put; | |
| 1669 ff_idct_add = gen_idct_add; | |
| 1670 } | |
| 1671 | |
| 1672 if(use_permuted_idct) | |
| 1673 #ifdef SIMPLE_IDCT | |
| 1674 for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i]; | |
| 1675 #else | |
| 1676 for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); | |
| 1677 #endif | |
| 1678 else | |
| 1679 for(i=0; i<64; i++) permutation[i]=i; | |
| 1680 | |
| 1681 for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1; | |
| 1682 for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i]; | |
| 1683 | |
| 1684 if (use_permuted_idct) { | |
| 1685 /* permute for IDCT */ | |
| 1686 for(i=0;i<64;i++) { | |
| 1687 j = zigzag_direct[i]; | |
| 1688 zigzag_direct[i] = block_permute_op(j); | |
| 1689 j = ff_alternate_horizontal_scan[i]; | |
| 1690 ff_alternate_horizontal_scan[i] = block_permute_op(j); | |
| 1691 j = ff_alternate_vertical_scan[i]; | |
| 1692 ff_alternate_vertical_scan[i] = block_permute_op(j); | |
| 1693 } | |
| 1694 block_permute(ff_mpeg1_default_intra_matrix); | |
| 1695 block_permute(ff_mpeg1_default_non_intra_matrix); | |
| 1696 block_permute(ff_mpeg4_default_intra_matrix); | |
| 1697 block_permute(ff_mpeg4_default_non_intra_matrix); | |
| 1698 } | |
| 1699 | |
| 1700 build_zigzag_end(); | |
| 1701 } | 1565 } |
| 1702 | 1566 |
| 1703 /* remove any non bit exact operation (testing purpose) */ | 1567 /* remove any non bit exact operation (testing purpose) */ |
| 1704 void avcodec_set_bit_exact(void) | 1568 void avcodec_set_bit_exact(void) |
| 1705 { | 1569 { |
