Mercurial > libavcodec.hg
annotate snow.h @ 3559:c02459cd0d31 libavcodec
slightly faster ff_imdct_calc_3dn2() on amd64. (gcc added a bunch of useless movsxd)
| author | lorenm |
|---|---|
| date | Tue, 08 Aug 2006 21:47:11 +0000 |
| parents | fb245e797c5d |
| children | 6310389a9688 |
| rev | line source |
|---|---|
| 3198 | 1 /* |
| 2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at> | |
| 3 * Copyright (C) 2006 Robert Edele <yartrebo@earthlink.net> | |
| 4 * | |
| 5 * This library is free software; you can redistribute it and/or | |
| 6 * modify it under the terms of the GNU Lesser General Public | |
| 7 * License as published by the Free Software Foundation; either | |
| 8 * version 2 of the License, or (at your option) any later version. | |
| 9 * | |
| 10 * This library is distributed in the hope that it will be useful, | |
| 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 13 * Lesser General Public License for more details. | |
| 14 * | |
| 15 * You should have received a copy of the GNU Lesser General Public | |
| 16 * License along with this library; if not, write to the Free Software | |
| 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
| 18 */ | |
| 19 | |
| 20 #ifndef _SNOW_H | |
| 21 #define _SNOW_H | |
| 22 | |
| 23 #include "dsputil.h" | |
| 24 | |
/* Codec-wide constants. */
#define MID_STATE          128

#define MAX_DECOMPOSITIONS 8
#define MAX_PLANES         4
#define QSHIFT             5
#define QROOT              (1<<QSHIFT)   /* 2^QSHIFT */
#define LOSSLESS_QLOG      -128
#define FRAC_BITS          8
#define MAX_REF_FRAMES     8

#define LOG2_OBMC_MAX      8
#define OBMC_MAX           (1<<(LOG2_OBMC_MAX))   /* 2^LOG2_OBMC_MAX */

/* Wavelet type selectors (presumably 9/7, 5/3 and an extra variant -- TODO confirm against users). */
#define DWT_97             0
#define DWT_53             1
#define DWT_X              2
| 41 | |
| 3198 | 42 /** Used to minimize the amount of memory used in order to optimize cache performance. **/ |
| 43 struct slice_buffer_s { | |
| 44 DWTELEM * * line; ///< For use by idwt and predict_slices. | |
| 45 DWTELEM * * data_stack; ///< Used for internal purposes. | |
| 46 int data_stack_top; | |
| 47 int line_count; | |
| 48 int line_width; | |
| 49 int data_count; | |
| 50 DWTELEM * base_buffer; ///< Buffer that this structure is caching. | |
| 51 }; | |
| 52 | |
/*
 * Lifting-step aliases and coefficients.
 *
 * liftS and lift5 both start out as aliases for lift. The coefficient set that
 * is currently enabled (the "#if 1" branch) removes the liftS alias again, so
 * in that configuration only lift5 remains mapped to lift.
 *
 * Each step (A, B, C, D) has three constants. Judging by how W_BO and W_BS are
 * used in snow_horizontal_compose_liftS_lead_out(), the suffixes appear to be
 * M = multiplier, O = offset added before the shift, S = right-shift amount.
 *
 * The "#elif 0" and "#else" branches hold alternative coefficient sets that
 * are compiled out.
 */
#define liftS lift
#define lift5 lift

#if 1
/* Enabled set. */
#define W_AM 3
#define W_AO 0
#define W_AS 1

#undef liftS
#define W_BM 1
#define W_BO 8
#define W_BS 4

#define W_CM 1
#define W_CO 0
#define W_CS 0

#define W_DM 3
#define W_DO 4
#define W_DS 3
#elif 0
/* Disabled alternative set. */
#define W_AM 55
#define W_AO 16
#define W_AS 5

#define W_BM 3
#define W_BO 32
#define W_BS 6

#define W_CM 127
#define W_CO 64
#define W_CS 7

#define W_DM 7
#define W_DO 8
#define W_DS 4
#elif 0
/* Disabled alternative set. */
#define W_AM 97
#define W_AO 32
#define W_AS 6

#define W_BM 63
#define W_BO 512
#define W_BS 10

#define W_CM 13
#define W_CO 8
#define W_CS 4

#define W_DM 15
#define W_DO 16
#define W_DS 5

#else
/* Disabled alternative set (unreachable while the first branch is "#if 1"). */

#define W_AM 203
#define W_AO 64
#define W_AS 7

#define W_BM 217
#define W_BO 2048
#define W_BS 12

#define W_CM 113
#define W_CO 64
#define W_CS 7

#define W_DM 227
#define W_DO 128
#define W_DS 9
#endif
| 123 | |
| 124 extern void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); | |
| 125 extern void ff_snow_horizontal_compose97i(DWTELEM *b, int width); | |
| 126 extern void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); | |
| 127 | |
|
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3314
diff
changeset
|
/* 32x32 wavelet compare function (presumably the 5/3 wavelet variant -- TODO confirm); defined elsewhere. */
int w53_32_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
|
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3314
diff
changeset
|
/* 32x32 wavelet compare function (presumably the 9/7 wavelet variant -- TODO confirm); defined elsewhere. */
int w97_32_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
|
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3314
diff
changeset
|
130 |
| 3223 | 131 |
| 132 /* C bits used by mmx/sse2/altivec */ | |
| 133 | |
| 134 static always_inline void snow_interleave_line_header(int * i, int width, DWTELEM * low, DWTELEM * high){ | |
| 135 (*i) = (width) - 2; | |
| 136 | |
| 137 if (width & 1){ | |
| 138 low[(*i)+1] = low[((*i)+1)>>1]; | |
| 139 (*i)--; | |
| 140 } | |
| 141 } | |
| 142 | |
| 143 static always_inline void snow_interleave_line_footer(int * i, DWTELEM * low, DWTELEM * high){ | |
| 144 for (; (*i)>=0; (*i)-=2){ | |
| 145 low[(*i)+1] = high[(*i)>>1]; | |
| 146 low[*i] = low[(*i)>>1]; | |
| 147 } | |
| 148 } | |
| 149 | |
| 150 static always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){ | |
| 151 for(; i<w; i++){ | |
| 152 dst[i] = src[i] - ((mul * (ref[i] + ref[i + 1]) + add) >> shift); | |
| 153 } | |
| 154 | |
| 155 if((width^lift_high)&1){ | |
| 156 dst[w] = src[w] - ((mul * 2 * ref[w] + add) >> shift); | |
| 157 } | |
| 158 } | |
| 159 | |
| 160 static always_inline void snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){ | |
| 161 for(; i<w; i++){ | |
| 162 dst[i] = src[i] - (((-(ref[i] + ref[(i+1)])+W_BO) - 4 * src[i]) >> W_BS); | |
| 163 } | |
| 164 | |
| 165 if(width&1){ | |
| 166 dst[w] = src[w] - (((-2 * ref[w] + W_BO) - 4 * src[w]) >> W_BS); | |
| 167 } | |
| 168 } | |
| 169 | |
| 3198 | 170 #endif |
