Mercurial > libavcodec.hg
annotate jrevdct.c @ 8624:b1663f732e67 libavcodec
Fix 10L in r16670 (broke deblocking code)
| author | darkshikari |
|---|---|
| date | Sun, 18 Jan 2009 07:20:12 +0000 |
| parents | f7cbb7733146 |
| children | e9d9d946f213 |
| rev | line source |
|---|---|
| 0 | 1 /* |
| 2 * jrevdct.c | |
| 3 * | |
| 4 * This file is part of the Independent JPEG Group's software. | |
|
3669
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
5 * |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
6 * The authors make NO WARRANTY or representation, either express or implied, |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
7 * with respect to this software, its quality, accuracy, merchantability, or |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
8 * fitness for a particular purpose. This software is provided "AS IS", and |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
9 * you, its user, assume the entire risk as to its quality and accuracy. |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
10 * |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
11 * This software is copyright (C) 1991, 1992, Thomas G. Lane. |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
12 * All Rights Reserved except as specified below. |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
13 * |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
14 * Permission is hereby granted to use, copy, modify, and distribute this |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
15 * software (or portions thereof) for any purpose, without fee, subject to |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
16 * these conditions: |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
17 * (1) If any part of the source code for this software is distributed, then |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
18 * this README file must be included, with this copyright and no-warranty |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
19 * notice unaltered; and any additions, deletions, or changes to the original |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
20 * files must be clearly indicated in accompanying documentation. |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
21 * (2) If only executable code is distributed, then the accompanying |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
22 * documentation must state that "this software is based in part on the work |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
23 * of the Independent JPEG Group". |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
24 * (3) Permission for use of this software is granted only if the user accepts |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
25 * full responsibility for any undesirable consequences; the authors accept |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
26 * NO LIABILITY for damages of any kind. |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
27 * |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
28 * These conditions apply to any software derived from or based on the IJG |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
29 * code, not just to the unmodified library. If you use our work, you ought |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
30 * to acknowledge us. |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
31 * |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
32 * Permission is NOT granted for the use of any IJG author's name or company |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
33 * name in advertising or publicity relating to this software or products |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
34 * derived from it. This software may be referred to only as "the Independent |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
35 * JPEG Group's software". |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
36 * |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
37 * We specifically permit and encourage the use of this software as the basis |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
38 * of commercial products, provided that all warranty or liability claims are |
|
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
39 * assumed by the product vendor. |
| 0 | 40 * |
| 41 * This file contains the basic inverse-DCT transformation subroutine. | |
| 42 * | |
| 43 * This implementation is based on an algorithm described in | |
| 44 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT | |
| 45 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, | |
| 46 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. | |
| 47 * The primary algorithm described there uses 11 multiplies and 29 adds. | |
| 48 * We use their alternate method with 12 multiplies and 32 adds. | |
| 49 * The advantage of this method is that no data path contains more than one | |
| 50 * multiplication; this allows a very simple and accurate implementation in | |
| 51 * scaled fixed-point arithmetic, with a minimal number of shifts. | |
| 2967 | 52 * |
| 0 | 53 * I've made lots of modifications to attempt to take advantage of the |
| 54 * sparse nature of the DCT matrices we're getting. Although the logic | |
| 55 * is cumbersome, it's straightforward and the resulting code is much | |
| 56 * faster. | |
| 57 * | |
| 58 * A better way to do this would be to pass in the DCT block as a sparse | |
| 59 * matrix, perhaps with the different cases encoded. | |
| 60 */ | |
| 2967 | 61 |
| 1106 | 62 /** |
| 63 * @file jrevdct.c | |
| 64 * Independent JPEG Group's LLM (Loeffler, Ligtenberg, Moschytz) IDCT. | |
| 65 */ | |
| 2967 | 66 |
| 6763 | 67 #include "libavutil/common.h" |
| 0 | 68 #include "dsputil.h" |
| 69 | |
| 70 #define EIGHT_BIT_SAMPLES | |
| 71 | |
| 72 #define DCTSIZE 8 | |
| 73 #define DCTSIZE2 64 | |
| 74 | |
| 75 #define GLOBAL | |
| 76 | |
| 77 #define RIGHT_SHIFT(x, n) ((x) >> (n)) | |
| 78 | |
| 79 typedef DCTELEM DCTBLOCK[DCTSIZE2]; | |
| 80 | |
| 81 #define CONST_BITS 13 | |
| 82 | |
| 83 /* | |
| 84 * This routine is specialized to the case DCTSIZE = 8. | |
| 85 */ | |
| 86 | |
| 87 #if DCTSIZE != 8 | |
| 88 Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ | |
| 89 #endif | |
| 90 | |
| 91 | |
| 92 /* | |
| 93 * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT | |
| 94 * on each column. Direct algorithms are also available, but they are | |
| 95 * much more complex and seem not to be any faster when reduced to code. | |
| 96 * | |
| 97 * The poop on this scaling stuff is as follows: | |
| 98 * | |
| 99 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) | |
| 100 * larger than the true IDCT outputs. The final outputs are therefore | |
| 101 * a factor of N larger than desired; since N=8 this can be cured by | |
| 102 * a simple right shift at the end of the algorithm. The advantage of | |
| 103 * this arrangement is that we save two multiplications per 1-D IDCT, | |
| 104 * because the y0 and y4 inputs need not be divided by sqrt(N). | |
| 105 * | |
| 106 * We have to do addition and subtraction of the integer inputs, which | |
| 107 * is no problem, and multiplication by fractional constants, which is | |
| 108 * a problem to do in integer arithmetic. We multiply all the constants | |
| 109 * by CONST_SCALE and convert them to integer constants (thus retaining | |
| 110 * CONST_BITS bits of precision in the constants). After doing a | |
| 111 * multiplication we have to divide the product by CONST_SCALE, with proper | |
| 112 * rounding, to produce the correct output. This division can be done | |
| 113 * cheaply as a right shift of CONST_BITS bits. We postpone shifting | |
| 114 * as long as possible so that partial sums can be added together with | |
| 115 * full fractional precision. | |
| 116 * | |
| 117 * The outputs of the first pass are scaled up by PASS1_BITS bits so that | |
| 118 * they are represented to better-than-integral precision. These outputs | |
| 119 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word | |
| 120 * with the recommended scaling. (To scale up 12-bit sample data further, an | |
| 121 * intermediate int32 array would be needed.) | |
| 122 * | |
| 123 * To avoid overflow of the 32-bit intermediate results in pass 2, we must | |
| 124 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis | |
| 125 * shows that the values given below are the most effective. | |
| 126 */ | |
| 127 | |
| 128 #ifdef EIGHT_BIT_SAMPLES | |
| 129 #define PASS1_BITS 2 | |
| 130 #else | |
| 2979 | 131 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ |
| 0 | 132 #endif |
| 133 | |
| 2979 | 134 #define ONE ((int32_t) 1) |
| 0 | 135 |
| 136 #define CONST_SCALE (ONE << CONST_BITS) | |
| 137 | |
| 138 /* Convert a positive real constant to an integer scaled by CONST_SCALE. | |
| 139 * IMPORTANT: if your compiler doesn't do this arithmetic at compile time, | |
| 140 * you will pay a significant penalty in run time. In that case, figure | |
| 141 * the correct integer constant values and insert them by hand. | |
| 142 */ | |
| 143 | |
| 144 /* Actually FIX is no longer used; the constants are all precomputed. */ | |
| 2979 | 145 #define FIX(x) ((int32_t) ((x) * CONST_SCALE + 0.5)) |
| 0 | 146 |
| 1064 | 147 /* Descale and correctly round an int32_t value that's scaled by N bits. |
| 0 | 148 * We assume RIGHT_SHIFT rounds towards minus infinity, so adding |
| 149 * the fudge factor is correct for either sign of X. | |
| 150 */ | |
| 151 | |
| 152 #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) | |
| 153 | |
| 1064 | 154 /* Multiply an int32_t variable by an int32_t constant to yield an int32_t result. |
| 0 | 155 * For 8-bit samples with the recommended scaling, all the variable |
| 156 * and constant values involved are no more than 16 bits wide, so a | |
| 157 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply; | |
| 158 * this provides a useful speedup on many machines. | |
| 159 * There is no way to specify a 16x16->32 multiply in portable C, but | |
| 160 * some C compilers will do the right thing if you provide the correct | |
| 161 * combination of casts. | |
| 162 * NB: for 12-bit samples, a full 32-bit multiplication will be needed. | |
| 163 */ | |
| 164 | |
| 165 #ifdef EIGHT_BIT_SAMPLES | |
| 2979 | 166 #ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ |
| 1064 | 167 #define MULTIPLY(var,const) (((int16_t) (var)) * ((int16_t) (const))) |
| 0 | 168 #endif |
| 2979 | 169 #ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ |
| 1064 | 170 #define MULTIPLY(var,const) (((int16_t) (var)) * ((int32_t) (const))) |
| 0 | 171 #endif |
| 172 #endif | |
| 173 | |
| 2979 | 174 #ifndef MULTIPLY /* default definition */ |
| 0 | 175 #define MULTIPLY(var,const) ((var) * (const)) |
| 176 #endif | |
| 177 | |
| 178 | |
| 2967 | 179 /* |
| 0 | 180 Unlike our decoder, where we approximate the FIXes, we need to use exact |
| 2967 | 181 ones here, or successive P-frames will drift too much with reference frame coding. |
| 0 | 182 */ |
| 183 #define FIX_0_211164243 1730 | |
| 184 #define FIX_0_275899380 2260 | |
| 185 #define FIX_0_298631336 2446 | |
| 186 #define FIX_0_390180644 3196 | |
| 187 #define FIX_0_509795579 4176 | |
| 188 #define FIX_0_541196100 4433 | |
| 189 #define FIX_0_601344887 4926 | |
| 190 #define FIX_0_765366865 6270 | |
| 191 #define FIX_0_785694958 6436 | |
| 192 #define FIX_0_899976223 7373 | |
| 193 #define FIX_1_061594337 8697 | |
| 194 #define FIX_1_111140466 9102 | |
| 195 #define FIX_1_175875602 9633 | |
| 196 #define FIX_1_306562965 10703 | |
| 197 #define FIX_1_387039845 11363 | |
| 198 #define FIX_1_451774981 11893 | |
| 199 #define FIX_1_501321110 12299 | |
| 200 #define FIX_1_662939225 13623 | |
| 201 #define FIX_1_847759065 15137 | |
| 202 #define FIX_1_961570560 16069 | |
| 203 #define FIX_2_053119869 16819 | |
| 204 #define FIX_2_172734803 17799 | |
| 205 #define FIX_2_562915447 20995 | |
| 206 #define FIX_3_072711026 25172 | |
| 207 | |
| 208 /* | |
| 209 * Perform the inverse DCT on one block of coefficients. | |
| 210 */ | |
| 211 | |
| 212 void j_rev_dct(DCTBLOCK data) | |
| 213 { | |
| 1064 | 214 int32_t tmp0, tmp1, tmp2, tmp3; |
| 215 int32_t tmp10, tmp11, tmp12, tmp13; | |
| 216 int32_t z1, z2, z3, z4, z5; | |
| 217 int32_t d0, d1, d2, d3, d4, d5, d6, d7; | |
| 0 | 218 register DCTELEM *dataptr; |
| 219 int rowctr; | |
| 2967 | 220 |
| 0 | 221 /* Pass 1: process rows. */ |
| 222 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ | |
| 223 /* furthermore, we scale the results by 2**PASS1_BITS. */ | |
| 224 | |
| 225 dataptr = data; | |
| 226 | |
| 227 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
| 228 /* Due to quantization, we will usually find that many of the input | |
| 229 * coefficients are zero, especially the AC terms. We can exploit this | |
| 230 * by short-circuiting the IDCT calculation for any row in which all | |
| 231 * the AC terms are zero. In that case each output is equal to the | |
| 232 * DC coefficient (with scale factor as needed). | |
| 233 * With typical images and quantization tables, half or more of the | |
| 234 * row DCT calculations can be simplified this way. | |
| 235 */ | |
| 236 | |
| 237 register int *idataptr = (int*)dataptr; | |
| 238 | |
|
36
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
239 /* WARNING: we do the same permutation as MMX idct to simplify the |
|
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
240 video core */ |
| 0 | 241 d0 = dataptr[0]; |
|
36
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
242 d2 = dataptr[1]; |
|
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
243 d4 = dataptr[2]; |
|
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
244 d6 = dataptr[3]; |
|
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
245 d1 = dataptr[4]; |
|
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
246 d3 = dataptr[5]; |
|
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
247 d5 = dataptr[6]; |
| 0 | 248 d7 = dataptr[7]; |
| 249 | |
|
36
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
250 if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) { |
| 0 | 251 /* AC terms all zero */ |
| 252 if (d0) { | |
| 2979 | 253 /* Compute a 32 bit value to assign. */ |
| 254 DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); | |
| 255 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); | |
| 2967 | 256 |
| 2979 | 257 idataptr[0] = v; |
| 258 idataptr[1] = v; | |
| 259 idataptr[2] = v; | |
| 260 idataptr[3] = v; | |
| 0 | 261 } |
| 2967 | 262 |
| 2979 | 263 dataptr += DCTSIZE; /* advance pointer to next row */ |
| 0 | 264 continue; |
| 265 } | |
| 266 | |
| 267 /* Even part: reverse the even part of the forward DCT. */ | |
| 268 /* The rotator is sqrt(2)*c(-6). */ | |
| 269 { | |
| 270 if (d6) { | |
| 2979 | 271 if (d2) { |
| 272 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
| 273 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
| 274 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
| 275 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
| 0 | 276 |
| 2979 | 277 tmp0 = (d0 + d4) << CONST_BITS; |
| 278 tmp1 = (d0 - d4) << CONST_BITS; | |
| 0 | 279 |
| 2979 | 280 tmp10 = tmp0 + tmp3; |
| 281 tmp13 = tmp0 - tmp3; | |
| 282 tmp11 = tmp1 + tmp2; | |
| 283 tmp12 = tmp1 - tmp2; | |
| 284 } else { | |
| 285 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
| 286 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
| 287 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
| 0 | 288 |
| 2979 | 289 tmp0 = (d0 + d4) << CONST_BITS; |
| 290 tmp1 = (d0 - d4) << CONST_BITS; | |
| 0 | 291 |
| 2979 | 292 tmp10 = tmp0 + tmp3; |
| 293 tmp13 = tmp0 - tmp3; | |
| 294 tmp11 = tmp1 + tmp2; | |
| 295 tmp12 = tmp1 - tmp2; | |
| 296 } | |
| 2263 | 297 } else { |
| 2979 | 298 if (d2) { |
| 299 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
| 300 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
| 301 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
| 0 | 302 |
| 2979 | 303 tmp0 = (d0 + d4) << CONST_BITS; |
| 304 tmp1 = (d0 - d4) << CONST_BITS; | |
| 0 | 305 |
| 2979 | 306 tmp10 = tmp0 + tmp3; |
| 307 tmp13 = tmp0 - tmp3; | |
| 308 tmp11 = tmp1 + tmp2; | |
| 309 tmp12 = tmp1 - tmp2; | |
| 310 } else { | |
| 311 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
| 312 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
| 313 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
| 314 } | |
| 0 | 315 } |
| 316 | |
| 317 /* Odd part per figure 8; the matrix is unitary and hence its | |
| 318 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | |
| 319 */ | |
| 320 | |
| 321 if (d7) { | |
| 2979 | 322 if (d5) { |
| 323 if (d3) { | |
| 324 if (d1) { | |
| 325 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ | |
| 326 z1 = d7 + d1; | |
| 327 z2 = d5 + d3; | |
| 328 z3 = d7 + d3; | |
| 329 z4 = d5 + d1; | |
| 330 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); | |
| 2967 | 331 |
| 2979 | 332 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
| 333 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
| 334 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
| 335 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
| 336 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
| 337 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
| 338 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
| 339 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
| 2967 | 340 |
| 2979 | 341 z3 += z5; |
| 342 z4 += z5; | |
| 2967 | 343 |
| 2979 | 344 tmp0 += z1 + z3; |
| 345 tmp1 += z2 + z4; | |
| 346 tmp2 += z2 + z3; | |
| 347 tmp3 += z1 + z4; | |
| 348 } else { | |
| 349 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ | |
| 350 z2 = d5 + d3; | |
| 351 z3 = d7 + d3; | |
| 352 z5 = MULTIPLY(z3 + d5, FIX_1_175875602); | |
| 2967 | 353 |
| 2979 | 354 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
| 355 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
| 356 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
| 357 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
| 358 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
| 359 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
| 360 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
| 2967 | 361 |
| 2979 | 362 z3 += z5; |
| 363 z4 += z5; | |
| 2967 | 364 |
| 2979 | 365 tmp0 += z1 + z3; |
| 366 tmp1 += z2 + z4; | |
| 367 tmp2 += z2 + z3; | |
| 368 tmp3 = z1 + z4; | |
| 369 } | |
| 370 } else { | |
| 371 if (d1) { | |
| 372 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ | |
| 373 z1 = d7 + d1; | |
| 374 z4 = d5 + d1; | |
| 375 z5 = MULTIPLY(d7 + z4, FIX_1_175875602); | |
| 2967 | 376 |
| 2979 | 377 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
| 378 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
| 379 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
| 380 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
| 381 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
| 382 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
| 383 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
| 2967 | 384 |
| 2979 | 385 z3 += z5; |
| 386 z4 += z5; | |
| 2967 | 387 |
| 2979 | 388 tmp0 += z1 + z3; |
| 389 tmp1 += z2 + z4; | |
| 390 tmp2 = z2 + z3; | |
| 391 tmp3 += z1 + z4; | |
| 392 } else { | |
| 393 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ | |
| 394 tmp0 = MULTIPLY(-d7, FIX_0_601344887); | |
| 395 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
| 396 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
| 397 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
| 398 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
| 399 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
| 400 z5 = MULTIPLY(d5 + d7, FIX_1_175875602); | |
| 2967 | 401 |
| 2979 | 402 z3 += z5; |
| 403 z4 += z5; | |
| 2967 | 404 |
| 2979 | 405 tmp0 += z3; |
| 406 tmp1 += z4; | |
| 407 tmp2 = z2 + z3; | |
| 408 tmp3 = z1 + z4; | |
| 409 } | |
| 410 } | |
| 411 } else { | |
| 412 if (d3) { | |
| 413 if (d1) { | |
| 414 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ | |
| 415 z1 = d7 + d1; | |
| 416 z3 = d7 + d3; | |
| 417 z5 = MULTIPLY(z3 + d1, FIX_1_175875602); | |
| 2967 | 418 |
| 2979 | 419 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
| 420 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
| 421 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
| 422 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
| 423 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
| 424 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
| 425 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
| 2967 | 426 |
| 2979 | 427 z3 += z5; |
| 428 z4 += z5; | |
| 2967 | 429 |
| 2979 | 430 tmp0 += z1 + z3; |
| 431 tmp1 = z2 + z4; | |
| 432 tmp2 += z2 + z3; | |
| 433 tmp3 += z1 + z4; | |
| 434 } else { | |
| 435 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ | |
| 436 z3 = d7 + d3; | |
| 2967 | 437 |
| 2979 | 438 tmp0 = MULTIPLY(-d7, FIX_0_601344887); |
| 439 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
| 440 tmp2 = MULTIPLY(d3, FIX_0_509795579); | |
| 441 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
| 442 z5 = MULTIPLY(z3, FIX_1_175875602); | |
| 443 z3 = MULTIPLY(-z3, FIX_0_785694958); | |
| 2967 | 444 |
| 2979 | 445 tmp0 += z3; |
| 446 tmp1 = z2 + z5; | |
| 447 tmp2 += z3; | |
| 448 tmp3 = z1 + z5; | |
| 449 } | |
| 450 } else { | |
| 451 if (d1) { | |
| 452 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ | |
| 453 z1 = d7 + d1; | |
| 454 z5 = MULTIPLY(z1, FIX_1_175875602); | |
| 0 | 455 |
| 2979 | 456 z1 = MULTIPLY(z1, FIX_0_275899380); |
| 457 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
| 458 tmp0 = MULTIPLY(-d7, FIX_1_662939225); | |
| 459 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
| 460 tmp3 = MULTIPLY(d1, FIX_1_111140466); | |
| 0 | 461 |
| 2979 | 462 tmp0 += z1; |
| 463 tmp1 = z4 + z5; | |
| 464 tmp2 = z3 + z5; | |
| 465 tmp3 += z1; | |
| 466 } else { | |
| 467 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ | |
| 468 tmp0 = MULTIPLY(-d7, FIX_1_387039845); | |
| 469 tmp1 = MULTIPLY(d7, FIX_1_175875602); | |
| 470 tmp2 = MULTIPLY(-d7, FIX_0_785694958); | |
| 471 tmp3 = MULTIPLY(d7, FIX_0_275899380); | |
| 472 } | |
| 473 } | |
| 474 } | |
| 0 | 475 } else { |
| 2979 | 476 if (d5) { |
| 477 if (d3) { | |
| 478 if (d1) { | |
| 479 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ | |
| 480 z2 = d5 + d3; | |
| 481 z4 = d5 + d1; | |
| 482 z5 = MULTIPLY(d3 + z4, FIX_1_175875602); | |
| 2967 | 483 |
| 2979 | 484 tmp1 = MULTIPLY(d5, FIX_2_053119869); |
| 485 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
| 486 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
| 487 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
| 488 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
| 489 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
| 490 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
| 2967 | 491 |
| 2979 | 492 z3 += z5; |
| 493 z4 += z5; | |
| 2967 | 494 |
| 2979 | 495 tmp0 = z1 + z3; |
| 496 tmp1 += z2 + z4; | |
| 497 tmp2 += z2 + z3; | |
| 498 tmp3 += z1 + z4; | |
| 499 } else { | |
| 500 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ | |
| 501 z2 = d5 + d3; | |
| 2967 | 502 |
| 2979 | 503 z5 = MULTIPLY(z2, FIX_1_175875602); |
| 504 tmp1 = MULTIPLY(d5, FIX_1_662939225); | |
| 505 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
| 506 z2 = MULTIPLY(-z2, FIX_1_387039845); | |
| 507 tmp2 = MULTIPLY(d3, FIX_1_111140466); | |
| 508 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
| 2967 | 509 |
| 2979 | 510 tmp0 = z3 + z5; |
| 511 tmp1 += z2; | |
| 512 tmp2 += z2; | |
| 513 tmp3 = z4 + z5; | |
| 514 } | |
| 515 } else { | |
| 516 if (d1) { | |
| 517 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ | |
| 518 z4 = d5 + d1; | |
| 2967 | 519 |
| 2979 | 520 z5 = MULTIPLY(z4, FIX_1_175875602); |
| 521 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
| 522 tmp3 = MULTIPLY(d1, FIX_0_601344887); | |
| 523 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
| 524 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
| 525 z4 = MULTIPLY(z4, FIX_0_785694958); | |
| 2967 | 526 |
| 2979 | 527 tmp0 = z1 + z5; |
| 528 tmp1 += z4; | |
| 529 tmp2 = z2 + z5; | |
| 530 tmp3 += z4; | |
| 531 } else { | |
| 532 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ | |
| 533 tmp0 = MULTIPLY(d5, FIX_1_175875602); | |
| 534 tmp1 = MULTIPLY(d5, FIX_0_275899380); | |
| 535 tmp2 = MULTIPLY(-d5, FIX_1_387039845); | |
| 536 tmp3 = MULTIPLY(d5, FIX_0_785694958); | |
| 537 } | |
| 538 } | |
| 539 } else { | |
| 540 if (d3) { | |
| 541 if (d1) { | |
| 542 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ | |
| 543 z5 = d1 + d3; | |
| 544 tmp3 = MULTIPLY(d1, FIX_0_211164243); | |
| 545 tmp2 = MULTIPLY(-d3, FIX_1_451774981); | |
| 546 z1 = MULTIPLY(d1, FIX_1_061594337); | |
| 547 z2 = MULTIPLY(-d3, FIX_2_172734803); | |
| 548 z4 = MULTIPLY(z5, FIX_0_785694958); | |
| 549 z5 = MULTIPLY(z5, FIX_1_175875602); | |
| 2967 | 550 |
| 2979 | 551 tmp0 = z1 - z4; |
| 552 tmp1 = z2 + z4; | |
| 553 tmp2 += z5; | |
| 554 tmp3 += z5; | |
| 555 } else { | |
| 556 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ | |
| 557 tmp0 = MULTIPLY(-d3, FIX_0_785694958); | |
| 558 tmp1 = MULTIPLY(-d3, FIX_1_387039845); | |
| 559 tmp2 = MULTIPLY(-d3, FIX_0_275899380); | |
| 560 tmp3 = MULTIPLY(d3, FIX_1_175875602); | |
| 561 } | |
| 562 } else { | |
| 563 if (d1) { | |
| 564 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ | |
| 565 tmp0 = MULTIPLY(d1, FIX_0_275899380); | |
| 566 tmp1 = MULTIPLY(d1, FIX_0_785694958); | |
| 567 tmp2 = MULTIPLY(d1, FIX_1_175875602); | |
| 568 tmp3 = MULTIPLY(d1, FIX_1_387039845); | |
| 569 } else { | |
| 570 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ | |
| 571 tmp0 = tmp1 = tmp2 = tmp3 = 0; | |
| 572 } | |
| 573 } | |
| 574 } | |
| 0 | 575 } |
| 576 } | |
| 577 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
| 578 | |
| 579 dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); | |
| 580 dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); | |
| 581 dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); | |
| 582 dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); | |
| 583 dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); | |
| 584 dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); | |
| 585 dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); | |
| 586 dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); | |
| 587 | |
| 2979 | 588 dataptr += DCTSIZE; /* advance pointer to next row */ |
| 0 | 589 } |
| 590 | |
| 591 /* Pass 2: process columns. */ | |
| 592 /* Note that we must descale the results by a factor of 8 == 2**3, */ | |
| 593 /* and also undo the PASS1_BITS scaling. */ | |
| 594 | |
| 595 dataptr = data; | |
| 596 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
| 597 /* Columns of zeroes can be exploited in the same way as we did with rows. | |
| 598 * However, the row calculation has created many nonzero AC terms, so the | |
| 599 * simplification applies less often (typically 5% to 10% of the time). | |
| 600 * On machines with very fast multiplication, it's possible that the | |
| 601 * test takes more time than it's worth. In that case this section | |
| 602 * may be commented out. | |
| 603 */ | |
| 604 | |
| 605 d0 = dataptr[DCTSIZE*0]; | |
| 606 d1 = dataptr[DCTSIZE*1]; | |
| 607 d2 = dataptr[DCTSIZE*2]; | |
| 608 d3 = dataptr[DCTSIZE*3]; | |
| 609 d4 = dataptr[DCTSIZE*4]; | |
| 610 d5 = dataptr[DCTSIZE*5]; | |
| 611 d6 = dataptr[DCTSIZE*6]; | |
| 612 d7 = dataptr[DCTSIZE*7]; | |
| 613 | |
| 614 /* Even part: reverse the even part of the forward DCT. */ | |
| 615 /* The rotator is sqrt(2)*c(-6). */ | |
| 616 if (d6) { | |
| 2979 | 617 if (d2) { |
| 618 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
| 619 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
| 620 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
| 621 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
| 0 | 622 |
| 2979 | 623 tmp0 = (d0 + d4) << CONST_BITS; |
| 624 tmp1 = (d0 - d4) << CONST_BITS; | |
| 0 | 625 |
| 2979 | 626 tmp10 = tmp0 + tmp3; |
| 627 tmp13 = tmp0 - tmp3; | |
| 628 tmp11 = tmp1 + tmp2; | |
| 629 tmp12 = tmp1 - tmp2; | |
| 630 } else { | |
| 631 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
| 632 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
| 633 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
| 0 | 634 |
| 2979 | 635 tmp0 = (d0 + d4) << CONST_BITS; |
| 636 tmp1 = (d0 - d4) << CONST_BITS; | |
| 0 | 637 |
| 2979 | 638 tmp10 = tmp0 + tmp3; |
| 639 tmp13 = tmp0 - tmp3; | |
| 640 tmp11 = tmp1 + tmp2; | |
| 641 tmp12 = tmp1 - tmp2; | |
| 642 } | |
| 2263 | 643 } else { |
| 2979 | 644 if (d2) { |
| 645 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
| 646 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
| 647 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
| 0 | 648 |
| 2979 | 649 tmp0 = (d0 + d4) << CONST_BITS; |
| 650 tmp1 = (d0 - d4) << CONST_BITS; | |
| 0 | 651 |
| 2979 | 652 tmp10 = tmp0 + tmp3; |
| 653 tmp13 = tmp0 - tmp3; | |
| 654 tmp11 = tmp1 + tmp2; | |
| 655 tmp12 = tmp1 - tmp2; | |
| 656 } else { | |
| 657 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
| 658 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
| 659 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
| 660 } | |
| 0 | 661 } |
| 662 | |
| 663 /* Odd part per figure 8; the matrix is unitary and hence its | |
| 664 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | |
| 665 */ | |
| 666 if (d7) { | |
| 2979 | 667 if (d5) { |
| 668 if (d3) { | |
| 669 if (d1) { | |
| 670 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ | |
| 671 z1 = d7 + d1; | |
| 672 z2 = d5 + d3; | |
| 673 z3 = d7 + d3; | |
| 674 z4 = d5 + d1; | |
| 675 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); | |
| 2967 | 676 |
| 2979 | 677 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
| 678 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
| 679 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
| 680 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
| 681 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
| 682 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
| 683 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
| 684 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
| 2967 | 685 |
| 2979 | 686 z3 += z5; |
| 687 z4 += z5; | |
| 2967 | 688 |
| 2979 | 689 tmp0 += z1 + z3; |
| 690 tmp1 += z2 + z4; | |
| 691 tmp2 += z2 + z3; | |
| 692 tmp3 += z1 + z4; | |
| 693 } else { | |
| 694 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ | |
| 695 z1 = d7; | |
| 696 z2 = d5 + d3; | |
| 697 z3 = d7 + d3; | |
| 698 z5 = MULTIPLY(z3 + d5, FIX_1_175875602); | |
| 2967 | 699 |
| 2979 | 700 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
| 701 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
| 702 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
| 703 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
| 704 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
| 705 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
| 706 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
| 2967 | 707 |
| 2979 | 708 z3 += z5; |
| 709 z4 += z5; | |
| 2967 | 710 |
| 2979 | 711 tmp0 += z1 + z3; |
| 712 tmp1 += z2 + z4; | |
| 713 tmp2 += z2 + z3; | |
| 714 tmp3 = z1 + z4; | |
| 715 } | |
| 716 } else { | |
| 717 if (d1) { | |
| 718 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ | |
| 719 z1 = d7 + d1; | |
| 720 z2 = d5; | |
| 721 z3 = d7; | |
| 722 z4 = d5 + d1; | |
| 723 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); | |
| 2967 | 724 |
| 2979 | 725 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
| 726 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
| 727 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
| 728 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
| 729 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
| 730 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
| 731 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
| 2967 | 732 |
| 2979 | 733 z3 += z5; |
| 734 z4 += z5; | |
| 2967 | 735 |
| 2979 | 736 tmp0 += z1 + z3; |
| 737 tmp1 += z2 + z4; | |
| 738 tmp2 = z2 + z3; | |
| 739 tmp3 += z1 + z4; | |
| 740 } else { | |
| 741 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ | |
| 742 tmp0 = MULTIPLY(-d7, FIX_0_601344887); | |
| 743 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
| 744 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
| 745 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
| 746 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
| 747 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
| 748 z5 = MULTIPLY(d5 + d7, FIX_1_175875602); | |
| 2967 | 749 |
| 2979 | 750 z3 += z5; |
| 751 z4 += z5; | |
| 2967 | 752 |
| 2979 | 753 tmp0 += z3; |
| 754 tmp1 += z4; | |
| 755 tmp2 = z2 + z3; | |
| 756 tmp3 = z1 + z4; | |
| 757 } | |
| 758 } | |
| 759 } else { | |
| 760 if (d3) { | |
| 761 if (d1) { | |
| 762 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ | |
| 763 z1 = d7 + d1; | |
| 764 z3 = d7 + d3; | |
| 765 z5 = MULTIPLY(z3 + d1, FIX_1_175875602); | |
| 2967 | 766 |
| 2979 | 767 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
| 768 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
| 769 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
| 770 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
| 771 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
| 772 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
| 773 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
| 2967 | 774 |
| 2979 | 775 z3 += z5; |
| 776 z4 += z5; | |
| 2967 | 777 |
| 2979 | 778 tmp0 += z1 + z3; |
| 779 tmp1 = z2 + z4; | |
| 780 tmp2 += z2 + z3; | |
| 781 tmp3 += z1 + z4; | |
| 782 } else { | |
| 783 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ | |
| 784 z3 = d7 + d3; | |
| 2967 | 785 |
| 2979 | 786 tmp0 = MULTIPLY(-d7, FIX_0_601344887); |
| 787 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
| 788 tmp2 = MULTIPLY(d3, FIX_0_509795579); | |
| 789 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
| 790 z5 = MULTIPLY(z3, FIX_1_175875602); | |
| 791 z3 = MULTIPLY(-z3, FIX_0_785694958); | |
| 2967 | 792 |
| 2979 | 793 tmp0 += z3; |
| 794 tmp1 = z2 + z5; | |
| 795 tmp2 += z3; | |
| 796 tmp3 = z1 + z5; | |
| 797 } | |
| 798 } else { | |
| 799 if (d1) { | |
| 800 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ | |
| 801 z1 = d7 + d1; | |
| 802 z5 = MULTIPLY(z1, FIX_1_175875602); | |
| 0 | 803 |
| 2979 | 804 z1 = MULTIPLY(z1, FIX_0_275899380); |
| 805 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
| 806 tmp0 = MULTIPLY(-d7, FIX_1_662939225); | |
| 807 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
| 808 tmp3 = MULTIPLY(d1, FIX_1_111140466); | |
| 0 | 809 |
| 2979 | 810 tmp0 += z1; |
| 811 tmp1 = z4 + z5; | |
| 812 tmp2 = z3 + z5; | |
| 813 tmp3 += z1; | |
| 814 } else { | |
| 815 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ | |
| 816 tmp0 = MULTIPLY(-d7, FIX_1_387039845); | |
| 817 tmp1 = MULTIPLY(d7, FIX_1_175875602); | |
| 818 tmp2 = MULTIPLY(-d7, FIX_0_785694958); | |
| 819 tmp3 = MULTIPLY(d7, FIX_0_275899380); | |
| 820 } | |
| 821 } | |
| 822 } | |
| 0 | 823 } else { |
| 2979 | 824 if (d5) { |
| 825 if (d3) { | |
| 826 if (d1) { | |
| 827 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ | |
| 828 z2 = d5 + d3; | |
| 829 z4 = d5 + d1; | |
| 830 z5 = MULTIPLY(d3 + z4, FIX_1_175875602); | |
| 2967 | 831 |
| 2979 | 832 tmp1 = MULTIPLY(d5, FIX_2_053119869); |
| 833 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
| 834 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
| 835 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
| 836 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
| 837 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
| 838 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
| 2967 | 839 |
| 2979 | 840 z3 += z5; |
| 841 z4 += z5; | |
| 2967 | 842 |
| 2979 | 843 tmp0 = z1 + z3; |
| 844 tmp1 += z2 + z4; | |
| 845 tmp2 += z2 + z3; | |
| 846 tmp3 += z1 + z4; | |
| 847 } else { | |
| 848 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ | |
| 849 z2 = d5 + d3; | |
| 2967 | 850 |
| 2979 | 851 z5 = MULTIPLY(z2, FIX_1_175875602); |
| 852 tmp1 = MULTIPLY(d5, FIX_1_662939225); | |
| 853 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
| 854 z2 = MULTIPLY(-z2, FIX_1_387039845); | |
| 855 tmp2 = MULTIPLY(d3, FIX_1_111140466); | |
| 856 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
| 2967 | 857 |
| 2979 | 858 tmp0 = z3 + z5; |
| 859 tmp1 += z2; | |
| 860 tmp2 += z2; | |
| 861 tmp3 = z4 + z5; | |
| 862 } | |
| 863 } else { | |
| 864 if (d1) { | |
| 865 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ | |
| 866 z4 = d5 + d1; | |
| 2967 | 867 |
| 2979 | 868 z5 = MULTIPLY(z4, FIX_1_175875602); |
| 869 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
| 870 tmp3 = MULTIPLY(d1, FIX_0_601344887); | |
| 871 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
| 872 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
| 873 z4 = MULTIPLY(z4, FIX_0_785694958); | |
| 2967 | 874 |
| 2979 | 875 tmp0 = z1 + z5; |
| 876 tmp1 += z4; | |
| 877 tmp2 = z2 + z5; | |
| 878 tmp3 += z4; | |
| 879 } else { | |
| 880 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ | |
| 881 tmp0 = MULTIPLY(d5, FIX_1_175875602); | |
| 882 tmp1 = MULTIPLY(d5, FIX_0_275899380); | |
| 883 tmp2 = MULTIPLY(-d5, FIX_1_387039845); | |
| 884 tmp3 = MULTIPLY(d5, FIX_0_785694958); | |
| 885 } | |
| 886 } | |
| 887 } else { | |
| 888 if (d3) { | |
| 889 if (d1) { | |
| 890 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ | |
| 891 z5 = d1 + d3; | |
| 892 tmp3 = MULTIPLY(d1, FIX_0_211164243); | |
| 893 tmp2 = MULTIPLY(-d3, FIX_1_451774981); | |
| 894 z1 = MULTIPLY(d1, FIX_1_061594337); | |
| 895 z2 = MULTIPLY(-d3, FIX_2_172734803); | |
| 896 z4 = MULTIPLY(z5, FIX_0_785694958); | |
| 897 z5 = MULTIPLY(z5, FIX_1_175875602); | |
| 2967 | 898 |
| 2979 | 899 tmp0 = z1 - z4; |
| 900 tmp1 = z2 + z4; | |
| 901 tmp2 += z5; | |
| 902 tmp3 += z5; | |
| 903 } else { | |
| 904 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ | |
| 905 tmp0 = MULTIPLY(-d3, FIX_0_785694958); | |
| 906 tmp1 = MULTIPLY(-d3, FIX_1_387039845); | |
| 907 tmp2 = MULTIPLY(-d3, FIX_0_275899380); | |
| 908 tmp3 = MULTIPLY(d3, FIX_1_175875602); | |
| 909 } | |
| 910 } else { | |
| 911 if (d1) { | |
| 912 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ | |
| 913 tmp0 = MULTIPLY(d1, FIX_0_275899380); | |
| 914 tmp1 = MULTIPLY(d1, FIX_0_785694958); | |
| 915 tmp2 = MULTIPLY(d1, FIX_1_175875602); | |
| 916 tmp3 = MULTIPLY(d1, FIX_1_387039845); | |
| 917 } else { | |
| 918 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ | |
| 919 tmp0 = tmp1 = tmp2 = tmp3 = 0; | |
| 920 } | |
| 921 } | |
| 922 } | |
| 0 | 923 } |
| 924 | |
| 925 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
| 926 | |
| 927 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3, | |
| 2979 | 928 CONST_BITS+PASS1_BITS+3); |
| 0 | 929 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3, |
| 2979 | 930 CONST_BITS+PASS1_BITS+3); |
| 0 | 931 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2, |
| 2979 | 932 CONST_BITS+PASS1_BITS+3); |
| 0 | 933 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2, |
| 2979 | 934 CONST_BITS+PASS1_BITS+3); |
| 0 | 935 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1, |
| 2979 | 936 CONST_BITS+PASS1_BITS+3); |
| 0 | 937 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1, |
| 2979 | 938 CONST_BITS+PASS1_BITS+3); |
| 0 | 939 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0, |
| 2979 | 940 CONST_BITS+PASS1_BITS+3); |
| 0 | 941 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0, |
| 2979 | 942 CONST_BITS+PASS1_BITS+3); |
| 2967 | 943 |
| 2979 | 944 dataptr++; /* advance pointer to next column */ |
| 0 | 945 } |
| 946 } | |
| 947 | |
| 2256 | 948 #undef DCTSIZE |
| 949 #define DCTSIZE 4 | |
| 950 #define DCTSTRIDE 8 | |
| 951 | |
| 952 void j_rev_dct4(DCTBLOCK data) | |
| 953 { | |
| 954 int32_t tmp0, tmp1, tmp2, tmp3; | |
| 955 int32_t tmp10, tmp11, tmp12, tmp13; | |
| 956 int32_t z1; | |
| 957 int32_t d0, d2, d4, d6; | |
| 958 register DCTELEM *dataptr; | |
| 959 int rowctr; | |
| 2262 | 960 |
| 2256 | 961 /* Pass 1: process rows. */ |
| 962 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ | |
| 963 /* furthermore, we scale the results by 2**PASS1_BITS. */ | |
| 964 | |
| 2262 | 965 data[0] += 4; |
| 2967 | 966 |
| 2256 | 967 dataptr = data; |
| 968 | |
| 969 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
| 970 /* Due to quantization, we will usually find that many of the input | |
| 971 * coefficients are zero, especially the AC terms. We can exploit this | |
| 972 * by short-circuiting the IDCT calculation for any row in which all | |
| 973 * the AC terms are zero. In that case each output is equal to the | |
| 974 * DC coefficient (with scale factor as needed). | |
| 975 * With typical images and quantization tables, half or more of the | |
| 976 * row DCT calculations can be simplified this way. | |
| 977 */ | |
| 978 | |
| 979 register int *idataptr = (int*)dataptr; | |
| 980 | |
| 981 d0 = dataptr[0]; | |
| 982 d2 = dataptr[1]; | |
| 983 d4 = dataptr[2]; | |
| 984 d6 = dataptr[3]; | |
| 985 | |
| 986 if ((d2 | d4 | d6) == 0) { | |
| 987 /* AC terms all zero */ | |
| 988 if (d0) { | |
| 2979 | 989 /* Compute a 32 bit value to assign. */ |
| 990 DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); | |
| 991 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); | |
| 2967 | 992 |
| 2979 | 993 idataptr[0] = v; |
| 994 idataptr[1] = v; | |
| 2256 | 995 } |
| 2967 | 996 |
| 2979 | 997 dataptr += DCTSTRIDE; /* advance pointer to next row */ |
| 2256 | 998 continue; |
| 999 } | |
| 2967 | 1000 |
| 2256 | 1001 /* Even part: reverse the even part of the forward DCT. */ |
| 1002 /* The rotator is sqrt(2)*c(-6). */ | |
| 1003 if (d6) { | |
| 2979 | 1004 if (d2) { |
| 1005 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
| 1006 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
| 1007 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
| 1008 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
| 2256 | 1009 |
| 2979 | 1010 tmp0 = (d0 + d4) << CONST_BITS; |
| 1011 tmp1 = (d0 - d4) << CONST_BITS; | |
| 2256 | 1012 |
| 2979 | 1013 tmp10 = tmp0 + tmp3; |
| 1014 tmp13 = tmp0 - tmp3; | |
| 1015 tmp11 = tmp1 + tmp2; | |
| 1016 tmp12 = tmp1 - tmp2; | |
| 1017 } else { | |
| 1018 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
| 1019 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
| 1020 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
| 2256 | 1021 |
| 2979 | 1022 tmp0 = (d0 + d4) << CONST_BITS; |
| 1023 tmp1 = (d0 - d4) << CONST_BITS; | |
| 2256 | 1024 |
| 2979 | 1025 tmp10 = tmp0 + tmp3; |
| 1026 tmp13 = tmp0 - tmp3; | |
| 1027 tmp11 = tmp1 + tmp2; | |
| 1028 tmp12 = tmp1 - tmp2; | |
| 1029 } | |
| 2262 | 1030 } else { |
| 2979 | 1031 if (d2) { |
| 1032 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
| 1033 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
| 1034 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
| 2256 | 1035 |
| 2979 | 1036 tmp0 = (d0 + d4) << CONST_BITS; |
| 1037 tmp1 = (d0 - d4) << CONST_BITS; | |
| 2256 | 1038 |
| 2979 | 1039 tmp10 = tmp0 + tmp3; |
| 1040 tmp13 = tmp0 - tmp3; | |
| 1041 tmp11 = tmp1 + tmp2; | |
| 1042 tmp12 = tmp1 - tmp2; | |
| 1043 } else { | |
| 1044 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
| 1045 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
| 1046 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
| 1047 } | |
| 2256 | 1048 } |
| 1049 | |
| 1050 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
| 1051 | |
| 1052 dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); | |
| 1053 dataptr[1] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); | |
| 1054 dataptr[2] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); | |
| 1055 dataptr[3] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS); | |
| 1056 | |
| 2979 | 1057 dataptr += DCTSTRIDE; /* advance pointer to next row */ |
| 2256 | 1058 } |
| 1059 | |
| 1060 /* Pass 2: process columns. */ | |
| 1061 /* Note that we must descale the results by a factor of 8 == 2**3, */ | |
| 1062 /* and also undo the PASS1_BITS scaling. */ | |
| 1063 | |
| 1064 dataptr = data; | |
| 1065 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
| 1066 /* Columns of zeroes can be exploited in the same way as we did with rows. | |
| 1067 * However, the row calculation has created many nonzero AC terms, so the | |
| 1068 * simplification applies less often (typically 5% to 10% of the time). | |
| 1069 * On machines with very fast multiplication, it's possible that the | |
| 1070 * test takes more time than it's worth. In that case this section | |
| 1071 * may be commented out. | |
| 1072 */ | |
| 1073 | |
| 1074 d0 = dataptr[DCTSTRIDE*0]; | |
| 1075 d2 = dataptr[DCTSTRIDE*1]; | |
| 1076 d4 = dataptr[DCTSTRIDE*2]; | |
| 1077 d6 = dataptr[DCTSTRIDE*3]; | |
| 1078 | |
| 1079 /* Even part: reverse the even part of the forward DCT. */ | |
| 1080 /* The rotator is sqrt(2)*c(-6). */ | |
| 1081 if (d6) { | |
| 2979 | 1082 if (d2) { |
| 1083 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
| 1084 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
| 1085 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
| 1086 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
| 2256 | 1087 |
| 2979 | 1088 tmp0 = (d0 + d4) << CONST_BITS; |
| 1089 tmp1 = (d0 - d4) << CONST_BITS; | |
| 2256 | 1090 |
| 2979 | 1091 tmp10 = tmp0 + tmp3; |
| 1092 tmp13 = tmp0 - tmp3; | |
| 1093 tmp11 = tmp1 + tmp2; | |
| 1094 tmp12 = tmp1 - tmp2; | |
| 1095 } else { | |
| 1096 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
| 1097 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
| 1098 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
| 2256 | 1099 |
| 2979 | 1100 tmp0 = (d0 + d4) << CONST_BITS; |
| 1101 tmp1 = (d0 - d4) << CONST_BITS; | |
| 2256 | 1102 |
| 2979 | 1103 tmp10 = tmp0 + tmp3; |
| 1104 tmp13 = tmp0 - tmp3; | |
| 1105 tmp11 = tmp1 + tmp2; | |
| 1106 tmp12 = tmp1 - tmp2; | |
| 1107 } | |
| 2262 | 1108 } else { |
| 2979 | 1109 if (d2) { |
| 1110 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
| 1111 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
| 1112 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
| 2256 | 1113 |
| 2979 | 1114 tmp0 = (d0 + d4) << CONST_BITS; |
| 1115 tmp1 = (d0 - d4) << CONST_BITS; | |
| 2256 | 1116 |
| 2979 | 1117 tmp10 = tmp0 + tmp3; |
| 1118 tmp13 = tmp0 - tmp3; | |
| 1119 tmp11 = tmp1 + tmp2; | |
| 1120 tmp12 = tmp1 - tmp2; | |
| 1121 } else { | |
| 1122 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
| 1123 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
| 1124 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
| 1125 } | |
| 2256 | 1126 } |
| 1127 | |
| 1128 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
| 1129 | |
| 2262 | 1130 dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3); |
| 1131 dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3); | |
| 1132 dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3); | |
| 1133 dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3); | |
| 2967 | 1134 |
| 2979 | 1135 dataptr++; /* advance pointer to next column */ |
| 2256 | 1136 } |
| 1137 } | |
| 1138 | |
| 2257 | 1139 void j_rev_dct2(DCTBLOCK data){ |
| 1140 int d00, d01, d10, d11; | |
| 1141 | |
| 1142 data[0] += 4; | |
| 1143 d00 = data[0+0*DCTSTRIDE] + data[1+0*DCTSTRIDE]; | |
| 1144 d01 = data[0+0*DCTSTRIDE] - data[1+0*DCTSTRIDE]; | |
| 1145 d10 = data[0+1*DCTSTRIDE] + data[1+1*DCTSTRIDE]; | |
| 1146 d11 = data[0+1*DCTSTRIDE] - data[1+1*DCTSTRIDE]; | |
| 2967 | 1147 |
| 2257 | 1148 data[0+0*DCTSTRIDE]= (d00 + d10)>>3; |
| 1149 data[1+0*DCTSTRIDE]= (d01 + d11)>>3; | |
| 1150 data[0+1*DCTSTRIDE]= (d00 - d10)>>3; | |
| 1151 data[1+1*DCTSTRIDE]= (d01 - d11)>>3; | |
| 1152 } | |
| 2256 | 1153 |
| 2259 | 1154 void j_rev_dct1(DCTBLOCK data){ |
| 1155 data[0] = (data[0] + 4)>>3; | |
| 1156 } | |
| 1157 | |
|
440
000aeeac27a2
* started to cleanup name clashes for onetime compilation
kabi
parents:
36
diff
changeset
|
1158 #undef FIX |
|
000aeeac27a2
* started to cleanup name clashes for onetime compilation
kabi
parents:
36
diff
changeset
|
1159 #undef CONST_BITS |
