Mercurial > libavcodec.hg
annotate simple_idct.c @ 719:2b7ff6dfee35 libavcodec
first version of IDCT248 for DV decoding support
| author | bellard |
|---|---|
| date | Thu, 03 Oct 2002 13:41:33 +0000 |
| parents | 4263629270c8 |
| children | ff90043f4a2d |
| rev | line source |
|---|---|
|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
1 /* |
| 429 | 2 * Simple IDCT |
| 3 * | |
| 4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> | |
| 5 * | |
| 6 * This library is free software; you can redistribute it and/or | |
| 7 * modify it under the terms of the GNU Lesser General Public | |
| 8 * License as published by the Free Software Foundation; either | |
| 9 * version 2 of the License, or (at your option) any later version. | |
| 10 * | |
| 11 * This library is distributed in the hope that it will be useful, | |
| 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 14 * Lesser General Public License for more details. | |
| 15 * | |
| 16 * You should have received a copy of the GNU Lesser General Public | |
| 17 * License along with this library; if not, write to the Free Software | |
| 18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
| 19 */ | |
|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
20 /* |
| 429 | 21 based upon some commented-out C code from mpeg2dec (idct_mmx.c
| 22 written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>) | |
| 23 */ | |
|
396
fce0a2520551
removed useless header includes - use av memory functions
glantau
parents:
352
diff
changeset
|
24 #include "avcodec.h" |
| 479 | 25 #include "dsputil.h" |
|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
26 #include "simple_idct.h" |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
27 |
| 633 | 28 //#define ARCH_ALPHA |
| 29 | |
|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
/*
 * Fixed-point cosine weights and the per-pass shift counts.
 *
 * The first (disabled) set is scaled by 2048, the active set by 1 << 14.
 */
#if 0
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
#define ROW_SHIFT 8
#define COL_SHIFT 17
#else
/*
 * Wi = cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5, truncated to an integer.
 * W4 is the exception: the formula yields 16384, the code uses 16383.
 */
#define W1 22725 /* i = 1 */
#define W2 21407 /* i = 2 */
#define W3 19266 /* i = 3 */
#define W4 16383 /* i = 4, one below the formula's 16384 */
#define W5 12873 /* i = 5 */
#define W6 8867  /* i = 6 */
#define W7 4520  /* i = 7 */
#define ROW_SHIFT 11
#define COL_SHIFT 20 // 6
#endif
| 205 | 51 |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
#ifdef ARCH_ALPHA
#define FAST_64BIT
#endif

/*
 * MAC16(rt, ra, rb): rt += ra * rb   (signed 16x16 -> 32 multiply accumulate)
 * MUL16(rt, ra, rb): rt  = ra * rb   (signed 16x16 -> 32 multiply)
 *
 * The PowerPC 405 variants expand to a complete asm statement (including the
 * trailing semicolon), so portable callers must use both macros as plain
 * statements, never as sub-expressions.
 */
#if defined(ARCH_POWERPC_405)

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) \
    asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) \
    asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb));

#else

/*
 * Arguments and the whole expansion are parenthesized so that neighbouring
 * operators at the call site cannot be absorbed into the assignment.
 */

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) ((rt) += (ra) * (rb))

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) ((rt) = (ra) * (rb))

#endif
| 205 | 75 |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
76 #ifdef ARCH_ALPHA |
|
215
1fe6b64feefb
Small simple idct improvement for Alpha by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
214
diff
changeset
|
77 /* 0: all entries 0, 1: only first entry nonzero, 2: otherwise */ |
|
464
9b73bce5071a
gcc 3.1 warning fix (patch by Felix Buenemann <atmosfear at users.sourceforge.net>)
michaelni
parents:
440
diff
changeset
|
78 static inline int idctRowCondDC(int16_t *row) |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
79 { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
80 int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
81 uint64_t *lrow = (uint64_t *) row; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
82 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
83 if (lrow[1] == 0) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
84 if (lrow[0] == 0) |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
85 return 0; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
86 if ((lrow[0] & ~0xffffULL) == 0) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
87 uint64_t v; |
| 633 | 88 #if 1 //is ok if |a0| < 1024 than theres an +-1 error (for the *W4 case for W4=16383 !!!) |
| 89 a0 = row[0]<<3; | |
| 90 #else | |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
91 a0 = W4 * row[0]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
92 a0 += 1 << (ROW_SHIFT - 1); |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
93 a0 >>= ROW_SHIFT; |
| 633 | 94 #endif |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
95 v = (uint16_t) a0; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
96 v += v << 16; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
97 v += v << 32; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
98 lrow[0] = v; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
99 lrow[1] = v; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
100 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
101 return 1; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
102 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
103 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
104 |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
105 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
106 a1 = a0; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
107 a2 = a0; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
108 a3 = a0; |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
109 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
110 if (row[2]) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
111 a0 += W2 * row[2]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
112 a1 += W6 * row[2]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
113 a2 -= W6 * row[2]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
114 a3 -= W2 * row[2]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
115 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
116 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
117 if (row[4]) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
118 a0 += W4 * row[4]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
119 a1 -= W4 * row[4]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
120 a2 -= W4 * row[4]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
121 a3 += W4 * row[4]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
122 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
123 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
124 if (row[6]) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
125 a0 += W6 * row[6]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
126 a1 -= W2 * row[6]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
127 a2 += W2 * row[6]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
128 a3 -= W6 * row[6]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
129 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
130 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
131 if (row[1]) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
132 b0 = W1 * row[1]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
133 b1 = W3 * row[1]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
134 b2 = W5 * row[1]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
135 b3 = W7 * row[1]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
136 } else { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
137 b0 = 0; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
138 b1 = 0; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
139 b2 = 0; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
140 b3 = 0; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
141 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
142 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
143 if (row[3]) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
144 b0 += W3 * row[3]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
145 b1 -= W7 * row[3]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
146 b2 -= W1 * row[3]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
147 b3 -= W5 * row[3]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
148 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
149 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
150 if (row[5]) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
151 b0 += W5 * row[5]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
152 b1 -= W1 * row[5]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
153 b2 += W7 * row[5]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
154 b3 += W3 * row[5]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
155 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
156 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
157 if (row[7]) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
158 b0 += W7 * row[7]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
159 b1 -= W5 * row[7]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
160 b2 += W3 * row[7]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
161 b3 -= W1 * row[7]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
162 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
163 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
164 row[0] = (a0 + b0) >> ROW_SHIFT; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
165 row[1] = (a1 + b1) >> ROW_SHIFT; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
166 row[2] = (a2 + b2) >> ROW_SHIFT; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
167 row[3] = (a3 + b3) >> ROW_SHIFT; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
168 row[4] = (a3 - b3) >> ROW_SHIFT; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
169 row[5] = (a2 - b2) >> ROW_SHIFT; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
170 row[6] = (a1 - b1) >> ROW_SHIFT; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
171 row[7] = (a0 - b0) >> ROW_SHIFT; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
172 |
|
215
1fe6b64feefb
Small simple idct improvement for Alpha by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
214
diff
changeset
|
173 return 2; |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
174 } |
|
503
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
175 |
| 642 | 176 inline static void idctSparseCol2(int16_t *col) |
|
503
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
177 { |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
178 int a0, a1, a2, a3, b0, b1, b2, b3; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
179 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
180 col[0] += (1 << (COL_SHIFT - 1)) / W4; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
181 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
182 a0 = W4 * col[8 * 0]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
183 a1 = W4 * col[8 * 0]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
184 a2 = W4 * col[8 * 0]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
185 a3 = W4 * col[8 * 0]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
186 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
187 if (col[8 * 2]) { |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
188 a0 += W2 * col[8 * 2]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
189 a1 += W6 * col[8 * 2]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
190 a2 -= W6 * col[8 * 2]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
191 a3 -= W2 * col[8 * 2]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
192 } |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
193 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
194 if (col[8 * 4]) { |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
195 a0 += W4 * col[8 * 4]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
196 a1 -= W4 * col[8 * 4]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
197 a2 -= W4 * col[8 * 4]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
198 a3 += W4 * col[8 * 4]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
199 } |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
200 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
201 if (col[8 * 6]) { |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
202 a0 += W6 * col[8 * 6]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
203 a1 -= W2 * col[8 * 6]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
204 a2 += W2 * col[8 * 6]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
205 a3 -= W6 * col[8 * 6]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
206 } |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
207 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
208 if (col[8 * 1]) { |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
209 b0 = W1 * col[8 * 1]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
210 b1 = W3 * col[8 * 1]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
211 b2 = W5 * col[8 * 1]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
212 b3 = W7 * col[8 * 1]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
213 } else { |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
214 b0 = b1 = b2 = b3 = 0; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
215 } |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
216 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
217 if (col[8 * 3]) { |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
218 b0 += W3 * col[8 * 3]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
219 b1 -= W7 * col[8 * 3]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
220 b2 -= W1 * col[8 * 3]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
221 b3 -= W5 * col[8 * 3]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
222 } |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
223 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
224 if (col[8 * 5]) { |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
225 b0 += W5 * col[8 * 5]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
226 b1 -= W1 * col[8 * 5]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
227 b2 += W7 * col[8 * 5]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
228 b3 += W3 * col[8 * 5]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
229 } |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
230 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
231 if (col[8 * 7]) { |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
232 b0 += W7 * col[8 * 7]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
233 b1 -= W5 * col[8 * 7]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
234 b2 += W3 * col[8 * 7]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
235 b3 -= W1 * col[8 * 7]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
236 } |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
237 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
238 col[8 * 0] = (a0 + b0) >> COL_SHIFT; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
239 col[8 * 7] = (a0 - b0) >> COL_SHIFT; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
240 col[8 * 1] = (a1 + b1) >> COL_SHIFT; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
241 col[8 * 6] = (a1 - b1) >> COL_SHIFT; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
242 col[8 * 2] = (a2 + b2) >> COL_SHIFT; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
243 col[8 * 5] = (a2 - b2) >> COL_SHIFT; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
244 col[8 * 3] = (a3 + b3) >> COL_SHIFT; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
245 col[8 * 4] = (a3 - b3) >> COL_SHIFT; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
246 } |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
247 |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
248 #else /* not ARCH_ALPHA */ |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
249 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
250 static inline void idctRowCondDC (int16_t * row) |
| 205 | 251 { |
| 252 int a0, a1, a2, a3, b0, b1, b2, b3; | |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
253 #ifdef FAST_64BIT |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
254 uint64_t temp; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
255 #else |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
256 uint32_t temp; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
257 #endif |
| 205 | 258 |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
259 #ifdef FAST_64BIT |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
260 #ifdef WORDS_BIGENDIAN |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
261 #define ROW0_MASK 0xffff000000000000LL |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
262 #else |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
263 #define ROW0_MASK 0xffffLL |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
264 #endif |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
265 if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) | |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
266 ((uint64_t *)row)[1]) == 0) { |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
267 temp = (row[0] << 3) & 0xffff; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
268 temp += temp << 16; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
269 temp += temp << 32; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
270 ((uint64_t *)row)[0] = temp; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
271 ((uint64_t *)row)[1] = temp; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
272 return; |
| 205 | 273 } |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
274 #else |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
275 if (!(((uint32_t*)row)[1] | |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
276 ((uint32_t*)row)[2] | |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
277 ((uint32_t*)row)[3] | |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
278 row[1])) { |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
279 temp = (row[0] << 3) & 0xffff; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
280 temp += temp << 16; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
281 ((uint32_t*)row)[0]=((uint32_t*)row)[1] = |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
282 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
283 return; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
284 } |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
285 #endif |
| 205 | 286 |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
287 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
288 a1 = a0; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
289 a2 = a0; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
290 a3 = a0; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
291 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
292 /* no need to optimize : gcc does it */ |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
293 a0 += W2 * row[2]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
294 a1 += W6 * row[2]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
295 a2 -= W6 * row[2]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
296 a3 -= W2 * row[2]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
297 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
298 MUL16(b0, W1, row[1]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
299 MAC16(b0, W3, row[3]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
300 MUL16(b1, W3, row[1]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
301 MAC16(b1, -W7, row[3]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
302 MUL16(b2, W5, row[1]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
303 MAC16(b2, -W1, row[3]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
304 MUL16(b3, W7, row[1]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
305 MAC16(b3, -W5, row[3]); |
| 205 | 306 |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
307 #ifdef FAST_64BIT |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
308 temp = ((uint64_t*)row)[1]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
309 #else |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
310 temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
311 #endif |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
312 if (temp != 0) { |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
313 a0 += W4*row[4] + W6*row[6]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
314 a1 += - W4*row[4] - W2*row[6]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
315 a2 += - W4*row[4] + W2*row[6]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
316 a3 += W4*row[4] - W6*row[6]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
317 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
318 MAC16(b0, W5, row[5]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
319 MAC16(b0, W7, row[7]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
320 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
321 MAC16(b1, -W1, row[5]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
322 MAC16(b1, -W5, row[7]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
323 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
324 MAC16(b2, W7, row[5]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
325 MAC16(b2, W3, row[7]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
326 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
327 MAC16(b3, W3, row[5]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
328 MAC16(b3, -W1, row[7]); |
| 205 | 329 } |
| 330 | |
| 331 row[0] = (a0 + b0) >> ROW_SHIFT; | |
| 332 row[7] = (a0 - b0) >> ROW_SHIFT; | |
| 333 row[1] = (a1 + b1) >> ROW_SHIFT; | |
| 334 row[6] = (a1 - b1) >> ROW_SHIFT; | |
| 335 row[2] = (a2 + b2) >> ROW_SHIFT; | |
| 336 row[5] = (a2 - b2) >> ROW_SHIFT; | |
| 337 row[3] = (a3 + b3) >> ROW_SHIFT; | |
| 338 row[4] = (a3 - b3) >> ROW_SHIFT; | |
|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
339 } |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
340 #endif /* not ARCH_ALPHA */ |
|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
341 |
| 479 | 342 static inline void idctSparseColPut (UINT8 *dest, int line_size, |
| 343 int16_t * col) | |
| 205 | 344 { |
| 345 int a0, a1, a2, a3, b0, b1, b2, b3; | |
| 479 | 346 UINT8 *cm = cropTbl + MAX_NEG_CROP; |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
347 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
348 /* XXX: I did that only to give same values as previous code */ |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
349 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
350 a1 = a0; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
351 a2 = a0; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
352 a3 = a0; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
353 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
354 a0 += + W2*col[8*2]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
355 a1 += + W6*col[8*2]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
356 a2 += - W6*col[8*2]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
357 a3 += - W2*col[8*2]; |
| 205 | 358 |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
359 MUL16(b0, W1, col[8*1]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
360 MUL16(b1, W3, col[8*1]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
361 MUL16(b2, W5, col[8*1]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
362 MUL16(b3, W7, col[8*1]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
363 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
364 MAC16(b0, + W3, col[8*3]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
365 MAC16(b1, - W7, col[8*3]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
366 MAC16(b2, - W1, col[8*3]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
367 MAC16(b3, - W5, col[8*3]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
368 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
369 if(col[8*4]){ |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
370 a0 += + W4*col[8*4]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
371 a1 += - W4*col[8*4]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
372 a2 += - W4*col[8*4]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
373 a3 += + W4*col[8*4]; |
| 205 | 374 } |
| 375 | |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
376 if (col[8*5]) { |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
377 MAC16(b0, + W5, col[8*5]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
378 MAC16(b1, - W1, col[8*5]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
379 MAC16(b2, + W7, col[8*5]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
380 MAC16(b3, + W3, col[8*5]); |
| 205 | 381 } |
| 382 | |
| 383 if(col[8*6]){ | |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
384 a0 += + W6*col[8*6]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
385 a1 += - W2*col[8*6]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
386 a2 += + W2*col[8*6]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
387 a3 += - W6*col[8*6]; |
| 205 | 388 } |
| 389 | |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
390 if (col[8*7]) { |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
391 MAC16(b0, + W7, col[8*7]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
392 MAC16(b1, - W5, col[8*7]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
393 MAC16(b2, + W3, col[8*7]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
394 MAC16(b3, - W1, col[8*7]); |
| 205 | 395 } |
| 396 | |
| 479 | 397 dest[0] = cm[(a0 + b0) >> COL_SHIFT]; |
| 398 dest += line_size; | |
| 399 dest[0] = cm[(a1 + b1) >> COL_SHIFT]; | |
| 400 dest += line_size; | |
| 401 dest[0] = cm[(a2 + b2) >> COL_SHIFT]; | |
| 402 dest += line_size; | |
| 403 dest[0] = cm[(a3 + b3) >> COL_SHIFT]; | |
| 404 dest += line_size; | |
| 405 dest[0] = cm[(a3 - b3) >> COL_SHIFT]; | |
| 406 dest += line_size; | |
| 407 dest[0] = cm[(a2 - b2) >> COL_SHIFT]; | |
| 408 dest += line_size; | |
| 409 dest[0] = cm[(a1 - b1) >> COL_SHIFT]; | |
| 410 dest += line_size; | |
| 411 dest[0] = cm[(a0 - b0) >> COL_SHIFT]; | |
| 412 } | |
| 413 | |
| 414 static inline void idctSparseColAdd (UINT8 *dest, int line_size, | |
| 415 int16_t * col) | |
| 416 { | |
| 417 int a0, a1, a2, a3, b0, b1, b2, b3; | |
| 418 UINT8 *cm = cropTbl + MAX_NEG_CROP; | |
| 419 | |
| 420 /* XXX: I did that only to give same values as previous code */ | |
| 421 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); | |
| 422 a1 = a0; | |
| 423 a2 = a0; | |
| 424 a3 = a0; | |
| 425 | |
| 426 a0 += + W2*col[8*2]; | |
| 427 a1 += + W6*col[8*2]; | |
| 428 a2 += - W6*col[8*2]; | |
| 429 a3 += - W2*col[8*2]; | |
| 430 | |
| 431 MUL16(b0, W1, col[8*1]); | |
| 432 MUL16(b1, W3, col[8*1]); | |
| 433 MUL16(b2, W5, col[8*1]); | |
| 434 MUL16(b3, W7, col[8*1]); | |
| 435 | |
| 436 MAC16(b0, + W3, col[8*3]); | |
| 437 MAC16(b1, - W7, col[8*3]); | |
| 438 MAC16(b2, - W1, col[8*3]); | |
| 439 MAC16(b3, - W5, col[8*3]); | |
| 440 | |
| 441 if(col[8*4]){ | |
| 442 a0 += + W4*col[8*4]; | |
| 443 a1 += - W4*col[8*4]; | |
| 444 a2 += - W4*col[8*4]; | |
| 445 a3 += + W4*col[8*4]; | |
| 446 } | |
| 447 | |
| 448 if (col[8*5]) { | |
| 449 MAC16(b0, + W5, col[8*5]); | |
| 450 MAC16(b1, - W1, col[8*5]); | |
| 451 MAC16(b2, + W7, col[8*5]); | |
| 452 MAC16(b3, + W3, col[8*5]); | |
| 453 } | |
| 454 | |
| 455 if(col[8*6]){ | |
| 456 a0 += + W6*col[8*6]; | |
| 457 a1 += - W2*col[8*6]; | |
| 458 a2 += + W2*col[8*6]; | |
| 459 a3 += - W6*col[8*6]; | |
| 460 } | |
| 461 | |
| 462 if (col[8*7]) { | |
| 463 MAC16(b0, + W7, col[8*7]); | |
| 464 MAC16(b1, - W5, col[8*7]); | |
| 465 MAC16(b2, + W3, col[8*7]); | |
| 466 MAC16(b3, - W1, col[8*7]); | |
| 467 } | |
| 468 | |
| 469 dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)]; | |
| 470 dest += line_size; | |
| 471 dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)]; | |
| 472 dest += line_size; | |
| 473 dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)]; | |
| 474 dest += line_size; | |
| 475 dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)]; | |
| 476 dest += line_size; | |
| 477 dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)]; | |
| 478 dest += line_size; | |
| 479 dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)]; | |
| 480 dest += line_size; | |
| 481 dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)]; | |
| 482 dest += line_size; | |
| 483 dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)]; | |
|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
484 } |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
485 |
| 633 | 486 static inline void idctSparseCol (int16_t * col) |
| 487 { | |
| 488 int a0, a1, a2, a3, b0, b1, b2, b3; | |
| 489 | |
| 490 /* XXX: I did that only to give same values as previous code */ | |
| 491 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); | |
| 492 a1 = a0; | |
| 493 a2 = a0; | |
| 494 a3 = a0; | |
| 495 | |
| 496 a0 += + W2*col[8*2]; | |
| 497 a1 += + W6*col[8*2]; | |
| 498 a2 += - W6*col[8*2]; | |
| 499 a3 += - W2*col[8*2]; | |
| 500 | |
| 501 MUL16(b0, W1, col[8*1]); | |
| 502 MUL16(b1, W3, col[8*1]); | |
| 503 MUL16(b2, W5, col[8*1]); | |
| 504 MUL16(b3, W7, col[8*1]); | |
| 505 | |
| 506 MAC16(b0, + W3, col[8*3]); | |
| 507 MAC16(b1, - W7, col[8*3]); | |
| 508 MAC16(b2, - W1, col[8*3]); | |
| 509 MAC16(b3, - W5, col[8*3]); | |
| 510 | |
| 511 if(col[8*4]){ | |
| 512 a0 += + W4*col[8*4]; | |
| 513 a1 += - W4*col[8*4]; | |
| 514 a2 += - W4*col[8*4]; | |
| 515 a3 += + W4*col[8*4]; | |
| 516 } | |
| 517 | |
| 518 if (col[8*5]) { | |
| 519 MAC16(b0, + W5, col[8*5]); | |
| 520 MAC16(b1, - W1, col[8*5]); | |
| 521 MAC16(b2, + W7, col[8*5]); | |
| 522 MAC16(b3, + W3, col[8*5]); | |
| 523 } | |
| 524 | |
| 525 if(col[8*6]){ | |
| 526 a0 += + W6*col[8*6]; | |
| 527 a1 += - W2*col[8*6]; | |
| 528 a2 += + W2*col[8*6]; | |
| 529 a3 += - W6*col[8*6]; | |
| 530 } | |
| 531 | |
| 532 if (col[8*7]) { | |
| 533 MAC16(b0, + W7, col[8*7]); | |
| 534 MAC16(b1, - W5, col[8*7]); | |
| 535 MAC16(b2, + W3, col[8*7]); | |
| 536 MAC16(b3, - W1, col[8*7]); | |
| 537 } | |
| 538 | |
| 539 col[0 ] = ((a0 + b0) >> COL_SHIFT); | |
| 540 col[8 ] = ((a1 + b1) >> COL_SHIFT); | |
| 541 col[16] = ((a2 + b2) >> COL_SHIFT); | |
| 542 col[24] = ((a3 + b3) >> COL_SHIFT); | |
| 543 col[32] = ((a3 - b3) >> COL_SHIFT); | |
| 544 col[40] = ((a2 - b2) >> COL_SHIFT); | |
| 545 col[48] = ((a1 - b1) >> COL_SHIFT); | |
| 546 col[56] = ((a0 - b0) >> COL_SHIFT); | |
| 547 } | |
| 548 | |
| 549 | |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
550 #ifdef ARCH_ALPHA |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
551 /* If all rows but the first one are zero after row transformation, |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
552 all rows will be identical after column transformation. */ |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
553 static inline void idctCol2(int16_t *col) |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
554 { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
555 int i; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
556 uint64_t l, r; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
557 uint64_t *lcol = (uint64_t *) col; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
558 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
559 for (i = 0; i < 8; ++i) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
560 int a0 = col[0] + (1 << (COL_SHIFT - 1)) / W4; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
561 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
562 a0 *= W4; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
563 col[0] = a0 >> COL_SHIFT; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
564 ++col; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
565 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
566 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
567 l = lcol[0]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
568 r = lcol[1]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
569 lcol[ 2] = l; lcol[ 3] = r; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
570 lcol[ 4] = l; lcol[ 5] = r; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
571 lcol[ 6] = l; lcol[ 7] = r; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
572 lcol[ 8] = l; lcol[ 9] = r; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
573 lcol[10] = l; lcol[11] = r; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
574 lcol[12] = l; lcol[13] = r; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
575 lcol[14] = l; lcol[15] = r; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
576 } |
| 205 | 577 |
|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
/* In-place 8x8 IDCT, Alpha variant.  The row pass is run on all eight
   rows; its per-row result is then used to choose the cheapest column
   pass that is still exact for this block. */
void simple_idct (short *block)
{
    uint64_t *quad;
    int only_first_row = 1;  /* all rows except row 0 zero */
    int flat_rows      = 1;  /* all rows consist of a constant value */
    int n;

    for (n = 0; n < 8; n++) {
        /* NOTE(review): idctRowCondDC is defined outside this chunk; its
           return value is used here as 0 = row is zero, 2 = row is not
           constant -- confirm against its definition. */
        const int kind = idctRowCondDC(block + 8 * n);

        if (kind == 2)
            flat_rows = 0;
        if (n != 0 && kind > 0)
            only_first_row = 0;
    }

    /* only row 0 carries data: transform it once and replicate */
    if (only_first_row) {
        idctCol2(block);
        return;
    }

    /* general case: full column pass on each of the eight columns */
    if (!flat_rows) {
        for (n = 0; n < 8; n++)
            idctSparseCol2(block + n);
        return;
    }

    /* every row is constant: transform column 0 only, then broadcast the
       first value of each row across that row with two 64-bit stores */
    idctSparseCol2(block);
    quad = (uint64_t *) block;
    for (n = 0; n < 8; n++) {
        uint64_t v = (uint16_t) block[8 * n];

        v += v << 16;
        v += v << 32;
        quad[2 * n]     = v;
        quad[2 * n + 1] = v;
    }
}
|
440
000aeeac27a2
* started to cleanup name clashes for onetime compilation
kabi
parents:
429
diff
changeset
|
614 |
| 479 | 615 /* XXX: suppress this mess */ |
| 616 void simple_idct_put(UINT8 *dest, int line_size, DCTELEM *block) | |
| 617 { | |
| 618 simple_idct(block); | |
| 619 put_pixels_clamped(block, dest, line_size); | |
| 620 } | |
| 621 | |
| 622 void simple_idct_add(UINT8 *dest, int line_size, DCTELEM *block) | |
| 623 { | |
| 624 simple_idct(block); | |
| 625 add_pixels_clamped(block, dest, line_size); | |
| 626 } | |
| 627 | |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
628 #else |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
629 |
| 479 | 630 void simple_idct_put(UINT8 *dest, int line_size, INT16 *block) |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
631 { |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
632 int i; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
633 for(i=0; i<8; i++) |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
634 idctRowCondDC(block + i*8); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
635 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
636 for(i=0; i<8; i++) |
| 479 | 637 idctSparseColPut(dest + i, line_size, block + i); |
| 638 } | |
| 639 | |
| 640 void simple_idct_add(UINT8 *dest, int line_size, INT16 *block) | |
| 641 { | |
| 642 int i; | |
| 643 for(i=0; i<8; i++) | |
| 644 idctRowCondDC(block + i*8); | |
| 645 | |
| 646 for(i=0; i<8; i++) | |
| 647 idctSparseColAdd(dest + i, line_size, block + i); | |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
648 } |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
649 |
| 633 | 650 void simple_idct(INT16 *block) |
| 651 { | |
| 652 int i; | |
| 653 for(i=0; i<8; i++) | |
| 654 idctRowCondDC(block + i*8); | |
| 655 | |
| 656 for(i=0; i<8; i++) | |
| 657 idctSparseCol(block + i); | |
| 658 } | |
| 659 | |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
660 #endif |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
661 |
| 719 | 662 /* 2x4x8 idct */ |
| 663 | |
| 664 #define CN_SHIFT 12 | |
| 665 #define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5)) | |
| 666 #define C0 C_FIX(0.7071067811) | |
| 667 #define C1 C_FIX(0.9238795324) | |
| 668 #define C2 C_FIX(0.3826834324) | |
| 669 | |
| 670 /* row idct is multiple by 16 * sqrt(2.0), col idct4 is multiplied by | |
| 671 sqrt(2). An extra division by two is needed for the first butterfly | |
| 672 stage */ | |
| 673 #define C_SHIFT (4+1+12+1) | |
| 674 | |
| 675 static inline void idct4col(UINT8 *dest, int line_size, const INT16 *col) | |
| 676 { | |
| 677 int c0, c1, c2, c3, a0, a1, a2, a3; | |
| 678 const UINT8 *cm = cropTbl + MAX_NEG_CROP; | |
| 679 | |
| 680 a0 = col[8*0]; | |
| 681 a1 = col[8*2]; | |
| 682 a2 = col[8*4]; | |
| 683 a3 = col[8*6]; | |
| 684 c0 = (a0 + a2) * C0 + (1 << (C_SHIFT - 1)) + (128 << C_SHIFT); | |
| 685 c2 = (a0 - a2) * C0 + (1 << (C_SHIFT - 1)) + (128 << C_SHIFT); | |
| 686 c1 = a1 * C1 + a3 * C2; | |
| 687 c3 = a1 * C2 - a3 * C1; | |
| 688 dest[0] = cm[(c0 + c1) >> C_SHIFT]; | |
| 689 dest += line_size; | |
| 690 dest[0] = cm[(c2 + c3) >> C_SHIFT]; | |
| 691 dest += line_size; | |
| 692 dest[0] = cm[(c2 - c3) >> C_SHIFT]; | |
| 693 dest += line_size; | |
| 694 dest[0] = cm[(c0 - c1) >> C_SHIFT]; | |
| 695 } | |
| 696 | |
| 697 #define BF(k) \ | |
| 698 {\ | |
| 699 int a0, a1;\ | |
| 700 a0 = ptr[k];\ | |
| 701 a1 = ptr[8 + k];\ | |
| 702 ptr[k] = a0 + a1;\ | |
| 703 ptr[8 + k] = a0 - a1;\ | |
| 704 } | |
| 705 | |
| 706 /* only used by DV codec. The input must be interlaced. 128 is added | |
| 707 to the pixels before clamping to avoid systematic error | |
| 708 (1024*sqrt(2)) offset would be needed otherwise. */ | |
| 709 /* XXX: I think a 1.0/sqrt(2) normalization should be needed to | |
| 710 compensate the extra butterfly stage - I don't have the full DV | |
| 711 specification */ | |
| 712 void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block) | |
| 713 { | |
| 714 int i; | |
| 715 INT16 *ptr; | |
| 716 | |
| 717 /* butterfly */ | |
| 718 ptr = block; | |
| 719 for(i=0;i<4;i++) { | |
| 720 BF(0); | |
| 721 BF(1); | |
| 722 BF(2); | |
| 723 BF(3); | |
| 724 BF(4); | |
| 725 BF(5); | |
| 726 BF(6); | |
| 727 BF(7); | |
| 728 ptr += 2 * 8; | |
| 729 } | |
| 730 | |
| 731 /* IDCT8 on each line */ | |
| 732 for(i=0; i<8; i++) { | |
| 733 idctRowCondDC(block + i*8); | |
| 734 } | |
| 735 | |
| 736 /* IDCT4 and store */ | |
| 737 for(i=0;i<8;i++) { | |
| 738 idct4col(dest + i, 2 * line_size, block + i); | |
| 739 idct4col(dest + line_size + i, 2 * line_size, block + 8 + i); | |
| 740 } | |
| 741 } |
