Mercurial > libavcodec.hg
annotate simple_idct.c @ 503:2bf17a142cf4 libavcodec
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
code duplication, I'm currently working on the put/add variants, but I
did not get them to be as fast as the old method yet...
| author | mellum |
|---|---|
| date | Mon, 24 Jun 2002 21:17:22 +0000 |
| parents | 40ffce2cb6ef |
| children | e7b72c1dfa1b |
| rev | line source |
|---|---|
|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
1 /* |
| 429 | 2 * Simple IDCT |
| 3 * | |
| 4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> | |
| 5 * | |
| 6 * This library is free software; you can redistribute it and/or | |
| 7 * modify it under the terms of the GNU Lesser General Public | |
| 8 * License as published by the Free Software Foundation; either | |
| 9 * version 2 of the License, or (at your option) any later version. | |
| 10 * | |
| 11 * This library is distributed in the hope that it will be useful, | |
| 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 14 * Lesser General Public License for more details. | |
| 15 * | |
| 16 * You should have received a copy of the GNU Lesser General Public | |
| 17 * License along with this library; if not, write to the Free Software | |
| 18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
| 19 */ | |
|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
20 /* |
| 429 | 21 based upon some outcommented c code from mpeg2dec (idct_mmx.c |
| 22 written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>) | |
| 23 */ | |
|
396
fce0a2520551
removed useless header includes - use av memory functions
glantau
parents:
352
diff
changeset
|
24 #include "avcodec.h" |
| 479 | 25 #include "dsputil.h" |
|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
26 #include "simple_idct.h" |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
27 |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
28 #if 0 |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
29 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
30 #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
31 #define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */ |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
32 #define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */ |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
33 #define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */ |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
34 #define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */ |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
35 #define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */ |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
36 #define ROW_SHIFT 8 |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
37 #define COL_SHIFT 17 |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
38 #else |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
39 #define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
40 #define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
41 #define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
|
352
5a8eb5cf9f92
C4=16383 for the c version too and even for some outcommented code
michaelni
parents:
215
diff
changeset
|
42 #define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
43 #define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
44 #define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
45 #define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
46 #define ROW_SHIFT 11 |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
47 #define COL_SHIFT 20 // 6 |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
48 #endif |
| 205 | 49 |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
50 #ifdef ARCH_ALPHA |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
51 #define FAST_64BIT |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
52 #endif |
| 205 | 53 |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
54 #if defined(ARCH_POWERPC_405) |
|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
55 |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
56 /* signed 16x16 -> 32 multiply add accumulate */ |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
57 #define MAC16(rt, ra, rb) \ |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
58 asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb)); |
|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
59 |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
60 /* signed 16x16 -> 32 multiply */ |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
61 #define MUL16(rt, ra, rb) \ |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
62 asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb)); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
63 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
64 #else |
| 205 | 65 |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
66 /* signed 16x16 -> 32 multiply add accumulate */ |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
67 #define MAC16(rt, ra, rb) rt += (ra) * (rb) |
|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
68 |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
69 /* signed 16x16 -> 32 multiply */ |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
70 #define MUL16(rt, ra, rb) rt = (ra) * (rb) |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
71 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
72 #endif |
| 205 | 73 |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
74 #ifdef ARCH_ALPHA |
|
215
1fe6b64feefb
Small simple idct improvement for Alpha by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
214
diff
changeset
|
75 /* 0: all entries 0, 1: only first entry nonzero, 2: otherwise */ |
|
464
9b73bce5071a
gcc 3.1 warning fix (patch by Felix Buenemann <atmosfear at users.sourceforge.net>)
michaelni
parents:
440
diff
changeset
|
76 static inline int idctRowCondDC(int16_t *row) |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
77 { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
78 int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
79 uint64_t *lrow = (uint64_t *) row; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
80 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
81 if (lrow[1] == 0) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
82 if (lrow[0] == 0) |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
83 return 0; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
84 if ((lrow[0] & ~0xffffULL) == 0) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
85 uint64_t v; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
86 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
87 a0 = W4 * row[0]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
88 a0 += 1 << (ROW_SHIFT - 1); |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
89 a0 >>= ROW_SHIFT; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
90 v = (uint16_t) a0; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
91 v += v << 16; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
92 v += v << 32; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
93 lrow[0] = v; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
94 lrow[1] = v; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
95 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
96 return 1; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
97 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
98 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
99 |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
100 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
101 a1 = a0; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
102 a2 = a0; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
103 a3 = a0; |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
104 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
105 if (row[2]) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
106 a0 += W2 * row[2]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
107 a1 += W6 * row[2]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
108 a2 -= W6 * row[2]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
109 a3 -= W2 * row[2]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
110 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
111 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
112 if (row[4]) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
113 a0 += W4 * row[4]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
114 a1 -= W4 * row[4]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
115 a2 -= W4 * row[4]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
116 a3 += W4 * row[4]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
117 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
118 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
119 if (row[6]) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
120 a0 += W6 * row[6]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
121 a1 -= W2 * row[6]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
122 a2 += W2 * row[6]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
123 a3 -= W6 * row[6]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
124 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
125 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
126 if (row[1]) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
127 b0 = W1 * row[1]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
128 b1 = W3 * row[1]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
129 b2 = W5 * row[1]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
130 b3 = W7 * row[1]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
131 } else { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
132 b0 = 0; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
133 b1 = 0; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
134 b2 = 0; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
135 b3 = 0; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
136 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
137 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
138 if (row[3]) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
139 b0 += W3 * row[3]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
140 b1 -= W7 * row[3]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
141 b2 -= W1 * row[3]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
142 b3 -= W5 * row[3]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
143 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
144 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
145 if (row[5]) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
146 b0 += W5 * row[5]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
147 b1 -= W1 * row[5]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
148 b2 += W7 * row[5]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
149 b3 += W3 * row[5]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
150 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
151 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
152 if (row[7]) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
153 b0 += W7 * row[7]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
154 b1 -= W5 * row[7]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
155 b2 += W3 * row[7]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
156 b3 -= W1 * row[7]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
157 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
158 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
159 row[0] = (a0 + b0) >> ROW_SHIFT; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
160 row[1] = (a1 + b1) >> ROW_SHIFT; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
161 row[2] = (a2 + b2) >> ROW_SHIFT; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
162 row[3] = (a3 + b3) >> ROW_SHIFT; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
163 row[4] = (a3 - b3) >> ROW_SHIFT; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
164 row[5] = (a2 - b2) >> ROW_SHIFT; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
165 row[6] = (a1 - b1) >> ROW_SHIFT; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
166 row[7] = (a0 - b0) >> ROW_SHIFT; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
167 |
|
215
1fe6b64feefb
Small simple idct improvement for Alpha by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
214
diff
changeset
|
168 return 2; |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
169 } |
|
503
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
170 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
171 inline static void idctSparseCol(int16_t *col) |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
172 { |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
173 int a0, a1, a2, a3, b0, b1, b2, b3; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
174 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
175 col[0] += (1 << (COL_SHIFT - 1)) / W4; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
176 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
177 a0 = W4 * col[8 * 0]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
178 a1 = W4 * col[8 * 0]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
179 a2 = W4 * col[8 * 0]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
180 a3 = W4 * col[8 * 0]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
181 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
182 if (col[8 * 2]) { |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
183 a0 += W2 * col[8 * 2]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
184 a1 += W6 * col[8 * 2]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
185 a2 -= W6 * col[8 * 2]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
186 a3 -= W2 * col[8 * 2]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
187 } |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
188 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
189 if (col[8 * 4]) { |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
190 a0 += W4 * col[8 * 4]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
191 a1 -= W4 * col[8 * 4]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
192 a2 -= W4 * col[8 * 4]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
193 a3 += W4 * col[8 * 4]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
194 } |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
195 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
196 if (col[8 * 6]) { |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
197 a0 += W6 * col[8 * 6]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
198 a1 -= W2 * col[8 * 6]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
199 a2 += W2 * col[8 * 6]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
200 a3 -= W6 * col[8 * 6]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
201 } |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
202 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
203 if (col[8 * 1]) { |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
204 b0 = W1 * col[8 * 1]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
205 b1 = W3 * col[8 * 1]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
206 b2 = W5 * col[8 * 1]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
207 b3 = W7 * col[8 * 1]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
208 } else { |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
209 b0 = b1 = b2 = b3 = 0; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
210 } |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
211 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
212 if (col[8 * 3]) { |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
213 b0 += W3 * col[8 * 3]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
214 b1 -= W7 * col[8 * 3]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
215 b2 -= W1 * col[8 * 3]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
216 b3 -= W5 * col[8 * 3]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
217 } |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
218 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
219 if (col[8 * 5]) { |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
220 b0 += W5 * col[8 * 5]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
221 b1 -= W1 * col[8 * 5]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
222 b2 += W7 * col[8 * 5]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
223 b3 += W3 * col[8 * 5]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
224 } |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
225 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
226 if (col[8 * 7]) { |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
227 b0 += W7 * col[8 * 7]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
228 b1 -= W5 * col[8 * 7]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
229 b2 += W3 * col[8 * 7]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
230 b3 -= W1 * col[8 * 7]; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
231 } |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
232 |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
233 col[8 * 0] = (a0 + b0) >> COL_SHIFT; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
234 col[8 * 7] = (a0 - b0) >> COL_SHIFT; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
235 col[8 * 1] = (a1 + b1) >> COL_SHIFT; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
236 col[8 * 6] = (a1 - b1) >> COL_SHIFT; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
237 col[8 * 2] = (a2 + b2) >> COL_SHIFT; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
238 col[8 * 5] = (a2 - b2) >> COL_SHIFT; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
239 col[8 * 3] = (a3 + b3) >> COL_SHIFT; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
240 col[8 * 4] = (a3 - b3) >> COL_SHIFT; |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
241 } |
|
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
242 |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
243 #else /* not ARCH_ALPHA */ |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
244 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
245 static inline void idctRowCondDC (int16_t * row) |
| 205 | 246 { |
| 247 int a0, a1, a2, a3, b0, b1, b2, b3; | |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
248 #ifdef FAST_64BIT |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
249 uint64_t temp; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
250 #else |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
251 uint32_t temp; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
252 #endif |
| 205 | 253 |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
254 #ifdef FAST_64BIT |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
255 #ifdef WORDS_BIGENDIAN |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
256 #define ROW0_MASK 0xffff000000000000LL |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
257 #else |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
258 #define ROW0_MASK 0xffffLL |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
259 #endif |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
260 if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) | |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
261 ((uint64_t *)row)[1]) == 0) { |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
262 temp = (row[0] << 3) & 0xffff; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
263 temp += temp << 16; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
264 temp += temp << 32; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
265 ((uint64_t *)row)[0] = temp; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
266 ((uint64_t *)row)[1] = temp; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
267 return; |
| 205 | 268 } |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
269 #else |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
270 if (!(((uint32_t*)row)[1] | |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
271 ((uint32_t*)row)[2] | |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
272 ((uint32_t*)row)[3] | |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
273 row[1])) { |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
274 temp = (row[0] << 3) & 0xffff; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
275 temp += temp << 16; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
276 ((uint32_t*)row)[0]=((uint32_t*)row)[1] = |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
277 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
278 return; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
279 } |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
280 #endif |
| 205 | 281 |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
282 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
283 a1 = a0; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
284 a2 = a0; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
285 a3 = a0; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
286 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
287 /* no need to optimize : gcc does it */ |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
288 a0 += W2 * row[2]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
289 a1 += W6 * row[2]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
290 a2 -= W6 * row[2]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
291 a3 -= W2 * row[2]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
292 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
293 MUL16(b0, W1, row[1]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
294 MAC16(b0, W3, row[3]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
295 MUL16(b1, W3, row[1]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
296 MAC16(b1, -W7, row[3]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
297 MUL16(b2, W5, row[1]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
298 MAC16(b2, -W1, row[3]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
299 MUL16(b3, W7, row[1]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
300 MAC16(b3, -W5, row[3]); |
| 205 | 301 |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
302 #ifdef FAST_64BIT |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
303 temp = ((uint64_t*)row)[1]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
304 #else |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
305 temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
306 #endif |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
307 if (temp != 0) { |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
308 a0 += W4*row[4] + W6*row[6]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
309 a1 += - W4*row[4] - W2*row[6]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
310 a2 += - W4*row[4] + W2*row[6]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
311 a3 += W4*row[4] - W6*row[6]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
312 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
313 MAC16(b0, W5, row[5]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
314 MAC16(b0, W7, row[7]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
315 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
316 MAC16(b1, -W1, row[5]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
317 MAC16(b1, -W5, row[7]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
318 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
319 MAC16(b2, W7, row[5]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
320 MAC16(b2, W3, row[7]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
321 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
322 MAC16(b3, W3, row[5]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
323 MAC16(b3, -W1, row[7]); |
| 205 | 324 } |
| 325 | |
| 326 row[0] = (a0 + b0) >> ROW_SHIFT; | |
| 327 row[7] = (a0 - b0) >> ROW_SHIFT; | |
| 328 row[1] = (a1 + b1) >> ROW_SHIFT; | |
| 329 row[6] = (a1 - b1) >> ROW_SHIFT; | |
| 330 row[2] = (a2 + b2) >> ROW_SHIFT; | |
| 331 row[5] = (a2 - b2) >> ROW_SHIFT; | |
| 332 row[3] = (a3 + b3) >> ROW_SHIFT; | |
| 333 row[4] = (a3 - b3) >> ROW_SHIFT; | |
|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
334 } |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
335 #endif /* not ARCH_ALPHA */ |
|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
336 |
| 479 | 337 static inline void idctSparseColPut (UINT8 *dest, int line_size, |
| 338 int16_t * col) | |
| 205 | 339 { |
| 340 int a0, a1, a2, a3, b0, b1, b2, b3; | |
| 479 | 341 UINT8 *cm = cropTbl + MAX_NEG_CROP; |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
342 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
343 /* XXX: I did that only to give same values as previous code */ |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
344 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
345 a1 = a0; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
346 a2 = a0; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
347 a3 = a0; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
348 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
349 a0 += + W2*col[8*2]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
350 a1 += + W6*col[8*2]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
351 a2 += - W6*col[8*2]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
352 a3 += - W2*col[8*2]; |
| 205 | 353 |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
354 MUL16(b0, W1, col[8*1]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
355 MUL16(b1, W3, col[8*1]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
356 MUL16(b2, W5, col[8*1]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
357 MUL16(b3, W7, col[8*1]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
358 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
359 MAC16(b0, + W3, col[8*3]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
360 MAC16(b1, - W7, col[8*3]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
361 MAC16(b2, - W1, col[8*3]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
362 MAC16(b3, - W5, col[8*3]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
363 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
364 if(col[8*4]){ |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
365 a0 += + W4*col[8*4]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
366 a1 += - W4*col[8*4]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
367 a2 += - W4*col[8*4]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
368 a3 += + W4*col[8*4]; |
| 205 | 369 } |
| 370 | |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
371 if (col[8*5]) { |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
372 MAC16(b0, + W5, col[8*5]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
373 MAC16(b1, - W1, col[8*5]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
374 MAC16(b2, + W7, col[8*5]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
375 MAC16(b3, + W3, col[8*5]); |
| 205 | 376 } |
| 377 | |
| 378 if(col[8*6]){ | |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
379 a0 += + W6*col[8*6]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
380 a1 += - W2*col[8*6]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
381 a2 += + W2*col[8*6]; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
382 a3 += - W6*col[8*6]; |
| 205 | 383 } |
| 384 | |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
385 if (col[8*7]) { |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
386 MAC16(b0, + W7, col[8*7]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
387 MAC16(b1, - W5, col[8*7]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
388 MAC16(b2, + W3, col[8*7]); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
389 MAC16(b3, - W1, col[8*7]); |
| 205 | 390 } |
| 391 | |
| 479 | 392 dest[0] = cm[(a0 + b0) >> COL_SHIFT]; |
| 393 dest += line_size; | |
| 394 dest[0] = cm[(a1 + b1) >> COL_SHIFT]; | |
| 395 dest += line_size; | |
| 396 dest[0] = cm[(a2 + b2) >> COL_SHIFT]; | |
| 397 dest += line_size; | |
| 398 dest[0] = cm[(a3 + b3) >> COL_SHIFT]; | |
| 399 dest += line_size; | |
| 400 dest[0] = cm[(a3 - b3) >> COL_SHIFT]; | |
| 401 dest += line_size; | |
| 402 dest[0] = cm[(a2 - b2) >> COL_SHIFT]; | |
| 403 dest += line_size; | |
| 404 dest[0] = cm[(a1 - b1) >> COL_SHIFT]; | |
| 405 dest += line_size; | |
| 406 dest[0] = cm[(a0 - b0) >> COL_SHIFT]; | |
| 407 } | |
| 408 | |
| 409 static inline void idctSparseColAdd (UINT8 *dest, int line_size, | |
| 410 int16_t * col) | |
| 411 { | |
| 412 int a0, a1, a2, a3, b0, b1, b2, b3; | |
| 413 UINT8 *cm = cropTbl + MAX_NEG_CROP; | |
| 414 | |
| 415 /* XXX: I did that only to give same values as previous code */ | |
| 416 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); | |
| 417 a1 = a0; | |
| 418 a2 = a0; | |
| 419 a3 = a0; | |
| 420 | |
| 421 a0 += + W2*col[8*2]; | |
| 422 a1 += + W6*col[8*2]; | |
| 423 a2 += - W6*col[8*2]; | |
| 424 a3 += - W2*col[8*2]; | |
| 425 | |
| 426 MUL16(b0, W1, col[8*1]); | |
| 427 MUL16(b1, W3, col[8*1]); | |
| 428 MUL16(b2, W5, col[8*1]); | |
| 429 MUL16(b3, W7, col[8*1]); | |
| 430 | |
| 431 MAC16(b0, + W3, col[8*3]); | |
| 432 MAC16(b1, - W7, col[8*3]); | |
| 433 MAC16(b2, - W1, col[8*3]); | |
| 434 MAC16(b3, - W5, col[8*3]); | |
| 435 | |
| 436 if(col[8*4]){ | |
| 437 a0 += + W4*col[8*4]; | |
| 438 a1 += - W4*col[8*4]; | |
| 439 a2 += - W4*col[8*4]; | |
| 440 a3 += + W4*col[8*4]; | |
| 441 } | |
| 442 | |
| 443 if (col[8*5]) { | |
| 444 MAC16(b0, + W5, col[8*5]); | |
| 445 MAC16(b1, - W1, col[8*5]); | |
| 446 MAC16(b2, + W7, col[8*5]); | |
| 447 MAC16(b3, + W3, col[8*5]); | |
| 448 } | |
| 449 | |
| 450 if(col[8*6]){ | |
| 451 a0 += + W6*col[8*6]; | |
| 452 a1 += - W2*col[8*6]; | |
| 453 a2 += + W2*col[8*6]; | |
| 454 a3 += - W6*col[8*6]; | |
| 455 } | |
| 456 | |
| 457 if (col[8*7]) { | |
| 458 MAC16(b0, + W7, col[8*7]); | |
| 459 MAC16(b1, - W5, col[8*7]); | |
| 460 MAC16(b2, + W3, col[8*7]); | |
| 461 MAC16(b3, - W1, col[8*7]); | |
| 462 } | |
| 463 | |
| 464 dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)]; | |
| 465 dest += line_size; | |
| 466 dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)]; | |
| 467 dest += line_size; | |
| 468 dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)]; | |
| 469 dest += line_size; | |
| 470 dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)]; | |
| 471 dest += line_size; | |
| 472 dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)]; | |
| 473 dest += line_size; | |
| 474 dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)]; | |
| 475 dest += line_size; | |
| 476 dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)]; | |
| 477 dest += line_size; | |
| 478 dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)]; | |
|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
479 } |
|
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
480 |
|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
481 #ifdef ARCH_ALPHA |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
482 /* If all rows but the first one are zero after row transformation, |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
483 all rows will be identical after column transformation. */ |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
484 static inline void idctCol2(int16_t *col) |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
485 { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
486 int i; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
487 uint64_t l, r; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
488 uint64_t *lcol = (uint64_t *) col; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
489 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
490 for (i = 0; i < 8; ++i) { |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
491 int a0 = col[0] + (1 << (COL_SHIFT - 1)) / W4; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
492 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
493 a0 *= W4; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
494 col[0] = a0 >> COL_SHIFT; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
495 ++col; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
496 } |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
497 |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
498 l = lcol[0]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
499 r = lcol[1]; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
500 lcol[ 2] = l; lcol[ 3] = r; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
501 lcol[ 4] = l; lcol[ 5] = r; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
502 lcol[ 6] = l; lcol[ 7] = r; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
503 lcol[ 8] = l; lcol[ 9] = r; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
504 lcol[10] = l; lcol[11] = r; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
505 lcol[12] = l; lcol[13] = r; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
506 lcol[14] = l; lcol[15] = r; |
|
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
507 } |
| 205 | 508 |
|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
/*
 * In-place inverse DCT of an 8x8 block (64 coefficients, row-major).
 *
 * The row pass (idctRowCondDC) is applied to all eight rows first.  Its
 * return value is used to pick the cheapest column pass:
 *   - no row after row 0 returned a value > 0  -> idctCol2()
 *   - no row returned 2                        -> transform column 0 only,
 *     then copy each row's first element across the whole row
 *   - otherwise                                -> idctSparseCol() on each
 *     of the eight columns
 * Presumably 0 means "row was all zero" and 2 means "row is not constant";
 * confirm against idctRowCondDC.
 */
void simple_idct (short *block)
{
    int n;
    int onlyFirstRow = 1;   /* rows 1..7 all returned 0 */
    int flatRows     = 1;   /* no row returned 2 */

    for (n = 0; n < 8; n++) {
        int kind = idctRowCondDC(block + 8 * n);

        if (kind == 2)
            flatRows = 0;
        if (n != 0 && kind > 0)
            onlyFirstRow = 0;
    }

    if (onlyFirstRow) {
        idctCol2(block);
        return;
    }

    if (!flatRows) {
        for (n = 0; n < 8; n++)
            idctSparseCol(block + n);
        return;
    }

    /* Every row is flat: only column 0 needs the column transform. */
    idctSparseCol(block);
    {
        /* NOTE(review): views the 16-bit block as 64-bit words; relies on
           block being suitably aligned for uint64_t stores. */
        uint64_t *quads = (uint64_t *) block;

        for (n = 0; n < 8; n++) {
            /* Replicate the row's first element into all four 16-bit lanes. */
            uint64_t lane = (uint16_t) block[8 * n];

            lane |= lane << 16;
            lane |= lane << 32;
            /* One row is eight 16-bit values, i.e. two 64-bit words. */
            quads[2 * n]     = lane;
            quads[2 * n + 1] = lane;
        }
    }
}
|
440
000aeeac27a2
* started to cleanup name clashes for onetime compilation
kabi
parents:
429
diff
changeset
|
545 |
| 479 | 546 /* XXX: suppress this mess */ |
| 547 void simple_idct_put(UINT8 *dest, int line_size, DCTELEM *block) | |
| 548 { | |
| 549 simple_idct(block); | |
| 550 put_pixels_clamped(block, dest, line_size); | |
| 551 } | |
| 552 | |
| 553 void simple_idct_add(UINT8 *dest, int line_size, DCTELEM *block) | |
| 554 { | |
| 555 simple_idct(block); | |
| 556 add_pixels_clamped(block, dest, line_size); | |
| 557 } | |
| 558 | |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
559 #else |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
560 |
| 479 | 561 void simple_idct_put(UINT8 *dest, int line_size, INT16 *block) |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
562 { |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
563 int i; |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
564 for(i=0; i<8; i++) |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
565 idctRowCondDC(block + i*8); |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
566 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
567 for(i=0; i<8; i++) |
| 479 | 568 idctSparseColPut(dest + i, line_size, block + i); |
| 569 } | |
| 570 | |
| 571 void simple_idct_add(UINT8 *dest, int line_size, INT16 *block) | |
| 572 { | |
| 573 int i; | |
| 574 for(i=0; i<8; i++) | |
| 575 idctRowCondDC(block + i*8); | |
| 576 | |
| 577 for(i=0; i<8; i++) | |
| 578 idctSparseColAdd(dest + i, line_size, block + i); | |
|
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
579 } |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
580 |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
581 #endif |
|
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
582 |
|
440
000aeeac27a2
* started to cleanup name clashes for onetime compilation
kabi
parents:
429
diff
changeset
|
583 #undef COL_SHIFT |
