Mercurial > mplayer.hg
annotate libmpcodecs/native/rtjpegn.c @ 37195:ac6c37d85d65 default tip
configure: Fix initialization of variable def_local_aligned_32
It contained the #define of HAVE_LOCAL_ALIGNED_16 instead
of HAVE_LOCAL_ALIGNED_32.
| author | al |
|---|---|
| date | Sun, 28 Sep 2014 18:38:41 +0000 |
| parents | 0f1b5b68af32 |
| children |
| rev | line source |
|---|---|
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1 /* |
| 3802 | 2 RTjpeg (C) Justin Schoeman 1998 (justin@suntiger.ee.up.ac.za) |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
3 |
| 3802 | 4 With modifications by: |
| 5 (c) 1998, 1999 by Joerg Walter <trouble@moes.pmnet.uni-oldenburg.de> | |
| 6 and | |
| 7 (c) 1999 by Wim Taymans <wim.taymans@tvd.be> | |
| 8 | |
| 9 This program is free software; you can redistribute it and/or modify | |
| 10 it under the terms of the GNU General Public License as published by | |
| 11 the Free Software Foundation; either version 2 of the License, or | |
| 12 (at your option) any later version. | |
| 13 | |
| 14 This program is distributed in the hope that it will be useful, | |
| 15 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 17 GNU General Public License for more details. | |
| 18 | |
| 19 You should have received a copy of the GNU General Public License | |
| 20 along with this program; if not, write to the Free Software | |
|
21977
cea0eb833758
Fix FSF address and otherwise broken license headers.
diego
parents:
21507
diff
changeset
|
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 3802 | 22 */ |
| 23 | |
| 24 #include <stdio.h> | |
| 25 #include <stdlib.h> | |
| 26 #include <string.h> | |
| 3805 | 27 |
| 28 #include "config.h" | |
| 29 | |
|
21507
fa99b3d31d13
Hack around libavutil/bswap.h compilation problems due to always_inline undefined.
reimar
parents:
21372
diff
changeset
|
30 #include "mpbswap.h" |
|
26304
5f526e8e3988
Rename RTJPEG files so that filenames consist of lowercase name only.
diego
parents:
26280
diff
changeset
|
31 #include "rtjpegn.h" |
| 3802 | 32 |
|
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
33 #if HAVE_MMX |
| 3802 | 34 #include "mmx.h" |
| 35 #endif | |
| 36 | |
| 37 //#define SHOWBLOCK 1 | |
| 38 #define BETTERCOMPRESSION 1 | |
| 39 | |
| 40 static const unsigned char RTjpeg_ZZ[64]={ | |
| 41 0, | |
| 42 8, 1, | |
| 43 2, 9, 16, | |
| 44 24, 17, 10, 3, | |
| 45 4, 11, 18, 25, 32, | |
| 46 40, 33, 26, 19, 12, 5, | |
| 47 6, 13, 20, 27, 34, 41, 48, | |
| 48 56, 49, 42, 35, 28, 21, 14, 7, | |
| 49 15, 22, 29, 36, 43, 50, 57, | |
| 50 58, 51, 44, 37, 30, 23, | |
| 51 31, 38, 45, 52, 59, | |
| 52 60, 53, 46, 39, | |
| 53 47, 54, 61, | |
| 54 62, 55, | |
| 55 63 }; | |
| 56 | |
| 57 static const __u64 RTjpeg_aan_tab[64]={ | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
58 4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL, |
|
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
59 5957222912ULL, 8263040512ULL, 7783580160ULL, 7005009920ULL, 5957222912ULL, 4680582144ULL, 3224107520ULL, 1643641088ULL, |
|
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
60 5611718144ULL, 7783580160ULL, 7331904512ULL, 6598688768ULL, 5611718144ULL, 4408998912ULL, 3036936960ULL, 1548224000ULL, |
|
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
61 5050464768ULL, 7005009920ULL, 6598688768ULL, 5938608128ULL, 5050464768ULL, 3968072960ULL, 2733115392ULL, 1393296000ULL, |
|
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
62 4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL, |
|
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
63 3374581504ULL, 4680582144ULL, 4408998912ULL, 3968072960ULL, 3374581504ULL, 2651326208ULL, 1826357504ULL, 931136000ULL, |
|
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
64 2324432128ULL, 3224107520ULL, 3036936960ULL, 2733115392ULL, 2324432128ULL, 1826357504ULL, 1258030336ULL, 641204288ULL, |
|
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
65 1184891264ULL, 1643641088ULL, 1548224000ULL, 1393296000ULL, 1184891264ULL, 931136000ULL, 641204288ULL, 326894240ULL, |
| 3802 | 66 }; |
| 67 | |
|
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
68 #if !HAVE_MMX |
| 3802 | 69 static __s32 RTjpeg_ws[64+31]; |
| 70 #endif | |
|
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
71 static __u8 RTjpeg_alldata[2*64+4*64+4*64+4*64+4*64+32]; |
| 3802 | 72 |
| 3835 | 73 static __s16 *block; // rh |
| 74 static __s16 *RTjpeg_block; | |
| 75 static __s32 *RTjpeg_lqt; | |
| 76 static __s32 *RTjpeg_cqt; | |
| 77 static __u32 *RTjpeg_liqt; | |
| 78 static __u32 *RTjpeg_ciqt; | |
| 79 | |
| 80 static unsigned char RTjpeg_lb8; | |
| 81 static unsigned char RTjpeg_cb8; | |
| 82 static int RTjpeg_width, RTjpeg_height; | |
| 83 static int RTjpeg_Ywidth, RTjpeg_Cwidth; | |
| 84 static int RTjpeg_Ysize, RTjpeg_Csize; | |
| 85 | |
| 86 static __s16 *RTjpeg_old=NULL; | |
| 3802 | 87 |
|
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
88 #if HAVE_MMX |
|
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
89 static mmx_t RTjpeg_lmask; |
|
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
90 static mmx_t RTjpeg_cmask; |
| 3802 | 91 #else |
|
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
92 static __u16 RTjpeg_lmask; |
|
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
93 static __u16 RTjpeg_cmask; |
| 3802 | 94 #endif |
| 95 | |
| 96 static const unsigned char RTjpeg_lum_quant_tbl[64] = { | |
| 97 16, 11, 10, 16, 24, 40, 51, 61, | |
| 98 12, 12, 14, 19, 26, 58, 60, 55, | |
| 99 14, 13, 16, 24, 40, 57, 69, 56, | |
| 100 14, 17, 22, 29, 51, 87, 80, 62, | |
| 101 18, 22, 37, 56, 68, 109, 103, 77, | |
| 102 24, 35, 55, 64, 81, 104, 113, 92, | |
| 103 49, 64, 78, 87, 103, 121, 120, 101, | |
| 104 72, 92, 95, 98, 112, 100, 103, 99 | |
| 105 }; | |
| 106 | |
| 107 static const unsigned char RTjpeg_chrom_quant_tbl[64] = { | |
| 108 17, 18, 24, 47, 99, 99, 99, 99, | |
| 109 18, 21, 26, 66, 99, 99, 99, 99, | |
| 110 24, 26, 56, 99, 99, 99, 99, 99, | |
| 111 47, 66, 99, 99, 99, 99, 99, 99, | |
| 112 99, 99, 99, 99, 99, 99, 99, 99, | |
| 113 99, 99, 99, 99, 99, 99, 99, 99, | |
| 114 99, 99, 99, 99, 99, 99, 99, 99, | |
| 115 99, 99, 99, 99, 99, 99, 99, 99 | |
| 116 }; | |
| 117 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
118 #ifdef BETTERCOMPRESSION |
| 3802 | 119 |
| 120 /*--------------------------------------------------*/ | |
| 121 /* better encoding, but needs a lot more cpu time */ | |
| 122 /* seems to be more effective than old method +lzo */ | |
| 123 /* with this encoding lzo isn't efficient anymore */ | |
| 124 /* there is still more potential for better */ | |
| 125 /* encoding but that would need even more cputime */ | |
| 126 /* anyway your mileage may vary */ | |
| 127 /* */ | |
| 128 /* written by Martin BIELY and Roman HOCHLEITNER */ | |
| 129 /*--------------------------------------------------*/ | |
| 130 | |
| 131 /* +++++++++++++++++++++++++++++++++++++++++++++++++++*/ | |
| 132 /* Block to Stream (encoding) */ | |
| 133 /* */ | |
| 134 | |
|
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
135 static int RTjpeg_b2s(__s16 *data, __s8 *strm, __u8 bt8) |
| 3802 | 136 { |
| 137 register int ci, co=1; | |
| 138 register __s16 ZZvalue; | |
| 139 register unsigned char bitten; | |
| 140 register unsigned char bitoff; | |
| 141 | |
| 142 #ifdef SHOWBLOCK | |
| 143 | |
| 144 int ii; | |
| 145 for (ii=0; ii < 64; ii++) { | |
| 146 fprintf(stdout, "%d ", data[RTjpeg_ZZ[ii]]); | |
| 147 } | |
| 148 fprintf(stdout, "\n\n"); | |
| 149 | |
| 150 #endif | |
| 151 | |
| 152 // first byte always written | |
| 12378 | 153 ((__u8*)strm)[0]= |
| 3802 | 154 (__u8)(data[RTjpeg_ZZ[0]]>254) ? 254:((data[RTjpeg_ZZ[0]]<0)?0:data[RTjpeg_ZZ[0]]); |
| 155 | |
| 156 | |
| 157 ci=63; | |
| 158 while (data[RTjpeg_ZZ[ci]]==0 && ci>0) ci--; | |
| 159 | |
| 160 bitten = ((unsigned char)ci) << 2; | |
| 161 | |
| 162 if (ci==0) { | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
163 ((__u8*)strm)[1]= bitten; |
| 3802 | 164 co = 2; |
| 165 return (int)co; | |
| 166 } | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
167 |
|
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
168 /* bitoff=0 because the high 6 bits contain the first non-zero position */ |
| 3802 | 169 bitoff = 0; |
| 170 co = 1; | |
| 171 | |
| 172 for(; ci>0; ci--) { | |
| 173 | |
| 174 ZZvalue = data[RTjpeg_ZZ[ci]]; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
175 |
| 3802 | 176 switch(ZZvalue) { |
| 177 case 0: | |
| 178 break; | |
| 179 case 1: | |
| 180 bitten |= (0x01<<bitoff); | |
| 181 break; | |
| 182 case -1: | |
| 183 bitten |= (0x03<<bitoff); | |
| 184 break; | |
| 185 default: | |
| 186 bitten |= (0x02<<bitoff); | |
| 187 goto HERZWEH; | |
| 188 break; | |
| 189 } | |
| 190 | |
| 191 if( bitoff == 0 ) { | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
192 ((__u8*)strm)[co]= bitten; |
| 3802 | 193 bitten = 0; |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
194 bitoff = 8; |
| 3802 | 195 co++; |
| 196 } /* "fall through" */ | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
197 bitoff-=2; |
| 3802 | 198 |
| 199 } | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
200 |
| 3802 | 201 /* ci must be 0 */ |
| 202 if(bitoff != 6) { | |
| 203 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
204 ((__u8*)strm)[co]= bitten; |
| 3802 | 205 co++; |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
206 |
| 3802 | 207 } |
| 208 goto BAUCHWEH; | |
| 209 | |
| 210 HERZWEH: | |
| 211 /* ci cannot be 0 */ | |
| 212 /* correct bitoff to nibble boundaries */ | |
| 213 | |
| 214 switch(bitoff){ | |
| 215 case 4: | |
| 216 case 6: | |
| 217 bitoff = 0; | |
| 218 break; | |
| 219 case 2: | |
| 220 case 0: | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
221 ((__u8*)strm)[co]= bitten; |
| 3802 | 222 bitoff = 4; |
| 223 co++; | |
| 224 bitten = 0; // clear half nibble values in bitten | |
| 225 break; | |
| 226 default: | |
| 227 break; | |
| 228 } | |
| 229 | |
| 230 for(; ci>0; ci--) { | |
| 231 | |
| 232 ZZvalue = data[RTjpeg_ZZ[ci]]; | |
| 233 | |
| 234 if( (ZZvalue > 7) || (ZZvalue < -7) ) { | |
| 235 bitten |= (0x08<<bitoff); | |
| 236 goto HIRNWEH; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
237 } |
|
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
238 |
| 3802 | 239 bitten |= (ZZvalue&0xf)<<bitoff; |
| 240 | |
| 241 if( bitoff == 0 ) { | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
242 ((__u8*)strm)[co]= bitten; |
| 3802 | 243 bitten = 0; |
| 244 bitoff = 8; | |
| 245 co++; | |
| 246 } /* "fall thru" */ | |
| 247 bitoff-=4; | |
| 248 } | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
249 |
| 3802 | 250 /* ci must be 0 */ |
| 251 if( bitoff == 0 ) { | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
252 ((__u8*)strm)[co]= bitten; |
| 3802 | 253 co++; |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
254 } |
| 3802 | 255 goto BAUCHWEH; |
| 256 | |
| 257 HIRNWEH: | |
| 258 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
259 ((__u8*)strm)[co]= bitten; |
| 3802 | 260 co++; |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
261 |
| 3802 | 262 |
| 263 /* bitting is over now we bite */ | |
| 264 for(; ci>0; ci--) { | |
| 265 | |
| 266 ZZvalue = data[RTjpeg_ZZ[ci]]; | |
| 267 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
268 if(ZZvalue>0) |
| 3802 | 269 { |
| 270 strm[co++]=(__s8)(ZZvalue>127)?127:ZZvalue; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
271 } |
|
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
272 else |
| 3802 | 273 { |
| 274 strm[co++]=(__s8)(ZZvalue<-128)?-128:ZZvalue; | |
| 275 } | |
| 276 | |
| 277 } | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
278 |
| 3802 | 279 |
| 280 BAUCHWEH: | |
| 281 /* we got too much, now we are ill */ | |
| 282 #ifdef SHOWBLOCK | |
| 283 { | |
| 284 int i; | |
| 285 fprintf(stdout, "\nco = '%d'\n", co); | |
| 286 for (i=0; i < co+2; i++) { | |
| 287 fprintf(stdout, "%d ", strm[i]); | |
| 288 } | |
| 289 fprintf(stdout, "\n\n"); | |
| 290 } | |
| 291 #endif | |
| 292 | |
| 293 return (int)co; | |
| 294 } | |
| 295 | |
| 296 #else | |
| 297 | |
|
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
298 static int RTjpeg_b2s(__s16 *data, __s8 *strm, __u8 bt8) |
| 3802 | 299 { |
| 300 register int ci, co=1, tmp; | |
| 301 register __s16 ZZvalue; | |
| 302 | |
| 303 #ifdef SHOWBLOCK | |
| 304 | |
| 305 int ii; | |
| 306 for (ii=0; ii < 64; ii++) { | |
| 307 fprintf(stdout, "%d ", data[RTjpeg_ZZ[ii]]); | |
| 308 } | |
| 309 fprintf(stdout, "\n\n"); | |
| 310 | |
| 311 #endif | |
| 312 | |
| 313 (__u8)strm[0]=(__u8)(data[RTjpeg_ZZ[0]]>254) ? 254:((data[RTjpeg_ZZ[0]]<0)?0:data[RTjpeg_ZZ[0]]); | |
| 314 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
315 for(ci=1; ci<=bt8; ci++) |
| 3802 | 316 { |
| 317 ZZvalue = data[RTjpeg_ZZ[ci]]; | |
| 318 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
319 if(ZZvalue>0) |
| 3802 | 320 { |
| 321 strm[co++]=(__s8)(ZZvalue>127)?127:ZZvalue; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
322 } |
|
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
323 else |
| 3802 | 324 { |
| 325 strm[co++]=(__s8)(ZZvalue<-128)?-128:ZZvalue; | |
| 326 } | |
| 327 } | |
| 328 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
329 for(; ci<64; ci++) |
| 3802 | 330 { |
| 331 ZZvalue = data[RTjpeg_ZZ[ci]]; | |
| 332 | |
| 333 if(ZZvalue>0) | |
| 334 { | |
| 335 strm[co++]=(__s8)(ZZvalue>63)?63:ZZvalue; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
336 } |
| 3802 | 337 else if(ZZvalue<0) |
| 338 { | |
| 339 strm[co++]=(__s8)(ZZvalue<-64)?-64:ZZvalue; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
340 } |
| 3802 | 341 else /* compress zeros */ |
| 342 { | |
| 343 tmp=ci; | |
| 344 do | |
| 345 { | |
| 346 ci++; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
347 } |
| 3802 | 348 while((ci<64)&&(data[RTjpeg_ZZ[ci]]==0)); |
| 349 | |
| 350 strm[co++]=(__s8)(63+(ci-tmp)); | |
| 351 ci--; | |
| 352 } | |
| 353 } | |
| 354 return (int)co; | |
| 355 } | |
| 356 | |
|
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
357 static int RTjpeg_s2b(__s16 *data, __s8 *strm, __u8 bt8, __u32 *qtbl) |
| 3802 | 358 { |
| 359 int ci=1, co=1, tmp; | |
| 360 register int i; | |
| 361 | |
| 362 i=RTjpeg_ZZ[0]; | |
| 363 data[i]=((__u8)strm[0])*qtbl[i]; | |
| 364 | |
| 365 for(co=1; co<=bt8; co++) | |
| 366 { | |
| 367 i=RTjpeg_ZZ[co]; | |
| 368 data[i]=strm[ci++]*qtbl[i]; | |
| 369 } | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
370 |
| 3802 | 371 for(; co<64; co++) |
| 372 { | |
| 373 if(strm[ci]>63) | |
| 374 { | |
| 375 tmp=co+strm[ci]-63; | |
| 376 for(; co<tmp; co++)data[RTjpeg_ZZ[co]]=0; | |
| 377 co--; | |
| 378 } else | |
| 379 { | |
| 380 i=RTjpeg_ZZ[co]; | |
| 381 data[i]=strm[ci]*qtbl[i]; | |
| 382 } | |
| 383 ci++; | |
| 384 } | |
| 385 return (int)ci; | |
| 386 } | |
| 387 #endif | |
| 388 | |
|
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
389 #if HAVE_MMX |
|
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
390 static void RTjpeg_quant_init(void) |
| 3802 | 391 { |
| 392 int i; | |
| 393 __s16 *qtbl; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
394 |
| 3802 | 395 qtbl=(__s16 *)RTjpeg_lqt; |
| 396 for(i=0; i<64; i++)qtbl[i]=(__s16)RTjpeg_lqt[i]; | |
| 397 | |
| 398 qtbl=(__s16 *)RTjpeg_cqt; | |
| 399 for(i=0; i<64; i++)qtbl[i]=(__s16)RTjpeg_cqt[i]; | |
| 400 } | |
| 401 | |
| 12928 | 402 static mmx_t RTjpeg_ones={0x0001000100010001LL}; |
| 403 static mmx_t RTjpeg_half={0x7fff7fff7fff7fffLL}; | |
| 3802 | 404 |
|
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
405 static void RTjpeg_quant(__s16 *block, __s32 *qtbl) |
| 3802 | 406 { |
| 407 int i; | |
| 408 mmx_t *bl, *ql; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
409 |
| 3802 | 410 ql=(mmx_t *)qtbl; |
| 411 bl=(mmx_t *)block; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
412 |
| 3802 | 413 movq_m2r(RTjpeg_ones, mm6); |
| 414 movq_m2r(RTjpeg_half, mm7); | |
| 415 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
416 for(i=16; i; i--) |
| 3802 | 417 { |
| 418 movq_m2r(*(ql++), mm0); /* quant vals (4) */ | |
| 419 movq_m2r(*bl, mm2); /* block vals (4) */ | |
| 420 movq_r2r(mm0, mm1); | |
| 421 movq_r2r(mm2, mm3); | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
422 |
| 3802 | 423 punpcklwd_r2r(mm6, mm0); /* 1 qb 1 qa */ |
| 424 punpckhwd_r2r(mm6, mm1); /* 1 qd 1 qc */ | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
425 |
| 3802 | 426 punpcklwd_r2r(mm7, mm2); /* 32767 bb 32767 ba */ |
| 427 punpckhwd_r2r(mm7, mm3); /* 32767 bd 32767 bc */ | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
428 |
| 3802 | 429 pmaddwd_r2r(mm2, mm0); /* 32767+bb*qb 32767+ba*qa */ |
| 430 pmaddwd_r2r(mm3, mm1); /* 32767+bd*qd 32767+bc*qc */ | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
431 |
| 3802 | 432 psrad_i2r(16, mm0); |
| 433 psrad_i2r(16, mm1); | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
434 |
| 3802 | 435 packssdw_r2r(mm1, mm0); |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
436 |
| 3802 | 437 movq_r2m(mm0, *(bl++)); |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
438 |
| 3802 | 439 } |
| 440 } | |
| 441 #else | |
|
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
442 static void RTjpeg_quant_init(void) |
| 3802 | 443 { |
| 444 } | |
| 445 | |
|
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
446 static void RTjpeg_quant(__s16 *block, __s32 *qtbl) |
| 3802 | 447 { |
| 448 int i; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
449 |
| 3802 | 450 for(i=0; i<64; i++) |
| 451 block[i]=(__s16)((block[i]*qtbl[i]+32767)>>16); | |
| 452 } | |
| 453 #endif | |
| 454 | |
| 455 /* | |
| 456 * Perform the forward DCT on one block of samples. | |
| 457 */ | |
|
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
458 #if HAVE_MMX |
| 12928 | 459 static mmx_t RTjpeg_C4 ={0x2D412D412D412D41LL}; |
| 460 static mmx_t RTjpeg_C6 ={0x187E187E187E187ELL}; | |
| 461 static mmx_t RTjpeg_C2mC6={0x22A322A322A322A3LL}; | |
| 462 static mmx_t RTjpeg_C2pC6={0x539F539F539F539FLL}; | |
| 463 static mmx_t RTjpeg_zero ={0x0000000000000000LL}; | |
| 3802 | 464 |
| 465 #else | |
| 466 | |
| 467 #define FIX_0_382683433 ((__s32) 98) /* FIX(0.382683433) */ | |
| 468 #define FIX_0_541196100 ((__s32) 139) /* FIX(0.541196100) */ | |
| 469 #define FIX_0_707106781 ((__s32) 181) /* FIX(0.707106781) */ | |
| 470 #define FIX_1_306562965 ((__s32) 334) /* FIX(1.306562965) */ | |
| 471 | |
| 472 #define DESCALE10(x) (__s16)( ((x)+128) >> 8) | |
| 473 #define DESCALE20(x) (__s16)(((x)+32768) >> 16) | |
| 474 #define D_MULTIPLY(var,const) ((__s32) ((var) * (const))) | |
| 475 #endif | |
| 476 | |
|
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
477 static void RTjpeg_dct_init(void) |
| 3802 | 478 { |
| 479 int i; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
480 |
| 3802 | 481 for(i=0; i<64; i++) |
| 482 { | |
| 483 RTjpeg_lqt[i]=(((__u64)RTjpeg_lqt[i]<<32)/RTjpeg_aan_tab[i]); | |
| 484 RTjpeg_cqt[i]=(((__u64)RTjpeg_cqt[i]<<32)/RTjpeg_aan_tab[i]); | |
| 485 } | |
| 486 } | |
| 487 | |
|
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
488 static void RTjpeg_dctY(__u8 *idata, __s16 *odata, int rskip) |
| 3802 | 489 { |
|
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
490 #if !HAVE_MMX |
| 3802 | 491 __s32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
| 492 __s32 tmp10, tmp11, tmp12, tmp13; | |
| 493 __s32 z1, z2, z3, z4, z5, z11, z13; | |
| 494 __u8 *idataptr; | |
| 495 __s16 *odataptr; | |
| 496 __s32 *wsptr; | |
| 497 int ctr; | |
| 498 | |
| 499 idataptr = idata; | |
| 500 wsptr = RTjpeg_ws; | |
| 501 for (ctr = 7; ctr >= 0; ctr--) { | |
| 502 tmp0 = idataptr[0] + idataptr[7]; | |
| 503 tmp7 = idataptr[0] - idataptr[7]; | |
| 504 tmp1 = idataptr[1] + idataptr[6]; | |
| 505 tmp6 = idataptr[1] - idataptr[6]; | |
| 506 tmp2 = idataptr[2] + idataptr[5]; | |
| 507 tmp5 = idataptr[2] - idataptr[5]; | |
| 508 tmp3 = idataptr[3] + idataptr[4]; | |
| 509 tmp4 = idataptr[3] - idataptr[4]; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
510 |
| 3802 | 511 tmp10 = (tmp0 + tmp3); /* phase 2 */ |
| 512 tmp13 = tmp0 - tmp3; | |
| 513 tmp11 = (tmp1 + tmp2); | |
| 514 tmp12 = tmp1 - tmp2; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
515 |
| 3802 | 516 wsptr[0] = (tmp10 + tmp11)<<8; /* phase 3 */ |
| 517 wsptr[4] = (tmp10 - tmp11)<<8; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
518 |
| 3802 | 519 z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ |
| 520 wsptr[2] = (tmp13<<8) + z1; /* phase 5 */ | |
| 521 wsptr[6] = (tmp13<<8) - z1; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
522 |
| 3802 | 523 tmp10 = tmp4 + tmp5; /* phase 2 */ |
| 524 tmp11 = tmp5 + tmp6; | |
| 525 tmp12 = tmp6 + tmp7; | |
| 526 | |
| 527 z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ | |
| 528 z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ | |
| 529 z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ | |
| 530 z3 = D_MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ | |
| 531 | |
| 532 z11 = (tmp7<<8) + z3; /* phase 5 */ | |
| 533 z13 = (tmp7<<8) - z3; | |
| 534 | |
| 535 wsptr[5] = z13 + z2; /* phase 6 */ | |
| 536 wsptr[3] = z13 - z2; | |
| 537 wsptr[1] = z11 + z4; | |
| 538 wsptr[7] = z11 - z4; | |
| 539 | |
| 540 idataptr += rskip<<3; /* advance pointer to next row */ | |
| 541 wsptr += 8; | |
| 542 } | |
| 543 | |
| 544 wsptr = RTjpeg_ws; | |
| 545 odataptr=odata; | |
| 546 for (ctr = 7; ctr >= 0; ctr--) { | |
| 547 tmp0 = wsptr[0] + wsptr[56]; | |
| 548 tmp7 = wsptr[0] - wsptr[56]; | |
| 549 tmp1 = wsptr[8] + wsptr[48]; | |
| 550 tmp6 = wsptr[8] - wsptr[48]; | |
| 551 tmp2 = wsptr[16] + wsptr[40]; | |
| 552 tmp5 = wsptr[16] - wsptr[40]; | |
| 553 tmp3 = wsptr[24] + wsptr[32]; | |
| 554 tmp4 = wsptr[24] - wsptr[32]; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
555 |
| 3802 | 556 tmp10 = tmp0 + tmp3; /* phase 2 */ |
| 557 tmp13 = tmp0 - tmp3; | |
| 558 tmp11 = tmp1 + tmp2; | |
| 559 tmp12 = tmp1 - tmp2; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
560 |
| 3802 | 561 odataptr[0] = DESCALE10(tmp10 + tmp11); /* phase 3 */ |
| 562 odataptr[32] = DESCALE10(tmp10 - tmp11); | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
563 |
| 3802 | 564 z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ |
| 565 odataptr[16] = DESCALE20((tmp13<<8) + z1); /* phase 5 */ | |
| 566 odataptr[48] = DESCALE20((tmp13<<8) - z1); | |
| 567 | |
| 568 tmp10 = tmp4 + tmp5; /* phase 2 */ | |
| 569 tmp11 = tmp5 + tmp6; | |
| 570 tmp12 = tmp6 + tmp7; | |
| 571 | |
| 572 z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ | |
| 573 z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ | |
| 574 z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ | |
| 575 z3 = D_MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ | |
| 576 | |
| 577 z11 = (tmp7<<8) + z3; /* phase 5 */ | |
| 578 z13 = (tmp7<<8) - z3; | |
| 579 | |
| 580 odataptr[40] = DESCALE20(z13 + z2); /* phase 6 */ | |
| 581 odataptr[24] = DESCALE20(z13 - z2); | |
| 582 odataptr[8] = DESCALE20(z11 + z4); | |
| 583 odataptr[56] = DESCALE20(z11 - z4); | |
| 584 | |
| 585 odataptr++; /* advance pointer to next column */ | |
| 586 wsptr++; | |
| 587 } | |
| 588 #else | |
| 589 volatile mmx_t tmp6, tmp7; | |
| 590 register mmx_t *dataptr = (mmx_t *)odata; | |
| 591 mmx_t *idata2 = (mmx_t *)idata; | |
| 592 | |
| 593 // first copy the input 8 bit to the destination 16 bits | |
| 594 | |
| 595 movq_m2r(RTjpeg_zero, mm2); | |
| 596 | |
| 597 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
598 movq_m2r(*idata2, mm0); |
|
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
599 movq_r2r(mm0, mm1); |
| 3802 | 600 |
| 601 punpcklbw_r2r(mm2, mm0); | |
| 602 movq_r2m(mm0, *(dataptr)); | |
| 603 | |
| 604 punpckhbw_r2r(mm2, mm1); | |
| 605 movq_r2m(mm1, *(dataptr+1)); | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
606 |
| 3802 | 607 idata2 += rskip; |
| 608 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
609 movq_m2r(*idata2, mm0); |
|
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
610 movq_r2r(mm0, mm1); |
| 3802 | 611 |
| 612 punpcklbw_r2r(mm2, mm0); | |
| 613 movq_r2m(mm0, *(dataptr+2)); | |
| 614 | |
| 615 punpckhbw_r2r(mm2, mm1); | |
| 616 movq_r2m(mm1, *(dataptr+3)); | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
617 |
| 3802 | 618 idata2 += rskip; |
| 619 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
620 movq_m2r(*idata2, mm0); |
|
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
621 movq_r2r(mm0, mm1); |
| 3802 | 622 |
| 623 punpcklbw_r2r(mm2, mm0); | |
| 624 movq_r2m(mm0, *(dataptr+4)); | |
| 625 | |
| 626 punpckhbw_r2r(mm2, mm1); | |
| 627 movq_r2m(mm1, *(dataptr+5)); | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
628 |
| 3802 | 629 idata2 += rskip; |
| 630 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
631 movq_m2r(*idata2, mm0); |
|
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
632 movq_r2r(mm0, mm1); |
| 3802 | 633 |
| 634 punpcklbw_r2r(mm2, mm0); | |
| 635 movq_r2m(mm0, *(dataptr+6)); | |
| 636 | |
| 637 punpckhbw_r2r(mm2, mm1); | |
| 638 movq_r2m(mm1, *(dataptr+7)); | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
639 |
| 3802 | 640 idata2 += rskip; |
| 641 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
642 movq_m2r(*idata2, mm0); |
|
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
643 movq_r2r(mm0, mm1); |
| 3802 | 644 |
| 645 punpcklbw_r2r(mm2, mm0); | |
| 646 movq_r2m(mm0, *(dataptr+8)); | |
| 647 | |
| 648 punpckhbw_r2r(mm2, mm1); | |
| 649 movq_r2m(mm1, *(dataptr+9)); | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
650 |
| 3802 | 651 idata2 += rskip; |
| 652 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
653 movq_m2r(*idata2, mm0); |
|
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
654 movq_r2r(mm0, mm1); |
| 3802 | 655 |
| 656 punpcklbw_r2r(mm2, mm0); | |
| 657 movq_r2m(mm0, *(dataptr+10)); | |
| 658 | |
| 659 punpckhbw_r2r(mm2, mm1); | |
| 660 movq_r2m(mm1, *(dataptr+11)); | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
661 |
| 3802 | 662 idata2 += rskip; |
| 663 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
664 movq_m2r(*idata2, mm0); |
|
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
665 movq_r2r(mm0, mm1); |
| 3802 | 666 |
| 667 punpcklbw_r2r(mm2, mm0); | |
| 668 movq_r2m(mm0, *(dataptr+12)); | |
| 669 | |
| 670 punpckhbw_r2r(mm2, mm1); | |
| 671 movq_r2m(mm1, *(dataptr+13)); | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
672 |
| 3802 | 673 idata2 += rskip; |
| 674 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
675 movq_m2r(*idata2, mm0); |
|
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
676 movq_r2r(mm0, mm1); |
| 3802 | 677 |
| 678 punpcklbw_r2r(mm2, mm0); | |
| 679 movq_r2m(mm0, *(dataptr+14)); | |
| 680 | |
| 681 punpckhbw_r2r(mm2, mm1); | |
| 682 movq_r2m(mm1, *(dataptr+15)); | |
| 683 | |
| 684 /* Start Transpose to do calculations on rows */ | |
| 685 | |
| 686 movq_m2r(*(dataptr+9), mm7); // m03:m02|m01:m00 - first line (line 4)and copy into m5 | |
| 687 | |
| 688 movq_m2r(*(dataptr+13), mm6); // m23:m22|m21:m20 - third line (line 6)and copy into m2 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
689 movq_r2r(mm7, mm5); |
| 3802 | 690 |
| 691 punpcklwd_m2r(*(dataptr+11), mm7); // m11:m01|m10:m00 - interleave first and second lines | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
692 movq_r2r(mm6, mm2); |
| 3802 | 693 |
| 694 punpcklwd_m2r(*(dataptr+15), mm6); // m31:m21|m30:m20 - interleave third and fourth lines | |
| 695 movq_r2r(mm7, mm1); | |
| 696 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
697 movq_m2r(*(dataptr+11), mm3); // m13:m13|m11:m10 - second line |
| 3802 | 698 punpckldq_r2r(mm6, mm7); // m30:m20|m10:m00 - interleave to produce result 1 |
| 699 | |
| 700 movq_m2r(*(dataptr+15), mm0); // m13:m13|m11:m10 - fourth line | |
| 701 punpckhdq_r2r(mm6, mm1); // m31:m21|m11:m01 - interleave to produce result 2 | |
| 702 | |
| 703 movq_r2m(mm7,*(dataptr+9)); // write result 1 | |
| 704 punpckhwd_r2r(mm3, mm5); // m13:m03|m12:m02 - interleave first and second lines | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
705 |
| 3802 | 706 movq_r2m(mm1,*(dataptr+11)); // write result 2 |
| 707 punpckhwd_r2r(mm0, mm2); // m33:m23|m32:m22 - interleave third and fourth lines | |
| 708 | |
| 709 movq_r2r(mm5, mm1); | |
| 710 punpckldq_r2r(mm2, mm5); // m32:m22|m12:m02 - interleave to produce result 3 | |
| 711 | |
| 712 movq_m2r(*(dataptr+1), mm0); // m03:m02|m01:m00 - first line, 4x4 | |
| 713 punpckhdq_r2r(mm2, mm1); // m33:m23|m13:m03 - interleave to produce result 4 | |
| 714 | |
| 715 movq_r2m(mm5,*(dataptr+13)); // write result 3 | |
| 716 | |
| 717 // last 4x4 done | |
| 718 | |
| 719 movq_r2m(mm1, *(dataptr+15)); // write result 4, last 4x4 | |
| 720 | |
| 721 movq_m2r(*(dataptr+5), mm2); // m23:m22|m21:m20 - third line | |
| 722 movq_r2r(mm0, mm6); | |
| 723 | |
| 724 punpcklwd_m2r(*(dataptr+3), mm0); // m11:m01|m10:m00 - interleave first and second lines | |
| 725 movq_r2r(mm2, mm7); | |
| 726 | |
| 727 punpcklwd_m2r(*(dataptr+7), mm2); // m31:m21|m30:m20 - interleave third and fourth lines | |
| 728 movq_r2r(mm0, mm4); | |
| 729 | |
| 730 // | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
731 movq_m2r(*(dataptr+8), mm1); // n03:n02|n01:n00 - first line |
| 3802 | 732 punpckldq_r2r(mm2, mm0); // m30:m20|m10:m00 - interleave to produce first result |
| 733 | |
| 734 movq_m2r(*(dataptr+12), mm3); // n23:n22|n21:n20 - third line | |
| 735 punpckhdq_r2r(mm2, mm4); // m31:m21|m11:m01 - interleave to produce second result | |
| 736 | |
| 737 punpckhwd_m2r(*(dataptr+3), mm6); // m13:m03|m12:m02 - interleave first and second lines | |
| 738 movq_r2r(mm1, mm2); // copy first line | |
| 739 | |
| 740 punpckhwd_m2r(*(dataptr+7), mm7); // m33:m23|m32:m22 - interleave third and fourth lines | |
| 741 movq_r2r(mm6, mm5); // copy first intermediate result | |
| 742 | |
| 743 movq_r2m(mm0, *(dataptr+8)); // write result 1 | |
| 744 punpckhdq_r2r(mm7, mm5); // m33:m23|m13:m03 - produce third result | |
| 745 | |
| 746 punpcklwd_m2r(*(dataptr+10), mm1); // n11:n01|n10:n00 - interleave first and second lines | |
| 747 movq_r2r(mm3, mm0); // copy third line | |
| 748 | |
| 749 punpckhwd_m2r(*(dataptr+10), mm2); // n13:n03|n12:n02 - interleave first and second lines | |
| 750 | |
| 751 movq_r2m(mm4, *(dataptr+10)); // write result 2 out | |
| 752 punpckldq_r2r(mm7, mm6); // m32:m22|m12:m02 - produce fourth result | |
| 753 | |
| 754 punpcklwd_m2r(*(dataptr+14), mm3); // n31:n21|n30:n20 - interleave third and fourth lines | |
| 755 movq_r2r(mm1, mm4); | |
| 756 | |
| 757 movq_r2m(mm6, *(dataptr+12)); // write result 3 out | |
| 758 punpckldq_r2r(mm3, mm1); // n30:n20|n10:n00 - produce first result | |
| 759 | |
| 760 punpckhwd_m2r(*(dataptr+14), mm0); // n33:n23|n32:n22 - interleave third and fourth lines | |
| 761 movq_r2r(mm2, mm6); | |
| 762 | |
| 763 movq_r2m(mm5, *(dataptr+14)); // write result 4 out | |
| 764 punpckhdq_r2r(mm3, mm4); // n31:n21|n11:n01- produce second result | |
| 765 | |
| 766 movq_r2m(mm1, *(dataptr+1)); // write result 5 out - (first result for other 4 x 4 block) | |
| 767 punpckldq_r2r(mm0, mm2); // n32:n22|n12:n02- produce third result | |
| 768 | |
| 769 movq_r2m(mm4, *(dataptr+3)); // write result 6 out | |
| 770 punpckhdq_r2r(mm0, mm6); // n33:n23|n13:n03 - produce fourth result | |
| 771 | |
| 772 movq_r2m(mm2, *(dataptr+5)); // write result 7 out | |
| 773 | |
| 774 movq_m2r(*dataptr, mm0); // m03:m02|m01:m00 - first line, first 4x4 | |
| 775 | |
| 776 movq_r2m(mm6, *(dataptr+7)); // write result 8 out | |
| 777 | |
| 778 | |
| 779 // Do first 4x4 quadrant, which is used in the beginning of the DCT: | |
| 780 | |
| 781 movq_m2r(*(dataptr+4), mm7); // m23:m22|m21:m20 - third line | |
| 782 movq_r2r(mm0, mm2); | |
| 783 | |
| 784 punpcklwd_m2r(*(dataptr+2), mm0); // m11:m01|m10:m00 - interleave first and second lines | |
| 785 movq_r2r(mm7, mm4); | |
| 786 | |
| 787 punpcklwd_m2r(*(dataptr+6), mm7); // m31:m21|m30:m20 - interleave third and fourth lines | |
| 788 movq_r2r(mm0, mm1); | |
| 789 | |
| 790 movq_m2r(*(dataptr+2), mm6); // m13:m12|m11:m10 - second line | |
| 791 punpckldq_r2r(mm7, mm0); // m30:m20|m10:m00 - interleave to produce result 1 | |
| 792 | |
| 793 movq_m2r(*(dataptr+6), mm5); // m33:m32|m31:m30 - fourth line | |
| 794 punpckhdq_r2r(mm7, mm1); // m31:m21|m11:m01 - interleave to produce result 2 | |
| 795 | |
| 796 movq_r2r(mm0, mm7); // write result 1 | |
| 797 punpckhwd_r2r(mm6, mm2); // m13:m03|m12:m02 - interleave first and second lines | |
| 798 | |
| 799 psubw_m2r(*(dataptr+14), mm7); // tmp07=x0-x7 /* Stage 1 */ | |
| 800 movq_r2r(mm1, mm6); // write result 2 | |
| 801 | |
| 802 paddw_m2r(*(dataptr+14), mm0); // tmp00=x0+x7 /* Stage 1 */ | |
| 803 punpckhwd_r2r(mm5, mm4); // m33:m23|m32:m22 - interleave third and fourth lines | |
| 804 | |
| 805 paddw_m2r(*(dataptr+12), mm1); // tmp01=x1+x6 /* Stage 1 */ | |
| 806 movq_r2r(mm2, mm3); // copy first intermediate result | |
| 807 | |
| 808 psubw_m2r(*(dataptr+12), mm6); // tmp06=x1-x6 /* Stage 1 */ | |
| 809 punpckldq_r2r(mm4, mm2); // m32:m22|m12:m02 - interleave to produce result 3 | |
| 810 | |
| 811 movq_r2m(mm7, tmp7); | |
| 812 movq_r2r(mm2, mm5); // write result 3 | |
| 813 | |
| 814 movq_r2m(mm6, tmp6); | |
| 815 punpckhdq_r2r(mm4, mm3); // m33:m23|m13:m03 - interleave to produce result 4 | |
| 816 | |
| 817 paddw_m2r(*(dataptr+10), mm2); // tmp02=x2+x5 /* Stage 1 */ | |
| 818 movq_r2r(mm3, mm4); // write result 4 | |
| 819 | |
| 820 /************************************************************************************************ | |
| 821 End of Transpose | |
| 822 ************************************************************************************************/ | |
| 823 | |
| 824 | |
| 825 paddw_m2r(*(dataptr+8), mm3); // tmp03=x3+x4 /* stage 1*/ | |
| 826 movq_r2r(mm0, mm7); | |
| 827 | |
| 828 psubw_m2r(*(dataptr+8), mm4); // tmp04=x3-x4 /* stage 1*/ | |
| 829 movq_r2r(mm1, mm6); | |
| 830 | |
| 831 paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 /* even 2 */ | |
| 832 psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 /* even 2 */ | |
| 833 | |
| 834 psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 /* even 2 */ | |
| 835 paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 /* even 2 */ | |
| 836 | |
| 837 psubw_m2r(*(dataptr+10), mm5); // tmp05=x2-x5 /* stage 1*/ | |
| 838 paddw_r2r(mm7, mm6); // tmp12 + tmp13 | |
| 839 | |
| 840 /* stage 3 */ | |
| 841 | |
| 842 movq_m2r(tmp6, mm2); | |
| 843 movq_r2r(mm0, mm3); | |
| 844 | |
| 845 psllw_i2r(2, mm6); // m8 * 2^2 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
846 paddw_r2r(mm1, mm0); |
| 3802 | 847 |
| 848 pmulhw_m2r(RTjpeg_C4, mm6); // z1 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
849 psubw_r2r(mm1, mm3); |
| 3802 | 850 |
| 851 movq_r2m(mm0, *dataptr); | |
| 852 movq_r2r(mm7, mm0); | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
853 |
| 3802 | 854 /* Odd part */ |
| 855 movq_r2m(mm3, *(dataptr+8)); | |
| 856 paddw_r2r(mm5, mm4); // tmp10 | |
| 857 | |
| 858 movq_m2r(tmp7, mm3); | |
| 859 paddw_r2r(mm6, mm0); // tmp32 | |
| 860 | |
| 861 paddw_r2r(mm2, mm5); // tmp11 | |
| 862 psubw_r2r(mm6, mm7); // tmp33 | |
| 863 | |
| 864 movq_r2m(mm0, *(dataptr+4)); | |
| 865 paddw_r2r(mm3, mm2); // tmp12 | |
| 866 | |
| 867 /* stage 4 */ | |
| 868 | |
| 869 movq_r2m(mm7, *(dataptr+12)); | |
| 870 movq_r2r(mm4, mm1); // copy of tmp10 | |
| 871 | |
| 872 psubw_r2r(mm2, mm1); // tmp10 - tmp12 | |
| 873 psllw_i2r(2, mm4); // m8 * 2^2 | |
| 874 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
875 movq_m2r(RTjpeg_C2mC6, mm0); |
| 3802 | 876 psllw_i2r(2, mm1); |
| 877 | |
| 878 pmulhw_m2r(RTjpeg_C6, mm1); // z5 | |
| 879 psllw_i2r(2, mm2); | |
| 880 | |
| 881 pmulhw_r2r(mm0, mm4); // z5 | |
| 882 | |
| 883 /* stage 5 */ | |
| 884 | |
| 885 pmulhw_m2r(RTjpeg_C2pC6, mm2); | |
| 886 psllw_i2r(2, mm5); | |
| 887 | |
| 888 pmulhw_m2r(RTjpeg_C4, mm5); // z3 | |
| 889 movq_r2r(mm3, mm0); // copy tmp7 | |
| 890 | |
| 891 movq_m2r(*(dataptr+1), mm7); | |
| 892 paddw_r2r(mm1, mm4); // z2 | |
| 893 | |
| 894 paddw_r2r(mm1, mm2); // z4 | |
| 895 | |
| 896 paddw_r2r(mm5, mm0); // z11 | |
| 897 psubw_r2r(mm5, mm3); // z13 | |
| 898 | |
| 899 /* stage 6 */ | |
| 900 | |
| 901 movq_r2r(mm3, mm5); // copy z13 | |
| 902 psubw_r2r(mm4, mm3); // y3=z13 - z2 | |
| 903 | |
| 904 paddw_r2r(mm4, mm5); // y5=z13 + z2 | |
| 905 movq_r2r(mm0, mm6); // copy z11 | |
| 906 | |
| 907 movq_r2m(mm3, *(dataptr+6)); //save y3 | |
| 908 psubw_r2r(mm2, mm0); // y7=z11 - z4 | |
| 909 | |
| 910 movq_r2m(mm5, *(dataptr+10)); //save y5 | |
| 911 paddw_r2r(mm2, mm6); // y1=z11 + z4 | |
| 912 | |
| 913 movq_r2m(mm0, *(dataptr+14)); //save y7 | |
| 914 | |
| 915 /************************************************ | |
| 916 * End of 1st 4 rows | |
| 917 ************************************************/ | |
| 918 | |
| 919 movq_m2r(*(dataptr+3), mm1); // load x1 /* stage 1 */ | |
| 920 movq_r2r(mm7, mm0); // copy x0 | |
| 921 | |
| 922 movq_r2m(mm6, *(dataptr+2)); //save y1 | |
| 923 | |
| 924 movq_m2r(*(dataptr+5), mm2); // load x2 /* stage 1 */ | |
| 925 movq_r2r(mm1, mm6); // copy x1 | |
| 926 | |
| 927 paddw_m2r(*(dataptr+15), mm0); // tmp00 = x0 + x7 | |
| 928 | |
| 929 movq_m2r(*(dataptr+7), mm3); // load x3 /* stage 1 */ | |
| 930 movq_r2r(mm2, mm5); // copy x2 | |
| 931 | |
| 932 psubw_m2r(*(dataptr+15), mm7); // tmp07 = x0 - x7 | |
| 933 movq_r2r(mm3, mm4); // copy x3 | |
| 934 | |
| 935 paddw_m2r(*(dataptr+13), mm1); // tmp01 = x1 + x6 | |
| 936 | |
| 937 movq_r2m(mm7, tmp7); // save tmp07 | |
| 938 movq_r2r(mm0, mm7); // copy tmp00 | |
| 939 | |
| 940 psubw_m2r(*(dataptr+13), mm6); // tmp06 = x1 - x6 | |
| 941 | |
| 942 /* stage 2, Even Part */ | |
| 943 | |
| 944 paddw_m2r(*(dataptr+9), mm3); // tmp03 = x3 + x4 | |
| 945 | |
| 946 movq_r2m(mm6, tmp6); // save tmp06 | |
| 947 movq_r2r(mm1, mm6); // copy tmp01 | |
| 948 | |
| 949 paddw_m2r(*(dataptr+11), mm2); // tmp02 = x2 + x5 | |
| 950 paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 | |
| 951 | |
| 952 psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 | |
| 953 | |
| 954 psubw_m2r(*(dataptr+9), mm4); // tmp04 = x3 - x4 | |
| 955 psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 | |
| 956 | |
| 957 paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 | |
| 958 | |
| 959 psubw_m2r(*(dataptr+11), mm5); // tmp05 = x2 - x5 | |
| 960 paddw_r2r(mm7, mm6); // tmp12 + tmp13 | |
| 961 | |
| 962 /* stage 3, Even and stage 4 & 5 even */ | |
| 963 | |
| 964 movq_m2r(tmp6, mm2); // load tmp6 | |
| 965 movq_r2r(mm0, mm3); // copy tmp10 | |
| 966 | |
| 967 psllw_i2r(2, mm6); // shift z1 | |
| 968 paddw_r2r(mm1, mm0); // y0=tmp10 + tmp11 | |
| 969 | |
| 970 pmulhw_m2r(RTjpeg_C4, mm6); // z1 | |
| 971 psubw_r2r(mm1, mm3); // y4=tmp10 - tmp11 | |
| 972 | |
| 973 movq_r2m(mm0, *(dataptr+1)); //save y0 | |
| 974 movq_r2r(mm7, mm0); // copy tmp13 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
975 |
| 3802 | 976 /* odd part */ |
| 977 | |
| 978 movq_r2m(mm3, *(dataptr+9)); //save y4 | |
| 979 paddw_r2r(mm5, mm4); // tmp10 = tmp4 + tmp5 | |
| 980 | |
| 981 movq_m2r(tmp7, mm3); // load tmp7 | |
| 982 paddw_r2r(mm6, mm0); // tmp32 = tmp13 + z1 | |
| 983 | |
| 984 paddw_r2r(mm2, mm5); // tmp11 = tmp5 + tmp6 | |
| 985 psubw_r2r(mm6, mm7); // tmp33 = tmp13 - z1 | |
| 986 | |
| 987 movq_r2m(mm0, *(dataptr+5)); //save y2 | |
| 988 paddw_r2r(mm3, mm2); // tmp12 = tmp6 + tmp7 | |
| 989 | |
| 990 /* stage 4 */ | |
| 991 | |
| 992 movq_r2m(mm7, *(dataptr+13)); //save y6 | |
| 993 movq_r2r(mm4, mm1); // copy tmp10 | |
| 994 | |
| 995 psubw_r2r(mm2, mm1); // tmp10 - tmp12 | |
| 996 psllw_i2r(2, mm4); // shift tmp10 | |
| 997 | |
| 998 movq_m2r(RTjpeg_C2mC6, mm0); // load C2mC6 | |
| 999 psllw_i2r(2, mm1); // shift (tmp10-tmp12) | |
| 1000 | |
| 1001 pmulhw_m2r(RTjpeg_C6, mm1); // z5 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1002 psllw_i2r(2, mm5); // prepare for multiply |
| 3802 | 1003 |
| 1004 pmulhw_r2r(mm0, mm4); // multiply by converted real | |
| 1005 | |
| 1006 /* stage 5 */ | |
| 1007 | |
| 1008 pmulhw_m2r(RTjpeg_C4, mm5); // z3 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1009 psllw_i2r(2, mm2); // prepare for multiply |
| 3802 | 1010 |
| 1011 pmulhw_m2r(RTjpeg_C2pC6, mm2); // multiply | |
| 1012 movq_r2r(mm3, mm0); // copy tmp7 | |
| 1013 | |
| 1014 movq_m2r(*(dataptr+9), mm7); // m03:m02|m01:m00 - first line (line 4)and copy into mm7 | |
| 1015 paddw_r2r(mm1, mm4); // z2 | |
| 1016 | |
| 1017 paddw_r2r(mm5, mm0); // z11 | |
| 1018 psubw_r2r(mm5, mm3); // z13 | |
| 1019 | |
| 1020 /* stage 6 */ | |
| 1021 | |
| 1022 movq_r2r(mm3, mm5); // copy z13 | |
| 1023 paddw_r2r(mm1, mm2); // z4 | |
| 1024 | |
| 1025 movq_r2r(mm0, mm6); // copy z11 | |
| 1026 psubw_r2r(mm4, mm5); // y3 | |
| 1027 | |
| 1028 paddw_r2r(mm2, mm6); // y1 | |
| 1029 paddw_r2r(mm4, mm3); // y5 | |
| 1030 | |
| 1031 movq_r2m(mm5, *(dataptr+7)); //save y3 | |
| 1032 | |
| 1033 movq_r2m(mm6, *(dataptr+3)); //save y1 | |
| 1034 psubw_r2r(mm2, mm0); // y7 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1035 |
| 3802 | 1036 /************************************************************************************************ |
| 1037 Start of Transpose | |
| 1038 ************************************************************************************************/ | |
| 1039 | |
| 1040 movq_m2r(*(dataptr+13), mm6); // m23:m22|m21:m20 - third line (line 6)and copy into m2 | |
| 1041 movq_r2r(mm7, mm5); // copy first line | |
| 1042 | |
| 1043 punpcklwd_r2r(mm3, mm7); // m11:m01|m10:m00 - interleave first and second lines | |
| 1044 movq_r2r(mm6, mm2); // copy third line | |
| 1045 | |
| 1046 punpcklwd_r2r(mm0, mm6); // m31:m21|m30:m20 - interleave third and fourth lines | |
| 1047 movq_r2r(mm7, mm1); // copy first intermediate result | |
| 1048 | |
| 1049 punpckldq_r2r(mm6, mm7); // m30:m20|m10:m00 - interleave to produce result 1 | |
| 1050 | |
| 1051 punpckhdq_r2r(mm6, mm1); // m31:m21|m11:m01 - interleave to produce result 2 | |
| 1052 | |
| 1053 movq_r2m(mm7, *(dataptr+9)); // write result 1 | |
| 1054 punpckhwd_r2r(mm3, mm5); // m13:m03|m12:m02 - interleave first and second lines | |
| 1055 | |
| 1056 movq_r2m(mm1, *(dataptr+11)); // write result 2 | |
| 1057 punpckhwd_r2r(mm0, mm2); // m33:m23|m32:m22 - interleave third and fourth lines | |
| 1058 | |
| 1059 movq_r2r(mm5, mm1); // copy first intermediate result | |
| 1060 punpckldq_r2r(mm2, mm5); // m32:m22|m12:m02 - interleave to produce result 3 | |
| 1061 | |
| 1062 movq_m2r(*(dataptr+1), mm0); // m03:m02|m01:m00 - first line, 4x4 | |
| 1063 punpckhdq_r2r(mm2, mm1); // m33:m23|m13:m03 - interleave to produce result 4 | |
| 1064 | |
| 1065 movq_r2m(mm5, *(dataptr+13)); // write result 3 | |
| 1066 | |
| 1067 /****** last 4x4 done */ | |
| 1068 | |
| 1069 movq_r2m(mm1, *(dataptr+15)); // write result 4, last 4x4 | |
| 1070 | |
| 1071 movq_m2r(*(dataptr+5), mm2); // m23:m22|m21:m20 - third line | |
| 1072 movq_r2r(mm0, mm6); // copy first line | |
| 1073 | |
| 1074 punpcklwd_m2r(*(dataptr+3), mm0); // m11:m01|m10:m00 - interleave first and second lines | |
| 1075 movq_r2r(mm2, mm7); // copy third line | |
| 1076 | |
| 1077 punpcklwd_m2r(*(dataptr+7), mm2); // m31:m21|m30:m20 - interleave third and fourth lines | |
| 1078 movq_r2r(mm0, mm4); // copy first intermediate result | |
| 1079 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1080 |
| 3802 | 1081 |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1082 movq_m2r(*(dataptr+8), mm1); // n03:n02|n01:n00 - first line |
| 3802 | 1083 punpckldq_r2r(mm2, mm0); // m30:m20|m10:m00 - interleave to produce first result |
| 1084 | |
| 1085 movq_m2r(*(dataptr+12), mm3); // n23:n22|n21:n20 - third line | |
| 1086 punpckhdq_r2r(mm2, mm4); // m31:m21|m11:m01 - interleave to produce second result | |
| 1087 | |
| 1088 punpckhwd_m2r(*(dataptr+3), mm6); // m13:m03|m12:m02 - interleave first and second lines | |
| 1089 movq_r2r(mm1, mm2); // copy first line | |
| 1090 | |
| 1091 punpckhwd_m2r(*(dataptr+7), mm7); // m33:m23|m32:m22 - interleave third and fourth lines | |
| 1092 movq_r2r(mm6, mm5); // copy first intermediate result | |
| 1093 | |
| 1094 movq_r2m(mm0, *(dataptr+8)); // write result 1 | |
| 1095 punpckhdq_r2r(mm7, mm5); // m33:m23|m13:m03 - produce third result | |
| 1096 | |
| 1097 punpcklwd_m2r(*(dataptr+10), mm1); // n11:n01|n10:n00 - interleave first and second lines | |
| 1098 movq_r2r(mm3, mm0); // copy third line | |
| 1099 | |
| 1100 punpckhwd_m2r(*(dataptr+10), mm2); // n13:n03|n12:n02 - interleave first and second lines | |
| 1101 | |
| 1102 movq_r2m(mm4, *(dataptr+10)); // write result 2 out | |
| 1103 punpckldq_r2r(mm7, mm6); // m32:m22|m12:m02 - produce fourth result | |
| 1104 | |
| 1105 punpcklwd_m2r(*(dataptr+14), mm3); // n33:n23|n32:n22 - interleave third and fourth lines | |
| 1106 movq_r2r(mm1, mm4); // copy second intermediate result | |
| 1107 | |
| 1108 movq_r2m(mm6, *(dataptr+12)); // write result 3 out | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1109 punpckldq_r2r(mm3, mm1); // |
| 3802 | 1110 |
| 1111 punpckhwd_m2r(*(dataptr+14), mm0); // n33:n23|n32:n22 - interleave third and fourth lines | |
| 1112 movq_r2r(mm2, mm6); // copy second intermediate result | |
| 1113 | |
| 1114 movq_r2m(mm5, *(dataptr+14)); // write result 4 out | |
| 1115 punpckhdq_r2r(mm3, mm4); // n31:n21|n11:n01- produce second result | |
| 1116 | |
| 1117 movq_r2m(mm1, *(dataptr+1)); // write result 5 out - (first result for other 4 x 4 block) | |
| 1118 punpckldq_r2r(mm0, mm2); // n32:n22|n12:n02- produce third result | |
| 1119 | |
| 1120 movq_r2m(mm4, *(dataptr+3)); // write result 6 out | |
| 1121 punpckhdq_r2r(mm0, mm6); // n33:n23|n13:n03 - produce fourth result | |
| 1122 | |
| 1123 movq_r2m(mm2, *(dataptr+5)); // write result 7 out | |
| 1124 | |
| 1125 movq_m2r(*dataptr, mm0); // m03:m02|m01:m00 - first line, first 4x4 | |
| 1126 | |
| 1127 movq_r2m(mm6, *(dataptr+7)); // write result 8 out | |
| 1128 | |
| 1129 // Do first 4x4 quadrant, which is used in the beginning of the DCT: | |
| 1130 | |
| 1131 movq_m2r(*(dataptr+4), mm7); // m23:m22|m21:m20 - third line | |
| 1132 movq_r2r(mm0, mm2); // copy first line | |
| 1133 | |
| 1134 punpcklwd_m2r(*(dataptr+2), mm0); // m11:m01|m10:m00 - interleave first and second lines | |
| 1135 movq_r2r(mm7, mm4); // copy third line | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1136 |
| 3802 | 1137 punpcklwd_m2r(*(dataptr+6), mm7); // m31:m21|m30:m20 - interleave third and fourth lines |
| 1138 movq_r2r(mm0, mm1); // copy first intermediate result | |
| 1139 | |
| 1140 movq_m2r(*(dataptr+2), mm6); // m13:m12|m11:m10 - second line | |
| 1141 punpckldq_r2r(mm7, mm0); // m30:m20|m10:m00 - interleave to produce result 1 | |
| 1142 | |
| 1143 movq_m2r(*(dataptr+6), mm5); // m33:m32|m31:m30 - fourth line | |
| 1144 punpckhdq_r2r(mm7, mm1); // m31:m21|m11:m01 - interleave to produce result 2 | |
| 1145 | |
| 1146 movq_r2r(mm0, mm7); // write result 1 | |
| 1147 punpckhwd_r2r(mm6, mm2); // m13:m03|m12:m02 - interleave first and second lines | |
| 1148 | |
| 1149 psubw_m2r(*(dataptr+14), mm7); // tmp07=x0-x7 /* Stage 1 */ | |
| 1150 movq_r2r(mm1, mm6); // write result 2 | |
| 1151 | |
| 1152 paddw_m2r(*(dataptr+14), mm0); // tmp00=x0+x7 /* Stage 1 */ | |
| 1153 punpckhwd_r2r(mm5, mm4); // m33:m23|m32:m22 - interleave third and fourth lines | |
| 1154 | |
| 1155 paddw_m2r(*(dataptr+12), mm1); // tmp01=x1+x6 /* Stage 1 */ | |
| 1156 movq_r2r(mm2, mm3); // copy first intermediate result | |
| 1157 | |
| 1158 psubw_m2r(*(dataptr+12), mm6); // tmp06=x1-x6 /* Stage 1 */ | |
| 1159 punpckldq_r2r(mm4, mm2); // m32:m22|m12:m02 - interleave to produce result 3 | |
| 1160 | |
| 1161 movq_r2m(mm7, tmp7); // save tmp07 | |
| 1162 movq_r2r(mm2, mm5); // write result 3 | |
| 1163 | |
| 1164 movq_r2m(mm6, tmp6); // save tmp06 | |
| 1165 | |
| 1166 punpckhdq_r2r(mm4, mm3); // m33:m23|m13:m03 - interleave to produce result 4 | |
| 1167 | |
| 1168 paddw_m2r(*(dataptr+10), mm2); // tmp02=x2+x5 /* stage 1 */ | |
| 1169 movq_r2r(mm3, mm4); // write result 4 | |
| 1170 | |
| 1171 /************************************************************************************************ | |
| 1172 End of Transpose 2 | |
| 1173 ************************************************************************************************/ | |
| 1174 | |
| 1175 paddw_m2r(*(dataptr+8), mm3); // tmp03=x3+x4 /* stage 1*/ | |
| 1176 movq_r2r(mm0, mm7); | |
| 1177 | |
| 1178 psubw_m2r(*(dataptr+8), mm4); // tmp04=x3-x4 /* stage 1*/ | |
| 1179 movq_r2r(mm1, mm6); | |
| 1180 | |
| 1181 paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 /* even 2 */ | |
| 1182 psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 /* even 2 */ | |
| 1183 | |
| 1184 psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 /* even 2 */ | |
| 1185 paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 /* even 2 */ | |
| 1186 | |
| 1187 psubw_m2r(*(dataptr+10), mm5); // tmp05=x2-x5 /* stage 1*/ | |
| 1188 paddw_r2r(mm7, mm6); // tmp12 + tmp13 | |
| 1189 | |
| 1190 /* stage 3 */ | |
| 1191 | |
| 1192 movq_m2r(tmp6, mm2); | |
| 1193 movq_r2r(mm0, mm3); | |
| 1194 | |
| 1195 psllw_i2r(2, mm6); // m8 * 2^2 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1196 paddw_r2r(mm1, mm0); |
| 3802 | 1197 |
| 1198 pmulhw_m2r(RTjpeg_C4, mm6); // z1 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1199 psubw_r2r(mm1, mm3); |
| 3802 | 1200 |
| 1201 movq_r2m(mm0, *dataptr); | |
| 1202 movq_r2r(mm7, mm0); | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1203 |
| 3802 | 1204 /* Odd part */ |
| 1205 movq_r2m(mm3, *(dataptr+8)); | |
| 1206 paddw_r2r(mm5, mm4); // tmp10 | |
| 1207 | |
| 1208 movq_m2r(tmp7, mm3); | |
| 1209 paddw_r2r(mm6, mm0); // tmp32 | |
| 1210 | |
| 1211 paddw_r2r(mm2, mm5); // tmp11 | |
| 1212 psubw_r2r(mm6, mm7); // tmp33 | |
| 1213 | |
| 1214 movq_r2m(mm0, *(dataptr+4)); | |
| 1215 paddw_r2r(mm3, mm2); // tmp12 | |
| 1216 | |
| 1217 /* stage 4 */ | |
| 1218 movq_r2m(mm7, *(dataptr+12)); | |
| 1219 movq_r2r(mm4, mm1); // copy of tmp10 | |
| 1220 | |
| 1221 psubw_r2r(mm2, mm1); // tmp10 - tmp12 | |
| 1222 psllw_i2r(2, mm4); // m8 * 2^2 | |
| 1223 | |
| 1224 movq_m2r(RTjpeg_C2mC6, mm0); | |
| 1225 psllw_i2r(2, mm1); | |
| 1226 | |
| 1227 pmulhw_m2r(RTjpeg_C6, mm1); // z5 | |
| 1228 psllw_i2r(2, mm2); | |
| 1229 | |
| 1230 pmulhw_r2r(mm0, mm4); // z5 | |
| 1231 | |
| 1232 /* stage 5 */ | |
| 1233 | |
| 1234 pmulhw_m2r(RTjpeg_C2pC6, mm2); | |
| 1235 psllw_i2r(2, mm5); | |
| 1236 | |
| 1237 pmulhw_m2r(RTjpeg_C4, mm5); // z3 | |
| 1238 movq_r2r(mm3, mm0); // copy tmp7 | |
| 1239 | |
| 1240 movq_m2r(*(dataptr+1), mm7); | |
| 1241 paddw_r2r(mm1, mm4); // z2 | |
| 1242 | |
| 1243 paddw_r2r(mm1, mm2); // z4 | |
| 1244 | |
| 1245 paddw_r2r(mm5, mm0); // z11 | |
| 1246 psubw_r2r(mm5, mm3); // z13 | |
| 1247 | |
| 1248 /* stage 6 */ | |
| 1249 | |
| 1250 movq_r2r(mm3, mm5); // copy z13 | |
| 1251 psubw_r2r(mm4, mm3); // y3=z13 - z2 | |
| 1252 | |
| 1253 paddw_r2r(mm4, mm5); // y5=z13 + z2 | |
| 1254 movq_r2r(mm0, mm6); // copy z11 | |
| 1255 | |
| 1256 movq_r2m(mm3, *(dataptr+6)); //save y3 | |
| 1257 psubw_r2r(mm2, mm0); // y7=z11 - z4 | |
| 1258 | |
| 1259 movq_r2m(mm5, *(dataptr+10)); //save y5 | |
| 1260 paddw_r2r(mm2, mm6); // y1=z11 + z4 | |
| 1261 | |
| 1262 movq_r2m(mm0, *(dataptr+14)); //save y7 | |
| 1263 | |
| 1264 /************************************************ | |
| 1265 * End of 1st 4 rows | |
| 1266 ************************************************/ | |
| 1267 | |
| 1268 movq_m2r(*(dataptr+3), mm1); // load x1 /* stage 1 */ | |
| 1269 movq_r2r(mm7, mm0); // copy x0 | |
| 1270 | |
| 1271 movq_r2m(mm6, *(dataptr+2)); //save y1 | |
| 1272 | |
| 1273 movq_m2r(*(dataptr+5), mm2); // load x2 /* stage 1 */ | |
| 1274 movq_r2r(mm1, mm6); // copy x1 | |
| 1275 | |
| 1276 paddw_m2r(*(dataptr+15), mm0); // tmp00 = x0 + x7 | |
| 1277 | |
| 1278 movq_m2r(*(dataptr+7), mm3); // load x3 /* stage 1 */ | |
| 1279 movq_r2r(mm2, mm5); // copy x2 | |
| 1280 | |
| 1281 psubw_m2r(*(dataptr+15), mm7); // tmp07 = x0 - x7 | |
| 1282 movq_r2r(mm3, mm4); // copy x3 | |
| 1283 | |
| 1284 paddw_m2r(*(dataptr+13), mm1); // tmp01 = x1 + x6 | |
| 1285 | |
| 1286 movq_r2m(mm7, tmp7); // save tmp07 | |
| 1287 movq_r2r(mm0, mm7); // copy tmp00 | |
| 1288 | |
| 1289 psubw_m2r(*(dataptr+13), mm6); // tmp06 = x1 - x6 | |
| 1290 | |
| 1291 /* stage 2, Even Part */ | |
| 1292 | |
| 1293 paddw_m2r(*(dataptr+9), mm3); // tmp03 = x3 + x4 | |
| 1294 | |
| 1295 movq_r2m(mm6, tmp6); // save tmp06 | |
| 1296 movq_r2r(mm1, mm6); // copy tmp01 | |
| 1297 | |
| 1298 paddw_m2r(*(dataptr+11), mm2); // tmp02 = x2 + x5 | |
| 1299 paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 | |
| 1300 | |
| 1301 psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 | |
| 1302 | |
| 1303 psubw_m2r(*(dataptr+9), mm4); // tmp04 = x3 - x4 | |
| 1304 psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 | |
| 1305 | |
| 1306 paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 | |
| 1307 | |
| 1308 psubw_m2r(*(dataptr+11), mm5); // tmp05 = x2 - x5 | |
| 1309 paddw_r2r(mm7, mm6); // tmp12 + tmp13 | |
| 1310 | |
| 1311 /* stage 3, Even and stage 4 & 5 even */ | |
| 1312 | |
| 1313 movq_m2r(tmp6, mm2); // load tmp6 | |
| 1314 movq_r2r(mm0, mm3); // copy tmp10 | |
| 1315 | |
| 1316 psllw_i2r(2, mm6); // shift z1 | |
| 1317 paddw_r2r(mm1, mm0); // y0=tmp10 + tmp11 | |
| 1318 | |
| 1319 pmulhw_m2r(RTjpeg_C4, mm6); // z1 | |
| 1320 psubw_r2r(mm1, mm3); // y4=tmp10 - tmp11 | |
| 1321 | |
| 1322 movq_r2m(mm0, *(dataptr+1)); //save y0 | |
| 1323 movq_r2r(mm7, mm0); // copy tmp13 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1324 |
| 3802 | 1325 /* odd part */ |
| 1326 | |
| 1327 movq_r2m(mm3, *(dataptr+9)); //save y4 | |
| 1328 paddw_r2r(mm5, mm4); // tmp10 = tmp4 + tmp5 | |
| 1329 | |
| 1330 movq_m2r(tmp7, mm3); // load tmp7 | |
| 1331 paddw_r2r(mm6, mm0); // tmp32 = tmp13 + z1 | |
| 1332 | |
| 1333 paddw_r2r(mm2, mm5); // tmp11 = tmp5 + tmp6 | |
| 1334 psubw_r2r(mm6, mm7); // tmp33 = tmp13 - z1 | |
| 1335 | |
| 1336 movq_r2m(mm0, *(dataptr+5)); //save y2 | |
| 1337 paddw_r2r(mm3, mm2); // tmp12 = tmp6 + tmp7 | |
| 1338 | |
| 1339 /* stage 4 */ | |
| 1340 | |
| 1341 movq_r2m(mm7, *(dataptr+13)); //save y6 | |
| 1342 movq_r2r(mm4, mm1); // copy tmp10 | |
| 1343 | |
| 1344 psubw_r2r(mm2, mm1); // tmp10 - tmp12 | |
| 1345 psllw_i2r(2, mm4); // shift tmp10 | |
| 1346 | |
| 1347 movq_m2r(RTjpeg_C2mC6, mm0); // load C2mC6 | |
| 1348 psllw_i2r(2, mm1); // shift (tmp10-tmp12) | |
| 1349 | |
| 1350 pmulhw_m2r(RTjpeg_C6, mm1); // z5 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1351 psllw_i2r(2, mm5); // prepare for multiply |
| 3802 | 1352 |
| 1353 pmulhw_r2r(mm0, mm4); // multiply by converted real | |
| 1354 | |
| 1355 /* stage 5 */ | |
| 1356 | |
| 1357 pmulhw_m2r(RTjpeg_C4, mm5); // z3 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1358 psllw_i2r(2, mm2); // prepare for multiply |
| 3802 | 1359 |
| 1360 pmulhw_m2r(RTjpeg_C2pC6, mm2); // multiply | |
| 1361 movq_r2r(mm3, mm0); // copy tmp7 | |
| 1362 | |
| 1363 movq_m2r(*(dataptr+9), mm7); // m03:m02|m01:m00 - first line (line 4)and copy into mm7 | |
| 1364 paddw_r2r(mm1, mm4); // z2 | |
| 1365 | |
| 1366 paddw_r2r(mm5, mm0); // z11 | |
| 1367 psubw_r2r(mm5, mm3); // z13 | |
| 1368 | |
| 1369 /* stage 6 */ | |
| 1370 | |
| 1371 movq_r2r(mm3, mm5); // copy z13 | |
| 1372 paddw_r2r(mm1, mm2); // z4 | |
| 1373 | |
| 1374 movq_r2r(mm0, mm6); // copy z11 | |
| 1375 psubw_r2r(mm4, mm5); // y3 | |
| 1376 | |
| 1377 paddw_r2r(mm2, mm6); // y1 | |
| 1378 paddw_r2r(mm4, mm3); // y5 | |
| 1379 | |
| 1380 movq_r2m(mm5, *(dataptr+7)); //save y3 | |
| 1381 psubw_r2r(mm2, mm0); // y7=z11 - z4 | |
| 1382 | |
| 1383 movq_r2m(mm3, *(dataptr+11)); //save y5 | |
| 1384 | |
| 1385 movq_r2m(mm6, *(dataptr+3)); //save y1 | |
| 1386 | |
| 1387 movq_r2m(mm0, *(dataptr+15)); //save y7 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1388 |
| 3802 | 1389 |
| 1390 #endif | |
| 1391 } | |
| 1392 | |
| 1393 /* | |
| 1394 | |
| 1395 Main Routines | |
| 1396 | |
| 1397 This file contains most of the initialisation and control functions | |
| 1398 | |
| 1399 (C) Justin Schoeman 1998 | |
| 1400 | |
| 1401 */ | |
| 1402 | |
| 1403 /* | |
| 1404 | |
| 1405 Private function | |
| 1406 | |
| 1407 Initialise all the cache-aligned data blocks | |
| 1408 | |
| 1409 */ | |
| 1410 | |
|
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
1411 static void RTjpeg_init_data(void) |
| 3802 | 1412 { |
| 1413 unsigned long dptr; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1414 |
| 3802 | 1415 dptr=(unsigned long)&(RTjpeg_alldata[0]); |
| 1416 dptr+=32; | |
| 1417 dptr=dptr>>5; | |
| 1418 dptr=dptr<<5; /* cache align data */ | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1419 |
| 3802 | 1420 RTjpeg_block=(__s16 *)dptr; |
| 1421 dptr+=sizeof(__s16)*64; | |
| 1422 RTjpeg_lqt=(__s32 *)dptr; | |
| 1423 dptr+=sizeof(__s32)*64; | |
| 1424 RTjpeg_cqt=(__s32 *)dptr; | |
| 1425 dptr+=sizeof(__s32)*64; | |
| 1426 RTjpeg_liqt=(__u32 *)dptr; | |
| 1427 dptr+=sizeof(__u32)*64; | |
| 1428 RTjpeg_ciqt=(__u32 *)dptr; | |
| 1429 } | |
| 1430 | |
| 1431 /* | |
| 1432 | |
| 1433 External Function | |
| 1434 | |
| 1435 Re-set quality factor | |
| 1436 | |
| 1437 Input: Q -> quality factor (192=best, 32=worst) | |
| 1438 | |
| 1439 No buffer is taken: the quantisation tables are recomputed in place. | |
| 1440 */ | |
| 1441 | |
|
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
1442 static void RTjpeg_init_Q(__u8 Q) |
| 3802 | 1443 { |
| 1444 int i; | |
| 1445 __u64 qual; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1446 |
| 3802 | 1447 qual=(__u64)Q<<(32-7); /* 32 bit FP, 255=2, 0=0 */ |
| 1448 | |
| 1449 for(i=0; i<64; i++) | |
| 1450 { | |
| 1451 RTjpeg_lqt[i]=(__s32)((qual/((__u64)RTjpeg_lum_quant_tbl[i]<<16))>>3); | |
| 1452 if(RTjpeg_lqt[i]==0)RTjpeg_lqt[i]=1; | |
| 1453 RTjpeg_cqt[i]=(__s32)((qual/((__u64)RTjpeg_chrom_quant_tbl[i]<<16))>>3); | |
| 1454 if(RTjpeg_cqt[i]==0)RTjpeg_cqt[i]=1; | |
| 1455 RTjpeg_liqt[i]=(1<<16)/(RTjpeg_lqt[i]<<3); | |
| 1456 RTjpeg_ciqt[i]=(1<<16)/(RTjpeg_cqt[i]<<3); | |
| 1457 RTjpeg_lqt[i]=((1<<16)/RTjpeg_liqt[i])>>3; | |
| 1458 RTjpeg_cqt[i]=((1<<16)/RTjpeg_ciqt[i])>>3; | |
| 1459 } | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1460 |
| 3802 | 1461 RTjpeg_lb8=0; |
| 1462 while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8); | |
| 1463 RTjpeg_lb8--; | |
| 1464 RTjpeg_cb8=0; | |
| 1465 while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8); | |
| 1466 RTjpeg_cb8--; | |
| 1467 | |
| 1468 RTjpeg_dct_init(); | |
| 1469 RTjpeg_quant_init(); | |
| 1470 } | |
| 1471 | |
| 1472 /* | |
| 1473 | |
| 1474 External Function | |
| 1475 | |
| 1476 Initialise compression. | |
| 1477 | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1478 Input: buf -> pointer to 128 ints for quant values store to pass back to |
| 3802 | 1479 init_decompress. |
| 1480 width -> width of image | |
| 1481 height -> height of image | |
| 1482 Q -> quality factor (192=best, 32=worst) | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1483 |
| 3802 | 1484 */ |
| 1485 | |
| 1486 void RTjpeg_init_compress(__u32 *buf, int width, int height, __u8 Q) | |
| 1487 { | |
| 1488 int i; | |
| 1489 __u64 qual; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1490 |
| 3802 | 1491 RTjpeg_init_data(); |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1492 |
| 3802 | 1493 RTjpeg_width=width; |
| 1494 RTjpeg_height=height; | |
| 1495 RTjpeg_Ywidth = RTjpeg_width>>3; | |
| 1496 RTjpeg_Ysize=width * height; | |
| 1497 RTjpeg_Cwidth = RTjpeg_width>>4; | |
| 1498 RTjpeg_Csize= (width>>1) * height; | |
| 1499 | |
| 1500 qual=(__u64)Q<<(32-7); /* 32 bit FP, 255=2, 0=0 */ | |
| 1501 | |
| 1502 for(i=0; i<64; i++) | |
| 1503 { | |
| 1504 RTjpeg_lqt[i]=(__s32)((qual/((__u64)RTjpeg_lum_quant_tbl[i]<<16))>>3); | |
| 1505 if(RTjpeg_lqt[i]==0)RTjpeg_lqt[i]=1; | |
| 1506 RTjpeg_cqt[i]=(__s32)((qual/((__u64)RTjpeg_chrom_quant_tbl[i]<<16))>>3); | |
| 1507 if(RTjpeg_cqt[i]==0)RTjpeg_cqt[i]=1; | |
| 1508 RTjpeg_liqt[i]=(1<<16)/(RTjpeg_lqt[i]<<3); | |
| 1509 RTjpeg_ciqt[i]=(1<<16)/(RTjpeg_cqt[i]<<3); | |
| 1510 RTjpeg_lqt[i]=((1<<16)/RTjpeg_liqt[i])>>3; | |
| 1511 RTjpeg_cqt[i]=((1<<16)/RTjpeg_ciqt[i])>>3; | |
| 1512 } | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1513 |
| 3802 | 1514 RTjpeg_lb8=0; |
| 1515 while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8); | |
| 1516 RTjpeg_lb8--; | |
| 1517 RTjpeg_cb8=0; | |
| 1518 while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8); | |
| 1519 RTjpeg_cb8--; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1520 |
| 3802 | 1521 RTjpeg_dct_init(); |
| 1522 RTjpeg_quant_init(); | |
| 1523 | |
| 1524 for(i=0; i<64; i++) | |
|
14896
9ddae5897422
Make nuv files work on bigendian (but old nuv files created with mencoder
reimar
parents:
14642
diff
changeset
|
1525 buf[i]=le2me_32(RTjpeg_liqt[i]); |
| 3802 | 1526 for(i=0; i<64; i++) |
|
14896
9ddae5897422
Make nuv files work on bigendian (but old nuv files created with mencoder
reimar
parents:
14642
diff
changeset
|
1527 buf[64+i]=le2me_32(RTjpeg_ciqt[i]); |
| 3802 | 1528 } |
| 1529 | |
| 1530 int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp) | |
| 1531 { | |
| 1532 __s8 * sb; | |
| 1533 register __s8 * bp1 = bp + (RTjpeg_width<<3); | |
| 1534 register __s8 * bp2 = bp + RTjpeg_Ysize; | |
| 1535 register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1); | |
| 1536 register int i, j, k; | |
| 1537 | |
|
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
1538 #if HAVE_MMX |
| 3802 | 1539 emms(); |
| 1540 #endif | |
| 1541 sb=sp; | |
| 1542 /* Y */ | |
| 1543 for(i=RTjpeg_height>>1; i; i-=8) | |
| 1544 { | |
| 1545 for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8) | |
| 1546 { | |
| 1547 RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth); | |
| 1548 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt); | |
| 1549 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8); | |
| 1550 | |
| 1551 RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth); | |
| 1552 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt); | |
| 1553 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8); | |
| 1554 | |
| 1555 RTjpeg_dctY(bp1+j, RTjpeg_block, RTjpeg_Ywidth); | |
| 1556 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt); | |
| 1557 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8); | |
| 1558 | |
| 1559 RTjpeg_dctY(bp1+j+8, RTjpeg_block, RTjpeg_Ywidth); | |
| 1560 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt); | |
| 1561 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8); | |
| 1562 | |
| 1563 RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth); | |
| 1564 RTjpeg_quant(RTjpeg_block, RTjpeg_cqt); | |
| 1565 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8); | |
| 1566 | |
| 1567 RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth); | |
| 1568 RTjpeg_quant(RTjpeg_block, RTjpeg_cqt); | |
| 1569 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8); | |
| 1570 | |
| 1571 } | |
| 1572 bp+=RTjpeg_width<<4; | |
| 1573 bp1+=RTjpeg_width<<4; | |
| 1574 bp2+=RTjpeg_width<<2; | |
| 1575 bp3+=RTjpeg_width<<2; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1576 |
| 3802 | 1577 } |
|
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
1578 #if HAVE_MMX |
| 3802 | 1579 emms(); |
| 1580 #endif | |
| 1581 return (sp-sb); | |
| 1582 } | |
| 1583 | |
| 1584 /* | |
| 1585 External Function | |
| 1586 | |
| 1587 Initialise additional data structures for motion compensation | |
| 1588 | |
| 1589 */ | |
| 1590 | |
| 1591 void RTjpeg_init_mcompress(void) | |
| 1592 { | |
| 1593 unsigned long tmp; | |
| 1594 | |
| 1595 if(!RTjpeg_old) | |
| 1596 { | |
| 1597 RTjpeg_old=malloc((4*RTjpeg_width*RTjpeg_height)+32); | |
| 1598 tmp=(unsigned long)RTjpeg_old; | |
| 1599 tmp+=32; | |
| 1600 tmp=tmp>>5; | |
| 1601 RTjpeg_old=(__s16 *)(tmp<<5); | |
| 1602 } | |
| 1603 if (!RTjpeg_old) | |
| 1604 { | |
| 1605 fprintf(stderr, "RTjpeg: Could not allocate memory\n"); | |
| 1606 exit(-1); | |
| 1607 } | |
|
14642
38572280e8e7
bzero is deprecated patch by Gianluigi Tiesi <mplayer at netfarm.it>
faust3
parents:
12928
diff
changeset
|
1608 memset(RTjpeg_old, 0, ((4*RTjpeg_width*RTjpeg_height))); |
| 3802 | 1609 } |
| 1610 | |
|
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
1611 #if HAVE_MMX |
| 3802 | 1612 |
|
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
1613 static int RTjpeg_bcomp(__s16 *old, mmx_t *mask) |
| 3802 | 1614 { |
| 1615 int i; | |
| 1616 mmx_t *mold=(mmx_t *)old; | |
| 1617 mmx_t *mblock=(mmx_t *)RTjpeg_block; | |
| 1618 volatile mmx_t result; | |
| 12928 | 1619 static mmx_t neg={0xffffffffffffffffULL}; |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1620 |
| 3802 | 1621 movq_m2r(*mask, mm7); |
| 1622 movq_m2r(neg, mm6); | |
| 1623 pxor_r2r(mm5, mm5); | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1624 |
| 3802 | 1625 for(i=0; i<8; i++) |
| 1626 { | |
| 1627 movq_m2r(*(mblock++), mm0); | |
| 1628 movq_m2r(*(mblock++), mm2); | |
| 1629 movq_m2r(*(mold++), mm1); | |
| 1630 movq_m2r(*(mold++), mm3); | |
| 1631 psubsw_r2r(mm1, mm0); | |
| 1632 psubsw_r2r(mm3, mm2); | |
| 1633 movq_r2r(mm0, mm1); | |
| 1634 movq_r2r(mm2, mm3); | |
| 1635 pcmpgtw_r2r(mm7, mm0); | |
| 1636 pcmpgtw_r2r(mm7, mm2); | |
| 1637 pxor_r2r(mm6, mm1); | |
| 1638 pxor_r2r(mm6, mm3); | |
| 1639 pcmpgtw_r2r(mm7, mm1); | |
| 1640 pcmpgtw_r2r(mm7, mm3); | |
| 1641 por_r2r(mm0, mm5); | |
| 1642 por_r2r(mm2, mm5); | |
| 1643 por_r2r(mm1, mm5); | |
| 1644 por_r2r(mm3, mm5); | |
| 1645 } | |
| 1646 movq_r2m(mm5, result); | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1647 |
| 3802 | 1648 if(result.q) |
| 1649 { | |
| 1650 return 0; | |
| 1651 } | |
| 1652 return 1; | |
| 1653 } | |
| 1654 | |
| 1655 #else | |
|
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
1656 static int RTjpeg_bcomp(__s16 *old, __u16 *mask) |
| 3802 | 1657 { |
| 1658 int i; | |
| 1659 | |
| 1660 for(i=0; i<64; i++) | |
| 1661 if(abs(old[i]-RTjpeg_block[i])>*mask) | |
| 1662 { | |
| 1663 for(i=0; i<16; i++)((__u64 *)old)[i]=((__u64 *)RTjpeg_block)[i]; | |
| 1664 return 0; | |
| 1665 } | |
| 1666 return 1; | |
| 1667 } | |
| 1668 #endif | |
| 1669 | |
| 1670 int RTjpeg_mcompressYUV420(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask) | |
| 1671 { | |
| 1672 __s8 * sb; | |
| 1673 register __s8 * bp1 = bp + (RTjpeg_width<<3); | |
| 1674 register __s8 * bp2 = bp + RTjpeg_Ysize; | |
| 1675 register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1); | |
| 1676 register int i, j, k; | |
| 1677 | |
|
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
1678 #if HAVE_MMX |
| 3802 | 1679 emms(); |
|
16653
27b0d49988b2
Fix 100l bugs that break playback on 64 bit systems (like typedefing __u32
reimar
parents:
14896
diff
changeset
|
1680 RTjpeg_lmask.uq=((__u64)lmask<<48)|((__u64)lmask<<32)|((__u64)lmask<<16)|lmask; |
|
27b0d49988b2
Fix 100l bugs that break playback on 64 bit systems (like typedefing __u32
reimar
parents:
14896
diff
changeset
|
1681 RTjpeg_cmask.uq=((__u64)cmask<<48)|((__u64)cmask<<32)|((__u64)cmask<<16)|cmask; |
| 3802 | 1682 #else |
|
16661
adb581352e63
Stupidity in last patch broke compile without MMX: RTjpeg_lmask is a union
reimar
parents:
16653
diff
changeset
|
1683 RTjpeg_lmask=lmask; |
|
adb581352e63
Stupidity in last patch broke compile without MMX: RTjpeg_lmask is a union
reimar
parents:
16653
diff
changeset
|
1684 RTjpeg_cmask=cmask; |
| 3802 | 1685 #endif |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1686 |
| 3802 | 1687 sb=sp; |
| 1688 block=RTjpeg_old; | |
| 1689 /* Y */ | |
| 1690 for(i=RTjpeg_height>>1; i; i-=8) | |
| 1691 { | |
| 1692 for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8) | |
| 1693 { | |
| 1694 RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth); | |
| 1695 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt); | |
| 1696 if(RTjpeg_bcomp(block, &RTjpeg_lmask)) | |
| 1697 { | |
| 1698 *((__u8 *)sp++)=255; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1699 } |
| 3802 | 1700 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8); |
| 1701 block+=64; | |
| 1702 | |
| 1703 RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth); | |
| 1704 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt); | |
| 1705 if(RTjpeg_bcomp(block, &RTjpeg_lmask)) | |
| 1706 { | |
| 1707 *((__u8 *)sp++)=255; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1708 } |
| 3802 | 1709 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8); |
| 1710 block+=64; | |
| 1711 | |
| 1712 RTjpeg_dctY(bp1+j, RTjpeg_block, RTjpeg_Ywidth); | |
| 1713 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt); | |
| 1714 if(RTjpeg_bcomp(block, &RTjpeg_lmask)) | |
| 1715 { | |
| 1716 *((__u8 *)sp++)=255; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1717 } |
| 3802 | 1718 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8); |
| 1719 block+=64; | |
| 1720 | |
| 1721 RTjpeg_dctY(bp1+j+8, RTjpeg_block, RTjpeg_Ywidth); | |
| 1722 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt); | |
| 1723 if(RTjpeg_bcomp(block, &RTjpeg_lmask)) | |
| 1724 { | |
| 1725 *((__u8 *)sp++)=255; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1726 } |
| 3802 | 1727 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8); |
| 1728 block+=64; | |
| 1729 | |
| 1730 RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth); | |
| 1731 RTjpeg_quant(RTjpeg_block, RTjpeg_cqt); | |
| 1732 if(RTjpeg_bcomp(block, &RTjpeg_cmask)) | |
| 1733 { | |
| 1734 *((__u8 *)sp++)=255; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1735 } |
| 3802 | 1736 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8); |
| 1737 block+=64; | |
| 1738 | |
| 1739 RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth); | |
| 1740 RTjpeg_quant(RTjpeg_block, RTjpeg_cqt); | |
| 1741 if(RTjpeg_bcomp(block, &RTjpeg_cmask)) | |
| 1742 { | |
| 1743 *((__u8 *)sp++)=255; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1744 } |
| 3802 | 1745 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8); |
| 1746 block+=64; | |
| 1747 } | |
| 1748 bp+=RTjpeg_width<<4; | |
| 1749 bp1+=RTjpeg_width<<4; | |
| 1750 bp2+=RTjpeg_width<<2; | |
| 1751 bp3+=RTjpeg_width<<2; | |
|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1752 |
| 3802 | 1753 } |
|
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
1754 #if HAVE_MMX |
| 3802 | 1755 emms(); |
| 1756 #endif | |
| 1757 return (sp-sb); | |
| 1758 } |
