Mercurial > libavcodec.hg
annotate dct-test.c @ 8991:ca768cb2bfb6 libavcodec
Use last decoded SPS as current SPS in order to parse picture timing SEI
correctly. This works around an apparent H.264 standard deficiency.
Patch by Ivan Schreter, schreter gmx net
| author | cehoyos |
|---|---|
| date | Fri, 20 Feb 2009 16:20:01 +0000 |
| parents | 2c5662c41129 |
| children | 6d7c38f60b06 |
| rev | line source |
|---|---|
|
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
1 /* |
|
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
2 * (c) 2001 Fabrice Bellard |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
3 * 2007 Marc Hoffman <marc.hoffman@analog.com> |
|
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
4 * |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
5 * This file is part of FFmpeg. |
|
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
6 * |
|
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
|
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
8 * modify it under the terms of the GNU Lesser General Public |
|
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
9 * License as published by the Free Software Foundation; either |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
|
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
11 * |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
|
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
15 * Lesser General Public License for more details. |
|
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
16 * |
|
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
17 * You should have received a copy of the GNU Lesser General Public |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
|
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
20 */ |
|
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
21 |
| 1106 | 22 /** |
|
8718
e9d9d946f213
Use full internal pathname in doxygen @file directives.
diego
parents:
8693
diff
changeset
|
23 * @file libavcodec/dct-test.c |
|
8724
2c5662c41129
cosmetics: Remove period after copyright statement non-sentence.
diego
parents:
8718
diff
changeset
|
24 * DCT test (c) 2001 Fabrice Bellard |
| 1106 | 25 * Started from sample code by Juan J. Sierralta P. |
| 26 */ | |
| 27 | |
| 0 | 28 #include <stdlib.h> |
| 29 #include <stdio.h> | |
| 30 #include <string.h> | |
| 31 #include <sys/time.h> | |
| 32 #include <unistd.h> | |
|
5118
3b190bc34546
Add some #includes to allow compilation without HAVE_AV_CONFIG_H.
diego
parents:
5110
diff
changeset
|
33 #include <math.h> |
| 0 | 34 |
|
7130
601509a430f7
Replace redundant MAX macro declaration by proper use of FFMAX.
diego
parents:
7125
diff
changeset
|
35 #include "libavutil/common.h" |
| 0 | 36 |
| 633 | 37 #include "simple_idct.h" |
| 8223 | 38 #include "aandcttab.h" |
| 1557 | 39 #include "faandct.h" |
| 6407 | 40 #include "faanidct.h" |
| 8430 | 41 #include "x86/idct_xvid.h" |
| 33 | 42 |
| 2872 | 43 #undef printf |
|
5379
e9f8a337c5ce
Fix the compilation of dct-test, fft-test and motion-test which are using
takis
parents:
5127
diff
changeset
|
44 #undef random |
| 2872 | 45 |
| 46 void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);}; | |
| 47 | |
| 33 | 48 /* reference fdct/idct */ |
| 8250 | 49 void fdct(DCTELEM *block); |
| 50 void idct(DCTELEM *block); | |
|
8693
18737839ed27
Add missing void keyword to parameterless function declarations.
diego
parents:
8622
diff
changeset
|
51 void init_fdct(void); |
| 0 | 52 |
| 8250 | 53 void ff_mmx_idct(DCTELEM *data); |
| 54 void ff_mmxext_idct(DCTELEM *data); | |
| 33 | 55 |
| 8250 | 56 void odivx_idct_c(short *block); |
| 633 | 57 |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
58 // BFIN |
| 8250 | 59 void ff_bfin_idct(DCTELEM *block); |
| 60 void ff_bfin_fdct(DCTELEM *block); | |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
61 |
|
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
62 // ALTIVEC |
| 8250 | 63 void fdct_altivec(DCTELEM *block); |
| 64 //void idct_altivec(DCTELEM *block);?? no routine | |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
65 |
| 8351 | 66 // ARM |
| 67 void j_rev_dct_ARM(DCTELEM *data); | |
| 68 void simple_idct_ARM(DCTELEM *data); | |
| 69 void simple_idct_armv5te(DCTELEM *data); | |
| 70 void ff_simple_idct_armv6(DCTELEM *data); | |
| 71 void ff_simple_idct_neon(DCTELEM *data); | |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
72 |
| 8622 | 73 void ff_simple_idct_axp(DCTELEM *data); |
| 74 | |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
75 struct algo { |
|
7125
65e7e714c770
Mark constant structure member as const to avoid some warnings.
diego
parents:
6602
diff
changeset
|
76 const char *name; |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
77 enum { FDCT, IDCT } is_idct; |
|
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
78 void (* func) (DCTELEM *block); |
|
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
79 void (* ref) (DCTELEM *block); |
| 8350 | 80 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM, PARTTRANS_PERM } format; |
|
6543
948d9453432b
check mm_flags for each DCT and skips the ones that can't be run
gpoirier
parents:
6542
diff
changeset
|
81 int mm_support; |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
82 }; |
|
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
83 |
|
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
84 #ifndef FAAN_POSTSCALE |
|
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
85 #define FAAN_SCALE SCALE_PERM |
|
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
86 #else |
|
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
87 #define FAAN_SCALE NO_PERM |
|
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
88 #endif |
|
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
89 |
| 7155 | 90 static int cpu_flags; |
| 91 | |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
92 struct algo algos[] = { |
| 6542 | 93 {"REF-DBL", 0, fdct, fdct, NO_PERM}, |
| 94 {"FAAN", 0, ff_faandct, fdct, FAAN_SCALE}, | |
| 95 {"FAANI", 1, ff_faanidct, idct, NO_PERM}, | |
| 96 {"IJG-AAN-INT", 0, fdct_ifast, fdct, SCALE_PERM}, | |
| 97 {"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, NO_PERM}, | |
| 98 {"REF-DBL", 1, idct, idct, NO_PERM}, | |
| 99 {"INT", 1, j_rev_dct, idct, MMX_PERM}, | |
| 100 {"SIMPLE-C", 1, ff_simple_idct, idct, NO_PERM}, | |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
101 |
| 8590 | 102 #if HAVE_MMX |
|
8104
0d108ec85620
Remove duplicated MM_* macros for CPU capabilities from dsputil.h.
rathann
parents:
8031
diff
changeset
|
103 {"MMX", 0, ff_fdct_mmx, fdct, NO_PERM, FF_MM_MMX}, |
| 8590 | 104 #if HAVE_MMX2 |
|
8104
0d108ec85620
Remove duplicated MM_* macros for CPU capabilities from dsputil.h.
rathann
parents:
8031
diff
changeset
|
105 {"MMX2", 0, ff_fdct_mmx2, fdct, NO_PERM, FF_MM_MMXEXT}, |
| 8141 | 106 {"SSE2", 0, ff_fdct_sse2, fdct, NO_PERM, FF_MM_SSE2}, |
| 5110 | 107 #endif |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
108 |
| 8590 | 109 #if CONFIG_GPL |
|
8104
0d108ec85620
Remove duplicated MM_* macros for CPU capabilities from dsputil.h.
rathann
parents:
8031
diff
changeset
|
110 {"LIBMPEG2-MMX", 1, ff_mmx_idct, idct, MMX_PERM, FF_MM_MMX}, |
|
0d108ec85620
Remove duplicated MM_* macros for CPU capabilities from dsputil.h.
rathann
parents:
8031
diff
changeset
|
111 {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, MMX_PERM, FF_MM_MMXEXT}, |
| 5109 | 112 #endif |
|
8104
0d108ec85620
Remove duplicated MM_* macros for CPU capabilities from dsputil.h.
rathann
parents:
8031
diff
changeset
|
113 {"SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM, FF_MM_MMX}, |
|
0d108ec85620
Remove duplicated MM_* macros for CPU capabilities from dsputil.h.
rathann
parents:
8031
diff
changeset
|
114 {"XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM, FF_MM_MMX}, |
|
0d108ec85620
Remove duplicated MM_* macros for CPU capabilities from dsputil.h.
rathann
parents:
8031
diff
changeset
|
115 {"XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM, FF_MM_MMXEXT}, |
|
0d108ec85620
Remove duplicated MM_* macros for CPU capabilities from dsputil.h.
rathann
parents:
8031
diff
changeset
|
116 {"XVID-SSE2", 1, ff_idct_xvid_sse2, idct, SSE2_PERM, FF_MM_SSE2}, |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
117 #endif |
|
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
118 |
| 8590 | 119 #if HAVE_ALTIVEC |
|
8104
0d108ec85620
Remove duplicated MM_* macros for CPU capabilities from dsputil.h.
rathann
parents:
8031
diff
changeset
|
120 {"altivecfdct", 0, fdct_altivec, fdct, NO_PERM, FF_MM_ALTIVEC}, |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
121 #endif |
|
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
122 |
| 8590 | 123 #if ARCH_BFIN |
| 6542 | 124 {"BFINfdct", 0, ff_bfin_fdct, fdct, NO_PERM}, |
| 125 {"BFINidct", 1, ff_bfin_idct, idct, NO_PERM}, | |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
126 #endif |
|
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
127 |
| 8590 | 128 #if ARCH_ARM |
| 8351 | 129 {"SIMPLE-ARM", 1, simple_idct_ARM, idct, NO_PERM }, |
| 130 {"INT-ARM", 1, j_rev_dct_ARM, idct, MMX_PERM }, | |
| 8590 | 131 #if HAVE_ARMV5TE |
| 8351 | 132 {"SIMPLE-ARMV5TE", 1, simple_idct_armv5te, idct, NO_PERM }, |
| 133 #endif | |
| 8590 | 134 #if HAVE_ARMV6 |
| 8351 | 135 {"SIMPLE-ARMV6", 1, ff_simple_idct_armv6, idct, MMX_PERM }, |
| 136 #endif | |
| 8590 | 137 #if HAVE_NEON |
| 8351 | 138 {"SIMPLE-NEON", 1, ff_simple_idct_neon, idct, PARTTRANS_PERM }, |
| 139 #endif | |
| 8359 | 140 #endif /* ARCH_ARM */ |
| 8351 | 141 |
| 8622 | 142 #if ARCH_ALPHA |
| 143 {"SIMPLE-ALPHA", 1, ff_simple_idct_axp, idct, NO_PERM }, | |
| 144 #endif | |
| 145 | |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
146 { 0 } |
|
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
147 }; |
|
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
148 |
| 0 | 149 #define AANSCALE_BITS 12 |
| 150 | |
| 4197 | 151 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP]; |
| 633 | 152 |
| 1064 | 153 int64_t gettime(void) |
| 0 | 154 { |
| 155 struct timeval tv; | |
| 156 gettimeofday(&tv,NULL); | |
| 1064 | 157 return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec; |
| 0 | 158 } |
| 159 | |
| 160 #define NB_ITS 20000 | |
| 161 #define NB_ITS_SPEED 50000 | |
| 162 | |
| 33 | 163 static short idct_mmx_perm[64]; |
| 164 | |
| 633 | 165 static short idct_simple_mmx_perm[64]={ |
| 2979 | 166 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, |
| 167 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |
| 168 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |
| 169 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |
| 170 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |
| 171 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |
| 172 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |
| 173 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |
| 633 | 174 }; |
| 175 | |
| 6602 | 176 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7}; |
| 177 | |
| 33 | 178 void idct_mmx_init(void) |
| 179 { | |
| 180 int i; | |
| 181 | |
| 182 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ | |
| 183 for (i = 0; i < 64; i++) { | |
| 2979 | 184 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); |
| 185 // idct_simple_mmx_perm[i] = simple_block_permute_op(i); | |
| 33 | 186 } |
| 187 } | |
| 188 | |
|
6544
8a059debc2ba
Altivec and SSE2 require 16-byte alignment, so make sure they get it
gpoirier
parents:
6543
diff
changeset
|
189 static DCTELEM block[64] __attribute__ ((aligned (16))); |
| 33 | 190 static DCTELEM block1[64] __attribute__ ((aligned (8))); |
| 633 | 191 static DCTELEM block_org[64] __attribute__ ((aligned (8))); |
| 33 | 192 |
| 7155 | 193 static inline void mmx_emms(void) |
| 194 { | |
| 8590 | 195 #if HAVE_MMX |
|
8104
0d108ec85620
Remove duplicated MM_* macros for CPU capabilities from dsputil.h.
rathann
parents:
8031
diff
changeset
|
196 if (cpu_flags & FF_MM_MMX) |
| 8031 | 197 __asm__ volatile ("emms\n\t"); |
| 7155 | 198 #endif |
| 199 } | |
| 200 | |
| 33 | 201 void dct_error(const char *name, int is_idct, |
| 202 void (*fdct_func)(DCTELEM *block), | |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
203 void (*fdct_ref)(DCTELEM *block), int form, int test) |
| 0 | 204 { |
| 205 int it, i, scale; | |
| 206 int err_inf, v; | |
| 1064 | 207 int64_t err2, ti, ti1, it1; |
| 208 int64_t sysErr[64], sysErrMax=0; | |
| 633 | 209 int maxout=0; |
| 210 int blockSumErrMax=0, blockSumErr; | |
| 0 | 211 |
| 212 srandom(0); | |
| 213 | |
| 214 err_inf = 0; | |
| 215 err2 = 0; | |
| 633 | 216 for(i=0; i<64; i++) sysErr[i]=0; |
| 0 | 217 for(it=0;it<NB_ITS;it++) { |
| 633 | 218 for(i=0;i<64;i++) |
| 219 block1[i] = 0; | |
| 220 switch(test){ | |
| 2967 | 221 case 0: |
| 633 | 222 for(i=0;i<64;i++) |
| 223 block1[i] = (random() % 512) -256; | |
| 635 | 224 if (is_idct){ |
| 633 | 225 fdct(block1); |
| 635 | 226 |
| 227 for(i=0;i<64;i++) | |
| 228 block1[i]>>=3; | |
| 229 } | |
| 633 | 230 break; |
| 231 case 1:{ | |
| 232 int num= (random()%10)+1; | |
| 233 for(i=0;i<num;i++) | |
| 234 block1[random()%64] = (random() % 512) -256; | |
| 235 }break; | |
| 236 case 2: | |
| 237 block1[0]= (random()%4096)-2048; | |
| 238 block1[63]= (block1[0]&1)^1; | |
| 239 break; | |
| 240 } | |
| 33 | 241 |
| 633 | 242 #if 0 // simulate mismatch control |
| 243 { int sum=0; | |
| 244 for(i=0;i<64;i++) | |
| 245 sum+=block1[i]; | |
| 246 | |
| 2967 | 247 if((sum&1)==0) block1[63]^=1; |
| 633 | 248 } |
| 249 #endif | |
| 250 | |
| 251 for(i=0; i<64; i++) | |
| 252 block_org[i]= block1[i]; | |
| 33 | 253 |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
254 if (form == MMX_PERM) { |
| 633 | 255 for(i=0;i<64;i++) |
| 33 | 256 block[idct_mmx_perm[i]] = block1[i]; |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
257 } else if (form == MMX_SIMPLE_PERM) { |
| 633 | 258 for(i=0;i<64;i++) |
| 259 block[idct_simple_mmx_perm[i]] = block1[i]; | |
| 260 | |
| 6602 | 261 } else if (form == SSE2_PERM) { |
| 262 for(i=0; i<64; i++) | |
| 263 block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i]; | |
| 8350 | 264 } else if (form == PARTTRANS_PERM) { |
| 265 for(i=0; i<64; i++) | |
| 266 block[(i&0x24) | ((i&3)<<3) | ((i>>3)&3)] = block1[i]; | |
| 2979 | 267 } else { |
| 633 | 268 for(i=0; i<64; i++) |
| 269 block[i]= block1[i]; | |
| 33 | 270 } |
| 633 | 271 #if 0 // simulate mismatch control for tested IDCT but not the ref |
| 272 { int sum=0; | |
| 273 for(i=0;i<64;i++) | |
| 274 sum+=block[i]; | |
| 275 | |
| 2967 | 276 if((sum&1)==0) block[63]^=1; |
| 633 | 277 } |
| 278 #endif | |
| 33 | 279 |
| 0 | 280 fdct_func(block); |
| 7155 | 281 mmx_emms(); |
| 33 | 282 |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
283 if (form == SCALE_PERM) { |
| 0 | 284 for(i=0; i<64; i++) { |
| 8223 | 285 scale = 8*(1 << (AANSCALE_BITS + 11)) / ff_aanscales[i]; |
| 633 | 286 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS; |
| 287 } | |
| 288 } | |
| 289 | |
| 33 | 290 fdct_ref(block1); |
| 0 | 291 |
| 633 | 292 blockSumErr=0; |
| 0 | 293 for(i=0;i<64;i++) { |
| 294 v = abs(block[i] - block1[i]); | |
| 295 if (v > err_inf) | |
| 296 err_inf = v; | |
| 297 err2 += v * v; | |
| 2979 | 298 sysErr[i] += block[i] - block1[i]; |
| 299 blockSumErr += v; | |
| 300 if( abs(block[i])>maxout) maxout=abs(block[i]); | |
| 0 | 301 } |
| 633 | 302 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr; |
| 303 #if 0 // print different matrix pairs | |
| 304 if(blockSumErr){ | |
| 305 printf("\n"); | |
| 306 for(i=0; i<64; i++){ | |
| 307 if((i&7)==0) printf("\n"); | |
| 308 printf("%4d ", block_org[i]); | |
| 309 } | |
| 310 for(i=0; i<64; i++){ | |
| 311 if((i&7)==0) printf("\n"); | |
| 312 printf("%4d ", block[i] - block1[i]); | |
| 313 } | |
| 314 } | |
| 315 #endif | |
| 0 | 316 } |
|
7130
601509a430f7
Replace redundant MAX macro declaration by proper use of FFMAX.
diego
parents:
7125
diff
changeset
|
317 for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i])); |
| 2967 | 318 |
| 633 | 319 #if 1 // dump systematic errors |
| 320 for(i=0; i<64; i++){ | |
| 2979 | 321 if(i%8==0) printf("\n"); |
| 633 | 322 printf("%5d ", (int)sysErr[i]); |
| 323 } | |
| 324 printf("\n"); | |
| 325 #endif | |
| 2967 | 326 |
| 633 | 327 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n", |
| 33 | 328 is_idct ? "IDCT" : "DCT", |
| 633 | 329 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax); |
| 330 #if 1 //Speed test | |
| 0 | 331 /* speed test */ |
| 633 | 332 for(i=0;i<64;i++) |
| 333 block1[i] = 0; | |
| 334 switch(test){ | |
| 2967 | 335 case 0: |
| 633 | 336 for(i=0;i<64;i++) |
| 337 block1[i] = (random() % 512) -256; | |
| 635 | 338 if (is_idct){ |
| 633 | 339 fdct(block1); |
| 635 | 340 |
| 341 for(i=0;i<64;i++) | |
| 342 block1[i]>>=3; | |
| 343 } | |
| 633 | 344 break; |
| 345 case 1:{ | |
| 346 case 2: | |
| 347 block1[0] = (random() % 512) -256; | |
| 348 block1[1] = (random() % 512) -256; | |
| 349 block1[2] = (random() % 512) -256; | |
| 350 block1[3] = (random() % 512) -256; | |
| 351 }break; | |
| 352 } | |
| 0 | 353 |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
354 if (form == MMX_PERM) { |
| 633 | 355 for(i=0;i<64;i++) |
| 33 | 356 block[idct_mmx_perm[i]] = block1[i]; |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
357 } else if(form == MMX_SIMPLE_PERM) { |
| 633 | 358 for(i=0;i<64;i++) |
| 359 block[idct_simple_mmx_perm[i]] = block1[i]; | |
| 360 } else { | |
| 361 for(i=0; i<64; i++) | |
| 362 block[i]= block1[i]; | |
| 33 | 363 } |
| 364 | |
| 0 | 365 ti = gettime(); |
| 366 it1 = 0; | |
| 367 do { | |
| 368 for(it=0;it<NB_ITS_SPEED;it++) { | |
| 633 | 369 for(i=0; i<64; i++) |
| 370 block[i]= block1[i]; | |
| 371 // memcpy(block, block1, sizeof(DCTELEM) * 64); | |
| 5127 | 372 // do not memcpy especially not fastmemcpy because it does movntq !!! |
| 0 | 373 fdct_func(block); |
| 374 } | |
| 375 it1 += NB_ITS_SPEED; | |
| 376 ti1 = gettime() - ti; | |
| 377 } while (ti1 < 1000000); | |
| 7155 | 378 mmx_emms(); |
| 0 | 379 |
| 633 | 380 printf("%s %s: %0.1f kdct/s\n", |
| 33 | 381 is_idct ? "IDCT" : "DCT", |
| 0 | 382 name, (double)it1 * 1000.0 / (double)ti1); |
| 633 | 383 #endif |
| 0 | 384 } |
| 385 | |
| 1064 | 386 static uint8_t img_dest[64] __attribute__ ((aligned (8))); |
| 387 static uint8_t img_dest1[64] __attribute__ ((aligned (8))); | |
| 720 | 388 |
| 1064 | 389 void idct248_ref(uint8_t *dest, int linesize, int16_t *block) |
| 720 | 390 { |
| 391 static int init; | |
| 392 static double c8[8][8]; | |
| 393 static double c4[4][4]; | |
| 394 double block1[64], block2[64], block3[64]; | |
| 395 double s, sum, v; | |
| 396 int i, j, k; | |
| 397 | |
| 398 if (!init) { | |
| 399 init = 1; | |
| 400 | |
| 401 for(i=0;i<8;i++) { | |
| 402 sum = 0; | |
| 403 for(j=0;j<8;j++) { | |
| 404 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0); | |
| 405 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0); | |
| 406 sum += c8[i][j] * c8[i][j]; | |
| 407 } | |
| 408 } | |
| 2967 | 409 |
| 720 | 410 for(i=0;i<4;i++) { |
| 411 sum = 0; | |
| 412 for(j=0;j<4;j++) { | |
| 413 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0); | |
| 414 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0); | |
| 415 sum += c4[i][j] * c4[i][j]; | |
| 416 } | |
| 417 } | |
| 418 } | |
| 419 | |
| 420 /* butterfly */ | |
|
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
421 s = 0.5 * sqrt(2.0); |
| 720 | 422 for(i=0;i<4;i++) { |
| 423 for(j=0;j<8;j++) { | |
|
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
424 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s; |
|
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
425 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s; |
| 720 | 426 } |
| 427 } | |
| 428 | |
| 429 /* idct8 on lines */ | |
| 430 for(i=0;i<8;i++) { | |
| 431 for(j=0;j<8;j++) { | |
| 432 sum = 0; | |
| 433 for(k=0;k<8;k++) | |
| 434 sum += c8[k][j] * block1[8*i+k]; | |
| 435 block2[8*i+j] = sum; | |
| 436 } | |
| 437 } | |
| 438 | |
| 439 /* idct4 */ | |
| 440 for(i=0;i<8;i++) { | |
| 441 for(j=0;j<4;j++) { | |
| 442 /* top */ | |
| 443 sum = 0; | |
| 444 for(k=0;k<4;k++) | |
| 445 sum += c4[k][j] * block2[8*(2*k)+i]; | |
| 446 block3[8*(2*j)+i] = sum; | |
| 447 | |
| 448 /* bottom */ | |
| 449 sum = 0; | |
| 450 for(k=0;k<4;k++) | |
| 451 sum += c4[k][j] * block2[8*(2*k+1)+i]; | |
| 452 block3[8*(2*j+1)+i] = sum; | |
| 453 } | |
| 454 } | |
| 455 | |
| 456 /* clamp and store the result */ | |
| 457 for(i=0;i<8;i++) { | |
| 458 for(j=0;j<8;j++) { | |
|
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
459 v = block3[8*i+j]; |
| 720 | 460 if (v < 0) |
| 461 v = 0; | |
| 462 else if (v > 255) | |
| 463 v = 255; | |
| 464 dest[i * linesize + j] = (int)rint(v); | |
| 465 } | |
| 466 } | |
| 467 } | |
| 468 | |
| 2967 | 469 void idct248_error(const char *name, |
| 1064 | 470 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block)) |
| 720 | 471 { |
| 472 int it, i, it1, ti, ti1, err_max, v; | |
| 473 | |
| 474 srandom(0); | |
| 2967 | 475 |
| 720 | 476 /* just one test to see if code is correct (precision is less |
| 477 important here) */ | |
| 478 err_max = 0; | |
| 479 for(it=0;it<NB_ITS;it++) { | |
| 2967 | 480 |
|
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
481 /* XXX: use forward transform to generate values */ |
| 720 | 482 for(i=0;i<64;i++) |
|
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
483 block1[i] = (random() % 256) - 128; |
|
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
484 block1[0] += 1024; |
|
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
485 |
| 720 | 486 for(i=0; i<64; i++) |
| 487 block[i]= block1[i]; | |
| 488 idct248_ref(img_dest1, 8, block); | |
| 2967 | 489 |
|
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
490 for(i=0; i<64; i++) |
|
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
491 block[i]= block1[i]; |
|
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
492 idct248_put(img_dest, 8, block); |
| 2967 | 493 |
|
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
494 for(i=0;i<64;i++) { |
|
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
495 v = abs((int)img_dest[i] - (int)img_dest1[i]); |
|
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
496 if (v == 255) |
|
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
497 printf("%d %d\n", img_dest[i], img_dest1[i]); |
|
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
498 if (v > err_max) |
|
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
499 err_max = v; |
|
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
500 } |
| 720 | 501 #if 0 |
| 502 printf("ref=\n"); | |
| 503 for(i=0;i<8;i++) { | |
| 504 int j; | |
| 505 for(j=0;j<8;j++) { | |
| 506 printf(" %3d", img_dest1[i*8+j]); | |
| 507 } | |
| 508 printf("\n"); | |
| 509 } | |
| 2967 | 510 |
| 720 | 511 printf("out=\n"); |
| 512 for(i=0;i<8;i++) { | |
| 513 int j; | |
| 514 for(j=0;j<8;j++) { | |
| 515 printf(" %3d", img_dest[i*8+j]); | |
| 516 } | |
| 517 printf("\n"); | |
| 518 } | |
| 519 #endif | |
| 520 } | |
| 521 printf("%s %s: err_inf=%d\n", | |
| 522 1 ? "IDCT248" : "DCT248", | |
| 523 name, err_max); | |
| 524 | |
| 525 ti = gettime(); | |
| 526 it1 = 0; | |
| 527 do { | |
| 528 for(it=0;it<NB_ITS_SPEED;it++) { | |
| 529 for(i=0; i<64; i++) | |
| 530 block[i]= block1[i]; | |
| 531 // memcpy(block, block1, sizeof(DCTELEM) * 64); | |
| 5127 | 532 // do not memcpy especially not fastmemcpy because it does movntq !!! |
| 720 | 533 idct248_put(img_dest, 8, block); |
| 534 } | |
| 535 it1 += NB_ITS_SPEED; | |
| 536 ti1 = gettime() - ti; | |
| 537 } while (ti1 < 1000000); | |
| 7155 | 538 mmx_emms(); |
| 720 | 539 |
| 540 printf("%s %s: %0.1f kdct/s\n", | |
| 541 1 ? "IDCT248" : "DCT248", | |
| 542 name, (double)it1 * 1000.0 / (double)ti1); | |
| 543 } | |
| 544 | |
| 33 | 545 void help(void) |
| 546 { | |
| 633 | 547 printf("dct-test [-i] [<test-number>]\n" |
| 548 "test-number 0 -> test with random matrixes\n" | |
| 549 " 1 -> test with random sparse matrixes\n" | |
| 550 " 2 -> do 3. test from mpeg4 std\n" | |
| 720 | 551 "-i test IDCT implementations\n" |
| 552 "-4 test IDCT248 implementations\n"); | |
| 33 | 553 } |
| 554 | |
| 0 | 555 int main(int argc, char **argv) |
| 556 { | |
| 720 | 557 int test_idct = 0, test_248_dct = 0; |
| 633 | 558 int c,i; |
| 559 int test=1; | |
| 7155 | 560 cpu_flags = mm_support(); |
| 33 | 561 |
| 0 | 562 init_fdct(); |
| 33 | 563 idct_mmx_init(); |
|
7135
6bd6a2da306e
Define mm_flags/support to be 0 on architectures where they don't exist.
astrange
parents:
7130
diff
changeset
|
564 |
| 4197 | 565 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; |
| 633 | 566 for(i=0;i<MAX_NEG_CROP;i++) { |
| 4197 | 567 cropTbl[i] = 0; |
| 568 cropTbl[i + MAX_NEG_CROP + 256] = 255; | |
| 633 | 569 } |
| 2967 | 570 |
| 33 | 571 for(;;) { |
| 720 | 572 c = getopt(argc, argv, "ih4"); |
| 33 | 573 if (c == -1) |
| 574 break; | |
| 575 switch(c) { | |
| 576 case 'i': | |
| 577 test_idct = 1; | |
| 578 break; | |
| 720 | 579 case '4': |
| 580 test_248_dct = 1; | |
| 581 break; | |
| 633 | 582 default : |
| 33 | 583 case 'h': |
| 584 help(); | |
| 4754 | 585 return 0; |
| 33 | 586 } |
| 587 } | |
| 2967 | 588 |
| 633 | 589 if(optind <argc) test= atoi(argv[optind]); |
| 2967 | 590 |
| 33 | 591 printf("ffmpeg DCT/IDCT test\n"); |
| 592 | |
| 720 | 593 if (test_248_dct) { |
| 6001 | 594 idct248_error("SIMPLE-C", ff_simple_idct248_put); |
| 33 | 595 } else { |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
596 for (i=0;algos[i].name;i++) |
|
7140
654f063dc099
Use a local variable instead of mm_flags in dct-test.
astrange
parents:
7135
diff
changeset
|
597 if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) { |
|
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
598 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test); |
| 720 | 599 } |
| 33 | 600 } |
| 0 | 601 return 0; |
| 602 } |
