Mercurial > libavcodec.hg
annotate ppc/dsputil_ppc.c @ 11969:3cd4cd0509cd libavcodec
Remove PPC perf counter support
This functionality is better accessed through tools like oprofile.
| author | mru |
|---|---|
| date | Sat, 26 Jun 2010 22:23:35 +0000 |
| parents | 50415a8f1451 |
| children | 3fc4c625b6f3 |
| rev | line source |
|---|---|
|
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
1 /* |
|
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
2 * Copyright (c) 2002 Brian Foley |
|
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
3 * Copyright (c) 2002 Dieter Shirley |
|
1949
66215baae7b9
hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1879
diff
changeset
|
4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> |
|
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
5 * |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3581
diff
changeset
|
6 * This file is part of FFmpeg. |
|
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3581
diff
changeset
|
7 * |
|
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3581
diff
changeset
|
8 * FFmpeg is free software; you can redistribute it and/or |
|
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
9 * modify it under the terms of the GNU Lesser General Public |
|
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
10 * License as published by the Free Software Foundation; either |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3581
diff
changeset
|
11 * version 2.1 of the License, or (at your option) any later version. |
|
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
12 * |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3581
diff
changeset
|
13 * FFmpeg is distributed in the hope that it will be useful, |
|
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
16 * Lesser General Public License for more details. |
|
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
17 * |
|
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
18 * You should have received a copy of the GNU Lesser General Public |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3581
diff
changeset
|
19 * License along with FFmpeg; if not, write to the Free Software |
|
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2979
diff
changeset
|
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
21 */ |
|
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
22 |
| 6763 | 23 #include "libavcodec/dsputil.h" |
|
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
24 #include "dsputil_altivec.h" |
|
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
25 |
| 4197 | 26 int mm_flags = 0; /* CPU capability flags; dsputil_init_ppc() ORs in FF_MM_ALTIVEC when has_altivec() reports AltiVec */ |
|
894
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
884
diff
changeset
|
27 |
|
995
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
/**
 * Report the SIMD capabilities of the running CPU.
 *
 * @return FF_MM_ALTIVEC when the build has AltiVec support (HAVE_ALTIVEC)
 *         and has_altivec() reports it at run time; 0 otherwise.
 */
int mm_support(void)
{
#if HAVE_ALTIVEC
    if (has_altivec())
        return FF_MM_ALTIVEC;
#endif /* HAVE_ALTIVEC */
    return 0;
}
|
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
38 |
|
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
39 /* ***** WARNING ***** WARNING ***** WARNING ***** */ |
|
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
40 /* |
|
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
41 clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with a |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
42 cache line size not equal to 32 bytes. |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
43 Fortunately all processors used by Apple up to at least the 7450 (aka second |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
44 generation G4) use a 32-byte cache line. |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
45 This is due to the use of the 'dcbz' instruction. It simply clears to zero a |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
46 single cache line, so you need to know the cache line size to use it ! |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
47 It's absurd, but it's fast... |
|
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
48 |
|
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
49 update 24/06/2003 : Apple released yesterday the G5, with a PPC970. cache line |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
50 size: 128 bytes. Oops. |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
51 The semantics of dcbz were changed: it always clears 32 bytes, so the function |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
52 below will work, but will be slow. So I fixed check_dcbzl_effect to use dcbzl, |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
53 which is defined to clear a cache line (as dcbz before). So we still can |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
54 distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required. |
|
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
55 |
|
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
56 see <http://developer.apple.com/technotes/tn/tn2087.html> |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
57 and <http://developer.apple.com/technotes/tn/tn2086.html> |
|
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
58 */ |
|
11382
50415a8f1451
PPC: move prototypes to headers and make some functions static
mru
parents:
9995
diff
changeset
|
59 static void clear_blocks_dcbz32_ppc(DCTELEM *blocks) |
|
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
60 { |
|
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
61 register int misal = ((unsigned long)blocks & 0x00000010); |
|
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
62 register int i = 0; |
|
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
63 #if 1 |
|
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
64 if (misal) { |
|
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
65 ((unsigned long*)blocks)[0] = 0L; |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
66 ((unsigned long*)blocks)[1] = 0L; |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
67 ((unsigned long*)blocks)[2] = 0L; |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
68 ((unsigned long*)blocks)[3] = 0L; |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
69 i += 16; |
|
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
70 } |
|
2294
fac626a2b73b
missaliged clear_blocks() and h264 not complied but referenced fix patch by (Roine Gustafsson <roine at users dot sourceforge dot net>) and me
michael
parents:
2236
diff
changeset
|
71 for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) { |
| 8031 | 72 __asm__ volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory"); |
|
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
73 } |
|
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
74 if (misal) { |
|
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
75 ((unsigned long*)blocks)[188] = 0L; |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
76 ((unsigned long*)blocks)[189] = 0L; |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
77 ((unsigned long*)blocks)[190] = 0L; |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
78 ((unsigned long*)blocks)[191] = 0L; |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
79 i += 16; |
|
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
80 } |
|
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
81 #else |
|
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
82 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
|
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
83 #endif |
|
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
84 } |
|
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
85 |
|
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
86 /* same as above, when dcbzl clear a whole 128B cache line |
|
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
87 i.e. the PPC970 aka G5 */ |
| 8590 | 88 #if HAVE_DCBZL |
|
11382
50415a8f1451
PPC: move prototypes to headers and make some functions static
mru
parents:
9995
diff
changeset
|
89 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks) |
|
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
90 { |
|
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
91 register int misal = ((unsigned long)blocks & 0x0000007f); |
|
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
92 register int i = 0; |
|
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
93 #if 1 |
|
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
94 if (misal) { |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
95 // we could probably also optimize this case, |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
96 // but there's not much point as the machines |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
97 // aren't available yet (2003-06-26) |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
98 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
|
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
99 } |
|
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
100 else |
|
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
101 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) { |
| 8031 | 102 __asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory"); |
|
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
103 } |
|
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
104 #else |
|
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
105 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
|
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
106 #endif |
|
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
107 } |
|
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
108 #else |
|
11382
50415a8f1451
PPC: move prototypes to headers and make some functions static
mru
parents:
9995
diff
changeset
|
109 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks) |
|
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
110 { |
|
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
111 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
|
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
112 } |
|
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
113 #endif |
|
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
114 |
/**
 * Measure how many bytes a single dcbzl instruction sets to zero.
 *
 * A 1024-byte scratch buffer is filled with 0xFF, one dcbzl is issued on
 * its midpoint, and the bytes that read back as zero are counted.
 * dcbzl is used rather than dcbz because, per the notes in this file,
 * dcbz was redefined to always clear 32 bytes while dcbzl clears one cache
 * line.  This can only be built when the assembler knows dcbzl, hence the
 * HAVE_DCBZL guard.
 *
 * @return the number of bytes cleared by dcbzl; 0 when the scratch buffer
 *         cannot be allocated or when HAVE_DCBZL is not set.
 */
static long check_dcbzl_effect(void)
{
    long count = 0;
#if HAVE_DCBZL
    register char *fakedata = av_malloc(1024);
    register char *fakedata_middle;
    register long zero = 0;
    long i;

    if (!fakedata) {
        return 0L;
    }

    fakedata_middle = fakedata + 512;

    memset(fakedata, 0xFF, 1024);

    /* The "b" constraint requests an address base register, which keeps
     * the base operand out of r0.
     * The "memory" clobber tells the compiler that the instruction writes
     * to the buffer; without it the compiler may assume the buffer still
     * holds 0xFF after the memset and fold or reorder the counting loop. */
    __asm__ volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero) : "memory");

    for (i = 0; i < 1024; i++) {
        if (fakedata[i] == (char)0)
            count++;
    }

    av_free(fakedata);
#endif /* HAVE_DCBZL */
    return count;
}
|
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
156 |
/**
 * Issue a dcbt (data cache block touch) hint for the first byte of each
 * of h rows.
 *
 * @param mem    address of the first row
 * @param stride distance in bytes from one row to the next
 * @param h      number of rows to touch; nothing is done when h <= 0
 */
static void prefetch_ppc(void *mem, int stride, int h)
{
    const uint8_t *p = mem;

    /* A do/while on --h would run about 2^31 times for h <= 0 and end in
     * signed overflow; the counted loop is identical for every h >= 1. */
    for (; h > 0; h--) {
        __asm__ volatile ("dcbt 0,%0" : : "r" (p));
        p += stride;
    }
}
| 165 | |
| 1092 | 166 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) |
|
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
167 { |
| 5749 | 168 // Common optimizations whether AltiVec is available or not |
| 4003 | 169 c->prefetch = prefetch_ppc; |
|
3546
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
170 switch (check_dcbzl_effect()) { |
|
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
171 case 32: |
|
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
172 c->clear_blocks = clear_blocks_dcbz32_ppc; |
|
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
173 break; |
|
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
174 case 128: |
|
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
175 c->clear_blocks = clear_blocks_dcbz128_ppc; |
|
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
176 break; |
|
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
177 default: |
|
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
178 break; |
|
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
179 } |
|
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2068
diff
changeset
|
180 |
| 8590 | 181 #if HAVE_ALTIVEC |
|
8596
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
182 if(CONFIG_H264_DECODER) dsputil_h264_init_ppc(c, avctx); |
| 2967 | 183 |
|
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
184 if (has_altivec()) { |
|
8104
0d108ec85620
Remove duplicated MM_* macros for CPU capabilities from dsputil.h.
rathann
parents:
8031
diff
changeset
|
185 mm_flags |= FF_MM_ALTIVEC; |
| 2967 | 186 |
| 3547 | 187 dsputil_init_altivec(c, avctx); |
|
9995
3141f69e3905
Do not check for both CONFIG_VC1_DECODER and CONFIG_WMV3_DECODER,
diego
parents:
9975
diff
changeset
|
188 if(CONFIG_VC1_DECODER) |
|
4227
ef1d382309e5
Conditionally compile some of the AltiVec optimizations.
diego
parents:
4197
diff
changeset
|
189 vc1dsp_init_altivec(c, avctx); |
| 3581 | 190 float_init_altivec(c, avctx); |
|
4838
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
4521
diff
changeset
|
191 int_init_altivec(c, avctx); |
| 2979 | 192 c->gmc1 = gmc1_altivec; |
| 1092 | 193 |
| 8590 | 194 #if CONFIG_ENCODERS |
| 2979 | 195 if (avctx->dct_algo == FF_DCT_AUTO || |
|
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
196 avctx->dct_algo == FF_DCT_ALTIVEC) { |
| 2979 | 197 c->fdct = fdct_altivec; |
| 198 } | |
|
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1511
diff
changeset
|
199 #endif //CONFIG_ENCODERS |
|
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1511
diff
changeset
|
200 |
|
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
201 if (avctx->lowres==0) { |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
202 if ((avctx->idct_algo == FF_IDCT_AUTO) || |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
203 (avctx->idct_algo == FF_IDCT_ALTIVEC)) { |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
204 c->idct_put = idct_put_altivec; |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
205 c->idct_add = idct_add_altivec; |
|
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
206 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; |
|
9975
d6d7e8d4a04d
Do not redundantly check for both CONFIG_THEORA_DECODER and CONFIG_VP3_DECODER.
diego
parents:
9711
diff
changeset
|
207 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) && |
| 9711 | 208 avctx->idct_algo==FF_IDCT_VP3){ |
| 209 c->idct_put = ff_vp3_idct_put_altivec; | |
| 210 c->idct_add = ff_vp3_idct_add_altivec; | |
| 211 c->idct = ff_vp3_idct_altivec; | |
| 212 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | |
|
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
213 } |
|
3546
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
214 } |
| 2967 | 215 |
|
3957
b6f6bf155661
Non Altivec optimizations already present at the top
lu_zero
parents:
3949
diff
changeset
|
216 } |
|
1024
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
217 #endif /* HAVE_ALTIVEC */ |
|
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
218 } |
