Mercurial > libavcodec.hg
annotate arm/mathops.h @ 12510:ef2f2db5b7be libavcodec
Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
code directly also and remove loop setup. 20% faster in function, 0.8% overall.
See "[PATCH] unroll loop in h264_idct_add8_sse2()" thread on ML.
| author | rbultje |
|---|---|
| date | Fri, 24 Sep 2010 14:05:45 +0000 |
| parents | 25136467a218 |
| children |
| rev | line source |
|---|---|
| 3733 | 1 /* |
| 2 * simple math operations | |
| 3 * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al | |
| 4 * | |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3767
diff
changeset
|
5 * This file is part of FFmpeg. |
|
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3767
diff
changeset
|
6 * |
|
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3767
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
| 3733 | 8 * modify it under the terms of the GNU Lesser General Public |
| 9 * License as published by the Free Software Foundation; either | |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3767
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
| 3733 | 11 * |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3767
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
| 3733 | 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 15 * Lesser General Public License for more details. | |
| 16 * | |
| 17 * You should have received a copy of the GNU Lesser General Public | |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3767
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
| 3733 | 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 20 */ | |
| 21 | |
| 8359 | 22 #ifndef AVCODEC_ARM_MATHOPS_H |
| 23 #define AVCODEC_ARM_MATHOPS_H | |
| 5163 | 24 |
| 8084 | 25 #include <stdint.h> |
| 10080 | 26 #include "config.h" |
| 8084 | 27 #include "libavutil/common.h" |
| 28 | |
| 9141 | 29 #if HAVE_INLINE_ASM |
| 30 | |
| 8112 | 31 # define MULL MULL |
| 8201 | 32 static inline av_const int MULL(int a, int b, unsigned shift) |
| 8112 | 33 { |
| 34 int lo, hi; | |
| 35 __asm__("smull %0, %1, %2, %3 \n\t" | |
| 36 "mov %0, %0, lsr %4 \n\t" | |
| 37 "add %1, %0, %1, lsl %5 \n\t" | |
| 38 : "=&r"(lo), "=&r"(hi) | |
| 8676 | 39 : "r"(b), "r"(a), "ir"(shift), "ir"(32-shift)); |
| 8112 | 40 return hi; |
| 41 } | |
| 3733 | 42 |
| 8113 | 43 #define MULH MULH |
| 8590 | 44 #if HAVE_ARMV6 |
| 7280 | 45 static inline av_const int MULH(int a, int b) |
| 46 { | |
| 47 int r; | |
| 8031 | 48 __asm__ ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b)); |
| 7280 | 49 return r; |
| 50 } | |
| 51 #else | |
| 8113 | 52 static inline av_const int MULH(int a, int b) |
| 53 { | |
| 54 int lo, hi; | |
| 55 __asm__ ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a)); | |
| 56 return hi; | |
| 57 } | |
| 7280 | 58 #endif |
| 3733 | 59 |
| 7281 | 60 static inline av_const int64_t MUL64(int a, int b) |
| 61 { | |
| 62 union { uint64_t x; unsigned hl[2]; } x; | |
| 8031 | 63 __asm__ ("smull %0, %1, %2, %3" |
| 8111 | 64 : "=r"(x.hl[0]), "=r"(x.hl[1]) : "r"(a), "r"(b)); |
| 7281 | 65 return x.x; |
| 66 } | |
| 67 #define MUL64 MUL64 | |
| 68 | |
| 7282 | 69 static inline av_const int64_t MAC64(int64_t d, int a, int b) |
| 70 { | |
| 71 union { uint64_t x; unsigned hl[2]; } x = { d }; | |
| 8031 | 72 __asm__ ("smlal %0, %1, %2, %3" |
| 8111 | 73 : "+r"(x.hl[0]), "+r"(x.hl[1]) : "r"(a), "r"(b)); |
| 7282 | 74 return x.x; |
| 75 } | |
| 76 #define MAC64(d, a, b) ((d) = MAC64(d, a, b)) | |
| 77 #define MLS64(d, a, b) MAC64(d, -(a), b) | |
| 78 | |
| 8590 | 79 #if HAVE_ARMV5TE |
| 3733 | 80 |
/*
 * Signed 16x16 -> 32 multiply-accumulate (smlabb):
 * rt += (low halfword of ra) * (low halfword of rb).
 * rt must be a modifiable lvalue. The expansion already ends in ';'.
 */
#   define MAC16(rt, ra, rb)                                            \
    __asm__ ("smlabb %[acc], %[x], %[y], %[acc]"                        \
             : [acc] "+r"(rt) : [x] "r"(ra), [y] "r"(rb));
| 84 | |
| 3733 | 85 /* signed 16x16 -> 32 multiply */ |
| 8115 | 86 # define MUL16 MUL16 |
| 9079 | 87 static inline av_const int MUL16(int ra, int rb) |
| 8115 | 88 { |
| 89 int rt; | |
| 90 __asm__ ("smulbb %0, %1, %2" : "=r"(rt) : "r"(ra), "r"(rb)); | |
| 91 return rt; | |
| 92 } | |
| 3733 | 93 |
| 94 #endif | |
| 5163 | 95 |
| 8677 | 96 #define mid_pred mid_pred |
| 97 static inline av_const int mid_pred(int a, int b, int c) | |
| 98 { | |
| 99 int m; | |
| 100 __asm__ volatile ( | |
| 101 "mov %0, %2 \n\t" | |
| 102 "cmp %1, %2 \n\t" | |
| 103 "movgt %0, %1 \n\t" | |
| 104 "movgt %1, %2 \n\t" | |
| 105 "cmp %1, %3 \n\t" | |
| 106 "movle %1, %3 \n\t" | |
| 107 "cmp %0, %1 \n\t" | |
| 108 "movgt %0, %1 \n\t" | |
| 109 : "=&r"(m), "+r"(a) | |
| 110 : "r"(b), "r"(c)); | |
| 111 return m; | |
| 112 } | |
| 113 | |
| 9141 | 114 #endif /* HAVE_INLINE_ASM */ |
| 115 | |
| 8359 | 116 #endif /* AVCODEC_ARM_MATHOPS_H */ |
