libavcodec.hg: arm/mathops.h annotate

annotate arm/mathops.h @ 12510:ef2f2db5b7be libavcodec

Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the code directly also and remove loop setup. 20% faster in function, 0.8% overall. See "[PATCH] unroll loop in h264_idct_add8_sse2()" thread on ML.

author	rbultje
date	Fri, 24 Sep 2010 14:05:45 +0000
parents	25136467a218
children

rev	line source
3733 d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	1 /*
d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	2 * simple math operations
d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	3 * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	4 *
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3767 diff changeset	5 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3767 diff changeset	6 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3767 diff changeset	7 * FFmpeg is free software; you can redistribute it and/or
3733 d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	8 * modify it under the terms of the GNU Lesser General Public
d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	9 * License as published by the Free Software Foundation; either
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3767 diff changeset	10 * version 2.1 of the License, or (at your option) any later version.
3733 d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	11 *
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3767 diff changeset	12 * FFmpeg is distributed in the hope that it will be useful,
3733 d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	15 * Lesser General Public License for more details.
d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	16 *
d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	17 * You should have received a copy of the GNU Lesser General Public
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3767 diff changeset	18 * License along with FFmpeg; if not, write to the Free Software
3733 d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	20 */
d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	21
8359 9281a8a9387a ARM: replace "armv4l" with "arm" mru parents: 8201 diff changeset	22 #ifndef AVCODEC_ARM_MATHOPS_H
9281a8a9387a ARM: replace "armv4l" with "arm" mru parents: 8201 diff changeset	23 #define AVCODEC_ARM_MATHOPS_H
5163 9ecbfc0c82bf add multiple inclusion guards to headers mru parents: 3947 diff changeset	24
8084 8547a4ae101b Add missing headers to pass 'make checkheaders'. diego parents: 8031 diff changeset	25 #include <stdint.h>
10080 25136467a218 Add necessary #include for config.h. diego parents: 9141 diff changeset	26 #include "config.h"
8084 8547a4ae101b Add missing headers to pass 'make checkheaders'. diego parents: 8031 diff changeset	27 #include "libavutil/common.h"
8547a4ae101b Add missing headers to pass 'make checkheaders'. diego parents: 8031 diff changeset	28
9141 489def16f0c7 ARM: disable inline asm for armcc mru parents: 9079 diff changeset	29 #if HAVE_INLINE_ASM
489def16f0c7 ARM: disable inline asm for armcc mru parents: 9079 diff changeset	30
8112 954dd6e341ce ARM: change MULL() macro to inline function mru parents: 8111 diff changeset	31 # define MULL MULL
8201 c6e2ffef3797 Add shift argument to MULL() macro mru parents: 8115 diff changeset	32 static inline av_const int MULL(int a, int b, unsigned shift)
8112 954dd6e341ce ARM: change MULL() macro to inline function mru parents: 8111 diff changeset	33 {
954dd6e341ce ARM: change MULL() macro to inline function mru parents: 8111 diff changeset	34 int lo, hi;
954dd6e341ce ARM: change MULL() macro to inline function mru parents: 8111 diff changeset	35 __asm__("smull %0, %1, %2, %3 \n\t"
954dd6e341ce ARM: change MULL() macro to inline function mru parents: 8111 diff changeset	36 "mov %0, %0, lsr %4 \n\t"
954dd6e341ce ARM: change MULL() macro to inline function mru parents: 8111 diff changeset	37 "add %1, %0, %1, lsl %5 \n\t"
954dd6e341ce ARM: change MULL() macro to inline function mru parents: 8111 diff changeset	38 : "=&r"(lo), "=&r"(hi)
8676 7fcf95230c28 ARM: allow register operands for shifts in MULL() mru parents: 8590 diff changeset	39 : "r"(b), "r"(a), "ir"(shift), "ir"(32-shift));
8112 954dd6e341ce ARM: change MULL() macro to inline function mru parents: 8111 diff changeset	40 return hi;
954dd6e341ce ARM: change MULL() macro to inline function mru parents: 8111 diff changeset	41 }
3733 d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	42
8113 aa55fd152068 ARM: change MULH() macro to inline function mru parents: 8112 diff changeset	43 #define MULH MULH
8590 7a463923ecd1 Change semantic of CONFIG_, HAVE_ and ARCH_. aurel* parents: 8359 diff changeset	44 #if HAVE_ARMV6
7280 c8b0366e066f ARM: ARMv6 optimised MULH mru parents: 5830 diff changeset	45 static inline av_const int MULH(int a, int b)
c8b0366e066f ARM: ARMv6 optimised MULH mru parents: 5830 diff changeset	46 {
c8b0366e066f ARM: ARMv6 optimised MULH mru parents: 5830 diff changeset	47 int r;
8031 eebc7209c47f Convert asm keyword into __asm__. flameeyes parents: 7760 diff changeset	48 __asm__ ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
7280 c8b0366e066f ARM: ARMv6 optimised MULH mru parents: 5830 diff changeset	49 return r;
c8b0366e066f ARM: ARMv6 optimised MULH mru parents: 5830 diff changeset	50 }
c8b0366e066f ARM: ARMv6 optimised MULH mru parents: 5830 diff changeset	51 #else
8113 aa55fd152068 ARM: change MULH() macro to inline function mru parents: 8112 diff changeset	52 static inline av_const int MULH(int a, int b)
aa55fd152068 ARM: change MULH() macro to inline function mru parents: 8112 diff changeset	53 {
aa55fd152068 ARM: change MULH() macro to inline function mru parents: 8112 diff changeset	54 int lo, hi;
aa55fd152068 ARM: change MULH() macro to inline function mru parents: 8112 diff changeset	55 __asm__ ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));
aa55fd152068 ARM: change MULH() macro to inline function mru parents: 8112 diff changeset	56 return hi;
aa55fd152068 ARM: change MULH() macro to inline function mru parents: 8112 diff changeset	57 }
7280 c8b0366e066f ARM: ARMv6 optimised MULH mru parents: 5830 diff changeset	58 #endif
3733 d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	59
7281 747908449de0 ARM: optimised MUL64 mru parents: 7280 diff changeset	60 static inline av_const int64_t MUL64(int a, int b)
747908449de0 ARM: optimised MUL64 mru parents: 7280 diff changeset	61 {
747908449de0 ARM: optimised MUL64 mru parents: 7280 diff changeset	62 union { uint64_t x; unsigned hl[2]; } x;
8031 eebc7209c47f Convert asm keyword into __asm__. flameeyes parents: 7760 diff changeset	63 __asm__ ("smull %0, %1, %2, %3"
8111 97b08ce5d507 ARM: mathops.h whitespace cosmetics mru parents: 8105 diff changeset	64 : "=r"(x.hl[0]), "=r"(x.hl[1]) : "r"(a), "r"(b));
7281 747908449de0 ARM: optimised MUL64 mru parents: 7280 diff changeset	65 return x.x;
747908449de0 ARM: optimised MUL64 mru parents: 7280 diff changeset	66 }
747908449de0 ARM: optimised MUL64 mru parents: 7280 diff changeset	67 #define MUL64 MUL64
747908449de0 ARM: optimised MUL64 mru parents: 7280 diff changeset	68
7282 dc5a334c758b ARM: optimised MAC64 and MLS64 mru parents: 7281 diff changeset	69 static inline av_const int64_t MAC64(int64_t d, int a, int b)
dc5a334c758b ARM: optimised MAC64 and MLS64 mru parents: 7281 diff changeset	70 {
dc5a334c758b ARM: optimised MAC64 and MLS64 mru parents: 7281 diff changeset	71 union { uint64_t x; unsigned hl[2]; } x = { d };
8031 eebc7209c47f Convert asm keyword into __asm__. flameeyes parents: 7760 diff changeset	72 __asm__ ("smlal %0, %1, %2, %3"
8111 97b08ce5d507 ARM: mathops.h whitespace cosmetics mru parents: 8105 diff changeset	73 : "+r"(x.hl[0]), "+r"(x.hl[1]) : "r"(a), "r"(b));
7282 dc5a334c758b ARM: optimised MAC64 and MLS64 mru parents: 7281 diff changeset	74 return x.x;
dc5a334c758b ARM: optimised MAC64 and MLS64 mru parents: 7281 diff changeset	75 }
dc5a334c758b ARM: optimised MAC64 and MLS64 mru parents: 7281 diff changeset	76 #define MAC64(d, a, b) ((d) = MAC64(d, a, b))
dc5a334c758b ARM: optimised MAC64 and MLS64 mru parents: 7281 diff changeset	77 #define MLS64(d, a, b) MAC64(d, -(a), b)
dc5a334c758b ARM: optimised MAC64 and MLS64 mru parents: 7281 diff changeset	78
8590 7a463923ecd1 Change semantic of CONFIG_, HAVE_ and ARCH_. aurel* parents: 8359 diff changeset	79 #if HAVE_ARMV5TE
3733 d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	80
/*
 * signed 16x16 -> 32 multiply add accumulate: rt += ra * rb (SMLABB).
 * rt must be a modifiable lvalue; it is both read and written.
 *
 * The expansion deliberately carries no trailing semicolon, so that
 * "MAC16(x, a, b);" is exactly one statement and stays safe inside an
 * unbraced if/else.
 */
# define MAC16(rt, ra, rb) \
    __asm__ ("smlabb %0, %1, %2, %0" : "+r"(rt) : "r"(ra), "r"(rb))
1231a7ddd932 ARM: prettify MAC16() macro mru parents: 8113 diff changeset	84
3733 d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	85 /* signed 16x16 -> 32 multiply */
8115 e61cc20bad68 ARM: change MUL16() macro to inline function mru parents: 8114 diff changeset	86 # define MUL16 MUL16
9079 37dd457573a4 ARM: fix missing MUL16() return type mru parents: 8677 diff changeset	87 static inline av_const int MUL16(int ra, int rb)
8115 e61cc20bad68 ARM: change MUL16() macro to inline function mru parents: 8114 diff changeset	88 {
e61cc20bad68 ARM: change MUL16() macro to inline function mru parents: 8114 diff changeset	89 int rt;
e61cc20bad68 ARM: change MUL16() macro to inline function mru parents: 8114 diff changeset	90 __asm__ ("smulbb %0, %1, %2" : "=r"(rt) : "r"(ra), "r"(rb));
e61cc20bad68 ARM: change MUL16() macro to inline function mru parents: 8114 diff changeset	91 return rt;
e61cc20bad68 ARM: change MUL16() macro to inline function mru parents: 8114 diff changeset	92 }
3733 d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	93
d1b5acd0b680 New single instruction math operation header lu_zero parents: diff changeset	94 #endif
5163 9ecbfc0c82bf add multiple inclusion guards to headers mru parents: 3947 diff changeset	95
8677 3c484b73ca73 ARM: optimised mid_pred() mru parents: 8676 diff changeset	96 #define mid_pred mid_pred
3c484b73ca73 ARM: optimised mid_pred() mru parents: 8676 diff changeset	97 static inline av_const int mid_pred(int a, int b, int c)
3c484b73ca73 ARM: optimised mid_pred() mru parents: 8676 diff changeset	98 {
3c484b73ca73 ARM: optimised mid_pred() mru parents: 8676 diff changeset	99 int m;
3c484b73ca73 ARM: optimised mid_pred() mru parents: 8676 diff changeset	100 __asm__ volatile (
3c484b73ca73 ARM: optimised mid_pred() mru parents: 8676 diff changeset	101 "mov %0, %2 \n\t"
3c484b73ca73 ARM: optimised mid_pred() mru parents: 8676 diff changeset	102 "cmp %1, %2 \n\t"
3c484b73ca73 ARM: optimised mid_pred() mru parents: 8676 diff changeset	103 "movgt %0, %1 \n\t"
3c484b73ca73 ARM: optimised mid_pred() mru parents: 8676 diff changeset	104 "movgt %1, %2 \n\t"
3c484b73ca73 ARM: optimised mid_pred() mru parents: 8676 diff changeset	105 "cmp %1, %3 \n\t"
3c484b73ca73 ARM: optimised mid_pred() mru parents: 8676 diff changeset	106 "movle %1, %3 \n\t"
3c484b73ca73 ARM: optimised mid_pred() mru parents: 8676 diff changeset	107 "cmp %0, %1 \n\t"
3c484b73ca73 ARM: optimised mid_pred() mru parents: 8676 diff changeset	108 "movgt %0, %1 \n\t"
3c484b73ca73 ARM: optimised mid_pred() mru parents: 8676 diff changeset	109 : "=&r"(m), "+r"(a)
3c484b73ca73 ARM: optimised mid_pred() mru parents: 8676 diff changeset	110 : "r"(b), "r"(c));
3c484b73ca73 ARM: optimised mid_pred() mru parents: 8676 diff changeset	111 return m;
3c484b73ca73 ARM: optimised mid_pred() mru parents: 8676 diff changeset	112 }
3c484b73ca73 ARM: optimised mid_pred() mru parents: 8676 diff changeset	113
9141 489def16f0c7 ARM: disable inline asm for armcc mru parents: 9079 diff changeset	114 #endif /* HAVE_INLINE_ASM */
489def16f0c7 ARM: disable inline asm for armcc mru parents: 9079 diff changeset	115
8359 9281a8a9387a ARM: replace "armv4l" with "arm" mru parents: 8201 diff changeset	116 #endif /* AVCODEC_ARM_MATHOPS_H */

Mercurial > libavcodec.hg

annotate arm/mathops.h @ 12510:ef2f2db5b7be libavcodec