diff alpha/pixops.h @ 214:73df666cacc7 libavcodec

Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
author nickols_k
date Sun, 20 Jan 2002 14:48:02 +0000
parents
children 718a22dc121f
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/alpha/pixops.h	Sun Jan 20 14:48:02 2002 +0000
@@ -0,0 +1,135 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* This file is intended to be #included with proper definitions of
+ * PIXOPNAME, BTYPE, AVG2, AVG4 and STORE.  */
+
+/*
+ * PIXOPNAME(_pixels_axp): plain row-by-row transfer.
+ *
+ * For each of h rows, one word is loaded from pixels and handed to STORE
+ * together with block; afterwards both pointers move on by line_size
+ * elements.  PIXOPNAME, BTYPE and STORE are supplied by whoever #includes
+ * this file, so the element type of block and what "store" actually does
+ * are decided there.
+ *
+ * The low three address bits of pixels are examined once, before the loop:
+ * an 8-byte aligned start address selects ldq, anything else selects uldq.
+ * NOTE(review): only the start address is tested, so the ldq path
+ * presumably relies on line_size keeping every row aligned -- confirm
+ * against the callers.
+ *
+ * h must be at least 1: the loops are bottom-tested and stop only when the
+ * decremented counter reaches exactly zero.
+ */
+static void PIXOPNAME(_pixels_axp)(BTYPE *block, const UINT8 *pixels,
+				   int line_size, int h)
+{
+    const UINT8 *src = pixels;
+    BTYPE *dst = block;
+    int rows = h;
+    const int aligned = ((size_t) src & 0x7) == 0;
+
+    if (aligned) {
+	/* Start address is 8-byte aligned: rows are read with ldq.  */
+	do {
+	    STORE(ldq(src), dst);
+	    src += line_size;
+	    dst += line_size;
+	} while (--rows != 0);
+    } else {
+	/* Start address has low bits set: rows are read with uldq.  */
+	do {
+	    STORE(uldq(src), dst);
+	    src += line_size;
+	    dst += line_size;
+	} while (--rows != 0);
+    }
+}
+
+/*
+ * PIXOPNAME(_pixels_x2_axp): horizontal two-word average.
+ *
+ * Per row, the word loaded at pixels is paired with a second word built by
+ * shifting the first one right by one byte and placing pixels[8] into bits
+ * 56..63.  AVG2 of the pair is handed to STORE together with block, then
+ * both pointers advance by line_size elements.
+ * NOTE(review): with a little-endian 64-bit load the second word would be
+ * the eight bytes starting at pixels + 1 -- confirm against the ldq/uldq
+ * definitions, which live outside this file.
+ *
+ * Besides the word load, every row reads the single byte pixels[8].
+ *
+ * The low three address bits of pixels are tested once up front to choose
+ * between ldq (aligned start) and uldq (anything else).
+ *
+ * h must be at least 1: the loops are bottom-tested and stop only when the
+ * decremented counter reaches exactly zero.
+ */
+static void PIXOPNAME(_pixels_x2_axp)(BTYPE *block, const UINT8 *pixels,
+				      int line_size, int h)
+{
+    const UINT8 *src = pixels;
+    BTYPE *dst = block;
+    int rows = h;
+    const int aligned = ((size_t) src & 0x7) == 0;
+
+    if (aligned) {
+	do {
+	    const UINT64 cur     = ldq(src);
+	    /* Drop the lowest byte, bring src[8] in as the top byte.  */
+	    const UINT64 shifted = (cur >> 8) | ((UINT64) src[8] << 56);
+
+	    STORE(AVG2(cur, shifted), dst);
+	    src += line_size;
+	    dst += line_size;
+	} while (--rows != 0);
+    } else {
+	do {
+	    const UINT64 cur     = uldq(src);
+	    /* Drop the lowest byte, bring src[8] in as the top byte.  */
+	    const UINT64 shifted = (cur >> 8) | ((UINT64) src[8] << 56);
+
+	    STORE(AVG2(cur, shifted), dst);
+	    src += line_size;
+	    dst += line_size;
+	} while (--rows != 0);
+    }
+}
+
+/*
+ * PIXOPNAME(_pixels_y2_axp): vertical two-word average.
+ *
+ * The word of the first row is loaded before the loop.  Every pass then
+ * steps pixels by line_size, loads the word of that next row, hands AVG2
+ * of the previous and the new word to STORE together with block, advances
+ * block by line_size elements and keeps the new word for the following
+ * pass.  Consequently h + 1 source rows are loaded for h STORE calls.
+ *
+ * The low three address bits of pixels are tested once up front to choose
+ * between ldq (aligned start) and uldq (anything else).
+ *
+ * h must be at least 1: the loops are bottom-tested and stop only when the
+ * decremented counter reaches exactly zero.
+ */
+static void PIXOPNAME(_pixels_y2_axp)(BTYPE *block, const UINT8 *pixels,
+				      int line_size, int h)
+{
+    const UINT8 *src = pixels;
+    BTYPE *dst = block;
+    int rows = h;
+    const int aligned = ((size_t) src & 0x7) == 0;
+
+    if (aligned) {
+	UINT64 prev_row = ldq(src);
+
+	do {
+	    UINT64 this_row;
+
+	    src += line_size;
+	    this_row = ldq(src);
+	    STORE(AVG2(prev_row, this_row), dst);
+	    dst += line_size;
+	    /* The row just loaded is the first operand next time round.  */
+	    prev_row = this_row;
+	} while (--rows != 0);
+    } else {
+	UINT64 prev_row = uldq(src);
+
+	do {
+	    UINT64 this_row;
+
+	    src += line_size;
+	    this_row = uldq(src);
+	    STORE(AVG2(prev_row, this_row), dst);
+	    dst += line_size;
+	    /* The row just loaded is the first operand next time round.  */
+	    prev_row = this_row;
+	} while (--rows != 0);
+    }
+}
+
+/*
+ * PIXOPNAME(_pixels_xy2_axp): four-word average over two adjacent rows.
+ *
+ * For every row two words are formed: the word loaded at pixels, and that
+ * word shifted right by one byte with pixels[8] placed into bits 56..63.
+ * The pair belonging to the first row is built before the loop.  Every
+ * pass steps pixels by line_size, builds the pair of that next row, hands
+ * AVG4(previous word, previous shifted, new word, new shifted) to STORE
+ * together with block, advances block by line_size elements and carries
+ * the new pair over to the following pass.  Consequently h + 1 source rows
+ * are loaded for h STORE calls, and pixels[8] is read on each of them.
+ *
+ * The low three address bits of pixels are tested once up front to choose
+ * between ldq (aligned start) and uldq (anything else).
+ *
+ * h must be at least 1: the loops are bottom-tested and stop only when the
+ * decremented counter reaches exactly zero.
+ *
+ * Possible speed-up, not implemented: recycle AVG4 intermediate results
+ * from the previous loop pass.
+ */
+static void PIXOPNAME(_pixels_xy2_axp)(BTYPE *block, const UINT8 *pixels,
+				       int line_size, int h)
+{
+    const UINT8 *src = pixels;
+    BTYPE *dst = block;
+    int rows = h;
+    const int aligned = ((size_t) src & 0x7) == 0;
+
+    if (aligned) {
+	UINT64 prev_w = ldq(src);
+	UINT64 prev_s = (prev_w >> 8) | ((UINT64) src[8] << 56);
+
+	do {
+	    UINT64 next_w, next_s;
+
+	    src += line_size;
+	    next_w = ldq(src);
+	    next_s = (next_w >> 8) | ((UINT64) src[8] << 56);
+
+	    STORE(AVG4(prev_w, prev_s, next_w, next_s), dst);
+
+	    dst += line_size;
+	    /* Carry this row's pair over as the "previous" pair.  */
+	    prev_w = next_w;
+	    prev_s = next_s;
+	} while (--rows != 0);
+    } else {
+	UINT64 prev_w = uldq(src);
+	UINT64 prev_s = (prev_w >> 8) | ((UINT64) src[8] << 56);
+
+	do {
+	    UINT64 next_w, next_s;
+
+	    src += line_size;
+	    next_w = uldq(src);
+	    next_s = (next_w >> 8) | ((UINT64) src[8] << 56);
+
+	    STORE(AVG4(prev_w, prev_s, next_w, next_s), dst);
+
+	    dst += line_size;
+	    /* Carry this row's pair over as the "previous" pair.  */
+	    prev_w = next_w;
+	    prev_s = next_s;
+	} while (--rows != 0);
+    }
+}