m68k: Add <asm/hash.h>

This provides a multiply by constant GOLDEN_RATIO_32 = 0x61C88647 for the original mc68000, which lacks a 32x32-bit multiply instruction. Yes, the amount of optimization effort put in is excessive. :-) Shift-add chain found by Yevgen Voronenko's Hcub algorithm at http://spiral.ece.cmu.edu/mcm/gen.html Signed-off-by: George Spelvin <linux@sciencehorizons.net> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Greg Ungerer <gerg@linux-m68k.org> Cc: Andreas Schwab <schwab@linux-m68k.org> Cc: Philippe De Muyter <phdm@macq.eu> Cc: linux-m68k@lists.linux-m68k.org
author: George Spelvin <linux@sciencehorizons.net> 2016-05-26 11:36:19 -0400
committer: George Spelvin <linux@sciencehorizons.net> 2016-05-28 15:48:57 -0400
commit: 14c44b95b3dcb8ff1d627e6b78f57c4373d375cb (patch)
tree: e81c581cdff3075036b99e0dbf2f0c47afe1e6ec
parent: 468a9428521e7d00fb21250af363eb94dc1d6861 (diff)
2 files changed, 60 insertions, 0 deletions
diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
index 0dfcf1281e9c..bf3de464cf3c 100644
--- a/arch/m68k/Kconfig.cpu
+++ b/arch/m68k/Kconfig.cpu
@@ -40,6 +40,7 @@ config M68000
        select CPU_HAS_NO_MULDIV64
        select CPU_HAS_NO_UNALIGNED
        select GENERIC_CSUM
+        select HAVE_ARCH_HASH
        help
          The Freescale (was Motorola) 68000 CPU is the first generation of
          the well known M68K family of processors. The CPU core as well as
diff --git a/arch/m68k/include/asm/hash.h b/arch/m68k/include/asm/hash.h
new file mode 100644
index 000000000000..6407af84a994
--- /dev/null
+++ b/arch/m68k/include/asm/hash.h
@@ -0,0 +1,59 @@
+#ifndef _ASM_HASH_H
+#define _ASM_HASH_H
+/*
+ * If CONFIG_M68000=y (original mc68000/010), this file is #included
+ * to work around the lack of a MULU.L instruction.
+ */
+#define HAVE_ARCH__HASH_32 1
+/*
+ * While it would be legal to substitute a different hash operation
+ * entirely, let's keep it simple and just use an optimized multiply
+ * by GOLDEN_RATIO_32 = 0x61C88647.
+ *
+ * The best way to do that appears to be to multiply by 0x8647 with
+ * shifts and adds, and use mulu.w to multiply the high half by 0x61C8.
+ *
+ * Because the 68000 has multi-cycle shifts, this addition chain is
+ * chosen to minimise the shift distances.
+ *
+ * Despite every attempt to spoon-feed it simple operations, GCC
+ * 6.1.1 doggedly insists on doing annoying things like converting
+ * "lsl.l #2,<reg>" (12 cycles) to two adds (8+8 cycles).
+ *
+ * It also likes to notice two shifts in a row, like "a = x << 2" and
+ * "a <<= 7", and convert that to "a = x << 9".  But shifts longer
+ * than 8 bits are extra-slow on m68k, so that's a lose.
+ *
+ * Since the 68000 is a very simple in-order processor with no
+ * instruction scheduling effects on execution time, we can safely
+ * take it out of GCC's hands and write one big asm() block.
+ *
+ * Without calling overhead, this operation is 30 bytes (14 instructions
+ * plus one immediate constant) and 166 cycles.
+ *
+ * (Because %2 is fetched twice, it can't be postincrement, and thus it
+ * can't be a fully general "g" or "m".  Register is preferred, but
+ * offsettable memory or immediate will work.)
+ */
+static inline u32 __attribute_const__ __hash_32(u32 x)
+{
+        u32 a, b;
+        asm(   "move.l %2,%0"   /* a = x * 0x0001 */
+        "\n     lsl.l #2,%0"    /* a = x * 0x0004 */
+        "\n     move.l %0,%1"
+        "\n     lsl.l #7,%0"    /* a = x * 0x0200 */
+        "\n     add.l %2,%0"    /* a = x * 0x0201 */
+        "\n     add.l %0,%1"    /* b = x * 0x0205 */
+        "\n     add.l %0,%0"    /* a = x * 0x0402 */
+        "\n     add.l %0,%1"    /* b = x * 0x0607 */
+        "\n     lsl.l #5,%0"    /* a = x * 0x8040 */
+        : "=&d,d" (a), "=&r,r" (b)
+        : "r,roi?" (x));        /* a+b = x*0x8647 */
+        return ((u16)(x*0x61c8) << 16) + a + b;
+}
+#endif  /* _ASM_HASH_H */
author	George Spelvin <linux@sciencehorizons.net>	2016-05-26 11:36:19 -0400
committer	George Spelvin <linux@sciencehorizons.net>	2016-05-28 15:48:57 -0400
commit	14c44b95b3dcb8ff1d627e6b78f57c4373d375cb (patch)
tree	e81c581cdff3075036b99e0dbf2f0c47afe1e6ec
parent	468a9428521e7d00fb21250af363eb94dc1d6861 (diff)

diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 0dfcf1281e9c..bf3de464cf3c 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu
@@ -40,6 +40,7 @@ config M68000
40	select CPU_HAS_NO_MULDIV64	40	select CPU_HAS_NO_MULDIV64
41	select CPU_HAS_NO_UNALIGNED	41	select CPU_HAS_NO_UNALIGNED
42	select GENERIC_CSUM	42	select GENERIC_CSUM
		43	select HAVE_ARCH_HASH
43	help	44	help
44	The Freescale (was Motorola) 68000 CPU is the first generation of	45	The Freescale (was Motorola) 68000 CPU is the first generation of
45	the well known M68K family of processors. The CPU core as well as	46	the well known M68K family of processors. The CPU core as well as


diff --git a/arch/m68k/include/asm/hash.h b/arch/m68k/include/asm/hash.h new file mode 100644 index 000000000000..6407af84a994 --- /dev/null +++ b/arch/m68k/include/asm/hash.h
@@ -0,0 +1,59 @@
		1	#ifndef _ASM_HASH_H
		2	#define _ASM_HASH_H
		3
		4	/*
		5	* If CONFIG_M68000=y (original mc68000/010), this file is #included
		6	* to work around the lack of a MULU.L instruction.
		7	*/
		8
		9	#define HAVE_ARCH__HASH_32 1
		10	/*
		11	* While it would be legal to substitute a different hash operation
		12	* entirely, let's keep it simple and just use an optimized multiply
		13	* by GOLDEN_RATIO_32 = 0x61C88647.
		14	*
		15	* The best way to do that appears to be to multiply by 0x8647 with
		16	* shifts and adds, and use mulu.w to multiply the high half by 0x61C8.
		17	*
		18	* Because the 68000 has multi-cycle shifts, this addition chain is
		19	* chosen to minimise the shift distances.
		20	*
		21	* Despite every attempt to spoon-feed it simple operations, GCC
		22	* 6.1.1 doggedly insists on doing annoying things like converting
		23	* "lsl.l #2,<reg>" (12 cycles) to two adds (8+8 cycles).
		24	*
		25	* It also likes to notice two shifts in a row, like "a = x << 2" and
		26	* "a <<= 7", and convert that to "a = x << 9". But shifts longer
		27	* than 8 bits are extra-slow on m68k, so that's a lose.
		28	*
		29	* Since the 68000 is a very simple in-order processor with no
		30	* instruction scheduling effects on execution time, we can safely
		31	* take it out of GCC's hands and write one big asm() block.
		32	*
		33	* Without calling overhead, this operation is 30 bytes (14 instructions
		34	* plus one immediate constant) and 166 cycles.
		35	*
		36	* (Because %2 is fetched twice, it can't be postincrement, and thus it
		37	* can't be a fully general "g" or "m". Register is preferred, but
		38	* offsettable memory or immediate will work.)
		39	*/
		40	static inline u32 __attribute_const__ __hash_32(u32 x)
		41	{
		42	u32 a, b;
		43
		44	asm( "move.l %2,%0" /* a = x * 0x0001 */
		45	"\n lsl.l #2,%0" /* a = x * 0x0004 */
		46	"\n move.l %0,%1"
		47	"\n lsl.l #7,%0" /* a = x * 0x0200 */
		48	"\n add.l %2,%0" /* a = x * 0x0201 */
		49	"\n add.l %0,%1" /* b = x * 0x0205 */
		50	"\n add.l %0,%0" /* a = x * 0x0402 */
		51	"\n add.l %0,%1" /* b = x * 0x0607 */
		52	"\n lsl.l #5,%0" /* a = x * 0x8040 */
		53	: "=&d,d" (a), "=&r,r" (b)
		54	: "r,roi?" (x)); /* a+b = x0x8647 /
		55
		56	return ((u16)(x*0x61c8) << 16) + a + b;
		57	}
		58
		59	#endif /* _ASM_HASH_H */