diff options
author | George Spelvin <linux@sciencehorizons.net> | 2016-05-26 11:36:19 -0400 |
---|---|---|
committer | George Spelvin <linux@sciencehorizons.net> | 2016-05-28 15:48:57 -0400 |
commit | 14c44b95b3dcb8ff1d627e6b78f57c4373d375cb (patch) | |
tree | e81c581cdff3075036b99e0dbf2f0c47afe1e6ec | |
parent | 468a9428521e7d00fb21250af363eb94dc1d6861 (diff) |
m68k: Add <asm/hash.h>
This provides a multiply by constant GOLDEN_RATIO_32 = 0x61C88647
for the original mc68000, which lacks a 32x32-bit multiply instruction.
Yes, the amount of optimization effort put in is excessive. :-)
Shift-add chain found by Yevgen Voronenko's Hcub algorithm at
http://spiral.ece.cmu.edu/mcm/gen.html
Signed-off-by: George Spelvin <linux@sciencehorizons.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: Andreas Schwab <schwab@linux-m68k.org>
Cc: Philippe De Muyter <phdm@macq.eu>
Cc: linux-m68k@lists.linux-m68k.org
-rw-r--r-- | arch/m68k/Kconfig.cpu | 1 | ||||
-rw-r--r-- | arch/m68k/include/asm/hash.h | 59 |
2 files changed, 60 insertions, 0 deletions
diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 0dfcf1281e9c..bf3de464cf3c 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu | |||
@@ -40,6 +40,7 @@ config M68000 | |||
40 | select CPU_HAS_NO_MULDIV64 | 40 | select CPU_HAS_NO_MULDIV64 |
41 | select CPU_HAS_NO_UNALIGNED | 41 | select CPU_HAS_NO_UNALIGNED |
42 | select GENERIC_CSUM | 42 | select GENERIC_CSUM |
43 | select HAVE_ARCH_HASH | ||
43 | help | 44 | help |
44 | The Freescale (was Motorola) 68000 CPU is the first generation of | 45 | The Freescale (was Motorola) 68000 CPU is the first generation of |
45 | the well known M68K family of processors. The CPU core as well as | 46 | the well known M68K family of processors. The CPU core as well as |
diff --git a/arch/m68k/include/asm/hash.h b/arch/m68k/include/asm/hash.h new file mode 100644 index 000000000000..6407af84a994 --- /dev/null +++ b/arch/m68k/include/asm/hash.h | |||
@@ -0,0 +1,59 @@ | |||
1 | #ifndef _ASM_HASH_H | ||
2 | #define _ASM_HASH_H | ||
3 | |||
4 | /* | ||
5 | * If CONFIG_M68000=y (original mc68000/010), this file is #included | ||
6 | * to work around the lack of a MULU.L instruction. | ||
7 | */ | ||
8 | |||
9 | #define HAVE_ARCH__HASH_32 1 | ||
10 | /* | ||
11 | * While it would be legal to substitute a different hash operation | ||
12 | * entirely, let's keep it simple and just use an optimized multiply | ||
13 | * by GOLDEN_RATIO_32 = 0x61C88647. | ||
14 | * | ||
15 | * The best way to do that appears to be to multiply by 0x8647 with | ||
16 | * shifts and adds, and use mulu.w to multiply the high half by 0x61C8. | ||
17 | * | ||
18 | * Because the 68000 has multi-cycle shifts, this addition chain is | ||
19 | * chosen to minimise the shift distances. | ||
20 | * | ||
21 | * Despite every attempt to spoon-feed it simple operations, GCC | ||
22 | * 6.1.1 doggedly insists on doing annoying things like converting | ||
23 | * "lsl.l #2,<reg>" (12 cycles) to two adds (8+8 cycles). | ||
24 | * | ||
25 | * It also likes to notice two shifts in a row, like "a = x << 2" and | ||
26 | * "a <<= 7", and convert that to "a = x << 9". But shifts longer | ||
27 | * than 8 bits are extra-slow on m68k, so that's a lose. | ||
28 | * | ||
29 | * Since the 68000 is a very simple in-order processor with no | ||
30 | * instruction scheduling effects on execution time, we can safely | ||
31 | * take it out of GCC's hands and write one big asm() block. | ||
32 | * | ||
33 | * Without calling overhead, this operation is 30 bytes (14 instructions | ||
34 | * plus one immediate constant) and 166 cycles. | ||
35 | * | ||
36 | * (Because %2 is fetched twice, it can't be postincrement, and thus it | ||
37 | * can't be a fully general "g" or "m". Register is preferred, but | ||
38 | * offsettable memory or immediate will work.) | ||
39 | */ | ||
40 | static inline u32 __attribute_const__ __hash_32(u32 x) | ||
41 | { | ||
42 | u32 a, b; | ||
43 | |||
44 | asm( "move.l %2,%0" /* a = x * 0x0001 */ | ||
45 | "\n lsl.l #2,%0" /* a = x * 0x0004 */ | ||
46 | "\n move.l %0,%1" | ||
47 | "\n lsl.l #7,%0" /* a = x * 0x0200 */ | ||
48 | "\n add.l %2,%0" /* a = x * 0x0201 */ | ||
49 | "\n add.l %0,%1" /* b = x * 0x0205 */ | ||
50 | "\n add.l %0,%0" /* a = x * 0x0402 */ | ||
51 | "\n add.l %0,%1" /* b = x * 0x0607 */ | ||
52 | "\n lsl.l #5,%0" /* a = x * 0x8040 */ | ||
53 | : "=&d,d" (a), "=&r,r" (b) | ||
54 | : "r,roi?" (x)); /* a+b = x*0x8647 */ | ||
55 | |||
56 | return ((u16)(x*0x61c8) << 16) + a + b; | ||
57 | } | ||
58 | |||
59 | #endif /* _ASM_HASH_H */ | ||