diff options
author | James Hogan <jhogan@kernel.org> | 2017-12-07 02:20:46 -0500 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2018-01-11 08:40:31 -0500 |
commit | ebabcf17bcd7ce968b1631ebe08236275698f39b (patch) | |
tree | 6a11781dbc8689ed2e3cba7d3bfddd6343d13163 | |
parent | ccf85c744275de0ba40beff0bf9206a094f12e62 (diff) |
MIPS: Implement __multi3 for GCC7 MIPS64r6 builds
GCC7 is a bit too eager to generate suboptimal __multi3 calls (128bit
multiply with 128bit result) for MIPS64r6 builds, even in code which
doesn't explicitly use 128bit types, such as the following:
unsigned long func(unsigned long a, unsigned long b)
{
return a > (~0UL) / b;
}
Which GCC rearranges to:
return (unsigned __int128)a * (unsigned __int128)b > 0xffffffffffffffff;
Therefore implement __multi3, but only for MIPS64r6 with GCC7 as under
normal circumstances we wouldn't expect any calls to __multi3 to be
generated from kernel code.
Reported-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: James Hogan <jhogan@kernel.org>
Tested-by: Waldemar Brodkorb <wbx@openadk.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Maciej W. Rozycki <macro@mips.com>
Cc: Matthew Fortune <matthew.fortune@mips.com>
Cc: Florian Fainelli <florian@openwrt.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/17890/
-rw-r--r-- | arch/mips/lib/Makefile | 3 | ||||
-rw-r--r-- | arch/mips/lib/libgcc.h | 17 | ||||
-rw-r--r-- | arch/mips/lib/multi3.c | 54 |
3 files changed, 73 insertions, 1 deletions
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile index 78c2affeabf8..e84e12655fa8 100644 --- a/arch/mips/lib/Makefile +++ b/arch/mips/lib/Makefile | |||
@@ -16,4 +16,5 @@ obj-$(CONFIG_CPU_R3000) += r3k_dump_tlb.o | |||
16 | obj-$(CONFIG_CPU_TX39XX) += r3k_dump_tlb.o | 16 | obj-$(CONFIG_CPU_TX39XX) += r3k_dump_tlb.o |
17 | 17 | ||
18 | # libgcc-style stuff needed in the kernel | 18 | # libgcc-style stuff needed in the kernel |
19 | obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o ucmpdi2.o | 19 | obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o multi3.o \ |
20 | ucmpdi2.o | ||
diff --git a/arch/mips/lib/libgcc.h b/arch/mips/lib/libgcc.h index 28002ed90c2c..199a7f96282f 100644 --- a/arch/mips/lib/libgcc.h +++ b/arch/mips/lib/libgcc.h | |||
@@ -10,10 +10,18 @@ typedef int word_type __attribute__ ((mode (__word__))); | |||
10 | struct DWstruct { | 10 | struct DWstruct { |
11 | int high, low; | 11 | int high, low; |
12 | }; | 12 | }; |
13 | |||
14 | struct TWstruct { /* the two long long halves of a double-width value; high half declared first in this branch */ | ||
15 | long long high, low; | ||
16 | }; | ||
13 | #elif defined(__LITTLE_ENDIAN) | 17 | #elif defined(__LITTLE_ENDIAN) |
14 | struct DWstruct { | 18 | struct DWstruct { |
15 | int low, high; | 19 | int low, high; |
16 | }; | 20 | }; |
21 | |||
22 | struct TWstruct { /* __LITTLE_ENDIAN variant: same two halves, low half declared first */ | ||
23 | long long low, high; | ||
24 | }; | ||
17 | #else | 25 | #else |
18 | #error I feel sick. | 26 | #error I feel sick. |
19 | #endif | 27 | #endif |
@@ -23,4 +31,13 @@ typedef union { | |||
23 | long long ll; | 31 | long long ll; |
24 | } DWunion; | 32 | } DWunion; |
25 | 33 | ||
34 | #if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) | ||
35 | typedef int ti_type __attribute__((mode(TI))); /* integer type using GCC machine mode TI, the 128-bit type taken and returned by __multi3 */ | ||
36 | |||
37 | typedef union { /* overlays a ti_type with its high/low halves so each half can be accessed separately */ | ||
38 | struct TWstruct s; | ||
39 | ti_type ti; | ||
40 | } TWunion; | ||
41 | #endif | ||
42 | |||
26 | #endif /* __ASM_LIBGCC_H */ | 43 | #endif /* __ASM_LIBGCC_H */ |
diff --git a/arch/mips/lib/multi3.c b/arch/mips/lib/multi3.c new file mode 100644 index 000000000000..111ad475aa0c --- /dev/null +++ b/arch/mips/lib/multi3.c | |||
@@ -0,0 +1,54 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | #include <linux/export.h> | ||
3 | |||
4 | #include "libgcc.h" | ||
5 | |||
6 | /* | ||
7 | * GCC 7 suboptimally generates __multi3 calls for mips64r6, so for that | ||
8 | * specific case only we'll implement it here. | ||
9 | * | ||
10 | * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82981 | ||
11 | */ | ||
12 | #if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ == 7) | ||
13 | |||
14 | /* multiply 64-bit values using the dmulu instruction, low 64-bits of the product returned */ | ||
15 | static inline long long notrace dmulu(long long a, long long b) | ||
16 | { | ||
17 | long long res; | ||
18 | |||
19 | asm ("dmulu %0,%1,%2" : "=r" (res) : "r" (a), "r" (b)); /* %0 = res (output), %1 = a, %2 = b; all in general registers */ | ||
20 | return res; | ||
21 | } | ||
22 | |||
23 | /* multiply 64-bit unsigned values using the dmuhu instruction, high 64-bits of 128-bit result returned (operands are declared long long but multiplied as unsigned) */ | ||
24 | static inline long long notrace dmuhu(long long a, long long b) | ||
25 | { | ||
26 | long long res; | ||
27 | |||
28 | asm ("dmuhu %0,%1,%2" : "=r" (res) : "r" (a), "r" (b)); /* %0 = res (output), %1 = a, %2 = b; all in general registers */ | ||
29 | return res; | ||
30 | } | ||
31 | |||
32 | /* multiply 128-bit values, low 128-bits returned; libgcc-style helper for the __multi3 calls the compiler emits */ | ||
33 | ti_type notrace __multi3(ti_type a, ti_type b) | ||
34 | { | ||
35 | TWunion res, aa, bb; | ||
36 | |||
37 | aa.ti = a; /* copy into unions so the 64-bit halves can be read via .s.low / .s.high */ | ||
38 | bb.ti = b; | ||
39 | |||
40 | /* | ||
41 | * a * b = (a.lo * b.lo) | ||
42 | * + 2^64 * (a.hi * b.lo + a.lo * b.hi) | ||
43 | * [+ 2^128 * (a.hi * b.hi)] -- not computed: lies wholly above the 128 bits returned | ||
44 | */ | ||
45 | res.s.low = dmulu(aa.s.low, bb.s.low); /* low 64 bits of a.lo * b.lo */ | ||
46 | res.s.high = dmuhu(aa.s.low, bb.s.low); /* high 64 bits of a.lo * b.lo carry into the upper half */ | ||
47 | res.s.high += dmulu(aa.s.high, bb.s.low); /* cross term: only its low 64 bits land inside the result */ | ||
48 | res.s.high += dmulu(aa.s.low, bb.s.high); /* other cross term, likewise truncated to 64 bits */ | ||
49 | |||
50 | return res.ti; | ||
51 | } | ||
52 | EXPORT_SYMBOL(__multi3); | ||
53 | |||
54 | #endif /* 64BIT && CPU_MIPSR6 && GCC7 */ | ||