aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorSteven J. Hill <Steven.Hill@imgtec.com>2013-03-25 14:40:49 -0400
committerRalf Baechle <ralf@linux-mips.org>2013-05-09 11:55:19 -0400
commit26c5e07d1478021914801c8c7dd77c9268940e4f (patch)
treea0594bbc2c6a5881e07381e4bfc15e6b71cc901d /arch
parentbce860833ab1e40113eb0efab34d0c8f3b0226b1 (diff)
MIPS: microMIPS: Optimise 'memset' core library function.
Optimise 'memset' to use microMIPS instructions and/or optimisations for binary size reduction. When the microMIPS ISA is not being used, the library function compiles to the original binary code. Signed-off-by: Steven J. Hill <Steven.Hill@imgtec.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/mips/include/asm/asm.h2
-rw-r--r--arch/mips/lib/memset.S84
2 files changed, 56 insertions, 30 deletions
diff --git a/arch/mips/include/asm/asm.h b/arch/mips/include/asm/asm.h
index 164a21e65b42..879691d194af 100644
--- a/arch/mips/include/asm/asm.h
+++ b/arch/mips/include/asm/asm.h
@@ -296,6 +296,7 @@ symbol = value
296#define LONG_SUBU subu 296#define LONG_SUBU subu
297#define LONG_L lw 297#define LONG_L lw
298#define LONG_S sw 298#define LONG_S sw
299#define LONG_SP swp
299#define LONG_SLL sll 300#define LONG_SLL sll
300#define LONG_SLLV sllv 301#define LONG_SLLV sllv
301#define LONG_SRL srl 302#define LONG_SRL srl
@@ -318,6 +319,7 @@ symbol = value
318#define LONG_SUBU dsubu 319#define LONG_SUBU dsubu
319#define LONG_L ld 320#define LONG_L ld
320#define LONG_S sd 321#define LONG_S sd
322#define LONG_SP sdp
321#define LONG_SLL dsll 323#define LONG_SLL dsll
322#define LONG_SLLV dsllv 324#define LONG_SLLV dsllv
323#define LONG_SRL dsrl 325#define LONG_SRL dsrl
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index 053d3b0b0317..0580194e7402 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -5,7 +5,8 @@
5 * 5 *
6 * Copyright (C) 1998, 1999, 2000 by Ralf Baechle 6 * Copyright (C) 1998, 1999, 2000 by Ralf Baechle
7 * Copyright (C) 1999, 2000 Silicon Graphics, Inc. 7 * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
8 * Copyright (C) 2007 Maciej W. Rozycki 8 * Copyright (C) 2007 by Maciej W. Rozycki
9 * Copyright (C) 2011, 2012 MIPS Technologies, Inc.
9 */ 10 */
10#include <asm/asm.h> 11#include <asm/asm.h>
11#include <asm/asm-offsets.h> 12#include <asm/asm-offsets.h>
@@ -19,6 +20,20 @@
19#define LONG_S_R sdr 20#define LONG_S_R sdr
20#endif 21#endif
21 22
23#ifdef CONFIG_CPU_MICROMIPS
24#define STORSIZE (LONGSIZE * 2)
25#define STORMASK (STORSIZE - 1)
26#define FILL64RG t8
27#define FILLPTRG t7
28#undef LONG_S
29#define LONG_S LONG_SP
30#else
31#define STORSIZE LONGSIZE
32#define STORMASK LONGMASK
33#define FILL64RG a1
34#define FILLPTRG t0
35#endif
36
22#define EX(insn,reg,addr,handler) \ 37#define EX(insn,reg,addr,handler) \
239: insn reg, addr; \ 389: insn reg, addr; \
24 .section __ex_table,"a"; \ 39 .section __ex_table,"a"; \
@@ -26,23 +41,25 @@
26 .previous 41 .previous
27 42
28 .macro f_fill64 dst, offset, val, fixup 43 .macro f_fill64 dst, offset, val, fixup
29 EX(LONG_S, \val, (\offset + 0 * LONGSIZE)(\dst), \fixup) 44 EX(LONG_S, \val, (\offset + 0 * STORSIZE)(\dst), \fixup)
30 EX(LONG_S, \val, (\offset + 1 * LONGSIZE)(\dst), \fixup) 45 EX(LONG_S, \val, (\offset + 1 * STORSIZE)(\dst), \fixup)
31 EX(LONG_S, \val, (\offset + 2 * LONGSIZE)(\dst), \fixup) 46 EX(LONG_S, \val, (\offset + 2 * STORSIZE)(\dst), \fixup)
32 EX(LONG_S, \val, (\offset + 3 * LONGSIZE)(\dst), \fixup) 47 EX(LONG_S, \val, (\offset + 3 * STORSIZE)(\dst), \fixup)
33 EX(LONG_S, \val, (\offset + 4 * LONGSIZE)(\dst), \fixup) 48#if ((defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4)) || !defined(CONFIG_CPU_MICROMIPS))
34 EX(LONG_S, \val, (\offset + 5 * LONGSIZE)(\dst), \fixup) 49 EX(LONG_S, \val, (\offset + 4 * STORSIZE)(\dst), \fixup)
35 EX(LONG_S, \val, (\offset + 6 * LONGSIZE)(\dst), \fixup) 50 EX(LONG_S, \val, (\offset + 5 * STORSIZE)(\dst), \fixup)
36 EX(LONG_S, \val, (\offset + 7 * LONGSIZE)(\dst), \fixup) 51 EX(LONG_S, \val, (\offset + 6 * STORSIZE)(\dst), \fixup)
37#if LONGSIZE == 4 52 EX(LONG_S, \val, (\offset + 7 * STORSIZE)(\dst), \fixup)
38 EX(LONG_S, \val, (\offset + 8 * LONGSIZE)(\dst), \fixup) 53#endif
39 EX(LONG_S, \val, (\offset + 9 * LONGSIZE)(\dst), \fixup) 54#if (!defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4))
40 EX(LONG_S, \val, (\offset + 10 * LONGSIZE)(\dst), \fixup) 55 EX(LONG_S, \val, (\offset + 8 * STORSIZE)(\dst), \fixup)
41 EX(LONG_S, \val, (\offset + 11 * LONGSIZE)(\dst), \fixup) 56 EX(LONG_S, \val, (\offset + 9 * STORSIZE)(\dst), \fixup)
42 EX(LONG_S, \val, (\offset + 12 * LONGSIZE)(\dst), \fixup) 57 EX(LONG_S, \val, (\offset + 10 * STORSIZE)(\dst), \fixup)
43 EX(LONG_S, \val, (\offset + 13 * LONGSIZE)(\dst), \fixup) 58 EX(LONG_S, \val, (\offset + 11 * STORSIZE)(\dst), \fixup)
44 EX(LONG_S, \val, (\offset + 14 * LONGSIZE)(\dst), \fixup) 59 EX(LONG_S, \val, (\offset + 12 * STORSIZE)(\dst), \fixup)
45 EX(LONG_S, \val, (\offset + 15 * LONGSIZE)(\dst), \fixup) 60 EX(LONG_S, \val, (\offset + 13 * STORSIZE)(\dst), \fixup)
61 EX(LONG_S, \val, (\offset + 14 * STORSIZE)(\dst), \fixup)
62 EX(LONG_S, \val, (\offset + 15 * STORSIZE)(\dst), \fixup)
46#endif 63#endif
47 .endm 64 .endm
48 65
@@ -71,16 +88,20 @@ LEAF(memset)
711: 881:
72 89
73FEXPORT(__bzero) 90FEXPORT(__bzero)
74 sltiu t0, a2, LONGSIZE /* very small region? */ 91 sltiu t0, a2, STORSIZE /* very small region? */
75 bnez t0, .Lsmall_memset 92 bnez t0, .Lsmall_memset
76 andi t0, a0, LONGMASK /* aligned? */ 93 andi t0, a0, STORMASK /* aligned? */
77 94
95#ifdef CONFIG_CPU_MICROMIPS
96 move t8, a1 /* used by 'swp' instruction */
97 move t9, a1
98#endif
78#ifndef CONFIG_CPU_DADDI_WORKAROUNDS 99#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
79 beqz t0, 1f 100 beqz t0, 1f
80 PTR_SUBU t0, LONGSIZE /* alignment in bytes */ 101 PTR_SUBU t0, STORSIZE /* alignment in bytes */
81#else 102#else
82 .set noat 103 .set noat
83 li AT, LONGSIZE 104 li AT, STORSIZE
84 beqz t0, 1f 105 beqz t0, 1f
85 PTR_SUBU t0, AT /* alignment in bytes */ 106 PTR_SUBU t0, AT /* alignment in bytes */
86 .set at 107 .set at
@@ -99,24 +120,27 @@ FEXPORT(__bzero)
991: ori t1, a2, 0x3f /* # of full blocks */ 1201: ori t1, a2, 0x3f /* # of full blocks */
100 xori t1, 0x3f 121 xori t1, 0x3f
101 beqz t1, .Lmemset_partial /* no block to fill */ 122 beqz t1, .Lmemset_partial /* no block to fill */
102 andi t0, a2, 0x40-LONGSIZE 123 andi t0, a2, 0x40-STORSIZE
103 124
104 PTR_ADDU t1, a0 /* end address */ 125 PTR_ADDU t1, a0 /* end address */
105 .set reorder 126 .set reorder
1061: PTR_ADDIU a0, 64 1271: PTR_ADDIU a0, 64
107 R10KCBARRIER(0(ra)) 128 R10KCBARRIER(0(ra))
108 f_fill64 a0, -64, a1, .Lfwd_fixup 129 f_fill64 a0, -64, FILL64RG, .Lfwd_fixup
109 bne t1, a0, 1b 130 bne t1, a0, 1b
110 .set noreorder 131 .set noreorder
111 132
112.Lmemset_partial: 133.Lmemset_partial:
113 R10KCBARRIER(0(ra)) 134 R10KCBARRIER(0(ra))
114 PTR_LA t1, 2f /* where to start */ 135 PTR_LA t1, 2f /* where to start */
136#ifdef CONFIG_CPU_MICROMIPS
137 LONG_SRL t7, t0, 1
138#endif
115#if LONGSIZE == 4 139#if LONGSIZE == 4
116 PTR_SUBU t1, t0 140 PTR_SUBU t1, FILLPTRG
117#else 141#else
118 .set noat 142 .set noat
119 LONG_SRL AT, t0, 1 143 LONG_SRL AT, FILLPTRG, 1
120 PTR_SUBU t1, AT 144 PTR_SUBU t1, AT
121 .set at 145 .set at
122#endif 146#endif
@@ -126,9 +150,9 @@ FEXPORT(__bzero)
126 .set push 150 .set push
127 .set noreorder 151 .set noreorder
128 .set nomacro 152 .set nomacro
129 f_fill64 a0, -64, a1, .Lpartial_fixup /* ... but first do longs ... */ 153 f_fill64 a0, -64, FILL64RG, .Lpartial_fixup /* ... but first do longs ... */
1302: .set pop 1542: .set pop
131 andi a2, LONGMASK /* At most one long to go */ 155 andi a2, STORMASK /* At most one long to go */
132 156
133 beqz a2, 1f 157 beqz a2, 1f
134 PTR_ADDU a0, a2 /* What's left */ 158 PTR_ADDU a0, a2 /* What's left */
@@ -169,7 +193,7 @@ FEXPORT(__bzero)
169 193
170.Lpartial_fixup: 194.Lpartial_fixup:
171 PTR_L t0, TI_TASK($28) 195 PTR_L t0, TI_TASK($28)
172 andi a2, LONGMASK 196 andi a2, STORMASK
173 LONG_L t0, THREAD_BUADDR(t0) 197 LONG_L t0, THREAD_BUADDR(t0)
174 LONG_ADDU a2, t1 198 LONG_ADDU a2, t1
175 jr ra 199 jr ra
@@ -177,4 +201,4 @@ FEXPORT(__bzero)
177 201
178.Llast_fixup: 202.Llast_fixup:
179 jr ra 203 jr ra
180 andi v1, a2, LONGMASK 204 andi v1, a2, STORMASK