author    Maciej W. Rozycki <macro@linux-mips.org>    2018-10-02 07:50:16 -0400
committer Paul Burton <paul.burton@mips.com>          2018-10-09 13:31:03 -0400
commit    68dec269ee29c3abfd09596fbee7e40d875a6ab3 (patch)
tree      aeabc6974c4bbfb69613fe05d5d44889271a9bbe
parent    2f7619ae90bf78cf576b5e72087aab0435266fdb (diff)
MIPS: memset: Limit excessive `noreorder' assembly mode use
Rewrite to use the `reorder' assembly mode and remove manually scheduled
delay slots except where GAS cannot schedule a delay-slot instruction due
to a data dependency or a section switch (as is the case with the EX
macro).

No change in machine code produced.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
[paul.burton@mips.com: Fix conflict with commit 932afdeec18b ("MIPS: Add
 Kconfig variable for CPUs with unaligned load/store instructions")]
Signed-off-by: Paul Burton <paul.burton@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/20834/
Cc: Ralf Baechle <ralf@linux-mips.org>
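Background on the two GAS modes the patch juggles: under `.set noreorder' the
assembler emits instructions exactly as written, so the programmer must place
an instruction (or a nop) in every branch delay slot by hand; under
`.set reorder' GAS fills delay slots itself, either by moving a suitable
earlier instruction into the slot or by appending a nop. A minimal standalone
sketch of the difference is shown below; the symbol name, registers and labels
are illustrative only and are not taken from memset.S:

        .text
        .globl  delay_slot_demo
delay_slot_demo:
        .set    noreorder               /* programmer schedules the slot */
        bnez    a0, 1f
         addiu  a1, 1                   /* runs in the branch delay slot */
        .set    reorder                 /* GAS schedules the slot itself */
        bnez    a1, 1f                  /* assembler appends a nop or moves a
                                           prior instruction after the branch */
1:      jr      ra                      /* reorder mode: GAS also fills this
                                           delay slot */

With `reorder' the delay-slot bookkeeping moves into the assembler, which is
why the patch only drops back to `noreorder' around branches whose slots GAS
cannot schedule, such as those paired with the EX fault-handling macro.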
-rw-r--r--    arch/mips/lib/memset.S    48
1 file changed, 24 insertions, 24 deletions
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index 5334574075d4..418611ef13cf 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -78,7 +78,6 @@
 #endif
 	.endm
 
-	.set	noreorder
 	.align	5
 
 	/*
@@ -94,13 +93,16 @@
 	.endif
 
 	sltiu	t0, a2, STORSIZE	/* very small region? */
+	.set	noreorder
 	bnez	t0, .Lsmall_memset\@
 	andi	t0, a0, STORMASK	/* aligned? */
+	.set	reorder
 
 #ifdef CONFIG_CPU_MICROMIPS
 	move	t8, a1			/* used by 'swp' instruction */
 	move	t9, a1
 #endif
+	.set	noreorder
 #ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	beqz	t0, 1f
 	PTR_SUBU	t0, STORSIZE	/* alignment in bytes */
@@ -111,6 +113,7 @@
 	PTR_SUBU	t0, AT		/* alignment in bytes */
 	.set	at
 #endif
+	.set	reorder
 
 #ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
 	R10KCBARRIER(0(ra))
@@ -125,8 +128,10 @@
 #else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
 #define STORE_BYTE(N)				\
 	EX(sb, a1, N(a0), .Lbyte_fixup\@);	\
+	.set	noreorder;			\
 	beqz	t0, 0f;				\
-	PTR_ADDU	t0, 1;
+	PTR_ADDU	t0, 1;			\
+	.set	reorder;
 
 	PTR_ADDU	a2, t0		/* correct size */
 	PTR_ADDU	t0, 1
@@ -148,16 +153,14 @@
 #endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
 1:	ori	t1, a2, 0x3f		/* # of full blocks */
 	xori	t1, 0x3f
+	andi	t0, a2, 0x40-STORSIZE
 	beqz	t1, .Lmemset_partial\@	/* no block to fill */
-	andi	t0, a2, 0x40-STORSIZE
 
 	PTR_ADDU	t1, a0		/* end address */
-	.set	reorder
 1:	PTR_ADDIU	a0, 64
 	R10KCBARRIER(0(ra))
 	f_fill64	a0, -64, FILL64RG, .Lfwd_fixup\@, \mode
 	bne	t1, a0, 1b
-	.set	noreorder
 
 .Lmemset_partial\@:
 	R10KCBARRIER(0(ra))
@@ -173,20 +176,18 @@
 	PTR_SUBU	t1, AT
 	.set	at
 #endif
+	PTR_ADDU	a0, t0		/* dest ptr */
 	jr	t1
-	PTR_ADDU	a0, t0		/* dest ptr */
 
-	.set	push
-	.set	noreorder
-	.set	nomacro
 	/* ... but first do longs ... */
 	f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode
-2:	.set	pop
-	andi	a2, STORMASK		/* At most one long to go */
+2:	andi	a2, STORMASK		/* At most one long to go */
 
+	.set	noreorder
 	beqz	a2, 1f
 #ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
 	PTR_ADDU	a0, a2		/* What's left */
+	.set	reorder
 	R10KCBARRIER(0(ra))
 #ifdef __MIPSEB__
 	EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@)
@@ -195,6 +196,7 @@
 #endif
 #else
 	PTR_SUBU	t0, $0, a2
+	.set	reorder
 	move	a2, zero		/* No remaining longs */
 	PTR_ADDIU	t0, 1
 	STORE_BYTE(0)
@@ -210,20 +212,22 @@
 #endif
 0:
 #endif
-1:	jr	ra
-	move	a2, zero
+1:	move	a2, zero
+	jr	ra
 
 .Lsmall_memset\@:
+	PTR_ADDU	t1, a0, a2
 	beqz	a2, 2f
-	PTR_ADDU	t1, a0, a2
 
 1:	PTR_ADDIU	a0, 1		/* fill bytewise */
 	R10KCBARRIER(0(ra))
+	.set	noreorder
 	bne	t1, a0, 1b
 	EX(sb, a1, -1(a0), .Lsmall_fixup\@)
+	.set	reorder
 
-2:	jr	ra			/* done */
-	move	a2, zero
+2:	move	a2, zero
+	jr	ra			/* done */
 	.if __memset == 1
 	END(memset)
 	.set __memset, 0
@@ -237,14 +241,13 @@
 	 * a2 = a2 - t0 + 1
 	 */
 	PTR_SUBU	a2, t0
+	PTR_ADDIU	a2, 1
 	jr	ra
-	PTR_ADDIU	a2, 1
 #endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
 
 .Lfirst_fixup\@:
 	/* unset_bytes already in a2 */
 	jr	ra
-	nop
 
 .Lfwd_fixup\@:
 	/*
@@ -255,8 +258,8 @@
 	andi	a2, 0x3f
 	LONG_L	t0, THREAD_BUADDR(t0)
 	LONG_ADDU	a2, t1
+	LONG_SUBU	a2, t0
 	jr	ra
-	LONG_SUBU	a2, t0
 
 .Lpartial_fixup\@:
 	/*
@@ -267,24 +270,21 @@
 	andi	a2, STORMASK
 	LONG_L	t0, THREAD_BUADDR(t0)
 	LONG_ADDU	a2, a0
+	LONG_SUBU	a2, t0
 	jr	ra
-	LONG_SUBU	a2, t0
 
 .Llast_fixup\@:
 	/* unset_bytes already in a2 */
 	jr	ra
-	nop
 
 .Lsmall_fixup\@:
 	/*
 	 * unset_bytes = end_addr - current_addr + 1
 	 * a2 = t1 - a0 + 1
 	 */
-	.set	reorder
 	PTR_SUBU	a2, t1, a0
 	PTR_ADDIU	a2, 1
 	jr	ra
-	.set	noreorder
 
 	.endm
 
@@ -298,8 +298,8 @@
 
 LEAF(memset)
 EXPORT_SYMBOL(memset)
+	move	v0, a0			/* result */
 	beqz	a1, 1f
-	move	v0, a0			/* result */
 
 	andi	a1, 0xff		/* spread fillword */
 	LONG_SLL	t1, a1, 8