diff options
author | Maciej W. Rozycki <macro@linux-mips.org> | 2018-10-02 07:50:16 -0400 |
---|---|---|
committer | Paul Burton <paul.burton@mips.com> | 2018-10-09 13:31:03 -0400 |
commit | 68dec269ee29c3abfd09596fbee7e40d875a6ab3 (patch) | |
tree | aeabc6974c4bbfb69613fe05d5d44889271a9bbe | |
parent | 2f7619ae90bf78cf576b5e72087aab0435266fdb (diff) |
MIPS: memset: Limit excessive `noreorder' assembly mode use
Rewrite to use the `reorder' assembly mode and remove manually scheduled
delay slots except where GAS cannot schedule a delay-slot instruction
due to a data dependency or a section switch (as is the case with the EX
macro). No change in machine code produced.
Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
[paul.burton@mips.com:
Fix conflict with commit 932afdeec18b ("MIPS: Add Kconfig variable for
CPUs with unaligned load/store instructions")]
Signed-off-by: Paul Burton <paul.burton@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/20834/
Cc: Ralf Baechle <ralf@linux-mips.org>
-rw-r--r-- | arch/mips/lib/memset.S | 48 |
1 file changed, 24 insertions(+), 24 deletions(-)
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index 5334574075d4..418611ef13cf 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S | |||
@@ -78,7 +78,6 @@ | |||
78 | #endif | 78 | #endif |
79 | .endm | 79 | .endm |
80 | 80 | ||
81 | .set noreorder | ||
82 | .align 5 | 81 | .align 5 |
83 | 82 | ||
84 | /* | 83 | /* |
@@ -94,13 +93,16 @@ | |||
94 | .endif | 93 | .endif |
95 | 94 | ||
96 | sltiu t0, a2, STORSIZE /* very small region? */ | 95 | sltiu t0, a2, STORSIZE /* very small region? */ |
96 | .set noreorder | ||
97 | bnez t0, .Lsmall_memset\@ | 97 | bnez t0, .Lsmall_memset\@ |
98 | andi t0, a0, STORMASK /* aligned? */ | 98 | andi t0, a0, STORMASK /* aligned? */ |
99 | .set reorder | ||
99 | 100 | ||
100 | #ifdef CONFIG_CPU_MICROMIPS | 101 | #ifdef CONFIG_CPU_MICROMIPS |
101 | move t8, a1 /* used by 'swp' instruction */ | 102 | move t8, a1 /* used by 'swp' instruction */ |
102 | move t9, a1 | 103 | move t9, a1 |
103 | #endif | 104 | #endif |
105 | .set noreorder | ||
104 | #ifndef CONFIG_CPU_DADDI_WORKAROUNDS | 106 | #ifndef CONFIG_CPU_DADDI_WORKAROUNDS |
105 | beqz t0, 1f | 107 | beqz t0, 1f |
106 | PTR_SUBU t0, STORSIZE /* alignment in bytes */ | 108 | PTR_SUBU t0, STORSIZE /* alignment in bytes */ |
@@ -111,6 +113,7 @@ | |||
111 | PTR_SUBU t0, AT /* alignment in bytes */ | 113 | PTR_SUBU t0, AT /* alignment in bytes */ |
112 | .set at | 114 | .set at |
113 | #endif | 115 | #endif |
116 | .set reorder | ||
114 | 117 | ||
115 | #ifdef CONFIG_CPU_HAS_LOAD_STORE_LR | 118 | #ifdef CONFIG_CPU_HAS_LOAD_STORE_LR |
116 | R10KCBARRIER(0(ra)) | 119 | R10KCBARRIER(0(ra)) |
@@ -125,8 +128,10 @@ | |||
125 | #else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ | 128 | #else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ |
126 | #define STORE_BYTE(N) \ | 129 | #define STORE_BYTE(N) \ |
127 | EX(sb, a1, N(a0), .Lbyte_fixup\@); \ | 130 | EX(sb, a1, N(a0), .Lbyte_fixup\@); \ |
131 | .set noreorder; \ | ||
128 | beqz t0, 0f; \ | 132 | beqz t0, 0f; \ |
129 | PTR_ADDU t0, 1; | 133 | PTR_ADDU t0, 1; \ |
134 | .set reorder; | ||
130 | 135 | ||
131 | PTR_ADDU a2, t0 /* correct size */ | 136 | PTR_ADDU a2, t0 /* correct size */ |
132 | PTR_ADDU t0, 1 | 137 | PTR_ADDU t0, 1 |
@@ -148,16 +153,14 @@ | |||
148 | #endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ | 153 | #endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ |
149 | 1: ori t1, a2, 0x3f /* # of full blocks */ | 154 | 1: ori t1, a2, 0x3f /* # of full blocks */ |
150 | xori t1, 0x3f | 155 | xori t1, 0x3f |
156 | andi t0, a2, 0x40-STORSIZE | ||
151 | beqz t1, .Lmemset_partial\@ /* no block to fill */ | 157 | beqz t1, .Lmemset_partial\@ /* no block to fill */ |
152 | andi t0, a2, 0x40-STORSIZE | ||
153 | 158 | ||
154 | PTR_ADDU t1, a0 /* end address */ | 159 | PTR_ADDU t1, a0 /* end address */ |
155 | .set reorder | ||
156 | 1: PTR_ADDIU a0, 64 | 160 | 1: PTR_ADDIU a0, 64 |
157 | R10KCBARRIER(0(ra)) | 161 | R10KCBARRIER(0(ra)) |
158 | f_fill64 a0, -64, FILL64RG, .Lfwd_fixup\@, \mode | 162 | f_fill64 a0, -64, FILL64RG, .Lfwd_fixup\@, \mode |
159 | bne t1, a0, 1b | 163 | bne t1, a0, 1b |
160 | .set noreorder | ||
161 | 164 | ||
162 | .Lmemset_partial\@: | 165 | .Lmemset_partial\@: |
163 | R10KCBARRIER(0(ra)) | 166 | R10KCBARRIER(0(ra)) |
@@ -173,20 +176,18 @@ | |||
173 | PTR_SUBU t1, AT | 176 | PTR_SUBU t1, AT |
174 | .set at | 177 | .set at |
175 | #endif | 178 | #endif |
179 | PTR_ADDU a0, t0 /* dest ptr */ | ||
176 | jr t1 | 180 | jr t1 |
177 | PTR_ADDU a0, t0 /* dest ptr */ | ||
178 | 181 | ||
179 | .set push | ||
180 | .set noreorder | ||
181 | .set nomacro | ||
182 | /* ... but first do longs ... */ | 182 | /* ... but first do longs ... */ |
183 | f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode | 183 | f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode |
184 | 2: .set pop | 184 | 2: andi a2, STORMASK /* At most one long to go */ |
185 | andi a2, STORMASK /* At most one long to go */ | ||
186 | 185 | ||
186 | .set noreorder | ||
187 | beqz a2, 1f | 187 | beqz a2, 1f |
188 | #ifdef CONFIG_CPU_HAS_LOAD_STORE_LR | 188 | #ifdef CONFIG_CPU_HAS_LOAD_STORE_LR |
189 | PTR_ADDU a0, a2 /* What's left */ | 189 | PTR_ADDU a0, a2 /* What's left */ |
190 | .set reorder | ||
190 | R10KCBARRIER(0(ra)) | 191 | R10KCBARRIER(0(ra)) |
191 | #ifdef __MIPSEB__ | 192 | #ifdef __MIPSEB__ |
192 | EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@) | 193 | EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@) |
@@ -195,6 +196,7 @@ | |||
195 | #endif | 196 | #endif |
196 | #else | 197 | #else |
197 | PTR_SUBU t0, $0, a2 | 198 | PTR_SUBU t0, $0, a2 |
199 | .set reorder | ||
198 | move a2, zero /* No remaining longs */ | 200 | move a2, zero /* No remaining longs */ |
199 | PTR_ADDIU t0, 1 | 201 | PTR_ADDIU t0, 1 |
200 | STORE_BYTE(0) | 202 | STORE_BYTE(0) |
@@ -210,20 +212,22 @@ | |||
210 | #endif | 212 | #endif |
211 | 0: | 213 | 0: |
212 | #endif | 214 | #endif |
213 | 1: jr ra | 215 | 1: move a2, zero |
214 | move a2, zero | 216 | jr ra |
215 | 217 | ||
216 | .Lsmall_memset\@: | 218 | .Lsmall_memset\@: |
219 | PTR_ADDU t1, a0, a2 | ||
217 | beqz a2, 2f | 220 | beqz a2, 2f |
218 | PTR_ADDU t1, a0, a2 | ||
219 | 221 | ||
220 | 1: PTR_ADDIU a0, 1 /* fill bytewise */ | 222 | 1: PTR_ADDIU a0, 1 /* fill bytewise */ |
221 | R10KCBARRIER(0(ra)) | 223 | R10KCBARRIER(0(ra)) |
224 | .set noreorder | ||
222 | bne t1, a0, 1b | 225 | bne t1, a0, 1b |
223 | EX(sb, a1, -1(a0), .Lsmall_fixup\@) | 226 | EX(sb, a1, -1(a0), .Lsmall_fixup\@) |
227 | .set reorder | ||
224 | 228 | ||
225 | 2: jr ra /* done */ | 229 | 2: move a2, zero |
226 | move a2, zero | 230 | jr ra /* done */ |
227 | .if __memset == 1 | 231 | .if __memset == 1 |
228 | END(memset) | 232 | END(memset) |
229 | .set __memset, 0 | 233 | .set __memset, 0 |
@@ -237,14 +241,13 @@ | |||
237 | * a2 = a2 - t0 + 1 | 241 | * a2 = a2 - t0 + 1 |
238 | */ | 242 | */ |
239 | PTR_SUBU a2, t0 | 243 | PTR_SUBU a2, t0 |
244 | PTR_ADDIU a2, 1 | ||
240 | jr ra | 245 | jr ra |
241 | PTR_ADDIU a2, 1 | ||
242 | #endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ | 246 | #endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ |
243 | 247 | ||
244 | .Lfirst_fixup\@: | 248 | .Lfirst_fixup\@: |
245 | /* unset_bytes already in a2 */ | 249 | /* unset_bytes already in a2 */ |
246 | jr ra | 250 | jr ra |
247 | nop | ||
248 | 251 | ||
249 | .Lfwd_fixup\@: | 252 | .Lfwd_fixup\@: |
250 | /* | 253 | /* |
@@ -255,8 +258,8 @@ | |||
255 | andi a2, 0x3f | 258 | andi a2, 0x3f |
256 | LONG_L t0, THREAD_BUADDR(t0) | 259 | LONG_L t0, THREAD_BUADDR(t0) |
257 | LONG_ADDU a2, t1 | 260 | LONG_ADDU a2, t1 |
261 | LONG_SUBU a2, t0 | ||
258 | jr ra | 262 | jr ra |
259 | LONG_SUBU a2, t0 | ||
260 | 263 | ||
261 | .Lpartial_fixup\@: | 264 | .Lpartial_fixup\@: |
262 | /* | 265 | /* |
@@ -267,24 +270,21 @@ | |||
267 | andi a2, STORMASK | 270 | andi a2, STORMASK |
268 | LONG_L t0, THREAD_BUADDR(t0) | 271 | LONG_L t0, THREAD_BUADDR(t0) |
269 | LONG_ADDU a2, a0 | 272 | LONG_ADDU a2, a0 |
273 | LONG_SUBU a2, t0 | ||
270 | jr ra | 274 | jr ra |
271 | LONG_SUBU a2, t0 | ||
272 | 275 | ||
273 | .Llast_fixup\@: | 276 | .Llast_fixup\@: |
274 | /* unset_bytes already in a2 */ | 277 | /* unset_bytes already in a2 */ |
275 | jr ra | 278 | jr ra |
276 | nop | ||
277 | 279 | ||
278 | .Lsmall_fixup\@: | 280 | .Lsmall_fixup\@: |
279 | /* | 281 | /* |
280 | * unset_bytes = end_addr - current_addr + 1 | 282 | * unset_bytes = end_addr - current_addr + 1 |
281 | * a2 = t1 - a0 + 1 | 283 | * a2 = t1 - a0 + 1 |
282 | */ | 284 | */ |
283 | .set reorder | ||
284 | PTR_SUBU a2, t1, a0 | 285 | PTR_SUBU a2, t1, a0 |
285 | PTR_ADDIU a2, 1 | 286 | PTR_ADDIU a2, 1 |
286 | jr ra | 287 | jr ra |
287 | .set noreorder | ||
288 | 288 | ||
289 | .endm | 289 | .endm |
290 | 290 | ||
@@ -298,8 +298,8 @@ | |||
298 | 298 | ||
299 | LEAF(memset) | 299 | LEAF(memset) |
300 | EXPORT_SYMBOL(memset) | 300 | EXPORT_SYMBOL(memset) |
301 | move v0, a0 /* result */ | ||
301 | beqz a1, 1f | 302 | beqz a1, 1f |
302 | move v0, a0 /* result */ | ||
303 | 303 | ||
304 | andi a1, 0xff /* spread fillword */ | 304 | andi a1, 0xff /* spread fillword */ |
305 | LONG_SLL t1, a1, 8 | 305 | LONG_SLL t1, a1, 8 |