diff options
author | Dan Williams <dan.j.williams@intel.com> | 2018-05-03 20:06:11 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2018-05-15 02:32:41 -0400 |
commit | da7bc9c57eb0e91e048d05f7dbe5014a8b81ccfa (patch) | |
tree | a9f7f781dc1e2b803ffed73e16f8860ff1d086f9 /arch/x86/lib | |
parent | 67b8d5c7081221efa252e111cd52532ec6d4266f (diff) |
x86/asm/memcpy_mcsafe: Remove loop unrolling
In preparation for teaching memcpy_mcsafe() to return 'bytes remaining'
rather than pass / fail, simplify the implementation to remove loop
unrolling. The unrolling complicates the fault handling for negligible
benefit given modern CPUs perform loop stream detection.
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: hch@lst.de
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-nvdimm@lists.01.org
Link: http://lkml.kernel.org/r/152539237092.31796.9115692316555638048.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/lib')
-rw-r--r-- | arch/x86/lib/memcpy_64.S | 59 |
1 file changed, 10 insertions(+), 49 deletions(-)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 9a53a06e5a3e..54c971892db5 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S | |||
@@ -184,11 +184,11 @@ ENDPROC(memcpy_orig) | |||
184 | 184 | ||
185 | #ifndef CONFIG_UML | 185 | #ifndef CONFIG_UML |
186 | /* | 186 | /* |
187 | * memcpy_mcsafe_unrolled - memory copy with machine check exception handling | 187 | * __memcpy_mcsafe - memory copy with machine check exception handling |
188 | * Note that we only catch machine checks when reading the source addresses. | 188 | * Note that we only catch machine checks when reading the source addresses. |
189 | * Writes to target are posted and don't generate machine checks. | 189 | * Writes to target are posted and don't generate machine checks. |
190 | */ | 190 | */ |
191 | ENTRY(memcpy_mcsafe_unrolled) | 191 | ENTRY(__memcpy_mcsafe) |
192 | cmpl $8, %edx | 192 | cmpl $8, %edx |
193 | /* Less than 8 bytes? Go to byte copy loop */ | 193 | /* Less than 8 bytes? Go to byte copy loop */ |
194 | jb .L_no_whole_words | 194 | jb .L_no_whole_words |
@@ -213,49 +213,18 @@ ENTRY(memcpy_mcsafe_unrolled) | |||
213 | jnz .L_copy_leading_bytes | 213 | jnz .L_copy_leading_bytes |
214 | 214 | ||
215 | .L_8byte_aligned: | 215 | .L_8byte_aligned: |
216 | /* Figure out how many whole cache lines (64-bytes) to copy */ | ||
217 | movl %edx, %ecx | ||
218 | andl $63, %edx | ||
219 | shrl $6, %ecx | ||
220 | jz .L_no_whole_cache_lines | ||
221 | |||
222 | /* Loop copying whole cache lines */ | ||
223 | .L_cache_w0: movq (%rsi), %r8 | ||
224 | .L_cache_w1: movq 1*8(%rsi), %r9 | ||
225 | .L_cache_w2: movq 2*8(%rsi), %r10 | ||
226 | .L_cache_w3: movq 3*8(%rsi), %r11 | ||
227 | movq %r8, (%rdi) | ||
228 | movq %r9, 1*8(%rdi) | ||
229 | movq %r10, 2*8(%rdi) | ||
230 | movq %r11, 3*8(%rdi) | ||
231 | .L_cache_w4: movq 4*8(%rsi), %r8 | ||
232 | .L_cache_w5: movq 5*8(%rsi), %r9 | ||
233 | .L_cache_w6: movq 6*8(%rsi), %r10 | ||
234 | .L_cache_w7: movq 7*8(%rsi), %r11 | ||
235 | movq %r8, 4*8(%rdi) | ||
236 | movq %r9, 5*8(%rdi) | ||
237 | movq %r10, 6*8(%rdi) | ||
238 | movq %r11, 7*8(%rdi) | ||
239 | leaq 64(%rsi), %rsi | ||
240 | leaq 64(%rdi), %rdi | ||
241 | decl %ecx | ||
242 | jnz .L_cache_w0 | ||
243 | |||
244 | /* Are there any trailing 8-byte words? */ | ||
245 | .L_no_whole_cache_lines: | ||
246 | movl %edx, %ecx | 216 | movl %edx, %ecx |
247 | andl $7, %edx | 217 | andl $7, %edx |
248 | shrl $3, %ecx | 218 | shrl $3, %ecx |
249 | jz .L_no_whole_words | 219 | jz .L_no_whole_words |
250 | 220 | ||
251 | /* Copy trailing words */ | 221 | .L_copy_words: |
252 | .L_copy_trailing_words: | ||
253 | movq (%rsi), %r8 | 222 | movq (%rsi), %r8 |
254 | mov %r8, (%rdi) | 223 | movq %r8, (%rdi) |
255 | leaq 8(%rsi), %rsi | 224 | addq $8, %rsi |
256 | leaq 8(%rdi), %rdi | 225 | addq $8, %rdi |
257 | decl %ecx | 226 | decl %ecx |
258 | jnz .L_copy_trailing_words | 227 | jnz .L_copy_words |
259 | 228 | ||
260 | /* Any trailing bytes? */ | 229 | /* Any trailing bytes? */ |
261 | .L_no_whole_words: | 230 | .L_no_whole_words: |
@@ -276,8 +245,8 @@ ENTRY(memcpy_mcsafe_unrolled) | |||
276 | .L_done_memcpy_trap: | 245 | .L_done_memcpy_trap: |
277 | xorq %rax, %rax | 246 | xorq %rax, %rax |
278 | ret | 247 | ret |
279 | ENDPROC(memcpy_mcsafe_unrolled) | 248 | ENDPROC(__memcpy_mcsafe) |
280 | EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) | 249 | EXPORT_SYMBOL_GPL(__memcpy_mcsafe) |
281 | 250 | ||
282 | .section .fixup, "ax" | 251 | .section .fixup, "ax" |
283 | /* Return -EFAULT for any failure */ | 252 | /* Return -EFAULT for any failure */ |
@@ -288,14 +257,6 @@ EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) | |||
288 | .previous | 257 | .previous |
289 | 258 | ||
290 | _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) | 259 | _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) |
291 | _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) | 260 | _ASM_EXTABLE_FAULT(.L_copy_words, .L_memcpy_mcsafe_fail) |
292 | _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) | ||
293 | _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail) | ||
294 | _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) | ||
295 | _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) | ||
296 | _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) | ||
297 | _ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail) | ||
298 | _ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail) | ||
299 | _ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail) | ||
300 | _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail) | 261 | _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail) |
301 | #endif | 262 | #endif |