author	Dan Williams <dan.j.williams@intel.com>	2018-05-03 20:06:11 -0400
committer	Ingo Molnar <mingo@kernel.org>	2018-05-15 02:32:41 -0400
commit	da7bc9c57eb0e91e048d05f7dbe5014a8b81ccfa (patch)
tree	a9f7f781dc1e2b803ffed73e16f8860ff1d086f9 /arch/x86/lib
parent	67b8d5c7081221efa252e111cd52532ec6d4266f (diff)
x86/asm/memcpy_mcsafe: Remove loop unrolling
In preparation for teaching memcpy_mcsafe() to return 'bytes remaining'
rather than pass / fail, simplify the implementation to remove loop
unrolling. The unrolling complicates the fault handling for negligible
benefit given modern CPUs perform loop stream detection.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: hch@lst.de
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-nvdimm@lists.01.org
Link: http://lkml.kernel.org/r/152539237092.31796.9115692316555638048.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
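The 'bytes remaining' convention referred to above lands in a follow-up
patch. As a minimal user-space model of that convention (all names here
are illustrative assumptions, not the kernel API as of this commit):

#include <stddef.h>
#include <string.h>

/*
 * Model of the planned convention: copy up to 'len' bytes, but stop at
 * 'fault' (standing in for a poisoned source cacheline) and return the
 * number of bytes that remain uncopied; 0 means full success.
 */
static size_t mcsafe_copy_model(void *dst, const void *src, size_t len,
				const void *fault)
{
	const char *s = src;
	const char *end = s + len;

	if (fault >= (const void *)s && fault < (const void *)end)
		end = fault;		/* stop short of the "poison" */
	memcpy(dst, s, end - s);
	return len - (size_t)(end - s);	/* bytes remaining */
}

A caller can then report a short copy (len - remaining) instead of a
bare -EFAULT, which is what makes the simpler, per-load fault handling
below worthwhile.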
Diffstat (limited to 'arch/x86/lib')
-rw-r--r--	arch/x86/lib/memcpy_64.S	59
1 file changed, 10 insertions(+), 49 deletions(-)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 9a53a06e5a3e..54c971892db5 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -184,11 +184,11 @@ ENDPROC(memcpy_orig)
 
 #ifndef CONFIG_UML
 /*
- * memcpy_mcsafe_unrolled - memory copy with machine check exception handling
+ * __memcpy_mcsafe - memory copy with machine check exception handling
  * Note that we only catch machine checks when reading the source addresses.
  * Writes to target are posted and don't generate machine checks.
  */
-ENTRY(memcpy_mcsafe_unrolled)
+ENTRY(__memcpy_mcsafe)
 	cmpl $8, %edx
 	/* Less than 8 bytes? Go to byte copy loop */
 	jb .L_no_whole_words
@@ -213,49 +213,18 @@ ENTRY(memcpy_mcsafe_unrolled)
 	jnz .L_copy_leading_bytes
 
 .L_8byte_aligned:
-	/* Figure out how many whole cache lines (64-bytes) to copy */
-	movl %edx, %ecx
-	andl $63, %edx
-	shrl $6, %ecx
-	jz .L_no_whole_cache_lines
-
-	/* Loop copying whole cache lines */
-.L_cache_w0: movq (%rsi), %r8
-.L_cache_w1: movq 1*8(%rsi), %r9
-.L_cache_w2: movq 2*8(%rsi), %r10
-.L_cache_w3: movq 3*8(%rsi), %r11
-	movq %r8, (%rdi)
-	movq %r9, 1*8(%rdi)
-	movq %r10, 2*8(%rdi)
-	movq %r11, 3*8(%rdi)
-.L_cache_w4: movq 4*8(%rsi), %r8
-.L_cache_w5: movq 5*8(%rsi), %r9
-.L_cache_w6: movq 6*8(%rsi), %r10
-.L_cache_w7: movq 7*8(%rsi), %r11
-	movq %r8, 4*8(%rdi)
-	movq %r9, 5*8(%rdi)
-	movq %r10, 6*8(%rdi)
-	movq %r11, 7*8(%rdi)
-	leaq 64(%rsi), %rsi
-	leaq 64(%rdi), %rdi
-	decl %ecx
-	jnz .L_cache_w0
-
-	/* Are there any trailing 8-byte words? */
-.L_no_whole_cache_lines:
 	movl %edx, %ecx
 	andl $7, %edx
 	shrl $3, %ecx
 	jz .L_no_whole_words
 
-	/* Copy trailing words */
-.L_copy_trailing_words:
+.L_copy_words:
 	movq (%rsi), %r8
-	mov %r8, (%rdi)
-	leaq 8(%rsi), %rsi
-	leaq 8(%rdi), %rdi
+	movq %r8, (%rdi)
+	addq $8, %rsi
+	addq $8, %rdi
 	decl %ecx
-	jnz .L_copy_trailing_words
+	jnz .L_copy_words
 
 	/* Any trailing bytes? */
 .L_no_whole_words:
@@ -276,8 +245,8 @@ ENTRY(memcpy_mcsafe_unrolled)
 .L_done_memcpy_trap:
 	xorq %rax, %rax
 	ret
-ENDPROC(memcpy_mcsafe_unrolled)
-EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
+ENDPROC(__memcpy_mcsafe)
+EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
 
 	.section .fixup, "ax"
 	/* Return -EFAULT for any failure */
@@ -288,14 +257,6 @@ EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
 	.previous
 
 	_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_copy_words, .L_memcpy_mcsafe_fail)
 	_ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
 #endif
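For reference, the simplified routine now has a classic three-phase
shape. The C below is only a sketch of that shape; the real code must
stay in assembly so each source load can carry an _ASM_EXTABLE_FAULT
recovery point, which plain C cannot express, and the function name
here is made up for illustration.

#include <stddef.h>
#include <stdint.h>

static void mcsafe_copy_shape(char *dst, const char *src, size_t len)
{
	/* Phase 1: copy leading bytes until the source is 8-byte aligned */
	while (len && ((uintptr_t)src & 7)) {
		*dst++ = *src++;
		len--;
	}

	/* Phase 2: copy whole 8-byte words; in the assembly, each of
	 * these loads is an exception-table recovery point */
	while (len >= 8) {
		*(uint64_t *)dst = *(const uint64_t *)src;
		dst += 8;
		src += 8;
		len -= 8;
	}

	/* Phase 3: copy any trailing bytes */
	while (len--)
		*dst++ = *src++;
}

With the 64-byte unrolled cacheline loop gone, only three fault
classes remain (leading bytes, whole words, trailing bytes), which is
what shrinks the exception-table block above from ten entries to three.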