aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64/lib/memcpy.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64/lib/memcpy.S')
-rw-r--r--arch/x86_64/lib/memcpy.S93
1 files changed, 91 insertions, 2 deletions
diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S
index 92dd80544602..5554948b5554 100644
--- a/arch/x86_64/lib/memcpy.S
+++ b/arch/x86_64/lib/memcpy.S
@@ -11,8 +11,6 @@
11 * 11 *
12 * Output: 12 * Output:
13 * rax original destination 13 * rax original destination
14 *
15 * TODO: check best memcpy for PSC
16 */ 14 */
17 15
18 .globl __memcpy 16 .globl __memcpy
@@ -20,6 +18,95 @@
20 .p2align 4 18 .p2align 4
21__memcpy: 19__memcpy:
22memcpy: 20memcpy:
21 pushq %rbx
22 movq %rdi,%rax
23
24 movl %edx,%ecx
25 shrl $6,%ecx
26 jz .Lhandle_tail
27
28 .p2align 4
29.Lloop_64:
30 decl %ecx
31
32 movq (%rsi),%r11
33 movq 8(%rsi),%r8
34
35 movq %r11,(%rdi)
36 movq %r8,1*8(%rdi)
37
38 movq 2*8(%rsi),%r9
39 movq 3*8(%rsi),%r10
40
41 movq %r9,2*8(%rdi)
42 movq %r10,3*8(%rdi)
43
44 movq 4*8(%rsi),%r11
45 movq 5*8(%rsi),%r8
46
47 movq %r11,4*8(%rdi)
48 movq %r8,5*8(%rdi)
49
50 movq 6*8(%rsi),%r9
51 movq 7*8(%rsi),%r10
52
53 movq %r9,6*8(%rdi)
54 movq %r10,7*8(%rdi)
55
56 leaq 64(%rsi),%rsi
57 leaq 64(%rdi),%rdi
58 jnz .Lloop_64
59
60.Lhandle_tail:
61 movl %edx,%ecx
62 andl $63,%ecx
63 shrl $3,%ecx
64 jz .Lhandle_7
65 .p2align 4
66.Lloop_8:
67 decl %ecx
68 movq (%rsi),%r8
69 movq %r8,(%rdi)
70 leaq 8(%rdi),%rdi
71 leaq 8(%rsi),%rsi
72 jnz .Lloop_8
73
74.Lhandle_7:
75 movl %edx,%ecx
76 andl $7,%ecx
77 jz .Lende
78 .p2align 4
79.Lloop_1:
80 movb (%rsi),%r8b
81 movb %r8b,(%rdi)
82 incq %rdi
83 incq %rsi
84 decl %ecx
85 jnz .Lloop_1
86
87.Lende:
88 popq %rbx
89 ret
90.Lfinal:
91
92 /* Some CPUs run faster using the string copy instructions.
93 It is also a lot simpler. Use this when possible */
94
95 .section .altinstructions,"a"
96 .align 8
97 .quad memcpy
98 .quad memcpy_c
99 .byte X86_FEATURE_REP_GOOD
100 .byte .Lfinal-memcpy
101 .byte memcpy_c_end-memcpy_c
102 .previous
103
104 .section .altinstr_replacement,"ax"
105 /* rdi destination
106 * rsi source
107 * rdx count
108 */
109memcpy_c:
23 movq %rdi,%rax 110 movq %rdi,%rax
24 movl %edx,%ecx 111 movl %edx,%ecx
25 shrl $3,%ecx 112 shrl $3,%ecx
@@ -30,3 +117,5 @@ memcpy:
30 rep 117 rep
31 movsb 118 movsb
32 ret 119 ret
120memcpy_c_end:
121 .previous