aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/lib/memcpy_64.S
diff options
context:
space:
mode:
author: Thomas Gleixner <tglx@linutronix.de> 2007-10-11 05:17:08 -0400
committer: Thomas Gleixner <tglx@linutronix.de> 2007-10-11 05:17:08 -0400
commit: 185f3d38900f750a4566f87cde6a178f3595a115 (patch)
tree: d463f6da1af452b1bbdf476828ea88427087f255 /arch/x86/lib/memcpy_64.S
parent: 51b2833060f26258ea2da091c7b9c6a358ac9dd2 (diff)
x86_64: move lib
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/lib/memcpy_64.S')
-rw-r--r--arch/x86/lib/memcpy_64.S131
1 files changed, 131 insertions, 0 deletions
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
new file mode 100644
index 000000000000..c22981fa2f3a
--- /dev/null
+++ b/arch/x86/lib/memcpy_64.S
@@ -0,0 +1,131 @@
1/* Copyright 2002 Andi Kleen */
2
3#include <linux/linkage.h>
4#include <asm/dwarf2.h>
5#include <asm/cpufeature.h>
6
7/*
8 * memcpy - Copy a memory block.
9 *
10 * Input:
11 * rdi destination
12 * rsi source
13 * rdx count
14 *
15 * Output:
16 * rax original destination
17 */
18
/*
 * memcpy_c - "rep movs"-based memcpy.
 *
 * Never called directly: the .altinstructions descriptor at the bottom
 * of this file patches a short jmp-to-here over the start of memcpy on
 * CPUs advertising X86_FEATURE_REP_GOOD (fast string operations).
 *
 * In:  rdi = destination, rsi = source, rdx = byte count
 * Out: rax = original destination
 *
 * NOTE(review): the leading decimal digits on the lines below are
 * line-number artifacts from the web capture of this file, not
 * assembler source; they must be stripped before assembling.
 */
19 ALIGN
20memcpy_c:
21 CFI_STARTPROC
22 movq %rdi,%rax /* return value: original destination */
23 movl %edx,%ecx
24 shrl $3,%ecx /* ecx = count / 8 (whole qwords) */
25 andl $7,%edx /* edx = count % 8 (tail bytes) */
26 rep movsq /* bulk copy, 8 bytes at a time */
27 movl %edx,%ecx
28 rep movsb /* copy the remaining 0..7 bytes */
29 ret
30 CFI_ENDPROC
31ENDPROC(memcpy_c)
32
/*
 * memcpy/__memcpy - generic unrolled-loop memcpy (patched over with a
 * jmp to memcpy_c on X86_FEATURE_REP_GOOD CPUs, see bottom of file).
 *
 * In:  rdi = destination, rsi = source, rdx = byte count
 * Out: rax = original destination
 * Clobbers: rcx, rdx, rsi, rdi, r8-r11, flags
 *
 * Review fix: the previous version pushed/popped %rbx (with matching
 * CFI annotations) although %rbx is never used anywhere in the body,
 * and carried an unreferenced .Lfinal label; both removed.  No other
 * behavioral change.
 */
ENTRY(__memcpy)
ENTRY(memcpy)
	CFI_STARTPROC
	movq %rdi,%rax			/* return value: original destination */

	movl %edx,%ecx
	shrl $6,%ecx			/* ecx = number of 64-byte chunks */
	jz .Lhandle_tail		/* less than 64 bytes total */

	.p2align 4
.Lloop_64:
	decl %ecx

	/*
	 * Copy 64 bytes per iteration; loads and stores are interleaved
	 * in pairs to overlap memory latency.  Only mov/lea below, so
	 * the flags from the decl survive to the jnz at the bottom.
	 */
	movq (%rsi),%r11
	movq 8(%rsi),%r8

	movq %r11,(%rdi)
	movq %r8,1*8(%rdi)

	movq 2*8(%rsi),%r9
	movq 3*8(%rsi),%r10

	movq %r9,2*8(%rdi)
	movq %r10,3*8(%rdi)

	movq 4*8(%rsi),%r11
	movq 5*8(%rsi),%r8

	movq %r11,4*8(%rdi)
	movq %r8,5*8(%rdi)

	movq 6*8(%rsi),%r9
	movq 7*8(%rsi),%r10

	movq %r9,6*8(%rdi)
	movq %r10,7*8(%rdi)

	leaq 64(%rsi),%rsi		/* lea: advances pointers without */
	leaq 64(%rdi),%rdi		/* touching the flags */
	jnz .Lloop_64

.Lhandle_tail:
	movl %edx,%ecx
	andl $63,%ecx			/* ecx = count % 64 */
	shrl $3,%ecx			/* ecx = remaining whole qwords */
	jz .Lhandle_7
	.p2align 4
.Lloop_8:
	decl %ecx
	movq (%rsi),%r8
	movq %r8,(%rdi)
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz .Lloop_8

.Lhandle_7:
	movl %edx,%ecx
	andl $7,%ecx			/* ecx = trailing bytes (0..7) */
	jz .Lende
	.p2align 4
.Lloop_1:
	movb (%rsi),%r8b
	movb %r8b,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

.Lende:
	ret
	CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)
113
114 /* Some CPUs run faster using the string copy instructions.
115 It is also a lot simpler. Use this when possible */
116
/*
 * Replacement code: a single two-byte short jmp from the start of
 * memcpy to memcpy_c.  The displacement is computed relative to the
 * end of the 2-byte instruction (label 2:), hence the (2f - 1b) term.
 *
 * NOTE(review): the leading decimal digits on these lines are
 * line-number artifacts from the web capture, not assembler source.
 */
117 .section .altinstr_replacement,"ax"
1181: .byte 0xeb /* jmp <disp8> */
119 .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
1202:
121 .previous
/*
 * One alternative-instruction descriptor, consumed by the kernel's
 * alternatives patching code at boot: original address, replacement
 * address, required CPU feature flag, patched length, replacement
 * length.
 */
122 .section .altinstructions,"a"
123 .align 8
124 .quad memcpy /* address of the instruction to patch */
125 .quad 1b /* address of the replacement code above */
126 .byte X86_FEATURE_REP_GOOD /* patch only when this feature bit is set */
127 /* Replace only beginning, memcpy is used to apply alternatives, so it
128 * is silly to overwrite itself with nops - reboot is only outcome... */
129 .byte 2b - 1b /* length of the area to patch: just the 2-byte jmp */
130 .byte 2b - 1b /* length of the replacement code */
131 .previous