author     Thomas Gleixner <tglx@linutronix.de>  2007-10-11 05:17:08 -0400
committer  Thomas Gleixner <tglx@linutronix.de>  2007-10-11 05:17:08 -0400
commit     185f3d38900f750a4566f87cde6a178f3595a115 (patch)
tree       d463f6da1af452b1bbdf476828ea88427087f255 /arch/x86/lib/memcpy_64.S
parent     51b2833060f26258ea2da091c7b9c6a358ac9dd2 (diff)

x86_64: move lib
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/lib/memcpy_64.S')
-rw-r--r--  arch/x86/lib/memcpy_64.S  131
1 file changed, 131 insertions, 0 deletions
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
new file mode 100644
index 000000000000..c22981fa2f3a
--- /dev/null
+++ b/arch/x86/lib/memcpy_64.S
@@ -0,0 +1,131 @@
/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * rax original destination
 */

	ALIGN
memcpy_c:
	CFI_STARTPROC
	movq %rdi,%rax
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
	rep movsq
	movl %edx,%ecx
	rep movsb
	ret
	CFI_ENDPROC
ENDPROC(memcpy_c)

ENTRY(__memcpy)
ENTRY(memcpy)
	CFI_STARTPROC
	pushq %rbx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbx, 0
	movq %rdi,%rax

	movl %edx,%ecx
	shrl $6,%ecx
	jz .Lhandle_tail

	.p2align 4
.Lloop_64:
	decl %ecx

	movq (%rsi),%r11
	movq 8(%rsi),%r8

	movq %r11,(%rdi)
	movq %r8,1*8(%rdi)

	movq 2*8(%rsi),%r9
	movq 3*8(%rsi),%r10

	movq %r9,2*8(%rdi)
	movq %r10,3*8(%rdi)

	movq 4*8(%rsi),%r11
	movq 5*8(%rsi),%r8

	movq %r11,4*8(%rdi)
	movq %r8,5*8(%rdi)

	movq 6*8(%rsi),%r9
	movq 7*8(%rsi),%r10

	movq %r9,6*8(%rdi)
	movq %r10,7*8(%rdi)

	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	jnz .Lloop_64

.Lhandle_tail:
	movl %edx,%ecx
	andl $63,%ecx
	shrl $3,%ecx
	jz .Lhandle_7
	.p2align 4
.Lloop_8:
	decl %ecx
	movq (%rsi),%r8
	movq %r8,(%rdi)
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz .Lloop_8

.Lhandle_7:
	movl %edx,%ecx
	andl $7,%ecx
	jz .Lende
	.p2align 4
.Lloop_1:
	movb (%rsi),%r8b
	movb %r8b,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

.Lende:
	popq %rbx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rbx
	ret
.Lfinal:
	CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)

	/* Some CPUs run faster using the string copy instructions.
	   It is also a lot simpler. Use this when possible */

	.section .altinstr_replacement,"ax"
1:	.byte 0xeb				/* jmp <disp8> */
	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
2:
	.previous
	.section .altinstructions,"a"
	.align 8
	.quad memcpy
	.quad 1b
	.byte X86_FEATURE_REP_GOOD
	/* Replace only beginning, memcpy is used to apply alternatives, so it
	 * is silly to overwrite itself with nops - reboot is only outcome... */
	.byte 2b - 1b
	.byte 2b - 1b
	.previous
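
For reference, the unrolled copy above proceeds in three phases: 64-byte blocks while the count allows (.Lloop_64), then the remaining whole quadwords (.Lloop_8), then single bytes (.Lloop_1), keeping the original destination in %rax as the return value. The sketch below is a plain-C illustration of that structure only, not part of the commit; the name memcpy_sketch is made up for the example, and it glosses over the fact that the assembly performs quadword accesses that x86 permits to be unaligned, whereas the C version assumes suitably aligned pointers.

#include <stddef.h>
#include <stdint.h>

/*
 * Illustration only (not part of the commit): the same copy order as the
 * unrolled assembly above -- 64-byte blocks, then 8-byte quadwords, then
 * single bytes -- returning the original destination like %rax.
 */
void *memcpy_sketch(void *dest, const void *src, size_t count)
{
	uint64_t *d8 = dest;
	const uint64_t *s8 = src;
	unsigned char *d1;
	const unsigned char *s1;
	size_t i;

	/* .Lloop_64: count >> 6 iterations, eight quadword moves each */
	for (i = count >> 6; i; i--) {
		d8[0] = s8[0]; d8[1] = s8[1];
		d8[2] = s8[2]; d8[3] = s8[3];
		d8[4] = s8[4]; d8[5] = s8[5];
		d8[6] = s8[6]; d8[7] = s8[7];
		d8 += 8;
		s8 += 8;
	}

	/* .Lloop_8: the remaining (count & 63) >> 3 whole quadwords */
	for (i = (count & 63) >> 3; i; i--)
		*d8++ = *s8++;

	/* .Lloop_1: the last count & 7 bytes */
	d1 = (unsigned char *)d8;
	s1 = (const unsigned char *)s8;
	for (i = count & 7; i; i--)
		*d1++ = *s1++;

	return dest;
}

The memcpy_c variant is the rep-string version of the same job: it copies count/8 quadwords with rep movsq and the remaining count%8 bytes with rep movsb. The .altinstructions record at the end lets the alternatives machinery patch a two-byte short jmp to memcpy_c over the start of memcpy on CPUs advertising X86_FEATURE_REP_GOOD; only those two bytes are replaced because, as the comment notes, memcpy itself is used while applying alternatives.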