aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c1
-rw-r--r--arch/x86/lib/memmove_64.S197
-rw-r--r--arch/x86/lib/memmove_64.c192
3 files changed, 198 insertions, 192 deletions
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 1b950d151e58..9796c2f3d074 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -52,6 +52,7 @@ extern void *__memcpy(void *, const void *, __kernel_size_t);
52EXPORT_SYMBOL(memset); 52EXPORT_SYMBOL(memset);
53EXPORT_SYMBOL(memcpy); 53EXPORT_SYMBOL(memcpy);
54EXPORT_SYMBOL(__memcpy); 54EXPORT_SYMBOL(__memcpy);
55EXPORT_SYMBOL(memmove);
55 56
56EXPORT_SYMBOL(empty_zero_page); 57EXPORT_SYMBOL(empty_zero_page);
57#ifndef CONFIG_PARAVIRT 58#ifndef CONFIG_PARAVIRT
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
new file mode 100644
index 000000000000..0ecb8433e5a8
--- /dev/null
+++ b/arch/x86/lib/memmove_64.S
@@ -0,0 +1,197 @@
1/*
2 * Normally compiler builtins are used, but sometimes the compiler calls out
3 * of line code. Based on asm-i386/string.h.
4 *
5 * This assembly file is re-written from memmove_64.c file.
6 * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
7 */
8#define _STRING_C
9#include <linux/linkage.h>
10#include <asm/dwarf2.h>
11
12#undef memmove
13
14/*
15 * Implement memmove(). This can handle overlap between src and dst.
16 *
17 * Input:
18 * rdi: dest
19 * rsi: src
20 * rdx: count
21 *
22 * Output:
23 * rax: dest
24 */
25ENTRY(memmove)
26 CFI_STARTPROC
27 /* Handle more 32bytes in loop */
28 mov %rdi, %rax
29 cmp $0x20, %rdx
30 jb 1f
31
32 /* Decide forward/backward copy mode */
33 cmp %rdi, %rsi
34 jb 2f
35
36 /*
37 * movsq instruction have many startup latency
38 * so we handle small size by general register.
39 */
40 cmp $680, %rdx
41 jb 3f
42 /*
43 * movsq instruction is only good for aligned case.
44 */
45
46 cmpb %dil, %sil
47 je 4f
483:
49 sub $0x20, %rdx
50 /*
51 * We gobble 32byts forward in each loop.
52 */
535:
54 sub $0x20, %rdx
55 movq 0*8(%rsi), %r11
56 movq 1*8(%rsi), %r10
57 movq 2*8(%rsi), %r9
58 movq 3*8(%rsi), %r8
59 leaq 4*8(%rsi), %rsi
60
61 movq %r11, 0*8(%rdi)
62 movq %r10, 1*8(%rdi)
63 movq %r9, 2*8(%rdi)
64 movq %r8, 3*8(%rdi)
65 leaq 4*8(%rdi), %rdi
66 jae 5b
67 addq $0x20, %rdx
68 jmp 1f
69 /*
70 * Handle data forward by movsq.
71 */
72 .p2align 4
734:
74 movq %rdx, %rcx
75 movq -8(%rsi, %rdx), %r11
76 lea -8(%rdi, %rdx), %r10
77 shrq $3, %rcx
78 rep movsq
79 movq %r11, (%r10)
80 jmp 13f
81 /*
82 * Handle data backward by movsq.
83 */
84 .p2align 4
857:
86 movq %rdx, %rcx
87 movq (%rsi), %r11
88 movq %rdi, %r10
89 leaq -8(%rsi, %rdx), %rsi
90 leaq -8(%rdi, %rdx), %rdi
91 shrq $3, %rcx
92 std
93 rep movsq
94 cld
95 movq %r11, (%r10)
96 jmp 13f
97
98 /*
99 * Start to prepare for backward copy.
100 */
101 .p2align 4
1022:
103 cmp $680, %rdx
104 jb 6f
105 cmp %dil, %sil
106 je 7b
1076:
108 /*
109 * Calculate copy position to tail.
110 */
111 addq %rdx, %rsi
112 addq %rdx, %rdi
113 subq $0x20, %rdx
114 /*
115 * We gobble 32byts backward in each loop.
116 */
1178:
118 subq $0x20, %rdx
119 movq -1*8(%rsi), %r11
120 movq -2*8(%rsi), %r10
121 movq -3*8(%rsi), %r9
122 movq -4*8(%rsi), %r8
123 leaq -4*8(%rsi), %rsi
124
125 movq %r11, -1*8(%rdi)
126 movq %r10, -2*8(%rdi)
127 movq %r9, -3*8(%rdi)
128 movq %r8, -4*8(%rdi)
129 leaq -4*8(%rdi), %rdi
130 jae 8b
131 /*
132 * Calculate copy position to head.
133 */
134 addq $0x20, %rdx
135 subq %rdx, %rsi
136 subq %rdx, %rdi
1371:
138 cmpq $16, %rdx
139 jb 9f
140 /*
141 * Move data from 16 bytes to 31 bytes.
142 */
143 movq 0*8(%rsi), %r11
144 movq 1*8(%rsi), %r10
145 movq -2*8(%rsi, %rdx), %r9
146 movq -1*8(%rsi, %rdx), %r8
147 movq %r11, 0*8(%rdi)
148 movq %r10, 1*8(%rdi)
149 movq %r9, -2*8(%rdi, %rdx)
150 movq %r8, -1*8(%rdi, %rdx)
151 jmp 13f
152 .p2align 4
1539:
154 cmpq $8, %rdx
155 jb 10f
156 /*
157 * Move data from 8 bytes to 15 bytes.
158 */
159 movq 0*8(%rsi), %r11
160 movq -1*8(%rsi, %rdx), %r10
161 movq %r11, 0*8(%rdi)
162 movq %r10, -1*8(%rdi, %rdx)
163 jmp 13f
16410:
165 cmpq $4, %rdx
166 jb 11f
167 /*
168 * Move data from 4 bytes to 7 bytes.
169 */
170 movl (%rsi), %r11d
171 movl -4(%rsi, %rdx), %r10d
172 movl %r11d, (%rdi)
173 movl %r10d, -4(%rdi, %rdx)
174 jmp 13f
17511:
176 cmp $2, %rdx
177 jb 12f
178 /*
179 * Move data from 2 bytes to 3 bytes.
180 */
181 movw (%rsi), %r11w
182 movw -2(%rsi, %rdx), %r10w
183 movw %r11w, (%rdi)
184 movw %r10w, -2(%rdi, %rdx)
185 jmp 13f
18612:
187 cmp $1, %rdx
188 jb 13f
189 /*
190 * Move data for 1 byte.
191 */
192 movb (%rsi), %r11b
193 movb %r11b, (%rdi)
19413:
195 retq
196 CFI_ENDPROC
197ENDPROC(memmove)
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c
deleted file mode 100644
index 6d0f0ec41b34..000000000000
--- a/arch/x86/lib/memmove_64.c
+++ /dev/null
@@ -1,192 +0,0 @@
1/* Normally compiler builtins are used, but sometimes the compiler calls out
2 of line code. Based on asm-i386/string.h.
3 */
4#define _STRING_C
5#include <linux/string.h>
6#include <linux/module.h>
7
8#undef memmove
9void *memmove(void *dest, const void *src, size_t count)
10{
11 unsigned long d0,d1,d2,d3,d4,d5,d6,d7;
12 char *ret;
13
14 __asm__ __volatile__(
15 /* Handle more 32bytes in loop */
16 "mov %2, %3\n\t"
17 "cmp $0x20, %0\n\t"
18 "jb 1f\n\t"
19
20 /* Decide forward/backward copy mode */
21 "cmp %2, %1\n\t"
22 "jb 2f\n\t"
23
24 /*
25 * movsq instruction have many startup latency
26 * so we handle small size by general register.
27 */
28 "cmp $680, %0\n\t"
29 "jb 3f\n\t"
30 /*
31 * movsq instruction is only good for aligned case.
32 */
33 "cmpb %%dil, %%sil\n\t"
34 "je 4f\n\t"
35 "3:\n\t"
36 "sub $0x20, %0\n\t"
37 /*
38 * We gobble 32byts forward in each loop.
39 */
40 "5:\n\t"
41 "sub $0x20, %0\n\t"
42 "movq 0*8(%1), %4\n\t"
43 "movq 1*8(%1), %5\n\t"
44 "movq 2*8(%1), %6\n\t"
45 "movq 3*8(%1), %7\n\t"
46 "leaq 4*8(%1), %1\n\t"
47
48 "movq %4, 0*8(%2)\n\t"
49 "movq %5, 1*8(%2)\n\t"
50 "movq %6, 2*8(%2)\n\t"
51 "movq %7, 3*8(%2)\n\t"
52 "leaq 4*8(%2), %2\n\t"
53 "jae 5b\n\t"
54 "addq $0x20, %0\n\t"
55 "jmp 1f\n\t"
56 /*
57 * Handle data forward by movsq.
58 */
59 ".p2align 4\n\t"
60 "4:\n\t"
61 "movq %0, %8\n\t"
62 "movq -8(%1, %0), %4\n\t"
63 "lea -8(%2, %0), %5\n\t"
64 "shrq $3, %8\n\t"
65 "rep movsq\n\t"
66 "movq %4, (%5)\n\t"
67 "jmp 13f\n\t"
68 /*
69 * Handle data backward by movsq.
70 */
71 ".p2align 4\n\t"
72 "7:\n\t"
73 "movq %0, %8\n\t"
74 "movq (%1), %4\n\t"
75 "movq %2, %5\n\t"
76 "leaq -8(%1, %0), %1\n\t"
77 "leaq -8(%2, %0), %2\n\t"
78 "shrq $3, %8\n\t"
79 "std\n\t"
80 "rep movsq\n\t"
81 "cld\n\t"
82 "movq %4, (%5)\n\t"
83 "jmp 13f\n\t"
84
85 /*
86 * Start to prepare for backward copy.
87 */
88 ".p2align 4\n\t"
89 "2:\n\t"
90 "cmp $680, %0\n\t"
91 "jb 6f \n\t"
92 "cmp %%dil, %%sil\n\t"
93 "je 7b \n\t"
94 "6:\n\t"
95 /*
96 * Calculate copy position to tail.
97 */
98 "addq %0, %1\n\t"
99 "addq %0, %2\n\t"
100 "subq $0x20, %0\n\t"
101 /*
102 * We gobble 32byts backward in each loop.
103 */
104 "8:\n\t"
105 "subq $0x20, %0\n\t"
106 "movq -1*8(%1), %4\n\t"
107 "movq -2*8(%1), %5\n\t"
108 "movq -3*8(%1), %6\n\t"
109 "movq -4*8(%1), %7\n\t"
110 "leaq -4*8(%1), %1\n\t"
111
112 "movq %4, -1*8(%2)\n\t"
113 "movq %5, -2*8(%2)\n\t"
114 "movq %6, -3*8(%2)\n\t"
115 "movq %7, -4*8(%2)\n\t"
116 "leaq -4*8(%2), %2\n\t"
117 "jae 8b\n\t"
118 /*
119 * Calculate copy position to head.
120 */
121 "addq $0x20, %0\n\t"
122 "subq %0, %1\n\t"
123 "subq %0, %2\n\t"
124 "1:\n\t"
125 "cmpq $16, %0\n\t"
126 "jb 9f\n\t"
127 /*
128 * Move data from 16 bytes to 31 bytes.
129 */
130 "movq 0*8(%1), %4\n\t"
131 "movq 1*8(%1), %5\n\t"
132 "movq -2*8(%1, %0), %6\n\t"
133 "movq -1*8(%1, %0), %7\n\t"
134 "movq %4, 0*8(%2)\n\t"
135 "movq %5, 1*8(%2)\n\t"
136 "movq %6, -2*8(%2, %0)\n\t"
137 "movq %7, -1*8(%2, %0)\n\t"
138 "jmp 13f\n\t"
139 ".p2align 4\n\t"
140 "9:\n\t"
141 "cmpq $8, %0\n\t"
142 "jb 10f\n\t"
143 /*
144 * Move data from 8 bytes to 15 bytes.
145 */
146 "movq 0*8(%1), %4\n\t"
147 "movq -1*8(%1, %0), %5\n\t"
148 "movq %4, 0*8(%2)\n\t"
149 "movq %5, -1*8(%2, %0)\n\t"
150 "jmp 13f\n\t"
151 "10:\n\t"
152 "cmpq $4, %0\n\t"
153 "jb 11f\n\t"
154 /*
155 * Move data from 4 bytes to 7 bytes.
156 */
157 "movl (%1), %4d\n\t"
158 "movl -4(%1, %0), %5d\n\t"
159 "movl %4d, (%2)\n\t"
160 "movl %5d, -4(%2, %0)\n\t"
161 "jmp 13f\n\t"
162 "11:\n\t"
163 "cmp $2, %0\n\t"
164 "jb 12f\n\t"
165 /*
166 * Move data from 2 bytes to 3 bytes.
167 */
168 "movw (%1), %4w\n\t"
169 "movw -2(%1, %0), %5w\n\t"
170 "movw %4w, (%2)\n\t"
171 "movw %5w, -2(%2, %0)\n\t"
172 "jmp 13f\n\t"
173 "12:\n\t"
174 "cmp $1, %0\n\t"
175 "jb 13f\n\t"
176 /*
177 * Move data for 1 byte.
178 */
179 "movb (%1), %4b\n\t"
180 "movb %4b, (%2)\n\t"
181 "13:\n\t"
182 : "=&d" (d0), "=&S" (d1), "=&D" (d2), "=&a" (ret) ,
183 "=r"(d3), "=r"(d4), "=r"(d5), "=r"(d6), "=&c" (d7)
184 :"0" (count),
185 "1" (src),
186 "2" (dest)
187 :"memory");
188
189 return ret;
190
191}
192EXPORT_SYMBOL(memmove);