author	Ingo Molnar <mingo@elte.hu>	2009-03-12 07:20:17 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-03-12 07:21:17 -0400
commit	f3b6eaf0149186ad0637512ec363582c91e06ee6 (patch)
tree	68e72baf189e9f87ab0a48961918243304ab878c /arch/x86/lib
parent	dd1ef4ec4721ddc0a1f2b73a4f67930cb320665c (diff)
x86: memcpy, clean up
Impact: cleanup

Make this file more readable by bringing it more in line with the
usual kernel style.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/lib')
-rw-r--r--	arch/x86/lib/memcpy_64.S | 136
1 file changed, 81 insertions(+), 55 deletions(-)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 10c067694af4..ad5441ed1b57 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -1,30 +1,38 @@
 /* Copyright 2002 Andi Kleen */
 
 #include <linux/linkage.h>
-#include <asm/dwarf2.h>
+
 #include <asm/cpufeature.h>
+#include <asm/dwarf2.h>
 
 /*
  * memcpy - Copy a memory block.
  *
  * Input:
  * rdi destination
  * rsi source
  * rdx count
  *
  * Output:
  * rax original destination
  */
 
+/*
+ * memcpy_c() - fast string ops (REP MOVSQ) based variant.
+ *
+ * Calls to this get patched into the kernel image via the
+ * alternative instructions framework:
+ */
 	ALIGN
 memcpy_c:
 	CFI_STARTPROC
-	movq %rdi,%rax
-	movl %edx,%ecx
-	shrl $3,%ecx
-	andl $7,%edx
+	movq %rdi, %rax
+
+	movl %edx, %ecx
+	shrl $3, %ecx
+	andl $7, %edx
 	rep movsq
-	movl %edx,%ecx
+	movl %edx, %ecx
 	rep movsb
 	ret
 	CFI_ENDPROC
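
The comment added above documents the contract memcpy_c relies on: %ecx is
loaded with the qword count (len >> 3) for `rep movsq`, and the 0..7 leftover
bytes (len & 7) are swept up by `rep movsb`. A minimal user-space C sketch of
that split, assuming GCC/Clang inline asm on x86-64 (rep_movs_copy is an
illustrative name, not a kernel symbol):

#include <stddef.h>

/*
 * Hypothetical user-space rendering of memcpy_c above; not kernel
 * code. Copies len >> 3 qwords, then the len & 7 tail bytes.
 */
static void *rep_movs_copy(void *dst, const void *src, size_t len)
{
	void *ret = dst;		/* movq %rdi, %rax */
	size_t qwords = len >> 3;	/* shrl $3, %ecx   */
	size_t tail   = len & 7;	/* andl $7, %edx   */

	/* rep movsq: copy 'qwords' 8-byte words from (%rsi) to (%rdi) */
	asm volatile("rep movsq"
		     : "+D" (dst), "+S" (src), "+c" (qwords) : : "memory");

	/* rep movsb: copy the remaining 0..7 bytes */
	asm volatile("rep movsb"
		     : "+D" (dst), "+S" (src), "+c" (tail) : : "memory");

	return ret;
}

The second hunk below rewrites the main copy loop in the same style:
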
@@ -33,92 +41,110 @@ ENDPROC(memcpy_c)
 ENTRY(__memcpy)
 ENTRY(memcpy)
 	CFI_STARTPROC
-	movq %rdi,%rax
 
-	movl %edx,%ecx
-	shrl $6,%ecx
+	/*
+	 * Put the number of full 64-byte blocks into %ecx.
+	 * Tail portion is handled at the end:
+	 */
+	movq %rdi, %rax
+	movl %edx, %ecx
+	shrl $6, %ecx
 	jz .Lhandle_tail
 
 	.p2align 4
 .Lloop_64:
+	/*
+	 * We decrement the loop index here - and the zero-flag is
+	 * checked at the end of the loop (instructions inbetween do
+	 * not change the zero flag):
+	 */
 	decl %ecx
 
-	movq (%rsi),%r11
-	movq 8(%rsi),%r8
+	/*
+	 * Move in blocks of 4x16 bytes:
+	 */
+	movq 0*8(%rsi), %r11
+	movq 1*8(%rsi), %r8
+	movq %r11, 0*8(%rdi)
+	movq %r8, 1*8(%rdi)
 
-	movq %r11,(%rdi)
-	movq %r8,1*8(%rdi)
+	movq 2*8(%rsi), %r9
+	movq 3*8(%rsi), %r10
+	movq %r9, 2*8(%rdi)
+	movq %r10, 3*8(%rdi)
 
-	movq 2*8(%rsi),%r9
-	movq 3*8(%rsi),%r10
+	movq 4*8(%rsi), %r11
+	movq 5*8(%rsi), %r8
+	movq %r11, 4*8(%rdi)
+	movq %r8, 5*8(%rdi)
 
-	movq %r9,2*8(%rdi)
-	movq %r10,3*8(%rdi)
+	movq 6*8(%rsi), %r9
+	movq 7*8(%rsi), %r10
+	movq %r9, 6*8(%rdi)
+	movq %r10, 7*8(%rdi)
 
-	movq 4*8(%rsi),%r11
-	movq 5*8(%rsi),%r8
+	leaq 64(%rsi), %rsi
+	leaq 64(%rdi), %rdi
 
-	movq %r11,4*8(%rdi)
-	movq %r8,5*8(%rdi)
-
-	movq 6*8(%rsi),%r9
-	movq 7*8(%rsi),%r10
-
-	movq %r9,6*8(%rdi)
-	movq %r10,7*8(%rdi)
-
-	leaq 64(%rsi),%rsi
-	leaq 64(%rdi),%rdi
 	jnz .Lloop_64
 
 .Lhandle_tail:
-	movl %edx,%ecx
-	andl $63,%ecx
-	shrl $3,%ecx
+	movl %edx, %ecx
+	andl $63, %ecx
+	shrl $3, %ecx
 	jz .Lhandle_7
+
 	.p2align 4
 .Lloop_8:
 	decl %ecx
-	movq (%rsi),%r8
-	movq %r8,(%rdi)
-	leaq 8(%rdi),%rdi
-	leaq 8(%rsi),%rsi
+	movq (%rsi), %r8
+	movq %r8, (%rdi)
+	leaq 8(%rdi), %rdi
+	leaq 8(%rsi), %rsi
 	jnz .Lloop_8
 
 .Lhandle_7:
-	movl %edx,%ecx
-	andl $7,%ecx
-	jz .Lende
+	movl %edx, %ecx
+	andl $7, %ecx
+	jz .Lend
+
 	.p2align 4
 .Lloop_1:
-	movb (%rsi),%r8b
-	movb %r8b,(%rdi)
+	movb (%rsi), %r8b
+	movb %r8b, (%rdi)
 	incq %rdi
 	incq %rsi
 	decl %ecx
 	jnz .Lloop_1
 
-.Lende:
+.Lend:
 	ret
 	CFI_ENDPROC
 ENDPROC(memcpy)
 ENDPROC(__memcpy)
 
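
At this point the rewritten routine reads as a three-stage copy: whole
64-byte blocks in .Lloop_64, leftover qwords in .Lloop_8, and a final
0..7-byte tail in .Lloop_1. A rough user-space C equivalent of that
structure, assuming GCC/Clang (__builtin_memcpy stands in for the unaligned
8-byte register moves; block_copy_sketch is an illustrative name, not kernel
code):

#include <stddef.h>
#include <stdint.h>

static void *block_copy_sketch(void *dst, const void *src, size_t len)
{
	unsigned char *d = dst;
	const unsigned char *s = src;
	size_t n;

	/* .Lloop_64: full 64-byte blocks, moved as 4 groups of 16 bytes */
	for (n = len >> 6; n; n--) {
		for (int i = 0; i < 8; i += 2) {
			uint64_t a, b;

			/* two loads, then the two matching stores */
			__builtin_memcpy(&a, s + 8 * i, 8);
			__builtin_memcpy(&b, s + 8 * (i + 1), 8);
			__builtin_memcpy(d + 8 * i, &a, 8);
			__builtin_memcpy(d + 8 * (i + 1), &b, 8);
		}
		s += 64;	/* leaq 64(%rsi), %rsi */
		d += 64;	/* leaq 64(%rdi), %rdi */
	}

	/* .Lloop_8: remaining whole qwords (andl $63; shrl $3) */
	for (n = (len & 63) >> 3; n; n--) {
		uint64_t q;

		__builtin_memcpy(&q, s, 8);
		__builtin_memcpy(d, &q, 8);
		s += 8;
		d += 8;
	}

	/* .Lloop_1: final 0..7 bytes (andl $7) */
	for (n = len & 7; n; n--)
		*d++ = *s++;

	return dst;	/* original destination, as in %rax */
}

The paired loads and stores mirror how the assembly keeps two reads in
flight before issuing the matching writes. The rest of the hunk reworks the
alternatives glue:
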
-	/* Some CPUs run faster using the string copy instructions.
-	   It is also a lot simpler. Use this when possible */
+	/*
+	 * Some CPUs run faster using the string copy instructions.
+	 * It is also a lot simpler. Use this when possible:
+	 */
 
-	.section .altinstr_replacement,"ax"
+	.section .altinstr_replacement, "ax"
 1:	.byte 0xeb	/* jmp <disp8> */
 	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
 2:
 	.previous
-	.section .altinstructions,"a"
+
+	.section .altinstructions, "a"
 	.align 8
 	.quad memcpy
 	.quad 1b
 	.byte X86_FEATURE_REP_GOOD
-	/* Replace only beginning, memcpy is used to apply alternatives, so it
-	 * is silly to overwrite itself with nops - reboot is only outcome... */
+
+	/*
+	 * Replace only beginning, memcpy is used to apply alternatives,
+	 * so it is silly to overwrite itself with nops - reboot is the
+	 * only outcome...
+	 */
 	.byte 2b - 1b
 	.byte 2b - 1b
 	.previous
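
The table entry above is what lets the patching happen at boot: if the CPU
advertises X86_FEATURE_REP_GOOD, the 2-byte `jmp <disp8>` from
.altinstr_replacement is copied over the first two bytes of memcpy, diverting
all callers to memcpy_c. The closing comment explains the lengths: since
memcpy itself is used while applying alternatives, padding the rest of the
function with NOPs would have memcpy overwrite itself mid-copy. A simplified
sketch of applying such an entry; illustrative only, as the real code lives
in arch/x86/kernel/alternative.c and its struct alt_instr differs in detail:

#include <string.h>
#include <stdbool.h>

/* Hypothetical mirror of one .altinstructions record; field names
 * are illustrative, not the kernel's. */
struct alt_entry {
	unsigned char *instr;		/* .quad memcpy: site to patch     */
	unsigned char *replacement;	/* .quad 1b: replacement bytes     */
	unsigned char feature;		/* .byte X86_FEATURE_REP_GOOD      */
	unsigned char instrlen;		/* .byte 2b - 1b: bytes to replace */
	unsigned char replacementlen;	/* .byte 2b - 1b: replacement size */
};

static void apply_alternatives_sketch(struct alt_entry *a,
				      struct alt_entry *end,
				      bool (*cpu_has)(unsigned char feature))
{
	for (; a < end; a++) {
		if (!cpu_has(a->feature))
			continue;

		/*
		 * Patch only instrlen (here: 2) bytes, i.e. the short
		 * jmp to memcpy_c; the body of memcpy stays intact.
		 * Note the patcher itself copies memory, which is why
		 * memcpy must keep working while being patched.
		 */
		memcpy(a->instr, a->replacement, a->replacementlen);
	}
}

With both length bytes equal to 2b - 1b, no NOP padding is needed; only the
jump at the function's entry point changes.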