author		Ma Ling <ling.ma@intel.com>	2012-10-17 15:52:45 -0400
committer	Ingo Molnar <mingo@kernel.org>	2012-10-24 06:42:47 -0400
commit		269833bd5a0f4443873da358b71675a890b47c3c (patch)
tree		40d476ea896ded8c0957f20c77e583673f8d1d0b /arch/x86/lib
parent		0e9e3e306c7e472bdcffa34c4c4584301eda03b3 (diff)
x86/asm: Clean up copy_page_*() comments and code
Modern CPUs use fast-string instructions to accelerate copy
performance, combining data into 128-bit chunks. Modify the comments
and coding style to match that.

Signed-off-by: Ma Ling <ling.ma@intel.com>
Cc: iant@google.com
Link: http://lkml.kernel.org/r/1350503565-19167-1-git-send-email-ling.ma@intel.com
[ Cleaned up the clean up. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
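For illustration: the rep-movsq path that this patch renames to copy_page_rep boils down to a single string operation that the microcode runs as a fast-string copy. A minimal user-space sketch in C, assuming nothing beyond the patch itself (the function name and the inline-asm framing are mine; only the instruction and the 4096/8 quadword count come from the code):

#include <stdint.h>

#define PAGE_SIZE 4096

static void copy_page_rep_sketch(void *to, const void *from)
{
	uint64_t count = PAGE_SIZE / 8;		/* movl $4096/8, %ecx */

	/* rep movsq: copy %rcx quadwords from (%rsi) to (%rdi) */
	asm volatile("rep movsq"
		     : "+D" (to), "+S" (from), "+c" (count)
		     : : "memory");
}

On CPUs that advertise fast string operations, this single instruction lets the hardware pick its own internal chunk size, which is why it can beat the hand-unrolled loop.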
Diffstat (limited to 'arch/x86/lib')
-rw-r--r--	arch/x86/lib/copy_page_64.S	120
1 file changed, 59 insertions(+), 61 deletions(-)
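For readers who don't speak AT&T assembly, the unrolled path in the diff below is roughly the following C loop: 64 bytes (eight quadwords) per iteration, with a software prefetch five cache lines ahead. This is an illustrative analogue, not kernel code; __builtin_prefetch stands in for prefetcht0. Note how the routine splits the loop so the last five iterations skip the prefetch and never read past the end of the page, which is where the $(4096/64)-5 and $5 loop counts come from:

#include <stdint.h>

#define QWORDS_PER_LINE 8		/* 64-byte cache line */

static void copy_page_unrolled_sketch(void *dst, const void *src)
{
	uint64_t *to = dst;
	const uint64_t *from = src;
	int i, j;

	/* Main loop: all but the last five cache lines, prefetching ahead. */
	for (i = 0; i < 4096/64 - 5; i++) {
		__builtin_prefetch(from + 5 * QWORDS_PER_LINE);	/* prefetcht0 5*64(%rsi) */
		for (j = 0; j < QWORDS_PER_LINE; j++)
			to[j] = from[j];
		from += QWORDS_PER_LINE;
		to += QWORDS_PER_LINE;
	}

	/* Tail: the last five lines, with no prefetch past the page. */
	for (i = 0; i < 5; i++) {
		for (j = 0; j < QWORDS_PER_LINE; j++)
			to[j] = from[j];
		from += QWORDS_PER_LINE;
		to += QWORDS_PER_LINE;
	}
}

Using eight registers per iteration (the assembly's %rax through %r12) decouples the loads from the stores, so the loads of the next line can issue while the previous line's stores drain.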
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 6b34d04d096a..176cca67212b 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -5,91 +5,89 @@
 #include <asm/alternative-asm.h>
 
 	ALIGN
-copy_page_c:
+copy_page_rep:
 	CFI_STARTPROC
-	movl	$4096/8,%ecx
+	movl	$4096/8, %ecx
 	rep	movsq
 	ret
 	CFI_ENDPROC
-ENDPROC(copy_page_c)
+ENDPROC(copy_page_rep)
 
-/* Don't use streaming store because it's better when the target
-   ends up in cache. */
-
-/* Could vary the prefetch distance based on SMP/UP */
+/*
+ * Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
+ * Could vary the prefetch distance based on SMP/UP.
+ */
 
 ENTRY(copy_page)
 	CFI_STARTPROC
-	subq	$2*8,%rsp
+	subq	$2*8, %rsp
 	CFI_ADJUST_CFA_OFFSET 2*8
-	movq	%rbx,(%rsp)
+	movq	%rbx, (%rsp)
 	CFI_REL_OFFSET rbx, 0
-	movq	%r12,1*8(%rsp)
+	movq	%r12, 1*8(%rsp)
 	CFI_REL_OFFSET r12, 1*8
 
-	movl	$(4096/64)-5,%ecx
+	movl	$(4096/64)-5, %ecx
 	.p2align 4
 .Loop64:
 	dec	%rcx
-
-	movq	(%rsi), %rax
-	movq	8 (%rsi), %rbx
-	movq	16 (%rsi), %rdx
-	movq	24 (%rsi), %r8
-	movq	32 (%rsi), %r9
-	movq	40 (%rsi), %r10
-	movq	48 (%rsi), %r11
-	movq	56 (%rsi), %r12
+	movq	0x8*0(%rsi), %rax
+	movq	0x8*1(%rsi), %rbx
+	movq	0x8*2(%rsi), %rdx
+	movq	0x8*3(%rsi), %r8
+	movq	0x8*4(%rsi), %r9
+	movq	0x8*5(%rsi), %r10
+	movq	0x8*6(%rsi), %r11
+	movq	0x8*7(%rsi), %r12
 
 	prefetcht0 5*64(%rsi)
 
-	movq	%rax, (%rdi)
-	movq	%rbx, 8 (%rdi)
-	movq	%rdx, 16 (%rdi)
-	movq	%r8, 24 (%rdi)
-	movq	%r9, 32 (%rdi)
-	movq	%r10, 40 (%rdi)
-	movq	%r11, 48 (%rdi)
-	movq	%r12, 56 (%rdi)
+	movq	%rax, 0x8*0(%rdi)
+	movq	%rbx, 0x8*1(%rdi)
+	movq	%rdx, 0x8*2(%rdi)
+	movq	%r8,  0x8*3(%rdi)
+	movq	%r9,  0x8*4(%rdi)
+	movq	%r10, 0x8*5(%rdi)
+	movq	%r11, 0x8*6(%rdi)
+	movq	%r12, 0x8*7(%rdi)
 
 	leaq	64 (%rsi), %rsi
 	leaq	64 (%rdi), %rdi
 
 	jnz	.Loop64
 
-	movl	$5,%ecx
+	movl	$5, %ecx
 	.p2align 4
 .Loop2:
 	decl	%ecx
 
-	movq	(%rsi), %rax
-	movq	8 (%rsi), %rbx
-	movq	16 (%rsi), %rdx
-	movq	24 (%rsi), %r8
-	movq	32 (%rsi), %r9
-	movq	40 (%rsi), %r10
-	movq	48 (%rsi), %r11
-	movq	56 (%rsi), %r12
+	movq	0x8*0(%rsi), %rax
+	movq	0x8*1(%rsi), %rbx
+	movq	0x8*2(%rsi), %rdx
+	movq	0x8*3(%rsi), %r8
+	movq	0x8*4(%rsi), %r9
+	movq	0x8*5(%rsi), %r10
+	movq	0x8*6(%rsi), %r11
+	movq	0x8*7(%rsi), %r12
 
-	movq	%rax, (%rdi)
-	movq	%rbx, 8 (%rdi)
-	movq	%rdx, 16 (%rdi)
-	movq	%r8, 24 (%rdi)
-	movq	%r9, 32 (%rdi)
-	movq	%r10, 40 (%rdi)
-	movq	%r11, 48 (%rdi)
-	movq	%r12, 56 (%rdi)
+	movq	%rax, 0x8*0(%rdi)
+	movq	%rbx, 0x8*1(%rdi)
+	movq	%rdx, 0x8*2(%rdi)
+	movq	%r8,  0x8*3(%rdi)
+	movq	%r9,  0x8*4(%rdi)
+	movq	%r10, 0x8*5(%rdi)
+	movq	%r11, 0x8*6(%rdi)
+	movq	%r12, 0x8*7(%rdi)
 
-	leaq	64(%rdi),%rdi
-	leaq	64(%rsi),%rsi
-
+	leaq	64(%rdi), %rdi
+	leaq	64(%rsi), %rsi
 	jnz	.Loop2
 
-	movq	(%rsp),%rbx
+	movq	(%rsp), %rbx
 	CFI_RESTORE rbx
-	movq	1*8(%rsp),%r12
+	movq	1*8(%rsp), %r12
 	CFI_RESTORE r12
-	addq	$2*8,%rsp
+	addq	$2*8, %rsp
 	CFI_ADJUST_CFA_OFFSET -2*8
 	ret
.Lcopy_page_end:
@@ -103,7 +101,7 @@ ENDPROC(copy_page)
 
 	.section .altinstr_replacement,"ax"
 1:	.byte 0xeb					/* jmp <disp8> */
-	.byte (copy_page_c - copy_page) - (2f - 1b)	/* offset */
+	.byte (copy_page_rep - copy_page) - (2f - 1b)	/* offset */
 2:
 	.previous
 	.section .altinstructions,"a"
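The .altinstructions machinery in this second hunk is what selects between the two bodies: at boot, if the CPU advertises X86_FEATURE_REP_GOOD, the kernel patches the start of copy_page() into the short jmp encoded above, landing in copy_page_rep. A loose user-space analogue is one-time function-pointer dispatch rather than self-patching; this is only a sketch, and cpu_has_rep_good() is a hypothetical feature probe, not a real kernel or libc API (the two copy sketches are the ones given earlier):

#include <stdbool.h>

/* Hypothetical CPUID-based probe for the rep-movsq-is-fast feature. */
extern bool cpu_has_rep_good(void);

extern void copy_page_rep_sketch(void *to, const void *from);
extern void copy_page_unrolled_sketch(void *to, const void *from);

/* Resolved once at startup, like the boot-time alternatives pass. */
static void (*copy_page_fn)(void *to, const void *from);

static void copy_page_init(void)
{
	copy_page_fn = cpu_has_rep_good() ? copy_page_rep_sketch
					  : copy_page_unrolled_sketch;
}

The kernel patches the code itself instead of using a pointer so that the hot path costs nothing: CPUs without the feature fall straight through into the unrolled loop, and CPUs with it pay only a two-byte jmp.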