diff options
Diffstat (limited to 'arch/x86_64/lib/memset.S')
-rw-r--r-- | arch/x86_64/lib/memset.S | 79 |
1 files changed, 44 insertions, 35 deletions
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S index ad397f2c7de8..09ed1f6b0eaa 100644 --- a/arch/x86_64/lib/memset.S +++ b/arch/x86_64/lib/memset.S | |||
@@ -1,4 +1,9 @@ | |||
1 | /* Copyright 2002 Andi Kleen, SuSE Labs */ | 1 | /* Copyright 2002 Andi Kleen, SuSE Labs */ |
2 | |||
3 | #include <linux/config.h> | ||
4 | #include <linux/linkage.h> | ||
5 | #include <asm/dwarf2.h> | ||
6 | |||
2 | /* | 7 | /* |
3 | * ISO C memset - set a memory block to a byte value. | 8 | * ISO C memset - set a memory block to a byte value. |
4 | * | 9 | * |
@@ -8,11 +13,29 @@ | |||
8 | * | 13 | * |
9 | * rax original destination | 14 | * rax original destination |
10 | */ | 15 | */ |
11 | .globl __memset | 16 | ALIGN |
12 | .globl memset | 17 | memset_c: |
13 | .p2align 4 | 18 | CFI_STARTPROC |
14 | memset: | 19 | movq %rdi,%r9 |
15 | __memset: | 20 | movl %edx,%r8d |
21 | andl $7,%r8d | ||
22 | movl %edx,%ecx | ||
23 | shrl $3,%ecx | ||
24 | /* expand byte value */ | ||
25 | movzbl %sil,%esi | ||
26 | movabs $0x0101010101010101,%rax | ||
27 | mulq %rsi /* with rax, clobbers rdx */ | ||
28 | rep stosq | ||
29 | movl %r8d,%ecx | ||
30 | rep stosb | ||
31 | movq %r9,%rax | ||
32 | ret | ||
33 | CFI_ENDPROC | ||
34 | ENDPROC(memset_c) | ||
35 | |||
36 | ENTRY(memset) | ||
37 | ENTRY(__memset) | ||
38 | CFI_STARTPROC | ||
16 | movq %rdi,%r10 | 39 | movq %rdi,%r10 |
17 | movq %rdx,%r11 | 40 | movq %rdx,%r11 |
18 | 41 | ||
@@ -25,6 +48,7 @@ __memset: | |||
25 | movl %edi,%r9d | 48 | movl %edi,%r9d |
26 | andl $7,%r9d | 49 | andl $7,%r9d |
27 | jnz .Lbad_alignment | 50 | jnz .Lbad_alignment |
51 | CFI_REMEMBER_STATE | ||
28 | .Lafter_bad_alignment: | 52 | .Lafter_bad_alignment: |
29 | 53 | ||
30 | movl %r11d,%ecx | 54 | movl %r11d,%ecx |
@@ -75,6 +99,7 @@ __memset: | |||
75 | movq %r10,%rax | 99 | movq %r10,%rax |
76 | ret | 100 | ret |
77 | 101 | ||
102 | CFI_RESTORE_STATE | ||
78 | .Lbad_alignment: | 103 | .Lbad_alignment: |
79 | cmpq $7,%r11 | 104 | cmpq $7,%r11 |
80 | jbe .Lhandle_7 | 105 | jbe .Lhandle_7 |
@@ -84,42 +109,26 @@ __memset: | |||
84 | addq %r8,%rdi | 109 | addq %r8,%rdi |
85 | subq %r8,%r11 | 110 | subq %r8,%r11 |
86 | jmp .Lafter_bad_alignment | 111 | jmp .Lafter_bad_alignment |
112 | .Lfinal: | ||
113 | CFI_ENDPROC | ||
114 | ENDPROC(memset) | ||
115 | ENDPROC(__memset) | ||
87 | 116 | ||
88 | /* Some CPUs run faster using the string instructions. | 117 | /* Some CPUs run faster using the string instructions. |
89 | It is also a lot simpler. Use this when possible */ | 118 | It is also a lot simpler. Use this when possible */ |
90 | 119 | ||
91 | #include <asm/cpufeature.h> | 120 | #include <asm/cpufeature.h> |
92 | 121 | ||
122 | .section .altinstr_replacement,"ax" | ||
123 | 1: .byte 0xeb /* jmp <disp8> */ | ||
124 | .byte (memset_c - memset) - (2f - 1b) /* offset */ | ||
125 | 2: | ||
126 | .previous | ||
93 | .section .altinstructions,"a" | 127 | .section .altinstructions,"a" |
94 | .align 8 | 128 | .align 8 |
95 | .quad memset | 129 | .quad memset |
96 | .quad memset_c | 130 | .quad 1b |
97 | .byte X86_FEATURE_REP_GOOD | 131 | .byte X86_FEATURE_REP_GOOD |
98 | .byte memset_c_end-memset_c | 132 | .byte .Lfinal - memset |
99 | .byte memset_c_end-memset_c | 133 | .byte 2b - 1b |
100 | .previous | ||
101 | |||
102 | .section .altinstr_replacement,"ax" | ||
103 | /* rdi destination | ||
104 | * rsi value | ||
105 | * rdx count | ||
106 | */ | ||
107 | memset_c: | ||
108 | movq %rdi,%r9 | ||
109 | movl %edx,%r8d | ||
110 | andl $7,%r8d | ||
111 | movl %edx,%ecx | ||
112 | shrl $3,%ecx | ||
113 | /* expand byte value */ | ||
114 | movzbl %sil,%esi | ||
115 | movabs $0x0101010101010101,%rax | ||
116 | mulq %rsi /* with rax, clobbers rdx */ | ||
117 | rep | ||
118 | stosq | ||
119 | movl %r8d,%ecx | ||
120 | rep | ||
121 | stosb | ||
122 | movq %r9,%rax | ||
123 | ret | ||
124 | memset_c_end: | ||
125 | .previous | 134 | .previous |