diff options
author | Fenghua Yu <fenghua.yu@intel.com> | 2011-05-17 18:29:14 -0400 |
---|---|---|
committer | H. Peter Anvin <hpa@linux.intel.com> | 2011-05-17 18:40:27 -0400 |
commit | e365c9df2f2f001450decf9512412d2d5bd1cdef (patch) | |
tree | 3670f466631b7ddeb710f54899020ee4a792edc4 /arch | |
parent | 9072d11da15a71e086eab3b5085184f2c1d06913 (diff) |
x86, mem: clear_page_64.S: Support clear_page() with enhanced REP MOVSB/STOSB
Intel processors are adding enhancements to REP MOVSB/STOSB, and the use of
REP MOVSB/STOSB for optimal memcpy/memset or similar functions is recommended.
Enhancement availability is indicated by CPUID.7.0.EBX[9] (Enhanced REP
MOVSB/STOSB).
Support clear_page() with rep stosb on processors supporting enhanced REP
MOVSB/STOSB. On processors supporting enhanced REP MOVSB/STOSB, the alternative
clear_page_c_e function using enhanced REP STOSB overrides the original function
and the fast string function.
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305671358-14478-6-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/lib/clear_page_64.S | 33 |
1 files changed, 24 insertions, 9 deletions
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index aa4326bfb24a..f2145cfa12a6 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S | |||
@@ -1,5 +1,6 @@ | |||
1 | #include <linux/linkage.h> | 1 | #include <linux/linkage.h> |
2 | #include <asm/dwarf2.h> | 2 | #include <asm/dwarf2.h> |
3 | #include <asm/alternative-asm.h> | ||
3 | 4 | ||
4 | /* | 5 | /* |
5 | * Zero a page. | 6 | * Zero a page. |
@@ -14,6 +15,15 @@ ENTRY(clear_page_c) | |||
14 | CFI_ENDPROC | 15 | CFI_ENDPROC |
15 | ENDPROC(clear_page_c) | 16 | ENDPROC(clear_page_c) |
16 | 17 | ||
18 | ENTRY(clear_page_c_e) | ||
19 | CFI_STARTPROC | ||
20 | movl $4096,%ecx | ||
21 | xorl %eax,%eax | ||
22 | rep stosb | ||
23 | ret | ||
24 | CFI_ENDPROC | ||
25 | ENDPROC(clear_page_c_e) | ||
26 | |||
17 | ENTRY(clear_page) | 27 | ENTRY(clear_page) |
18 | CFI_STARTPROC | 28 | CFI_STARTPROC |
19 | xorl %eax,%eax | 29 | xorl %eax,%eax |
@@ -38,21 +48,26 @@ ENTRY(clear_page) | |||
38 | .Lclear_page_end: | 48 | .Lclear_page_end: |
39 | ENDPROC(clear_page) | 49 | ENDPROC(clear_page) |
40 | 50 | ||
41 | /* Some CPUs run faster using the string instructions. | 51 | /* |
42 | It is also a lot simpler. Use this when possible */ | 52 | * Some CPUs support enhanced REP MOVSB/STOSB instructions. |
53 | * It is recommended to use this when possible. | ||
54 | * If enhanced REP MOVSB/STOSB is not available, try to use fast string. | ||
55 | * Otherwise, use original function. | ||
56 | * | ||
57 | */ | ||
43 | 58 | ||
44 | #include <asm/cpufeature.h> | 59 | #include <asm/cpufeature.h> |
45 | 60 | ||
46 | .section .altinstr_replacement,"ax" | 61 | .section .altinstr_replacement,"ax" |
47 | 1: .byte 0xeb /* jmp <disp8> */ | 62 | 1: .byte 0xeb /* jmp <disp8> */ |
48 | .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ | 63 | .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ |
49 | 2: | 64 | 2: .byte 0xeb /* jmp <disp8> */ |
65 | .byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */ | ||
66 | 3: | ||
50 | .previous | 67 | .previous |
51 | .section .altinstructions,"a" | 68 | .section .altinstructions,"a" |
52 | .align 8 | 69 | altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\ |
53 | .quad clear_page | 70 | .Lclear_page_end-clear_page, 2b-1b |
54 | .quad 1b | 71 | altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \ |
55 | .word X86_FEATURE_REP_GOOD | 72 | .Lclear_page_end-clear_page,3b-2b |
56 | .byte .Lclear_page_end - clear_page | ||
57 | .byte 2b - 1b | ||
58 | .previous | 73 | .previous |