Diffstat:
 -rw-r--r--  arch/x86/include/asm/alternative-asm.h          |  43
 -rw-r--r--  arch/x86/include/asm/alternative.h               |  65
 -rw-r--r--  arch/x86/include/asm/apic.h                      |   2
 -rw-r--r--  arch/x86/include/asm/barrier.h                   |   6
 -rw-r--r--  arch/x86/include/asm/cpufeature.h                |  30
 -rw-r--r--  arch/x86/include/asm/processor.h                 |  16
 -rw-r--r--  arch/x86/include/asm/smap.h                      |  30
 -rw-r--r--  arch/x86/kernel/alternative.c                    | 158
 -rw-r--r--  arch/x86/kernel/cpu/amd.c                        |   5
 -rw-r--r--  arch/x86/kernel/entry_32.S                       |  12
 -rw-r--r--  arch/x86/lib/clear_page_64.S                     |  66
 -rw-r--r--  arch/x86/lib/copy_page_64.S                      |  37
 -rw-r--r--  arch/x86/lib/copy_user_64.S                      |  46
 -rw-r--r--  arch/x86/lib/memcpy_64.S                         |  68
 -rw-r--r--  arch/x86/lib/memmove_64.S                        |  19
 -rw-r--r--  arch/x86/lib/memset_64.S                         |  61
 -rw-r--r--  arch/x86/um/asm/barrier.h                        |   4
 -rw-r--r--  tools/perf/bench/mem-memcpy-x86-64-asm-def.h     |   6
 -rw-r--r--  tools/perf/bench/mem-memcpy-x86-64-asm.S         |   2
 -rw-r--r--  tools/perf/bench/mem-memcpy.c                    | 128
 -rw-r--r--  tools/perf/bench/mem-memset-x86-64-asm-def.h     |   6
 -rw-r--r--  tools/perf/bench/mem-memset-x86-64-asm.S         |   2
 -rw-r--r--  tools/perf/util/include/asm/alternative-asm.h    |   1
 23 files changed, 433 insertions(+), 380 deletions(-)
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 372231c22a47..524bddce0b76 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -18,12 +18,53 @@
18 .endm 18 .endm
19#endif 19#endif
20 20
21.macro altinstruction_entry orig alt feature orig_len alt_len 21.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
22 .long \orig - . 22 .long \orig - .
23 .long \alt - . 23 .long \alt - .
24 .word \feature 24 .word \feature
25 .byte \orig_len 25 .byte \orig_len
26 .byte \alt_len 26 .byte \alt_len
27 .byte \pad_len
28.endm
29
30.macro ALTERNATIVE oldinstr, newinstr, feature
31140:
32 \oldinstr
33141:
34 .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
35142:
36
37 .pushsection .altinstructions,"a"
38 altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
39 .popsection
40
41 .pushsection .altinstr_replacement,"ax"
42143:
43 \newinstr
44144:
45 .popsection
46.endm
47
48.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
49140:
50 \oldinstr
51141:
52 .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
53 .skip -(((145f-144f)-(144f-143f)-(141b-140b)) > 0) * ((145f-144f)-(144f-143f)-(141b-140b)),0x90
54142:
55
56 .pushsection .altinstructions,"a"
57 altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
58 altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
59 .popsection
60
61 .pushsection .altinstr_replacement,"ax"
62143:
63 \newinstr1
64144:
65 \newinstr2
66145:
67 .popsection
27.endm 68.endm
28 69
29#endif /* __ASSEMBLY__ */ 70#endif /* __ASSEMBLY__ */
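
The .skip directive in the new ALTERNATIVE/ALTERNATIVE_2 asm macros above is what implements the build-time padding: it appends 0x90 (NOP) bytes only when the replacement is longer than the original instruction. The apparently convoluted expression works because GAS relational operators evaluate to -1 for "true", which the leading minus cancels. A minimal C sketch of the same arithmetic (alt_pad_bytes is a hypothetical helper, not kernel code):

	/*
	 * Minimal sketch: the number of 0x90 (NOP) bytes appended by
	 *     .skip -((rlen - slen) > 0) * (rlen - slen), 0x90
	 * GAS yields -1 for a true comparison, so the product is either
	 * -(rlen - slen) negated or 0; in C this is just max(0, rlen - slen).
	 */
	static int alt_pad_bytes(int slen, int rlen)
	{
		int d = rlen - slen;	/* replacement length minus original length */

		return d > 0 ? d : 0;	/* pad only when the replacement is longer */
	}
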
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 473bdbee378a..5aef6a97d80e 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -48,8 +48,9 @@ struct alt_instr {
48 s32 repl_offset; /* offset to replacement instruction */ 48 s32 repl_offset; /* offset to replacement instruction */
49 u16 cpuid; /* cpuid bit set for replacement */ 49 u16 cpuid; /* cpuid bit set for replacement */
50 u8 instrlen; /* length of original instruction */ 50 u8 instrlen; /* length of original instruction */
51 u8 replacementlen; /* length of new instruction, <= instrlen */ 51 u8 replacementlen; /* length of new instruction */
52}; 52 u8 padlen; /* length of build-time padding */
53} __packed;
53 54
54extern void alternative_instructions(void); 55extern void alternative_instructions(void);
55extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); 56extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
@@ -76,50 +77,61 @@ static inline int alternatives_text_reserved(void *start, void *end)
76} 77}
77#endif /* CONFIG_SMP */ 78#endif /* CONFIG_SMP */
78 79
79#define OLDINSTR(oldinstr) "661:\n\t" oldinstr "\n662:\n" 80#define b_replacement(num) "664"#num
81#define e_replacement(num) "665"#num
80 82
81#define b_replacement(number) "663"#number 83#define alt_end_marker "663"
82#define e_replacement(number) "664"#number 84#define alt_slen "662b-661b"
85#define alt_pad_len alt_end_marker"b-662b"
86#define alt_total_slen alt_end_marker"b-661b"
87#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f"
83 88
84#define alt_slen "662b-661b" 89#define __OLDINSTR(oldinstr, num) \
85#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f" 90 "661:\n\t" oldinstr "\n662:\n" \
91 ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \
92 "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n"
86 93
87#define ALTINSTR_ENTRY(feature, number) \ 94#define OLDINSTR(oldinstr, num) \
95 __OLDINSTR(oldinstr, num) \
96 alt_end_marker ":\n"
97
98/*
99 * Pad the second replacement alternative with additional NOPs if it is
100 * additionally longer than the first replacement alternative.
101 */
102#define OLDINSTR_2(oldinstr, num1, num2) \
103 __OLDINSTR(oldinstr, num1) \
104 ".skip -(((" alt_rlen(num2) ")-(" alt_rlen(num1) ")-(662b-661b)) > 0) * " \
105 "((" alt_rlen(num2) ")-(" alt_rlen(num1) ")-(662b-661b)),0x90\n" \
106 alt_end_marker ":\n"
107
108#define ALTINSTR_ENTRY(feature, num) \
88 " .long 661b - .\n" /* label */ \ 109 " .long 661b - .\n" /* label */ \
89 " .long " b_replacement(number)"f - .\n" /* new instruction */ \ 110 " .long " b_replacement(num)"f - .\n" /* new instruction */ \
90 " .word " __stringify(feature) "\n" /* feature bit */ \ 111 " .word " __stringify(feature) "\n" /* feature bit */ \
91 " .byte " alt_slen "\n" /* source len */ \ 112 " .byte " alt_total_slen "\n" /* source len */ \
92 " .byte " alt_rlen(number) "\n" /* replacement len */ 113 " .byte " alt_rlen(num) "\n" /* replacement len */ \
114 " .byte " alt_pad_len "\n" /* pad len */
93 115
94#define DISCARD_ENTRY(number) /* rlen <= slen */ \ 116#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \
95 " .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n" 117 b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t"
96
97#define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */ \
98 b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t"
99 118
100/* alternative assembly primitive: */ 119/* alternative assembly primitive: */
101#define ALTERNATIVE(oldinstr, newinstr, feature) \ 120#define ALTERNATIVE(oldinstr, newinstr, feature) \
102 OLDINSTR(oldinstr) \ 121 OLDINSTR(oldinstr, 1) \
103 ".pushsection .altinstructions,\"a\"\n" \ 122 ".pushsection .altinstructions,\"a\"\n" \
104 ALTINSTR_ENTRY(feature, 1) \ 123 ALTINSTR_ENTRY(feature, 1) \
105 ".popsection\n" \ 124 ".popsection\n" \
106 ".pushsection .discard,\"aw\",@progbits\n" \
107 DISCARD_ENTRY(1) \
108 ".popsection\n" \
109 ".pushsection .altinstr_replacement, \"ax\"\n" \ 125 ".pushsection .altinstr_replacement, \"ax\"\n" \
110 ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ 126 ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
111 ".popsection" 127 ".popsection"
112 128
113#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ 129#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
114 OLDINSTR(oldinstr) \ 130 OLDINSTR_2(oldinstr, 1, 2) \
115 ".pushsection .altinstructions,\"a\"\n" \ 131 ".pushsection .altinstructions,\"a\"\n" \
116 ALTINSTR_ENTRY(feature1, 1) \ 132 ALTINSTR_ENTRY(feature1, 1) \
117 ALTINSTR_ENTRY(feature2, 2) \ 133 ALTINSTR_ENTRY(feature2, 2) \
118 ".popsection\n" \ 134 ".popsection\n" \
119 ".pushsection .discard,\"aw\",@progbits\n" \
120 DISCARD_ENTRY(1) \
121 DISCARD_ENTRY(2) \
122 ".popsection\n" \
123 ".pushsection .altinstr_replacement, \"ax\"\n" \ 135 ".pushsection .altinstr_replacement, \"ax\"\n" \
124 ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ 136 ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
125 ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ 137 ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
@@ -146,6 +158,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
146#define alternative(oldinstr, newinstr, feature) \ 158#define alternative(oldinstr, newinstr, feature) \
147 asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") 159 asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
148 160
161#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
162 asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
163
149/* 164/*
150 * Alternative inline assembly with input. 165 * Alternative inline assembly with input.
151 * 166 *
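
The C-side macros mirror the asm macro one to one. For orientation, the sketch below (comments only, illustrative rather than kernel code) maps the bytes emitted by ALTINSTR_ENTRY() onto the fields of struct alt_instr and notes why the old DISCARD_ENTRY() build check could go away:

	/*
	 * Orientation sketch: how one ALTINSTR_ENTRY() record maps onto
	 * struct alt_instr, matching the altinstruction_entry asm macro:
	 *
	 *   .long 661b - .             ->  s32 instr_offset   (relative to the field)
	 *   .long b_replacement(N)f-.  ->  s32 repl_offset
	 *   .word feature              ->  u16 cpuid
	 *   .byte alt_total_slen       ->  u8  instrlen       (oldinstr + NOP padding)
	 *   .byte alt_rlen(N)          ->  u8  replacementlen
	 *   .byte alt_pad_len          ->  u8  padlen
	 *
	 * Because the .skip padding is sized at build time so the replacement
	 * fits into the padded original, the old DISCARD_ENTRY() "replacement
	 * must not be longer than the source" assembly-time check became
	 * unnecessary and was dropped.
	 */
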
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index efc3b22d896e..8118e94d50ab 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -91,7 +91,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
91{ 91{
92 volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); 92 volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
93 93
94 alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP, 94 alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP,
95 ASM_OUTPUT2("=r" (v), "=m" (*addr)), 95 ASM_OUTPUT2("=r" (v), "=m" (*addr)),
96 ASM_OUTPUT2("0" (v), "m" (*addr))); 96 ASM_OUTPUT2("0" (v), "m" (*addr)));
97} 97}
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 2ab1eb33106e..959e45b81fe2 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -95,13 +95,11 @@ do { \
95 * Stop RDTSC speculation. This is needed when you need to use RDTSC 95 * Stop RDTSC speculation. This is needed when you need to use RDTSC
96 * (or get_cycles or vread that possibly accesses the TSC) in a defined 96 * (or get_cycles or vread that possibly accesses the TSC) in a defined
97 * code region. 97 * code region.
98 *
99 * (Could use an alternative three way for this if there was one.)
100 */ 98 */
101static __always_inline void rdtsc_barrier(void) 99static __always_inline void rdtsc_barrier(void)
102{ 100{
103 alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); 101 alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
104 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); 102 "lfence", X86_FEATURE_LFENCE_RDTSC);
105} 103}
106 104
107#endif /* _ASM_X86_BARRIER_H */ 105#endif /* _ASM_X86_BARRIER_H */
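
With alternative_2(), rdtsc_barrier() becomes a single patch site: the original instruction is empty, and boot-time patching drops in MFENCE or LFENCE depending on which feature bit the CPU sets, with the macro's padding providing the space. A small sketch of the documented use, keeping an RDTSC read from being speculated into earlier code (rdtsc_ordered_sketch is a hypothetical helper, not a kernel function, and needs <asm/barrier.h> for rdtsc_barrier()):

	static inline unsigned long long rdtsc_ordered_sketch(void)
	{
		unsigned int lo, hi;

		rdtsc_barrier();	/* patched to MFENCE or LFENCE as appropriate */
		asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
		return ((unsigned long long)hi << 32) | lo;
	}
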
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index d6428ea5d316..0f7a5a1a8db2 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -419,6 +419,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
419 " .word %P0\n" /* 1: do replace */ 419 " .word %P0\n" /* 1: do replace */
420 " .byte 2b - 1b\n" /* source len */ 420 " .byte 2b - 1b\n" /* source len */
421 " .byte 0\n" /* replacement len */ 421 " .byte 0\n" /* replacement len */
422 " .byte 0\n" /* pad len */
422 ".previous\n" 423 ".previous\n"
423 /* skipping size check since replacement size = 0 */ 424 /* skipping size check since replacement size = 0 */
424 : : "i" (X86_FEATURE_ALWAYS) : : t_warn); 425 : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
@@ -433,6 +434,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
433 " .word %P0\n" /* feature bit */ 434 " .word %P0\n" /* feature bit */
434 " .byte 2b - 1b\n" /* source len */ 435 " .byte 2b - 1b\n" /* source len */
435 " .byte 0\n" /* replacement len */ 436 " .byte 0\n" /* replacement len */
437 " .byte 0\n" /* pad len */
436 ".previous\n" 438 ".previous\n"
437 /* skipping size check since replacement size = 0 */ 439 /* skipping size check since replacement size = 0 */
438 : : "i" (bit) : : t_no); 440 : : "i" (bit) : : t_no);
@@ -458,6 +460,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
458 " .word %P1\n" /* feature bit */ 460 " .word %P1\n" /* feature bit */
459 " .byte 2b - 1b\n" /* source len */ 461 " .byte 2b - 1b\n" /* source len */
460 " .byte 4f - 3f\n" /* replacement len */ 462 " .byte 4f - 3f\n" /* replacement len */
463 " .byte 0\n" /* pad len */
461 ".previous\n" 464 ".previous\n"
462 ".section .discard,\"aw\",@progbits\n" 465 ".section .discard,\"aw\",@progbits\n"
463 " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ 466 " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@@ -484,31 +487,30 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
484static __always_inline __pure bool _static_cpu_has_safe(u16 bit) 487static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
485{ 488{
486#ifdef CC_HAVE_ASM_GOTO 489#ifdef CC_HAVE_ASM_GOTO
487/* 490 asm_volatile_goto("1: jmp %l[t_dynamic]\n"
488 * We need to spell the jumps to the compiler because, depending on the offset,
489 * the replacement jump can be bigger than the original jump, and this we cannot
490 * have. Thus, we force the jump to the widest, 4-byte, signed relative
491 * offset even though the last would often fit in less bytes.
492 */
493 asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
494 "2:\n" 491 "2:\n"
492 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
493 "((5f-4f) - (2b-1b)),0x90\n"
494 "3:\n"
495 ".section .altinstructions,\"a\"\n" 495 ".section .altinstructions,\"a\"\n"
496 " .long 1b - .\n" /* src offset */ 496 " .long 1b - .\n" /* src offset */
497 " .long 3f - .\n" /* repl offset */ 497 " .long 4f - .\n" /* repl offset */
498 " .word %P1\n" /* always replace */ 498 " .word %P1\n" /* always replace */
499 " .byte 2b - 1b\n" /* src len */ 499 " .byte 3b - 1b\n" /* src len */
500 " .byte 4f - 3f\n" /* repl len */ 500 " .byte 5f - 4f\n" /* repl len */
501 " .byte 3b - 2b\n" /* pad len */
501 ".previous\n" 502 ".previous\n"
502 ".section .altinstr_replacement,\"ax\"\n" 503 ".section .altinstr_replacement,\"ax\"\n"
503 "3: .byte 0xe9\n .long %l[t_no] - 2b\n" 504 "4: jmp %l[t_no]\n"
504 "4:\n" 505 "5:\n"
505 ".previous\n" 506 ".previous\n"
506 ".section .altinstructions,\"a\"\n" 507 ".section .altinstructions,\"a\"\n"
507 " .long 1b - .\n" /* src offset */ 508 " .long 1b - .\n" /* src offset */
508 " .long 0\n" /* no replacement */ 509 " .long 0\n" /* no replacement */
509 " .word %P0\n" /* feature bit */ 510 " .word %P0\n" /* feature bit */
510 " .byte 2b - 1b\n" /* src len */ 511 " .byte 3b - 1b\n" /* src len */
511 " .byte 0\n" /* repl len */ 512 " .byte 0\n" /* repl len */
513 " .byte 0\n" /* pad len */
512 ".previous\n" 514 ".previous\n"
513 : : "i" (bit), "i" (X86_FEATURE_ALWAYS) 515 : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
514 : : t_dynamic, t_no); 516 : : t_dynamic, t_no);
@@ -528,6 +530,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
528 " .word %P2\n" /* always replace */ 530 " .word %P2\n" /* always replace */
529 " .byte 2b - 1b\n" /* source len */ 531 " .byte 2b - 1b\n" /* source len */
530 " .byte 4f - 3f\n" /* replacement len */ 532 " .byte 4f - 3f\n" /* replacement len */
533 " .byte 0\n" /* pad len */
531 ".previous\n" 534 ".previous\n"
532 ".section .discard,\"aw\",@progbits\n" 535 ".section .discard,\"aw\",@progbits\n"
533 " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ 536 " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@@ -542,6 +545,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
542 " .word %P1\n" /* feature bit */ 545 " .word %P1\n" /* feature bit */
543 " .byte 4b - 3b\n" /* src len */ 546 " .byte 4b - 3b\n" /* src len */
544 " .byte 6f - 5f\n" /* repl len */ 547 " .byte 6f - 5f\n" /* repl len */
548 " .byte 0\n" /* pad len */
545 ".previous\n" 549 ".previous\n"
546 ".section .discard,\"aw\",@progbits\n" 550 ".section .discard,\"aw\",@progbits\n"
547 " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */ 551 " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ec1c93588cef..7be2c9a6caba 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -761,10 +761,10 @@ extern char ignore_fpu_irq;
761#define ARCH_HAS_SPINLOCK_PREFETCH 761#define ARCH_HAS_SPINLOCK_PREFETCH
762 762
763#ifdef CONFIG_X86_32 763#ifdef CONFIG_X86_32
764# define BASE_PREFETCH ASM_NOP4 764# define BASE_PREFETCH ""
765# define ARCH_HAS_PREFETCH 765# define ARCH_HAS_PREFETCH
766#else 766#else
767# define BASE_PREFETCH "prefetcht0 (%1)" 767# define BASE_PREFETCH "prefetcht0 %P1"
768#endif 768#endif
769 769
770/* 770/*
@@ -775,10 +775,9 @@ extern char ignore_fpu_irq;
775 */ 775 */
776static inline void prefetch(const void *x) 776static inline void prefetch(const void *x)
777{ 777{
778 alternative_input(BASE_PREFETCH, 778 alternative_input(BASE_PREFETCH, "prefetchnta %P1",
779 "prefetchnta (%1)",
780 X86_FEATURE_XMM, 779 X86_FEATURE_XMM,
781 "r" (x)); 780 "m" (*(const char *)x));
782} 781}
783 782
784/* 783/*
@@ -788,10 +787,9 @@ static inline void prefetch(const void *x)
788 */ 787 */
789static inline void prefetchw(const void *x) 788static inline void prefetchw(const void *x)
790{ 789{
791 alternative_input(BASE_PREFETCH, 790 alternative_input(BASE_PREFETCH, "prefetchw %P1",
792 "prefetchw (%1)", 791 X86_FEATURE_3DNOWPREFETCH,
793 X86_FEATURE_3DNOW, 792 "m" (*(const char *)x));
794 "r" (x));
795} 793}
796 794
797static inline void spin_lock_prefetch(const void *x) 795static inline void spin_lock_prefetch(const void *x)
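
Two things change in the prefetch helpers: the target is now passed as a memory input ("m" on *(const char *)x) instead of a raw pointer register, which lets the compiler pick the addressing mode and keeps the access visible to it, and prefetchw() now keys off X86_FEATURE_3DNOWPREFETCH, which the arch/x86/kernel/cpu/amd.c hunk further down synthesizes from 3DNow or Long Mode. A standalone sketch of the operand style (prefetch_sketch is illustrative only and bypasses the alternative_input() machinery):

	static inline void prefetch_sketch(const void *x)
	{
		asm volatile("prefetchnta %0" : : "m" (*(const char *)x));
	}
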
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index 8d3120f4e270..ba665ebd17bb 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -27,23 +27,11 @@
27 27
28#ifdef CONFIG_X86_SMAP 28#ifdef CONFIG_X86_SMAP
29 29
30#define ASM_CLAC \ 30#define ASM_CLAC \
31 661: ASM_NOP3 ; \ 31 ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP
32 .pushsection .altinstr_replacement, "ax" ; \ 32
33 662: __ASM_CLAC ; \ 33#define ASM_STAC \
34 .popsection ; \ 34 ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP
35 .pushsection .altinstructions, "a" ; \
36 altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
37 .popsection
38
39#define ASM_STAC \
40 661: ASM_NOP3 ; \
41 .pushsection .altinstr_replacement, "ax" ; \
42 662: __ASM_STAC ; \
43 .popsection ; \
44 .pushsection .altinstructions, "a" ; \
45 altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
46 .popsection
47 35
48#else /* CONFIG_X86_SMAP */ 36#else /* CONFIG_X86_SMAP */
49 37
@@ -61,20 +49,20 @@
61static __always_inline void clac(void) 49static __always_inline void clac(void)
62{ 50{
63 /* Note: a barrier is implicit in alternative() */ 51 /* Note: a barrier is implicit in alternative() */
64 alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP); 52 alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
65} 53}
66 54
67static __always_inline void stac(void) 55static __always_inline void stac(void)
68{ 56{
69 /* Note: a barrier is implicit in alternative() */ 57 /* Note: a barrier is implicit in alternative() */
70 alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP); 58 alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
71} 59}
72 60
73/* These macros can be used in asm() statements */ 61/* These macros can be used in asm() statements */
74#define ASM_CLAC \ 62#define ASM_CLAC \
75 ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP) 63 ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
76#define ASM_STAC \ 64#define ASM_STAC \
77 ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP) 65 ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP)
78 66
79#else /* CONFIG_X86_SMAP */ 67#else /* CONFIG_X86_SMAP */
80 68
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 703130f469ec..af397cc98d05 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -52,10 +52,25 @@ static int __init setup_noreplace_paravirt(char *str)
52__setup("noreplace-paravirt", setup_noreplace_paravirt); 52__setup("noreplace-paravirt", setup_noreplace_paravirt);
53#endif 53#endif
54 54
55#define DPRINTK(fmt, ...) \ 55#define DPRINTK(fmt, args...) \
56do { \ 56do { \
57 if (debug_alternative) \ 57 if (debug_alternative) \
58 printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ 58 printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args); \
59} while (0)
60
61#define DUMP_BYTES(buf, len, fmt, args...) \
62do { \
63 if (unlikely(debug_alternative)) { \
64 int j; \
65 \
66 if (!(len)) \
67 break; \
68 \
69 printk(KERN_DEBUG fmt, ##args); \
70 for (j = 0; j < (len) - 1; j++) \
71 printk(KERN_CONT "%02hhx ", buf[j]); \
72 printk(KERN_CONT "%02hhx\n", buf[j]); \
73 } \
59} while (0) 74} while (0)
60 75
61/* 76/*
@@ -243,12 +258,86 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
243extern s32 __smp_locks[], __smp_locks_end[]; 258extern s32 __smp_locks[], __smp_locks_end[];
244void *text_poke_early(void *addr, const void *opcode, size_t len); 259void *text_poke_early(void *addr, const void *opcode, size_t len);
245 260
246/* Replace instructions with better alternatives for this CPU type. 261/*
247 This runs before SMP is initialized to avoid SMP problems with 262 * Are we looking at a near JMP with a 1 or 4-byte displacement.
248 self modifying code. This implies that asymmetric systems where 263 */
249 APs have less capabilities than the boot processor are not handled. 264static inline bool is_jmp(const u8 opcode)
250 Tough. Make sure you disable such features by hand. */ 265{
266 return opcode == 0xeb || opcode == 0xe9;
267}
268
269static void __init_or_module
270recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
271{
272 u8 *next_rip, *tgt_rip;
273 s32 n_dspl, o_dspl;
274 int repl_len;
275
276 if (a->replacementlen != 5)
277 return;
278
279 o_dspl = *(s32 *)(insnbuf + 1);
280
281 /* next_rip of the replacement JMP */
282 next_rip = repl_insn + a->replacementlen;
283 /* target rip of the replacement JMP */
284 tgt_rip = next_rip + o_dspl;
285 n_dspl = tgt_rip - orig_insn;
286
287 DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl);
288
289 if (tgt_rip - orig_insn >= 0) {
290 if (n_dspl - 2 <= 127)
291 goto two_byte_jmp;
292 else
293 goto five_byte_jmp;
294 /* negative offset */
295 } else {
296 if (((n_dspl - 2) & 0xff) == (n_dspl - 2))
297 goto two_byte_jmp;
298 else
299 goto five_byte_jmp;
300 }
301
302two_byte_jmp:
303 n_dspl -= 2;
304
305 insnbuf[0] = 0xeb;
306 insnbuf[1] = (s8)n_dspl;
307 add_nops(insnbuf + 2, 3);
308
309 repl_len = 2;
310 goto done;
311
312five_byte_jmp:
313 n_dspl -= 5;
251 314
315 insnbuf[0] = 0xe9;
316 *(s32 *)&insnbuf[1] = n_dspl;
317
318 repl_len = 5;
319
320done:
321
322 DPRINTK("final displ: 0x%08x, JMP 0x%lx",
323 n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
324}
325
326static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
327{
328 add_nops(instr + (a->instrlen - a->padlen), a->padlen);
329
330 DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
331 instr, a->instrlen - a->padlen, a->padlen);
332}
333
334/*
335 * Replace instructions with better alternatives for this CPU type. This runs
336 * before SMP is initialized to avoid SMP problems with self modifying code.
337 * This implies that asymmetric systems where APs have less capabilities than
338 * the boot processor are not handled. Tough. Make sure you disable such
339 * features by hand.
340 */
252void __init_or_module apply_alternatives(struct alt_instr *start, 341void __init_or_module apply_alternatives(struct alt_instr *start,
253 struct alt_instr *end) 342 struct alt_instr *end)
254{ 343{
@@ -256,10 +345,10 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
256 u8 *instr, *replacement; 345 u8 *instr, *replacement;
257 u8 insnbuf[MAX_PATCH_LEN]; 346 u8 insnbuf[MAX_PATCH_LEN];
258 347
259 DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); 348 DPRINTK("alt table %p -> %p", start, end);
260 /* 349 /*
261 * The scan order should be from start to end. A later scanned 350 * The scan order should be from start to end. A later scanned
262 * alternative code can overwrite a previous scanned alternative code. 351 * alternative code can overwrite previously scanned alternative code.
263 * Some kernel functions (e.g. memcpy, memset, etc) use this order to 352 * Some kernel functions (e.g. memcpy, memset, etc) use this order to
264 * patch code. 353 * patch code.
265 * 354 *
@@ -267,29 +356,54 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
267 * order. 356 * order.
268 */ 357 */
269 for (a = start; a < end; a++) { 358 for (a = start; a < end; a++) {
359 int insnbuf_sz = 0;
360
270 instr = (u8 *)&a->instr_offset + a->instr_offset; 361 instr = (u8 *)&a->instr_offset + a->instr_offset;
271 replacement = (u8 *)&a->repl_offset + a->repl_offset; 362 replacement = (u8 *)&a->repl_offset + a->repl_offset;
272 BUG_ON(a->replacementlen > a->instrlen);
273 BUG_ON(a->instrlen > sizeof(insnbuf)); 363 BUG_ON(a->instrlen > sizeof(insnbuf));
274 BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32); 364 BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
275 if (!boot_cpu_has(a->cpuid)) 365 if (!boot_cpu_has(a->cpuid)) {
366 if (a->padlen > 1)
367 optimize_nops(a, instr);
368
276 continue; 369 continue;
370 }
371
372 DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d)",
373 a->cpuid >> 5,
374 a->cpuid & 0x1f,
375 instr, a->instrlen,
376 replacement, a->replacementlen);
377
378 DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
379 DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);
277 380
278 memcpy(insnbuf, replacement, a->replacementlen); 381 memcpy(insnbuf, replacement, a->replacementlen);
382 insnbuf_sz = a->replacementlen;
279 383
280 /* 0xe8 is a relative jump; fix the offset. */ 384 /* 0xe8 is a relative jump; fix the offset. */
281 if (*insnbuf == 0xe8 && a->replacementlen == 5) 385 if (*insnbuf == 0xe8 && a->replacementlen == 5) {
282 *(s32 *)(insnbuf + 1) += replacement - instr; 386 *(s32 *)(insnbuf + 1) += replacement - instr;
387 DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
388 *(s32 *)(insnbuf + 1),
389 (unsigned long)instr + *(s32 *)(insnbuf + 1) + 5);
390 }
391
392 if (a->replacementlen && is_jmp(replacement[0]))
393 recompute_jump(a, instr, replacement, insnbuf);
283 394
284 add_nops(insnbuf + a->replacementlen, 395 if (a->instrlen > a->replacementlen) {
285 a->instrlen - a->replacementlen); 396 add_nops(insnbuf + a->replacementlen,
397 a->instrlen - a->replacementlen);
398 insnbuf_sz += a->instrlen - a->replacementlen;
399 }
400 DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr);
286 401
287 text_poke_early(instr, insnbuf, a->instrlen); 402 text_poke_early(instr, insnbuf, insnbuf_sz);
288 } 403 }
289} 404}
290 405
291#ifdef CONFIG_SMP 406#ifdef CONFIG_SMP
292
293static void alternatives_smp_lock(const s32 *start, const s32 *end, 407static void alternatives_smp_lock(const s32 *start, const s32 *end,
294 u8 *text, u8 *text_end) 408 u8 *text, u8 *text_end)
295{ 409{
@@ -371,8 +485,8 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
371 smp->locks_end = locks_end; 485 smp->locks_end = locks_end;
372 smp->text = text; 486 smp->text = text;
373 smp->text_end = text_end; 487 smp->text_end = text_end;
374 DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n", 488 DPRINTK("locks %p -> %p, text %p -> %p, name %s\n",
375 __func__, smp->locks, smp->locks_end, 489 smp->locks, smp->locks_end,
376 smp->text, smp->text_end, smp->name); 490 smp->text, smp->text_end, smp->name);
377 491
378 list_add_tail(&smp->next, &smp_alt_modules); 492 list_add_tail(&smp->next, &smp_alt_modules);
@@ -440,7 +554,7 @@ int alternatives_text_reserved(void *start, void *end)
440 554
441 return 0; 555 return 0;
442} 556}
443#endif 557#endif /* CONFIG_SMP */
444 558
445#ifdef CONFIG_PARAVIRT 559#ifdef CONFIG_PARAVIRT
446void __init_or_module apply_paravirt(struct paravirt_patch_site *start, 560void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
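
struct alt_instr stores PC-relative s32 offsets rather than absolute pointers, so both apply_alternatives() and the new recompute_jump() first turn them back into addresses by adding each offset to the address of its own field. A condensed sketch of that step (resolve_alt_entry is a hypothetical helper; the real code does this inline at the top of the loop):

	#include <linux/types.h>
	#include <asm/alternative.h>	/* struct alt_instr */

	static void resolve_alt_entry(struct alt_instr *a, u8 **instr, u8 **repl)
	{
		*instr = (u8 *)&a->instr_offset + a->instr_offset;
		*repl  = (u8 *)&a->repl_offset + a->repl_offset;

		/*
		 * a->instrlen covers the original instruction plus a->padlen
		 * bytes of build-time padding; when the feature bit is not set,
		 * optimize_nops() rewrites just those padlen bytes into an
		 * optimal NOP sequence instead of leaving the 0x90 run behind.
		 */
	}
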
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a220239cea65..dd9e50500297 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -711,6 +711,11 @@ static void init_amd(struct cpuinfo_x86 *c)
711 set_cpu_bug(c, X86_BUG_AMD_APIC_C1E); 711 set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);
712 712
713 rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); 713 rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
714
715 /* 3DNow or LM implies PREFETCHW */
716 if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
717 if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
718 set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
714} 719}
715 720
716#ifdef CONFIG_X86_32 721#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 31e2d5bf3e38..7e0323cc0b7d 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -816,15 +816,9 @@ ENTRY(simd_coprocessor_error)
816 pushl_cfi $0 816 pushl_cfi $0
817#ifdef CONFIG_X86_INVD_BUG 817#ifdef CONFIG_X86_INVD_BUG
818 /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ 818 /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
819661: pushl_cfi $do_general_protection 819 ALTERNATIVE "pushl_cfi $do_general_protection", \
820662: 820 "pushl $do_simd_coprocessor_error", \
821.section .altinstructions,"a" 821 X86_FEATURE_XMM
822 altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
823.previous
824.section .altinstr_replacement,"ax"
825663: pushl $do_simd_coprocessor_error
826664:
827.previous
828#else 822#else
829 pushl_cfi $do_simd_coprocessor_error 823 pushl_cfi $do_simd_coprocessor_error
830#endif 824#endif
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index f2145cfa12a6..e67e579c93bd 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,31 +1,35 @@
1#include <linux/linkage.h> 1#include <linux/linkage.h>
2#include <asm/dwarf2.h> 2#include <asm/dwarf2.h>
3#include <asm/cpufeature.h>
3#include <asm/alternative-asm.h> 4#include <asm/alternative-asm.h>
4 5
5/* 6/*
6 * Zero a page. 7 * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
7 * rdi page 8 * recommended to use this when possible and we do use them by default.
8 */ 9 * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
9ENTRY(clear_page_c) 10 * Otherwise, use original.
11 */
12
13/*
14 * Zero a page.
15 * %rdi - page
16 */
17ENTRY(clear_page)
10 CFI_STARTPROC 18 CFI_STARTPROC
19
20 ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \
21 "jmp clear_page_c_e", X86_FEATURE_ERMS
22
11 movl $4096/8,%ecx 23 movl $4096/8,%ecx
12 xorl %eax,%eax 24 xorl %eax,%eax
13 rep stosq 25 rep stosq
14 ret 26 ret
15 CFI_ENDPROC 27 CFI_ENDPROC
16ENDPROC(clear_page_c) 28ENDPROC(clear_page)
17 29
18ENTRY(clear_page_c_e) 30ENTRY(clear_page_orig)
19 CFI_STARTPROC 31 CFI_STARTPROC
20 movl $4096,%ecx
21 xorl %eax,%eax
22 rep stosb
23 ret
24 CFI_ENDPROC
25ENDPROC(clear_page_c_e)
26 32
27ENTRY(clear_page)
28 CFI_STARTPROC
29 xorl %eax,%eax 33 xorl %eax,%eax
30 movl $4096/64,%ecx 34 movl $4096/64,%ecx
31 .p2align 4 35 .p2align 4
@@ -45,29 +49,13 @@ ENTRY(clear_page)
45 nop 49 nop
46 ret 50 ret
47 CFI_ENDPROC 51 CFI_ENDPROC
48.Lclear_page_end: 52ENDPROC(clear_page_orig)
49ENDPROC(clear_page)
50
51 /*
52 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
53 * It is recommended to use this when possible.
54 * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
55 * Otherwise, use original function.
56 *
57 */
58 53
59#include <asm/cpufeature.h> 54ENTRY(clear_page_c_e)
60 55 CFI_STARTPROC
61 .section .altinstr_replacement,"ax" 56 movl $4096,%ecx
621: .byte 0xeb /* jmp <disp8> */ 57 xorl %eax,%eax
63 .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ 58 rep stosb
642: .byte 0xeb /* jmp <disp8> */ 59 ret
65 .byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */ 60 CFI_ENDPROC
663: 61ENDPROC(clear_page_c_e)
67 .previous
68 .section .altinstructions,"a"
69 altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
70 .Lclear_page_end-clear_page, 2b-1b
71 altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \
72 .Lclear_page_end-clear_page,3b-2b
73 .previous
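
The converted clear_page() opens with a five-byte JMP to clear_page_orig. On REP_GOOD CPUs that JMP is patched into NOPs so execution falls through into the inline REP STOSQ body, and the ERMS entry, being listed second in .altinstructions and therefore applied later, overrides both with a JMP to clear_page_c_e. The same head-JMP pattern is used by copy_page, the copy_user routines, memcpy, memset and the memmove ERMS path below. As a C analogy of the boot-time priority only (clear_page_which_body is hypothetical; after patching there is no runtime branch):

	/* Needs kernel context: boot_cpu_has() and the symbols from this file. */
	void clear_page_orig(void *page);
	void clear_page_c_e(void *page);

	static void clear_page_which_body(void *page)
	{
		if (boot_cpu_has(X86_FEATURE_ERMS))
			clear_page_c_e(page);	/* REP STOSB variant */
		else if (boot_cpu_has(X86_FEATURE_REP_GOOD))
			;			/* head JMP NOPped out: REP STOSQ body in clear_page() */
		else
			clear_page_orig(page);	/* unrolled 64-byte loop */
	}
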
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 176cca67212b..8239dbcbf984 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -2,23 +2,26 @@
2 2
3#include <linux/linkage.h> 3#include <linux/linkage.h>
4#include <asm/dwarf2.h> 4#include <asm/dwarf2.h>
5#include <asm/cpufeature.h>
5#include <asm/alternative-asm.h> 6#include <asm/alternative-asm.h>
6 7
8/*
9 * Some CPUs run faster using the string copy instructions (sane microcode).
10 * It is also a lot simpler. Use this when possible. But, don't use streaming
11 * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
12 * prefetch distance based on SMP/UP.
13 */
7 ALIGN 14 ALIGN
8copy_page_rep: 15ENTRY(copy_page)
9 CFI_STARTPROC 16 CFI_STARTPROC
17 ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
10 movl $4096/8, %ecx 18 movl $4096/8, %ecx
11 rep movsq 19 rep movsq
12 ret 20 ret
13 CFI_ENDPROC 21 CFI_ENDPROC
14ENDPROC(copy_page_rep) 22ENDPROC(copy_page)
15
16/*
17 * Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
18 * Could vary the prefetch distance based on SMP/UP.
19*/
20 23
21ENTRY(copy_page) 24ENTRY(copy_page_regs)
22 CFI_STARTPROC 25 CFI_STARTPROC
23 subq $2*8, %rsp 26 subq $2*8, %rsp
24 CFI_ADJUST_CFA_OFFSET 2*8 27 CFI_ADJUST_CFA_OFFSET 2*8
@@ -90,21 +93,5 @@ ENTRY(copy_page)
90 addq $2*8, %rsp 93 addq $2*8, %rsp
91 CFI_ADJUST_CFA_OFFSET -2*8 94 CFI_ADJUST_CFA_OFFSET -2*8
92 ret 95 ret
93.Lcopy_page_end:
94 CFI_ENDPROC 96 CFI_ENDPROC
95ENDPROC(copy_page) 97ENDPROC(copy_page_regs)
96
97 /* Some CPUs run faster using the string copy instructions.
98 It is also a lot simpler. Use this when possible */
99
100#include <asm/cpufeature.h>
101
102 .section .altinstr_replacement,"ax"
1031: .byte 0xeb /* jmp <disp8> */
104 .byte (copy_page_rep - copy_page) - (2f - 1b) /* offset */
1052:
106 .previous
107 .section .altinstructions,"a"
108 altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD, \
109 .Lcopy_page_end-copy_page, 2b-1b
110 .previous
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index dee945d55594..fa997dfaef24 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -8,9 +8,6 @@
8 8
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10#include <asm/dwarf2.h> 10#include <asm/dwarf2.h>
11
12#define FIX_ALIGNMENT 1
13
14#include <asm/current.h> 11#include <asm/current.h>
15#include <asm/asm-offsets.h> 12#include <asm/asm-offsets.h>
16#include <asm/thread_info.h> 13#include <asm/thread_info.h>
@@ -19,33 +16,7 @@
19#include <asm/asm.h> 16#include <asm/asm.h>
20#include <asm/smap.h> 17#include <asm/smap.h>
21 18
22/*
23 * By placing feature2 after feature1 in altinstructions section, we logically
24 * implement:
25 * If CPU has feature2, jmp to alt2 is used
26 * else if CPU has feature1, jmp to alt1 is used
27 * else jmp to orig is used.
28 */
29 .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
300:
31 .byte 0xe9 /* 32bit jump */
32 .long \orig-1f /* by default jump to orig */
331:
34 .section .altinstr_replacement,"ax"
352: .byte 0xe9 /* near jump with 32bit immediate */
36 .long \alt1-1b /* offset */ /* or alternatively to alt1 */
373: .byte 0xe9 /* near jump with 32bit immediate */
38 .long \alt2-1b /* offset */ /* or alternatively to alt2 */
39 .previous
40
41 .section .altinstructions,"a"
42 altinstruction_entry 0b,2b,\feature1,5,5
43 altinstruction_entry 0b,3b,\feature2,5,5
44 .previous
45 .endm
46
47 .macro ALIGN_DESTINATION 19 .macro ALIGN_DESTINATION
48#ifdef FIX_ALIGNMENT
49 /* check for bad alignment of destination */ 20 /* check for bad alignment of destination */
50 movl %edi,%ecx 21 movl %edi,%ecx
51 andl $7,%ecx 22 andl $7,%ecx
@@ -67,7 +38,6 @@
67 38
68 _ASM_EXTABLE(100b,103b) 39 _ASM_EXTABLE(100b,103b)
69 _ASM_EXTABLE(101b,103b) 40 _ASM_EXTABLE(101b,103b)
70#endif
71 .endm 41 .endm
72 42
73/* Standard copy_to_user with segment limit checking */ 43/* Standard copy_to_user with segment limit checking */
@@ -79,9 +49,11 @@ ENTRY(_copy_to_user)
79 jc bad_to_user 49 jc bad_to_user
80 cmpq TI_addr_limit(%rax),%rcx 50 cmpq TI_addr_limit(%rax),%rcx
81 ja bad_to_user 51 ja bad_to_user
82 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ 52 ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
83 copy_user_generic_unrolled,copy_user_generic_string, \ 53 "jmp copy_user_generic_string", \
84 copy_user_enhanced_fast_string 54 X86_FEATURE_REP_GOOD, \
55 "jmp copy_user_enhanced_fast_string", \
56 X86_FEATURE_ERMS
85 CFI_ENDPROC 57 CFI_ENDPROC
86ENDPROC(_copy_to_user) 58ENDPROC(_copy_to_user)
87 59
@@ -94,9 +66,11 @@ ENTRY(_copy_from_user)
94 jc bad_from_user 66 jc bad_from_user
95 cmpq TI_addr_limit(%rax),%rcx 67 cmpq TI_addr_limit(%rax),%rcx
96 ja bad_from_user 68 ja bad_from_user
97 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ 69 ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
98 copy_user_generic_unrolled,copy_user_generic_string, \ 70 "jmp copy_user_generic_string", \
99 copy_user_enhanced_fast_string 71 X86_FEATURE_REP_GOOD, \
72 "jmp copy_user_enhanced_fast_string", \
73 X86_FEATURE_ERMS
100 CFI_ENDPROC 74 CFI_ENDPROC
101ENDPROC(_copy_from_user) 75ENDPROC(_copy_from_user)
102 76
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 89b53c9968e7..b046664f5a1c 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -1,12 +1,20 @@
1/* Copyright 2002 Andi Kleen */ 1/* Copyright 2002 Andi Kleen */
2 2
3#include <linux/linkage.h> 3#include <linux/linkage.h>
4
5#include <asm/cpufeature.h> 4#include <asm/cpufeature.h>
6#include <asm/dwarf2.h> 5#include <asm/dwarf2.h>
7#include <asm/alternative-asm.h> 6#include <asm/alternative-asm.h>
8 7
9/* 8/*
9 * We build a jump to memcpy_orig by default which gets NOPped out on
10 * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
11 * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
12 * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
13 */
14
15.weak memcpy
16
17/*
10 * memcpy - Copy a memory block. 18 * memcpy - Copy a memory block.
11 * 19 *
12 * Input: 20 * Input:
@@ -17,15 +25,11 @@
17 * Output: 25 * Output:
18 * rax original destination 26 * rax original destination
19 */ 27 */
28ENTRY(__memcpy)
29ENTRY(memcpy)
30 ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
31 "jmp memcpy_erms", X86_FEATURE_ERMS
20 32
21/*
22 * memcpy_c() - fast string ops (REP MOVSQ) based variant.
23 *
24 * This gets patched over the unrolled variant (below) via the
25 * alternative instructions framework:
26 */
27 .section .altinstr_replacement, "ax", @progbits
28.Lmemcpy_c:
29 movq %rdi, %rax 33 movq %rdi, %rax
30 movq %rdx, %rcx 34 movq %rdx, %rcx
31 shrq $3, %rcx 35 shrq $3, %rcx
@@ -34,29 +38,21 @@
34 movl %edx, %ecx 38 movl %edx, %ecx
35 rep movsb 39 rep movsb
36 ret 40 ret
37.Lmemcpy_e: 41ENDPROC(memcpy)
38 .previous 42ENDPROC(__memcpy)
39 43
40/* 44/*
41 * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than 45 * memcpy_erms() - enhanced fast string memcpy. This is faster and
42 * memcpy_c. Use memcpy_c_e when possible. 46 * simpler than memcpy. Use memcpy_erms when possible.
43 *
44 * This gets patched over the unrolled variant (below) via the
45 * alternative instructions framework:
46 */ 47 */
47 .section .altinstr_replacement, "ax", @progbits 48ENTRY(memcpy_erms)
48.Lmemcpy_c_e:
49 movq %rdi, %rax 49 movq %rdi, %rax
50 movq %rdx, %rcx 50 movq %rdx, %rcx
51 rep movsb 51 rep movsb
52 ret 52 ret
53.Lmemcpy_e_e: 53ENDPROC(memcpy_erms)
54 .previous
55
56.weak memcpy
57 54
58ENTRY(__memcpy) 55ENTRY(memcpy_orig)
59ENTRY(memcpy)
60 CFI_STARTPROC 56 CFI_STARTPROC
61 movq %rdi, %rax 57 movq %rdi, %rax
62 58
@@ -183,26 +179,4 @@ ENTRY(memcpy)
183.Lend: 179.Lend:
184 retq 180 retq
185 CFI_ENDPROC 181 CFI_ENDPROC
186ENDPROC(memcpy) 182ENDPROC(memcpy_orig)
187ENDPROC(__memcpy)
188
189 /*
190 * Some CPUs are adding enhanced REP MOVSB/STOSB feature
191 * If the feature is supported, memcpy_c_e() is the first choice.
192 * If enhanced rep movsb copy is not available, use fast string copy
193 * memcpy_c() when possible. This is faster and code is simpler than
194 * original memcpy().
195 * Otherwise, original memcpy() is used.
196 * In .altinstructions section, ERMS feature is placed after REG_GOOD
197 * feature to implement the right patch order.
198 *
199 * Replace only beginning, memcpy is used to apply alternatives,
200 * so it is silly to overwrite itself with nops - reboot is the
201 * only outcome...
202 */
203 .section .altinstructions, "a"
204 altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
205 .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
206 altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
207 .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
208 .previous
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 9c4b530575da..0f8a0d0331b9 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -5,7 +5,6 @@
5 * This assembly file is re-written from memmove_64.c file. 5 * This assembly file is re-written from memmove_64.c file.
6 * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com> 6 * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
7 */ 7 */
8#define _STRING_C
9#include <linux/linkage.h> 8#include <linux/linkage.h>
10#include <asm/dwarf2.h> 9#include <asm/dwarf2.h>
11#include <asm/cpufeature.h> 10#include <asm/cpufeature.h>
@@ -44,6 +43,8 @@ ENTRY(__memmove)
44 jg 2f 43 jg 2f
45 44
46.Lmemmove_begin_forward: 45.Lmemmove_begin_forward:
46 ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
47
47 /* 48 /*
48 * movsq instruction have many startup latency 49 * movsq instruction have many startup latency
49 * so we handle small size by general register. 50 * so we handle small size by general register.
@@ -207,21 +208,5 @@ ENTRY(__memmove)
20713: 20813:
208 retq 209 retq
209 CFI_ENDPROC 210 CFI_ENDPROC
210
211 .section .altinstr_replacement,"ax"
212.Lmemmove_begin_forward_efs:
213 /* Forward moving data. */
214 movq %rdx, %rcx
215 rep movsb
216 retq
217.Lmemmove_end_forward_efs:
218 .previous
219
220 .section .altinstructions,"a"
221 altinstruction_entry .Lmemmove_begin_forward, \
222 .Lmemmove_begin_forward_efs,X86_FEATURE_ERMS, \
223 .Lmemmove_end_forward-.Lmemmove_begin_forward, \
224 .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
225 .previous
226ENDPROC(__memmove) 211ENDPROC(__memmove)
227ENDPROC(memmove) 212ENDPROC(memmove)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 6f44935c6a60..93118fb23976 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -5,19 +5,30 @@
5#include <asm/cpufeature.h> 5#include <asm/cpufeature.h>
6#include <asm/alternative-asm.h> 6#include <asm/alternative-asm.h>
7 7
8.weak memset
9
8/* 10/*
9 * ISO C memset - set a memory block to a byte value. This function uses fast 11 * ISO C memset - set a memory block to a byte value. This function uses fast
10 * string to get better performance than the original function. The code is 12 * string to get better performance than the original function. The code is
11 * simpler and shorter than the orignal function as well. 13 * simpler and shorter than the orignal function as well.
12 * 14 *
13 * rdi destination 15 * rdi destination
14 * rsi value (char) 16 * rsi value (char)
15 * rdx count (bytes) 17 * rdx count (bytes)
16 * 18 *
17 * rax original destination 19 * rax original destination
18 */ 20 */
19 .section .altinstr_replacement, "ax", @progbits 21ENTRY(memset)
20.Lmemset_c: 22ENTRY(__memset)
23 /*
24 * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
25 * to use it when possible. If not available, use fast string instructions.
26 *
27 * Otherwise, use original memset function.
28 */
29 ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
30 "jmp memset_erms", X86_FEATURE_ERMS
31
21 movq %rdi,%r9 32 movq %rdi,%r9
22 movq %rdx,%rcx 33 movq %rdx,%rcx
23 andl $7,%edx 34 andl $7,%edx
@@ -31,8 +42,8 @@
31 rep stosb 42 rep stosb
32 movq %r9,%rax 43 movq %r9,%rax
33 ret 44 ret
34.Lmemset_e: 45ENDPROC(memset)
35 .previous 46ENDPROC(__memset)
36 47
37/* 48/*
38 * ISO C memset - set a memory block to a byte value. This function uses 49 * ISO C memset - set a memory block to a byte value. This function uses
@@ -45,21 +56,16 @@
45 * 56 *
46 * rax original destination 57 * rax original destination
47 */ 58 */
48 .section .altinstr_replacement, "ax", @progbits 59ENTRY(memset_erms)
49.Lmemset_c_e:
50 movq %rdi,%r9 60 movq %rdi,%r9
51 movb %sil,%al 61 movb %sil,%al
52 movq %rdx,%rcx 62 movq %rdx,%rcx
53 rep stosb 63 rep stosb
54 movq %r9,%rax 64 movq %r9,%rax
55 ret 65 ret
56.Lmemset_e_e: 66ENDPROC(memset_erms)
57 .previous
58
59.weak memset
60 67
61ENTRY(memset) 68ENTRY(memset_orig)
62ENTRY(__memset)
63 CFI_STARTPROC 69 CFI_STARTPROC
64 movq %rdi,%r10 70 movq %rdi,%r10
65 71
@@ -134,23 +140,4 @@ ENTRY(__memset)
134 jmp .Lafter_bad_alignment 140 jmp .Lafter_bad_alignment
135.Lfinal: 141.Lfinal:
136 CFI_ENDPROC 142 CFI_ENDPROC
137ENDPROC(memset) 143ENDPROC(memset_orig)
138ENDPROC(__memset)
139
140 /* Some CPUs support enhanced REP MOVSB/STOSB feature.
141 * It is recommended to use this when possible.
142 *
143 * If enhanced REP MOVSB/STOSB feature is not available, use fast string
144 * instructions.
145 *
146 * Otherwise, use original memset function.
147 *
148 * In .altinstructions section, ERMS feature is placed after REG_GOOD
149 * feature to implement the right patch order.
150 */
151 .section .altinstructions,"a"
152 altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
153 .Lfinal-__memset,.Lmemset_e-.Lmemset_c
154 altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
155 .Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e
156 .previous
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index 2d7d9a1f5b53..8ffd2146fa6a 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -64,8 +64,8 @@
64 */ 64 */
65static inline void rdtsc_barrier(void) 65static inline void rdtsc_barrier(void)
66{ 66{
67 alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); 67 alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
68 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); 68 "lfence", X86_FEATURE_LFENCE_RDTSC);
69} 69}
70 70
71#endif 71#endif
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
index d66ab799b35f..8c0c1a2770c8 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
@@ -1,12 +1,12 @@
1 1
2MEMCPY_FN(__memcpy, 2MEMCPY_FN(memcpy_orig,
3 "x86-64-unrolled", 3 "x86-64-unrolled",
4 "unrolled memcpy() in arch/x86/lib/memcpy_64.S") 4 "unrolled memcpy() in arch/x86/lib/memcpy_64.S")
5 5
6MEMCPY_FN(memcpy_c, 6MEMCPY_FN(__memcpy,
7 "x86-64-movsq", 7 "x86-64-movsq",
8 "movsq-based memcpy() in arch/x86/lib/memcpy_64.S") 8 "movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
9 9
10MEMCPY_FN(memcpy_c_e, 10MEMCPY_FN(memcpy_erms,
11 "x86-64-movsb", 11 "x86-64-movsb",
12 "movsb-based memcpy() in arch/x86/lib/memcpy_64.S") 12 "movsb-based memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
index fcd9cf00600a..e4c2c30143b9 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm.S
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -1,8 +1,6 @@
1#define memcpy MEMCPY /* don't hide glibc's memcpy() */ 1#define memcpy MEMCPY /* don't hide glibc's memcpy() */
2#define altinstr_replacement text 2#define altinstr_replacement text
3#define globl p2align 4; .globl 3#define globl p2align 4; .globl
4#define Lmemcpy_c globl memcpy_c; memcpy_c
5#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e
6#include "../../../arch/x86/lib/memcpy_64.S" 4#include "../../../arch/x86/lib/memcpy_64.S"
7/* 5/*
8 * We need to provide note.GNU-stack section, saying that we want 6 * We need to provide note.GNU-stack section, saying that we want
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index db1d3a29d97f..d3dfb7936dcd 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -36,7 +36,7 @@ static const struct option options[] = {
36 "Specify length of memory to copy. " 36 "Specify length of memory to copy. "
37 "Available units: B, KB, MB, GB and TB (upper and lower)"), 37 "Available units: B, KB, MB, GB and TB (upper and lower)"),
38 OPT_STRING('r', "routine", &routine, "default", 38 OPT_STRING('r', "routine", &routine, "default",
39 "Specify routine to copy"), 39 "Specify routine to copy, \"all\" runs all available routines"),
40 OPT_INTEGER('i', "iterations", &iterations, 40 OPT_INTEGER('i', "iterations", &iterations,
41 "repeat memcpy() invocation this number of times"), 41 "repeat memcpy() invocation this number of times"),
42 OPT_BOOLEAN('c', "cycle", &use_cycle, 42 OPT_BOOLEAN('c', "cycle", &use_cycle,
@@ -135,55 +135,16 @@ struct bench_mem_info {
135 const char *const *usage; 135 const char *const *usage;
136}; 136};
137 137
138static int bench_mem_common(int argc, const char **argv, 138static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen)
139 const char *prefix __maybe_unused,
140 struct bench_mem_info *info)
141{ 139{
142 int i; 140 const struct routine *r = &info->routines[r_idx];
143 size_t len;
144 double totallen;
145 double result_bps[2]; 141 double result_bps[2];
146 u64 result_cycle[2]; 142 u64 result_cycle[2];
147 143
148 argc = parse_options(argc, argv, options,
149 info->usage, 0);
150
151 if (no_prefault && only_prefault) {
152 fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
153 return 1;
154 }
155
156 if (use_cycle)
157 init_cycle();
158
159 len = (size_t)perf_atoll((char *)length_str);
160 totallen = (double)len * iterations;
161
162 result_cycle[0] = result_cycle[1] = 0ULL; 144 result_cycle[0] = result_cycle[1] = 0ULL;
163 result_bps[0] = result_bps[1] = 0.0; 145 result_bps[0] = result_bps[1] = 0.0;
164 146
165 if ((s64)len <= 0) { 147 printf("Routine %s (%s)\n", r->name, r->desc);
166 fprintf(stderr, "Invalid length:%s\n", length_str);
167 return 1;
168 }
169
170 /* same to without specifying either of prefault and no-prefault */
171 if (only_prefault && no_prefault)
172 only_prefault = no_prefault = false;
173
174 for (i = 0; info->routines[i].name; i++) {
175 if (!strcmp(info->routines[i].name, routine))
176 break;
177 }
178 if (!info->routines[i].name) {
179 printf("Unknown routine:%s\n", routine);
180 printf("Available routines...\n");
181 for (i = 0; info->routines[i].name; i++) {
182 printf("\t%s ... %s\n",
183 info->routines[i].name, info->routines[i].desc);
184 }
185 return 1;
186 }
187 148
188 if (bench_format == BENCH_FORMAT_DEFAULT) 149 if (bench_format == BENCH_FORMAT_DEFAULT)
189 printf("# Copying %s Bytes ...\n\n", length_str); 150 printf("# Copying %s Bytes ...\n\n", length_str);
@@ -191,28 +152,17 @@ static int bench_mem_common(int argc, const char **argv,
191 if (!only_prefault && !no_prefault) { 152 if (!only_prefault && !no_prefault) {
192 /* show both of results */ 153 /* show both of results */
193 if (use_cycle) { 154 if (use_cycle) {
194 result_cycle[0] = 155 result_cycle[0] = info->do_cycle(r, len, false);
195 info->do_cycle(&info->routines[i], len, false); 156 result_cycle[1] = info->do_cycle(r, len, true);
196 result_cycle[1] =
197 info->do_cycle(&info->routines[i], len, true);
198 } else { 157 } else {
199 result_bps[0] = 158 result_bps[0] = info->do_gettimeofday(r, len, false);
200 info->do_gettimeofday(&info->routines[i], 159 result_bps[1] = info->do_gettimeofday(r, len, true);
201 len, false);
202 result_bps[1] =
203 info->do_gettimeofday(&info->routines[i],
204 len, true);
205 } 160 }
206 } else { 161 } else {
207 if (use_cycle) { 162 if (use_cycle)
208 result_cycle[pf] = 163 result_cycle[pf] = info->do_cycle(r, len, only_prefault);
209 info->do_cycle(&info->routines[i], 164 else
210 len, only_prefault); 165 result_bps[pf] = info->do_gettimeofday(r, len, only_prefault);
211 } else {
212 result_bps[pf] =
213 info->do_gettimeofday(&info->routines[i],
214 len, only_prefault);
215 }
216 } 166 }
217 167
218 switch (bench_format) { 168 switch (bench_format) {
@@ -265,6 +215,60 @@ static int bench_mem_common(int argc, const char **argv,
265 die("unknown format: %d\n", bench_format); 215 die("unknown format: %d\n", bench_format);
266 break; 216 break;
267 } 217 }
218}
219
220static int bench_mem_common(int argc, const char **argv,
221 const char *prefix __maybe_unused,
222 struct bench_mem_info *info)
223{
224 int i;
225 size_t len;
226 double totallen;
227
228 argc = parse_options(argc, argv, options,
229 info->usage, 0);
230
231 if (no_prefault && only_prefault) {
232 fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
233 return 1;
234 }
235
236 if (use_cycle)
237 init_cycle();
238
239 len = (size_t)perf_atoll((char *)length_str);
240 totallen = (double)len * iterations;
241
242 if ((s64)len <= 0) {
243 fprintf(stderr, "Invalid length:%s\n", length_str);
244 return 1;
245 }
246
247 /* same to without specifying either of prefault and no-prefault */
248 if (only_prefault && no_prefault)
249 only_prefault = no_prefault = false;
250
251 if (!strncmp(routine, "all", 3)) {
252 for (i = 0; info->routines[i].name; i++)
253 __bench_mem_routine(info, i, len, totallen);
254 return 0;
255 }
256
257 for (i = 0; info->routines[i].name; i++) {
258 if (!strcmp(info->routines[i].name, routine))
259 break;
260 }
261 if (!info->routines[i].name) {
262 printf("Unknown routine:%s\n", routine);
263 printf("Available routines...\n");
264 for (i = 0; info->routines[i].name; i++) {
265 printf("\t%s ... %s\n",
266 info->routines[i].name, info->routines[i].desc);
267 }
268 return 1;
269 }
270
271 __bench_mem_routine(info, i, len, totallen);
268 272
269 return 0; 273 return 0;
270} 274}
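
The perf bench refactor pulls the per-routine measurement into __bench_mem_routine() so one invocation can walk the whole routines table: with the updated help text, something like `perf bench mem memcpy -r all` (and the memset counterpart) should run every listed variant in turn and print a result block per routine, which makes it easy to compare the renamed memcpy_orig/__memcpy/memcpy_erms entries against each other.
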
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h
index a71dff97c1f5..f02d028771d9 100644
--- a/tools/perf/bench/mem-memset-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h
@@ -1,12 +1,12 @@
1 1
2MEMSET_FN(__memset, 2MEMSET_FN(memset_orig,
3 "x86-64-unrolled", 3 "x86-64-unrolled",
4 "unrolled memset() in arch/x86/lib/memset_64.S") 4 "unrolled memset() in arch/x86/lib/memset_64.S")
5 5
6MEMSET_FN(memset_c, 6MEMSET_FN(__memset,
7 "x86-64-stosq", 7 "x86-64-stosq",
8 "movsq-based memset() in arch/x86/lib/memset_64.S") 8 "movsq-based memset() in arch/x86/lib/memset_64.S")
9 9
10MEMSET_FN(memset_c_e, 10MEMSET_FN(memset_erms,
11 "x86-64-stosb", 11 "x86-64-stosb",
12 "movsb-based memset() in arch/x86/lib/memset_64.S") 12 "movsb-based memset() in arch/x86/lib/memset_64.S")
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S
index 9e5af89ed13a..de278784c866 100644
--- a/tools/perf/bench/mem-memset-x86-64-asm.S
+++ b/tools/perf/bench/mem-memset-x86-64-asm.S
@@ -1,8 +1,6 @@
1#define memset MEMSET /* don't hide glibc's memset() */ 1#define memset MEMSET /* don't hide glibc's memset() */
2#define altinstr_replacement text 2#define altinstr_replacement text
3#define globl p2align 4; .globl 3#define globl p2align 4; .globl
4#define Lmemset_c globl memset_c; memset_c
5#define Lmemset_c_e globl memset_c_e; memset_c_e
6#include "../../../arch/x86/lib/memset_64.S" 4#include "../../../arch/x86/lib/memset_64.S"
7 5
8/* 6/*
diff --git a/tools/perf/util/include/asm/alternative-asm.h b/tools/perf/util/include/asm/alternative-asm.h
index 6789d788d494..3a3a0f16456a 100644
--- a/tools/perf/util/include/asm/alternative-asm.h
+++ b/tools/perf/util/include/asm/alternative-asm.h
@@ -4,5 +4,6 @@
4/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */ 4/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
5 5
6#define altinstruction_entry # 6#define altinstruction_entry #
7#define ALTERNATIVE_2 #
7 8
8#endif 9#endif