author		Linus Torvalds <torvalds@linux-foundation.org>	2011-05-19 20:36:08 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-05-19 20:36:08 -0400
commit		df48d8716eab9608fe93924e4ae06ff110e8674f (patch)
tree		0fe10733a414b3651e1dae29518b7960a4da0aa4 /arch
parent		acd30250d7d0f495685d1c7c6184636a22fcdf7f (diff)
parent		29510ec3b626c86de9707bb8904ff940d430289b (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (107 commits)
  perf stat: Add more cache-miss percentage printouts
  perf stat: Add -d -d and -d -d -d options to show more CPU events
  ftrace/kbuild: Add recordmcount files to force full build
  ftrace: Add self-tests for multiple function trace users
  ftrace: Modify ftrace_set_filter/notrace to take ops
  ftrace: Allow dynamically allocated function tracers
  ftrace: Implement separate user function filtering
  ftrace: Free hash with call_rcu_sched()
  ftrace: Have global_ops store the functions that are to be traced
  ftrace: Add ops parameter to ftrace_startup/shutdown functions
  ftrace: Add enabled_functions file
  ftrace: Use counters to enable functions to trace
  ftrace: Separate hash allocation and assignment
  ftrace: Create a global_ops to hold the filter and notrace hashes
  ftrace: Use hash instead for FTRACE_FL_FILTER
  ftrace: Replace FTRACE_FL_NOTRACE flag with a hash of ignored functions
  perf bench, x86: Add alternatives-asm.h wrapper
  x86, 64-bit: Fix copy_[to/from]_user() checks for the userspace address limit
  x86, mem: memset_64.S: Optimize memset by enhanced REP MOVSB/STOSB
  x86, mem: memmove_64.S: Optimize memmove by enhanced REP MOVSB/STOSB
  ...
Diffstat (limited to 'arch')
-rw-r--r--  arch/mips/include/asm/jump_label.h      |  22
-rw-r--r--  arch/s390/Kconfig                       |   1
-rw-r--r--  arch/s390/include/asm/ftrace.h          |   4
-rw-r--r--  arch/s390/include/asm/jump_label.h      |  37
-rw-r--r--  arch/s390/kernel/Makefile               |   2
-rw-r--r--  arch/s390/kernel/jump_label.c           |  59
-rw-r--r--  arch/sparc/include/asm/jump_label.h     |  25
-rw-r--r--  arch/x86/include/asm/alternative-asm.h  |   9
-rw-r--r--  arch/x86/include/asm/alternative.h      |   3
-rw-r--r--  arch/x86/include/asm/cpufeature.h       |   1
-rw-r--r--  arch/x86/include/asm/ftrace.h           |   7
-rw-r--r--  arch/x86/include/asm/jump_label.h       |  27
-rw-r--r--  arch/x86/include/asm/setup.h            |   2
-rw-r--r--  arch/x86/include/asm/stacktrace.h       |   3
-rw-r--r--  arch/x86/include/asm/uaccess.h          |   2
-rw-r--r--  arch/x86/kernel/alternative.c           |  11
-rw-r--r--  arch/x86/kernel/cpu/common.c            |   3
-rw-r--r--  arch/x86/kernel/cpu/intel.c             |  19
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c        |  28
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c    |  14
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c  |  37
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c     |   7
-rw-r--r--  arch/x86/kernel/dumpstack.c             |  16
-rw-r--r--  arch/x86/kernel/module.c                |   1
-rw-r--r--  arch/x86/kernel/stacktrace.c            |  13
-rw-r--r--  arch/x86/lib/clear_page_64.S            |  33
-rw-r--r--  arch/x86/lib/copy_user_64.S             |  69
-rw-r--r--  arch/x86/lib/memcpy_64.S                |  45
-rw-r--r--  arch/x86/lib/memmove_64.S               |  29
-rw-r--r--  arch/x86/lib/memset_64.S                |  54
-rw-r--r--  arch/x86/oprofile/backtrace.c           |  13
31 files changed, 416 insertions(+), 180 deletions(-)
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index 7622ccf75076..1881b316ca45 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -20,16 +20,18 @@
 #define WORD_INSN ".word"
 #endif
 
-#define JUMP_LABEL(key, label) \
-	do { \
-		asm goto("1:\tnop\n\t" \
-			"nop\n\t" \
-			".pushsection __jump_table, \"a\"\n\t" \
-			WORD_INSN " 1b, %l[" #label "], %0\n\t" \
-			".popsection\n\t" \
-			: : "i" (key) : : label); \
-	} while (0)
+static __always_inline bool arch_static_branch(struct jump_label_key *key)
+{
+	asm goto("1:\tnop\n\t"
+		"nop\n\t"
+		".pushsection __jump_table, \"aw\"\n\t"
+		WORD_INSN " 1b, %l[l_yes], %0\n\t"
+		".popsection\n\t"
+		: : "i" (key) : : l_yes);
+	return false;
+l_yes:
+	return true;
+}
 
 #endif /* __KERNEL__ */
 
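The conversion above turns the old statement-style JUMP_LABEL() macro into an arch_static_branch() that callers can test directly. A minimal usage sketch of the static-branch interface this series introduces (do_expensive_tracing() is a hypothetical callee; treat the block as illustrative, not as code from any file in this merge):

	#include <linux/jump_label.h>

	static struct jump_label_key tracing_key;	/* starts disabled: the nop is in place */

	void hot_path(void)
	{
		/*
		 * Compiles to a single nop (two on MIPS) until the key is
		 * enabled; enabling live-patches the nop into a jump to the
		 * out-of-line l_yes block.
		 */
		if (static_branch(&tracing_key))
			do_expensive_tracing();	/* hypothetical slow path */
	}

	void enable_tracing(void)  { jump_label_inc(&tracing_key); }
	void disable_tracing(void) { jump_label_dec(&tracing_key); }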
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 2508a6f31588..4a7f14079e03 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -88,6 +88,7 @@ config S390
 	select HAVE_KERNEL_XZ
 	select HAVE_GET_USER_PAGES_FAST
 	select HAVE_ARCH_MUTEX_CPU_RELAX
+	select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
 	select ARCH_INLINE_SPIN_TRYLOCK
 	select ARCH_INLINE_SPIN_TRYLOCK_BH
 	select ARCH_INLINE_SPIN_LOCK
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 3c29be4836ed..b7931faaef6d 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -11,15 +11,13 @@ struct dyn_arch_ftrace { };
 
 #ifdef CONFIG_64BIT
 #define MCOUNT_INSN_SIZE  12
-#define MCOUNT_OFFSET	   8
 #else
 #define MCOUNT_INSN_SIZE  20
-#define MCOUNT_OFFSET	   4
 #endif
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
-	return addr - MCOUNT_OFFSET;
+	return addr;
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
new file mode 100644
index 000000000000..95a6cf2b5b67
--- /dev/null
+++ b/arch/s390/include/asm/jump_label.h
@@ -0,0 +1,37 @@
+#ifndef _ASM_S390_JUMP_LABEL_H
+#define _ASM_S390_JUMP_LABEL_H
+
+#include <linux/types.h>
+
+#define JUMP_LABEL_NOP_SIZE 6
+
+#ifdef CONFIG_64BIT
+#define ASM_PTR ".quad"
+#define ASM_ALIGN ".balign 8"
+#else
+#define ASM_PTR ".long"
+#define ASM_ALIGN ".balign 4"
+#endif
+
+static __always_inline bool arch_static_branch(struct jump_label_key *key)
+{
+	asm goto("0:	brcl 0,0\n"
+		".pushsection __jump_table, \"aw\"\n"
+		ASM_ALIGN "\n"
+		ASM_PTR " 0b, %l[label], %0\n"
+		".popsection\n"
+		: : "X" (key) : : label);
+	return false;
+label:
+	return true;
+}
+
+typedef unsigned long jump_label_t;
+
+struct jump_entry {
+	jump_label_t code;
+	jump_label_t target;
+	jump_label_t key;
+};
+
+#endif
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 64230bc392fa..5ff15dacb571 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -23,7 +23,7 @@ CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w
 obj-y	:=  bitmap.o traps.o time.o process.o base.o early.o setup.o \
 	    processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \
 	    s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o \
-	    vdso.o vtime.o sysinfo.o nmi.o sclp.o
+	    vdso.o vtime.o sysinfo.o nmi.o sclp.o jump_label.o
 
 obj-y	+= $(if $(CONFIG_64BIT),entry64.o,entry.o)
 obj-y	+= $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
new file mode 100644
index 000000000000..44cc06bedf77
--- /dev/null
+++ b/arch/s390/kernel/jump_label.c
@@ -0,0 +1,59 @@
+/*
+ * Jump label s390 support
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/stop_machine.h>
+#include <linux/jump_label.h>
+#include <asm/ipl.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+struct insn {
+	u16 opcode;
+	s32 offset;
+} __packed;
+
+struct insn_args {
+	unsigned long *target;
+	struct insn *insn;
+	ssize_t size;
+};
+
+static int __arch_jump_label_transform(void *data)
+{
+	struct insn_args *args = data;
+	int rc;
+
+	rc = probe_kernel_write(args->target, args->insn, args->size);
+	WARN_ON_ONCE(rc < 0);
+	return 0;
+}
+
+void arch_jump_label_transform(struct jump_entry *entry,
+			       enum jump_label_type type)
+{
+	struct insn_args args;
+	struct insn insn;
+
+	if (type == JUMP_LABEL_ENABLE) {
+		/* brcl 15,offset */
+		insn.opcode = 0xc0f4;
+		insn.offset = (entry->target - entry->code) >> 1;
+	} else {
+		/* brcl 0,0 */
+		insn.opcode = 0xc004;
+		insn.offset = 0;
+	}
+
+	args.target = (void *) entry->code;
+	args.insn = &insn;
+	args.size = JUMP_LABEL_NOP_SIZE;
+
+	stop_machine(__arch_jump_label_transform, &args, NULL);
+}
+
+#endif
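The enable case patches in a brcl 15,<offset> whose immediate counts halfwords relative to the instruction address, hence the >> 1. A worked example under assumed addresses (illustrative only, not taken from the patch):

	/*
	 * Assume entry->code = 0x10000 (the patched site) and
	 * entry->target = 0x10020 (the l_yes label).  Then:
	 *
	 *   insn.opcode = 0xc0f4;                            brcl 15,...
	 *   insn.offset = (0x10020 - 0x10000) >> 1 = 0x10;   16 halfwords
	 *
	 * and the 6 bytes probe_kernel_write() stores are
	 * c0 f4 00 00 00 10: an unconditional jump 32 bytes forward.
	 */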
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index 427d4684e0d2..fc73a82366f8 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -7,17 +7,20 @@
 
 #define JUMP_LABEL_NOP_SIZE 4
 
-#define JUMP_LABEL(key, label) \
-	do { \
-		asm goto("1:\n\t" \
-			 "nop\n\t" \
-			 "nop\n\t" \
-			 ".pushsection __jump_table, \"a\"\n\t"\
-			 ".align 4\n\t" \
-			 ".word 1b, %l[" #label "], %c0\n\t" \
-			 ".popsection \n\t" \
-			 : : "i" (key) : : label);\
-	} while (0)
+static __always_inline bool arch_static_branch(struct jump_label_key *key)
+{
+		asm goto("1:\n\t"
+			 "nop\n\t"
+			 "nop\n\t"
+			 ".pushsection __jump_table, \"aw\"\n\t"
+			 ".align 4\n\t"
+			 ".word 1b, %l[l_yes], %c0\n\t"
+			 ".popsection \n\t"
+			 : : "i" (key) : : l_yes);
+		return false;
+l_yes:
+		return true;
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index a63a68be1cce..94d420b360d1 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -15,4 +15,13 @@
 .endm
 #endif
 
+.macro altinstruction_entry orig alt feature orig_len alt_len
+	.align 8
+	.quad \orig
+	.quad \alt
+	.word \feature
+	.byte \orig_len
+	.byte \alt_len
+.endm
+
 #endif /* __ASSEMBLY__ */
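Each altinstruction_entry invocation emits one record in the layout apply_alternatives() consumes. For reference, the C-side view of that record (paraphrased from struct alt_instr in alternative.h of this era; the field comments are mine):

	struct alt_instr {
		u8 *instr;		/* original instruction to patch */
		u8 *replacement;	/* body in .altinstr_replacement */
		u16 cpuid;		/* CPUID feature bit gating the patch */
		u8  instrlen;		/* length of the original */
		u8  replacementlen;	/* replacement length, <= instrlen */
	};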
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 13009d1af99a..8cdd1e247975 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -4,7 +4,6 @@
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/stringify.h>
-#include <linux/jump_label.h>
 #include <asm/asm.h>
 
 /*
@@ -191,7 +190,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
 extern void text_poke_smp_batch(struct text_poke_param *params, int n);
 
-#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
+#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_JUMP_LABEL)
 #define IDEAL_NOP_SIZE_5 5
 extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
 extern void arch_init_ideal_nop5(void);
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 91f3e087cf21..7f2f7b123293 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -195,6 +195,7 @@
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
 #define X86_FEATURE_FSGSBASE	(9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+#define X86_FEATURE_ERMS	(9*32+ 9) /* Enhanced REP MOVSB/STOSB */
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index db24c2278be0..268c783ab1c0 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -38,11 +38,10 @@ extern void mcount(void);
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
 	/*
-	 * call mcount is "e8 <4 byte offset>"
-	 * The addr points to the 4 byte offset and the caller of this
-	 * function wants the pointer to e8. Simply subtract one.
+	 * addr is the address of the mcount call instruction.
+	 * recordmcount does the necessary offset calculation.
 	 */
-	return addr - 1;
+	return addr;
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 574dbc22893a..a32b18ce6ead 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -5,20 +5,25 @@
 
 #include <linux/types.h>
 #include <asm/nops.h>
+#include <asm/asm.h>
 
 #define JUMP_LABEL_NOP_SIZE 5
 
-# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
+#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
 
-# define JUMP_LABEL(key, label) \
-	do { \
-		asm goto("1:" \
-			JUMP_LABEL_INITIAL_NOP \
-			".pushsection __jump_table, \"aw\" \n\t"\
-			_ASM_PTR "1b, %l[" #label "], %c0 \n\t" \
-			".popsection \n\t" \
-			: : "i" (key) : : label); \
-	} while (0)
+static __always_inline bool arch_static_branch(struct jump_label_key *key)
+{
+	asm goto("1:"
+		JUMP_LABEL_INITIAL_NOP
+		".pushsection __jump_table, \"aw\" \n\t"
+		_ASM_ALIGN "\n\t"
+		_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
+		".popsection \n\t"
+		: : "i" (key) : : l_yes);
+	return false;
+l_yes:
+	return true;
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index db8aa19a08a2..647d8a06ce4f 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -88,7 +88,7 @@ void *extend_brk(size_t size, size_t align);
  * executable.)
  */
 #define RESERVE_BRK(name,sz)						\
-	static void __section(.discard.text) __used			\
+	static void __section(.discard.text) __used notrace		\
 	__brk_reservation_fn_##name##__(void) {				\
 		asm volatile (						\
 			".pushsection .brk_reservation,\"aw\",@nobits;" \
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index d7e89c83645d..70bbe39043a9 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -37,9 +37,6 @@ print_context_stack_bp(struct thread_info *tinfo,
 /* Generic stack tracer with callbacks */
 
 struct stacktrace_ops {
-	void (*warning)(void *data, char *msg);
-	/* msg must contain %s for the symbol */
-	void (*warning_symbol)(void *data, char *msg, unsigned long symbol);
 	void (*address)(void *data, unsigned long address, int reliable);
 	/* On negative return stop dumping */
 	int (*stack)(void *data, char *name);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index abd3e0ea762a..99f0ad753f32 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -42,7 +42,7 @@
  * Returns 0 if the range is valid, nonzero otherwise.
  *
  * This is equivalent to the following test:
- * (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64)
+ * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64)
  *
  * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
  */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 4a234677e213..1eeeafcb4410 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -210,6 +210,15 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
 	u8 insnbuf[MAX_PATCH_LEN];
 
 	DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
+	/*
+	 * The scan order should be from start to end. A later scanned
+	 * alternative code can overwrite a previous scanned alternative code.
+	 * Some kernel functions (e.g. memcpy, memset, etc) use this order to
+	 * patch code.
+	 *
+	 * So be careful if you want to change the scan order to any other
+	 * order.
+	 */
 	for (a = start; a < end; a++) {
 		u8 *instr = a->instr;
 		BUG_ON(a->replacementlen > a->instrlen);
@@ -679,7 +688,7 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
 	__stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
 }
 
-#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
+#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_JUMP_LABEL)
 
 #ifdef CONFIG_X86_64
 unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
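The scan-order comment added above matters because two entries may name the same origin. A condensed sketch of what the apply_alternatives() loop does per entry (not a verbatim copy; validity checks and NOP-optimization details are omitted):

	for (a = start; a < end; a++) {			/* section order */
		if (!boot_cpu_has(a->cpuid))		/* feature absent: keep current bytes */
			continue;
		memcpy(insnbuf, a->replacement, a->replacementlen);
		add_nops(insnbuf + a->replacementlen,	/* pad the tail */
			 a->instrlen - a->replacementlen);
		text_poke_early(a->instr, insnbuf, a->instrlen);
	}
	/*
	 * memcpy/memset/clear_page list REP_GOOD before ERMS, so on a CPU
	 * with both features the ERMS body is patched last and wins.
	 */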
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e2ced0074a45..173f3a3fa1a6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -565,8 +565,7 @@ void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
 
 		cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
 
-		if (eax > 0)
-			c->x86_capability[9] = ebx;
+		c->x86_capability[9] = ebx;
 	}
 
 	/* AMD-defined flags: level 0x80000001 */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index df86bc8c859d..fc73a34ba8c9 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -29,10 +29,10 @@
 
 static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 {
+	u64 misc_enable;
+
 	/* Unmask CPUID levels if masked: */
 	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
-		u64 misc_enable;
-
 		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
 
 		if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) {
@@ -118,8 +118,6 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 	 * (model 2) with the same problem.
 	 */
 	if (c->x86 == 15) {
-		u64 misc_enable;
-
 		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
 
 		if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) {
@@ -130,6 +128,19 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 		}
 	}
 #endif
+
+	/*
+	 * If fast string is not enabled in IA32_MISC_ENABLE for any reason,
+	 * clear the fast string and enhanced fast string CPU capabilities.
+	 */
+	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
+		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
+		if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
+			printk(KERN_INFO "Disabled fast string operations\n");
+			setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
+			setup_clear_cpu_cap(X86_FEATURE_ERMS);
+		}
+	}
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e638689279d3..3a0338b4b179 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,6 +31,7 @@
 #include <asm/nmi.h>
 #include <asm/compat.h>
 #include <asm/smp.h>
+#include <asm/alternative.h>
 
 #if 0
 #undef wrmsrl
@@ -363,12 +364,18 @@ again:
 	return new_raw_count;
 }
 
-/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */
 static inline int x86_pmu_addr_offset(int index)
 {
-	if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
-		return index << 1;
-	return index;
+	int offset;
+
+	/* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
+	alternative_io(ASM_NOP2,
+		       "shll $1, %%eax",
+		       X86_FEATURE_PERFCTR_CORE,
+		       "=a" (offset),
+		       "a" (index));
+
+	return offset;
 }
 
 static inline unsigned int x86_pmu_config_addr(int index)
@@ -1766,17 +1773,6 @@ static struct pmu pmu = {
  * callchain support
  */
 
-static void
-backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
-	/* Ignore warnings */
-}
-
-static void backtrace_warning(void *data, char *msg)
-{
-	/* Ignore warnings */
-}
-
 static int backtrace_stack(void *data, char *name)
 {
 	return 0;
@@ -1790,8 +1786,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 }
 
 static const struct stacktrace_ops backtrace_ops = {
-	.warning		= backtrace_warning,
-	.warning_symbol		= backtrace_warning_symbol,
 	.stack			= backtrace_stack,
 	.address		= backtrace_address,
 	.walk_stack		= print_context_stack_bp,
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index cf4e369cea67..fe29c1d2219e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -96,12 +96,14 @@ static __initconst const u64 amd_hw_cache_event_ids
  */
 static const u64 amd_perfmon_event_map[] =
 {
   [PERF_COUNT_HW_CPU_CYCLES]			= 0x0076,
   [PERF_COUNT_HW_INSTRUCTIONS]			= 0x00c0,
   [PERF_COUNT_HW_CACHE_REFERENCES]		= 0x0080,
   [PERF_COUNT_HW_CACHE_MISSES]			= 0x0081,
   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]		= 0x00c2,
   [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
+  [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x00d0, /* "Decoder empty" event */
+  [PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= 0x00d1, /* "Dispatch stalls" event */
 };
 
 static u64 amd_pmu_event_map(int hw_event)
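Once mapped, the two new generic events are reachable from userspace like any other PERF_TYPE_HARDWARE event. A self-contained sketch using perf_event_open(2) (error handling and the measured workload are elided; illustrative, not a tools/perf excerpt):

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <string.h>
	#include <stdio.h>

	int main(void)
	{
		struct perf_event_attr attr;
		long long count = 0;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.size   = sizeof(attr);
		attr.type   = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND;

		/* perf_event_open has no glibc wrapper; go through syscall() */
		fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);

		/* ... run the workload to be measured ... */

		read(fd, &count, sizeof(count));
		printf("frontend stall cycles: %lld\n", count);
		close(fd);
		return 0;
	}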
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 447a28de6f09..41178c826c48 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -36,7 +36,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
 	[PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
 };
 
-static struct event_constraint intel_core_event_constraints[] =
+static struct event_constraint intel_core_event_constraints[] __read_mostly =
 {
 	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
 	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -47,7 +47,7 @@ static struct event_constraint intel_core_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_core2_event_constraints[] =
+static struct event_constraint intel_core2_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -70,7 +70,7 @@ static struct event_constraint intel_core2_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_nehalem_event_constraints[] =
+static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -86,19 +86,19 @@ static struct event_constraint intel_nehalem_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct extra_reg intel_nehalem_extra_regs[] =
+static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
 	EVENT_EXTRA_END
 };
 
-static struct event_constraint intel_nehalem_percore_constraints[] =
+static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly =
 {
 	INTEL_EVENT_CONSTRAINT(0xb7, 0),
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_westmere_event_constraints[] =
+static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -110,7 +110,7 @@ static struct event_constraint intel_westmere_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_snb_event_constraints[] =
+static struct event_constraint intel_snb_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -123,21 +123,21 @@ static struct event_constraint intel_snb_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct extra_reg intel_westmere_extra_regs[] =
+static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
 	EVENT_EXTRA_END
 };
 
-static struct event_constraint intel_westmere_percore_constraints[] =
+static struct event_constraint intel_westmere_percore_constraints[] __read_mostly =
 {
 	INTEL_EVENT_CONSTRAINT(0xb7, 0),
 	INTEL_EVENT_CONSTRAINT(0xbb, 0),
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_gen_event_constraints[] =
+static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -1440,6 +1440,11 @@ static __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
+		/* UOPS_ISSUED.STALLED_CYCLES */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
+
 		if (ebx & 0x40) {
 			/*
 			 * Erratum AAJ80 detected, we work it around by using
@@ -1480,6 +1485,12 @@ static __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_westmere_extra_regs;
+
+		/* UOPS_ISSUED.STALLED_CYCLES */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
+
 		pr_cont("Westmere events, ");
 		break;
 
@@ -1491,6 +1502,12 @@ static __init int intel_pmu_init(void)
 
 		x86_pmu.event_constraints = intel_snb_event_constraints;
 		x86_pmu.pebs_constraints = intel_snb_pebs_events;
+
+		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+		/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1;
+
 		pr_cont("SandyBridge events, ");
 		break;
 
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index e93fcd55fae1..ead584fb6a7d 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -468,7 +468,7 @@ static struct p4_event_bind p4_event_bind_map[] = {
 		.opcode		= P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
 		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
 		.escr_emask	=
-		P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
+			P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
 		.cntr		= { {12, 13, 16}, {14, 15, 17} },
 	},
 	[P4_EVENT_X87_ASSIST] = {
@@ -912,8 +912,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 	int idx, handled = 0;
 	u64 val;
 
-	data.addr = 0;
-	data.raw = NULL;
+	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -1197,7 +1196,7 @@ static __init int p4_pmu_init(void)
 {
 	unsigned int low, high;
 
-	/* If we get stripped -- indexig fails */
+	/* If we get stripped -- indexing fails */
 	BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);
 
 	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index e2a3f0606da4..f478ff6877ef 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -135,20 +135,6 @@ print_context_stack_bp(struct thread_info *tinfo,
 }
 EXPORT_SYMBOL_GPL(print_context_stack_bp);
 
-
-static void
-print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
-	printk(data);
-	print_symbol(msg, symbol);
-	printk("\n");
-}
-
-static void print_trace_warning(void *data, char *msg)
-{
-	printk("%s%s\n", (char *)data, msg);
-}
-
 static int print_trace_stack(void *data, char *name)
 {
 	printk("%s <%s> ", (char *)data, name);
@@ -166,8 +152,6 @@ static void print_trace_address(void *data, unsigned long addr, int reliable)
 }
 
 static const struct stacktrace_ops print_trace_ops = {
-	.warning		= print_trace_warning,
-	.warning_symbol		= print_trace_warning_symbol,
 	.stack			= print_trace_stack,
 	.address		= print_trace_address,
 	.walk_stack		= print_context_stack,
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index ab23f1ad4bf1..52f256f2cc81 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -24,6 +24,7 @@
 #include <linux/bug.h>
 #include <linux/mm.h>
 #include <linux/gfp.h>
+#include <linux/jump_label.h>
 
 #include <asm/system.h>
 #include <asm/page.h>
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 6515733a289d..55d9bc03f696 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -9,15 +9,6 @@
 #include <linux/uaccess.h>
 #include <asm/stacktrace.h>
 
-static void save_stack_warning(void *data, char *msg)
-{
-}
-
-static void
-save_stack_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
-}
-
 static int save_stack_stack(void *data, char *name)
 {
 	return 0;
@@ -53,16 +44,12 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable)
 }
 
 static const struct stacktrace_ops save_stack_ops = {
-	.warning	= save_stack_warning,
-	.warning_symbol	= save_stack_warning_symbol,
 	.stack		= save_stack_stack,
 	.address	= save_stack_address,
 	.walk_stack	= print_context_stack,
 };
 
 static const struct stacktrace_ops save_stack_ops_nosched = {
-	.warning	= save_stack_warning,
-	.warning_symbol	= save_stack_warning_symbol,
 	.stack		= save_stack_stack,
 	.address	= save_stack_address_nosched,
 	.walk_stack	= print_context_stack,
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index aa4326bfb24a..f2145cfa12a6 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,5 +1,6 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/alternative-asm.h>
 
 /*
  * Zero a page.
@@ -14,6 +15,15 @@ ENTRY(clear_page_c)
 	CFI_ENDPROC
 ENDPROC(clear_page_c)
 
+ENTRY(clear_page_c_e)
+	CFI_STARTPROC
+	movl $4096,%ecx
+	xorl %eax,%eax
+	rep stosb
+	ret
+	CFI_ENDPROC
+ENDPROC(clear_page_c_e)
+
 ENTRY(clear_page)
 	CFI_STARTPROC
 	xorl %eax,%eax
@@ -38,21 +48,26 @@ ENTRY(clear_page)
 .Lclear_page_end:
 ENDPROC(clear_page)
 
-	/* Some CPUs run faster using the string instructions.
-	   It is also a lot simpler. Use this when possible */
+	/*
+	 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
+	 * It is recommended to use this when possible.
+	 * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
+	 * Otherwise, use original function.
+	 *
+	 */
 
 #include <asm/cpufeature.h>
 
 	.section .altinstr_replacement,"ax"
 1:	.byte 0xeb					/* jmp <disp8> */
 	.byte (clear_page_c - clear_page) - (2f - 1b)	/* offset */
-2:
+2:	.byte 0xeb					/* jmp <disp8> */
+	.byte (clear_page_c_e - clear_page) - (3f - 2b)	/* offset */
+3:
 	.previous
 	.section .altinstructions,"a"
-	.align 8
-	.quad clear_page
-	.quad 1b
-	.word X86_FEATURE_REP_GOOD
-	.byte .Lclear_page_end - clear_page
-	.byte 2b - 1b
+	altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
+			     .Lclear_page_end-clear_page, 2b-1b
+	altinstruction_entry clear_page,2b,X86_FEATURE_ERMS,	\
+			     .Lclear_page_end-clear_page,3b-2b
 	.previous
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 99e482615195..024840266ba0 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -15,23 +15,30 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
 
-	.macro ALTERNATIVE_JUMP feature,orig,alt
+/*
+ * By placing feature2 after feature1 in altinstructions section, we logically
+ * implement:
+ * If CPU has feature2, jmp to alt2 is used
+ * else if CPU has feature1, jmp to alt1 is used
+ * else jmp to orig is used.
+ */
+	.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
 0:
 	.byte 0xe9	/* 32bit jump */
 	.long \orig-1f	/* by default jump to orig */
 1:
 	.section .altinstr_replacement,"ax"
 2:	.byte 0xe9			/* near jump with 32bit immediate */
-	.long \alt-1b /* offset */	/* or alternatively to alt */
+	.long \alt1-1b /* offset */	/* or alternatively to alt1 */
+3:	.byte 0xe9			/* near jump with 32bit immediate */
+	.long \alt2-1b /* offset */	/* or alternatively to alt2 */
 	.previous
+
 	.section .altinstructions,"a"
-	.align 8
-	.quad 0b
-	.quad 2b
-	.word \feature			/* when feature is set */
-	.byte 5
-	.byte 5
+	altinstruction_entry 0b,2b,\feature1,5,5
+	altinstruction_entry 0b,3b,\feature2,5,5
 	.previous
 	.endm
 
37 44
@@ -72,8 +79,10 @@ ENTRY(_copy_to_user)
 	addq %rdx,%rcx
 	jc bad_to_user
 	cmpq TI_addr_limit(%rax),%rcx
-	jae bad_to_user
-	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+	ja bad_to_user
+	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
+		copy_user_generic_unrolled,copy_user_generic_string,	\
+		copy_user_enhanced_fast_string
 	CFI_ENDPROC
 ENDPROC(_copy_to_user)
 
@@ -85,8 +94,10 @@ ENTRY(_copy_from_user)
 	addq %rdx,%rcx
 	jc bad_from_user
 	cmpq TI_addr_limit(%rax),%rcx
-	jae bad_from_user
-	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+	ja bad_from_user
+	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
+		copy_user_generic_unrolled,copy_user_generic_string,	\
+		copy_user_enhanced_fast_string
 	CFI_ENDPROC
 ENDPROC(_copy_from_user)
 
@@ -255,3 +266,37 @@ ENTRY(copy_user_generic_string)
 	.previous
 	CFI_ENDPROC
 ENDPROC(copy_user_generic_string)
+
+/*
+ * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
+ * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
+ *
+ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count
+ *
+ * Output:
+ * eax uncopied bytes or 0 if successful.
+ */
+ENTRY(copy_user_enhanced_fast_string)
+	CFI_STARTPROC
+	andl %edx,%edx
+	jz 2f
+	movl %edx,%ecx
+1:	rep
+	movsb
+2:	xorl %eax,%eax
+	ret
+
+	.section .fixup,"ax"
+12:	movl %ecx,%edx		/* ecx is zerorest also */
+	jmp copy_user_handle_tail
+	.previous
+
+	.section __ex_table,"a"
+	.align 8
+	.quad 1b,12b
+	.previous
+	CFI_ENDPROC
+ENDPROC(copy_user_enhanced_fast_string)
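Restating the three-way ALTERNATIVE_JUMP dispatch described in the comment above in C terms (an illustrative paraphrase; the decision is resolved once at boot by apply_alternatives(), not at runtime):

	if (boot_cpu_has(X86_FEATURE_ERMS))		/* feature2 */
		copy_user_enhanced_fast_string(...);
	else if (boot_cpu_has(X86_FEATURE_REP_GOOD))	/* feature1 */
		copy_user_generic_string(...);
	else
		copy_user_generic_unrolled(...);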
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 75ef61e35e38..daab21dae2d1 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -4,6 +4,7 @@
 
 #include <asm/cpufeature.h>
 #include <asm/dwarf2.h>
+#include <asm/alternative-asm.h>
 
 /*
  * memcpy - Copy a memory block.
@@ -37,6 +38,23 @@
 .Lmemcpy_e:
 	.previous
 
+/*
+ * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
+ * memcpy_c. Use memcpy_c_e when possible.
+ *
+ * This gets patched over the unrolled variant (below) via the
+ * alternative instructions framework:
+ */
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemcpy_c_e:
+	movq %rdi, %rax
+
+	movl %edx, %ecx
+	rep movsb
+	ret
+.Lmemcpy_e_e:
+	.previous
+
 ENTRY(__memcpy)
 ENTRY(memcpy)
 	CFI_STARTPROC
@@ -171,21 +189,22 @@ ENDPROC(memcpy)
 ENDPROC(__memcpy)
 
 	/*
-	 * Some CPUs run faster using the string copy instructions.
-	 * It is also a lot simpler. Use this when possible:
-	 */
-
-	.section .altinstructions, "a"
-	.align 8
-	.quad memcpy
-	.quad .Lmemcpy_c
-	.word X86_FEATURE_REP_GOOD
-
-	/*
+	 * Some CPUs are adding enhanced REP MOVSB/STOSB feature
+	 * If the feature is supported, memcpy_c_e() is the first choice.
+	 * If enhanced rep movsb copy is not available, use fast string copy
+	 * memcpy_c() when possible. This is faster and code is simpler than
+	 * original memcpy().
+	 * Otherwise, original memcpy() is used.
+	 * In .altinstructions section, ERMS feature is placed after REG_GOOD
+	 * feature to implement the right patch order.
+	 *
 	 * Replace only beginning, memcpy is used to apply alternatives,
 	 * so it is silly to overwrite itself with nops - reboot is the
 	 * only outcome...
 	 */
-	.byte .Lmemcpy_e - .Lmemcpy_c
-	.byte .Lmemcpy_e - .Lmemcpy_c
+	.section .altinstructions, "a"
+	altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
+			     .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
+	altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
+			     .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
 	.previous
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 0ecb8433e5a8..d0ec9c2936d7 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -8,6 +8,7 @@
 #define _STRING_C
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
 
 #undef memmove
 
@@ -24,6 +25,7 @@
  */
 ENTRY(memmove)
 	CFI_STARTPROC
+
 	/* Handle more 32bytes in loop */
 	mov %rdi, %rax
 	cmp $0x20, %rdx
@@ -31,8 +33,13 @@ ENTRY(memmove)
 
 	/* Decide forward/backward copy mode */
 	cmp %rdi, %rsi
-	jb 2f
+	jge .Lmemmove_begin_forward
+	mov %rsi, %r8
+	add %rdx, %r8
+	cmp %rdi, %r8
+	jg 2f
 
+.Lmemmove_begin_forward:
 	/*
 	 * movsq instruction have many startup latency
 	 * so we handle small size by general register.
@@ -78,6 +85,8 @@ ENTRY(memmove)
 	rep movsq
 	movq %r11, (%r10)
 	jmp 13f
+.Lmemmove_end_forward:
+
 	/*
 	 * Handle data backward by movsq.
 	 */
@@ -194,4 +203,22 @@ ENTRY(memmove)
 13:
 	retq
 	CFI_ENDPROC
+
+	.section .altinstr_replacement,"ax"
+.Lmemmove_begin_forward_efs:
+	/* Forward moving data. */
+	movq %rdx, %rcx
+	rep movsb
+	retq
+.Lmemmove_end_forward_efs:
+	.previous
+
+	.section .altinstructions,"a"
+	.align 8
+	.quad .Lmemmove_begin_forward
+	.quad .Lmemmove_begin_forward_efs
+	.word X86_FEATURE_ERMS
+	.byte .Lmemmove_end_forward-.Lmemmove_begin_forward
+	.byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
+	.previous
 ENDPROC(memmove)
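The new forward/backward decision at the top of memmove can be read as the following C predicate (a sketch of the branch logic in the hunks above; forward_copy()/backward_copy() are hypothetical stand-ins for the respective asm paths):

	void *memmove_sketch(void *dst, const void *src, size_t n)
	{
		if (src >= dst || (const char *)src + n <= (char *)dst)
			/*
			 * No destructive overlap: take the forward path,
			 * which ERMS machines replace with a bare rep movsb.
			 */
			return forward_copy(dst, src, n);
		/* dst overlaps the tail of src: copy backward */
		return backward_copy(dst, src, n);
	}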
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 09d344269652..79bd454b78a3 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -2,9 +2,13 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
 
 /*
- * ISO C memset - set a memory block to a byte value.
+ * ISO C memset - set a memory block to a byte value. This function uses fast
+ * string to get better performance than the original function. The code is
+ * simpler and shorter than the orignal function as well.
  *
  * rdi   destination
  * rsi   value (char)
@@ -31,6 +35,28 @@
 .Lmemset_e:
 	.previous
 
+/*
+ * ISO C memset - set a memory block to a byte value. This function uses
+ * enhanced rep stosb to override the fast string function.
+ * The code is simpler and shorter than the fast string function as well.
+ *
+ * rdi   destination
+ * rsi   value (char)
+ * rdx   count (bytes)
+ *
+ * rax   original destination
+ */
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemset_c_e:
+	movq %rdi,%r9
+	movb %sil,%al
+	movl %edx,%ecx
+	rep stosb
+	movq %r9,%rax
+	ret
+.Lmemset_e_e:
+	.previous
+
 ENTRY(memset)
 ENTRY(__memset)
 	CFI_STARTPROC
@@ -112,16 +138,20 @@ ENTRY(__memset)
 ENDPROC(memset)
 ENDPROC(__memset)
 
-	/* Some CPUs run faster using the string instructions.
-	   It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>
-
+	/* Some CPUs support enhanced REP MOVSB/STOSB feature.
+	 * It is recommended to use this when possible.
+	 *
+	 * If enhanced REP MOVSB/STOSB feature is not available, use fast string
+	 * instructions.
+	 *
+	 * Otherwise, use original memset function.
+	 *
+	 * In .altinstructions section, ERMS feature is placed after REG_GOOD
+	 * feature to implement the right patch order.
+	 */
 	.section .altinstructions,"a"
-	.align 8
-	.quad memset
-	.quad .Lmemset_c
-	.word X86_FEATURE_REP_GOOD
-	.byte .Lfinal - memset
-	.byte .Lmemset_e - .Lmemset_c
+	altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
+			     .Lfinal-memset,.Lmemset_e-.Lmemset_c
+	altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
+			     .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e
 	.previous
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 2d49d4e19a36..a5b64ab4cd6e 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -16,17 +16,6 @@
 #include <asm/stacktrace.h>
 #include <linux/compat.h>
 
-static void backtrace_warning_symbol(void *data, char *msg,
-				     unsigned long symbol)
-{
-	/* Ignore warnings */
-}
-
-static void backtrace_warning(void *data, char *msg)
-{
-	/* Ignore warnings */
-}
-
 static int backtrace_stack(void *data, char *name)
 {
 	/* Yes, we want all stacks */
@@ -42,8 +31,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 }
 
 static struct stacktrace_ops backtrace_ops = {
-	.warning	= backtrace_warning,
-	.warning_symbol	= backtrace_warning_symbol,
 	.stack		= backtrace_stack,
 	.address	= backtrace_address,
 	.walk_stack	= print_context_stack,