author    Ingo Molnar <mingo@kernel.org>  2012-08-21 05:27:00 -0400
committer Ingo Molnar <mingo@kernel.org>  2012-08-21 05:27:00 -0400
commit    bcada3d4b8c96b8792c2306f363992ca5ab9da42 (patch)
tree      e420679a5db6ea4e1694eef57f9abb6acac8d4d3 /arch/x86/kernel
parent    26198c21d1b286a084fe5d514a30bc7e6c712a34 (diff)
parent    000078bc3ee69efb1124b8478c7527389a826074 (diff)
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

 * Fix include order for bison/flex-generated C files, from Ben Hutchings

 * Build fixes and documentation corrections, from David Ahern

 * Group parsing support, from Jiri Olsa

 * UI/gtk refactorings and improvements, from Namhyung Kim

 * NULL deref fix for perf script, from Namhyung Kim

 * Assorted cleanups, from Robert Richter

 * Let O= makes handle relative paths, from Steven Rostedt

 * perf script python fixes, from Feng Tang

 * Improve 'perf lock' error message when the needed tracepoints are not
   present, from David Ahern

 * Initial bash completion support, from Frederic Weisbecker

 * Allow building without libelf, from Namhyung Kim

 * Support DWARF CFI based unwind to have callchains when %bp based
   unwinding is not possible, from Jiri Olsa

 * Symbol resolution fixes; while fixing support for PPC64 files with a
   .opt ELF section was the end goal, several fixes for code that handles
   all architectures and cleanups are included, from Cody Schafer

 * Add a description for the JIT interface, from Andi Kleen

 * Assorted fixes for Documentation and build in 32 bit, from Robert Richter

 * Add support for non-tracepoint events in perf script python, from Feng Tang

 * Cache the libtraceevent event_format associated to each evsel early, so
   that we avoid relookups, i.e. calling pevent_find_event repeatedly when
   processing tracepoint events.

   [ This is to reduce the surface contact with libtraceevents and make
     clear what it is that the perf tools need from that lib: so far,
     parsing the common and per-event fields. ]

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile                       |   2
-rw-r--r--  arch/x86/kernel/acpi/sleep.c                   |   4
-rw-r--r--  arch/x86/kernel/acpi/sleep.h                   |   2
-rw-r--r--  arch/x86/kernel/acpi/wakeup_32.S               |   4
-rw-r--r--  arch/x86/kernel/acpi/wakeup_64.S               |   4
-rw-r--r--  arch/x86/kernel/alternative.c                  |   2
-rw-r--r--  arch/x86/kernel/amd_nb.c                       |   1
-rw-r--r--  arch/x86/kernel/apic/apic.c                    |  23
-rw-r--r--  arch/x86/kernel/apic/io_apic.c                 |   4
-rw-r--r--  arch/x86/kernel/cpu/Makefile                   |   2
-rw-r--r--  arch/x86/kernel/cpu/common.c                   |  31
-rw-r--r--  arch/x86/kernel/cpu/cpu.h                      |   9
-rw-r--r--  arch/x86/kernel/cpu/hypervisor.c               |   3
-rw-r--r--  arch/x86/kernel/cpu/intel.c                    | 176
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-severity.c      |   7
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c               |  53
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c           | 286
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c               |  89
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h               |  20
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd_ibs.c       |   4
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c      |   7
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.h  |   2
-rw-r--r--  arch/x86/kernel/cpu/sched.c                    |  55
-rw-r--r--  arch/x86/kernel/e820.c                         |   2
-rw-r--r--  arch/x86/kernel/entry_64.S                     |  18
-rw-r--r--  arch/x86/kernel/irq.c                          |   1
-rw-r--r--  arch/x86/kernel/irqinit.c                      |  73
-rw-r--r--  arch/x86/kernel/kdebugfs.c                     |   6
-rw-r--r--  arch/x86/kernel/kvm.c                          |  64
-rw-r--r--  arch/x86/kernel/module.c                       |   2
-rw-r--r--  arch/x86/kernel/pci-dma.c                      |  11
-rw-r--r--  arch/x86/kernel/perf_regs.c                    | 105
-rw-r--r--  arch/x86/kernel/quirks.c                       |   2
-rw-r--r--  arch/x86/kernel/setup_percpu.c                 |   2
-rw-r--r--  arch/x86/kernel/smpboot.c                      |   8
35 files changed, 727 insertions(+), 357 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8215e5652d97..8d7a619718b5 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -100,6 +100,8 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
 obj-$(CONFIG_OF)			+= devicetree.o
 obj-$(CONFIG_UPROBES)			+= uprobes.o
 
+obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 95bf99de9058..1b8e5a03d942 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -25,10 +25,6 @@ unsigned long acpi_realmode_flags;
 static char temp_stack[4096];
 #endif
 
-asmlinkage void acpi_enter_s3(void)
-{
-	acpi_enter_sleep_state(3, wake_sleep_flags);
-}
 /**
  * acpi_suspend_lowlevel - save kernel state
  *
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index 5653a5791ec9..67f59f8c6956 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -2,7 +2,6 @@
  * Variables and functions used by the code in sleep.c
  */
 
-#include <linux/linkage.h>
 #include <asm/realmode.h>
 
 extern unsigned long saved_video_mode;
@@ -11,7 +10,6 @@ extern long saved_magic;
 extern int wakeup_pmode_return;
 
 extern u8 wake_sleep_flags;
-extern asmlinkage void acpi_enter_s3(void);
 
 extern unsigned long acpi_copy_wakeup_routine(unsigned long);
 extern void wakeup_long64(void);
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index 72610839f03b..13ab720573e3 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -74,7 +74,9 @@ restore_registers:
 ENTRY(do_suspend_lowlevel)
 	call	save_processor_state
 	call	save_registers
-	call	acpi_enter_s3
+	pushl	$3
+	call	acpi_enter_sleep_state
+	addl	$4, %esp
 
 # In case of S3 failure, we'll emerge here. Jump
 # to ret_point to recover
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 014d1d28c397..8ea5164cbd04 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -71,7 +71,9 @@ ENTRY(do_suspend_lowlevel)
 	movq	%rsi, saved_rsi
 
 	addq	$8, %rsp
-	call	acpi_enter_s3
+	movl	$3, %edi
+	xorl	%eax, %eax
+	call	acpi_enter_sleep_state
 	/* in case something went wrong, restore the machine status and go on */
 	jmp	resume_point
 
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 931280ff8299..afb7ff79a29f 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -224,7 +224,7 @@ void __init arch_init_ideal_nops(void)
 			ideal_nops = intel_nops;
 #endif
 		}
-
+		break;
 	default:
 #ifdef CONFIG_X86_64
 		ideal_nops = k8_nops;
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index f29f6dd6bc08..aadf3359e2a7 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -19,6 +19,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },
 	{}
 };
 EXPORT_SYMBOL(amd_nb_misc_ids);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index c421512ca5eb..24deb3082328 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -75,8 +75,8 @@ physid_mask_t phys_cpu_present_map;
 /*
  * Map cpu index to physical APIC ID
  */
-DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
-DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
 
@@ -88,7 +88,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
  * used for the mapping. This is where the behaviors of x86_64 and 32
  * actually diverge. Let's keep it ugly for now.
  */
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
 
 /*
  * Knob to control our willingness to enable the local APIC.
@@ -2143,6 +2143,23 @@ int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 }
 
 /*
+ * Override the generic EOI implementation with an optimized version.
+ * Only called during early boot when only one CPU is active and with
+ * interrupts disabled, so we know this does not race with actual APIC driver
+ * use.
+ */
+void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v))
+{
+	struct apic **drv;
+
+	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
+		/* Should happen once for each apic */
+		WARN_ON((*drv)->eoi_write == eoi_write);
+		(*drv)->eoi_write = eoi_write;
+	}
+}
+
+/*
  * Power management
  */
 #ifdef CONFIG_PM
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 406eee784684..a6c64aaddf9a 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1204,7 +1204,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 	BUG_ON(!cfg->vector);
 
 	vector = cfg->vector;
-	for_each_cpu(cpu, cfg->domain)
+	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
 		per_cpu(vector_irq, cpu)[vector] = -1;
 
 	cfg->vector = 0;
@@ -1212,7 +1212,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 
 	if (likely(!cfg->move_in_progress))
 		return;
-	for_each_cpu(cpu, cfg->old_domain) {
+	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
 		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
 		     vector++) {
 			if (per_cpu(vector_irq, cpu)[vector] != irq)
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index bac4c3804cc7..d30a6a9a0121 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -14,7 +14,7 @@ CFLAGS_common.o := $(nostackp)
 
 obj-y			:= intel_cacheinfo.o scattered.o topology.o
 obj-y			+= proc.o capflags.o powerflags.o common.o
-obj-y			+= vmware.o hypervisor.o sched.o mshyperv.o
+obj-y			+= vmware.o hypervisor.o mshyperv.o
 obj-y			+= rdrand.o
 obj-y			+= match.o
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5bbc082c47ad..46d8786d655e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -452,6 +452,35 @@ void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
 	c->x86_cache_size = l2size;
 }
 
+u16 __read_mostly tlb_lli_4k[NR_INFO];
+u16 __read_mostly tlb_lli_2m[NR_INFO];
+u16 __read_mostly tlb_lli_4m[NR_INFO];
+u16 __read_mostly tlb_lld_4k[NR_INFO];
+u16 __read_mostly tlb_lld_2m[NR_INFO];
+u16 __read_mostly tlb_lld_4m[NR_INFO];
+
+/*
+ * tlb_flushall_shift shows the balance point in replacing cr3 write
+ * with multiple 'invlpg'. It will do this replacement when
+ *   flush_tlb_lines <= active_lines/2^tlb_flushall_shift.
+ * If tlb_flushall_shift is -1, means the replacement will be disabled.
+ */
+s8 __read_mostly tlb_flushall_shift = -1;
+
+void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)
+{
+	if (this_cpu->c_detect_tlb)
+		this_cpu->c_detect_tlb(c);
+
+	printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
+		"Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n"	     \
+		"tlb_flushall_shift is 0x%x\n",
+		tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
+		tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
+		tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
+		tlb_flushall_shift);
+}
+
 void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_X86_HT
@@ -911,6 +940,8 @@ void __init identify_boot_cpu(void)
 #else
 	vgetcpu_set_mode();
 #endif
+	if (boot_cpu_data.cpuid_level >= 2)
+		cpu_detect_tlb(&boot_cpu_data);
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
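[ Illustration, not part of the patch: the balance point described in the
  tlb_flushall_shift comment above can be seen with a small user-space
  sketch; use_invlpg() and the sample numbers are hypothetical. ]

#include <stdbool.h>
#include <stdio.h>

/* mirror of the heuristic: prefer invlpg over a cr3 write when
 * flush_lines <= active_lines / 2^shift; shift == -1 disables it */
static bool use_invlpg(long flush_lines, long active_lines, int shift)
{
	if (shift < 0)
		return false;
	return flush_lines <= (active_lines >> shift);
}

int main(void)
{
	/* a SandyBridge-like shift of 5 with 512 active lines */
	printf("%d\n", use_invlpg(8, 512, 5));	/* 1: 8 <= 512/32 */
	printf("%d\n", use_invlpg(64, 512, 5));	/* 0: 64 > 16 */
	return 0;
}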
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 8bacc7826fb3..4041c24ae7db 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -20,10 +20,19 @@ struct cpu_dev {
 	void		(*c_bsp_init)(struct cpuinfo_x86 *);
 	void		(*c_init)(struct cpuinfo_x86 *);
 	void		(*c_identify)(struct cpuinfo_x86 *);
+	void		(*c_detect_tlb)(struct cpuinfo_x86 *);
 	unsigned int	(*c_size_cache)(struct cpuinfo_x86 *, unsigned int);
 	int		c_x86_vendor;
 };
 
+struct _tlb_table {
+	unsigned char descriptor;
+	char tlb_type;
+	unsigned int entries;
+	/* unsigned int ways; */
+	char info[128];
+};
+
 #define cpu_dev_register(cpu_devX) \
 	static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \
 	__attribute__((__section__(".x86_cpu_dev.init"))) = \
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 755f64fb0743..a8f8fa9769d6 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -37,6 +37,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
 #endif
 	&x86_hyper_vmware,
 	&x86_hyper_ms_hyperv,
+#ifdef CONFIG_KVM_GUEST
+	&x86_hyper_kvm,
+#endif
 };
 
 const struct hypervisor_x86 *x86_hyper;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 3e6ff6cbf42a..0a4ce2980a5a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -491,6 +491,181 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i
 }
 #endif
 
+#define TLB_INST_4K	0x01
+#define TLB_INST_4M	0x02
+#define TLB_INST_2M_4M	0x03
+
+#define TLB_INST_ALL	0x05
+#define TLB_INST_1G	0x06
+
+#define TLB_DATA_4K	0x11
+#define TLB_DATA_4M	0x12
+#define TLB_DATA_2M_4M	0x13
+#define TLB_DATA_4K_4M	0x14
+
+#define TLB_DATA_1G	0x16
+
+#define TLB_DATA0_4K	0x21
+#define TLB_DATA0_4M	0x22
+#define TLB_DATA0_2M_4M	0x23
+
+#define STLB_4K		0x41
+
+static const struct _tlb_table intel_tlb_table[] __cpuinitconst = {
+	{ 0x01, TLB_INST_4K,		32,	" TLB_INST 4 KByte pages, 4-way set associative" },
+	{ 0x02, TLB_INST_4M,		2,	" TLB_INST 4 MByte pages, full associative" },
+	{ 0x03, TLB_DATA_4K,		64,	" TLB_DATA 4 KByte pages, 4-way set associative" },
+	{ 0x04, TLB_DATA_4M,		8,	" TLB_DATA 4 MByte pages, 4-way set associative" },
+	{ 0x05, TLB_DATA_4M,		32,	" TLB_DATA 4 MByte pages, 4-way set associative" },
+	{ 0x0b, TLB_INST_4M,		4,	" TLB_INST 4 MByte pages, 4-way set associative" },
+	{ 0x4f, TLB_INST_4K,		32,	" TLB_INST 4 KByte pages */" },
+	{ 0x50, TLB_INST_ALL,		64,	" TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
+	{ 0x51, TLB_INST_ALL,		128,	" TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
+	{ 0x52, TLB_INST_ALL,		256,	" TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
+	{ 0x55, TLB_INST_2M_4M,		7,	" TLB_INST 2-MByte or 4-MByte pages, fully associative" },
+	{ 0x56, TLB_DATA0_4M,		16,	" TLB_DATA0 4 MByte pages, 4-way set associative" },
+	{ 0x57, TLB_DATA0_4K,		16,	" TLB_DATA0 4 KByte pages, 4-way associative" },
+	{ 0x59, TLB_DATA0_4K,		16,	" TLB_DATA0 4 KByte pages, fully associative" },
+	{ 0x5a, TLB_DATA0_2M_4M,	32,	" TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative" },
+	{ 0x5b, TLB_DATA_4K_4M,		64,	" TLB_DATA 4 KByte and 4 MByte pages" },
+	{ 0x5c, TLB_DATA_4K_4M,		128,	" TLB_DATA 4 KByte and 4 MByte pages" },
+	{ 0x5d, TLB_DATA_4K_4M,		256,	" TLB_DATA 4 KByte and 4 MByte pages" },
+	{ 0xb0, TLB_INST_4K,		128,	" TLB_INST 4 KByte pages, 4-way set associative" },
+	{ 0xb1, TLB_INST_2M_4M,		4,	" TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" },
+	{ 0xb2, TLB_INST_4K,		64,	" TLB_INST 4KByte pages, 4-way set associative" },
+	{ 0xb3, TLB_DATA_4K,		128,	" TLB_DATA 4 KByte pages, 4-way set associative" },
+	{ 0xb4, TLB_DATA_4K,		256,	" TLB_DATA 4 KByte pages, 4-way associative" },
+	{ 0xba, TLB_DATA_4K,		64,	" TLB_DATA 4 KByte pages, 4-way associative" },
+	{ 0xc0, TLB_DATA_4K_4M,		8,	" TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
+	{ 0xca, STLB_4K,		512,	" STLB 4 KByte pages, 4-way associative" },
+	{ 0x00, 0, 0 }
+};
+
+static void __cpuinit intel_tlb_lookup(const unsigned char desc)
+{
+	unsigned char k;
+	if (desc == 0)
+		return;
+
+	/* look up this descriptor in the table */
+	for (k = 0; intel_tlb_table[k].descriptor != desc && \
+			intel_tlb_table[k].descriptor != 0; k++)
+		;
+
+	if (intel_tlb_table[k].tlb_type == 0)
+		return;
+
+	switch (intel_tlb_table[k].tlb_type) {
+	case STLB_4K:
+		if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
+		if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
+		break;
+	case TLB_INST_ALL:
+		if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
+		if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
+		if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
+		break;
+	case TLB_INST_4K:
+		if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
+		break;
+	case TLB_INST_4M:
+		if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
+		break;
+	case TLB_INST_2M_4M:
+		if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
+		if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
+		break;
+	case TLB_DATA_4K:
+	case TLB_DATA0_4K:
+		if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
+		break;
+	case TLB_DATA_4M:
+	case TLB_DATA0_4M:
+		if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
+		break;
+	case TLB_DATA_2M_4M:
+	case TLB_DATA0_2M_4M:
+		if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
+		if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
+		break;
+	case TLB_DATA_4K_4M:
+		if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
+		if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
+		break;
+	}
+}
+
+static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
+{
+	if (!cpu_has_invlpg) {
+		tlb_flushall_shift = -1;
+		return;
+	}
+	switch ((c->x86 << 8) + c->x86_model) {
+	case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
+	case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
+	case 0x617: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
+	case 0x61d: /* six-core 45 nm xeon "Dunnington" */
+		tlb_flushall_shift = -1;
+		break;
+	case 0x61a: /* 45 nm nehalem, "Bloomfield" */
+	case 0x61e: /* 45 nm nehalem, "Lynnfield" */
+	case 0x625: /* 32 nm nehalem, "Clarkdale" */
+	case 0x62c: /* 32 nm nehalem, "Gulftown" */
+	case 0x62e: /* 45 nm nehalem-ex, "Beckton" */
+	case 0x62f: /* 32 nm Xeon E7 */
+		tlb_flushall_shift = 6;
+		break;
+	case 0x62a: /* SandyBridge */
+	case 0x62d: /* SandyBridge, "Romely-EP" */
+		tlb_flushall_shift = 5;
+		break;
+	case 0x63a: /* Ivybridge */
+		tlb_flushall_shift = 1;
+		break;
+	default:
+		tlb_flushall_shift = 6;
+	}
+}
+
+static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
+{
+	int i, j, n;
+	unsigned int regs[4];
+	unsigned char *desc = (unsigned char *)regs;
+	/* Number of times to iterate */
+	n = cpuid_eax(2) & 0xFF;
+
+	for (i = 0 ; i < n ; i++) {
+		cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
+
+		/* If bit 31 is set, this is an unknown format */
+		for (j = 0 ; j < 3 ; j++)
+			if (regs[j] & (1 << 31))
+				regs[j] = 0;
+
+		/* Byte 0 is level count, not a descriptor */
+		for (j = 1 ; j < 16 ; j++)
+			intel_tlb_lookup(desc[j]);
+	}
+	intel_tlb_flushall_shift_set(c);
+}
+
 static const struct cpu_dev __cpuinitconst intel_cpu_dev = {
 	.c_vendor	= "Intel",
 	.c_ident	= { "GenuineIntel" },
@@ -546,6 +721,7 @@ static const struct cpu_dev __cpuinitconst intel_cpu_dev = {
 	},
 	.c_size_cache	= intel_size_cache,
 #endif
+	.c_detect_tlb	= intel_detect_tlb,
 	.c_early_init	= early_init_intel,
 	.c_init		= init_intel,
 	.c_x86_vendor	= X86_VENDOR_INTEL,
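[ Illustration, not part of the patch: the CPUID leaf 2 walk that
  intel_detect_tlb() performs can be reproduced in user space. This sketch
  assumes GCC/Clang's <cpuid.h> on an x86 machine and only prints the raw
  descriptors instead of looking them up in intel_tlb_table. ]

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int regs[4];
	unsigned char *desc = (unsigned char *)regs;	/* x86 is little-endian */
	int i, j, n;

	__cpuid(2, regs[0], regs[1], regs[2], regs[3]);
	n = regs[0] & 0xFF;		/* byte 0 of EAX: iteration count */

	for (i = 0; i < n; i++) {
		__cpuid(2, regs[0], regs[1], regs[2], regs[3]);

		/* a register with bit 31 set carries no valid descriptors */
		for (j = 0; j < 4; j++)
			if (regs[j] & (1u << 31))
				regs[j] = 0;

		/* byte 0 is the iteration count, not a descriptor */
		for (j = 1; j < 16; j++)
			if (desc[j])
				printf("descriptor 0x%02x\n", desc[j]);
	}
	return 0;
}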
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 413c2ced887c..13017626f9a8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -55,13 +55,6 @@ static struct severity {
 #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
 #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
 #define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
-#define MCACOD 0xffff
-/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
-#define MCACOD_SCRUB	0x00C0	/* 0xC0-0xCF Memory Scrubbing */
-#define MCACOD_SCRUBMSK	0xfff0
-#define MCACOD_L3WB	0x017A	/* L3 Explicit Writeback */
-#define MCACOD_DATA	0x0134	/* Data Load */
-#define MCACOD_INSTR	0x0150	/* Instruction Fetch */
 
 	MCESEV(
 		NO, "Invalid",
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5a5a5dc1ff15..292d0258311c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -60,8 +60,6 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
 
 int mce_disabled __read_mostly;
 
-#define MISC_MCELOG_MINOR	227
-
 #define SPINUNIT 100	/* 100ns */
 
 atomic_t mce_entry;
@@ -105,6 +103,8 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
 
 static DEFINE_PER_CPU(struct work_struct, mce_work);
 
+static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
+
 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
@@ -652,14 +652,18 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
  * Do a quick check if any of the events requires a panic.
  * This decides if we keep the events around or clear them.
  */
-static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp)
+static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
+			  struct pt_regs *regs)
 {
 	int i, ret = 0;
 
 	for (i = 0; i < banks; i++) {
 		m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
-		if (m->status & MCI_STATUS_VAL)
+		if (m->status & MCI_STATUS_VAL) {
 			__set_bit(i, validp);
+			if (quirk_no_way_out)
+				quirk_no_way_out(i, m, regs);
+		}
 		if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
 			ret = 1;
 	}
@@ -1042,7 +1046,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		*final = m;
 
 	memset(valid_banks, 0, sizeof(valid_banks));
-	no_way_out = mce_no_way_out(&m, &msg, valid_banks);
+	no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs);
 
 	barrier();
 
@@ -1190,6 +1194,7 @@ void mce_notify_process(void)
 {
 	unsigned long pfn;
 	struct mce_info *mi = mce_find_info();
+	int flags = MF_ACTION_REQUIRED;
 
 	if (!mi)
 		mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL);
@@ -1204,8 +1209,9 @@ void mce_notify_process(void)
 	 * doomed. We still need to mark the page as poisoned and alert any
 	 * other users of the page.
 	 */
-	if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 ||
-	    mi->restartable == 0) {
+	if (!mi->restartable)
+		flags |= MF_MUST_KILL;
+	if (memory_failure(pfn, MCE_VECTOR, flags) < 0) {
 		pr_err("Memory error not recovered");
 		force_sig(SIGBUS, current);
 	}
@@ -1418,6 +1424,34 @@ static void __mcheck_cpu_init_generic(void)
 	}
 }
 
+/*
+ * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and
+ * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM
+ * Vol 3B Table 15-20). But this confuses both the code that determines
+ * whether the machine check occurred in kernel or user mode, and also
+ * the severity assessment code. Pretend that EIPV was set, and take the
+ * ip/cs values from the pt_regs that mce_gather_info() ignored earlier.
+ */
+static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
+{
+	if (bank != 0)
+		return;
+	if ((m->mcgstatus & (MCG_STATUS_EIPV|MCG_STATUS_RIPV)) != 0)
+		return;
+	if ((m->status & (MCI_STATUS_OVER|MCI_STATUS_UC|
+			  MCI_STATUS_EN|MCI_STATUS_MISCV|MCI_STATUS_ADDRV|
+			  MCI_STATUS_PCC|MCI_STATUS_S|MCI_STATUS_AR|
+			  MCACOD)) !=
+			 (MCI_STATUS_UC|MCI_STATUS_EN|
+			  MCI_STATUS_MISCV|MCI_STATUS_ADDRV|MCI_STATUS_S|
+			  MCI_STATUS_AR|MCACOD_INSTR))
+		return;
+
+	m->mcgstatus |= MCG_STATUS_EIPV;
+	m->ip = regs->ip;
+	m->cs = regs->cs;
+}
+
 /* Add per CPU specific workarounds here */
 static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 {
@@ -1515,6 +1549,9 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 		 */
 		if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0)
 			mce_bootlog = 0;
+
+		if (c->x86 == 6 && c->x86_model == 45)
+			quirk_no_way_out = quirk_sandybridge_ifu;
 	}
 	if (monarch_timeout < 0)
 		monarch_timeout = 0;
@@ -2344,7 +2381,7 @@ static __init int mcheck_init_device(void)
 
 	return err;
 }
-device_initcall(mcheck_init_device);
+device_initcall_sync(mcheck_init_device);
 
 /*
  * Old style boot options parsing. Only for compatibility.
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f4873a64f46d..c4e916d77378 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -1,15 +1,17 @@
 /*
- * (c) 2005, 2006 Advanced Micro Devices, Inc.
+ * (c) 2005-2012 Advanced Micro Devices, Inc.
  * Your use of this code is subject to the terms and conditions of the
  * GNU general public license version 2. See "COPYING" or
  * http://www.gnu.org/licenses/gpl.html
  *
  * Written by Jacob Shin - AMD, Inc.
  *
- * Support : jacob.shin@amd.com
+ * Support: borislav.petkov@amd.com
  *
  * April 2006
  * - added support for AMD Family 0x10 processors
+ * May 2012
+ * - major scrubbing
  *
  * All MC4_MISCi registers are shared between multi-cores
  */
@@ -25,6 +27,7 @@
 #include <linux/cpu.h>
 #include <linux/smp.h>
 
+#include <asm/amd_nb.h>
 #include <asm/apic.h>
 #include <asm/idle.h>
 #include <asm/mce.h>
@@ -45,23 +48,15 @@
 #define MASK_BLKPTR_LO    0xFF000000
 #define MCG_XBLK_ADDR     0xC0000400
 
-struct threshold_block {
-	unsigned int block;
-	unsigned int bank;
-	unsigned int cpu;
-	u32 address;
-	u16 interrupt_enable;
-	bool interrupt_capable;
-	u16 threshold_limit;
-	struct kobject kobj;
-	struct list_head miscj;
+static const char * const th_names[] = {
+	"load_store",
+	"insn_fetch",
+	"combined_unit",
+	"",
+	"northbridge",
+	"execution_unit",
 };
 
-struct threshold_bank {
-	struct kobject *kobj;
-	struct threshold_block *blocks;
-	cpumask_var_t cpus;
-};
 static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);
 
 static unsigned char shared_bank[NR_BANKS] = {
@@ -84,6 +79,26 @@ struct thresh_restart {
 	u16			old_limit;
 };
 
+static const char * const bank4_names(struct threshold_block *b)
+{
+	switch (b->address) {
+	/* MSR4_MISC0 */
+	case 0x00000413:
+		return "dram";
+
+	case 0xc0000408:
+		return "ht_links";
+
+	case 0xc0000409:
+		return "l3_cache";
+
+	default:
+		WARN(1, "Funny MSR: 0x%08x\n", b->address);
+		return "";
+	}
+};
+
+
 static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
 {
 	/*
@@ -224,8 +239,6 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 
 		if (!block)
 			per_cpu(bank_map, cpu) |= (1 << bank);
-		if (shared_bank[bank] && c->cpu_core_id)
-			break;
 
 		memset(&b, 0, sizeof(b));
 		b.cpu			= cpu;
@@ -326,7 +339,7 @@ struct threshold_attr {
 #define SHOW_FIELDS(name)						\
 static ssize_t show_ ## name(struct threshold_block *b, char *buf)	\
 {									\
-	return sprintf(buf, "%lx\n", (unsigned long) b->name);		\
+	return sprintf(buf, "%lu\n", (unsigned long) b->name);		\
 }
 SHOW_FIELDS(interrupt_enable)
 SHOW_FIELDS(threshold_limit)
@@ -377,38 +390,21 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
 	return size;
 }
 
-struct threshold_block_cross_cpu {
-	struct threshold_block *tb;
-	long retval;
-};
-
-static void local_error_count_handler(void *_tbcc)
-{
-	struct threshold_block_cross_cpu *tbcc = _tbcc;
-	struct threshold_block *b = tbcc->tb;
-	u32 low, high;
-
-	rdmsr(b->address, low, high);
-	tbcc->retval = (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
-}
-
 static ssize_t show_error_count(struct threshold_block *b, char *buf)
 {
-	struct threshold_block_cross_cpu tbcc = { .tb = b, };
+	u32 lo, hi;
 
-	smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1);
-	return sprintf(buf, "%lx\n", tbcc.retval);
-}
-
-static ssize_t store_error_count(struct threshold_block *b,
-				 const char *buf, size_t count)
-{
-	struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
+	rdmsr_on_cpu(b->cpu, b->address, &lo, &hi);
 
-	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
-	return 1;
+	return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
+				     (THRESHOLD_MAX - b->threshold_limit)));
 }
 
+static struct threshold_attr error_count = {
+	.attr = {.name = __stringify(error_count), .mode = 0444 },
+	.show = show_error_count,
+};
+
 #define RW_ATTR(val)							\
 static struct threshold_attr val = {					\
 	.attr = {.name = __stringify(val), .mode = 0644 },		\
@@ -418,7 +414,6 @@ static struct threshold_attr val = { \
 
 RW_ATTR(interrupt_enable);
 RW_ATTR(threshold_limit);
-RW_ATTR(error_count);
 
 static struct attribute *default_attrs[] = {
 	&threshold_limit.attr,
@@ -517,7 +512,7 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
 
 	err = kobject_init_and_add(&b->kobj, &threshold_ktype,
 				   per_cpu(threshold_banks, cpu)[bank]->kobj,
-				   "misc%i", block);
+				   (bank == 4 ? bank4_names(b) : th_names[bank]));
 	if (err)
 		goto out_free;
 recurse:
@@ -548,98 +543,91 @@ out_free:
 	return err;
 }
 
-static __cpuinit long
-local_allocate_threshold_blocks(int cpu, unsigned int bank)
+static __cpuinit int __threshold_add_blocks(struct threshold_bank *b)
 {
-	return allocate_threshold_blocks(cpu, bank, 0,
-					 MSR_IA32_MC0_MISC + bank * 4);
+	struct list_head *head = &b->blocks->miscj;
+	struct threshold_block *pos = NULL;
+	struct threshold_block *tmp = NULL;
+	int err = 0;
+
+	err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
+	if (err)
+		return err;
+
+	list_for_each_entry_safe(pos, tmp, head, miscj) {
+
+		err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
+		if (err) {
+			list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
+				kobject_del(&pos->kobj);
+
+			return err;
+		}
+	}
+	return err;
 }
 
-/* symlinks sibling shared banks to first core. first core owns dir/files. */
 static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 {
-	int i, err = 0;
-	struct threshold_bank *b = NULL;
 	struct device *dev = per_cpu(mce_device, cpu);
-	char name[32];
+	struct amd_northbridge *nb = NULL;
+	struct threshold_bank *b = NULL;
+	const char *name = th_names[bank];
+	int err = 0;
 
-	sprintf(name, "threshold_bank%i", bank);
+	if (shared_bank[bank]) {
 
-#ifdef CONFIG_SMP
-	if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) {	/* symlink */
-		i = cpumask_first(cpu_llc_shared_mask(cpu));
+		nb = node_to_amd_nb(amd_get_nb_id(cpu));
+		WARN_ON(!nb);
 
-		/* first core not up yet */
-		if (cpu_data(i).cpu_core_id)
-			goto out;
+		/* threshold descriptor already initialized on this node? */
+		if (nb->bank4) {
+			/* yes, use it */
+			b = nb->bank4;
+			err = kobject_add(b->kobj, &dev->kobj, name);
+			if (err)
+				goto out;
 
-		/* already linked */
-		if (per_cpu(threshold_banks, cpu)[bank])
-			goto out;
+			per_cpu(threshold_banks, cpu)[bank] = b;
+			atomic_inc(&b->cpus);
 
-		b = per_cpu(threshold_banks, i)[bank];
+			err = __threshold_add_blocks(b);
 
-		if (!b)
 			goto out;
-
-		err = sysfs_create_link(&dev->kobj, b->kobj, name);
-		if (err)
-			goto out;
-
-		cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu));
-		per_cpu(threshold_banks, cpu)[bank] = b;
-
-		goto out;
+		}
 	}
-#endif
 
 	b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
 	if (!b) {
 		err = -ENOMEM;
 		goto out;
 	}
-	if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
-		kfree(b);
-		err = -ENOMEM;
-		goto out;
-	}
 
 	b->kobj = kobject_create_and_add(name, &dev->kobj);
-	if (!b->kobj)
+	if (!b->kobj) {
+		err = -EINVAL;
 		goto out_free;
-
-#ifndef CONFIG_SMP
-	cpumask_setall(b->cpus);
-#else
-	cpumask_set_cpu(cpu, b->cpus);
-#endif
+	}
 
 	per_cpu(threshold_banks, cpu)[bank] = b;
 
-	err = local_allocate_threshold_blocks(cpu, bank);
-	if (err)
-		goto out_free;
-
-	for_each_cpu(i, b->cpus) {
-		if (i == cpu)
-			continue;
-
-		dev = per_cpu(mce_device, i);
-		if (dev)
-			err = sysfs_create_link(&dev->kobj,b->kobj, name);
-		if (err)
-			goto out;
+	if (shared_bank[bank]) {
+		atomic_set(&b->cpus, 1);
 
-		per_cpu(threshold_banks, i)[bank] = b;
+		/* nb is already initialized, see above */
+		WARN_ON(nb->bank4);
+		nb->bank4 = b;
 	}
 
-	goto out;
+	err = allocate_threshold_blocks(cpu, bank, 0,
+					MSR_IA32_MC0_MISC + bank * 4);
+	if (!err)
+		goto out;
 
-out_free:
-	per_cpu(threshold_banks, cpu)[bank] = NULL;
-	free_cpumask_var(b->cpus);
+ out_free:
 	kfree(b);
-out:
+
+ out:
 	return err;
 }
 
@@ -660,12 +648,6 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
 		return err;
 }
 
-/*
- * let's be hotplug friendly.
- * in case of multiple core processors, the first core always takes ownership
- * of shared sysfs dir/files, and rest of the cores will be symlinked to it.
- */
-
 static void deallocate_threshold_block(unsigned int cpu,
 						 unsigned int bank)
 {
@@ -686,41 +668,42 @@ static void deallocate_threshold_block(unsigned int cpu,
 	per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
 }
 
+static void __threshold_remove_blocks(struct threshold_bank *b)
+{
+	struct threshold_block *pos = NULL;
+	struct threshold_block *tmp = NULL;
+
+	kobject_del(b->kobj);
+
+	list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
+		kobject_del(&pos->kobj);
+}
+
 static void threshold_remove_bank(unsigned int cpu, int bank)
 {
+	struct amd_northbridge *nb;
 	struct threshold_bank *b;
-	struct device *dev;
-	char name[32];
-	int i = 0;
 
 	b = per_cpu(threshold_banks, cpu)[bank];
 	if (!b)
 		return;
+
 	if (!b->blocks)
 		goto free_out;
 
-	sprintf(name, "threshold_bank%i", bank);
-
-#ifdef CONFIG_SMP
-	/* sibling symlink */
-	if (shared_bank[bank] && b->blocks->cpu != cpu) {
-		dev = per_cpu(mce_device, cpu);
-		sysfs_remove_link(&dev->kobj, name);
-		per_cpu(threshold_banks, cpu)[bank] = NULL;
-
-		return;
-	}
-#endif
-
-	/* remove all sibling symlinks before unregistering */
-	for_each_cpu(i, b->cpus) {
-		if (i == cpu)
-			continue;
-
-		dev = per_cpu(mce_device, i);
-		if (dev)
-			sysfs_remove_link(&dev->kobj, name);
-		per_cpu(threshold_banks, i)[bank] = NULL;
+	if (shared_bank[bank]) {
+		if (!atomic_dec_and_test(&b->cpus)) {
+			__threshold_remove_blocks(b);
+			per_cpu(threshold_banks, cpu)[bank] = NULL;
+			return;
+		} else {
+			/*
+			 * the last CPU on this node using the shared bank is
+			 * going away, remove that bank now.
+			 */
+			nb = node_to_amd_nb(amd_get_nb_id(cpu));
+			nb->bank4 = NULL;
+		}
 	}
 
 	deallocate_threshold_block(cpu, bank);
@@ -728,7 +711,6 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 free_out:
 	kobject_del(b->kobj);
 	kobject_put(b->kobj);
-	free_cpumask_var(b->cpus);
 	kfree(b);
 	per_cpu(threshold_banks, cpu)[bank] = NULL;
 }
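[ Illustration, not part of the patch: the create/remove pair above boils
  down to a per-node refcount on the shared bank 4 descriptor. bank_get()
  and bank_put() below are hypothetical user-space stand-ins for that
  lifetime rule. ]

#include <stdatomic.h>
#include <stdlib.h>

struct bank {
	atomic_int cpus;	/* counterpart of threshold_bank::cpus */
};

/* first CPU on the node allocates, later CPUs only take a reference */
static struct bank *bank_get(struct bank **node_slot)
{
	if (*node_slot) {
		atomic_fetch_add(&(*node_slot)->cpus, 1);
		return *node_slot;
	}
	*node_slot = calloc(1, sizeof(**node_slot));
	if (*node_slot)
		atomic_store(&(*node_slot)->cpus, 1);
	return *node_slot;
}

/* only the CPU that drops the count to zero tears the bank down */
static void bank_put(struct bank **node_slot)
{
	if (atomic_fetch_sub(&(*node_slot)->cpus, 1) == 1) {
		free(*node_slot);
		*node_slot = NULL;
	}
}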
@@ -777,4 +759,24 @@ static __init int threshold_init_device(void)
 
 	return 0;
 }
-device_initcall(threshold_init_device);
+/*
+ * there are 3 funcs which need to be _initcalled in a logic sequence:
+ * 1. xen_late_init_mcelog
+ * 2. mcheck_init_device
+ * 3. threshold_init_device
+ *
+ * xen_late_init_mcelog must register xen_mce_chrdev_device before
+ * native mce_chrdev_device registration if running under xen platform;
+ *
+ * mcheck_init_device should be inited before threshold_init_device to
+ * initialize mce_device, otherwise a NULL ptr dereference will cause panic.
+ *
+ * so we use following _initcalls
+ * 1. device_initcall(xen_late_init_mcelog);
+ * 2. device_initcall_sync(mcheck_init_device);
+ * 3. late_initcall(threshold_init_device);
+ *
+ * when running under xen, the initcall order is 1,2,3;
+ * on baremetal, we skip 1 and we do only 2 and 3.
+ */
+late_initcall(threshold_init_device);
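[ Illustration, not part of the patch: a minimal built-in sketch of the
  three-level ordering the comment above relies on; the function names are
  placeholders. On boot the messages appear in the order 1, 2, 3 because
  device_initcall() runs before device_initcall_sync(), which runs before
  late_initcall(). ]

#include <linux/init.h>
#include <linux/printk.h>

static int __init first_init(void)	/* stands in for xen_late_init_mcelog */
{
	pr_info("1: device_initcall level\n");
	return 0;
}
device_initcall(first_init);

static int __init second_init(void)	/* stands in for mcheck_init_device */
{
	pr_info("2: device_initcall_sync level\n");
	return 0;
}
device_initcall_sync(second_init);

static int __init third_init(void)	/* stands in for threshold_init_device */
{
	pr_info("3: late_initcall level\n");
	return 0;
}
late_initcall(third_init);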
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 29557aa06dda..915b876edd1e 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -32,6 +32,8 @@
 #include <asm/smp.h>
 #include <asm/alternative.h>
 #include <asm/timer.h>
+#include <asm/desc.h>
+#include <asm/ldt.h>
 
 #include "perf_event.h"
 
@@ -1738,6 +1740,29 @@ valid_user_frame(const void __user *fp, unsigned long size)
 	return (__range_not_ok(fp, size, TASK_SIZE) == 0);
 }
 
+static unsigned long get_segment_base(unsigned int segment)
+{
+	struct desc_struct *desc;
+	int idx = segment >> 3;
+
+	if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
+		if (idx > LDT_ENTRIES)
+			return 0;
+
+		if (idx > current->active_mm->context.size)
+			return 0;
+
+		desc = current->active_mm->context.ldt;
+	} else {
+		if (idx > GDT_ENTRIES)
+			return 0;
+
+		desc = __this_cpu_ptr(&gdt_page.gdt[0]);
+	}
+
+	return get_desc_base(desc + idx);
+}
+
 #ifdef CONFIG_COMPAT
 
 #include <asm/compat.h>
@@ -1746,13 +1771,17 @@ static inline int
 perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
 	/* 32-bit process in 64-bit kernel. */
+	unsigned long ss_base, cs_base;
 	struct stack_frame_ia32 frame;
 	const void __user *fp;
 
 	if (!test_thread_flag(TIF_IA32))
 		return 0;
 
-	fp = compat_ptr(regs->bp);
+	cs_base = get_segment_base(regs->cs);
+	ss_base = get_segment_base(regs->ss);
+
+	fp = compat_ptr(ss_base + regs->bp);
 	while (entry->nr < PERF_MAX_STACK_DEPTH) {
 		unsigned long bytes;
 		frame.next_frame     = 0;
@@ -1765,8 +1794,8 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
 
-		perf_callchain_store(entry, frame.return_address);
-		fp = compat_ptr(frame.next_frame);
+		perf_callchain_store(entry, cs_base + frame.return_address);
+		fp = compat_ptr(ss_base + frame.next_frame);
 	}
 	return 1;
 }
@@ -1789,6 +1818,12 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		return;
 	}
 
+	/*
+	 * We don't know what to do with VM86 stacks.. ignore them for now.
+	 */
+	if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
+		return;
+
 	fp = (void __user *)regs->bp;
 
 	perf_callchain_store(entry, regs->ip);
@@ -1816,16 +1851,50 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 	}
 }
 
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
+/*
+ * Deal with code segment offsets for the various execution modes:
+ *
+ *   VM86 - the good olde 16 bit days, where the linear address is
+ *          20 bits and we use regs->ip + 0x10 * regs->cs.
+ *
+ *   IA32 - Where we need to look at GDT/LDT segment descriptor tables
+ *          to figure out what the 32bit base address is.
+ *
+ *    X32 - has TIF_X32 set, but is running in x86_64
+ *
+ * X86_64 - CS,DS,SS,ES are all zero based.
+ */
+static unsigned long code_segment_base(struct pt_regs *regs)
 {
-	unsigned long ip;
+	/*
+	 * If we are in VM86 mode, add the segment offset to convert to a
+	 * linear address.
+	 */
+	if (regs->flags & X86_VM_MASK)
+		return 0x10 * regs->cs;
+
+	/*
+	 * For IA32 we look at the GDT/LDT segment base to convert the
+	 * effective IP to a linear address.
+	 */
+#ifdef CONFIG_X86_32
+	if (user_mode(regs) && regs->cs != __USER_CS)
+		return get_segment_base(regs->cs);
+#else
+	if (test_thread_flag(TIF_IA32)) {
+		if (user_mode(regs) && regs->cs != __USER32_CS)
+			return get_segment_base(regs->cs);
+	}
+#endif
+	return 0;
+}
 
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
 	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
-		ip = perf_guest_cbs->get_guest_ip();
-	else
-		ip = instruction_pointer(regs);
+		return perf_guest_cbs->get_guest_ip();
 
-	return ip;
+	return regs->ip + code_segment_base(regs);
 }
 
 unsigned long perf_misc_flags(struct pt_regs *regs)
@@ -1838,7 +1907,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
 		else
 			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
 	} else {
-		if (!kernel_ip(regs->ip))
+		if (user_mode(regs))
 			misc |= PERF_RECORD_MISC_USER;
 		else
 			misc |= PERF_RECORD_MISC_KERNEL;
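[ Illustration, not part of the patch: the VM86 branch of
  code_segment_base() above reduces to the classic real-mode address
  computation; vm86_linear_ip() is a hypothetical user-space helper. ]

#include <stdio.h>

/* 16-bit cs:ip -> 20-bit linear address: the segment base is cs << 4 */
static unsigned long vm86_linear_ip(unsigned long cs, unsigned long ip)
{
	return ip + 0x10 * cs;
}

int main(void)
{
	/* the reset vector F000:FFF0 maps to linear 0xFFFF0 */
	printf("0x%lx\n", vm86_linear_ip(0xF000, 0xFFF0));
	return 0;
}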
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 821d53b696d1..6605a81ba339 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -516,6 +516,26 @@ static inline bool kernel_ip(unsigned long ip)
 #endif
 }
 
+/*
+ * Not all PMUs provide the right context information to place the reported IP
+ * into full context. Specifically segment registers are typically not
+ * supplied.
+ *
+ * Assuming the address is a linear address (it is for IBS), we fake the CS and
+ * vm86 mode using the known zero-based code segment and 'fix up' the registers
+ * to reflect this.
+ *
+ * Intel PEBS/LBR appear to typically provide the effective address, nothing
+ * much we can do about that but pray and treat it like a linear address.
+ */
+static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
+{
+	regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS;
+	if (regs->flags & X86_VM_MASK)
+		regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);
+	regs->ip = ip;
+}
+
 #ifdef CONFIG_CPU_SUP_AMD
 
 int amd_pmu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index da9bcdcd9856..7bfb5bec8630 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -13,6 +13,8 @@
 
 #include <asm/apic.h>
 
+#include "perf_event.h"
+
 static u32 ibs_caps;
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
@@ -536,7 +538,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
 	if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
 		regs.flags &= ~PERF_EFLAGS_EXACT;
 	} else {
-		instruction_pointer_set(&regs, ibs_data.regs[1]);
+		set_linear_ip(&regs, ibs_data.regs[1]);
 		regs.flags |= PERF_EFLAGS_EXACT;
 	}
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 629ae0b7ad90..e38d97bf4259 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -499,7 +499,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 	 * We sampled a branch insn, rewind using the LBR stack
 	 */
 	if (ip == to) {
-		regs->ip = from;
+		set_linear_ip(regs, from);
 		return 1;
 	}
 
@@ -529,7 +529,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 	} while (to < ip);
 
 	if (to == ip) {
-		regs->ip = old_to;
+		set_linear_ip(regs, old_to);
 		return 1;
 	}
 
@@ -569,7 +569,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	 * A possible PERF_SAMPLE_REGS will have to transfer all regs.
 	 */
 	regs = *iregs;
-	regs.ip = pebs->ip;
+	regs.flags = pebs->flags;
+	set_linear_ip(&regs, pebs->ip);
 	regs.bp = pebs->bp;
 	regs.sp = pebs->sp;
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index f3851892e077..c9e5dc56630a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -5,7 +5,7 @@
 #include "perf_event.h"
 
 #define UNCORE_PMU_NAME_LEN		32
-#define UNCORE_PMU_HRTIMER_INTERVAL	(60 * NSEC_PER_SEC)
+#define UNCORE_PMU_HRTIMER_INTERVAL	(60LL * NSEC_PER_SEC)
 
 #define UNCORE_FIXED_EVENT		0xff
 #define UNCORE_PMC_IDX_MAX_GENERIC	8
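[ Illustration, not part of the patch: the LL suffix matters because
  NSEC_PER_SEC is a plain long, so on 32-bit builds 60 * NSEC_PER_SEC is
  evaluated in 32-bit arithmetic before being widened. A user-space
  approximation with explicit widths: ]

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* 32-bit multiply wraps: 60e9 mod 2^32 */
	uint32_t wrapped  = 60u * 1000000000u;		/* 4165425152 */
	/* 64-bit multiply, as forced by the 60LL in the hunk above */
	int64_t  interval = 60LL * 1000000000LL;	/* 60000000000 */

	printf("wrapped=%u correct=%lld\n", wrapped, (long long)interval);
	return 0;
}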
diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c
deleted file mode 100644
index a640ae5ad201..000000000000
--- a/arch/x86/kernel/cpu/sched.c
+++ /dev/null
@@ -1,55 +0,0 @@
-#include <linux/sched.h>
-#include <linux/math64.h>
-#include <linux/percpu.h>
-#include <linux/irqflags.h>
-
-#include <asm/cpufeature.h>
-#include <asm/processor.h>
-
-#ifdef CONFIG_SMP
-
-static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched);
-
-static unsigned long scale_aperfmperf(void)
-{
-	struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched);
-	unsigned long ratio, flags;
-
-	local_irq_save(flags);
-	get_aperfmperf(&val);
-	local_irq_restore(flags);
-
-	ratio = calc_aperfmperf_ratio(old, &val);
-	*old = val;
-
-	return ratio;
-}
-
-unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
-{
-	/*
-	 * do aperf/mperf on the cpu level because it includes things
-	 * like turbo mode, which are relevant to full cores.
-	 */
-	if (boot_cpu_has(X86_FEATURE_APERFMPERF))
-		return scale_aperfmperf();
-
-	/*
-	 * maybe have something cpufreq here
-	 */
-
-	return default_scale_freq_power(sd, cpu);
-}
-
-unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu)
-{
-	/*
-	 * aperf/mperf already includes the smt gain
-	 */
-	if (boot_cpu_has(X86_FEATURE_APERFMPERF))
-		return SCHED_LOAD_SCALE;
-
-	return default_scale_smt_power(sd, cpu);
-}
-
-#endif
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 41857970517f..ed858e9e9a74 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -944,7 +944,7 @@ void __init e820_reserve_resources(void)
 	for (i = 0; i < e820_saved.nr_map; i++) {
 		struct e820entry *entry = &e820_saved.map[i];
 		firmware_map_add_early(entry->addr,
-			entry->addr + entry->size - 1,
+			entry->addr + entry->size,
 			e820_type_to_string(entry->type));
 	}
 }
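
This hunk switches the end argument of firmware_map_add_early() from the entry's last byte (addr + size - 1) to its exclusive end (addr + size). Either convention works, but producers and consumers must agree on one; a tiny hypothetical illustration (not kernel code) of why mixing them corrupts adjacency checks:

struct range {
	unsigned long long start, end;	/* [start, end) */
};

/*
 * True only when `end` is exclusive: entries of size 0x1000 at
 * 0x1000 and 0x2000 are adjacent.  With inclusive ends the test
 * would have to be a->end + 1 == b->start.
 */
static int adjacent(const struct range *a, const struct range *b)
{
	return a->end == b->start;
}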
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 459d4a0dca8d..b7a81dcb7366 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1124,24 +1124,6 @@ apicinterrupt LOCAL_TIMER_VECTOR \
 apicinterrupt X86_PLATFORM_IPI_VECTOR \
 	x86_platform_ipi smp_x86_platform_ipi
 
-#ifdef CONFIG_SMP
-	ALIGN
-	INTR_FRAME
-.irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
-	16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
-.if NUM_INVALIDATE_TLB_VECTORS > \idx
-ENTRY(invalidate_interrupt\idx)
-	pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx)
-	jmp .Lcommon_invalidate_interrupt0
-	CFI_ADJUST_CFA_OFFSET -8
-END(invalidate_interrupt\idx)
-.endif
-.endr
-	CFI_ENDPROC
-apicinterrupt INVALIDATE_TLB_VECTOR_START, \
-	invalidate_interrupt0, smp_invalidate_interrupt
-#endif
-
 apicinterrupt THRESHOLD_APIC_VECTOR \
 	threshold_interrupt smp_threshold_interrupt
 apicinterrupt THERMAL_APIC_VECTOR \
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 1f5f1d5d2a02..7ad683d78645 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -328,6 +328,7 @@ void fixup_irqs(void)
 				chip->irq_retrigger(data);
 			raw_spin_unlock(&desc->lock);
 		}
+		__this_cpu_write(vector_irq[vector], -1);
 	}
 }
 #endif
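
The one-line addition to fixup_irqs() clears this CPU's stale vector-to-irq slot after any interrupt pending in the IRR has been retriggered elsewhere during CPU offline; -1 marks a vector as unused, so without the reset the vector would look permanently busy and never be reallocated. The idiom, as a sketch (vector_irq is the per-CPU table declared in asm/hw_irq.h of this era):

/* Mark a vector on the current CPU as free for reallocation. */
static void sketch_release_vector(unsigned int vector)
{
	__this_cpu_write(vector_irq[vector], -1);
}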
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 252981afd6c4..6e03b0d69138 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -171,79 +171,6 @@ static void __init smp_intr_init(void)
 	 */
 	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
-	/* IPIs for invalidation */
-#define ALLOC_INVTLB_VEC(NR) \
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+NR, \
-			invalidate_interrupt##NR)
-
-	switch (NUM_INVALIDATE_TLB_VECTORS) {
-	default:
-		ALLOC_INVTLB_VEC(31);
-	case 31:
-		ALLOC_INVTLB_VEC(30);
-	case 30:
-		ALLOC_INVTLB_VEC(29);
-	case 29:
-		ALLOC_INVTLB_VEC(28);
-	case 28:
-		ALLOC_INVTLB_VEC(27);
-	case 27:
-		ALLOC_INVTLB_VEC(26);
-	case 26:
-		ALLOC_INVTLB_VEC(25);
-	case 25:
-		ALLOC_INVTLB_VEC(24);
-	case 24:
-		ALLOC_INVTLB_VEC(23);
-	case 23:
-		ALLOC_INVTLB_VEC(22);
-	case 22:
-		ALLOC_INVTLB_VEC(21);
-	case 21:
-		ALLOC_INVTLB_VEC(20);
-	case 20:
-		ALLOC_INVTLB_VEC(19);
-	case 19:
-		ALLOC_INVTLB_VEC(18);
-	case 18:
-		ALLOC_INVTLB_VEC(17);
-	case 17:
-		ALLOC_INVTLB_VEC(16);
-	case 16:
-		ALLOC_INVTLB_VEC(15);
-	case 15:
-		ALLOC_INVTLB_VEC(14);
-	case 14:
-		ALLOC_INVTLB_VEC(13);
-	case 13:
-		ALLOC_INVTLB_VEC(12);
-	case 12:
-		ALLOC_INVTLB_VEC(11);
-	case 11:
-		ALLOC_INVTLB_VEC(10);
-	case 10:
-		ALLOC_INVTLB_VEC(9);
-	case 9:
-		ALLOC_INVTLB_VEC(8);
-	case 8:
-		ALLOC_INVTLB_VEC(7);
-	case 7:
-		ALLOC_INVTLB_VEC(6);
-	case 6:
-		ALLOC_INVTLB_VEC(5);
-	case 5:
-		ALLOC_INVTLB_VEC(4);
-	case 4:
-		ALLOC_INVTLB_VEC(3);
-	case 3:
-		ALLOC_INVTLB_VEC(2);
-	case 2:
-		ALLOC_INVTLB_VEC(1);
-	case 1:
-		ALLOC_INVTLB_VEC(0);
-		break;
-	}
-
 	/* IPI for generic function call */
 	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 
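
The entry_64.S and irqinit.c removals above retire the bank of dedicated TLB-invalidate IPI vectors, together with the unrolled fall-through switch that allocated one gate per vector. Cross-CPU TLB shootdowns can instead ride the generic SMP function-call IPI; a conceptual sketch of that replacement path, under the assumption that the real implementation lives in arch/x86/mm/tlb.c and is considerably more careful (the helper names of this kernel era are used):

#include <linux/smp.h>
#include <asm/tlbflush.h>

/* Runs on each targeted CPU via the function-call IPI. */
static void flush_tlb_ipi(void *info)
{
	__flush_tlb();	/* drop this CPU's stale translations */
}

static void sketch_flush_tlb_others(const struct cpumask *mask)
{
	/* wait=1: don't return until every CPU has flushed */
	smp_call_function_many(mask, flush_tlb_ipi, NULL, 1);
}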
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 1d5d31ea686b..dc1404bf8e4b 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -107,7 +107,7 @@ static int __init create_setup_data_nodes(struct dentry *parent)
 {
 	struct setup_data_node *node;
 	struct setup_data *data;
-	int error = -ENOMEM;
+	int error;
 	struct dentry *d;
 	struct page *pg;
 	u64 pa_data;
@@ -121,8 +121,10 @@ static int __init create_setup_data_nodes(struct dentry *parent)
 
 	while (pa_data) {
 		node = kmalloc(sizeof(*node), GFP_KERNEL);
-		if (!node)
+		if (!node) {
+			error = -ENOMEM;
 			goto err_dir;
+		}
 
 		pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT);
 		if (PageHighMem(pg)) {
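
The kdebugfs.c change moves the -ENOMEM assignment from the declaration to the failure site. Pre-loading an error code at declaration is fragile: any later exit path that forgets to set its own code silently returns -ENOMEM even when the real failure was something else. A generic sketch of the site-local idiom, with a hypothetical function in kernel style:

#include <linux/slab.h>

static int grab_two(void)
{
	void *a, *b;
	int error;

	a = kmalloc(64, GFP_KERNEL);
	if (!a) {
		error = -ENOMEM;	/* reason recorded where it happens */
		goto out;
	}

	b = kmalloc(64, GFP_KERNEL);
	if (!b) {
		error = -ENOMEM;
		goto free_a;
	}

	/* ... use a and b ... */
	kfree(b);
	kfree(a);
	return 0;

free_a:
	kfree(a);
out:
	return error;
}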
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index e554e5ad2fe8..c1d61ee4b4f1 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -39,6 +39,9 @@
 #include <asm/desc.h>
 #include <asm/tlbflush.h>
 #include <asm/idle.h>
+#include <asm/apic.h>
+#include <asm/apicdef.h>
+#include <asm/hypervisor.h>
 
 static int kvmapf = 1;
 
@@ -283,6 +286,22 @@ static void kvm_register_steal_time(void)
 		cpu, __pa(st));
 }
 
+static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
+
+static void kvm_guest_apic_eoi_write(u32 reg, u32 val)
+{
+	/**
+	 * This relies on __test_and_clear_bit to modify the memory
+	 * in a way that is atomic with respect to the local CPU.
+	 * The hypervisor only accesses this memory from the local CPU so
+	 * there's no need for lock or memory barriers.
+	 * An optimization barrier is implied in apic write.
+	 */
+	if (__test_and_clear_bit(KVM_PV_EOI_BIT, &__get_cpu_var(kvm_apic_eoi)))
+		return;
+	apic_write(APIC_EOI, APIC_EOI_ACK);
+}
+
 void __cpuinit kvm_guest_cpu_init(void)
 {
 	if (!kvm_para_available())
@@ -300,11 +319,20 @@ void __cpuinit kvm_guest_cpu_init(void)
 		       smp_processor_id());
 	}
 
+	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
+		unsigned long pa;
+		/* Size alignment is implied but just to make it explicit. */
+		BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
+		__get_cpu_var(kvm_apic_eoi) = 0;
+		pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED;
+		wrmsrl(MSR_KVM_PV_EOI_EN, pa);
+	}
+
 	if (has_steal_clock)
 		kvm_register_steal_time();
 }
 
-static void kvm_pv_disable_apf(void *unused)
+static void kvm_pv_disable_apf(void)
 {
 	if (!__get_cpu_var(apf_reason).enabled)
 		return;
@@ -316,11 +344,23 @@ static void kvm_pv_disable_apf(void *unused)
 	       smp_processor_id());
 }
 
+static void kvm_pv_guest_cpu_reboot(void *unused)
+{
+	/*
+	 * We disable PV EOI before we load a new kernel by kexec,
+	 * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory.
+	 * New kernel can re-enable when it boots.
+	 */
+	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+	kvm_pv_disable_apf();
+}
+
 static int kvm_pv_reboot_notify(struct notifier_block *nb,
 				unsigned long code, void *unused)
 {
 	if (code == SYS_RESTART)
-		on_each_cpu(kvm_pv_disable_apf, NULL, 1);
+		on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
 	return NOTIFY_DONE;
 }
 
@@ -371,7 +411,9 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy)
 static void kvm_guest_cpu_offline(void *dummy)
 {
 	kvm_disable_steal_time();
-	kvm_pv_disable_apf(NULL);
+	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+	kvm_pv_disable_apf();
 	apf_task_wake_all();
 }
 
@@ -424,6 +466,9 @@ void __init kvm_guest_init(void)
 		pv_time_ops.steal_clock = kvm_steal_clock;
 	}
 
+	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+		apic_set_eoi_write(kvm_guest_apic_eoi_write);
+
 #ifdef CONFIG_SMP
 	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
 	register_cpu_notifier(&kvm_cpu_notifier);
@@ -432,6 +477,19 @@ void __init kvm_guest_init(void)
 #endif
 }
 
+static bool __init kvm_detect(void)
+{
+	if (!kvm_para_available())
+		return false;
+	return true;
+}
+
+const struct hypervisor_x86 x86_hyper_kvm __refconst = {
+	.name			= "KVM",
+	.detect			= kvm_detect,
+};
+EXPORT_SYMBOL_GPL(x86_hyper_kvm);
+
 static __init int activate_jump_labels(void)
 {
 	if (has_steal_clock) {
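
The PV EOI protocol added above is a guest/host handshake: the guest registers the physical address of a per-CPU flag word through MSR_KVM_PV_EOI_EN; when the host can complete an EOI on the guest's behalf it sets KVM_PV_EOI_BIT in that word, and the guest clears the bit with __test_and_clear_bit() (unlocked, since only the local CPU and the host acting for this vCPU touch the word) instead of writing the APIC EOI register. The enable MSR reuses bit 0 of the address as KVM_MSR_ENABLED, which is why the patch asserts at least 4-byte alignment; the new x86_hyper_kvm descriptor additionally lets the generic hypervisor-detection code identify KVM guests. A userspace-style illustration of the MSR encoding, nothing more:

#include <assert.h>
#include <stdint.h>

#define KVM_MSR_ENABLED 1ULL

/*
 * Value the guest writes to MSR_KVM_PV_EOI_EN: the flag word's
 * physical address with bit 0 doubling as the enable flag.  The
 * word's alignment guarantees the low bit of its address is zero.
 */
static uint64_t pv_eoi_enable_value(uint64_t flag_word_pa)
{
	assert((flag_word_pa & KVM_MSR_ENABLED) == 0);
	return flag_word_pa | KVM_MSR_ENABLED;
}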
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 202494d2ec6e..216a4d754b0c 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -81,7 +81,7 @@ int apply_relocate(Elf32_Shdr *sechdrs,
 			*location += sym->st_value;
 			break;
 		case R_386_PC32:
-			/* Add the value, subtract its postition */
+			/* Add the value, subtract its position */
 			*location += sym->st_value - (uint32_t)location;
 			break;
 		default:
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index c0f420f76cd3..de2b7ad70273 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -45,15 +45,6 @@ int iommu_detected __read_mostly = 0;
  */
 int iommu_pass_through __read_mostly;
 
-/*
- * Group multi-function PCI devices into a single device-group for the
- * iommu_device_group interface. This tells the iommu driver to pretend
- * it cannot distinguish between functions of a device, exposing only one
- * group for the device. Useful for disallowing use of individual PCI
- * functions from userspace drivers.
- */
-int iommu_group_mf __read_mostly;
-
 extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
 
 /* Dummy device used for NULL arguments (normally ISA). */
@@ -194,8 +185,6 @@ static __init int iommu_setup(char *p)
 #endif
 		if (!strncmp(p, "pt", 2))
 			iommu_pass_through = 1;
-		if (!strncmp(p, "group_mf", 8))
-			iommu_group_mf = 1;
 
 		gart_parse_options(p);
 
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
new file mode 100644
index 000000000000..c5a3e5cfe07f
--- /dev/null
+++ b/arch/x86/kernel/perf_regs.c
@@ -0,0 +1,105 @@
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/bug.h>
+#include <linux/stddef.h>
+#include <asm/perf_regs.h>
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_X86_32
+#define PERF_REG_X86_MAX PERF_REG_X86_32_MAX
+#else
+#define PERF_REG_X86_MAX PERF_REG_X86_64_MAX
+#endif
+
+#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
+
+static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
+	PT_REGS_OFFSET(PERF_REG_X86_AX, ax),
+	PT_REGS_OFFSET(PERF_REG_X86_BX, bx),
+	PT_REGS_OFFSET(PERF_REG_X86_CX, cx),
+	PT_REGS_OFFSET(PERF_REG_X86_DX, dx),
+	PT_REGS_OFFSET(PERF_REG_X86_SI, si),
+	PT_REGS_OFFSET(PERF_REG_X86_DI, di),
+	PT_REGS_OFFSET(PERF_REG_X86_BP, bp),
+	PT_REGS_OFFSET(PERF_REG_X86_SP, sp),
+	PT_REGS_OFFSET(PERF_REG_X86_IP, ip),
+	PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags),
+	PT_REGS_OFFSET(PERF_REG_X86_CS, cs),
+	PT_REGS_OFFSET(PERF_REG_X86_SS, ss),
+#ifdef CONFIG_X86_32
+	PT_REGS_OFFSET(PERF_REG_X86_DS, ds),
+	PT_REGS_OFFSET(PERF_REG_X86_ES, es),
+	PT_REGS_OFFSET(PERF_REG_X86_FS, fs),
+	PT_REGS_OFFSET(PERF_REG_X86_GS, gs),
+#else
+	/*
+	 * The pt_regs struct does not store
+	 * ds, es, fs, gs in 64 bit mode.
+	 */
+	(unsigned int) -1,
+	(unsigned int) -1,
+	(unsigned int) -1,
+	(unsigned int) -1,
+#endif
+#ifdef CONFIG_X86_64
+	PT_REGS_OFFSET(PERF_REG_X86_R8, r8),
+	PT_REGS_OFFSET(PERF_REG_X86_R9, r9),
+	PT_REGS_OFFSET(PERF_REG_X86_R10, r10),
+	PT_REGS_OFFSET(PERF_REG_X86_R11, r11),
+	PT_REGS_OFFSET(PERF_REG_X86_R12, r12),
+	PT_REGS_OFFSET(PERF_REG_X86_R13, r13),
+	PT_REGS_OFFSET(PERF_REG_X86_R14, r14),
+	PT_REGS_OFFSET(PERF_REG_X86_R15, r15),
+#endif
+};
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+	if (WARN_ON_ONCE(idx > ARRAY_SIZE(pt_regs_offset)))
+		return 0;
+
+	return regs_get_register(regs, pt_regs_offset[idx]);
+}
+
+#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL))
+
+#ifdef CONFIG_X86_32
+int perf_reg_validate(u64 mask)
+{
+	if (!mask || mask & REG_RESERVED)
+		return -EINVAL;
+
+	return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+	return PERF_SAMPLE_REGS_ABI_32;
+}
+#else /* CONFIG_X86_64 */
+#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
+		       (1ULL << PERF_REG_X86_ES) | \
+		       (1ULL << PERF_REG_X86_FS) | \
+		       (1ULL << PERF_REG_X86_GS))
+
+int perf_reg_validate(u64 mask)
+{
+	if (!mask || mask & REG_RESERVED)
+		return -EINVAL;
+
+	if (mask & REG_NOSUPPORT)
+		return -EINVAL;
+
+	return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+	if (test_tsk_thread_flag(task, TIF_IA32))
+		return PERF_SAMPLE_REGS_ABI_32;
+	else
+		return PERF_SAMPLE_REGS_ABI_64;
+}
+#endif /* CONFIG_X86_32 */
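
The new perf_regs.c maps the PERF_REG_X86_* sample-register indices onto struct pt_regs through an offsetof() table; perf_reg_validate() rejects masks with reserved bits (and, on 64-bit, the ds/es/fs/gs slots pt_regs does not store), and perf_reg_abi() reports which register set a task's samples follow. One observation: the bounds check in perf_reg_value() uses `idx > ARRAY_SIZE(pt_regs_offset)`, so an index exactly equal to the array size slips past the warning; `>=` would be the defensive comparison. On the consumer side the dump carries one u64 per set mask bit in ascending bit order; a hypothetical userspace decoder sketch, not part of the perf API:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Walk a PERF_SAMPLE_REGS_USER dump: one value per set mask bit,
 * consumed in ascending bit order. */
static void decode_sample_regs(uint64_t mask, const uint64_t *vals)
{
	int bit, slot = 0;

	for (bit = 0; bit < 64; bit++)
		if (mask & (1ULL << bit))
			printf("reg %d = 0x%" PRIx64 "\n", bit, vals[slot++]);
}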
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 03920a15a632..1b27de563561 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -512,7 +512,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
 
 #if defined(CONFIG_PCI) && defined(CONFIG_NUMA)
 /* Set correct numa_node information for AMD NB functions */
-static void __init quirk_amd_nb_node(struct pci_dev *dev)
+static void __devinit quirk_amd_nb_node(struct pci_dev *dev)
 {
 	struct pci_dev *nb_ht;
 	unsigned int devfn;
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 5a98aa272184..5cdff0357746 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -21,7 +21,7 @@
 #include <asm/cpu.h>
 #include <asm/stackprotector.h>
 
-DEFINE_PER_CPU(int, cpu_number);
+DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
 EXPORT_PER_CPU_SYMBOL(cpu_number);
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c1a310fb8309..7c5a8c314c02 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -106,17 +106,17 @@ int smp_num_siblings = 1;
 EXPORT_SYMBOL(smp_num_siblings);
 
 /* Last level cache ID of each logical CPU */
-DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
+DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
 
 /* representing HT siblings of each logical CPU */
-DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 
 /* representing HT and core siblings of each logical CPU */
-DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 
-DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
 
 /* Per CPU bogomips and other parameters */
 DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
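
The setup_percpu.c and smpboot.c hunks apply the same change across x86's hot per-CPU topology data: DEFINE_PER_CPU_READ_MOSTLY places a variable in the read-mostly per-CPU subsection, grouping rarely-written data away from frequently-written per-CPU variables. Remote readers of, say, another CPU's cpu_llc_id then stop being invalidated by writes to unrelated hot per-CPU data that would otherwise share the cache line. Minimal usage sketch, assuming only the standard percpu API:

#include <linux/percpu.h>

/* Written once at CPU bring-up, read constantly afterwards: a good
 * candidate for the read-mostly per-CPU section. */
static DEFINE_PER_CPU_READ_MOSTLY(u16, pkg_id);

static u16 sketch_this_pkg(void)
{
	return __this_cpu_read(pkg_id);
}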