commit     bcada3d4b8c96b8792c2306f363992ca5ab9da42
tree       e420679a5db6ea4e1694eef57f9abb6acac8d4d3 /arch/x86/kernel
parent     26198c21d1b286a084fe5d514a30bc7e6c712a34
parent     000078bc3ee69efb1124b8478c7527389a826074
author     Ingo Molnar <mingo@kernel.org>  2012-08-21 05:27:00 -0400
committer  Ingo Molnar <mingo@kernel.org>  2012-08-21 05:27:00 -0400
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
* Fix include order for bison/flex-generated C files, from Ben Hutchings
* Build fixes and documentation corrections, from David Ahern
* Group parsing support, from Jiri Olsa
* UI/gtk refactorings and improvements, from Namhyung Kim
* NULL dereference fix for perf script, from Namhyung Kim
* Assorted cleanups, from Robert Richter
* Let O= make invocations handle relative paths, from Steven Rostedt
* perf script python fixes, from Feng Tang
* Improve the 'perf lock' error message when the needed tracepoints
  are not present, from David Ahern
* Initial bash completion support, from Frederic Weisbecker
* Allow building without libelf, from Namhyung Kim
* Support DWARF CFI based unwinding, to have callchains when %bp
  based unwinding is not possible, from Jiri Olsa
* Symbol resolution fixes: while fixing support for PPC64 files with an .opd ELF
  section was the end goal, several fixes and cleanups for the code that handles
  all architectures are included, from Cody Schafer
* Add a description for the JIT interface, from Andi Kleen
* Assorted fixes for the documentation and for 32-bit builds, from Robert Richter
* Add support for non-tracepoint events in perf script python, from Feng Tang
* Cache the libtraceevent event_format associated with each evsel early, so that we
  avoid relookups, i.e. calling pevent_find_event repeatedly when processing
  tracepoint events.
[ This is to reduce the surface contact with libtraceevent and make clear what
  the perf tools need from that lib: so far, parsing the common and per-event
  fields. A brief illustrative sketch of the caching idea follows below. ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/kernel')
35 files changed, 727 insertions, 357 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8215e5652d97..8d7a619718b5 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -100,6 +100,8 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
 obj-$(CONFIG_OF) += devicetree.o
 obj-$(CONFIG_UPROBES) += uprobes.o
 
+obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 95bf99de9058..1b8e5a03d942 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -25,10 +25,6 @@ unsigned long acpi_realmode_flags;
 static char temp_stack[4096];
 #endif
 
-asmlinkage void acpi_enter_s3(void)
-{
-	acpi_enter_sleep_state(3, wake_sleep_flags);
-}
 /**
  * acpi_suspend_lowlevel - save kernel state
  *
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index 5653a5791ec9..67f59f8c6956 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -2,7 +2,6 @@
  * Variables and functions used by the code in sleep.c
  */
 
-#include <linux/linkage.h>
 #include <asm/realmode.h>
 
 extern unsigned long saved_video_mode;
@@ -11,7 +10,6 @@ extern long saved_magic;
 extern int wakeup_pmode_return;
 
 extern u8 wake_sleep_flags;
-extern asmlinkage void acpi_enter_s3(void);
 
 extern unsigned long acpi_copy_wakeup_routine(unsigned long);
 extern void wakeup_long64(void);
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index 72610839f03b..13ab720573e3 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -74,7 +74,9 @@ restore_registers:
 ENTRY(do_suspend_lowlevel)
 	call save_processor_state
 	call save_registers
-	call acpi_enter_s3
+	pushl $3
+	call acpi_enter_sleep_state
+	addl $4, %esp
 
 # In case of S3 failure, we'll emerge here. Jump
 # to ret_point to recover
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 014d1d28c397..8ea5164cbd04 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -71,7 +71,9 @@ ENTRY(do_suspend_lowlevel)
 	movq %rsi, saved_rsi
 
 	addq $8, %rsp
-	call acpi_enter_s3
+	movl $3, %edi
+	xorl %eax, %eax
+	call acpi_enter_sleep_state
 	/* in case something went wrong, restore the machine status and go on */
 	jmp resume_point
 
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 931280ff8299..afb7ff79a29f 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -224,7 +224,7 @@ void __init arch_init_ideal_nops(void)
 			ideal_nops = intel_nops;
 #endif
 		}
-
+		break;
 	default:
 #ifdef CONFIG_X86_64
 		ideal_nops = k8_nops;
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index f29f6dd6bc08..aadf3359e2a7 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -19,6 +19,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },
 	{}
 };
 EXPORT_SYMBOL(amd_nb_misc_ids);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index c421512ca5eb..24deb3082328 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -75,8 +75,8 @@ physid_mask_t phys_cpu_present_map;
 /*
  * Map cpu index to physical APIC ID
  */
-DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
-DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
 
@@ -88,7 +88,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
  * used for the mapping. This is where the behaviors of x86_64 and 32
  * actually diverge. Let's keep it ugly for now.
  */
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
 
 /*
  * Knob to control our willingness to enable the local APIC.
@@ -2143,6 +2143,23 @@ int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 }
 
 /*
+ * Override the generic EOI implementation with an optimized version.
+ * Only called during early boot when only one CPU is active and with
+ * interrupts disabled, so we know this does not race with actual APIC driver
+ * use.
+ */
+void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v))
+{
+	struct apic **drv;
+
+	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
+		/* Should happen once for each apic */
+		WARN_ON((*drv)->eoi_write == eoi_write);
+		(*drv)->eoi_write = eoi_write;
+	}
+}
+
+/*
  * Power management
  */
 #ifdef CONFIG_PM
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 406eee784684..a6c64aaddf9a 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1204,7 +1204,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 	BUG_ON(!cfg->vector);
 
 	vector = cfg->vector;
-	for_each_cpu(cpu, cfg->domain)
+	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
 		per_cpu(vector_irq, cpu)[vector] = -1;
 
 	cfg->vector = 0;
@@ -1212,7 +1212,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 
 	if (likely(!cfg->move_in_progress))
 		return;
-	for_each_cpu(cpu, cfg->old_domain) {
+	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
 		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
 		     vector++) {
 			if (per_cpu(vector_irq, cpu)[vector] != irq)
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index bac4c3804cc7..d30a6a9a0121 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -14,7 +14,7 @@ CFLAGS_common.o := $(nostackp)
 
 obj-y := intel_cacheinfo.o scattered.o topology.o
 obj-y += proc.o capflags.o powerflags.o common.o
-obj-y += vmware.o hypervisor.o sched.o mshyperv.o
+obj-y += vmware.o hypervisor.o mshyperv.o
 obj-y += rdrand.o
 obj-y += match.o
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5bbc082c47ad..46d8786d655e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -452,6 +452,35 @@ void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
 	c->x86_cache_size = l2size;
 }
 
+u16 __read_mostly tlb_lli_4k[NR_INFO];
+u16 __read_mostly tlb_lli_2m[NR_INFO];
+u16 __read_mostly tlb_lli_4m[NR_INFO];
+u16 __read_mostly tlb_lld_4k[NR_INFO];
+u16 __read_mostly tlb_lld_2m[NR_INFO];
+u16 __read_mostly tlb_lld_4m[NR_INFO];
+
+/*
+ * tlb_flushall_shift shows the balance point in replacing cr3 write
+ * with multiple 'invlpg'. It will do this replacement when
+ * flush_tlb_lines <= active_lines/2^tlb_flushall_shift.
+ * If tlb_flushall_shift is -1, means the replacement will be disabled.
+ */
+s8 __read_mostly tlb_flushall_shift = -1;
+
+void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)
+{
+	if (this_cpu->c_detect_tlb)
+		this_cpu->c_detect_tlb(c);
+
+	printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
+		"Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
+		"tlb_flushall_shift is 0x%x\n",
+		tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
+		tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
+		tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
+		tlb_flushall_shift);
+}
+
 void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_X86_HT
@@ -911,6 +940,8 @@ void __init identify_boot_cpu(void)
 #else
 	vgetcpu_set_mode();
 #endif
+	if (boot_cpu_data.cpuid_level >= 2)
+		cpu_detect_tlb(&boot_cpu_data);
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
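[ To make the tlb_flushall_shift formula above concrete (illustrative numbers,
  not taken from this patch): if active_lines is 512 and tlb_flushall_shift is
  6, a flush of at most 512/2^6 = 8 lines is done with individual invlpg
  instructions, while anything larger falls back to a full cr3 write; a shift
  of 5 would raise that cut-over to 16 lines. ]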
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 8bacc7826fb3..4041c24ae7db 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -20,10 +20,19 @@ struct cpu_dev {
 	void (*c_bsp_init)(struct cpuinfo_x86 *);
 	void (*c_init)(struct cpuinfo_x86 *);
 	void (*c_identify)(struct cpuinfo_x86 *);
+	void (*c_detect_tlb)(struct cpuinfo_x86 *);
 	unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int);
 	int c_x86_vendor;
 };
 
+struct _tlb_table {
+	unsigned char descriptor;
+	char tlb_type;
+	unsigned int entries;
+	/* unsigned int ways; */
+	char info[128];
+};
+
 #define cpu_dev_register(cpu_devX) \
 static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \
 __attribute__((__section__(".x86_cpu_dev.init"))) = \
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 755f64fb0743..a8f8fa9769d6 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -37,6 +37,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
 #endif
 	&x86_hyper_vmware,
 	&x86_hyper_ms_hyperv,
+#ifdef CONFIG_KVM_GUEST
+	&x86_hyper_kvm,
+#endif
 };
 
 const struct hypervisor_x86 *x86_hyper;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 3e6ff6cbf42a..0a4ce2980a5a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -491,6 +491,181 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i
 }
 #endif
 
+#define TLB_INST_4K	0x01
+#define TLB_INST_4M	0x02
+#define TLB_INST_2M_4M	0x03
+
+#define TLB_INST_ALL	0x05
+#define TLB_INST_1G	0x06
+
+#define TLB_DATA_4K	0x11
+#define TLB_DATA_4M	0x12
+#define TLB_DATA_2M_4M	0x13
+#define TLB_DATA_4K_4M	0x14
+
+#define TLB_DATA_1G	0x16
+
+#define TLB_DATA0_4K	0x21
+#define TLB_DATA0_4M	0x22
+#define TLB_DATA0_2M_4M	0x23
+
+#define STLB_4K		0x41
+
+static const struct _tlb_table intel_tlb_table[] __cpuinitconst = {
+	{ 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" },
+	{ 0x02, TLB_INST_4M, 2, " TLB_INST 4 MByte pages, full associative" },
+	{ 0x03, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way set associative" },
+	{ 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" },
+	{ 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" },
+	{ 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" },
+	{ 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages */" },
+	{ 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
+	{ 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
+	{ 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
+	{ 0x55, TLB_INST_2M_4M, 7, " TLB_INST 2-MByte or 4-MByte pages, fully associative" },
+	{ 0x56, TLB_DATA0_4M, 16, " TLB_DATA0 4 MByte pages, 4-way set associative" },
+	{ 0x57, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, 4-way associative" },
+	{ 0x59, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, fully associative" },
+	{ 0x5a, TLB_DATA0_2M_4M, 32, " TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative" },
+	{ 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" },
+	{ 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" },
+	{ 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" },
+	{ 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" },
+	{ 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" },
+	{ 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" },
+	{ 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" },
+	{ 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" },
+	{ 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" },
+	{ 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
+	{ 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" },
+	{ 0x00, 0, 0 }
+};
+
+static void __cpuinit intel_tlb_lookup(const unsigned char desc)
+{
+	unsigned char k;
+	if (desc == 0)
+		return;
+
+	/* look up this descriptor in the table */
+	for (k = 0; intel_tlb_table[k].descriptor != desc && \
+			intel_tlb_table[k].descriptor != 0; k++)
+		;
+
+	if (intel_tlb_table[k].tlb_type == 0)
+		return;
+
+	switch (intel_tlb_table[k].tlb_type) {
+	case STLB_4K:
+		if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
+		if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
+		break;
+	case TLB_INST_ALL:
+		if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
+		if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
+		if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
+		break;
+	case TLB_INST_4K:
+		if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
+		break;
+	case TLB_INST_4M:
+		if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
+		break;
+	case TLB_INST_2M_4M:
+		if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
+		if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
+		break;
+	case TLB_DATA_4K:
+	case TLB_DATA0_4K:
+		if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
+		break;
+	case TLB_DATA_4M:
+	case TLB_DATA0_4M:
+		if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
+		break;
+	case TLB_DATA_2M_4M:
+	case TLB_DATA0_2M_4M:
+		if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
+		if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
+		break;
+	case TLB_DATA_4K_4M:
+		if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
+		if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
+		break;
+	}
+}
+
+static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
+{
+	if (!cpu_has_invlpg) {
+		tlb_flushall_shift = -1;
+		return;
+	}
+	switch ((c->x86 << 8) + c->x86_model) {
+	case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
+	case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
+	case 0x617: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
+	case 0x61d: /* six-core 45 nm xeon "Dunnington" */
+		tlb_flushall_shift = -1;
+		break;
+	case 0x61a: /* 45 nm nehalem, "Bloomfield" */
+	case 0x61e: /* 45 nm nehalem, "Lynnfield" */
+	case 0x625: /* 32 nm nehalem, "Clarkdale" */
+	case 0x62c: /* 32 nm nehalem, "Gulftown" */
+	case 0x62e: /* 45 nm nehalem-ex, "Beckton" */
+	case 0x62f: /* 32 nm Xeon E7 */
+		tlb_flushall_shift = 6;
+		break;
+	case 0x62a: /* SandyBridge */
+	case 0x62d: /* SandyBridge, "Romely-EP" */
+		tlb_flushall_shift = 5;
+		break;
+	case 0x63a: /* Ivybridge */
+		tlb_flushall_shift = 1;
+		break;
+	default:
+		tlb_flushall_shift = 6;
+	}
+}
+
+static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
+{
+	int i, j, n;
+	unsigned int regs[4];
+	unsigned char *desc = (unsigned char *)regs;
+	/* Number of times to iterate */
+	n = cpuid_eax(2) & 0xFF;
+
+	for (i = 0 ; i < n ; i++) {
+		cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
+
+		/* If bit 31 is set, this is an unknown format */
+		for (j = 0 ; j < 3 ; j++)
+			if (regs[j] & (1 << 31))
+				regs[j] = 0;
+
+		/* Byte 0 is level count, not a descriptor */
+		for (j = 1 ; j < 16 ; j++)
+			intel_tlb_lookup(desc[j]);
+	}
+	intel_tlb_flushall_shift_set(c);
+}
+
 static const struct cpu_dev __cpuinitconst intel_cpu_dev = {
 	.c_vendor = "Intel",
 	.c_ident = { "GenuineIntel" },
@@ -546,6 +721,7 @@ static const struct cpu_dev __cpuinitconst intel_cpu_dev = {
 	},
 	.c_size_cache = intel_size_cache,
 #endif
+	.c_detect_tlb = intel_detect_tlb,
 	.c_early_init = early_init_intel,
 	.c_init = init_intel,
 	.c_x86_vendor = X86_VENDOR_INTEL,
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 413c2ced887c..13017626f9a8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -55,13 +55,6 @@ static struct severity {
 #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
 #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
 #define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
-#define MCACOD 0xffff
-/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
-#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
-#define MCACOD_SCRUBMSK 0xfff0
-#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
-#define MCACOD_DATA 0x0134 /* Data Load */
-#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
 
 MCESEV(
 	NO, "Invalid",
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5a5a5dc1ff15..292d0258311c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -60,8 +60,6 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
 
 int mce_disabled __read_mostly;
 
-#define MISC_MCELOG_MINOR 227
-
 #define SPINUNIT 100 /* 100ns */
 
 atomic_t mce_entry;
@@ -105,6 +103,8 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
 
 static DEFINE_PER_CPU(struct work_struct, mce_work);
 
+static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
+
 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
@@ -652,14 +652,18 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
  * Do a quick check if any of the events requires a panic.
  * This decides if we keep the events around or clear them.
  */
-static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp)
+static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
+			  struct pt_regs *regs)
 {
 	int i, ret = 0;
 
 	for (i = 0; i < banks; i++) {
 		m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
-		if (m->status & MCI_STATUS_VAL)
+		if (m->status & MCI_STATUS_VAL) {
 			__set_bit(i, validp);
+			if (quirk_no_way_out)
+				quirk_no_way_out(i, m, regs);
+		}
 		if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
 			ret = 1;
 	}
@@ -1042,7 +1046,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		*final = m;
 
 	memset(valid_banks, 0, sizeof(valid_banks));
-	no_way_out = mce_no_way_out(&m, &msg, valid_banks);
+	no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs);
 
 	barrier();
 
@@ -1190,6 +1194,7 @@ void mce_notify_process(void)
 {
 	unsigned long pfn;
 	struct mce_info *mi = mce_find_info();
+	int flags = MF_ACTION_REQUIRED;
 
 	if (!mi)
 		mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL);
@@ -1204,8 +1209,9 @@ void mce_notify_process(void)
 	 * doomed. We still need to mark the page as poisoned and alert any
 	 * other users of the page.
 	 */
-	if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 ||
-	    mi->restartable == 0) {
+	if (!mi->restartable)
+		flags |= MF_MUST_KILL;
+	if (memory_failure(pfn, MCE_VECTOR, flags) < 0) {
 		pr_err("Memory error not recovered");
 		force_sig(SIGBUS, current);
 	}
@@ -1418,6 +1424,34 @@ static void __mcheck_cpu_init_generic(void)
 	}
 }
 
+/*
+ * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and
+ * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM
+ * Vol 3B Table 15-20). But this confuses both the code that determines
+ * whether the machine check occurred in kernel or user mode, and also
+ * the severity assessment code. Pretend that EIPV was set, and take the
+ * ip/cs values from the pt_regs that mce_gather_info() ignored earlier.
+ */
+static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
+{
+	if (bank != 0)
+		return;
+	if ((m->mcgstatus & (MCG_STATUS_EIPV|MCG_STATUS_RIPV)) != 0)
+		return;
+	if ((m->status & (MCI_STATUS_OVER|MCI_STATUS_UC|
+			  MCI_STATUS_EN|MCI_STATUS_MISCV|MCI_STATUS_ADDRV|
+			  MCI_STATUS_PCC|MCI_STATUS_S|MCI_STATUS_AR|
+			  MCACOD)) !=
+			 (MCI_STATUS_UC|MCI_STATUS_EN|
+			  MCI_STATUS_MISCV|MCI_STATUS_ADDRV|MCI_STATUS_S|
+			  MCI_STATUS_AR|MCACOD_INSTR))
+		return;
+
+	m->mcgstatus |= MCG_STATUS_EIPV;
+	m->ip = regs->ip;
+	m->cs = regs->cs;
+}
+
 /* Add per CPU specific workarounds here */
 static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 {
@@ -1515,6 +1549,9 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 		 */
 		if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0)
 			mce_bootlog = 0;
+
+		if (c->x86 == 6 && c->x86_model == 45)
+			quirk_no_way_out = quirk_sandybridge_ifu;
 	}
 	if (monarch_timeout < 0)
 		monarch_timeout = 0;
@@ -2344,7 +2381,7 @@ static __init int mcheck_init_device(void)
 
 	return err;
 }
-device_initcall(mcheck_init_device);
+device_initcall_sync(mcheck_init_device);
 
 /*
  * Old style boot options parsing. Only for compatibility.
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f4873a64f46d..c4e916d77378 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -1,15 +1,17 @@
 /*
- * (c) 2005, 2006 Advanced Micro Devices, Inc.
+ * (c) 2005-2012 Advanced Micro Devices, Inc.
  * Your use of this code is subject to the terms and conditions of the
  * GNU general public license version 2. See "COPYING" or
  * http://www.gnu.org/licenses/gpl.html
 *
 * Written by Jacob Shin - AMD, Inc.
 *
- * Support : jacob.shin@amd.com
+ * Support: borislav.petkov@amd.com
 *
 * April 2006
 * - added support for AMD Family 0x10 processors
+ * May 2012
+ * - major scrubbing
 *
 * All MC4_MISCi registers are shared between multi-cores
 */
@@ -25,6 +27,7 @@
 #include <linux/cpu.h>
 #include <linux/smp.h>
 
+#include <asm/amd_nb.h>
 #include <asm/apic.h>
 #include <asm/idle.h>
 #include <asm/mce.h>
@@ -45,23 +48,15 @@
 #define MASK_BLKPTR_LO 0xFF000000
 #define MCG_XBLK_ADDR 0xC0000400
 
-struct threshold_block {
-	unsigned int block;
-	unsigned int bank;
-	unsigned int cpu;
-	u32 address;
-	u16 interrupt_enable;
-	bool interrupt_capable;
-	u16 threshold_limit;
-	struct kobject kobj;
-	struct list_head miscj;
+static const char * const th_names[] = {
+	"load_store",
+	"insn_fetch",
+	"combined_unit",
+	"",
+	"northbridge",
+	"execution_unit",
 };
 
-struct threshold_bank {
-	struct kobject *kobj;
-	struct threshold_block *blocks;
-	cpumask_var_t cpus;
-};
 static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);
 
 static unsigned char shared_bank[NR_BANKS] = {
@@ -84,6 +79,26 @@ struct thresh_restart {
 	u16 old_limit;
 };
 
+static const char * const bank4_names(struct threshold_block *b)
+{
+	switch (b->address) {
+	/* MSR4_MISC0 */
+	case 0x00000413:
+		return "dram";
+
+	case 0xc0000408:
+		return "ht_links";
+
+	case 0xc0000409:
+		return "l3_cache";
+
+	default:
+		WARN(1, "Funny MSR: 0x%08x\n", b->address);
+		return "";
+	}
+};
+
+
 static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
 {
 	/*
@@ -224,8 +239,6 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 
 		if (!block)
 			per_cpu(bank_map, cpu) |= (1 << bank);
-		if (shared_bank[bank] && c->cpu_core_id)
-			break;
 
 		memset(&b, 0, sizeof(b));
 		b.cpu = cpu;
@@ -326,7 +339,7 @@ struct threshold_attr {
 #define SHOW_FIELDS(name) \
 static ssize_t show_ ## name(struct threshold_block *b, char *buf) \
 { \
-	return sprintf(buf, "%lx\n", (unsigned long) b->name); \
+	return sprintf(buf, "%lu\n", (unsigned long) b->name); \
 }
 SHOW_FIELDS(interrupt_enable)
 SHOW_FIELDS(threshold_limit)
@@ -377,38 +390,21 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
 	return size;
 }
 
-struct threshold_block_cross_cpu {
-	struct threshold_block *tb;
-	long retval;
-};
-
-static void local_error_count_handler(void *_tbcc)
-{
-	struct threshold_block_cross_cpu *tbcc = _tbcc;
-	struct threshold_block *b = tbcc->tb;
-	u32 low, high;
-
-	rdmsr(b->address, low, high);
-	tbcc->retval = (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
-}
-
 static ssize_t show_error_count(struct threshold_block *b, char *buf)
 {
-	struct threshold_block_cross_cpu tbcc = { .tb = b, };
+	u32 lo, hi;
 
-	smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1);
-	return sprintf(buf, "%lx\n", tbcc.retval);
-}
-
-static ssize_t store_error_count(struct threshold_block *b,
-				 const char *buf, size_t count)
-{
-	struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
+	rdmsr_on_cpu(b->cpu, b->address, &lo, &hi);
 
-	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
-	return 1;
+	return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
+				     (THRESHOLD_MAX - b->threshold_limit)));
 }
 
+static struct threshold_attr error_count = {
+	.attr = {.name = __stringify(error_count), .mode = 0444 },
+	.show = show_error_count,
+};
+
 #define RW_ATTR(val) \
 static struct threshold_attr val = { \
 	.attr = {.name = __stringify(val), .mode = 0644 }, \
@@ -418,7 +414,6 @@ static struct threshold_attr val = { \
 
 RW_ATTR(interrupt_enable);
 RW_ATTR(threshold_limit);
-RW_ATTR(error_count);
 
 static struct attribute *default_attrs[] = {
 	&threshold_limit.attr,
@@ -517,7 +512,7 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
 
 	err = kobject_init_and_add(&b->kobj, &threshold_ktype,
 				   per_cpu(threshold_banks, cpu)[bank]->kobj,
-				   "misc%i", block);
+				   (bank == 4 ? bank4_names(b) : th_names[bank]));
 	if (err)
 		goto out_free;
 recurse:
@@ -548,98 +543,91 @@ out_free:
 	return err;
 }
 
-static __cpuinit long
-local_allocate_threshold_blocks(int cpu, unsigned int bank)
+static __cpuinit int __threshold_add_blocks(struct threshold_bank *b)
 {
-	return allocate_threshold_blocks(cpu, bank, 0,
-		MSR_IA32_MC0_MISC + bank * 4);
+	struct list_head *head = &b->blocks->miscj;
+	struct threshold_block *pos = NULL;
+	struct threshold_block *tmp = NULL;
+	int err = 0;
+
+	err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
+	if (err)
+		return err;
+
+	list_for_each_entry_safe(pos, tmp, head, miscj) {
+
+		err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
+		if (err) {
+			list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
+				kobject_del(&pos->kobj);
+
+			return err;
+		}
+	}
+	return err;
 }
 
-/* symlinks sibling shared banks to first core. first core owns dir/files. */
 static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 {
 	struct device *dev = per_cpu(mce_device, cpu);
-	int i, err = 0;
-	struct threshold_bank *b = NULL;
-	char name[32];
+	struct amd_northbridge *nb = NULL;
+	struct threshold_bank *b = NULL;
+	const char *name = th_names[bank];
+	int err = 0;
 
-	sprintf(name, "threshold_bank%i", bank);
+	if (shared_bank[bank]) {
 
-#ifdef CONFIG_SMP
-	if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
-		i = cpumask_first(cpu_llc_shared_mask(cpu));
+		nb = node_to_amd_nb(amd_get_nb_id(cpu));
+		WARN_ON(!nb);
 
-		/* first core not up yet */
-		if (cpu_data(i).cpu_core_id)
-			goto out;
+		/* threshold descriptor already initialized on this node? */
+		if (nb->bank4) {
+			/* yes, use it */
+			b = nb->bank4;
+			err = kobject_add(b->kobj, &dev->kobj, name);
+			if (err)
+				goto out;
 
-		/* already linked */
-		if (per_cpu(threshold_banks, cpu)[bank])
-			goto out;
+			per_cpu(threshold_banks, cpu)[bank] = b;
+			atomic_inc(&b->cpus);
 
-		b = per_cpu(threshold_banks, i)[bank];
+			err = __threshold_add_blocks(b);
 
-		if (!b)
 			goto out;
-
-		err = sysfs_create_link(&dev->kobj, b->kobj, name);
-		if (err)
-			goto out;
-
-		cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu));
-		per_cpu(threshold_banks, cpu)[bank] = b;
-
-		goto out;
+		}
 	}
-#endif
 
 	b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
 	if (!b) {
 		err = -ENOMEM;
 		goto out;
 	}
-	if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
-		kfree(b);
-		err = -ENOMEM;
-		goto out;
-	}
 
 	b->kobj = kobject_create_and_add(name, &dev->kobj);
-	if (!b->kobj)
+	if (!b->kobj) {
+		err = -EINVAL;
 		goto out_free;
-
-#ifndef CONFIG_SMP
-	cpumask_setall(b->cpus);
-#else
-	cpumask_set_cpu(cpu, b->cpus);
-#endif
+	}
 
 	per_cpu(threshold_banks, cpu)[bank] = b;
 
-	err = local_allocate_threshold_blocks(cpu, bank);
-	if (err)
-		goto out_free;
-
-	for_each_cpu(i, b->cpus) {
-		if (i == cpu)
-			continue;
-
-		dev = per_cpu(mce_device, i);
-		if (dev)
-			err = sysfs_create_link(&dev->kobj,b->kobj, name);
-		if (err)
-			goto out;
+	if (shared_bank[bank]) {
+		atomic_set(&b->cpus, 1);
 
-		per_cpu(threshold_banks, i)[bank] = b;
+		/* nb is already initialized, see above */
+		WARN_ON(nb->bank4);
+		nb->bank4 = b;
 	}
 
-	goto out;
+	err = allocate_threshold_blocks(cpu, bank, 0,
+					MSR_IA32_MC0_MISC + bank * 4);
+	if (!err)
+		goto out;
 
 out_free:
-	per_cpu(threshold_banks, cpu)[bank] = NULL;
-	free_cpumask_var(b->cpus);
 	kfree(b);
-out:
+
+out:
 	return err;
 }
 
@@ -660,12 +648,6 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
 	return err;
 }
 
-/*
- * let's be hotplug friendly.
- * in case of multiple core processors, the first core always takes ownership
- * of shared sysfs dir/files, and rest of the cores will be symlinked to it.
- */
-
 static void deallocate_threshold_block(unsigned int cpu,
 				       unsigned int bank)
 {
@@ -686,41 +668,42 @@ static void deallocate_threshold_block(unsigned int cpu,
 	per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
 }
 
+static void __threshold_remove_blocks(struct threshold_bank *b)
+{
+	struct threshold_block *pos = NULL;
+	struct threshold_block *tmp = NULL;
+
+	kobject_del(b->kobj);
+
+	list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
+		kobject_del(&pos->kobj);
+}
+
 static void threshold_remove_bank(unsigned int cpu, int bank)
 {
+	struct amd_northbridge *nb;
 	struct threshold_bank *b;
-	struct device *dev;
-	char name[32];
-	int i = 0;
 
 	b = per_cpu(threshold_banks, cpu)[bank];
 	if (!b)
 		return;
+
 	if (!b->blocks)
 		goto free_out;
 
-	sprintf(name, "threshold_bank%i", bank);
-
-#ifdef CONFIG_SMP
-	/* sibling symlink */
-	if (shared_bank[bank] && b->blocks->cpu != cpu) {
-		dev = per_cpu(mce_device, cpu);
-		sysfs_remove_link(&dev->kobj, name);
-		per_cpu(threshold_banks, cpu)[bank] = NULL;
-
-		return;
-	}
-#endif
-
-	/* remove all sibling symlinks before unregistering */
-	for_each_cpu(i, b->cpus) {
-		if (i == cpu)
-			continue;
-
-		dev = per_cpu(mce_device, i);
-		if (dev)
-			sysfs_remove_link(&dev->kobj, name);
-		per_cpu(threshold_banks, i)[bank] = NULL;
+	if (shared_bank[bank]) {
+		if (!atomic_dec_and_test(&b->cpus)) {
+			__threshold_remove_blocks(b);
+			per_cpu(threshold_banks, cpu)[bank] = NULL;
+			return;
+		} else {
+			/*
+			 * the last CPU on this node using the shared bank is
+			 * going away, remove that bank now.
+			 */
+			nb = node_to_amd_nb(amd_get_nb_id(cpu));
+			nb->bank4 = NULL;
+		}
 	}
 
 	deallocate_threshold_block(cpu, bank);
@@ -728,7 +711,6 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 free_out:
 	kobject_del(b->kobj);
 	kobject_put(b->kobj);
-	free_cpumask_var(b->cpus);
 	kfree(b);
 	per_cpu(threshold_banks, cpu)[bank] = NULL;
 }
@@ -777,4 +759,24 @@ static __init int threshold_init_device(void)
 
 	return 0;
 }
-device_initcall(threshold_init_device);
+/*
+ * there are 3 funcs which need to be _initcalled in a logic sequence:
+ * 1. xen_late_init_mcelog
+ * 2. mcheck_init_device
+ * 3. threshold_init_device
+ *
+ * xen_late_init_mcelog must register xen_mce_chrdev_device before
+ * native mce_chrdev_device registration if running under xen platform;
+ *
+ * mcheck_init_device should be inited before threshold_init_device to
+ * initialize mce_device, otherwise a NULL ptr dereference will cause panic.
+ *
+ * so we use following _initcalls
+ * 1. device_initcall(xen_late_init_mcelog);
+ * 2. device_initcall_sync(mcheck_init_device);
+ * 3. late_initcall(threshold_init_device);
+ *
+ * when running under xen, the initcall order is 1,2,3;
+ * on baremetal, we skip 1 and we do only 2 and 3.
+ */
+late_initcall(threshold_init_device);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 29557aa06dda..915b876edd1e 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -32,6 +32,8 @@ | |||
32 | #include <asm/smp.h> | 32 | #include <asm/smp.h> |
33 | #include <asm/alternative.h> | 33 | #include <asm/alternative.h> |
34 | #include <asm/timer.h> | 34 | #include <asm/timer.h> |
35 | #include <asm/desc.h> | ||
36 | #include <asm/ldt.h> | ||
35 | 37 | ||
36 | #include "perf_event.h" | 38 | #include "perf_event.h" |
37 | 39 | ||
@@ -1738,6 +1740,29 @@ valid_user_frame(const void __user *fp, unsigned long size) | |||
1738 | return (__range_not_ok(fp, size, TASK_SIZE) == 0); | 1740 | return (__range_not_ok(fp, size, TASK_SIZE) == 0); |
1739 | } | 1741 | } |
1740 | 1742 | ||
1743 | static unsigned long get_segment_base(unsigned int segment) | ||
1744 | { | ||
1745 | struct desc_struct *desc; | ||
1746 | int idx = segment >> 3; | ||
1747 | |||
1748 | if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) { | ||
1749 | if (idx > LDT_ENTRIES) | ||
1750 | return 0; | ||
1751 | |||
1752 | if (idx > current->active_mm->context.size) | ||
1753 | return 0; | ||
1754 | |||
1755 | desc = current->active_mm->context.ldt; | ||
1756 | } else { | ||
1757 | if (idx > GDT_ENTRIES) | ||
1758 | return 0; | ||
1759 | |||
1760 | desc = __this_cpu_ptr(&gdt_page.gdt[0]); | ||
1761 | } | ||
1762 | |||
1763 | return get_desc_base(desc + idx); | ||
1764 | } | ||
1765 | |||
1741 | #ifdef CONFIG_COMPAT | 1766 | #ifdef CONFIG_COMPAT |
1742 | 1767 | ||
1743 | #include <asm/compat.h> | 1768 | #include <asm/compat.h> |
@@ -1746,13 +1771,17 @@ static inline int | |||
1746 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1771 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) |
1747 | { | 1772 | { |
1748 | /* 32-bit process in 64-bit kernel. */ | 1773 | /* 32-bit process in 64-bit kernel. */ |
1774 | unsigned long ss_base, cs_base; | ||
1749 | struct stack_frame_ia32 frame; | 1775 | struct stack_frame_ia32 frame; |
1750 | const void __user *fp; | 1776 | const void __user *fp; |
1751 | 1777 | ||
1752 | if (!test_thread_flag(TIF_IA32)) | 1778 | if (!test_thread_flag(TIF_IA32)) |
1753 | return 0; | 1779 | return 0; |
1754 | 1780 | ||
1755 | fp = compat_ptr(regs->bp); | 1781 | cs_base = get_segment_base(regs->cs); |
1782 | ss_base = get_segment_base(regs->ss); | ||
1783 | |||
1784 | fp = compat_ptr(ss_base + regs->bp); | ||
1756 | while (entry->nr < PERF_MAX_STACK_DEPTH) { | 1785 | while (entry->nr < PERF_MAX_STACK_DEPTH) { |
1757 | unsigned long bytes; | 1786 | unsigned long bytes; |
1758 | frame.next_frame = 0; | 1787 | frame.next_frame = 0; |
@@ -1765,8 +1794,8 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
1765 | if (!valid_user_frame(fp, sizeof(frame))) | 1794 | if (!valid_user_frame(fp, sizeof(frame))) |
1766 | break; | 1795 | break; |
1767 | 1796 | ||
1768 | perf_callchain_store(entry, frame.return_address); | 1797 | perf_callchain_store(entry, cs_base + frame.return_address); |
1769 | fp = compat_ptr(frame.next_frame); | 1798 | fp = compat_ptr(ss_base + frame.next_frame); |
1770 | } | 1799 | } |
1771 | return 1; | 1800 | return 1; |
1772 | } | 1801 | } |
@@ -1789,6 +1818,12 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) | |||
1789 | return; | 1818 | return; |
1790 | } | 1819 | } |
1791 | 1820 | ||
1821 | /* | ||
1822 | * We don't know what to do with VM86 stacks.. ignore them for now. | ||
1823 | */ | ||
1824 | if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM)) | ||
1825 | return; | ||
1826 | |||
1792 | fp = (void __user *)regs->bp; | 1827 | fp = (void __user *)regs->bp; |
1793 | 1828 | ||
1794 | perf_callchain_store(entry, regs->ip); | 1829 | perf_callchain_store(entry, regs->ip); |
@@ -1816,16 +1851,50 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) | |||
1816 | } | 1851 | } |
1817 | } | 1852 | } |
1818 | 1853 | ||
1819 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | 1854 | /* |
1855 | * Deal with code segment offsets for the various execution modes: | ||
1856 | * | ||
1857 | * VM86 - the good olde 16 bit days, where the linear address is | ||
1858 | * 20 bits and we use regs->ip + 0x10 * regs->cs. | ||
1859 | * | ||
1860 | * IA32 - Where we need to look at GDT/LDT segment descriptor tables | ||
1861 | * to figure out what the 32bit base address is. | ||
1862 | * | ||
1863 | * X32 - has TIF_X32 set, but is running in x86_64 | ||
1864 | * | ||
1865 | * X86_64 - CS,DS,SS,ES are all zero based. | ||
1866 | */ | ||
1867 | static unsigned long code_segment_base(struct pt_regs *regs) | ||
1820 | { | 1868 | { |
1821 | unsigned long ip; | 1869 | /* |
1870 | * If we are in VM86 mode, add the segment offset to convert to a | ||
1871 | * linear address. | ||
1872 | */ | ||
1873 | if (regs->flags & X86_VM_MASK) | ||
1874 | return 0x10 * regs->cs; | ||
1875 | |||
1876 | /* | ||
1877 | * For IA32 we look at the GDT/LDT segment base to convert the | ||
1878 | * effective IP to a linear address. | ||
1879 | */ | ||
1880 | #ifdef CONFIG_X86_32 | ||
1881 | if (user_mode(regs) && regs->cs != __USER_CS) | ||
1882 | return get_segment_base(regs->cs); | ||
1883 | #else | ||
1884 | if (test_thread_flag(TIF_IA32)) { | ||
1885 | if (user_mode(regs) && regs->cs != __USER32_CS) | ||
1886 | return get_segment_base(regs->cs); | ||
1887 | } | ||
1888 | #endif | ||
1889 | return 0; | ||
1890 | } | ||
1822 | 1891 | ||
1892 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | ||
1893 | { | ||
1823 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) | 1894 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) |
1824 | ip = perf_guest_cbs->get_guest_ip(); | 1895 | return perf_guest_cbs->get_guest_ip(); |
1825 | else | ||
1826 | ip = instruction_pointer(regs); | ||
1827 | 1896 | ||
1828 | return ip; | 1897 | return regs->ip + code_segment_base(regs); |
1829 | } | 1898 | } |
1830 | 1899 | ||
1831 | unsigned long perf_misc_flags(struct pt_regs *regs) | 1900 | unsigned long perf_misc_flags(struct pt_regs *regs) |
@@ -1838,7 +1907,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs) | |||
1838 | else | 1907 | else |
1839 | misc |= PERF_RECORD_MISC_GUEST_KERNEL; | 1908 | misc |= PERF_RECORD_MISC_GUEST_KERNEL; |
1840 | } else { | 1909 | } else { |
1841 | if (!kernel_ip(regs->ip)) | 1910 | if (user_mode(regs)) |
1842 | misc |= PERF_RECORD_MISC_USER; | 1911 | misc |= PERF_RECORD_MISC_USER; |
1843 | else | 1912 | else |
1844 | misc |= PERF_RECORD_MISC_KERNEL; | 1913 | misc |= PERF_RECORD_MISC_KERNEL; |
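The new code_segment_base() helper above folds the CS base into the reported IP. Below is a minimal user-space sketch (not kernel code) of the VM86 case it describes, where the linear address is 0x10 * CS + IP; the segment and offset values are arbitrary examples.

```c
#include <stdio.h>
#include <stdint.h>

/*
 * Real-mode/VM86 segmentation forms a linear address as 16 * CS + IP,
 * which is why code_segment_base() returns 0x10 * regs->cs when
 * X86_VM_MASK is set.  Example values only.
 */
static uint32_t vm86_linear_ip(uint16_t cs, uint16_t ip)
{
	return 0x10u * cs + ip;
}

int main(void)
{
	/* e.g. CS=0xF000, IP=0xFFF0 is the classic x86 reset vector. */
	printf("linear IP = 0x%05x\n", (unsigned)vm86_linear_ip(0xF000, 0xFFF0));
	return 0;
}
```

For the IA32 and X86_64 cases the base comes from the GDT/LDT descriptor (or is simply zero), so no arithmetic beyond the addition in perf_instruction_pointer() is needed.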
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 821d53b696d1..6605a81ba339 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -516,6 +516,26 @@ static inline bool kernel_ip(unsigned long ip) | |||
516 | #endif | 516 | #endif |
517 | } | 517 | } |
518 | 518 | ||
519 | /* | ||
520 | * Not all PMUs provide the right context information to place the reported IP | ||
521 | * into full context. Specifically segment registers are typically not | ||
522 | * supplied. | ||
523 | * | ||
524 | * Assuming the address is a linear address (it is for IBS), we fake the CS and | ||
525 | * vm86 mode using the known zero-based code segment and 'fix up' the registers | ||
526 | * to reflect this. | ||
527 | * | ||
528 | * Intel PEBS/LBR appear to typically provide the effective address, nothing | ||
529 | * much we can do about that but pray and treat it like a linear address. | ||
530 | */ | ||
531 | static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip) | ||
532 | { | ||
533 | regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS; | ||
534 | if (regs->flags & X86_VM_MASK) | ||
535 | regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK); | ||
536 | regs->ip = ip; | ||
537 | } | ||
538 | |||
519 | #ifdef CONFIG_CPU_SUP_AMD | 539 | #ifdef CONFIG_CPU_SUP_AMD |
520 | 540 | ||
521 | int amd_pmu_init(void); | 541 | int amd_pmu_init(void); |
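set_linear_ip() above swaps the hardware VM flag for the software PERF_EFLAGS_VM bit with a single XOR. A stand-alone sketch of that bit swap, using made-up constants in place of X86_VM_MASK and PERF_EFLAGS_VM (the real values live in the kernel headers):

```c
#include <stdio.h>

#define FAKE_VM_MASK   (1u << 17)   /* stand-in for X86_VM_MASK    */
#define FAKE_EFLAGS_VM (1u << 3)    /* stand-in for PERF_EFLAGS_VM */

int main(void)
{
	unsigned int flags = FAKE_VM_MASK;   /* sample taken in vm86 mode */

	/*
	 * When exactly one of the two bits is set, XOR-ing with both of
	 * them clears the set bit and sets the other: the hardware VM
	 * flag is dropped (the faked CS is not a vm86 segment any more)
	 * while the fact that the sample came from vm86 mode is kept in
	 * the software PERF_EFLAGS_VM bit.
	 */
	if (flags & FAKE_VM_MASK)
		flags ^= (FAKE_EFLAGS_VM | FAKE_VM_MASK);

	printf("VM_MASK set: %d, EFLAGS_VM set: %d\n",
	       !!(flags & FAKE_VM_MASK), !!(flags & FAKE_EFLAGS_VM));
	return 0;
}
```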
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index da9bcdcd9856..7bfb5bec8630 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c | |||
@@ -13,6 +13,8 @@ | |||
13 | 13 | ||
14 | #include <asm/apic.h> | 14 | #include <asm/apic.h> |
15 | 15 | ||
16 | #include "perf_event.h" | ||
17 | |||
16 | static u32 ibs_caps; | 18 | static u32 ibs_caps; |
17 | 19 | ||
18 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) | 20 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) |
@@ -536,7 +538,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) | |||
536 | if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) { | 538 | if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) { |
537 | regs.flags &= ~PERF_EFLAGS_EXACT; | 539 | regs.flags &= ~PERF_EFLAGS_EXACT; |
538 | } else { | 540 | } else { |
539 | instruction_pointer_set(®s, ibs_data.regs[1]); | 541 | set_linear_ip(®s, ibs_data.regs[1]); |
540 | regs.flags |= PERF_EFLAGS_EXACT; | 542 | regs.flags |= PERF_EFLAGS_EXACT; |
541 | } | 543 | } |
542 | 544 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 629ae0b7ad90..e38d97bf4259 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -499,7 +499,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | |||
499 | * We sampled a branch insn, rewind using the LBR stack | 499 | * We sampled a branch insn, rewind using the LBR stack |
500 | */ | 500 | */ |
501 | if (ip == to) { | 501 | if (ip == to) { |
502 | regs->ip = from; | 502 | set_linear_ip(regs, from); |
503 | return 1; | 503 | return 1; |
504 | } | 504 | } |
505 | 505 | ||
@@ -529,7 +529,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | |||
529 | } while (to < ip); | 529 | } while (to < ip); |
530 | 530 | ||
531 | if (to == ip) { | 531 | if (to == ip) { |
532 | regs->ip = old_to; | 532 | set_linear_ip(regs, old_to); |
533 | return 1; | 533 | return 1; |
534 | } | 534 | } |
535 | 535 | ||
@@ -569,7 +569,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
569 | * A possible PERF_SAMPLE_REGS will have to transfer all regs. | 569 | * A possible PERF_SAMPLE_REGS will have to transfer all regs. |
570 | */ | 570 | */ |
571 | regs = *iregs; | 571 | regs = *iregs; |
572 | regs.ip = pebs->ip; | 572 | regs.flags = pebs->flags; |
573 | set_linear_ip(®s, pebs->ip); | ||
573 | regs.bp = pebs->bp; | 574 | regs.bp = pebs->bp; |
574 | regs.sp = pebs->sp; | 575 | regs.sp = pebs->sp; |
575 | 576 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index f3851892e077..c9e5dc56630a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h | |||
@@ -5,7 +5,7 @@ | |||
5 | #include "perf_event.h" | 5 | #include "perf_event.h" |
6 | 6 | ||
7 | #define UNCORE_PMU_NAME_LEN 32 | 7 | #define UNCORE_PMU_NAME_LEN 32 |
8 | #define UNCORE_PMU_HRTIMER_INTERVAL (60 * NSEC_PER_SEC) | 8 | #define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC) |
9 | 9 | ||
10 | #define UNCORE_FIXED_EVENT 0xff | 10 | #define UNCORE_FIXED_EVENT 0xff |
11 | #define UNCORE_PMC_IDX_MAX_GENERIC 8 | 11 | #define UNCORE_PMC_IDX_MAX_GENERIC 8 |
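The LL suffix added above matters on 32-bit builds, where 60 * NSEC_PER_SEC would otherwise be evaluated in 32-bit arithmetic and overflow before the assignment. A small user-space illustration (the overflow only shows up when long is 32 bits):

```c
#include <stdio.h>

#define NSEC_PER_SEC 1000000000L   /* matches the kernel definition */

int main(void)
{
	/*
	 * With 32-bit long the first product is computed in 32-bit
	 * arithmetic and overflows (formally undefined, typically wraps);
	 * the LL suffix promotes the multiplication to 64 bits, which is
	 * what the UNCORE_PMU_HRTIMER_INTERVAL hunk above fixes.
	 */
	long long wrapped = 60 * NSEC_PER_SEC;     /* may overflow on 32-bit */
	long long correct = 60LL * NSEC_PER_SEC;   /* always 60000000000     */

	printf("60 * NSEC_PER_SEC   = %lld\n", wrapped);
	printf("60LL * NSEC_PER_SEC = %lld\n", correct);
	return 0;
}
```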
diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c deleted file mode 100644 index a640ae5ad201..000000000000 --- a/arch/x86/kernel/cpu/sched.c +++ /dev/null | |||
@@ -1,55 +0,0 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/math64.h> | ||
3 | #include <linux/percpu.h> | ||
4 | #include <linux/irqflags.h> | ||
5 | |||
6 | #include <asm/cpufeature.h> | ||
7 | #include <asm/processor.h> | ||
8 | |||
9 | #ifdef CONFIG_SMP | ||
10 | |||
11 | static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched); | ||
12 | |||
13 | static unsigned long scale_aperfmperf(void) | ||
14 | { | ||
15 | struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched); | ||
16 | unsigned long ratio, flags; | ||
17 | |||
18 | local_irq_save(flags); | ||
19 | get_aperfmperf(&val); | ||
20 | local_irq_restore(flags); | ||
21 | |||
22 | ratio = calc_aperfmperf_ratio(old, &val); | ||
23 | *old = val; | ||
24 | |||
25 | return ratio; | ||
26 | } | ||
27 | |||
28 | unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) | ||
29 | { | ||
30 | /* | ||
31 | * do aperf/mperf on the cpu level because it includes things | ||
32 | * like turbo mode, which are relevant to full cores. | ||
33 | */ | ||
34 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
35 | return scale_aperfmperf(); | ||
36 | |||
37 | /* | ||
38 | * maybe have something cpufreq here | ||
39 | */ | ||
40 | |||
41 | return default_scale_freq_power(sd, cpu); | ||
42 | } | ||
43 | |||
44 | unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu) | ||
45 | { | ||
46 | /* | ||
47 | * aperf/mperf already includes the smt gain | ||
48 | */ | ||
49 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
50 | return SCHED_LOAD_SCALE; | ||
51 | |||
52 | return default_scale_smt_power(sd, cpu); | ||
53 | } | ||
54 | |||
55 | #endif | ||
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 41857970517f..ed858e9e9a74 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -944,7 +944,7 @@ void __init e820_reserve_resources(void) | |||
944 | for (i = 0; i < e820_saved.nr_map; i++) { | 944 | for (i = 0; i < e820_saved.nr_map; i++) { |
945 | struct e820entry *entry = &e820_saved.map[i]; | 945 | struct e820entry *entry = &e820_saved.map[i]; |
946 | firmware_map_add_early(entry->addr, | 946 | firmware_map_add_early(entry->addr, |
947 | entry->addr + entry->size - 1, | 947 | entry->addr + entry->size, |
948 | e820_type_to_string(entry->type)); | 948 | e820_type_to_string(entry->type)); |
949 | } | 949 | } |
950 | } | 950 | } |
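For illustration only, the numeric difference between the inclusive last byte (addr + size - 1) the caller used to pass and the exclusive end (addr + size) it passes after this hunk; the addresses below are made-up example values, and which convention is correct is defined by firmware_map_add_early() itself:

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t addr = 0x100000, size = 0x200000;   /* example range */

	printf("inclusive last byte: 0x%llx\n",
	       (unsigned long long)(addr + size - 1));   /* 0x2fffff */
	printf("exclusive end:       0x%llx\n",
	       (unsigned long long)(addr + size));       /* 0x300000 */
	return 0;
}
```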
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 459d4a0dca8d..b7a81dcb7366 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -1124,24 +1124,6 @@ apicinterrupt LOCAL_TIMER_VECTOR \ | |||
1124 | apicinterrupt X86_PLATFORM_IPI_VECTOR \ | 1124 | apicinterrupt X86_PLATFORM_IPI_VECTOR \ |
1125 | x86_platform_ipi smp_x86_platform_ipi | 1125 | x86_platform_ipi smp_x86_platform_ipi |
1126 | 1126 | ||
1127 | #ifdef CONFIG_SMP | ||
1128 | ALIGN | ||
1129 | INTR_FRAME | ||
1130 | .irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ | ||
1131 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 | ||
1132 | .if NUM_INVALIDATE_TLB_VECTORS > \idx | ||
1133 | ENTRY(invalidate_interrupt\idx) | ||
1134 | pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx) | ||
1135 | jmp .Lcommon_invalidate_interrupt0 | ||
1136 | CFI_ADJUST_CFA_OFFSET -8 | ||
1137 | END(invalidate_interrupt\idx) | ||
1138 | .endif | ||
1139 | .endr | ||
1140 | CFI_ENDPROC | ||
1141 | apicinterrupt INVALIDATE_TLB_VECTOR_START, \ | ||
1142 | invalidate_interrupt0, smp_invalidate_interrupt | ||
1143 | #endif | ||
1144 | |||
1145 | apicinterrupt THRESHOLD_APIC_VECTOR \ | 1127 | apicinterrupt THRESHOLD_APIC_VECTOR \ |
1146 | threshold_interrupt smp_threshold_interrupt | 1128 | threshold_interrupt smp_threshold_interrupt |
1147 | apicinterrupt THERMAL_APIC_VECTOR \ | 1129 | apicinterrupt THERMAL_APIC_VECTOR \ |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 1f5f1d5d2a02..7ad683d78645 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -328,6 +328,7 @@ void fixup_irqs(void) | |||
328 | chip->irq_retrigger(data); | 328 | chip->irq_retrigger(data); |
329 | raw_spin_unlock(&desc->lock); | 329 | raw_spin_unlock(&desc->lock); |
330 | } | 330 | } |
331 | __this_cpu_write(vector_irq[vector], -1); | ||
331 | } | 332 | } |
332 | } | 333 | } |
333 | #endif | 334 | #endif |
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 252981afd6c4..6e03b0d69138 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -171,79 +171,6 @@ static void __init smp_intr_init(void) | |||
171 | */ | 171 | */ |
172 | alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); | 172 | alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); |
173 | 173 | ||
174 | /* IPIs for invalidation */ | ||
175 | #define ALLOC_INVTLB_VEC(NR) \ | ||
176 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+NR, \ | ||
177 | invalidate_interrupt##NR) | ||
178 | |||
179 | switch (NUM_INVALIDATE_TLB_VECTORS) { | ||
180 | default: | ||
181 | ALLOC_INVTLB_VEC(31); | ||
182 | case 31: | ||
183 | ALLOC_INVTLB_VEC(30); | ||
184 | case 30: | ||
185 | ALLOC_INVTLB_VEC(29); | ||
186 | case 29: | ||
187 | ALLOC_INVTLB_VEC(28); | ||
188 | case 28: | ||
189 | ALLOC_INVTLB_VEC(27); | ||
190 | case 27: | ||
191 | ALLOC_INVTLB_VEC(26); | ||
192 | case 26: | ||
193 | ALLOC_INVTLB_VEC(25); | ||
194 | case 25: | ||
195 | ALLOC_INVTLB_VEC(24); | ||
196 | case 24: | ||
197 | ALLOC_INVTLB_VEC(23); | ||
198 | case 23: | ||
199 | ALLOC_INVTLB_VEC(22); | ||
200 | case 22: | ||
201 | ALLOC_INVTLB_VEC(21); | ||
202 | case 21: | ||
203 | ALLOC_INVTLB_VEC(20); | ||
204 | case 20: | ||
205 | ALLOC_INVTLB_VEC(19); | ||
206 | case 19: | ||
207 | ALLOC_INVTLB_VEC(18); | ||
208 | case 18: | ||
209 | ALLOC_INVTLB_VEC(17); | ||
210 | case 17: | ||
211 | ALLOC_INVTLB_VEC(16); | ||
212 | case 16: | ||
213 | ALLOC_INVTLB_VEC(15); | ||
214 | case 15: | ||
215 | ALLOC_INVTLB_VEC(14); | ||
216 | case 14: | ||
217 | ALLOC_INVTLB_VEC(13); | ||
218 | case 13: | ||
219 | ALLOC_INVTLB_VEC(12); | ||
220 | case 12: | ||
221 | ALLOC_INVTLB_VEC(11); | ||
222 | case 11: | ||
223 | ALLOC_INVTLB_VEC(10); | ||
224 | case 10: | ||
225 | ALLOC_INVTLB_VEC(9); | ||
226 | case 9: | ||
227 | ALLOC_INVTLB_VEC(8); | ||
228 | case 8: | ||
229 | ALLOC_INVTLB_VEC(7); | ||
230 | case 7: | ||
231 | ALLOC_INVTLB_VEC(6); | ||
232 | case 6: | ||
233 | ALLOC_INVTLB_VEC(5); | ||
234 | case 5: | ||
235 | ALLOC_INVTLB_VEC(4); | ||
236 | case 4: | ||
237 | ALLOC_INVTLB_VEC(3); | ||
238 | case 3: | ||
239 | ALLOC_INVTLB_VEC(2); | ||
240 | case 2: | ||
241 | ALLOC_INVTLB_VEC(1); | ||
242 | case 1: | ||
243 | ALLOC_INVTLB_VEC(0); | ||
244 | break; | ||
245 | } | ||
246 | |||
247 | /* IPI for generic function call */ | 174 | /* IPI for generic function call */ |
248 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); | 175 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); |
249 | 176 | ||
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index 1d5d31ea686b..dc1404bf8e4b 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c | |||
@@ -107,7 +107,7 @@ static int __init create_setup_data_nodes(struct dentry *parent) | |||
107 | { | 107 | { |
108 | struct setup_data_node *node; | 108 | struct setup_data_node *node; |
109 | struct setup_data *data; | 109 | struct setup_data *data; |
110 | int error = -ENOMEM; | 110 | int error; |
111 | struct dentry *d; | 111 | struct dentry *d; |
112 | struct page *pg; | 112 | struct page *pg; |
113 | u64 pa_data; | 113 | u64 pa_data; |
@@ -121,8 +121,10 @@ static int __init create_setup_data_nodes(struct dentry *parent) | |||
121 | 121 | ||
122 | while (pa_data) { | 122 | while (pa_data) { |
123 | node = kmalloc(sizeof(*node), GFP_KERNEL); | 123 | node = kmalloc(sizeof(*node), GFP_KERNEL); |
124 | if (!node) | 124 | if (!node) { |
125 | error = -ENOMEM; | ||
125 | goto err_dir; | 126 | goto err_dir; |
127 | } | ||
126 | 128 | ||
127 | pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT); | 129 | pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT); |
128 | if (PageHighMem(pg)) { | 130 | if (PageHighMem(pg)) { |
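A minimal user-space sketch of the error-handling shape the hunk above moves to, with the error code assigned at the failure site rather than in a catch-all initialiser; make_label() is a made-up helper used purely for illustration:

```c
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int make_label(const char *name, char **out)
{
	int error;
	char *buf;

	buf = malloc(strlen(name) + 1);
	if (!buf) {
		error = -ENOMEM;        /* set exactly where malloc() fails */
		goto err;
	}

	strcpy(buf, name);
	*out = buf;
	return 0;
err:
	*out = NULL;
	return error;               /* each goto path carries its own cause */
}

int main(void)
{
	char *label;
	int rc = make_label("setup_data", &label);

	printf("make_label() = %d, label = %s\n", rc, rc ? "(none)" : label);
	free(label);
	return 0;
}
```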
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e554e5ad2fe8..c1d61ee4b4f1 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -39,6 +39,9 @@ | |||
39 | #include <asm/desc.h> | 39 | #include <asm/desc.h> |
40 | #include <asm/tlbflush.h> | 40 | #include <asm/tlbflush.h> |
41 | #include <asm/idle.h> | 41 | #include <asm/idle.h> |
42 | #include <asm/apic.h> | ||
43 | #include <asm/apicdef.h> | ||
44 | #include <asm/hypervisor.h> | ||
42 | 45 | ||
43 | static int kvmapf = 1; | 46 | static int kvmapf = 1; |
44 | 47 | ||
@@ -283,6 +286,22 @@ static void kvm_register_steal_time(void) | |||
283 | cpu, __pa(st)); | 286 | cpu, __pa(st)); |
284 | } | 287 | } |
285 | 288 | ||
289 | static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; | ||
290 | |||
291 | static void kvm_guest_apic_eoi_write(u32 reg, u32 val) | ||
292 | { | ||
293 | /** | ||
294 | * This relies on __test_and_clear_bit to modify the memory | ||
295 | * in a way that is atomic with respect to the local CPU. | ||
296 | * The hypervisor only accesses this memory from the local CPU so | ||
297 | * there's no need for lock or memory barriers. | ||
298 | * An optimization barrier is implied in apic write. | ||
299 | */ | ||
300 | if (__test_and_clear_bit(KVM_PV_EOI_BIT, &__get_cpu_var(kvm_apic_eoi))) | ||
301 | return; | ||
302 | apic_write(APIC_EOI, APIC_EOI_ACK); | ||
303 | } | ||
304 | |||
286 | void __cpuinit kvm_guest_cpu_init(void) | 305 | void __cpuinit kvm_guest_cpu_init(void) |
287 | { | 306 | { |
288 | if (!kvm_para_available()) | 307 | if (!kvm_para_available()) |
@@ -300,11 +319,20 @@ void __cpuinit kvm_guest_cpu_init(void) | |||
300 | smp_processor_id()); | 319 | smp_processor_id()); |
301 | } | 320 | } |
302 | 321 | ||
322 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) { | ||
323 | unsigned long pa; | ||
324 | /* Size alignment is implied but just to make it explicit. */ | ||
325 | BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4); | ||
326 | __get_cpu_var(kvm_apic_eoi) = 0; | ||
327 | pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED; | ||
328 | wrmsrl(MSR_KVM_PV_EOI_EN, pa); | ||
329 | } | ||
330 | |||
303 | if (has_steal_clock) | 331 | if (has_steal_clock) |
304 | kvm_register_steal_time(); | 332 | kvm_register_steal_time(); |
305 | } | 333 | } |
306 | 334 | ||
307 | static void kvm_pv_disable_apf(void *unused) | 335 | static void kvm_pv_disable_apf(void) |
308 | { | 336 | { |
309 | if (!__get_cpu_var(apf_reason).enabled) | 337 | if (!__get_cpu_var(apf_reason).enabled) |
310 | return; | 338 | return; |
@@ -316,11 +344,23 @@ static void kvm_pv_disable_apf(void *unused) | |||
316 | smp_processor_id()); | 344 | smp_processor_id()); |
317 | } | 345 | } |
318 | 346 | ||
347 | static void kvm_pv_guest_cpu_reboot(void *unused) | ||
348 | { | ||
349 | /* | ||
350 | * We disable PV EOI before we load a new kernel by kexec, | ||
351 | * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory. | ||
352 | * New kernel can re-enable when it boots. | ||
353 | */ | ||
354 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) | ||
355 | wrmsrl(MSR_KVM_PV_EOI_EN, 0); | ||
356 | kvm_pv_disable_apf(); | ||
357 | } | ||
358 | |||
319 | static int kvm_pv_reboot_notify(struct notifier_block *nb, | 359 | static int kvm_pv_reboot_notify(struct notifier_block *nb, |
320 | unsigned long code, void *unused) | 360 | unsigned long code, void *unused) |
321 | { | 361 | { |
322 | if (code == SYS_RESTART) | 362 | if (code == SYS_RESTART) |
323 | on_each_cpu(kvm_pv_disable_apf, NULL, 1); | 363 | on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1); |
324 | return NOTIFY_DONE; | 364 | return NOTIFY_DONE; |
325 | } | 365 | } |
326 | 366 | ||
@@ -371,7 +411,9 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy) | |||
371 | static void kvm_guest_cpu_offline(void *dummy) | 411 | static void kvm_guest_cpu_offline(void *dummy) |
372 | { | 412 | { |
373 | kvm_disable_steal_time(); | 413 | kvm_disable_steal_time(); |
374 | kvm_pv_disable_apf(NULL); | 414 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) |
415 | wrmsrl(MSR_KVM_PV_EOI_EN, 0); | ||
416 | kvm_pv_disable_apf(); | ||
375 | apf_task_wake_all(); | 417 | apf_task_wake_all(); |
376 | } | 418 | } |
377 | 419 | ||
@@ -424,6 +466,9 @@ void __init kvm_guest_init(void) | |||
424 | pv_time_ops.steal_clock = kvm_steal_clock; | 466 | pv_time_ops.steal_clock = kvm_steal_clock; |
425 | } | 467 | } |
426 | 468 | ||
469 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) | ||
470 | apic_set_eoi_write(kvm_guest_apic_eoi_write); | ||
471 | |||
427 | #ifdef CONFIG_SMP | 472 | #ifdef CONFIG_SMP |
428 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; | 473 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; |
429 | register_cpu_notifier(&kvm_cpu_notifier); | 474 | register_cpu_notifier(&kvm_cpu_notifier); |
@@ -432,6 +477,19 @@ void __init kvm_guest_init(void) | |||
432 | #endif | 477 | #endif |
433 | } | 478 | } |
434 | 479 | ||
480 | static bool __init kvm_detect(void) | ||
481 | { | ||
482 | if (!kvm_para_available()) | ||
483 | return false; | ||
484 | return true; | ||
485 | } | ||
486 | |||
487 | const struct hypervisor_x86 x86_hyper_kvm __refconst = { | ||
488 | .name = "KVM", | ||
489 | .detect = kvm_detect, | ||
490 | }; | ||
491 | EXPORT_SYMBOL_GPL(x86_hyper_kvm); | ||
492 | |||
435 | static __init int activate_jump_labels(void) | 493 | static __init int activate_jump_labels(void) |
436 | { | 494 | { |
437 | if (has_steal_clock) { | 495 | if (has_steal_clock) { |
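A user-space sketch of the PV EOI fast path registered above: the guest tests and clears a bit in memory shared with the hypervisor and only falls back to a real APIC EOI write when the bit was clear. The bit number, the helper, and the "APIC write" below are stand-ins, not kernel interfaces.

```c
#include <stdbool.h>
#include <stdio.h>

#define PV_EOI_BIT 0

static unsigned long shared_eoi_word;   /* stands in for per-CPU kvm_apic_eoi */

/* Mimics __test_and_clear_bit(); only local-CPU atomicity is required. */
static bool test_and_clear(int bit, unsigned long *word)
{
	unsigned long mask = 1UL << bit;
	bool was_set = (*word & mask) != 0;

	*word &= ~mask;
	return was_set;
}

static void guest_apic_eoi_write(void)
{
	/* Fast path: hypervisor already promised to handle the EOI. */
	if (test_and_clear(PV_EOI_BIT, &shared_eoi_word))
		return;

	/* Slow path: a real APIC register write (a VM exit under KVM). */
	printf("APIC_EOI write\n");
}

int main(void)
{
	shared_eoi_word = 1UL << PV_EOI_BIT;   /* hypervisor set the bit  */
	guest_apic_eoi_write();                /* no APIC write needed    */
	guest_apic_eoi_write();                /* falls back to real EOI  */
	return 0;
}
```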
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 202494d2ec6e..216a4d754b0c 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c | |||
@@ -81,7 +81,7 @@ int apply_relocate(Elf32_Shdr *sechdrs, | |||
81 | *location += sym->st_value; | 81 | *location += sym->st_value; |
82 | break; | 82 | break; |
83 | case R_386_PC32: | 83 | case R_386_PC32: |
84 | /* Add the value, subtract its postition */ | 84 | /* Add the value, subtract its position */ |
85 | *location += sym->st_value - (uint32_t)location; | 85 | *location += sym->st_value - (uint32_t)location; |
86 | break; | 86 | break; |
87 | default: | 87 | default: |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index c0f420f76cd3..de2b7ad70273 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -45,15 +45,6 @@ int iommu_detected __read_mostly = 0; | |||
45 | */ | 45 | */ |
46 | int iommu_pass_through __read_mostly; | 46 | int iommu_pass_through __read_mostly; |
47 | 47 | ||
48 | /* | ||
49 | * Group multi-function PCI devices into a single device-group for the | ||
50 | * iommu_device_group interface. This tells the iommu driver to pretend | ||
51 | * it cannot distinguish between functions of a device, exposing only one | ||
52 | * group for the device. Useful for disallowing use of individual PCI | ||
53 | * functions from userspace drivers. | ||
54 | */ | ||
55 | int iommu_group_mf __read_mostly; | ||
56 | |||
57 | extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; | 48 | extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; |
58 | 49 | ||
59 | /* Dummy device used for NULL arguments (normally ISA). */ | 50 | /* Dummy device used for NULL arguments (normally ISA). */ |
@@ -194,8 +185,6 @@ static __init int iommu_setup(char *p) | |||
194 | #endif | 185 | #endif |
195 | if (!strncmp(p, "pt", 2)) | 186 | if (!strncmp(p, "pt", 2)) |
196 | iommu_pass_through = 1; | 187 | iommu_pass_through = 1; |
197 | if (!strncmp(p, "group_mf", 8)) | ||
198 | iommu_group_mf = 1; | ||
199 | 188 | ||
200 | gart_parse_options(p); | 189 | gart_parse_options(p); |
201 | 190 | ||
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c new file mode 100644 index 000000000000..c5a3e5cfe07f --- /dev/null +++ b/arch/x86/kernel/perf_regs.c | |||
@@ -0,0 +1,105 @@ | |||
1 | #include <linux/errno.h> | ||
2 | #include <linux/kernel.h> | ||
3 | #include <linux/sched.h> | ||
4 | #include <linux/perf_event.h> | ||
5 | #include <linux/bug.h> | ||
6 | #include <linux/stddef.h> | ||
7 | #include <asm/perf_regs.h> | ||
8 | #include <asm/ptrace.h> | ||
9 | |||
10 | #ifdef CONFIG_X86_32 | ||
11 | #define PERF_REG_X86_MAX PERF_REG_X86_32_MAX | ||
12 | #else | ||
13 | #define PERF_REG_X86_MAX PERF_REG_X86_64_MAX | ||
14 | #endif | ||
15 | |||
16 | #define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r) | ||
17 | |||
18 | static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = { | ||
19 | PT_REGS_OFFSET(PERF_REG_X86_AX, ax), | ||
20 | PT_REGS_OFFSET(PERF_REG_X86_BX, bx), | ||
21 | PT_REGS_OFFSET(PERF_REG_X86_CX, cx), | ||
22 | PT_REGS_OFFSET(PERF_REG_X86_DX, dx), | ||
23 | PT_REGS_OFFSET(PERF_REG_X86_SI, si), | ||
24 | PT_REGS_OFFSET(PERF_REG_X86_DI, di), | ||
25 | PT_REGS_OFFSET(PERF_REG_X86_BP, bp), | ||
26 | PT_REGS_OFFSET(PERF_REG_X86_SP, sp), | ||
27 | PT_REGS_OFFSET(PERF_REG_X86_IP, ip), | ||
28 | PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags), | ||
29 | PT_REGS_OFFSET(PERF_REG_X86_CS, cs), | ||
30 | PT_REGS_OFFSET(PERF_REG_X86_SS, ss), | ||
31 | #ifdef CONFIG_X86_32 | ||
32 | PT_REGS_OFFSET(PERF_REG_X86_DS, ds), | ||
33 | PT_REGS_OFFSET(PERF_REG_X86_ES, es), | ||
34 | PT_REGS_OFFSET(PERF_REG_X86_FS, fs), | ||
35 | PT_REGS_OFFSET(PERF_REG_X86_GS, gs), | ||
36 | #else | ||
37 | /* | ||
38 | * The pt_regs struct does not store | ||
39 | * ds, es, fs, gs in 64 bit mode. | ||
40 | */ | ||
41 | (unsigned int) -1, | ||
42 | (unsigned int) -1, | ||
43 | (unsigned int) -1, | ||
44 | (unsigned int) -1, | ||
45 | #endif | ||
46 | #ifdef CONFIG_X86_64 | ||
47 | PT_REGS_OFFSET(PERF_REG_X86_R8, r8), | ||
48 | PT_REGS_OFFSET(PERF_REG_X86_R9, r9), | ||
49 | PT_REGS_OFFSET(PERF_REG_X86_R10, r10), | ||
50 | PT_REGS_OFFSET(PERF_REG_X86_R11, r11), | ||
51 | PT_REGS_OFFSET(PERF_REG_X86_R12, r12), | ||
52 | PT_REGS_OFFSET(PERF_REG_X86_R13, r13), | ||
53 | PT_REGS_OFFSET(PERF_REG_X86_R14, r14), | ||
54 | PT_REGS_OFFSET(PERF_REG_X86_R15, r15), | ||
55 | #endif | ||
56 | }; | ||
57 | |||
58 | u64 perf_reg_value(struct pt_regs *regs, int idx) | ||
59 | { | ||
60 | if (WARN_ON_ONCE(idx > ARRAY_SIZE(pt_regs_offset))) | ||
61 | return 0; | ||
62 | |||
63 | return regs_get_register(regs, pt_regs_offset[idx]); | ||
64 | } | ||
65 | |||
66 | #define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL)) | ||
67 | |||
68 | #ifdef CONFIG_X86_32 | ||
69 | int perf_reg_validate(u64 mask) | ||
70 | { | ||
71 | if (!mask || mask & REG_RESERVED) | ||
72 | return -EINVAL; | ||
73 | |||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | u64 perf_reg_abi(struct task_struct *task) | ||
78 | { | ||
79 | return PERF_SAMPLE_REGS_ABI_32; | ||
80 | } | ||
81 | #else /* CONFIG_X86_64 */ | ||
82 | #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ | ||
83 | (1ULL << PERF_REG_X86_ES) | \ | ||
84 | (1ULL << PERF_REG_X86_FS) | \ | ||
85 | (1ULL << PERF_REG_X86_GS)) | ||
86 | |||
87 | int perf_reg_validate(u64 mask) | ||
88 | { | ||
89 | if (!mask || mask & REG_RESERVED) | ||
90 | return -EINVAL; | ||
91 | |||
92 | if (mask & REG_NOSUPPORT) | ||
93 | return -EINVAL; | ||
94 | |||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | u64 perf_reg_abi(struct task_struct *task) | ||
99 | { | ||
100 | if (test_tsk_thread_flag(task, TIF_IA32)) | ||
101 | return PERF_SAMPLE_REGS_ABI_32; | ||
102 | else | ||
103 | return PERF_SAMPLE_REGS_ABI_64; | ||
104 | } | ||
105 | #endif /* CONFIG_X86_32 */ | ||
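A stand-alone sketch of the mask check perf_reg_validate() performs in the new file above; REG_MAX is an example stand-in for PERF_REG_X86_MAX, whose real value depends on CONFIG_X86_32 vs CONFIG_X86_64:

```c
#include <stdint.h>
#include <stdio.h>

#define REG_MAX      24                              /* example only */
#define REG_RESERVED (~((1ULL << REG_MAX) - 1ULL))   /* bits above REG_MAX */

static int reg_validate(uint64_t mask)
{
	if (!mask || (mask & REG_RESERVED))
		return -1;              /* -EINVAL in the kernel */
	return 0;
}

int main(void)
{
	printf("ip only      : %d\n", reg_validate(1ULL << 8));    /* bit 8 = PERF_REG_X86_IP, accepted */
	printf("empty mask   : %d\n", reg_validate(0));            /* rejected */
	printf("reserved bit : %d\n", reg_validate(1ULL << 40));   /* rejected */
	return 0;
}
```

The 64-bit kernel variant additionally rejects the DS/ES/FS/GS bits via REG_NOSUPPORT, since pt_regs does not store those segment registers in 64-bit mode.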
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 03920a15a632..1b27de563561 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -512,7 +512,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, | |||
512 | 512 | ||
513 | #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) | 513 | #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) |
514 | /* Set correct numa_node information for AMD NB functions */ | 514 | /* Set correct numa_node information for AMD NB functions */ |
515 | static void __init quirk_amd_nb_node(struct pci_dev *dev) | 515 | static void __devinit quirk_amd_nb_node(struct pci_dev *dev) |
516 | { | 516 | { |
517 | struct pci_dev *nb_ht; | 517 | struct pci_dev *nb_ht; |
518 | unsigned int devfn; | 518 | unsigned int devfn; |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 5a98aa272184..5cdff0357746 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -21,7 +21,7 @@ | |||
21 | #include <asm/cpu.h> | 21 | #include <asm/cpu.h> |
22 | #include <asm/stackprotector.h> | 22 | #include <asm/stackprotector.h> |
23 | 23 | ||
24 | DEFINE_PER_CPU(int, cpu_number); | 24 | DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); |
25 | EXPORT_PER_CPU_SYMBOL(cpu_number); | 25 | EXPORT_PER_CPU_SYMBOL(cpu_number); |
26 | 26 | ||
27 | #ifdef CONFIG_X86_64 | 27 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c1a310fb8309..7c5a8c314c02 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -106,17 +106,17 @@ int smp_num_siblings = 1; | |||
106 | EXPORT_SYMBOL(smp_num_siblings); | 106 | EXPORT_SYMBOL(smp_num_siblings); |
107 | 107 | ||
108 | /* Last level cache ID of each logical CPU */ | 108 | /* Last level cache ID of each logical CPU */ |
109 | DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; | 109 | DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; |
110 | 110 | ||
111 | /* representing HT siblings of each logical CPU */ | 111 | /* representing HT siblings of each logical CPU */ |
112 | DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); | 112 | DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); |
113 | EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); | 113 | EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); |
114 | 114 | ||
115 | /* representing HT and core siblings of each logical CPU */ | 115 | /* representing HT and core siblings of each logical CPU */ |
116 | DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); | 116 | DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); |
117 | EXPORT_PER_CPU_SYMBOL(cpu_core_map); | 117 | EXPORT_PER_CPU_SYMBOL(cpu_core_map); |
118 | 118 | ||
119 | DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map); | 119 | DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); |
120 | 120 | ||
121 | /* Per CPU bogomips and other parameters */ | 121 | /* Per CPU bogomips and other parameters */ |
122 | DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); | 122 | DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); |