Diffstat (limited to 'arch/x86/kernel')
35 files changed, 727 insertions, 357 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8215e5652d97..8d7a619718b5 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -100,6 +100,8 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o | |||
100 | obj-$(CONFIG_OF) += devicetree.o | 100 | obj-$(CONFIG_OF) += devicetree.o |
101 | obj-$(CONFIG_UPROBES) += uprobes.o | 101 | obj-$(CONFIG_UPROBES) += uprobes.o |
102 | 102 | ||
103 | obj-$(CONFIG_PERF_EVENTS) += perf_regs.o | ||
104 | |||
103 | ### | 105 | ### |
104 | # 64 bit specific files | 106 | # 64 bit specific files |
105 | ifeq ($(CONFIG_X86_64),y) | 107 | ifeq ($(CONFIG_X86_64),y) |
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 95bf99de9058..1b8e5a03d942 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -25,10 +25,6 @@ unsigned long acpi_realmode_flags; | |||
25 | static char temp_stack[4096]; | 25 | static char temp_stack[4096]; |
26 | #endif | 26 | #endif |
27 | 27 | ||
28 | asmlinkage void acpi_enter_s3(void) | ||
29 | { | ||
30 | acpi_enter_sleep_state(3, wake_sleep_flags); | ||
31 | } | ||
32 | /** | 28 | /** |
33 | * acpi_suspend_lowlevel - save kernel state | 29 | * acpi_suspend_lowlevel - save kernel state |
34 | * | 30 | * |
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index 5653a5791ec9..67f59f8c6956 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -2,7 +2,6 @@ | |||
2 | * Variables and functions used by the code in sleep.c | 2 | * Variables and functions used by the code in sleep.c |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #include <linux/linkage.h> | ||
6 | #include <asm/realmode.h> | 5 | #include <asm/realmode.h> |
7 | 6 | ||
8 | extern unsigned long saved_video_mode; | 7 | extern unsigned long saved_video_mode; |
@@ -11,7 +10,6 @@ extern long saved_magic; | |||
11 | extern int wakeup_pmode_return; | 10 | extern int wakeup_pmode_return; |
12 | 11 | ||
13 | extern u8 wake_sleep_flags; | 12 | extern u8 wake_sleep_flags; |
14 | extern asmlinkage void acpi_enter_s3(void); | ||
15 | 13 | ||
16 | extern unsigned long acpi_copy_wakeup_routine(unsigned long); | 14 | extern unsigned long acpi_copy_wakeup_routine(unsigned long); |
17 | extern void wakeup_long64(void); | 15 | extern void wakeup_long64(void); |
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index 72610839f03b..13ab720573e3 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -74,7 +74,9 @@ restore_registers: | |||
74 | ENTRY(do_suspend_lowlevel) | 74 | ENTRY(do_suspend_lowlevel) |
75 | call save_processor_state | 75 | call save_processor_state |
76 | call save_registers | 76 | call save_registers |
77 | call acpi_enter_s3 | 77 | pushl $3 |
78 | call acpi_enter_sleep_state | ||
79 | addl $4, %esp | ||
78 | 80 | ||
79 | # In case of S3 failure, we'll emerge here. Jump | 81 | # In case of S3 failure, we'll emerge here. Jump |
80 | # to ret_point to recover | 82 | # to ret_point to recover |
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 014d1d28c397..8ea5164cbd04 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -71,7 +71,9 @@ ENTRY(do_suspend_lowlevel) | |||
71 | movq %rsi, saved_rsi | 71 | movq %rsi, saved_rsi |
72 | 72 | ||
73 | addq $8, %rsp | 73 | addq $8, %rsp |
74 | call acpi_enter_s3 | 74 | movl $3, %edi |
75 | xorl %eax, %eax | ||
76 | call acpi_enter_sleep_state | ||
75 | /* in case something went wrong, restore the machine status and go on */ | 77 | /* in case something went wrong, restore the machine status and go on */ |
76 | jmp resume_point | 78 | jmp resume_point |
77 | 79 | ||
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 931280ff8299..afb7ff79a29f 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -224,7 +224,7 @@ void __init arch_init_ideal_nops(void) | |||
224 | ideal_nops = intel_nops; | 224 | ideal_nops = intel_nops; |
225 | #endif | 225 | #endif |
226 | } | 226 | } |
227 | 227 | break; | |
228 | default: | 228 | default: |
229 | #ifdef CONFIG_X86_64 | 229 | #ifdef CONFIG_X86_64 |
230 | ideal_nops = k8_nops; | 230 | ideal_nops = k8_nops; |
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index f29f6dd6bc08..aadf3359e2a7 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -19,6 +19,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { | |||
19 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, | 19 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, |
20 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, | 20 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, |
21 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, | 21 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, |
22 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) }, | ||
22 | {} | 23 | {} |
23 | }; | 24 | }; |
24 | EXPORT_SYMBOL(amd_nb_misc_ids); | 25 | EXPORT_SYMBOL(amd_nb_misc_ids); |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index c421512ca5eb..24deb3082328 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -75,8 +75,8 @@ physid_mask_t phys_cpu_present_map; | |||
75 | /* | 75 | /* |
76 | * Map cpu index to physical APIC ID | 76 | * Map cpu index to physical APIC ID |
77 | */ | 77 | */ |
78 | DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); | 78 | DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); |
79 | DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); | 79 | DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID); |
80 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); | 80 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); |
81 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); | 81 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); |
82 | 82 | ||
@@ -88,7 +88,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); | |||
88 | * used for the mapping. This is where the behaviors of x86_64 and 32 | 88 | * used for the mapping. This is where the behaviors of x86_64 and 32 |
89 | * actually diverge. Let's keep it ugly for now. | 89 | * actually diverge. Let's keep it ugly for now. |
90 | */ | 90 | */ |
91 | DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID); | 91 | DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID); |
92 | 92 | ||
93 | /* | 93 | /* |
94 | * Knob to control our willingness to enable the local APIC. | 94 | * Knob to control our willingness to enable the local APIC. |
@@ -2143,6 +2143,23 @@ int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
2143 | } | 2143 | } |
2144 | 2144 | ||
2145 | /* | 2145 | /* |
2146 | * Override the generic EOI implementation with an optimized version. | ||
2147 | * Only called during early boot when only one CPU is active and with | ||
2148 | * interrupts disabled, so we know this does not race with actual APIC driver | ||
2149 | * use. | ||
2150 | */ | ||
2151 | void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) | ||
2152 | { | ||
2153 | struct apic **drv; | ||
2154 | |||
2155 | for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { | ||
2156 | /* Should happen once for each apic */ | ||
2157 | WARN_ON((*drv)->eoi_write == eoi_write); | ||
2158 | (*drv)->eoi_write = eoi_write; | ||
2159 | } | ||
2160 | } | ||
2161 | |||
2162 | /* | ||
2146 | * Power management | 2163 | * Power management |
2147 | */ | 2164 | */ |
2148 | #ifdef CONFIG_PM | 2165 | #ifdef CONFIG_PM |
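For orientation, a minimal sketch of how an early-boot caller (a paravirtualized guest, for example) might use the apic_set_eoi_write() hook added above. pv_eoi_pending() is a hypothetical helper and not part of this patch; native_apic_mem_write() is assumed as the fallback native register write.

static void my_fast_eoi_write(u32 reg, u32 val)
{
	/* Skip the APIC register access when no EOI is actually
	 * pending (pv_eoi_pending() is illustrative only). */
	if (!pv_eoi_pending())
		return;

	native_apic_mem_write(reg, val);
}

static void __init my_guest_init(void)
{
	/*
	 * Must run while only the boot CPU is up and interrupts are
	 * off, as the comment on apic_set_eoi_write() requires.
	 */
	apic_set_eoi_write(my_fast_eoi_write);
}
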
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 406eee784684..a6c64aaddf9a 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1204,7 +1204,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) | |||
1204 | BUG_ON(!cfg->vector); | 1204 | BUG_ON(!cfg->vector); |
1205 | 1205 | ||
1206 | vector = cfg->vector; | 1206 | vector = cfg->vector; |
1207 | for_each_cpu(cpu, cfg->domain) | 1207 | for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) |
1208 | per_cpu(vector_irq, cpu)[vector] = -1; | 1208 | per_cpu(vector_irq, cpu)[vector] = -1; |
1209 | 1209 | ||
1210 | cfg->vector = 0; | 1210 | cfg->vector = 0; |
@@ -1212,7 +1212,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) | |||
1212 | 1212 | ||
1213 | if (likely(!cfg->move_in_progress)) | 1213 | if (likely(!cfg->move_in_progress)) |
1214 | return; | 1214 | return; |
1215 | for_each_cpu(cpu, cfg->old_domain) { | 1215 | for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { |
1216 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; | 1216 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; |
1217 | vector++) { | 1217 | vector++) { |
1218 | if (per_cpu(vector_irq, cpu)[vector] != irq) | 1218 | if (per_cpu(vector_irq, cpu)[vector] != irq) |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index bac4c3804cc7..d30a6a9a0121 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -14,7 +14,7 @@ CFLAGS_common.o := $(nostackp) | |||
14 | 14 | ||
15 | obj-y := intel_cacheinfo.o scattered.o topology.o | 15 | obj-y := intel_cacheinfo.o scattered.o topology.o |
16 | obj-y += proc.o capflags.o powerflags.o common.o | 16 | obj-y += proc.o capflags.o powerflags.o common.o |
17 | obj-y += vmware.o hypervisor.o sched.o mshyperv.o | 17 | obj-y += vmware.o hypervisor.o mshyperv.o |
18 | obj-y += rdrand.o | 18 | obj-y += rdrand.o |
19 | obj-y += match.o | 19 | obj-y += match.o |
20 | 20 | ||
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5bbc082c47ad..46d8786d655e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -452,6 +452,35 @@ void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) | |||
452 | c->x86_cache_size = l2size; | 452 | c->x86_cache_size = l2size; |
453 | } | 453 | } |
454 | 454 | ||
455 | u16 __read_mostly tlb_lli_4k[NR_INFO]; | ||
456 | u16 __read_mostly tlb_lli_2m[NR_INFO]; | ||
457 | u16 __read_mostly tlb_lli_4m[NR_INFO]; | ||
458 | u16 __read_mostly tlb_lld_4k[NR_INFO]; | ||
459 | u16 __read_mostly tlb_lld_2m[NR_INFO]; | ||
460 | u16 __read_mostly tlb_lld_4m[NR_INFO]; | ||
461 | |||
462 | /* | ||
463 | * tlb_flushall_shift shows the balance point in replacing cr3 write | ||
464 | * with multiple 'invlpg'. It will do this replacement when | ||
465 | * flush_tlb_lines <= active_lines/2^tlb_flushall_shift. | ||
466 | * If tlb_flushall_shift is -1, means the replacement will be disabled. | ||
467 | */ | ||
468 | s8 __read_mostly tlb_flushall_shift = -1; | ||
469 | |||
470 | void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c) | ||
471 | { | ||
472 | if (this_cpu->c_detect_tlb) | ||
473 | this_cpu->c_detect_tlb(c); | ||
474 | |||
475 | printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ | ||
476 | "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ | ||
477 | "tlb_flushall_shift is 0x%x\n", | ||
478 | tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], | ||
479 | tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], | ||
480 | tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], | ||
481 | tlb_flushall_shift); | ||
482 | } | ||
483 | |||
455 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) | 484 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) |
456 | { | 485 | { |
457 | #ifdef CONFIG_X86_HT | 486 | #ifdef CONFIG_X86_HT |
@@ -911,6 +940,8 @@ void __init identify_boot_cpu(void) | |||
911 | #else | 940 | #else |
912 | vgetcpu_set_mode(); | 941 | vgetcpu_set_mode(); |
913 | #endif | 942 | #endif |
943 | if (boot_cpu_data.cpuid_level >= 2) | ||
944 | cpu_detect_tlb(&boot_cpu_data); | ||
914 | } | 945 | } |
915 | 946 | ||
916 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | 947 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) |
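The tlb_flushall_shift comment above gives a threshold rule, but its consumer lives in the TLB-flush path outside this diff, so the sketch below only illustrates the arithmetic; the function and parameter names are assumptions, not code from the patch.

static bool prefer_invlpg(unsigned long pages_to_flush,
			  unsigned long act_entries, s8 shift)
{
	if (shift < 0)		/* replacement disabled, always flush all */
		return false;

	/*
	 * Example: shift == 5 with 512 active TLB entries means flushes
	 * of up to 512 >> 5 == 16 pages use individual invlpg
	 * instructions; larger ranges fall back to a full CR3 rewrite.
	 */
	return pages_to_flush <= (act_entries >> shift);
}
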
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 8bacc7826fb3..4041c24ae7db 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -20,10 +20,19 @@ struct cpu_dev { | |||
20 | void (*c_bsp_init)(struct cpuinfo_x86 *); | 20 | void (*c_bsp_init)(struct cpuinfo_x86 *); |
21 | void (*c_init)(struct cpuinfo_x86 *); | 21 | void (*c_init)(struct cpuinfo_x86 *); |
22 | void (*c_identify)(struct cpuinfo_x86 *); | 22 | void (*c_identify)(struct cpuinfo_x86 *); |
23 | void (*c_detect_tlb)(struct cpuinfo_x86 *); | ||
23 | unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int); | 24 | unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int); |
24 | int c_x86_vendor; | 25 | int c_x86_vendor; |
25 | }; | 26 | }; |
26 | 27 | ||
28 | struct _tlb_table { | ||
29 | unsigned char descriptor; | ||
30 | char tlb_type; | ||
31 | unsigned int entries; | ||
32 | /* unsigned int ways; */ | ||
33 | char info[128]; | ||
34 | }; | ||
35 | |||
27 | #define cpu_dev_register(cpu_devX) \ | 36 | #define cpu_dev_register(cpu_devX) \ |
28 | static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \ | 37 | static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \ |
29 | __attribute__((__section__(".x86_cpu_dev.init"))) = \ | 38 | __attribute__((__section__(".x86_cpu_dev.init"))) = \ |
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 755f64fb0743..a8f8fa9769d6 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -37,6 +37,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = | |||
37 | #endif | 37 | #endif |
38 | &x86_hyper_vmware, | 38 | &x86_hyper_vmware, |
39 | &x86_hyper_ms_hyperv, | 39 | &x86_hyper_ms_hyperv, |
40 | #ifdef CONFIG_KVM_GUEST | ||
41 | &x86_hyper_kvm, | ||
42 | #endif | ||
40 | }; | 43 | }; |
41 | 44 | ||
42 | const struct hypervisor_x86 *x86_hyper; | 45 | const struct hypervisor_x86 *x86_hyper; |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 3e6ff6cbf42a..0a4ce2980a5a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -491,6 +491,181 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i | |||
491 | } | 491 | } |
492 | #endif | 492 | #endif |
493 | 493 | ||
494 | #define TLB_INST_4K 0x01 | ||
495 | #define TLB_INST_4M 0x02 | ||
496 | #define TLB_INST_2M_4M 0x03 | ||
497 | |||
498 | #define TLB_INST_ALL 0x05 | ||
499 | #define TLB_INST_1G 0x06 | ||
500 | |||
501 | #define TLB_DATA_4K 0x11 | ||
502 | #define TLB_DATA_4M 0x12 | ||
503 | #define TLB_DATA_2M_4M 0x13 | ||
504 | #define TLB_DATA_4K_4M 0x14 | ||
505 | |||
506 | #define TLB_DATA_1G 0x16 | ||
507 | |||
508 | #define TLB_DATA0_4K 0x21 | ||
509 | #define TLB_DATA0_4M 0x22 | ||
510 | #define TLB_DATA0_2M_4M 0x23 | ||
511 | |||
512 | #define STLB_4K 0x41 | ||
513 | |||
514 | static const struct _tlb_table intel_tlb_table[] __cpuinitconst = { | ||
515 | { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" }, | ||
516 | { 0x02, TLB_INST_4M, 2, " TLB_INST 4 MByte pages, full associative" }, | ||
517 | { 0x03, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way set associative" }, | ||
518 | { 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" }, | ||
519 | { 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" }, | ||
520 | { 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" }, | ||
521 | { 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages */" }, | ||
522 | { 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, | ||
523 | { 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, | ||
524 | { 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, | ||
525 | { 0x55, TLB_INST_2M_4M, 7, " TLB_INST 2-MByte or 4-MByte pages, fully associative" }, | ||
526 | { 0x56, TLB_DATA0_4M, 16, " TLB_DATA0 4 MByte pages, 4-way set associative" }, | ||
527 | { 0x57, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, 4-way associative" }, | ||
528 | { 0x59, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, fully associative" }, | ||
529 | { 0x5a, TLB_DATA0_2M_4M, 32, " TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative" }, | ||
530 | { 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" }, | ||
531 | { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" }, | ||
532 | { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" }, | ||
533 | { 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" }, | ||
534 | { 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" }, | ||
535 | { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" }, | ||
536 | { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" }, | ||
537 | { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" }, | ||
538 | { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" }, | ||
539 | { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" }, | ||
540 | { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" }, | ||
541 | { 0x00, 0, 0 } | ||
542 | }; | ||
543 | |||
544 | static void __cpuinit intel_tlb_lookup(const unsigned char desc) | ||
545 | { | ||
546 | unsigned char k; | ||
547 | if (desc == 0) | ||
548 | return; | ||
549 | |||
550 | /* look up this descriptor in the table */ | ||
551 | for (k = 0; intel_tlb_table[k].descriptor != desc && \ | ||
552 | intel_tlb_table[k].descriptor != 0; k++) | ||
553 | ; | ||
554 | |||
555 | if (intel_tlb_table[k].tlb_type == 0) | ||
556 | return; | ||
557 | |||
558 | switch (intel_tlb_table[k].tlb_type) { | ||
559 | case STLB_4K: | ||
560 | if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
561 | tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
562 | if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
563 | tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
564 | break; | ||
565 | case TLB_INST_ALL: | ||
566 | if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
567 | tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
568 | if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) | ||
569 | tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; | ||
570 | if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
571 | tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
572 | break; | ||
573 | case TLB_INST_4K: | ||
574 | if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
575 | tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
576 | break; | ||
577 | case TLB_INST_4M: | ||
578 | if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
579 | tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
580 | break; | ||
581 | case TLB_INST_2M_4M: | ||
582 | if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) | ||
583 | tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; | ||
584 | if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
585 | tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
586 | break; | ||
587 | case TLB_DATA_4K: | ||
588 | case TLB_DATA0_4K: | ||
589 | if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
590 | tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
591 | break; | ||
592 | case TLB_DATA_4M: | ||
593 | case TLB_DATA0_4M: | ||
594 | if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
595 | tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
596 | break; | ||
597 | case TLB_DATA_2M_4M: | ||
598 | case TLB_DATA0_2M_4M: | ||
599 | if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries) | ||
600 | tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries; | ||
601 | if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
602 | tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
603 | break; | ||
604 | case TLB_DATA_4K_4M: | ||
605 | if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
606 | tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
607 | if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
608 | tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
609 | break; | ||
610 | } | ||
611 | } | ||
612 | |||
613 | static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c) | ||
614 | { | ||
615 | if (!cpu_has_invlpg) { | ||
616 | tlb_flushall_shift = -1; | ||
617 | return; | ||
618 | } | ||
619 | switch ((c->x86 << 8) + c->x86_model) { | ||
620 | case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | ||
621 | case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | ||
622 | case 0x617: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | ||
623 | case 0x61d: /* six-core 45 nm xeon "Dunnington" */ | ||
624 | tlb_flushall_shift = -1; | ||
625 | break; | ||
626 | case 0x61a: /* 45 nm nehalem, "Bloomfield" */ | ||
627 | case 0x61e: /* 45 nm nehalem, "Lynnfield" */ | ||
628 | case 0x625: /* 32 nm nehalem, "Clarkdale" */ | ||
629 | case 0x62c: /* 32 nm nehalem, "Gulftown" */ | ||
630 | case 0x62e: /* 45 nm nehalem-ex, "Beckton" */ | ||
631 | case 0x62f: /* 32 nm Xeon E7 */ | ||
632 | tlb_flushall_shift = 6; | ||
633 | break; | ||
634 | case 0x62a: /* SandyBridge */ | ||
635 | case 0x62d: /* SandyBridge, "Romely-EP" */ | ||
636 | tlb_flushall_shift = 5; | ||
637 | break; | ||
638 | case 0x63a: /* Ivybridge */ | ||
639 | tlb_flushall_shift = 1; | ||
640 | break; | ||
641 | default: | ||
642 | tlb_flushall_shift = 6; | ||
643 | } | ||
644 | } | ||
645 | |||
646 | static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c) | ||
647 | { | ||
648 | int i, j, n; | ||
649 | unsigned int regs[4]; | ||
650 | unsigned char *desc = (unsigned char *)regs; | ||
651 | /* Number of times to iterate */ | ||
652 | n = cpuid_eax(2) & 0xFF; | ||
653 | |||
654 | for (i = 0 ; i < n ; i++) { | ||
655 | cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); | ||
656 | |||
657 | /* If bit 31 is set, this is an unknown format */ | ||
658 | for (j = 0 ; j < 3 ; j++) | ||
659 | if (regs[j] & (1 << 31)) | ||
660 | regs[j] = 0; | ||
661 | |||
662 | /* Byte 0 is level count, not a descriptor */ | ||
663 | for (j = 1 ; j < 16 ; j++) | ||
664 | intel_tlb_lookup(desc[j]); | ||
665 | } | ||
666 | intel_tlb_flushall_shift_set(c); | ||
667 | } | ||
668 | |||
494 | static const struct cpu_dev __cpuinitconst intel_cpu_dev = { | 669 | static const struct cpu_dev __cpuinitconst intel_cpu_dev = { |
495 | .c_vendor = "Intel", | 670 | .c_vendor = "Intel", |
496 | .c_ident = { "GenuineIntel" }, | 671 | .c_ident = { "GenuineIntel" }, |
@@ -546,6 +721,7 @@ static const struct cpu_dev __cpuinitconst intel_cpu_dev = { | |||
546 | }, | 721 | }, |
547 | .c_size_cache = intel_size_cache, | 722 | .c_size_cache = intel_size_cache, |
548 | #endif | 723 | #endif |
724 | .c_detect_tlb = intel_detect_tlb, | ||
549 | .c_early_init = early_init_intel, | 725 | .c_early_init = early_init_intel, |
550 | .c_init = init_intel, | 726 | .c_init = init_intel, |
551 | .c_x86_vendor = X86_VENDOR_INTEL, | 727 | .c_x86_vendor = X86_VENDOR_INTEL, |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 413c2ced887c..13017626f9a8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -55,13 +55,6 @@ static struct severity { | |||
55 | #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) | 55 | #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) |
56 | #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) | 56 | #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) |
57 | #define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV) | 57 | #define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV) |
58 | #define MCACOD 0xffff | ||
59 | /* Architecturally defined codes from SDM Vol. 3B Chapter 15 */ | ||
60 | #define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ | ||
61 | #define MCACOD_SCRUBMSK 0xfff0 | ||
62 | #define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ | ||
63 | #define MCACOD_DATA 0x0134 /* Data Load */ | ||
64 | #define MCACOD_INSTR 0x0150 /* Instruction Fetch */ | ||
65 | 58 | ||
66 | MCESEV( | 59 | MCESEV( |
67 | NO, "Invalid", | 60 | NO, "Invalid", |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5a5a5dc1ff15..292d0258311c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -60,8 +60,6 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex); | |||
60 | 60 | ||
61 | int mce_disabled __read_mostly; | 61 | int mce_disabled __read_mostly; |
62 | 62 | ||
63 | #define MISC_MCELOG_MINOR 227 | ||
64 | |||
65 | #define SPINUNIT 100 /* 100ns */ | 63 | #define SPINUNIT 100 /* 100ns */ |
66 | 64 | ||
67 | atomic_t mce_entry; | 65 | atomic_t mce_entry; |
@@ -105,6 +103,8 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | |||
105 | 103 | ||
106 | static DEFINE_PER_CPU(struct work_struct, mce_work); | 104 | static DEFINE_PER_CPU(struct work_struct, mce_work); |
107 | 105 | ||
106 | static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); | ||
107 | |||
108 | /* | 108 | /* |
109 | * CPU/chipset specific EDAC code can register a notifier call here to print | 109 | * CPU/chipset specific EDAC code can register a notifier call here to print |
110 | * MCE errors in a human-readable form. | 110 | * MCE errors in a human-readable form. |
@@ -652,14 +652,18 @@ EXPORT_SYMBOL_GPL(machine_check_poll); | |||
652 | * Do a quick check if any of the events requires a panic. | 652 | * Do a quick check if any of the events requires a panic. |
653 | * This decides if we keep the events around or clear them. | 653 | * This decides if we keep the events around or clear them. |
654 | */ | 654 | */ |
655 | static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp) | 655 | static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, |
656 | struct pt_regs *regs) | ||
656 | { | 657 | { |
657 | int i, ret = 0; | 658 | int i, ret = 0; |
658 | 659 | ||
659 | for (i = 0; i < banks; i++) { | 660 | for (i = 0; i < banks; i++) { |
660 | m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); | 661 | m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); |
661 | if (m->status & MCI_STATUS_VAL) | 662 | if (m->status & MCI_STATUS_VAL) { |
662 | __set_bit(i, validp); | 663 | __set_bit(i, validp); |
664 | if (quirk_no_way_out) | ||
665 | quirk_no_way_out(i, m, regs); | ||
666 | } | ||
663 | if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) | 667 | if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) |
664 | ret = 1; | 668 | ret = 1; |
665 | } | 669 | } |
@@ -1042,7 +1046,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1042 | *final = m; | 1046 | *final = m; |
1043 | 1047 | ||
1044 | memset(valid_banks, 0, sizeof(valid_banks)); | 1048 | memset(valid_banks, 0, sizeof(valid_banks)); |
1045 | no_way_out = mce_no_way_out(&m, &msg, valid_banks); | 1049 | no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs); |
1046 | 1050 | ||
1047 | barrier(); | 1051 | barrier(); |
1048 | 1052 | ||
@@ -1190,6 +1194,7 @@ void mce_notify_process(void) | |||
1190 | { | 1194 | { |
1191 | unsigned long pfn; | 1195 | unsigned long pfn; |
1192 | struct mce_info *mi = mce_find_info(); | 1196 | struct mce_info *mi = mce_find_info(); |
1197 | int flags = MF_ACTION_REQUIRED; | ||
1193 | 1198 | ||
1194 | if (!mi) | 1199 | if (!mi) |
1195 | mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL); | 1200 | mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL); |
@@ -1204,8 +1209,9 @@ void mce_notify_process(void) | |||
1204 | * doomed. We still need to mark the page as poisoned and alert any | 1209 | * doomed. We still need to mark the page as poisoned and alert any |
1205 | * other users of the page. | 1210 | * other users of the page. |
1206 | */ | 1211 | */ |
1207 | if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 || | 1212 | if (!mi->restartable) |
1208 | mi->restartable == 0) { | 1213 | flags |= MF_MUST_KILL; |
1214 | if (memory_failure(pfn, MCE_VECTOR, flags) < 0) { | ||
1209 | pr_err("Memory error not recovered"); | 1215 | pr_err("Memory error not recovered"); |
1210 | force_sig(SIGBUS, current); | 1216 | force_sig(SIGBUS, current); |
1211 | } | 1217 | } |
@@ -1418,6 +1424,34 @@ static void __mcheck_cpu_init_generic(void) | |||
1418 | } | 1424 | } |
1419 | } | 1425 | } |
1420 | 1426 | ||
1427 | /* | ||
1428 | * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and | ||
1429 | * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM | ||
1430 | * Vol 3B Table 15-20). But this confuses both the code that determines | ||
1431 | * whether the machine check occurred in kernel or user mode, and also | ||
1432 | * the severity assessment code. Pretend that EIPV was set, and take the | ||
1433 | * ip/cs values from the pt_regs that mce_gather_info() ignored earlier. | ||
1434 | */ | ||
1435 | static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs) | ||
1436 | { | ||
1437 | if (bank != 0) | ||
1438 | return; | ||
1439 | if ((m->mcgstatus & (MCG_STATUS_EIPV|MCG_STATUS_RIPV)) != 0) | ||
1440 | return; | ||
1441 | if ((m->status & (MCI_STATUS_OVER|MCI_STATUS_UC| | ||
1442 | MCI_STATUS_EN|MCI_STATUS_MISCV|MCI_STATUS_ADDRV| | ||
1443 | MCI_STATUS_PCC|MCI_STATUS_S|MCI_STATUS_AR| | ||
1444 | MCACOD)) != | ||
1445 | (MCI_STATUS_UC|MCI_STATUS_EN| | ||
1446 | MCI_STATUS_MISCV|MCI_STATUS_ADDRV|MCI_STATUS_S| | ||
1447 | MCI_STATUS_AR|MCACOD_INSTR)) | ||
1448 | return; | ||
1449 | |||
1450 | m->mcgstatus |= MCG_STATUS_EIPV; | ||
1451 | m->ip = regs->ip; | ||
1452 | m->cs = regs->cs; | ||
1453 | } | ||
1454 | |||
1421 | /* Add per CPU specific workarounds here */ | 1455 | /* Add per CPU specific workarounds here */ |
1422 | static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) | 1456 | static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) |
1423 | { | 1457 | { |
@@ -1515,6 +1549,9 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) | |||
1515 | */ | 1549 | */ |
1516 | if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0) | 1550 | if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0) |
1517 | mce_bootlog = 0; | 1551 | mce_bootlog = 0; |
1552 | |||
1553 | if (c->x86 == 6 && c->x86_model == 45) | ||
1554 | quirk_no_way_out = quirk_sandybridge_ifu; | ||
1518 | } | 1555 | } |
1519 | if (monarch_timeout < 0) | 1556 | if (monarch_timeout < 0) |
1520 | monarch_timeout = 0; | 1557 | monarch_timeout = 0; |
@@ -2344,7 +2381,7 @@ static __init int mcheck_init_device(void) | |||
2344 | 2381 | ||
2345 | return err; | 2382 | return err; |
2346 | } | 2383 | } |
2347 | device_initcall(mcheck_init_device); | 2384 | device_initcall_sync(mcheck_init_device); |
2348 | 2385 | ||
2349 | /* | 2386 | /* |
2350 | * Old style boot options parsing. Only for compatibility. | 2387 | * Old style boot options parsing. Only for compatibility. |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f4873a64f46d..c4e916d77378 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -1,15 +1,17 @@ | |||
1 | /* | 1 | /* |
2 | * (c) 2005, 2006 Advanced Micro Devices, Inc. | 2 | * (c) 2005-2012 Advanced Micro Devices, Inc. |
3 | * Your use of this code is subject to the terms and conditions of the | 3 | * Your use of this code is subject to the terms and conditions of the |
4 | * GNU general public license version 2. See "COPYING" or | 4 | * GNU general public license version 2. See "COPYING" or |
5 | * http://www.gnu.org/licenses/gpl.html | 5 | * http://www.gnu.org/licenses/gpl.html |
6 | * | 6 | * |
7 | * Written by Jacob Shin - AMD, Inc. | 7 | * Written by Jacob Shin - AMD, Inc. |
8 | * | 8 | * |
9 | * Support : jacob.shin@amd.com | 9 | * Support: borislav.petkov@amd.com |
10 | * | 10 | * |
11 | * April 2006 | 11 | * April 2006 |
12 | * - added support for AMD Family 0x10 processors | 12 | * - added support for AMD Family 0x10 processors |
13 | * May 2012 | ||
14 | * - major scrubbing | ||
13 | * | 15 | * |
14 | * All MC4_MISCi registers are shared between multi-cores | 16 | * All MC4_MISCi registers are shared between multi-cores |
15 | */ | 17 | */ |
@@ -25,6 +27,7 @@ | |||
25 | #include <linux/cpu.h> | 27 | #include <linux/cpu.h> |
26 | #include <linux/smp.h> | 28 | #include <linux/smp.h> |
27 | 29 | ||
30 | #include <asm/amd_nb.h> | ||
28 | #include <asm/apic.h> | 31 | #include <asm/apic.h> |
29 | #include <asm/idle.h> | 32 | #include <asm/idle.h> |
30 | #include <asm/mce.h> | 33 | #include <asm/mce.h> |
@@ -45,23 +48,15 @@ | |||
45 | #define MASK_BLKPTR_LO 0xFF000000 | 48 | #define MASK_BLKPTR_LO 0xFF000000 |
46 | #define MCG_XBLK_ADDR 0xC0000400 | 49 | #define MCG_XBLK_ADDR 0xC0000400 |
47 | 50 | ||
48 | struct threshold_block { | 51 | static const char * const th_names[] = { |
49 | unsigned int block; | 52 | "load_store", |
50 | unsigned int bank; | 53 | "insn_fetch", |
51 | unsigned int cpu; | 54 | "combined_unit", |
52 | u32 address; | 55 | "", |
53 | u16 interrupt_enable; | 56 | "northbridge", |
54 | bool interrupt_capable; | 57 | "execution_unit", |
55 | u16 threshold_limit; | ||
56 | struct kobject kobj; | ||
57 | struct list_head miscj; | ||
58 | }; | 58 | }; |
59 | 59 | ||
60 | struct threshold_bank { | ||
61 | struct kobject *kobj; | ||
62 | struct threshold_block *blocks; | ||
63 | cpumask_var_t cpus; | ||
64 | }; | ||
65 | static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); | 60 | static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); |
66 | 61 | ||
67 | static unsigned char shared_bank[NR_BANKS] = { | 62 | static unsigned char shared_bank[NR_BANKS] = { |
@@ -84,6 +79,26 @@ struct thresh_restart { | |||
84 | u16 old_limit; | 79 | u16 old_limit; |
85 | }; | 80 | }; |
86 | 81 | ||
82 | static const char * const bank4_names(struct threshold_block *b) | ||
83 | { | ||
84 | switch (b->address) { | ||
85 | /* MSR4_MISC0 */ | ||
86 | case 0x00000413: | ||
87 | return "dram"; | ||
88 | |||
89 | case 0xc0000408: | ||
90 | return "ht_links"; | ||
91 | |||
92 | case 0xc0000409: | ||
93 | return "l3_cache"; | ||
94 | |||
95 | default: | ||
96 | WARN(1, "Funny MSR: 0x%08x\n", b->address); | ||
97 | return ""; | ||
98 | } | ||
99 | }; | ||
100 | |||
101 | |||
87 | static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits) | 102 | static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits) |
88 | { | 103 | { |
89 | /* | 104 | /* |
@@ -224,8 +239,6 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
224 | 239 | ||
225 | if (!block) | 240 | if (!block) |
226 | per_cpu(bank_map, cpu) |= (1 << bank); | 241 | per_cpu(bank_map, cpu) |= (1 << bank); |
227 | if (shared_bank[bank] && c->cpu_core_id) | ||
228 | break; | ||
229 | 242 | ||
230 | memset(&b, 0, sizeof(b)); | 243 | memset(&b, 0, sizeof(b)); |
231 | b.cpu = cpu; | 244 | b.cpu = cpu; |
@@ -326,7 +339,7 @@ struct threshold_attr { | |||
326 | #define SHOW_FIELDS(name) \ | 339 | #define SHOW_FIELDS(name) \ |
327 | static ssize_t show_ ## name(struct threshold_block *b, char *buf) \ | 340 | static ssize_t show_ ## name(struct threshold_block *b, char *buf) \ |
328 | { \ | 341 | { \ |
329 | return sprintf(buf, "%lx\n", (unsigned long) b->name); \ | 342 | return sprintf(buf, "%lu\n", (unsigned long) b->name); \ |
330 | } | 343 | } |
331 | SHOW_FIELDS(interrupt_enable) | 344 | SHOW_FIELDS(interrupt_enable) |
332 | SHOW_FIELDS(threshold_limit) | 345 | SHOW_FIELDS(threshold_limit) |
@@ -377,38 +390,21 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size) | |||
377 | return size; | 390 | return size; |
378 | } | 391 | } |
379 | 392 | ||
380 | struct threshold_block_cross_cpu { | ||
381 | struct threshold_block *tb; | ||
382 | long retval; | ||
383 | }; | ||
384 | |||
385 | static void local_error_count_handler(void *_tbcc) | ||
386 | { | ||
387 | struct threshold_block_cross_cpu *tbcc = _tbcc; | ||
388 | struct threshold_block *b = tbcc->tb; | ||
389 | u32 low, high; | ||
390 | |||
391 | rdmsr(b->address, low, high); | ||
392 | tbcc->retval = (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); | ||
393 | } | ||
394 | |||
395 | static ssize_t show_error_count(struct threshold_block *b, char *buf) | 393 | static ssize_t show_error_count(struct threshold_block *b, char *buf) |
396 | { | 394 | { |
397 | struct threshold_block_cross_cpu tbcc = { .tb = b, }; | 395 | u32 lo, hi; |
398 | 396 | ||
399 | smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1); | 397 | rdmsr_on_cpu(b->cpu, b->address, &lo, &hi); |
400 | return sprintf(buf, "%lx\n", tbcc.retval); | ||
401 | } | ||
402 | |||
403 | static ssize_t store_error_count(struct threshold_block *b, | ||
404 | const char *buf, size_t count) | ||
405 | { | ||
406 | struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 }; | ||
407 | 398 | ||
408 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); | 399 | return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) - |
409 | return 1; | 400 | (THRESHOLD_MAX - b->threshold_limit))); |
410 | } | 401 | } |
411 | 402 | ||
403 | static struct threshold_attr error_count = { | ||
404 | .attr = {.name = __stringify(error_count), .mode = 0444 }, | ||
405 | .show = show_error_count, | ||
406 | }; | ||
407 | |||
412 | #define RW_ATTR(val) \ | 408 | #define RW_ATTR(val) \ |
413 | static struct threshold_attr val = { \ | 409 | static struct threshold_attr val = { \ |
414 | .attr = {.name = __stringify(val), .mode = 0644 }, \ | 410 | .attr = {.name = __stringify(val), .mode = 0644 }, \ |
@@ -418,7 +414,6 @@ static struct threshold_attr val = { \ | |||
418 | 414 | ||
419 | RW_ATTR(interrupt_enable); | 415 | RW_ATTR(interrupt_enable); |
420 | RW_ATTR(threshold_limit); | 416 | RW_ATTR(threshold_limit); |
421 | RW_ATTR(error_count); | ||
422 | 417 | ||
423 | static struct attribute *default_attrs[] = { | 418 | static struct attribute *default_attrs[] = { |
424 | &threshold_limit.attr, | 419 | &threshold_limit.attr, |
@@ -517,7 +512,7 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, | |||
517 | 512 | ||
518 | err = kobject_init_and_add(&b->kobj, &threshold_ktype, | 513 | err = kobject_init_and_add(&b->kobj, &threshold_ktype, |
519 | per_cpu(threshold_banks, cpu)[bank]->kobj, | 514 | per_cpu(threshold_banks, cpu)[bank]->kobj, |
520 | "misc%i", block); | 515 | (bank == 4 ? bank4_names(b) : th_names[bank])); |
521 | if (err) | 516 | if (err) |
522 | goto out_free; | 517 | goto out_free; |
523 | recurse: | 518 | recurse: |
@@ -548,98 +543,91 @@ out_free: | |||
548 | return err; | 543 | return err; |
549 | } | 544 | } |
550 | 545 | ||
551 | static __cpuinit long | 546 | static __cpuinit int __threshold_add_blocks(struct threshold_bank *b) |
552 | local_allocate_threshold_blocks(int cpu, unsigned int bank) | ||
553 | { | 547 | { |
554 | return allocate_threshold_blocks(cpu, bank, 0, | 548 | struct list_head *head = &b->blocks->miscj; |
555 | MSR_IA32_MC0_MISC + bank * 4); | 549 | struct threshold_block *pos = NULL; |
550 | struct threshold_block *tmp = NULL; | ||
551 | int err = 0; | ||
552 | |||
553 | err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name); | ||
554 | if (err) | ||
555 | return err; | ||
556 | |||
557 | list_for_each_entry_safe(pos, tmp, head, miscj) { | ||
558 | |||
559 | err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name); | ||
560 | if (err) { | ||
561 | list_for_each_entry_safe_reverse(pos, tmp, head, miscj) | ||
562 | kobject_del(&pos->kobj); | ||
563 | |||
564 | return err; | ||
565 | } | ||
566 | } | ||
567 | return err; | ||
556 | } | 568 | } |
557 | 569 | ||
558 | /* symlinks sibling shared banks to first core. first core owns dir/files. */ | ||
559 | static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | 570 | static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) |
560 | { | 571 | { |
561 | int i, err = 0; | ||
562 | struct threshold_bank *b = NULL; | ||
563 | struct device *dev = per_cpu(mce_device, cpu); | 572 | struct device *dev = per_cpu(mce_device, cpu); |
564 | char name[32]; | 573 | struct amd_northbridge *nb = NULL; |
574 | struct threshold_bank *b = NULL; | ||
575 | const char *name = th_names[bank]; | ||
576 | int err = 0; | ||
565 | 577 | ||
566 | sprintf(name, "threshold_bank%i", bank); | 578 | if (shared_bank[bank]) { |
567 | 579 | ||
568 | #ifdef CONFIG_SMP | 580 | nb = node_to_amd_nb(amd_get_nb_id(cpu)); |
569 | if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ | 581 | WARN_ON(!nb); |
570 | i = cpumask_first(cpu_llc_shared_mask(cpu)); | ||
571 | 582 | ||
572 | /* first core not up yet */ | 583 | /* threshold descriptor already initialized on this node? */ |
573 | if (cpu_data(i).cpu_core_id) | 584 | if (nb->bank4) { |
574 | goto out; | 585 | /* yes, use it */ |
586 | b = nb->bank4; | ||
587 | err = kobject_add(b->kobj, &dev->kobj, name); | ||
588 | if (err) | ||
589 | goto out; | ||
575 | 590 | ||
576 | /* already linked */ | 591 | per_cpu(threshold_banks, cpu)[bank] = b; |
577 | if (per_cpu(threshold_banks, cpu)[bank]) | 592 | atomic_inc(&b->cpus); |
578 | goto out; | ||
579 | 593 | ||
580 | b = per_cpu(threshold_banks, i)[bank]; | 594 | err = __threshold_add_blocks(b); |
581 | 595 | ||
582 | if (!b) | ||
583 | goto out; | 596 | goto out; |
584 | 597 | } | |
585 | err = sysfs_create_link(&dev->kobj, b->kobj, name); | ||
586 | if (err) | ||
587 | goto out; | ||
588 | |||
589 | cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu)); | ||
590 | per_cpu(threshold_banks, cpu)[bank] = b; | ||
591 | |||
592 | goto out; | ||
593 | } | 598 | } |
594 | #endif | ||
595 | 599 | ||
596 | b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); | 600 | b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); |
597 | if (!b) { | 601 | if (!b) { |
598 | err = -ENOMEM; | 602 | err = -ENOMEM; |
599 | goto out; | 603 | goto out; |
600 | } | 604 | } |
601 | if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) { | ||
602 | kfree(b); | ||
603 | err = -ENOMEM; | ||
604 | goto out; | ||
605 | } | ||
606 | 605 | ||
607 | b->kobj = kobject_create_and_add(name, &dev->kobj); | 606 | b->kobj = kobject_create_and_add(name, &dev->kobj); |
608 | if (!b->kobj) | 607 | if (!b->kobj) { |
608 | err = -EINVAL; | ||
609 | goto out_free; | 609 | goto out_free; |
610 | 610 | } | |
611 | #ifndef CONFIG_SMP | ||
612 | cpumask_setall(b->cpus); | ||
613 | #else | ||
614 | cpumask_set_cpu(cpu, b->cpus); | ||
615 | #endif | ||
616 | 611 | ||
617 | per_cpu(threshold_banks, cpu)[bank] = b; | 612 | per_cpu(threshold_banks, cpu)[bank] = b; |
618 | 613 | ||
619 | err = local_allocate_threshold_blocks(cpu, bank); | 614 | if (shared_bank[bank]) { |
620 | if (err) | 615 | atomic_set(&b->cpus, 1); |
621 | goto out_free; | ||
622 | |||
623 | for_each_cpu(i, b->cpus) { | ||
624 | if (i == cpu) | ||
625 | continue; | ||
626 | |||
627 | dev = per_cpu(mce_device, i); | ||
628 | if (dev) | ||
629 | err = sysfs_create_link(&dev->kobj,b->kobj, name); | ||
630 | if (err) | ||
631 | goto out; | ||
632 | 616 | ||
633 | per_cpu(threshold_banks, i)[bank] = b; | 617 | /* nb is already initialized, see above */ |
618 | WARN_ON(nb->bank4); | ||
619 | nb->bank4 = b; | ||
634 | } | 620 | } |
635 | 621 | ||
636 | goto out; | 622 | err = allocate_threshold_blocks(cpu, bank, 0, |
623 | MSR_IA32_MC0_MISC + bank * 4); | ||
624 | if (!err) | ||
625 | goto out; | ||
637 | 626 | ||
638 | out_free: | 627 | out_free: |
639 | per_cpu(threshold_banks, cpu)[bank] = NULL; | ||
640 | free_cpumask_var(b->cpus); | ||
641 | kfree(b); | 628 | kfree(b); |
642 | out: | 629 | |
630 | out: | ||
643 | return err; | 631 | return err; |
644 | } | 632 | } |
645 | 633 | ||
@@ -660,12 +648,6 @@ static __cpuinit int threshold_create_device(unsigned int cpu) | |||
660 | return err; | 648 | return err; |
661 | } | 649 | } |
662 | 650 | ||
663 | /* | ||
664 | * let's be hotplug friendly. | ||
665 | * in case of multiple core processors, the first core always takes ownership | ||
666 | * of shared sysfs dir/files, and rest of the cores will be symlinked to it. | ||
667 | */ | ||
668 | |||
669 | static void deallocate_threshold_block(unsigned int cpu, | 651 | static void deallocate_threshold_block(unsigned int cpu, |
670 | unsigned int bank) | 652 | unsigned int bank) |
671 | { | 653 | { |
@@ -686,41 +668,42 @@ static void deallocate_threshold_block(unsigned int cpu, | |||
686 | per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; | 668 | per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; |
687 | } | 669 | } |
688 | 670 | ||
671 | static void __threshold_remove_blocks(struct threshold_bank *b) | ||
672 | { | ||
673 | struct threshold_block *pos = NULL; | ||
674 | struct threshold_block *tmp = NULL; | ||
675 | |||
676 | kobject_del(b->kobj); | ||
677 | |||
678 | list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj) | ||
679 | kobject_del(&pos->kobj); | ||
680 | } | ||
681 | |||
689 | static void threshold_remove_bank(unsigned int cpu, int bank) | 682 | static void threshold_remove_bank(unsigned int cpu, int bank) |
690 | { | 683 | { |
684 | struct amd_northbridge *nb; | ||
691 | struct threshold_bank *b; | 685 | struct threshold_bank *b; |
692 | struct device *dev; | ||
693 | char name[32]; | ||
694 | int i = 0; | ||
695 | 686 | ||
696 | b = per_cpu(threshold_banks, cpu)[bank]; | 687 | b = per_cpu(threshold_banks, cpu)[bank]; |
697 | if (!b) | 688 | if (!b) |
698 | return; | 689 | return; |
690 | |||
699 | if (!b->blocks) | 691 | if (!b->blocks) |
700 | goto free_out; | 692 | goto free_out; |
701 | 693 | ||
702 | sprintf(name, "threshold_bank%i", bank); | 694 | if (shared_bank[bank]) { |
703 | 695 | if (!atomic_dec_and_test(&b->cpus)) { | |
704 | #ifdef CONFIG_SMP | 696 | __threshold_remove_blocks(b); |
705 | /* sibling symlink */ | 697 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
706 | if (shared_bank[bank] && b->blocks->cpu != cpu) { | 698 | return; |
707 | dev = per_cpu(mce_device, cpu); | 699 | } else { |
708 | sysfs_remove_link(&dev->kobj, name); | 700 | /* |
709 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 701 | * the last CPU on this node using the shared bank is |
710 | 702 | * going away, remove that bank now. | |
711 | return; | 703 | */ |
712 | } | 704 | nb = node_to_amd_nb(amd_get_nb_id(cpu)); |
713 | #endif | 705 | nb->bank4 = NULL; |
714 | 706 | } | |
715 | /* remove all sibling symlinks before unregistering */ | ||
716 | for_each_cpu(i, b->cpus) { | ||
717 | if (i == cpu) | ||
718 | continue; | ||
719 | |||
720 | dev = per_cpu(mce_device, i); | ||
721 | if (dev) | ||
722 | sysfs_remove_link(&dev->kobj, name); | ||
723 | per_cpu(threshold_banks, i)[bank] = NULL; | ||
724 | } | 707 | } |
725 | 708 | ||
726 | deallocate_threshold_block(cpu, bank); | 709 | deallocate_threshold_block(cpu, bank); |
@@ -728,7 +711,6 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
728 | free_out: | 711 | free_out: |
729 | kobject_del(b->kobj); | 712 | kobject_del(b->kobj); |
730 | kobject_put(b->kobj); | 713 | kobject_put(b->kobj); |
731 | free_cpumask_var(b->cpus); | ||
732 | kfree(b); | 714 | kfree(b); |
733 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 715 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
734 | } | 716 | } |
@@ -777,4 +759,24 @@ static __init int threshold_init_device(void) | |||
777 | 759 | ||
778 | return 0; | 760 | return 0; |
779 | } | 761 | } |
780 | device_initcall(threshold_init_device); | 762 | /* |
763 | * there are 3 funcs which need to be _initcalled in a logic sequence: | ||
764 | * 1. xen_late_init_mcelog | ||
765 | * 2. mcheck_init_device | ||
766 | * 3. threshold_init_device | ||
767 | * | ||
768 | * xen_late_init_mcelog must register xen_mce_chrdev_device before | ||
769 | * native mce_chrdev_device registration if running under xen platform; | ||
770 | * | ||
771 | * mcheck_init_device should be inited before threshold_init_device to | ||
772 | * initialize mce_device, otherwise a NULL ptr dereference will cause panic. | ||
773 | * | ||
774 | * so we use following _initcalls | ||
775 | * 1. device_initcall(xen_late_init_mcelog); | ||
776 | * 2. device_initcall_sync(mcheck_init_device); | ||
777 | * 3. late_initcall(threshold_init_device); | ||
778 | * | ||
779 | * when running under xen, the initcall order is 1,2,3; | ||
780 | * on baremetal, we skip 1 and we do only 2 and 3. | ||
781 | */ | ||
782 | late_initcall(threshold_init_device); | ||
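The ordering comment above leans on standard initcall levels. As a rough illustration (not part of the patch; the function bodies are placeholders), the three registration macros it names run in this sequence within one kernel image:

static int __init first_step(void)  { return 0; }  /* e.g. xen_late_init_mcelog  */
static int __init second_step(void) { return 0; }  /* e.g. mcheck_init_device    */
static int __init third_step(void)  { return 0; }  /* e.g. threshold_init_device */

device_initcall(first_step);       /* level 6                            */
device_initcall_sync(second_step); /* level 6s, after every level-6 call */
late_initcall(third_step);         /* level 7, after all of the above    */
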
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 29557aa06dda..915b876edd1e 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -32,6 +32,8 @@ | |||
32 | #include <asm/smp.h> | 32 | #include <asm/smp.h> |
33 | #include <asm/alternative.h> | 33 | #include <asm/alternative.h> |
34 | #include <asm/timer.h> | 34 | #include <asm/timer.h> |
35 | #include <asm/desc.h> | ||
36 | #include <asm/ldt.h> | ||
35 | 37 | ||
36 | #include "perf_event.h" | 38 | #include "perf_event.h" |
37 | 39 | ||
@@ -1738,6 +1740,29 @@ valid_user_frame(const void __user *fp, unsigned long size) | |||
1738 | return (__range_not_ok(fp, size, TASK_SIZE) == 0); | 1740 | return (__range_not_ok(fp, size, TASK_SIZE) == 0); |
1739 | } | 1741 | } |
1740 | 1742 | ||
1743 | static unsigned long get_segment_base(unsigned int segment) | ||
1744 | { | ||
1745 | struct desc_struct *desc; | ||
1746 | int idx = segment >> 3; | ||
1747 | |||
1748 | if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) { | ||
1749 | if (idx > LDT_ENTRIES) | ||
1750 | return 0; | ||
1751 | |||
1752 | if (idx > current->active_mm->context.size) | ||
1753 | return 0; | ||
1754 | |||
1755 | desc = current->active_mm->context.ldt; | ||
1756 | } else { | ||
1757 | if (idx > GDT_ENTRIES) | ||
1758 | return 0; | ||
1759 | |||
1760 | desc = __this_cpu_ptr(&gdt_page.gdt[0]); | ||
1761 | } | ||
1762 | |||
1763 | return get_desc_base(desc + idx); | ||
1764 | } | ||
1765 | |||
1741 | #ifdef CONFIG_COMPAT | 1766 | #ifdef CONFIG_COMPAT |
1742 | 1767 | ||
1743 | #include <asm/compat.h> | 1768 | #include <asm/compat.h> |
@@ -1746,13 +1771,17 @@ static inline int | |||
1746 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1771 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) |
1747 | { | 1772 | { |
1748 | /* 32-bit process in 64-bit kernel. */ | 1773 | /* 32-bit process in 64-bit kernel. */ |
1774 | unsigned long ss_base, cs_base; | ||
1749 | struct stack_frame_ia32 frame; | 1775 | struct stack_frame_ia32 frame; |
1750 | const void __user *fp; | 1776 | const void __user *fp; |
1751 | 1777 | ||
1752 | if (!test_thread_flag(TIF_IA32)) | 1778 | if (!test_thread_flag(TIF_IA32)) |
1753 | return 0; | 1779 | return 0; |
1754 | 1780 | ||
1755 | fp = compat_ptr(regs->bp); | 1781 | cs_base = get_segment_base(regs->cs); |
1782 | ss_base = get_segment_base(regs->ss); | ||
1783 | |||
1784 | fp = compat_ptr(ss_base + regs->bp); | ||
1756 | while (entry->nr < PERF_MAX_STACK_DEPTH) { | 1785 | while (entry->nr < PERF_MAX_STACK_DEPTH) { |
1757 | unsigned long bytes; | 1786 | unsigned long bytes; |
1758 | frame.next_frame = 0; | 1787 | frame.next_frame = 0; |
@@ -1765,8 +1794,8 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
1765 | if (!valid_user_frame(fp, sizeof(frame))) | 1794 | if (!valid_user_frame(fp, sizeof(frame))) |
1766 | break; | 1795 | break; |
1767 | 1796 | ||
1768 | perf_callchain_store(entry, frame.return_address); | 1797 | perf_callchain_store(entry, cs_base + frame.return_address); |
1769 | fp = compat_ptr(frame.next_frame); | 1798 | fp = compat_ptr(ss_base + frame.next_frame); |
1770 | } | 1799 | } |
1771 | return 1; | 1800 | return 1; |
1772 | } | 1801 | } |
@@ -1789,6 +1818,12 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) | |||
1789 | return; | 1818 | return; |
1790 | } | 1819 | } |
1791 | 1820 | ||
1821 | /* | ||
1822 | * We don't know what to do with VM86 stacks.. ignore them for now. | ||
1823 | */ | ||
1824 | if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM)) | ||
1825 | return; | ||
1826 | |||
1792 | fp = (void __user *)regs->bp; | 1827 | fp = (void __user *)regs->bp; |
1793 | 1828 | ||
1794 | perf_callchain_store(entry, regs->ip); | 1829 | perf_callchain_store(entry, regs->ip); |
@@ -1816,16 +1851,50 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) | |||
1816 | } | 1851 | } |
1817 | } | 1852 | } |
1818 | 1853 | ||
1819 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | 1854 | /* |
1855 | * Deal with code segment offsets for the various execution modes: | ||
1856 | * | ||
1857 | * VM86 - the good olde 16 bit days, where the linear address is | ||
1858 | * 20 bits and we use regs->ip + 0x10 * regs->cs. | ||
1859 | * | ||
1860 | * IA32 - Where we need to look at GDT/LDT segment descriptor tables | ||
1861 | * to figure out what the 32bit base address is. | ||
1862 | * | ||
1863 | * X32 - has TIF_X32 set, but is running in x86_64 | ||
1864 | * | ||
1865 | * X86_64 - CS,DS,SS,ES are all zero based. | ||
1866 | */ | ||
1867 | static unsigned long code_segment_base(struct pt_regs *regs) | ||
1820 | { | 1868 | { |
1821 | unsigned long ip; | 1869 | /* |
1870 | * If we are in VM86 mode, add the segment offset to convert to a | ||
1871 | * linear address. | ||
1872 | */ | ||
1873 | if (regs->flags & X86_VM_MASK) | ||
1874 | return 0x10 * regs->cs; | ||
1875 | |||
1876 | /* | ||
1877 | * For IA32 we look at the GDT/LDT segment base to convert the | ||
1878 | * effective IP to a linear address. | ||
1879 | */ | ||
1880 | #ifdef CONFIG_X86_32 | ||
1881 | if (user_mode(regs) && regs->cs != __USER_CS) | ||
1882 | return get_segment_base(regs->cs); | ||
1883 | #else | ||
1884 | if (test_thread_flag(TIF_IA32)) { | ||
1885 | if (user_mode(regs) && regs->cs != __USER32_CS) | ||
1886 | return get_segment_base(regs->cs); | ||
1887 | } | ||
1888 | #endif | ||
1889 | return 0; | ||
1890 | } | ||
1822 | 1891 | ||
1892 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | ||
1893 | { | ||
1823 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) | 1894 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) |
1824 | ip = perf_guest_cbs->get_guest_ip(); | 1895 | return perf_guest_cbs->get_guest_ip(); |
1825 | else | ||
1826 | ip = instruction_pointer(regs); | ||
1827 | 1896 | ||
1828 | return ip; | 1897 | return regs->ip + code_segment_base(regs); |
1829 | } | 1898 | } |
1830 | 1899 | ||
1831 | unsigned long perf_misc_flags(struct pt_regs *regs) | 1900 | unsigned long perf_misc_flags(struct pt_regs *regs) |
@@ -1838,7 +1907,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs) | |||
1838 | else | 1907 | else |
1839 | misc |= PERF_RECORD_MISC_GUEST_KERNEL; | 1908 | misc |= PERF_RECORD_MISC_GUEST_KERNEL; |
1840 | } else { | 1909 | } else { |
1841 | if (!kernel_ip(regs->ip)) | 1910 | if (user_mode(regs)) |
1842 | misc |= PERF_RECORD_MISC_USER; | 1911 | misc |= PERF_RECORD_MISC_USER; |
1843 | else | 1912 | else |
1844 | misc |= PERF_RECORD_MISC_KERNEL; | 1913 | misc |= PERF_RECORD_MISC_KERNEL; |
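code_segment_base() above handles VM86 samples by adding 0x10 * regs->cs to the effective IP; a small worked example of that case (illustrative only), showing the linear address perf_instruction_pointer() would report:

static unsigned long vm86_linear_ip(unsigned long cs, unsigned long ip)
{
	/* Virtual-8086 segmentation: linear = segment * 16 + offset,
	 * e.g. cs = 0xf000, ip = 0xe05b gives 0xf0000 + 0xe05b = 0xfe05b. */
	return (cs << 4) + ip;
}
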
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 821d53b696d1..6605a81ba339 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -516,6 +516,26 @@ static inline bool kernel_ip(unsigned long ip) | |||
516 | #endif | 516 | #endif |
517 | } | 517 | } |
518 | 518 | ||
519 | /* | ||
520 | * Not all PMUs provide the right context information to place the reported IP | ||
521 | * into full context. Specifically segment registers are typically not | ||
522 | * supplied. | ||
523 | * | ||
524 | * Assuming the address is a linear address (it is for IBS), we fake the CS and | ||
525 | * vm86 mode using the known zero-based code segment and 'fix up' the registers | ||
526 | * to reflect this. | ||
527 | * | ||
528 | * Intel PEBS/LBR appear to typically provide the effective address, nothing | ||
529 | * much we can do about that but pray and treat it like a linear address. | ||
530 | */ | ||
531 | static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip) | ||
532 | { | ||
533 | regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS; | ||
534 | if (regs->flags & X86_VM_MASK) | ||
535 | regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK); | ||
536 | regs->ip = ip; | ||
537 | } | ||
538 | |||
519 | #ifdef CONFIG_CPU_SUP_AMD | 539 | #ifdef CONFIG_CPU_SUP_AMD |
520 | 540 | ||
521 | int amd_pmu_init(void); | 541 | int amd_pmu_init(void); |
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index da9bcdcd9856..7bfb5bec8630 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c | |||
@@ -13,6 +13,8 @@ | |||
13 | 13 | ||
14 | #include <asm/apic.h> | 14 | #include <asm/apic.h> |
15 | 15 | ||
16 | #include "perf_event.h" | ||
17 | |||
16 | static u32 ibs_caps; | 18 | static u32 ibs_caps; |
17 | 19 | ||
18 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) | 20 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) |
@@ -536,7 +538,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) | |||
536 | if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) { | 538 | if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) { |
537 | regs.flags &= ~PERF_EFLAGS_EXACT; | 539 | regs.flags &= ~PERF_EFLAGS_EXACT; |
538 | } else { | 540 | } else { |
539 | instruction_pointer_set(®s, ibs_data.regs[1]); | 541 | set_linear_ip(®s, ibs_data.regs[1]); |
540 | regs.flags |= PERF_EFLAGS_EXACT; | 542 | regs.flags |= PERF_EFLAGS_EXACT; |
541 | } | 543 | } |
542 | 544 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 629ae0b7ad90..e38d97bf4259 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -499,7 +499,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | |||
499 | * We sampled a branch insn, rewind using the LBR stack | 499 | * We sampled a branch insn, rewind using the LBR stack |
500 | */ | 500 | */ |
501 | if (ip == to) { | 501 | if (ip == to) { |
502 | regs->ip = from; | 502 | set_linear_ip(regs, from); |
503 | return 1; | 503 | return 1; |
504 | } | 504 | } |
505 | 505 | ||
@@ -529,7 +529,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | |||
529 | } while (to < ip); | 529 | } while (to < ip); |
530 | 530 | ||
531 | if (to == ip) { | 531 | if (to == ip) { |
532 | regs->ip = old_to; | 532 | set_linear_ip(regs, old_to); |
533 | return 1; | 533 | return 1; |
534 | } | 534 | } |
535 | 535 | ||
@@ -569,7 +569,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
569 | * A possible PERF_SAMPLE_REGS will have to transfer all regs. | 569 | * A possible PERF_SAMPLE_REGS will have to transfer all regs. |
570 | */ | 570 | */ |
571 | regs = *iregs; | 571 | regs = *iregs; |
572 | regs.ip = pebs->ip; | 572 | regs.flags = pebs->flags; |
573 | set_linear_ip(®s, pebs->ip); | ||
573 | regs.bp = pebs->bp; | 574 | regs.bp = pebs->bp; |
574 | regs.sp = pebs->sp; | 575 | regs.sp = pebs->sp; |
575 | 576 | ||
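
Note the ordering in __intel_pmu_pebs_event(): regs.flags is taken from the PEBS record before set_linear_ip() runs, because set_linear_ip() looks at regs->flags to decide whether the sample came from vm86 mode. A trimmed-down sketch of that dependency, with hypothetical simplified types rather than the kernel structures:

#include <stdio.h>

#define DEMO_VM_BIT (1u << 17)

struct demo_regs { unsigned long flags, ip; };

/* trimmed-down stand-in for set_linear_ip(): behaviour depends on flags */
static void demo_set_linear_ip(struct demo_regs *regs, unsigned long ip)
{
    if (regs->flags & DEMO_VM_BIT)
        regs->flags &= ~DEMO_VM_BIT;    /* vm86 handling path */
    regs->ip = ip;
}

int main(void)
{
    struct demo_regs regs = { 0 };
    unsigned long pebs_flags = DEMO_VM_BIT, pebs_ip = 0x1234;

    regs.flags = pebs_flags;            /* must happen first ...              */
    demo_set_linear_ip(&regs, pebs_ip); /* ... so this sees the sampled flags */

    printf("ip=%#lx flags=%#lx\n", regs.ip, regs.flags);
    return 0;
}
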
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index f3851892e077..c9e5dc56630a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h | |||
@@ -5,7 +5,7 @@ | |||
5 | #include "perf_event.h" | 5 | #include "perf_event.h" |
6 | 6 | ||
7 | #define UNCORE_PMU_NAME_LEN 32 | 7 | #define UNCORE_PMU_NAME_LEN 32 |
8 | #define UNCORE_PMU_HRTIMER_INTERVAL (60 * NSEC_PER_SEC) | 8 | #define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC) |
9 | 9 | ||
10 | #define UNCORE_FIXED_EVENT 0xff | 10 | #define UNCORE_FIXED_EVENT 0xff |
11 | #define UNCORE_PMC_IDX_MAX_GENERIC 8 | 11 | #define UNCORE_PMC_IDX_MAX_GENERIC 8 |
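
The uncore change is an integer-width fix: NSEC_PER_SEC is 10^9, and on a 32-bit build 60 * NSEC_PER_SEC is evaluated in 32-bit arithmetic and wraps before it ever reaches the 64-bit hrtimer interval. The LL suffix forces the multiply to be done in 64 bits. A quick demonstration with the values written out instead of the kernel macro:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /*
     * If the multiplication is done in 32-bit arithmetic (as it would be
     * on a 32-bit kernel where NSEC_PER_SEC is a 32-bit long), 60 * 10^9
     * wraps around.  Unsigned types keep this demo well-defined; the
     * kernel fix avoids the problem by making one operand 64-bit (60LL).
     */
    uint32_t narrow = (uint32_t)60 * 1000000000u;   /* wraps: 60e9 mod 2^32 */
    int64_t  wide   = 60LL * 1000000000;            /* correct: 60000000000 */

    printf("32-bit result: %u\n", narrow);
    printf("64-bit result: %lld\n", (long long)wide);
    return 0;
}
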
diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c deleted file mode 100644 index a640ae5ad201..000000000000 --- a/arch/x86/kernel/cpu/sched.c +++ /dev/null | |||
@@ -1,55 +0,0 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/math64.h> | ||
3 | #include <linux/percpu.h> | ||
4 | #include <linux/irqflags.h> | ||
5 | |||
6 | #include <asm/cpufeature.h> | ||
7 | #include <asm/processor.h> | ||
8 | |||
9 | #ifdef CONFIG_SMP | ||
10 | |||
11 | static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched); | ||
12 | |||
13 | static unsigned long scale_aperfmperf(void) | ||
14 | { | ||
15 | struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched); | ||
16 | unsigned long ratio, flags; | ||
17 | |||
18 | local_irq_save(flags); | ||
19 | get_aperfmperf(&val); | ||
20 | local_irq_restore(flags); | ||
21 | |||
22 | ratio = calc_aperfmperf_ratio(old, &val); | ||
23 | *old = val; | ||
24 | |||
25 | return ratio; | ||
26 | } | ||
27 | |||
28 | unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) | ||
29 | { | ||
30 | /* | ||
31 | * do aperf/mperf on the cpu level because it includes things | ||
32 | * like turbo mode, which are relevant to full cores. | ||
33 | */ | ||
34 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
35 | return scale_aperfmperf(); | ||
36 | |||
37 | /* | ||
38 | * maybe have something cpufreq here | ||
39 | */ | ||
40 | |||
41 | return default_scale_freq_power(sd, cpu); | ||
42 | } | ||
43 | |||
44 | unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu) | ||
45 | { | ||
46 | /* | ||
47 | * aperf/mperf already includes the smt gain | ||
48 | */ | ||
49 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
50 | return SCHED_LOAD_SCALE; | ||
51 | |||
52 | return default_scale_smt_power(sd, cpu); | ||
53 | } | ||
54 | |||
55 | #endif | ||
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 41857970517f..ed858e9e9a74 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -944,7 +944,7 @@ void __init e820_reserve_resources(void) | |||
944 | for (i = 0; i < e820_saved.nr_map; i++) { | 944 | for (i = 0; i < e820_saved.nr_map; i++) { |
945 | struct e820entry *entry = &e820_saved.map[i]; | 945 | struct e820entry *entry = &e820_saved.map[i]; |
946 | firmware_map_add_early(entry->addr, | 946 | firmware_map_add_early(entry->addr, |
947 | entry->addr + entry->size - 1, | 947 | entry->addr + entry->size, |
948 | e820_type_to_string(entry->type)); | 948 | e820_type_to_string(entry->type)); |
949 | } | 949 | } |
950 | } | 950 | } |
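
The e820 change passes entry->addr + entry->size, i.e. the first byte after the region, where the old code passed the last byte of the region. Assuming firmware_map_add_early() treats its end argument as exclusive, as the new code implies, the difference for a hypothetical entry looks like this:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* Hypothetical e820 entry: 4 KiB starting at 0x1000. */
    uint64_t addr = 0x1000, size = 0x1000;

    /* Inclusive end: last byte of the region (what the old code passed). */
    uint64_t end_inclusive = addr + size - 1;   /* 0x1fff */
    /* Exclusive end: first byte after the region (what the new code passes). */
    uint64_t end_exclusive = addr + size;       /* 0x2000 */

    printf("region [%#llx, %#llx] vs [%#llx, %#llx)\n",
           (unsigned long long)addr, (unsigned long long)end_inclusive,
           (unsigned long long)addr, (unsigned long long)end_exclusive);
    return 0;
}
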
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 459d4a0dca8d..b7a81dcb7366 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -1124,24 +1124,6 @@ apicinterrupt LOCAL_TIMER_VECTOR \ | |||
1124 | apicinterrupt X86_PLATFORM_IPI_VECTOR \ | 1124 | apicinterrupt X86_PLATFORM_IPI_VECTOR \ |
1125 | x86_platform_ipi smp_x86_platform_ipi | 1125 | x86_platform_ipi smp_x86_platform_ipi |
1126 | 1126 | ||
1127 | #ifdef CONFIG_SMP | ||
1128 | ALIGN | ||
1129 | INTR_FRAME | ||
1130 | .irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ | ||
1131 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 | ||
1132 | .if NUM_INVALIDATE_TLB_VECTORS > \idx | ||
1133 | ENTRY(invalidate_interrupt\idx) | ||
1134 | pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx) | ||
1135 | jmp .Lcommon_invalidate_interrupt0 | ||
1136 | CFI_ADJUST_CFA_OFFSET -8 | ||
1137 | END(invalidate_interrupt\idx) | ||
1138 | .endif | ||
1139 | .endr | ||
1140 | CFI_ENDPROC | ||
1141 | apicinterrupt INVALIDATE_TLB_VECTOR_START, \ | ||
1142 | invalidate_interrupt0, smp_invalidate_interrupt | ||
1143 | #endif | ||
1144 | |||
1145 | apicinterrupt THRESHOLD_APIC_VECTOR \ | 1127 | apicinterrupt THRESHOLD_APIC_VECTOR \ |
1146 | threshold_interrupt smp_threshold_interrupt | 1128 | threshold_interrupt smp_threshold_interrupt |
1147 | apicinterrupt THERMAL_APIC_VECTOR \ | 1129 | apicinterrupt THERMAL_APIC_VECTOR \ |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 1f5f1d5d2a02..7ad683d78645 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -328,6 +328,7 @@ void fixup_irqs(void) | |||
328 | chip->irq_retrigger(data); | 328 | chip->irq_retrigger(data); |
329 | raw_spin_unlock(&desc->lock); | 329 | raw_spin_unlock(&desc->lock); |
330 | } | 330 | } |
331 | __this_cpu_write(vector_irq[vector], -1); | ||
331 | } | 332 | } |
332 | } | 333 | } |
333 | #endif | 334 | #endif |
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 252981afd6c4..6e03b0d69138 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -171,79 +171,6 @@ static void __init smp_intr_init(void) | |||
171 | */ | 171 | */ |
172 | alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); | 172 | alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); |
173 | 173 | ||
174 | /* IPIs for invalidation */ | ||
175 | #define ALLOC_INVTLB_VEC(NR) \ | ||
176 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+NR, \ | ||
177 | invalidate_interrupt##NR) | ||
178 | |||
179 | switch (NUM_INVALIDATE_TLB_VECTORS) { | ||
180 | default: | ||
181 | ALLOC_INVTLB_VEC(31); | ||
182 | case 31: | ||
183 | ALLOC_INVTLB_VEC(30); | ||
184 | case 30: | ||
185 | ALLOC_INVTLB_VEC(29); | ||
186 | case 29: | ||
187 | ALLOC_INVTLB_VEC(28); | ||
188 | case 28: | ||
189 | ALLOC_INVTLB_VEC(27); | ||
190 | case 27: | ||
191 | ALLOC_INVTLB_VEC(26); | ||
192 | case 26: | ||
193 | ALLOC_INVTLB_VEC(25); | ||
194 | case 25: | ||
195 | ALLOC_INVTLB_VEC(24); | ||
196 | case 24: | ||
197 | ALLOC_INVTLB_VEC(23); | ||
198 | case 23: | ||
199 | ALLOC_INVTLB_VEC(22); | ||
200 | case 22: | ||
201 | ALLOC_INVTLB_VEC(21); | ||
202 | case 21: | ||
203 | ALLOC_INVTLB_VEC(20); | ||
204 | case 20: | ||
205 | ALLOC_INVTLB_VEC(19); | ||
206 | case 19: | ||
207 | ALLOC_INVTLB_VEC(18); | ||
208 | case 18: | ||
209 | ALLOC_INVTLB_VEC(17); | ||
210 | case 17: | ||
211 | ALLOC_INVTLB_VEC(16); | ||
212 | case 16: | ||
213 | ALLOC_INVTLB_VEC(15); | ||
214 | case 15: | ||
215 | ALLOC_INVTLB_VEC(14); | ||
216 | case 14: | ||
217 | ALLOC_INVTLB_VEC(13); | ||
218 | case 13: | ||
219 | ALLOC_INVTLB_VEC(12); | ||
220 | case 12: | ||
221 | ALLOC_INVTLB_VEC(11); | ||
222 | case 11: | ||
223 | ALLOC_INVTLB_VEC(10); | ||
224 | case 10: | ||
225 | ALLOC_INVTLB_VEC(9); | ||
226 | case 9: | ||
227 | ALLOC_INVTLB_VEC(8); | ||
228 | case 8: | ||
229 | ALLOC_INVTLB_VEC(7); | ||
230 | case 7: | ||
231 | ALLOC_INVTLB_VEC(6); | ||
232 | case 6: | ||
233 | ALLOC_INVTLB_VEC(5); | ||
234 | case 5: | ||
235 | ALLOC_INVTLB_VEC(4); | ||
236 | case 4: | ||
237 | ALLOC_INVTLB_VEC(3); | ||
238 | case 3: | ||
239 | ALLOC_INVTLB_VEC(2); | ||
240 | case 2: | ||
241 | ALLOC_INVTLB_VEC(1); | ||
242 | case 1: | ||
243 | ALLOC_INVTLB_VEC(0); | ||
244 | break; | ||
245 | } | ||
246 | |||
247 | /* IPI for generic function call */ | 174 | /* IPI for generic function call */ |
248 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); | 175 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); |
249 | 176 | ||
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index 1d5d31ea686b..dc1404bf8e4b 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c | |||
@@ -107,7 +107,7 @@ static int __init create_setup_data_nodes(struct dentry *parent) | |||
107 | { | 107 | { |
108 | struct setup_data_node *node; | 108 | struct setup_data_node *node; |
109 | struct setup_data *data; | 109 | struct setup_data *data; |
110 | int error = -ENOMEM; | 110 | int error; |
111 | struct dentry *d; | 111 | struct dentry *d; |
112 | struct page *pg; | 112 | struct page *pg; |
113 | u64 pa_data; | 113 | u64 pa_data; |
@@ -121,8 +121,10 @@ static int __init create_setup_data_nodes(struct dentry *parent) | |||
121 | 121 | ||
122 | while (pa_data) { | 122 | while (pa_data) { |
123 | node = kmalloc(sizeof(*node), GFP_KERNEL); | 123 | node = kmalloc(sizeof(*node), GFP_KERNEL); |
124 | if (!node) | 124 | if (!node) { |
125 | error = -ENOMEM; | ||
125 | goto err_dir; | 126 | goto err_dir; |
127 | } | ||
126 | 128 | ||
127 | pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT); | 129 | pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT); |
128 | if (PageHighMem(pg)) { | 130 | if (PageHighMem(pg)) { |
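
The kdebugfs change is an error-handling pattern fix: rather than pre-loading error with -ENOMEM at declaration, each failure site now sets the code it actually wants before jumping to the cleanup label, so every exit path states its own return value. A generic sketch of the pattern with hypothetical helpers, not the kdebugfs code itself:

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

struct node { int dummy; };     /* hypothetical resource, just to show the shape */

static int create_nodes(int count)
{
    struct node *node;
    int error;                  /* no blanket default value */
    int i;

    for (i = 0; i < count; i++) {
        node = malloc(sizeof(*node));
        if (!node) {
            error = -ENOMEM;    /* set at the failure site */
            goto err_out;
        }
        /* ... further steps would set their own error codes ... */
        free(node);
    }
    return 0;

err_out:
    /* cleanup of anything created so far would go here */
    return error;
}

int main(void)
{
    printf("%d\n", create_nodes(4));
    return 0;
}
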
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e554e5ad2fe8..c1d61ee4b4f1 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -39,6 +39,9 @@ | |||
39 | #include <asm/desc.h> | 39 | #include <asm/desc.h> |
40 | #include <asm/tlbflush.h> | 40 | #include <asm/tlbflush.h> |
41 | #include <asm/idle.h> | 41 | #include <asm/idle.h> |
42 | #include <asm/apic.h> | ||
43 | #include <asm/apicdef.h> | ||
44 | #include <asm/hypervisor.h> | ||
42 | 45 | ||
43 | static int kvmapf = 1; | 46 | static int kvmapf = 1; |
44 | 47 | ||
@@ -283,6 +286,22 @@ static void kvm_register_steal_time(void) | |||
283 | cpu, __pa(st)); | 286 | cpu, __pa(st)); |
284 | } | 287 | } |
285 | 288 | ||
289 | static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; | ||
290 | |||
291 | static void kvm_guest_apic_eoi_write(u32 reg, u32 val) | ||
292 | { | ||
293 | /** | ||
294 | * This relies on __test_and_clear_bit to modify the memory | ||
295 | * in a way that is atomic with respect to the local CPU. | ||
296 | * The hypervisor only accesses this memory from the local CPU so | ||
297 | * there's no need for lock or memory barriers. | ||
298 | * An optimization barrier is implied in apic write. | ||
299 | */ | ||
300 | if (__test_and_clear_bit(KVM_PV_EOI_BIT, &__get_cpu_var(kvm_apic_eoi))) | ||
301 | return; | ||
302 | apic_write(APIC_EOI, APIC_EOI_ACK); | ||
303 | } | ||
304 | |||
286 | void __cpuinit kvm_guest_cpu_init(void) | 305 | void __cpuinit kvm_guest_cpu_init(void) |
287 | { | 306 | { |
288 | if (!kvm_para_available()) | 307 | if (!kvm_para_available()) |
@@ -300,11 +319,20 @@ void __cpuinit kvm_guest_cpu_init(void) | |||
300 | smp_processor_id()); | 319 | smp_processor_id()); |
301 | } | 320 | } |
302 | 321 | ||
322 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) { | ||
323 | unsigned long pa; | ||
324 | /* Size alignment is implied but just to make it explicit. */ | ||
325 | BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4); | ||
326 | __get_cpu_var(kvm_apic_eoi) = 0; | ||
327 | pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED; | ||
328 | wrmsrl(MSR_KVM_PV_EOI_EN, pa); | ||
329 | } | ||
330 | |||
303 | if (has_steal_clock) | 331 | if (has_steal_clock) |
304 | kvm_register_steal_time(); | 332 | kvm_register_steal_time(); |
305 | } | 333 | } |
306 | 334 | ||
307 | static void kvm_pv_disable_apf(void *unused) | 335 | static void kvm_pv_disable_apf(void) |
308 | { | 336 | { |
309 | if (!__get_cpu_var(apf_reason).enabled) | 337 | if (!__get_cpu_var(apf_reason).enabled) |
310 | return; | 338 | return; |
@@ -316,11 +344,23 @@ static void kvm_pv_disable_apf(void *unused) | |||
316 | smp_processor_id()); | 344 | smp_processor_id()); |
317 | } | 345 | } |
318 | 346 | ||
347 | static void kvm_pv_guest_cpu_reboot(void *unused) | ||
348 | { | ||
349 | /* | ||
350 | * We disable PV EOI before we load a new kernel by kexec, | ||
351 | * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory. | ||
352 | * New kernel can re-enable when it boots. | ||
353 | */ | ||
354 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) | ||
355 | wrmsrl(MSR_KVM_PV_EOI_EN, 0); | ||
356 | kvm_pv_disable_apf(); | ||
357 | } | ||
358 | |||
319 | static int kvm_pv_reboot_notify(struct notifier_block *nb, | 359 | static int kvm_pv_reboot_notify(struct notifier_block *nb, |
320 | unsigned long code, void *unused) | 360 | unsigned long code, void *unused) |
321 | { | 361 | { |
322 | if (code == SYS_RESTART) | 362 | if (code == SYS_RESTART) |
323 | on_each_cpu(kvm_pv_disable_apf, NULL, 1); | 363 | on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1); |
324 | return NOTIFY_DONE; | 364 | return NOTIFY_DONE; |
325 | } | 365 | } |
326 | 366 | ||
@@ -371,7 +411,9 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy) | |||
371 | static void kvm_guest_cpu_offline(void *dummy) | 411 | static void kvm_guest_cpu_offline(void *dummy) |
372 | { | 412 | { |
373 | kvm_disable_steal_time(); | 413 | kvm_disable_steal_time(); |
374 | kvm_pv_disable_apf(NULL); | 414 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) |
415 | wrmsrl(MSR_KVM_PV_EOI_EN, 0); | ||
416 | kvm_pv_disable_apf(); | ||
375 | apf_task_wake_all(); | 417 | apf_task_wake_all(); |
376 | } | 418 | } |
377 | 419 | ||
@@ -424,6 +466,9 @@ void __init kvm_guest_init(void) | |||
424 | pv_time_ops.steal_clock = kvm_steal_clock; | 466 | pv_time_ops.steal_clock = kvm_steal_clock; |
425 | } | 467 | } |
426 | 468 | ||
469 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) | ||
470 | apic_set_eoi_write(kvm_guest_apic_eoi_write); | ||
471 | |||
427 | #ifdef CONFIG_SMP | 472 | #ifdef CONFIG_SMP |
428 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; | 473 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; |
429 | register_cpu_notifier(&kvm_cpu_notifier); | 474 | register_cpu_notifier(&kvm_cpu_notifier); |
@@ -432,6 +477,19 @@ void __init kvm_guest_init(void) | |||
432 | #endif | 477 | #endif |
433 | } | 478 | } |
434 | 479 | ||
480 | static bool __init kvm_detect(void) | ||
481 | { | ||
482 | if (!kvm_para_available()) | ||
483 | return false; | ||
484 | return true; | ||
485 | } | ||
486 | |||
487 | const struct hypervisor_x86 x86_hyper_kvm __refconst = { | ||
488 | .name = "KVM", | ||
489 | .detect = kvm_detect, | ||
490 | }; | ||
491 | EXPORT_SYMBOL_GPL(x86_hyper_kvm); | ||
492 | |||
435 | static __init int activate_jump_labels(void) | 493 | static __init int activate_jump_labels(void) |
436 | { | 494 | { |
437 | if (has_steal_clock) { | 495 | if (has_steal_clock) { |
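
The PV EOI machinery added above reduces to one shared bit per CPU: the hypervisor sets it when the next EOI may be skipped, and the guest's EOI hook test-and-clears it, falling back to a real APIC_EOI write only when the bit was clear. A rough single-threaded userspace model of that handshake (placeholder names; being single-threaded makes the "atomic with respect to the local CPU" requirement trivial here):

#include <stdio.h>
#include <stdint.h>

#define DEMO_PV_EOI_BIT 0       /* bit 0 of the shared word, as a stand-in */

static uint64_t shared_pv_eoi_word; /* per-CPU word the hypervisor also sees */
static int real_eoi_writes;         /* counts fallback APIC EOI writes */

/* guest-side EOI hook: skip the register write when the hypervisor said so */
static void demo_guest_eoi(void)
{
    if (shared_pv_eoi_word & (1ull << DEMO_PV_EOI_BIT)) {
        shared_pv_eoi_word &= ~(1ull << DEMO_PV_EOI_BIT);  /* test and clear */
        return;                     /* EOI elided */
    }
    real_eoi_writes++;              /* fall back to a real EOI */
}

int main(void)
{
    /* hypervisor grants a skip for the first interrupt only */
    shared_pv_eoi_word |= 1ull << DEMO_PV_EOI_BIT;
    demo_guest_eoi();               /* elided */
    demo_guest_eoi();               /* real write */

    printf("real EOI writes: %d\n", real_eoi_writes);
    return 0;
}
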
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 202494d2ec6e..216a4d754b0c 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c | |||
@@ -81,7 +81,7 @@ int apply_relocate(Elf32_Shdr *sechdrs, | |||
81 | *location += sym->st_value; | 81 | *location += sym->st_value; |
82 | break; | 82 | break; |
83 | case R_386_PC32: | 83 | case R_386_PC32: |
84 | /* Add the value, subtract its postition */ | 84 | /* Add the value, subtract its position */ |
85 | *location += sym->st_value - (uint32_t)location; | 85 | *location += sym->st_value - (uint32_t)location; |
86 | break; | 86 | break; |
87 | default: | 87 | default: |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index c0f420f76cd3..de2b7ad70273 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -45,15 +45,6 @@ int iommu_detected __read_mostly = 0; | |||
45 | */ | 45 | */ |
46 | int iommu_pass_through __read_mostly; | 46 | int iommu_pass_through __read_mostly; |
47 | 47 | ||
48 | /* | ||
49 | * Group multi-function PCI devices into a single device-group for the | ||
50 | * iommu_device_group interface. This tells the iommu driver to pretend | ||
51 | * it cannot distinguish between functions of a device, exposing only one | ||
52 | * group for the device. Useful for disallowing use of individual PCI | ||
53 | * functions from userspace drivers. | ||
54 | */ | ||
55 | int iommu_group_mf __read_mostly; | ||
56 | |||
57 | extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; | 48 | extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; |
58 | 49 | ||
59 | /* Dummy device used for NULL arguments (normally ISA). */ | 50 | /* Dummy device used for NULL arguments (normally ISA). */ |
@@ -194,8 +185,6 @@ static __init int iommu_setup(char *p) | |||
194 | #endif | 185 | #endif |
195 | if (!strncmp(p, "pt", 2)) | 186 | if (!strncmp(p, "pt", 2)) |
196 | iommu_pass_through = 1; | 187 | iommu_pass_through = 1; |
197 | if (!strncmp(p, "group_mf", 8)) | ||
198 | iommu_group_mf = 1; | ||
199 | 188 | ||
200 | gart_parse_options(p); | 189 | gart_parse_options(p); |
201 | 190 | ||
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c new file mode 100644 index 000000000000..c5a3e5cfe07f --- /dev/null +++ b/arch/x86/kernel/perf_regs.c | |||
@@ -0,0 +1,105 @@ | |||
1 | #include <linux/errno.h> | ||
2 | #include <linux/kernel.h> | ||
3 | #include <linux/sched.h> | ||
4 | #include <linux/perf_event.h> | ||
5 | #include <linux/bug.h> | ||
6 | #include <linux/stddef.h> | ||
7 | #include <asm/perf_regs.h> | ||
8 | #include <asm/ptrace.h> | ||
9 | |||
10 | #ifdef CONFIG_X86_32 | ||
11 | #define PERF_REG_X86_MAX PERF_REG_X86_32_MAX | ||
12 | #else | ||
13 | #define PERF_REG_X86_MAX PERF_REG_X86_64_MAX | ||
14 | #endif | ||
15 | |||
16 | #define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r) | ||
17 | |||
18 | static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = { | ||
19 | PT_REGS_OFFSET(PERF_REG_X86_AX, ax), | ||
20 | PT_REGS_OFFSET(PERF_REG_X86_BX, bx), | ||
21 | PT_REGS_OFFSET(PERF_REG_X86_CX, cx), | ||
22 | PT_REGS_OFFSET(PERF_REG_X86_DX, dx), | ||
23 | PT_REGS_OFFSET(PERF_REG_X86_SI, si), | ||
24 | PT_REGS_OFFSET(PERF_REG_X86_DI, di), | ||
25 | PT_REGS_OFFSET(PERF_REG_X86_BP, bp), | ||
26 | PT_REGS_OFFSET(PERF_REG_X86_SP, sp), | ||
27 | PT_REGS_OFFSET(PERF_REG_X86_IP, ip), | ||
28 | PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags), | ||
29 | PT_REGS_OFFSET(PERF_REG_X86_CS, cs), | ||
30 | PT_REGS_OFFSET(PERF_REG_X86_SS, ss), | ||
31 | #ifdef CONFIG_X86_32 | ||
32 | PT_REGS_OFFSET(PERF_REG_X86_DS, ds), | ||
33 | PT_REGS_OFFSET(PERF_REG_X86_ES, es), | ||
34 | PT_REGS_OFFSET(PERF_REG_X86_FS, fs), | ||
35 | PT_REGS_OFFSET(PERF_REG_X86_GS, gs), | ||
36 | #else | ||
37 | /* | ||
38 | * The pt_regs struct does not store | ||
39 | * ds, es, fs, gs in 64 bit mode. | ||
40 | */ | ||
41 | (unsigned int) -1, | ||
42 | (unsigned int) -1, | ||
43 | (unsigned int) -1, | ||
44 | (unsigned int) -1, | ||
45 | #endif | ||
46 | #ifdef CONFIG_X86_64 | ||
47 | PT_REGS_OFFSET(PERF_REG_X86_R8, r8), | ||
48 | PT_REGS_OFFSET(PERF_REG_X86_R9, r9), | ||
49 | PT_REGS_OFFSET(PERF_REG_X86_R10, r10), | ||
50 | PT_REGS_OFFSET(PERF_REG_X86_R11, r11), | ||
51 | PT_REGS_OFFSET(PERF_REG_X86_R12, r12), | ||
52 | PT_REGS_OFFSET(PERF_REG_X86_R13, r13), | ||
53 | PT_REGS_OFFSET(PERF_REG_X86_R14, r14), | ||
54 | PT_REGS_OFFSET(PERF_REG_X86_R15, r15), | ||
55 | #endif | ||
56 | }; | ||
57 | |||
58 | u64 perf_reg_value(struct pt_regs *regs, int idx) | ||
59 | { | ||
60 | 	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset))) | ||
61 | return 0; | ||
62 | |||
63 | return regs_get_register(regs, pt_regs_offset[idx]); | ||
64 | } | ||
65 | |||
66 | #define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL)) | ||
67 | |||
68 | #ifdef CONFIG_X86_32 | ||
69 | int perf_reg_validate(u64 mask) | ||
70 | { | ||
71 | if (!mask || mask & REG_RESERVED) | ||
72 | return -EINVAL; | ||
73 | |||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | u64 perf_reg_abi(struct task_struct *task) | ||
78 | { | ||
79 | return PERF_SAMPLE_REGS_ABI_32; | ||
80 | } | ||
81 | #else /* CONFIG_X86_64 */ | ||
82 | #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ | ||
83 | (1ULL << PERF_REG_X86_ES) | \ | ||
84 | (1ULL << PERF_REG_X86_FS) | \ | ||
85 | (1ULL << PERF_REG_X86_GS)) | ||
86 | |||
87 | int perf_reg_validate(u64 mask) | ||
88 | { | ||
89 | if (!mask || mask & REG_RESERVED) | ||
90 | return -EINVAL; | ||
91 | |||
92 | if (mask & REG_NOSUPPORT) | ||
93 | return -EINVAL; | ||
94 | |||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | u64 perf_reg_abi(struct task_struct *task) | ||
99 | { | ||
100 | if (test_tsk_thread_flag(task, TIF_IA32)) | ||
101 | return PERF_SAMPLE_REGS_ABI_32; | ||
102 | else | ||
103 | return PERF_SAMPLE_REGS_ABI_64; | ||
104 | } | ||
105 | #endif /* CONFIG_X86_32 */ | ||
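
perf_reg_validate() is pure mask arithmetic over sample_regs_user: any bit at or above PERF_REG_X86_MAX is reserved, and the 64-bit build additionally rejects DS/ES/FS/GS because pt_regs does not carry them there. The standalone check below mirrors that logic; the register indices are hard-coded to match the order of the pt_regs_offset[] table above and should be taken as illustrative, with the authoritative values living in the uapi perf_regs.h header:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define DEMO_REG_DS   12        /* DS..GS occupy indices 12-15 in the table order */
#define DEMO_REG_MAX  24        /* 64-bit: R15 + 1 */

#define REG_RESERVED  (~((1ULL << DEMO_REG_MAX) - 1ULL))
#define REG_NOSUPPORT (0xfULL << DEMO_REG_DS)   /* DS, ES, FS, GS */

static bool demo_reg_validate(uint64_t mask)
{
    if (!mask || (mask & REG_RESERVED))
        return false;           /* empty mask or touches reserved bits */
    if (mask & REG_NOSUPPORT)
        return false;           /* segment regs not in 64-bit pt_regs */
    return true;
}

int main(void)
{
    printf("%d\n", demo_reg_validate(1ULL << 0));           /* AX only: accepted */
    printf("%d\n", demo_reg_validate(1ULL << DEMO_REG_DS)); /* DS: rejected */
    printf("%d\n", demo_reg_validate(1ULL << 40));          /* reserved: rejected */
    return 0;
}
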
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 03920a15a632..1b27de563561 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -512,7 +512,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, | |||
512 | 512 | ||
513 | #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) | 513 | #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) |
514 | /* Set correct numa_node information for AMD NB functions */ | 514 | /* Set correct numa_node information for AMD NB functions */ |
515 | static void __init quirk_amd_nb_node(struct pci_dev *dev) | 515 | static void __devinit quirk_amd_nb_node(struct pci_dev *dev) |
516 | { | 516 | { |
517 | struct pci_dev *nb_ht; | 517 | struct pci_dev *nb_ht; |
518 | unsigned int devfn; | 518 | unsigned int devfn; |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 5a98aa272184..5cdff0357746 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -21,7 +21,7 @@ | |||
21 | #include <asm/cpu.h> | 21 | #include <asm/cpu.h> |
22 | #include <asm/stackprotector.h> | 22 | #include <asm/stackprotector.h> |
23 | 23 | ||
24 | DEFINE_PER_CPU(int, cpu_number); | 24 | DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); |
25 | EXPORT_PER_CPU_SYMBOL(cpu_number); | 25 | EXPORT_PER_CPU_SYMBOL(cpu_number); |
26 | 26 | ||
27 | #ifdef CONFIG_X86_64 | 27 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c1a310fb8309..7c5a8c314c02 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -106,17 +106,17 @@ int smp_num_siblings = 1; | |||
106 | EXPORT_SYMBOL(smp_num_siblings); | 106 | EXPORT_SYMBOL(smp_num_siblings); |
107 | 107 | ||
108 | /* Last level cache ID of each logical CPU */ | 108 | /* Last level cache ID of each logical CPU */ |
109 | DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; | 109 | DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; |
110 | 110 | ||
111 | /* representing HT siblings of each logical CPU */ | 111 | /* representing HT siblings of each logical CPU */ |
112 | DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); | 112 | DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); |
113 | EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); | 113 | EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); |
114 | 114 | ||
115 | /* representing HT and core siblings of each logical CPU */ | 115 | /* representing HT and core siblings of each logical CPU */ |
116 | DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); | 116 | DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); |
117 | EXPORT_PER_CPU_SYMBOL(cpu_core_map); | 117 | EXPORT_PER_CPU_SYMBOL(cpu_core_map); |
118 | 118 | ||
119 | DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map); | 119 | DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); |
120 | 120 | ||
121 | /* Per CPU bogomips and other parameters */ | 121 | /* Per CPU bogomips and other parameters */ |
122 | DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); | 122 | DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); |