author     H. Peter Anvin <hpa@linux.intel.com>  2013-01-25 19:31:21 -0500
committer  H. Peter Anvin <hpa@linux.intel.com>  2013-01-25 19:31:21 -0500
commit     7b5c4a65cc27f017c170b025f8d6d75dabb11c6f (patch)
tree       05deacbc66a9f5c27147a6ea975211ae82281044 /arch/x86/kernel
parent     3596f5bb0a6afd01a784bfe120f420edbbf82861 (diff)
parent     949db153b6466c6f7cad5a427ecea94985927311 (diff)
Merge tag 'v3.8-rc5' into x86/mm
The __pa() fixup series that follows touches KVM code that is not
present in the existing branch based on v3.7-rc5, so merge in the
current upstream from Linus.
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/kernel')
55 files changed, 1297 insertions, 797 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 91ce48f05f9f..34e923a53762 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -9,7 +9,6 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 ifdef CONFIG_FUNCTION_TRACER
 # Do not profile debug and lowlevel utilities
 CFLAGS_REMOVE_tsc.o = -pg
-CFLAGS_REMOVE_rtc.o = -pg
 CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
 CFLAGS_REMOVE_pvclock.o = -pg
 CFLAGS_REMOVE_kvmclock.o = -pg
@@ -62,6 +61,7 @@ obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
 obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
+obj-$(CONFIG_X86_TSC) += trace_clock.o
 obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e651f7a589ac..bacf4b0d91f4 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -574,6 +574,12 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
 
 	return irq;
 }
+EXPORT_SYMBOL_GPL(acpi_register_gsi);
+
+void acpi_unregister_gsi(u32 gsi)
+{
+}
+EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
 
 void __init acpi_set_irq_model_pic(void)
 {
@@ -1700,3 +1706,9 @@ int __acpi_release_global_lock(unsigned int *lock)
 	} while (unlikely (val != old));
 	return old & 0x1;
 }
+
+void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
+{
+	e820_add_region(addr, size, E820_ACPI);
+	update_e820();
+}
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index f146a3c10814..0532f5d6e4ef 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,6 +101,8 @@ static int __init acpi_sleep_setup(char *str)
 #endif
 		if (strncmp(str, "nonvs", 5) == 0)
 			acpi_nvs_nosave();
+		if (strncmp(str, "nonvs_s3", 8) == 0)
+			acpi_nvs_nosave_s3();
 		if (strncmp(str, "old_ordering", 12) == 0)
 			acpi_old_suspend_ordering();
 		str = strchr(str, ',');
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b17416e72fbd..b994cc84aa7e 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -90,21 +90,6 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
  */
 DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
 
-/*
- * Knob to control our willingness to enable the local APIC.
- *
- * +1=force-enable
- */
-static int force_enable_local_apic __initdata;
-/*
- * APIC command line parameters
- */
-static int __init parse_lapic(char *arg)
-{
-	force_enable_local_apic = 1;
-	return 0;
-}
-early_param("lapic", parse_lapic);
 /* Local APIC was disabled by the BIOS and enabled by the kernel */
 static int enabled_via_apicbase;
 
@@ -133,6 +118,25 @@ static inline void imcr_apic_to_pic(void)
 }
 #endif
 
+/*
+ * Knob to control our willingness to enable the local APIC.
+ *
+ * +1=force-enable
+ */
+static int force_enable_local_apic __initdata;
+/*
+ * APIC command line parameters
+ */
+static int __init parse_lapic(char *arg)
+{
+	if (config_enabled(CONFIG_X86_32) && !arg)
+		force_enable_local_apic = 1;
+	else if (!strncmp(arg, "notscdeadline", 13))
+		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
+	return 0;
+}
+early_param("lapic", parse_lapic);
+
 #ifdef CONFIG_X86_64
 static int apic_calibrate_pmtmr __initdata;
 static __init int setup_apicpmtimer(char *s)
@@ -315,6 +319,7 @@ int lapic_get_maxlvt(void)
 
 /* Clock divisor */
 #define APIC_DIVISOR 16
+#define TSC_DIVISOR 32
 
 /*
  * This function sets up the local APIC timer, with a timeout of
@@ -333,6 +338,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
 	lvtt_value = LOCAL_TIMER_VECTOR;
 	if (!oneshot)
 		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
+	else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+		lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;
+
 	if (!lapic_is_integrated())
 		lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
 
@@ -341,6 +349,11 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
 
 	apic_write(APIC_LVTT, lvtt_value);
 
+	if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
+		printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
+		return;
+	}
+
 	/*
 	 * Divide PICLK by 16
 	 */
@@ -453,6 +466,16 @@ static int lapic_next_event(unsigned long delta,
 	return 0;
 }
 
+static int lapic_next_deadline(unsigned long delta,
+			       struct clock_event_device *evt)
+{
+	u64 tsc;
+
+	rdtscll(tsc);
+	wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
+	return 0;
+}
+
 /*
  * Setup the lapic timer in periodic or oneshot mode
  */
@@ -533,7 +556,15 @@ static void __cpuinit setup_APIC_timer(void)
 	memcpy(levt, &lapic_clockevent, sizeof(*levt));
 	levt->cpumask = cpumask_of(smp_processor_id());
 
-	clockevents_register_device(levt);
+	if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
+		levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
+				    CLOCK_EVT_FEAT_DUMMY);
+		levt->set_next_event = lapic_next_deadline;
+		clockevents_config_and_register(levt,
+						(tsc_khz / TSC_DIVISOR) * 1000,
+						0xF, ~0UL);
+	} else
+		clockevents_register_device(levt);
 }
 
 /*
@@ -661,7 +692,9 @@ static int __init calibrate_APIC_clock(void)
 	 * in the clockevent structure and return.
	 */
 
-	if (lapic_timer_frequency) {
+	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
+		return 0;
+	} else if (lapic_timer_frequency) {
 		apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
 			    lapic_timer_frequency);
 		lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
@@ -674,6 +707,9 @@ static int __init calibrate_APIC_clock(void)
 		return 0;
 	}
 
+	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
+		    "calibrating APIC timer ...\n");
+
 	local_irq_disable();
 
 	/* Replace the global interrupt handler */
@@ -811,9 +847,6 @@ void __init setup_boot_APIC_clock(void)
 		return;
 	}
 
-	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
-		    "calibrating APIC timer ...\n");
-
 	if (calibrate_APIC_clock()) {
 		/* No broadcast on UP ! */
 		if (num_possible_cpus() > 1)
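Side note on the TSC_DIVISOR arithmetic above (an illustration, not part of the patch): clockevents_config_and_register() advertises the deadline clockevent at (tsc_khz / TSC_DIVISOR) * 1000 Hz, so the delta handed to lapic_next_deadline() is measured in units of TSC_DIVISOR TSC cycles and must be scaled back up before arming MSR_IA32_TSC_DEADLINE. A minimal sketch of that conversion, with the 3.2 GHz figure chosen purely as an example:

	/* Sketch only: how a clockevent delta maps onto raw TSC cycles. */
	static u64 deadline_delta_to_tsc(unsigned long delta)
	{
		/*
		 * One clockevent tick == TSC_DIVISOR (32) TSC cycles.  On an
		 * assumed 3.2 GHz TSC the clockevent rate is 100 MHz, so a
		 * delta of 100000 becomes 100000 * 32 = 3,200,000 TSC cycles,
		 * i.e. one millisecond.
		 */
		return (u64)delta * TSC_DIVISOR;
	}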
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index ae9196f31261..9a9110918ca7 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -22,6 +22,7 @@
 #include <linux/hardirq.h>
 #include <linux/delay.h>
 
+#include <asm/numachip/numachip.h>
 #include <asm/numachip/numachip_csr.h>
 #include <asm/smp.h>
 #include <asm/apic.h>
@@ -180,6 +181,7 @@ static int __init numachip_system_init(void)
 		return 0;
 
 	x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
+	x86_init.pci.arch_init = pci_numachip_init;
 
 	map_csrs();
 
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 1817fa911024..b739d398bb29 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -234,11 +234,11 @@ int __init arch_early_irq_init(void)
 		zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
 		/*
 		 * For legacy IRQ's, start with assigning irq0 to irq15 to
-		 * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
+		 * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's.
 		 */
 		if (i < legacy_pic->nr_legacy_irqs) {
 			cfg[i].vector = IRQ0_VECTOR + i;
-			cpumask_set_cpu(0, cfg[i].domain);
+			cpumask_setall(cfg[i].domain);
 		}
 	}
 
@@ -1141,7 +1141,8 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 			 * allocation for the members that are not used anymore.
 			 */
 			cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
-			cfg->move_in_progress = 1;
+			cfg->move_in_progress =
+			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
 			cpumask_and(cfg->domain, cfg->domain, tmp_mask);
 			break;
 		}
@@ -1172,8 +1173,9 @@ next:
 		current_vector = vector;
 		current_offset = offset;
 		if (cfg->vector) {
-			cfg->move_in_progress = 1;
 			cpumask_copy(cfg->old_domain, cfg->domain);
+			cfg->move_in_progress =
+			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
 		}
 		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
 			per_cpu(vector_irq, new_cpu)[vector] = irq;
@@ -1241,12 +1243,6 @@ void __setup_vector_irq(int cpu)
 		cfg = irq_get_chip_data(irq);
 		if (!cfg)
 			continue;
-		/*
-		 * If it is a legacy IRQ handled by the legacy PIC, this cpu
-		 * will be part of the irq_cfg's domain.
-		 */
-		if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq))
-			cpumask_set_cpu(cpu, cfg->domain);
 
 		if (!cpumask_test_cpu(cpu, cfg->domain))
 			continue;
@@ -1356,16 +1352,6 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
 	if (!IO_APIC_IRQ(irq))
 		return;
 
-	/*
-	 * For legacy irqs, cfg->domain starts with cpu 0. Now that IO-APIC
-	 * can handle this irq and the apic driver is finialized at this point,
-	 * update the cfg->domain.
-	 */
-	if (irq < legacy_pic->nr_legacy_irqs &&
-	    cpumask_equal(cfg->domain, cpumask_of(0)))
-		apic->vector_allocation_domain(0, cfg->domain,
-					       apic->target_cpus());
-
 	if (assign_irq_vector(irq, cfg, apic->target_cpus()))
 		return;
 
@@ -2199,9 +2185,11 @@ static int ioapic_retrigger_irq(struct irq_data *data)
 {
 	struct irq_cfg *cfg = data->chip_data;
 	unsigned long flags;
+	int cpu;
 
 	raw_spin_lock_irqsave(&vector_lock, flags);
-	apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
+	cpu = cpumask_first_and(cfg->domain, cpu_online_mask);
+	apic->send_IPI_mask(cpumask_of(cpu), cfg->vector);
 	raw_spin_unlock_irqrestore(&vector_lock, flags);
 
 	return 1;
@@ -3317,8 +3305,9 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
 	int ret;
 
 	if (irq_remapping_enabled) {
-		if (!setup_hpet_msi_remapped(irq, id))
-			return -1;
+		ret = setup_hpet_msi_remapped(irq, id);
+		if (ret)
+			return ret;
 	}
 
 	ret = msi_compose_msg(NULL, irq, &msg, id);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f7e98a2c0d12..15239fffd6fe 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -304,7 +304,7 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
 	int cpu = smp_processor_id();
 
 	/* get information required for multi-node processors */
-	if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
+	if (cpu_has_topoext) {
 		u32 eax, ebx, ecx, edx;
 
 		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
@@ -631,6 +631,20 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 		}
 	}
 
+	/*
+	 * The way access filter has a performance penalty on some workloads.
+	 * Disable it on the affected CPUs.
+	 */
+	if ((c->x86 == 0x15) &&
+	    (c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
+		u64 val;
+
+		if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) {
+			val |= 0x1E;
+			wrmsrl_safe(0xc0011021, val);
+		}
+	}
+
 	cpu_detect_cache_sizes(c);
 
 	/* Multi core CPU? */
@@ -643,12 +657,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	detect_ht(c);
 #endif
 
-	if (c->extended_cpuid_level >= 0x80000006) {
-		if (cpuid_edx(0x80000006) & 0xf000)
-			num_cache_leaves = 4;
-		else
-			num_cache_leaves = 3;
-	}
+	init_amd_cacheinfo(c);
 
 	if (c->x86 >= 0xf)
 		set_cpu_cap(c, X86_FEATURE_K8);
@@ -739,9 +748,6 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
 
 static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
 {
-	if (!cpu_has_invlpg)
-		return;
-
 	tlb_flushall_shift = 5;
 
 	if (c->x86 <= 0x11)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d0e910da16c5..92dfec986a48 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -107,53 +107,17 @@ static void __init check_hlt(void)
 }
 
 /*
- * Most 386 processors have a bug where a POPAD can lock the
- * machine even from user space.
- */
-
-static void __init check_popad(void)
-{
-#ifndef CONFIG_X86_POPAD_OK
-	int res, inp = (int) &res;
-
-	pr_info("Checking for popad bug... ");
-	__asm__ __volatile__(
-		"movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
-		: "=&a" (res)
-		: "d" (inp)
-		: "ecx", "edi");
-	/*
-	 * If this fails, it means that any user program may lock the
-	 * CPU hard. Too bad.
-	 */
-	if (res != 12345678)
-		pr_cont("Buggy\n");
-	else
-		pr_cont("OK\n");
-#endif
-}
-
-/*
  * Check whether we are able to run this kernel safely on SMP.
  *
- * - In order to run on a i386, we need to be compiled for i386
- *   (for due to lack of "invlpg" and working WP on a i386)
+ * - i386 is no longer supported.
  * - In order to run on anything without a TSC, we need to be
  *   compiled for a i486.
  */
 
 static void __init check_config(void)
 {
-	/*
-	 * We'd better not be a i386 if we're configured to use some
-	 * i486+ only features! (WP works in supervisor mode and the
-	 * new "invlpg" and "bswap" instructions)
-	 */
-#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || \
-	defined(CONFIG_X86_BSWAP)
-	if (boot_cpu_data.x86 == 3)
+	if (boot_cpu_data.x86 < 4)
 		panic("Kernel requires i486+ for 'invlpg' and other features");
-#endif
 }
 
 
@@ -166,7 +130,6 @@ void __init check_bugs(void)
 #endif
 	check_config();
 	check_hlt();
-	check_popad();
 	init_utsname()->machine[1] =
 		'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
 	alternative_instructions();
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7505f7b13e71..9c3ab43a6954 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1173,15 +1173,6 @@ DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
 
-/* Make sure %fs and %gs are initialized properly in idle threads */
-struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
-{
-	memset(regs, 0, sizeof(struct pt_regs));
-	regs->fs = __KERNEL_PERCPU;
-	regs->gs = __KERNEL_STACK_CANARY;
-
-	return regs;
-}
 #endif /* CONFIG_X86_64 */
 
 /*
@@ -1237,7 +1228,7 @@ void __cpuinit cpu_init(void)
 	oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
-	if (cpu != 0 && this_cpu_read(numa_node) == 0 &&
+	if (this_cpu_read(numa_node) == 0 &&
 	    early_cpu_to_node(cpu) != NUMA_NO_NODE)
 		set_numa_node(early_cpu_to_node(cpu));
 #endif
@@ -1269,8 +1260,7 @@ void __cpuinit cpu_init(void)
 	barrier();
 
 	x86_configure_nx();
-	if (cpu != 0)
-		enable_x2apic();
+	enable_x2apic();
 
 	/*
 	 * set up and load the per-CPU TSS
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 2249e7e44521..fdfefa27b948 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -612,10 +612,6 @@ static void __cpuinit intel_tlb_lookup(const unsigned char desc)
 
 static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
 {
-	if (!cpu_has_invlpg) {
-		tlb_flushall_shift = -1;
-		return;
-	}
 	switch ((c->x86 << 8) + c->x86_model) {
 	case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
 	case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 93c5451bdd52..fe9edec6698a 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -538,7 +538,11 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
 	unsigned edx;
 
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
-		amd_cpuid4(index, &eax, &ebx, &ecx);
+		if (cpu_has_topoext)
+			cpuid_count(0x8000001d, index, &eax.full,
+				    &ebx.full, &ecx.full, &edx);
+		else
+			amd_cpuid4(index, &eax, &ebx, &ecx);
 		amd_init_l3_cache(this_leaf, index);
 	} else {
 		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
@@ -557,21 +561,39 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
 	return 0;
 }
 
-static int __cpuinit find_num_cache_leaves(void)
+static int __cpuinit find_num_cache_leaves(struct cpuinfo_x86 *c)
 {
-	unsigned int eax, ebx, ecx, edx;
+	unsigned int eax, ebx, ecx, edx, op;
 	union _cpuid4_leaf_eax cache_eax;
 	int i = -1;
 
+	if (c->x86_vendor == X86_VENDOR_AMD)
+		op = 0x8000001d;
+	else
+		op = 4;
+
 	do {
 		++i;
-		/* Do cpuid(4) loop to find out num_cache_leaves */
-		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
+		/* Do cpuid(op) loop to find out num_cache_leaves */
+		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
 		cache_eax.full = eax;
 	} while (cache_eax.split.type != CACHE_TYPE_NULL);
 	return i;
 }
 
+void __cpuinit init_amd_cacheinfo(struct cpuinfo_x86 *c)
+{
+
+	if (cpu_has_topoext) {
+		num_cache_leaves = find_num_cache_leaves(c);
+	} else if (c->extended_cpuid_level >= 0x80000006) {
+		if (cpuid_edx(0x80000006) & 0xf000)
+			num_cache_leaves = 4;
+		else
+			num_cache_leaves = 3;
+	}
+}
+
 unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 {
 	/* Cache sizes */
@@ -588,7 +610,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 
 	if (is_initialized == 0) {
 		/* Init num_cache_leaves from boot CPU */
-		num_cache_leaves = find_num_cache_leaves();
+		num_cache_leaves = find_num_cache_leaves(c);
 		is_initialized++;
 	}
 
@@ -728,37 +750,50 @@ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
 static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
 {
 	struct _cpuid4_info *this_leaf;
-	int ret, i, sibling;
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	int i, sibling;
 
-	ret = 0;
-	if (index == 3) {
-		ret = 1;
-		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
+	if (cpu_has_topoext) {
+		unsigned int apicid, nshared, first, last;
+
+		if (!per_cpu(ici_cpuid4_info, cpu))
+			return 0;
+
+		this_leaf = CPUID4_INFO_IDX(cpu, index);
+		nshared = this_leaf->base.eax.split.num_threads_sharing + 1;
+		apicid = cpu_data(cpu).apicid;
+		first = apicid - (apicid % nshared);
+		last = first + nshared - 1;
+
+		for_each_online_cpu(i) {
+			apicid = cpu_data(i).apicid;
+			if ((apicid < first) || (apicid > last))
+				continue;
 			if (!per_cpu(ici_cpuid4_info, i))
 				continue;
 			this_leaf = CPUID4_INFO_IDX(i, index);
-			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
-				if (!cpu_online(sibling))
+
+			for_each_online_cpu(sibling) {
+				apicid = cpu_data(sibling).apicid;
+				if ((apicid < first) || (apicid > last))
 					continue;
 				set_bit(sibling, this_leaf->shared_cpu_map);
 			}
 		}
-	} else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) {
-		ret = 1;
-		for_each_cpu(i, cpu_sibling_mask(cpu)) {
+	} else if (index == 3) {
+		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
 			if (!per_cpu(ici_cpuid4_info, i))
 				continue;
 			this_leaf = CPUID4_INFO_IDX(i, index);
-			for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
+			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
 				if (!cpu_online(sibling))
 					continue;
 				set_bit(sibling, this_leaf->shared_cpu_map);
 			}
 		}
-	}
+	} else
+		return 0;
 
-	return ret;
+	return 1;
 }
 
 static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
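A note on the cache-sharing computation introduced in cache_shared_amd_cpu_map_setup() above: with CPUID leaf 0x8000001d, num_threads_sharing reports how many threads see a given cache, and CPUs are grouped by carving the APIC ID space into aligned blocks of that size. A standalone sketch of the first/last arithmetic with made-up example values (illustration only, not part of the patch):

	#include <stdio.h>

	int main(void)
	{
		/* Example values: 4 threads share the cache, this CPU's APIC ID is 6. */
		unsigned int nshared = 4;
		unsigned int apicid = 6;
		unsigned int first = apicid - (apicid % nshared);	/* 4 */
		unsigned int last = first + nshared - 1;		/* 7 */

		printf("APIC IDs %u..%u share this cache level\n", first, last);
		return 0;
	}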
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 6a05c1d327a9..5b7d4fa5d3b7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -24,8 +24,6 @@ struct mce_bank {
 int mce_severity(struct mce *a, int tolerant, char **msg);
 struct dentry *mce_get_debugfs_dir(void);
 
-extern int mce_ser;
-
 extern struct mce_bank *mce_banks;
 
 #ifdef CONFIG_X86_MCE_INTEL
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 13017626f9a8..beb1f1689e52 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -193,9 +193,9 @@ int mce_severity(struct mce *m, int tolerant, char **msg)
 			continue;
 		if ((m->mcgstatus & s->mcgmask) != s->mcgres)
 			continue;
-		if (s->ser == SER_REQUIRED && !mce_ser)
+		if (s->ser == SER_REQUIRED && !mca_cfg.ser)
 			continue;
-		if (s->ser == NO_SER && mce_ser)
+		if (s->ser == NO_SER && mca_cfg.ser)
 			continue;
 		if (s->context && ctx != s->context)
 			continue;
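The mca_cfg referenced above and throughout the next file is the new struct mca_config that this series introduces; its declaration lives in arch/x86/include/asm/mce.h and is not shown in this diff. The following sketch is reconstructed purely from the fields the patch initializes and reads, so member types and ordering are assumptions, not the real declaration:

	#include <linux/types.h>

	/* Sketch only -- reconstructed from usage in this diff. */
	struct mca_config {
		bool dont_log_ce;	/* "dont_log_ce" boot option */
		bool cmci_disabled;	/* "no_cmci" boot option */
		bool ignore_ce;		/* "ignore_ce" boot option */
		bool disabled;		/* "off" boot option */
		bool ser;		/* set when MCG_CAP has MCG_SER_P */
		u8   banks;		/* bank count from MCG_CAP */
		int  tolerant;		/* 0..3, default 1 */
		int  monarch_timeout;	/* usecs, -1 = unset */
		int  panic_timeout;	/* seconds, 30 unless boot logging is off */
		int  bootlog;		/* -1 default, 0 = don't log boot errors */
		u32  rip_msr;		/* MSR_IA32_MCG_EIP when available */
	};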
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 46cbf8689692..80dbda84f1c3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -58,34 +58,26 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
 #define CREATE_TRACE_POINTS
 #include <trace/events/mce.h>
 
-int mce_disabled __read_mostly;
-
 #define SPINUNIT 100 /* 100ns */
 
 atomic_t mce_entry;
 
 DEFINE_PER_CPU(unsigned, mce_exception_count);
 
-/*
- * Tolerant levels:
- * 0: always panic on uncorrected errors, log corrected errors
- * 1: panic or SIGBUS on uncorrected errors, log corrected errors
- * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
- * 3: never panic or SIGBUS, log all errors (for testing only)
- */
-static int tolerant __read_mostly = 1;
-static int banks __read_mostly;
-static int rip_msr __read_mostly;
-static int mce_bootlog __read_mostly = -1;
-static int monarch_timeout __read_mostly = -1;
-static int mce_panic_timeout __read_mostly;
-static int mce_dont_log_ce __read_mostly;
-int mce_cmci_disabled __read_mostly;
-int mce_ignore_ce __read_mostly;
-int mce_ser __read_mostly;
-int mce_bios_cmci_threshold __read_mostly;
-
-struct mce_bank *mce_banks __read_mostly;
+struct mce_bank *mce_banks __read_mostly;
+
+struct mca_config mca_cfg __read_mostly = {
+	.bootlog = -1,
+	/*
+	 * Tolerant levels:
+	 * 0: always panic on uncorrected errors, log corrected errors
+	 * 1: panic or SIGBUS on uncorrected errors, log corrected errors
+	 * 2: SIGBUS or log uncorrected errors (if possible), log corr. errors
+	 * 3: never panic or SIGBUS, log all errors (for testing only)
+	 */
+	.tolerant = 1,
+	.monarch_timeout = -1
+};
 
 /* User mode helper program triggered by machine check event */
 static unsigned long mce_need_notify;
@@ -302,7 +294,7 @@ static void wait_for_panic(void)
 	while (timeout-- > 0)
 		udelay(1);
 	if (panic_timeout == 0)
-		panic_timeout = mce_panic_timeout;
+		panic_timeout = mca_cfg.panic_timeout;
 	panic("Panicing machine check CPU died");
 }
 
@@ -360,7 +352,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 		pr_emerg(HW_ERR "Machine check: %s\n", exp);
 	if (!fake_panic) {
 		if (panic_timeout == 0)
-			panic_timeout = mce_panic_timeout;
+			panic_timeout = mca_cfg.panic_timeout;
 		panic(msg);
 	} else
 		pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
@@ -372,7 +364,7 @@ static int msr_to_offset(u32 msr)
 {
 	unsigned bank = __this_cpu_read(injectm.bank);
 
-	if (msr == rip_msr)
+	if (msr == mca_cfg.rip_msr)
 		return offsetof(struct mce, ip);
 	if (msr == MSR_IA32_MCx_STATUS(bank))
 		return offsetof(struct mce, status);
@@ -451,8 +443,8 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
 			m->cs |= 3;
 		}
 		/* Use accurate RIP reporting if available. */
-		if (rip_msr)
-			m->ip = mce_rdmsrl(rip_msr);
+		if (mca_cfg.rip_msr)
+			m->ip = mce_rdmsrl(mca_cfg.rip_msr);
 	}
 }
 
@@ -513,7 +505,7 @@ static int mce_ring_add(unsigned long pfn)
 
 int mce_available(struct cpuinfo_x86 *c)
 {
-	if (mce_disabled)
+	if (mca_cfg.disabled)
 		return 0;
 	return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
 }
@@ -565,7 +557,7 @@ static void mce_read_aux(struct mce *m, int i)
 		/*
 		 * Mask the reported address by the reported granularity.
 		 */
-		if (mce_ser && (m->status & MCI_STATUS_MISCV)) {
+		if (mca_cfg.ser && (m->status & MCI_STATUS_MISCV)) {
 			u8 shift = MCI_MISC_ADDR_LSB(m->misc);
 			m->addr >>= shift;
 			m->addr <<= shift;
@@ -599,7 +591,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 
 	mce_gather_info(&m, NULL);
 
-	for (i = 0; i < banks; i++) {
+	for (i = 0; i < mca_cfg.banks; i++) {
 		if (!mce_banks[i].ctl || !test_bit(i, *b))
 			continue;
 
@@ -620,7 +612,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		 * TBD do the same check for MCI_STATUS_EN here?
 		 */
 		if (!(flags & MCP_UC) &&
-		    (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
+		    (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC)))
 			continue;
 
 		mce_read_aux(&m, i);
@@ -631,7 +623,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		 * Don't get the IP here because it's unlikely to
 		 * have anything to do with the actual error location.
 		 */
-		if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce)
+		if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
 			mce_log(&m);
 
 		/*
@@ -658,14 +650,14 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
 {
 	int i, ret = 0;
 
-	for (i = 0; i < banks; i++) {
+	for (i = 0; i < mca_cfg.banks; i++) {
 		m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
 		if (m->status & MCI_STATUS_VAL) {
 			__set_bit(i, validp);
 			if (quirk_no_way_out)
 				quirk_no_way_out(i, m, regs);
 		}
-		if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
+		if (mce_severity(m, mca_cfg.tolerant, msg) >= MCE_PANIC_SEVERITY)
 			ret = 1;
 	}
 	return ret;
@@ -696,11 +688,11 @@ static int mce_timed_out(u64 *t)
 	rmb();
 	if (atomic_read(&mce_paniced))
 		wait_for_panic();
-	if (!monarch_timeout)
+	if (!mca_cfg.monarch_timeout)
 		goto out;
 	if ((s64)*t < SPINUNIT) {
 		/* CHECKME: Make panic default for 1 too? */
-		if (tolerant < 1)
+		if (mca_cfg.tolerant < 1)
 			mce_panic("Timeout synchronizing machine check over CPUs",
 				  NULL, NULL);
 		cpu_missing = 1;
@@ -750,7 +742,8 @@ static void mce_reign(void)
 	 * Grade the severity of the errors of all the CPUs.
 	 */
 	for_each_possible_cpu(cpu) {
-		int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant,
+		int severity = mce_severity(&per_cpu(mces_seen, cpu),
+					    mca_cfg.tolerant,
 					    &nmsg);
 		if (severity > global_worst) {
 			msg = nmsg;
@@ -764,7 +757,7 @@ static void mce_reign(void)
 	 * This dumps all the mces in the log buffer and stops the
 	 * other CPUs.
 	 */
-	if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3)
+	if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
 		mce_panic("Fatal Machine check", m, msg);
 
 	/*
@@ -777,7 +770,7 @@ static void mce_reign(void)
 	 * No machine check event found. Must be some external
 	 * source or one CPU is hung. Panic.
 	 */
-	if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3)
+	if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3)
 		mce_panic("Machine check from unknown source", NULL, NULL);
 
 	/*
@@ -801,7 +794,7 @@ static int mce_start(int *no_way_out)
 {
 	int order;
 	int cpus = num_online_cpus();
-	u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
+	u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;
 
 	if (!timeout)
 		return -1;
@@ -865,7 +858,7 @@ static int mce_end(int order)
 static int mce_end(int order)
 {
 	int ret = -1;
-	u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
+	u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;
 
 	if (!timeout)
 		goto reset;
@@ -946,7 +939,7 @@ static void mce_clear_state(unsigned long *toclear)
 {
 	int i;
 
-	for (i = 0; i < banks; i++) {
+	for (i = 0; i < mca_cfg.banks; i++) {
 		if (test_bit(i, toclear))
 			mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
 	}
@@ -1011,6 +1004,7 @@ static void mce_clear_info(struct mce_info *mi)
  */
 void do_machine_check(struct pt_regs *regs, long error_code)
 {
+	struct mca_config *cfg = &mca_cfg;
 	struct mce m, *final;
 	int i;
 	int worst = 0;
@@ -1022,7 +1016,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	int order;
 	/*
 	 * If no_way_out gets set, there is no safe way to recover from this
-	 * MCE. If tolerant is cranked up, we'll try anyway.
+	 * MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
 	 */
 	int no_way_out = 0;
 	/*
@@ -1038,7 +1032,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
 	this_cpu_inc(mce_exception_count);
 
-	if (!banks)
+	if (!cfg->banks)
 		goto out;
 
 	mce_gather_info(&m, regs);
@@ -1065,7 +1059,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	 * because the first one to see it will clear it.
 	 */
 	order = mce_start(&no_way_out);
-	for (i = 0; i < banks; i++) {
+	for (i = 0; i < cfg->banks; i++) {
 		__clear_bit(i, toclear);
 		if (!test_bit(i, valid_banks))
 			continue;
@@ -1084,7 +1078,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		 * Non uncorrected or non signaled errors are handled by
 		 * machine_check_poll. Leave them alone, unless this panics.
 		 */
-		if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
+		if (!(m.status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
 			!no_way_out)
 			continue;
 
@@ -1093,7 +1087,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		 */
 		add_taint(TAINT_MACHINE_CHECK);
 
-		severity = mce_severity(&m, tolerant, NULL);
+		severity = mce_severity(&m, cfg->tolerant, NULL);
 
 		/*
 		 * When machine check was for corrected handler don't touch,
@@ -1117,7 +1111,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		 * When the ring overflows we just ignore the AO error.
 		 * RED-PEN add some logging mechanism when
 		 * usable_address or mce_add_ring fails.
-		 * RED-PEN don't ignore overflow for tolerant == 0
+		 * RED-PEN don't ignore overflow for mca_cfg.tolerant == 0
 		 */
 		if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
 			mce_ring_add(m.addr >> PAGE_SHIFT);
@@ -1149,7 +1143,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	 * issues we try to recover, or limit damage to the current
 	 * process.
 	 */
-	if (tolerant < 3) {
+	if (cfg->tolerant < 3) {
 		if (no_way_out)
 			mce_panic("Fatal machine check on current CPU", &m, msg);
 		if (worst == MCE_AR_SEVERITY) {
@@ -1377,11 +1371,13 @@ EXPORT_SYMBOL_GPL(mce_notify_irq);
 static int __cpuinit __mcheck_cpu_mce_banks_init(void)
 {
 	int i;
+	u8 num_banks = mca_cfg.banks;
 
-	mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL);
+	mce_banks = kzalloc(num_banks * sizeof(struct mce_bank), GFP_KERNEL);
 	if (!mce_banks)
 		return -ENOMEM;
-	for (i = 0; i < banks; i++) {
+
+	for (i = 0; i < num_banks; i++) {
 		struct mce_bank *b = &mce_banks[i];
 
 		b->ctl = -1ULL;
@@ -1401,7 +1397,7 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
 
 	b = cap & MCG_BANKCNT_MASK;
-	if (!banks)
+	if (!mca_cfg.banks)
 		pr_info("CPU supports %d MCE banks\n", b);
 
 	if (b > MAX_NR_BANKS) {
@@ -1411,8 +1407,9 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
 	}
 
 	/* Don't support asymmetric configurations today */
-	WARN_ON(banks != 0 && b != banks);
-	banks = b;
+	WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks);
+	mca_cfg.banks = b;
+
 	if (!mce_banks) {
 		int err = __mcheck_cpu_mce_banks_init();
 
@@ -1422,25 +1419,29 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
 
 	/* Use accurate RIP reporting if available. */
 	if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
-		rip_msr = MSR_IA32_MCG_EIP;
+		mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
 
 	if (cap & MCG_SER_P)
-		mce_ser = 1;
+		mca_cfg.ser = true;
 
 	return 0;
 }
 
 static void __mcheck_cpu_init_generic(void)
 {
+	enum mcp_flags m_fl = 0;
 	mce_banks_t all_banks;
 	u64 cap;
 	int i;
 
+	if (!mca_cfg.bootlog)
+		m_fl = MCP_DONTLOG;
+
 	/*
 	 * Log the machine checks left over from the previous reset.
	 */
 	bitmap_fill(all_banks, MAX_NR_BANKS);
-	machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);
+	machine_check_poll(MCP_UC | m_fl, &all_banks);
 
 	set_in_cr4(X86_CR4_MCE);
 
@@ -1448,7 +1449,7 @@ static void __mcheck_cpu_init_generic(void)
 	if (cap & MCG_CTL_P)
 		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
 
-	for (i = 0; i < banks; i++) {
+	for (i = 0; i < mca_cfg.banks; i++) {
 		struct mce_bank *b = &mce_banks[i];
 
 		if (!b->init)
@@ -1489,6 +1490,8 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
 /* Add per CPU specific workarounds here */
 static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 {
+	struct mca_config *cfg = &mca_cfg;
+
 	if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
 		pr_info("unknown CPU type - not enabling MCE support\n");
 		return -EOPNOTSUPP;
@@ -1496,7 +1499,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 
 	/* This should be disabled by the BIOS, but isn't always */
 	if (c->x86_vendor == X86_VENDOR_AMD) {
-		if (c->x86 == 15 && banks > 4) {
+		if (c->x86 == 15 && cfg->banks > 4) {
 			/*
 			 * disable GART TBL walk error reporting, which
 			 * trips off incorrectly with the IOMMU & 3ware
@@ -1504,18 +1507,18 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 			 */
 			clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
 		}
-		if (c->x86 <= 17 && mce_bootlog < 0) {
+		if (c->x86 <= 17 && cfg->bootlog < 0) {
 			/*
 			 * Lots of broken BIOS around that don't clear them
 			 * by default and leave crap in there. Don't log:
 			 */
-			mce_bootlog = 0;
+			cfg->bootlog = 0;
 		}
 		/*
 		 * Various K7s with broken bank 0 around. Always disable
 		 * by default.
		 */
-		if (c->x86 == 6 && banks > 0)
+		if (c->x86 == 6 && cfg->banks > 0)
 			mce_banks[0].ctl = 0;
 
 		/*
@@ -1566,7 +1569,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 		 * valid event later, merely don't write CTL0.
		 */
 
-		if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0)
+		if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0)
 			mce_banks[0].init = 0;
 
 		/*
@@ -1574,23 +1577,23 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 		 * synchronization with a one second timeout.
		 */
 		if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
-			monarch_timeout < 0)
-			monarch_timeout = USEC_PER_SEC;
+			cfg->monarch_timeout < 0)
+			cfg->monarch_timeout = USEC_PER_SEC;
 
 		/*
 		 * There are also broken BIOSes on some Pentium M and
 		 * earlier systems:
		 */
-		if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0)
-			mce_bootlog = 0;
+		if (c->x86 == 6 && c->x86_model <= 13 && cfg->bootlog < 0)
+			cfg->bootlog = 0;
 
 		if (c->x86 == 6 && c->x86_model == 45)
 			quirk_no_way_out = quirk_sandybridge_ifu;
 	}
-	if (monarch_timeout < 0)
-		monarch_timeout = 0;
-	if (mce_bootlog != 0)
-		mce_panic_timeout = 30;
+	if (cfg->monarch_timeout < 0)
+		cfg->monarch_timeout = 0;
+	if (cfg->bootlog != 0)
+		cfg->panic_timeout = 30;
 
 	return 0;
 }
@@ -1635,7 +1638,7 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t)
 
 	__this_cpu_write(mce_next_interval, iv);
 
-	if (mce_ignore_ce || !iv)
+	if (mca_cfg.ignore_ce || !iv)
 		return;
 
 	t->expires = round_jiffies(jiffies + iv);
@@ -1668,7 +1671,7 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
  */
 void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
 {
-	if (mce_disabled)
+	if (mca_cfg.disabled)
 		return;
 
 	if (__mcheck_cpu_ancient_init(c))
@@ -1678,7 +1681,7 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
 		return;
 
 	if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
-		mce_disabled = 1;
+		mca_cfg.disabled = true;
 		return;
 	}
 
@@ -1951,6 +1954,8 @@ static struct miscdevice mce_chrdev_device = {
  */
 static int __init mcheck_enable(char *str)
 {
+	struct mca_config *cfg = &mca_cfg;
+
 	if (*str == 0) {
 		enable_p5_mce();
 		return 1;
@@ -1958,22 +1963,22 @@ static int __init mcheck_enable(char *str)
 	if (*str == '=')
 		str++;
 	if (!strcmp(str, "off"))
-		mce_disabled = 1;
+		cfg->disabled = true;
 	else if (!strcmp(str, "no_cmci"))
-		mce_cmci_disabled = 1;
+		cfg->cmci_disabled = true;
 	else if (!strcmp(str, "dont_log_ce"))
-		mce_dont_log_ce = 1;
+		cfg->dont_log_ce = true;
 	else if (!strcmp(str, "ignore_ce"))
-		mce_ignore_ce = 1;
+		cfg->ignore_ce = true;
 	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
1969 | mce_bootlog = (str[0] == 'b'); | 1974 | cfg->bootlog = (str[0] == 'b'); |
1970 | else if (!strcmp(str, "bios_cmci_threshold")) | 1975 | else if (!strcmp(str, "bios_cmci_threshold")) |
1971 | mce_bios_cmci_threshold = 1; | 1976 | cfg->bios_cmci_threshold = true; |
1972 | else if (isdigit(str[0])) { | 1977 | else if (isdigit(str[0])) { |
1973 | get_option(&str, &tolerant); | 1978 | get_option(&str, &(cfg->tolerant)); |
1974 | if (*str == ',') { | 1979 | if (*str == ',') { |
1975 | ++str; | 1980 | ++str; |
1976 | get_option(&str, &monarch_timeout); | 1981 | get_option(&str, &(cfg->monarch_timeout)); |
1977 | } | 1982 | } |
1978 | } else { | 1983 | } else { |
1979 | pr_info("mce argument %s ignored. Please use /sys\n", str); | 1984 | pr_info("mce argument %s ignored. Please use /sys\n", str); |
@@ -2002,7 +2007,7 @@ static int mce_disable_error_reporting(void) | |||
2002 | { | 2007 | { |
2003 | int i; | 2008 | int i; |
2004 | 2009 | ||
2005 | for (i = 0; i < banks; i++) { | 2010 | for (i = 0; i < mca_cfg.banks; i++) { |
2006 | struct mce_bank *b = &mce_banks[i]; | 2011 | struct mce_bank *b = &mce_banks[i]; |
2007 | 2012 | ||
2008 | if (b->init) | 2013 | if (b->init) |
@@ -2142,15 +2147,15 @@ static ssize_t set_ignore_ce(struct device *s, | |||
2142 | if (strict_strtoull(buf, 0, &new) < 0) | 2147 | if (strict_strtoull(buf, 0, &new) < 0) |
2143 | return -EINVAL; | 2148 | return -EINVAL; |
2144 | 2149 | ||
2145 | if (mce_ignore_ce ^ !!new) { | 2150 | if (mca_cfg.ignore_ce ^ !!new) { |
2146 | if (new) { | 2151 | if (new) { |
2147 | /* disable ce features */ | 2152 | /* disable ce features */ |
2148 | mce_timer_delete_all(); | 2153 | mce_timer_delete_all(); |
2149 | on_each_cpu(mce_disable_cmci, NULL, 1); | 2154 | on_each_cpu(mce_disable_cmci, NULL, 1); |
2150 | mce_ignore_ce = 1; | 2155 | mca_cfg.ignore_ce = true; |
2151 | } else { | 2156 | } else { |
2152 | /* enable ce features */ | 2157 | /* enable ce features */ |
2153 | mce_ignore_ce = 0; | 2158 | mca_cfg.ignore_ce = false; |
2154 | on_each_cpu(mce_enable_ce, (void *)1, 1); | 2159 | on_each_cpu(mce_enable_ce, (void *)1, 1); |
2155 | } | 2160 | } |
2156 | } | 2161 | } |
@@ -2166,14 +2171,14 @@ static ssize_t set_cmci_disabled(struct device *s, | |||
2166 | if (strict_strtoull(buf, 0, &new) < 0) | 2171 | if (strict_strtoull(buf, 0, &new) < 0) |
2167 | return -EINVAL; | 2172 | return -EINVAL; |
2168 | 2173 | ||
2169 | if (mce_cmci_disabled ^ !!new) { | 2174 | if (mca_cfg.cmci_disabled ^ !!new) { |
2170 | if (new) { | 2175 | if (new) { |
2171 | /* disable cmci */ | 2176 | /* disable cmci */ |
2172 | on_each_cpu(mce_disable_cmci, NULL, 1); | 2177 | on_each_cpu(mce_disable_cmci, NULL, 1); |
2173 | mce_cmci_disabled = 1; | 2178 | mca_cfg.cmci_disabled = true; |
2174 | } else { | 2179 | } else { |
2175 | /* enable cmci */ | 2180 | /* enable cmci */ |
2176 | mce_cmci_disabled = 0; | 2181 | mca_cfg.cmci_disabled = false; |
2177 | on_each_cpu(mce_enable_ce, NULL, 1); | 2182 | on_each_cpu(mce_enable_ce, NULL, 1); |
2178 | } | 2183 | } |
2179 | } | 2184 | } |
@@ -2190,9 +2195,9 @@ static ssize_t store_int_with_restart(struct device *s, | |||
2190 | } | 2195 | } |
2191 | 2196 | ||
2192 | static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger); | 2197 | static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger); |
2193 | static DEVICE_INT_ATTR(tolerant, 0644, tolerant); | 2198 | static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant); |
2194 | static DEVICE_INT_ATTR(monarch_timeout, 0644, monarch_timeout); | 2199 | static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout); |
2195 | static DEVICE_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); | 2200 | static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce); |
2196 | 2201 | ||
2197 | static struct dev_ext_attribute dev_attr_check_interval = { | 2202 | static struct dev_ext_attribute dev_attr_check_interval = { |
2198 | __ATTR(check_interval, 0644, device_show_int, store_int_with_restart), | 2203 | __ATTR(check_interval, 0644, device_show_int, store_int_with_restart), |
@@ -2200,13 +2205,13 @@ static struct dev_ext_attribute dev_attr_check_interval = { | |||
2200 | }; | 2205 | }; |
2201 | 2206 | ||
2202 | static struct dev_ext_attribute dev_attr_ignore_ce = { | 2207 | static struct dev_ext_attribute dev_attr_ignore_ce = { |
2203 | __ATTR(ignore_ce, 0644, device_show_int, set_ignore_ce), | 2208 | __ATTR(ignore_ce, 0644, device_show_bool, set_ignore_ce), |
2204 | &mce_ignore_ce | 2209 | &mca_cfg.ignore_ce |
2205 | }; | 2210 | }; |
2206 | 2211 | ||
2207 | static struct dev_ext_attribute dev_attr_cmci_disabled = { | 2212 | static struct dev_ext_attribute dev_attr_cmci_disabled = { |
2208 | __ATTR(cmci_disabled, 0644, device_show_int, set_cmci_disabled), | 2213 | __ATTR(cmci_disabled, 0644, device_show_bool, set_cmci_disabled), |
2209 | &mce_cmci_disabled | 2214 | &mca_cfg.cmci_disabled |
2210 | }; | 2215 | }; |
2211 | 2216 | ||
2212 | static struct device_attribute *mce_device_attrs[] = { | 2217 | static struct device_attribute *mce_device_attrs[] = { |
@@ -2253,7 +2258,7 @@ static __cpuinit int mce_device_create(unsigned int cpu) | |||
2253 | if (err) | 2258 | if (err) |
2254 | goto error; | 2259 | goto error; |
2255 | } | 2260 | } |
2256 | for (j = 0; j < banks; j++) { | 2261 | for (j = 0; j < mca_cfg.banks; j++) { |
2257 | err = device_create_file(dev, &mce_banks[j].attr); | 2262 | err = device_create_file(dev, &mce_banks[j].attr); |
2258 | if (err) | 2263 | if (err) |
2259 | goto error2; | 2264 | goto error2; |
@@ -2285,7 +2290,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu) | |||
2285 | for (i = 0; mce_device_attrs[i]; i++) | 2290 | for (i = 0; mce_device_attrs[i]; i++) |
2286 | device_remove_file(dev, mce_device_attrs[i]); | 2291 | device_remove_file(dev, mce_device_attrs[i]); |
2287 | 2292 | ||
2288 | for (i = 0; i < banks; i++) | 2293 | for (i = 0; i < mca_cfg.banks; i++) |
2289 | device_remove_file(dev, &mce_banks[i].attr); | 2294 | device_remove_file(dev, &mce_banks[i].attr); |
2290 | 2295 | ||
2291 | device_unregister(dev); | 2296 | device_unregister(dev); |
@@ -2304,7 +2309,7 @@ static void __cpuinit mce_disable_cpu(void *h) | |||
2304 | 2309 | ||
2305 | if (!(action & CPU_TASKS_FROZEN)) | 2310 | if (!(action & CPU_TASKS_FROZEN)) |
2306 | cmci_clear(); | 2311 | cmci_clear(); |
2307 | for (i = 0; i < banks; i++) { | 2312 | for (i = 0; i < mca_cfg.banks; i++) { |
2308 | struct mce_bank *b = &mce_banks[i]; | 2313 | struct mce_bank *b = &mce_banks[i]; |
2309 | 2314 | ||
2310 | if (b->init) | 2315 | if (b->init) |
@@ -2322,7 +2327,7 @@ static void __cpuinit mce_reenable_cpu(void *h) | |||
2322 | 2327 | ||
2323 | if (!(action & CPU_TASKS_FROZEN)) | 2328 | if (!(action & CPU_TASKS_FROZEN)) |
2324 | cmci_reenable(); | 2329 | cmci_reenable(); |
2325 | for (i = 0; i < banks; i++) { | 2330 | for (i = 0; i < mca_cfg.banks; i++) { |
2326 | struct mce_bank *b = &mce_banks[i]; | 2331 | struct mce_bank *b = &mce_banks[i]; |
2327 | 2332 | ||
2328 | if (b->init) | 2333 | if (b->init) |
@@ -2375,7 +2380,7 @@ static __init void mce_init_banks(void) | |||
2375 | { | 2380 | { |
2376 | int i; | 2381 | int i; |
2377 | 2382 | ||
2378 | for (i = 0; i < banks; i++) { | 2383 | for (i = 0; i < mca_cfg.banks; i++) { |
2379 | struct mce_bank *b = &mce_banks[i]; | 2384 | struct mce_bank *b = &mce_banks[i]; |
2380 | struct device_attribute *a = &b->attr; | 2385 | struct device_attribute *a = &b->attr; |
2381 | 2386 | ||
@@ -2426,7 +2431,7 @@ device_initcall_sync(mcheck_init_device); | |||
2426 | */ | 2431 | */ |
2427 | static int __init mcheck_disable(char *str) | 2432 | static int __init mcheck_disable(char *str) |
2428 | { | 2433 | { |
2429 | mce_disabled = 1; | 2434 | mca_cfg.disabled = true; |
2430 | return 1; | 2435 | return 1; |
2431 | } | 2436 | } |
2432 | __setup("nomce", mcheck_disable); | 2437 | __setup("nomce", mcheck_disable); |
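The mce.c hunks above fold the old file-scope tunables (banks, tolerant, monarch_timeout, mce_bootlog, mce_panic_timeout and the assorted boolean flags) into a single instance of struct mca_config, mca_cfg. A minimal sketch of what that structure has to provide, inferred only from the fields this diff references — the real definition lives in the MCE headers and its exact types and field order may differ:

struct mca_config {
	bool dont_log_ce;		/* "mce=dont_log_ce" */
	bool cmci_disabled;		/* "mce=no_cmci" */
	bool ignore_ce;			/* "mce=ignore_ce" */
	bool disabled;			/* "mce=off" or "nomce" */
	bool bios_cmci_threshold;	/* "mce=bios_cmci_threshold" */
	int banks;			/* MCA banks found at cap init (type guessed) */
	int bootlog;			/* "mce=bootlog"/"mce=nobootlog", negative = per-CPU default */
	int tolerant;			/* first numeric "mce=" argument */
	int monarch_timeout;		/* second numeric "mce=" argument, in usecs */
	int panic_timeout;		/* set to 30 unless bootlog ends up 0 */
};

extern struct mca_config mca_cfg;

With this in place, everything mcheck_enable() parses from the "mce=" boot option lands in one structure, and the sysfs attributes (tolerant, monarch_timeout, dont_log_ce, ignore_ce, cmci_disabled) point straight at its members via DEVICE_INT_ATTR/DEVICE_BOOL_ATTR and dev_ext_attribute instead of at scattered globals.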
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 698b6ec12e0f..1ac581f38dfa 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -6,7 +6,7 @@ | |||
6 | * | 6 | * |
7 | * Written by Jacob Shin - AMD, Inc. | 7 | * Written by Jacob Shin - AMD, Inc. |
8 | * | 8 | * |
9 | * Support: borislav.petkov@amd.com | 9 | * Maintained by: Borislav Petkov <bp@alien8.de> |
10 | * | 10 | * |
11 | * April 2006 | 11 | * April 2006 |
12 | * - added support for AMD Family 0x10 processors | 12 | * - added support for AMD Family 0x10 processors |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 5f88abf07e9c..402c454fbff0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -53,7 +53,7 @@ static int cmci_supported(int *banks) | |||
53 | { | 53 | { |
54 | u64 cap; | 54 | u64 cap; |
55 | 55 | ||
56 | if (mce_cmci_disabled || mce_ignore_ce) | 56 | if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce) |
57 | return 0; | 57 | return 0; |
58 | 58 | ||
59 | /* | 59 | /* |
@@ -200,7 +200,7 @@ static void cmci_discover(int banks) | |||
200 | continue; | 200 | continue; |
201 | } | 201 | } |
202 | 202 | ||
203 | if (!mce_bios_cmci_threshold) { | 203 | if (!mca_cfg.bios_cmci_threshold) { |
204 | val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; | 204 | val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; |
205 | val |= CMCI_THRESHOLD; | 205 | val |= CMCI_THRESHOLD; |
206 | } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) { | 206 | } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) { |
@@ -227,7 +227,7 @@ static void cmci_discover(int banks) | |||
227 | * set the thresholds properly or does not work with | 227 | * set the thresholds properly or does not work with |
228 | * this boot option. Note down now and report later. | 228 | * this boot option. Note down now and report later. |
229 | */ | 229 | */ |
230 | if (mce_bios_cmci_threshold && bios_zero_thresh && | 230 | if (mca_cfg.bios_cmci_threshold && bios_zero_thresh && |
231 | (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) | 231 | (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) |
232 | bios_wrong_thresh = 1; | 232 | bios_wrong_thresh = 1; |
233 | } else { | 233 | } else { |
@@ -235,7 +235,7 @@ static void cmci_discover(int banks) | |||
235 | } | 235 | } |
236 | } | 236 | } |
237 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); | 237 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); |
238 | if (mce_bios_cmci_threshold && bios_wrong_thresh) { | 238 | if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) { |
239 | pr_info_once( | 239 | pr_info_once( |
240 | "bios_cmci_threshold: Some banks do not have valid thresholds set\n"); | 240 | "bios_cmci_threshold: Some banks do not have valid thresholds set\n"); |
241 | pr_info_once( | 241 | pr_info_once( |
@@ -285,34 +285,39 @@ void cmci_clear(void) | |||
285 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); | 285 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); |
286 | } | 286 | } |
287 | 287 | ||
288 | static long cmci_rediscover_work_func(void *arg) | ||
289 | { | ||
290 | int banks; | ||
291 | |||
292 | /* Recheck banks in case CPUs don't all have the same */ | ||
293 | if (cmci_supported(&banks)) | ||
294 | cmci_discover(banks); | ||
295 | |||
296 | return 0; | ||
297 | } | ||
298 | |||
288 | /* | 299 | /* |
289 | * After a CPU went down cycle through all the others and rediscover | 300 | * After a CPU went down cycle through all the others and rediscover |
290 | * Must run in process context. | 301 | * Must run in process context. |
291 | */ | 302 | */ |
292 | void cmci_rediscover(int dying) | 303 | void cmci_rediscover(int dying) |
293 | { | 304 | { |
294 | int banks; | 305 | int cpu, banks; |
295 | int cpu; | ||
296 | cpumask_var_t old; | ||
297 | 306 | ||
298 | if (!cmci_supported(&banks)) | 307 | if (!cmci_supported(&banks)) |
299 | return; | 308 | return; |
300 | if (!alloc_cpumask_var(&old, GFP_KERNEL)) | ||
301 | return; | ||
302 | cpumask_copy(old, ¤t->cpus_allowed); | ||
303 | 309 | ||
304 | for_each_online_cpu(cpu) { | 310 | for_each_online_cpu(cpu) { |
305 | if (cpu == dying) | 311 | if (cpu == dying) |
306 | continue; | 312 | continue; |
307 | if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) | 313 | |
314 | if (cpu == smp_processor_id()) { | ||
315 | cmci_rediscover_work_func(NULL); | ||
308 | continue; | 316 | continue; |
309 | /* Recheck banks in case CPUs don't all have the same */ | 317 | } |
310 | if (cmci_supported(&banks)) | ||
311 | cmci_discover(banks); | ||
312 | } | ||
313 | 318 | ||
314 | set_cpus_allowed_ptr(current, old); | 319 | work_on_cpu(cpu, cmci_rediscover_work_func, NULL); |
315 | free_cpumask_var(old); | 320 | } |
316 | } | 321 | } |
317 | 322 | ||
318 | /* | 323 | /* |
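The cmci_rediscover() rework above stops rebinding current to each CPU with set_cpus_allowed_ptr() and instead hands the per-CPU work to work_on_cpu(), which queues a work item on the target CPU and waits for it to finish, leaving the caller's affinity mask untouched. A hedged sketch of that pattern (work_on_cpu() expects a long (*)(void *) callback and must be called from process context; names below are placeholders):

#include <linux/cpu.h>
#include <linux/workqueue.h>

static long rediscover_one_cpu(void *arg)
{
	/* runs in process context, pinned to the target CPU */
	return 0;
}

static void rediscover_all_but(int dying)
{
	int cpu;

	for_each_online_cpu(cpu) {
		if (cpu == dying)
			continue;
		if (cpu == smp_processor_id()) {
			rediscover_one_cpu(NULL);	/* already on this CPU */
			continue;
		}
		work_on_cpu(cpu, rediscover_one_cpu, NULL);
	}
}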
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 6b96110bb0c3..726bf963c227 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -606,7 +606,7 @@ void __init mtrr_bp_init(void) | |||
606 | 606 | ||
607 | /* | 607 | /* |
608 | * This is an AMD specific MSR, but we assume(hope?) that | 608 | * This is an AMD specific MSR, but we assume(hope?) that |
609 | * Intel will implement it to when they extend the address | 609 | * Intel will implement it too when they extend the address |
610 | * bus of the Xeon. | 610 | * bus of the Xeon. |
611 | */ | 611 | */ |
612 | if (cpuid_eax(0x80000000) >= 0x80000008) { | 612 | if (cpuid_eax(0x80000000) >= 0x80000008) { |
@@ -695,11 +695,16 @@ void mtrr_ap_init(void) | |||
695 | } | 695 | } |
696 | 696 | ||
697 | /** | 697 | /** |
698 | * Save current fixed-range MTRR state of the BSP | 698 | * Save current fixed-range MTRR state of the first cpu in cpu_online_mask. |
699 | */ | 699 | */ |
700 | void mtrr_save_state(void) | 700 | void mtrr_save_state(void) |
701 | { | 701 | { |
702 | smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1); | 702 | int first_cpu; |
703 | |||
704 | get_online_cpus(); | ||
705 | first_cpu = cpumask_first(cpu_online_mask); | ||
706 | smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1); | ||
707 | put_online_cpus(); | ||
703 | } | 708 | } |
704 | 709 | ||
705 | void set_mtrr_aps_delayed_init(void) | 710 | void set_mtrr_aps_delayed_init(void) |
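mtrr_save_state() used to hard-code CPU 0 as the target of the cross-call; the new version asks for whichever CPU is first in cpu_online_mask and wraps the lookup plus the call in get_online_cpus()/put_online_cpus() so the chosen CPU cannot be unplugged in between. The hotplug-safe cross-call pattern, reduced to its essentials (function names here are placeholders):

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/smp.h>

static void save_fixed_ranges(void *info)	/* stands in for mtrr_save_fixed_ranges */
{
}

static void save_state_hotplug_safe(void)
{
	int cpu;

	get_online_cpus();			/* hold off CPU hotplug */
	cpu = cpumask_first(cpu_online_mask);
	smp_call_function_single(cpu, save_fixed_ranges, NULL, 1);	/* wait = 1 */
	put_online_cpus();
}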
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4a3374e61a93..6774c17a5576 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -340,9 +340,6 @@ int x86_setup_perfctr(struct perf_event *event) | |||
340 | /* BTS is currently only allowed for user-mode. */ | 340 | /* BTS is currently only allowed for user-mode. */ |
341 | if (!attr->exclude_kernel) | 341 | if (!attr->exclude_kernel) |
342 | return -EOPNOTSUPP; | 342 | return -EOPNOTSUPP; |
343 | |||
344 | if (!attr->exclude_guest) | ||
345 | return -EOPNOTSUPP; | ||
346 | } | 343 | } |
347 | 344 | ||
348 | hwc->config |= config; | 345 | hwc->config |= config; |
@@ -385,9 +382,6 @@ int x86_pmu_hw_config(struct perf_event *event) | |||
385 | if (event->attr.precise_ip) { | 382 | if (event->attr.precise_ip) { |
386 | int precise = 0; | 383 | int precise = 0; |
387 | 384 | ||
388 | if (!event->attr.exclude_guest) | ||
389 | return -EOPNOTSUPP; | ||
390 | |||
391 | /* Support for constant skid */ | 385 | /* Support for constant skid */ |
392 | if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) { | 386 | if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) { |
393 | precise++; | 387 | precise++; |
@@ -1316,6 +1310,121 @@ static struct attribute_group x86_pmu_format_group = { | |||
1316 | .attrs = NULL, | 1310 | .attrs = NULL, |
1317 | }; | 1311 | }; |
1318 | 1312 | ||
1313 | struct perf_pmu_events_attr { | ||
1314 | struct device_attribute attr; | ||
1315 | u64 id; | ||
1316 | }; | ||
1317 | |||
1318 | /* | ||
1319 | * Remove all undefined events (x86_pmu.event_map(id) == 0) | ||
1320 | * out of events_attr attributes. | ||
1321 | */ | ||
1322 | static void __init filter_events(struct attribute **attrs) | ||
1323 | { | ||
1324 | int i, j; | ||
1325 | |||
1326 | for (i = 0; attrs[i]; i++) { | ||
1327 | if (x86_pmu.event_map(i)) | ||
1328 | continue; | ||
1329 | |||
1330 | for (j = i; attrs[j]; j++) | ||
1331 | attrs[j] = attrs[j + 1]; | ||
1332 | |||
1333 | /* Check the shifted attr. */ | ||
1334 | i--; | ||
1335 | } | ||
1336 | } | ||
1337 | |||
1338 | static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, | ||
1339 | char *page) | ||
1340 | { | ||
1341 | struct perf_pmu_events_attr *pmu_attr = \ | ||
1342 | container_of(attr, struct perf_pmu_events_attr, attr); | ||
1343 | |||
1344 | u64 config = x86_pmu.event_map(pmu_attr->id); | ||
1345 | return x86_pmu.events_sysfs_show(page, config); | ||
1346 | } | ||
1347 | |||
1348 | #define EVENT_VAR(_id) event_attr_##_id | ||
1349 | #define EVENT_PTR(_id) &event_attr_##_id.attr.attr | ||
1350 | |||
1351 | #define EVENT_ATTR(_name, _id) \ | ||
1352 | static struct perf_pmu_events_attr EVENT_VAR(_id) = { \ | ||
1353 | .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ | ||
1354 | .id = PERF_COUNT_HW_##_id, \ | ||
1355 | }; | ||
1356 | |||
1357 | EVENT_ATTR(cpu-cycles, CPU_CYCLES ); | ||
1358 | EVENT_ATTR(instructions, INSTRUCTIONS ); | ||
1359 | EVENT_ATTR(cache-references, CACHE_REFERENCES ); | ||
1360 | EVENT_ATTR(cache-misses, CACHE_MISSES ); | ||
1361 | EVENT_ATTR(branch-instructions, BRANCH_INSTRUCTIONS ); | ||
1362 | EVENT_ATTR(branch-misses, BRANCH_MISSES ); | ||
1363 | EVENT_ATTR(bus-cycles, BUS_CYCLES ); | ||
1364 | EVENT_ATTR(stalled-cycles-frontend, STALLED_CYCLES_FRONTEND ); | ||
1365 | EVENT_ATTR(stalled-cycles-backend, STALLED_CYCLES_BACKEND ); | ||
1366 | EVENT_ATTR(ref-cycles, REF_CPU_CYCLES ); | ||
1367 | |||
1368 | static struct attribute *empty_attrs; | ||
1369 | |||
1370 | static struct attribute *events_attr[] = { | ||
1371 | EVENT_PTR(CPU_CYCLES), | ||
1372 | EVENT_PTR(INSTRUCTIONS), | ||
1373 | EVENT_PTR(CACHE_REFERENCES), | ||
1374 | EVENT_PTR(CACHE_MISSES), | ||
1375 | EVENT_PTR(BRANCH_INSTRUCTIONS), | ||
1376 | EVENT_PTR(BRANCH_MISSES), | ||
1377 | EVENT_PTR(BUS_CYCLES), | ||
1378 | EVENT_PTR(STALLED_CYCLES_FRONTEND), | ||
1379 | EVENT_PTR(STALLED_CYCLES_BACKEND), | ||
1380 | EVENT_PTR(REF_CPU_CYCLES), | ||
1381 | NULL, | ||
1382 | }; | ||
1383 | |||
1384 | static struct attribute_group x86_pmu_events_group = { | ||
1385 | .name = "events", | ||
1386 | .attrs = events_attr, | ||
1387 | }; | ||
1388 | |||
1389 | ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event) | ||
1390 | { | ||
1391 | u64 umask = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; | ||
1392 | u64 cmask = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24; | ||
1393 | bool edge = (config & ARCH_PERFMON_EVENTSEL_EDGE); | ||
1394 | bool pc = (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL); | ||
1395 | bool any = (config & ARCH_PERFMON_EVENTSEL_ANY); | ||
1396 | bool inv = (config & ARCH_PERFMON_EVENTSEL_INV); | ||
1397 | ssize_t ret; | ||
1398 | |||
1399 | /* | ||
1400 | * We have whole page size to spend and just little data | ||
1401 | * to write, so we can safely use sprintf. | ||
1402 | */ | ||
1403 | ret = sprintf(page, "event=0x%02llx", event); | ||
1404 | |||
1405 | if (umask) | ||
1406 | ret += sprintf(page + ret, ",umask=0x%02llx", umask); | ||
1407 | |||
1408 | if (edge) | ||
1409 | ret += sprintf(page + ret, ",edge"); | ||
1410 | |||
1411 | if (pc) | ||
1412 | ret += sprintf(page + ret, ",pc"); | ||
1413 | |||
1414 | if (any) | ||
1415 | ret += sprintf(page + ret, ",any"); | ||
1416 | |||
1417 | if (inv) | ||
1418 | ret += sprintf(page + ret, ",inv"); | ||
1419 | |||
1420 | if (cmask) | ||
1421 | ret += sprintf(page + ret, ",cmask=0x%02llx", cmask); | ||
1422 | |||
1423 | ret += sprintf(page + ret, "\n"); | ||
1424 | |||
1425 | return ret; | ||
1426 | } | ||
1427 | |||
1319 | static int __init init_hw_perf_events(void) | 1428 | static int __init init_hw_perf_events(void) |
1320 | { | 1429 | { |
1321 | struct x86_pmu_quirk *quirk; | 1430 | struct x86_pmu_quirk *quirk; |
@@ -1362,6 +1471,11 @@ static int __init init_hw_perf_events(void) | |||
1362 | x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ | 1471 | x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ |
1363 | x86_pmu_format_group.attrs = x86_pmu.format_attrs; | 1472 | x86_pmu_format_group.attrs = x86_pmu.format_attrs; |
1364 | 1473 | ||
1474 | if (!x86_pmu.events_sysfs_show) | ||
1475 | x86_pmu_events_group.attrs = &empty_attrs; | ||
1476 | else | ||
1477 | filter_events(x86_pmu_events_group.attrs); | ||
1478 | |||
1365 | pr_info("... version: %d\n", x86_pmu.version); | 1479 | pr_info("... version: %d\n", x86_pmu.version); |
1366 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); | 1480 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); |
1367 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); | 1481 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); |
@@ -1651,6 +1765,7 @@ static struct attribute_group x86_pmu_attr_group = { | |||
1651 | static const struct attribute_group *x86_pmu_attr_groups[] = { | 1765 | static const struct attribute_group *x86_pmu_attr_groups[] = { |
1652 | &x86_pmu_attr_group, | 1766 | &x86_pmu_attr_group, |
1653 | &x86_pmu_format_group, | 1767 | &x86_pmu_format_group, |
1768 | &x86_pmu_events_group, | ||
1654 | NULL, | 1769 | NULL, |
1655 | }; | 1770 | }; |
1656 | 1771 | ||
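The block added to perf_event.c publishes the generic hardware events as read-only sysfs attributes and renders each one through x86_event_sysfs_show() in the same key=value syntax perf accepts for raw events; filter_events() first drops any entry whose x86_pmu.event_map() returns 0, so unsupported generic events never appear. Roughly, for one event the macro plumbing and the resulting file look like this (the concrete config value is illustrative — it comes from the CPU's event map):

/* EVENT_ATTR(cpu-cycles, CPU_CYCLES) expands to roughly: */
static struct perf_pmu_events_attr event_attr_CPU_CYCLES = {
	.attr = __ATTR(cpu-cycles, 0444, events_sysfs_show, NULL),
	.id   = PERF_COUNT_HW_CPU_CYCLES,
};

/*
 * Reading /sys/bus/event_source/devices/cpu/events/cpu-cycles then goes
 * through events_sysfs_show() -> x86_pmu.events_sysfs_show().  If the
 * event map returns, say, event select 0x3c with umask 0, the file would
 * contain just:
 *
 *	event=0x3c
 *
 * because x86_event_sysfs_show() only appends the umask/edge/pc/any/inv/
 * cmask fields that are actually set in the config.
 */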
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 271d25700297..115c1ea97746 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -354,6 +354,8 @@ struct x86_pmu { | |||
354 | int attr_rdpmc; | 354 | int attr_rdpmc; |
355 | struct attribute **format_attrs; | 355 | struct attribute **format_attrs; |
356 | 356 | ||
357 | ssize_t (*events_sysfs_show)(char *page, u64 config); | ||
358 | |||
357 | /* | 359 | /* |
358 | * CPU Hotplug hooks | 360 | * CPU Hotplug hooks |
359 | */ | 361 | */ |
@@ -536,6 +538,9 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip) | |||
536 | regs->ip = ip; | 538 | regs->ip = ip; |
537 | } | 539 | } |
538 | 540 | ||
541 | ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event); | ||
542 | ssize_t intel_event_sysfs_show(char *page, u64 config); | ||
543 | |||
539 | #ifdef CONFIG_CPU_SUP_AMD | 544 | #ifdef CONFIG_CPU_SUP_AMD |
540 | 545 | ||
541 | int amd_pmu_init(void); | 546 | int amd_pmu_init(void); |
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 4528ae7b6ec4..c93bc4e813a0 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -568,6 +568,14 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev | |||
568 | } | 568 | } |
569 | } | 569 | } |
570 | 570 | ||
571 | static ssize_t amd_event_sysfs_show(char *page, u64 config) | ||
572 | { | ||
573 | u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) | | ||
574 | (config & AMD64_EVENTSEL_EVENT) >> 24; | ||
575 | |||
576 | return x86_event_sysfs_show(page, config, event); | ||
577 | } | ||
578 | |||
571 | static __initconst const struct x86_pmu amd_pmu = { | 579 | static __initconst const struct x86_pmu amd_pmu = { |
572 | .name = "AMD", | 580 | .name = "AMD", |
573 | .handle_irq = x86_pmu_handle_irq, | 581 | .handle_irq = x86_pmu_handle_irq, |
@@ -591,6 +599,7 @@ static __initconst const struct x86_pmu amd_pmu = { | |||
591 | .put_event_constraints = amd_put_event_constraints, | 599 | .put_event_constraints = amd_put_event_constraints, |
592 | 600 | ||
593 | .format_attrs = amd_format_attr, | 601 | .format_attrs = amd_format_attr, |
602 | .events_sysfs_show = amd_event_sysfs_show, | ||
594 | 603 | ||
595 | .cpu_prepare = amd_pmu_cpu_prepare, | 604 | .cpu_prepare = amd_pmu_cpu_prepare, |
596 | .cpu_starting = amd_pmu_cpu_starting, | 605 | .cpu_starting = amd_pmu_cpu_starting, |
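amd_event_sysfs_show() has to stitch AMD's split event-select field back together before calling the common formatter: the low eight event bits sit in bits 7:0 of the config, and the extended bits selected by AMD64_EVENTSEL_EVENT are shifted down by 24 so they land in bits 11:8 of the value passed on. A worked example, assuming the extended event-select bits occupy config bits 35:32 as on recent AMD families:

/*
 * config = 0x0000000f000000e1
 *   bits  7:0  (event low)  = 0xe1
 *   bits 35:32 (event high) = 0xf   -> (>> 24) becomes bits 11:8 = 0xf00
 *
 * event handed to x86_event_sysfs_show():
 *   0x0e1 | 0xf00 = 0xfe1
 * so the sysfs events file for such an event would start with "event=0xfe1".
 */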
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 324bb523d9d9..93b9e1181f83 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -1603,6 +1603,13 @@ static struct attribute *intel_arch_formats_attr[] = { | |||
1603 | NULL, | 1603 | NULL, |
1604 | }; | 1604 | }; |
1605 | 1605 | ||
1606 | ssize_t intel_event_sysfs_show(char *page, u64 config) | ||
1607 | { | ||
1608 | u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT); | ||
1609 | |||
1610 | return x86_event_sysfs_show(page, config, event); | ||
1611 | } | ||
1612 | |||
1606 | static __initconst const struct x86_pmu core_pmu = { | 1613 | static __initconst const struct x86_pmu core_pmu = { |
1607 | .name = "core", | 1614 | .name = "core", |
1608 | .handle_irq = x86_pmu_handle_irq, | 1615 | .handle_irq = x86_pmu_handle_irq, |
@@ -1628,6 +1635,7 @@ static __initconst const struct x86_pmu core_pmu = { | |||
1628 | .event_constraints = intel_core_event_constraints, | 1635 | .event_constraints = intel_core_event_constraints, |
1629 | .guest_get_msrs = core_guest_get_msrs, | 1636 | .guest_get_msrs = core_guest_get_msrs, |
1630 | .format_attrs = intel_arch_formats_attr, | 1637 | .format_attrs = intel_arch_formats_attr, |
1638 | .events_sysfs_show = intel_event_sysfs_show, | ||
1631 | }; | 1639 | }; |
1632 | 1640 | ||
1633 | struct intel_shared_regs *allocate_shared_regs(int cpu) | 1641 | struct intel_shared_regs *allocate_shared_regs(int cpu) |
@@ -1766,6 +1774,7 @@ static __initconst const struct x86_pmu intel_pmu = { | |||
1766 | .pebs_aliases = intel_pebs_aliases_core2, | 1774 | .pebs_aliases = intel_pebs_aliases_core2, |
1767 | 1775 | ||
1768 | .format_attrs = intel_arch3_formats_attr, | 1776 | .format_attrs = intel_arch3_formats_attr, |
1777 | .events_sysfs_show = intel_event_sysfs_show, | ||
1769 | 1778 | ||
1770 | .cpu_prepare = intel_pmu_cpu_prepare, | 1779 | .cpu_prepare = intel_pmu_cpu_prepare, |
1771 | .cpu_starting = intel_pmu_cpu_starting, | 1780 | .cpu_starting = intel_pmu_cpu_starting, |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 3cf3d97cce3a..b43200dbfe7e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c | |||
@@ -2500,7 +2500,7 @@ static bool pcidrv_registered; | |||
2500 | /* | 2500 | /* |
2501 | * add a pci uncore device | 2501 | * add a pci uncore device |
2502 | */ | 2502 | */ |
2503 | static int __devinit uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev) | 2503 | static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev) |
2504 | { | 2504 | { |
2505 | struct intel_uncore_pmu *pmu; | 2505 | struct intel_uncore_pmu *pmu; |
2506 | struct intel_uncore_box *box; | 2506 | struct intel_uncore_box *box; |
@@ -2571,8 +2571,8 @@ static void uncore_pci_remove(struct pci_dev *pdev) | |||
2571 | kfree(box); | 2571 | kfree(box); |
2572 | } | 2572 | } |
2573 | 2573 | ||
2574 | static int __devinit uncore_pci_probe(struct pci_dev *pdev, | 2574 | static int uncore_pci_probe(struct pci_dev *pdev, |
2575 | const struct pci_device_id *id) | 2575 | const struct pci_device_id *id) |
2576 | { | 2576 | { |
2577 | struct intel_uncore_type *type; | 2577 | struct intel_uncore_type *type; |
2578 | 2578 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index 7d0270bd793e..f2af39f5dc3d 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c | |||
@@ -227,6 +227,8 @@ static __initconst const struct x86_pmu p6_pmu = { | |||
227 | .event_constraints = p6_event_constraints, | 227 | .event_constraints = p6_event_constraints, |
228 | 228 | ||
229 | .format_attrs = intel_p6_formats_attr, | 229 | .format_attrs = intel_p6_formats_attr, |
230 | .events_sysfs_show = intel_event_sysfs_show, | ||
231 | |||
230 | }; | 232 | }; |
231 | 233 | ||
232 | __init int p6_pmu_init(void) | 234 | __init int p6_pmu_init(void) |
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index fbd895562292..3286a92e662a 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c | |||
@@ -26,11 +26,6 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, | |||
26 | #ifdef CONFIG_X86_32 | 26 | #ifdef CONFIG_X86_32 |
27 | static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) | 27 | static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) |
28 | { | 28 | { |
29 | /* | ||
30 | * We use exception 16 if we have hardware math and we've either seen | ||
31 | * it or the CPU claims it is internal | ||
32 | */ | ||
33 | int fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu); | ||
34 | seq_printf(m, | 29 | seq_printf(m, |
35 | "fdiv_bug\t: %s\n" | 30 | "fdiv_bug\t: %s\n" |
36 | "hlt_bug\t\t: %s\n" | 31 | "hlt_bug\t\t: %s\n" |
@@ -45,7 +40,7 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) | |||
45 | c->f00f_bug ? "yes" : "no", | 40 | c->f00f_bug ? "yes" : "no", |
46 | c->coma_bug ? "yes" : "no", | 41 | c->coma_bug ? "yes" : "no", |
47 | c->hard_math ? "yes" : "no", | 42 | c->hard_math ? "yes" : "no", |
48 | fpu_exception ? "yes" : "no", | 43 | c->hard_math ? "yes" : "no", |
49 | c->cpuid_level, | 44 | c->cpuid_level, |
50 | c->wp_works_ok ? "yes" : "no"); | 45 | c->wp_works_ok ? "yes" : "no"); |
51 | } | 46 | } |
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 13ad89971d47..74467feb4dc5 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/delay.h> | 16 | #include <linux/delay.h> |
17 | #include <linux/elf.h> | 17 | #include <linux/elf.h> |
18 | #include <linux/elfcore.h> | 18 | #include <linux/elfcore.h> |
19 | #include <linux/module.h> | ||
19 | 20 | ||
20 | #include <asm/processor.h> | 21 | #include <asm/processor.h> |
21 | #include <asm/hardirq.h> | 22 | #include <asm/hardirq.h> |
@@ -30,6 +31,27 @@ | |||
30 | 31 | ||
31 | int in_crash_kexec; | 32 | int in_crash_kexec; |
32 | 33 | ||
34 | /* | ||
35 | * This is used to VMCLEAR all VMCSs loaded on the | ||
36 | * processor. And when loading kvm_intel module, the | ||
37 | * callback function pointer will be assigned. | ||
38 | * | ||
39 | * protected by rcu. | ||
40 | */ | ||
41 | crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL; | ||
42 | EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss); | ||
43 | |||
44 | static inline void cpu_crash_vmclear_loaded_vmcss(void) | ||
45 | { | ||
46 | crash_vmclear_fn *do_vmclear_operation = NULL; | ||
47 | |||
48 | rcu_read_lock(); | ||
49 | do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss); | ||
50 | if (do_vmclear_operation) | ||
51 | do_vmclear_operation(); | ||
52 | rcu_read_unlock(); | ||
53 | } | ||
54 | |||
33 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) | 55 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) |
34 | 56 | ||
35 | static void kdump_nmi_callback(int cpu, struct pt_regs *regs) | 57 | static void kdump_nmi_callback(int cpu, struct pt_regs *regs) |
@@ -46,6 +68,11 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs) | |||
46 | #endif | 68 | #endif |
47 | crash_save_cpu(regs, cpu); | 69 | crash_save_cpu(regs, cpu); |
48 | 70 | ||
71 | /* | ||
72 | * VMCLEAR VMCSs loaded on all cpus if needed. | ||
73 | */ | ||
74 | cpu_crash_vmclear_loaded_vmcss(); | ||
75 | |||
49 | /* Disable VMX or SVM if needed. | 76 | /* Disable VMX or SVM if needed. |
50 | * | 77 | * |
51 | * We need to disable virtualization on all CPUs. | 78 | * We need to disable virtualization on all CPUs. |
@@ -88,6 +115,11 @@ void native_machine_crash_shutdown(struct pt_regs *regs) | |||
88 | 115 | ||
89 | kdump_nmi_shootdown_cpus(); | 116 | kdump_nmi_shootdown_cpus(); |
90 | 117 | ||
118 | /* | ||
119 | * VMCLEAR VMCSs loaded on this cpu if needed. | ||
120 | */ | ||
121 | cpu_crash_vmclear_loaded_vmcss(); | ||
122 | |||
91 | /* Booting kdump kernel with VMX or SVM enabled won't work, | 123 | /* Booting kdump kernel with VMX or SVM enabled won't work, |
92 | * because (among other limitations) we can't disable paging | 124 | * because (among other limitations) we can't disable paging |
93 | * with the virt flags. | 125 | * with the virt flags. |
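crash_vmclear_loaded_vmcss is an RCU-protected hook that the crash/kdump path invokes so a VMX hypervisor module can VMCLEAR any VMCSs still loaded before the kdump kernel boots. The crash_vmclear_fn typedef and the kvm_intel side are not part of this hunk, so the registration sketch below is an assumption about how a module would use the exported pointer:

/* assumed elsewhere: typedef void (crash_vmclear_fn)(void); */
extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;

static void vmclear_all_loaded_vmcss(void)	/* hypothetical module callback */
{
	/* VMCLEAR every VMCS this module has loaded on the current CPU */
}

static void crash_hook_register(void)
{
	rcu_assign_pointer(crash_vmclear_loaded_vmcss, vmclear_all_loaded_vmcss);
}

static void crash_hook_unregister(void)
{
	RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
	synchronize_rcu();	/* let any reader of the old pointer finish */
}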
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 88b725aa1d52..6ed91d9980e2 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -739,30 +739,11 @@ ENTRY(ptregs_##name) ; \ | |||
739 | ENDPROC(ptregs_##name) | 739 | ENDPROC(ptregs_##name) |
740 | 740 | ||
741 | PTREGSCALL1(iopl) | 741 | PTREGSCALL1(iopl) |
742 | PTREGSCALL0(fork) | ||
743 | PTREGSCALL0(vfork) | ||
744 | PTREGSCALL2(sigaltstack) | ||
745 | PTREGSCALL0(sigreturn) | 742 | PTREGSCALL0(sigreturn) |
746 | PTREGSCALL0(rt_sigreturn) | 743 | PTREGSCALL0(rt_sigreturn) |
747 | PTREGSCALL2(vm86) | 744 | PTREGSCALL2(vm86) |
748 | PTREGSCALL1(vm86old) | 745 | PTREGSCALL1(vm86old) |
749 | 746 | ||
750 | /* Clone is an oddball. The 4th arg is in %edi */ | ||
751 | ENTRY(ptregs_clone) | ||
752 | CFI_STARTPROC | ||
753 | leal 4(%esp),%eax | ||
754 | pushl_cfi %eax | ||
755 | pushl_cfi PT_EDI(%eax) | ||
756 | movl PT_EDX(%eax),%ecx | ||
757 | movl PT_ECX(%eax),%edx | ||
758 | movl PT_EBX(%eax),%eax | ||
759 | call sys_clone | ||
760 | addl $8,%esp | ||
761 | CFI_ADJUST_CFA_OFFSET -8 | ||
762 | ret | ||
763 | CFI_ENDPROC | ||
764 | ENDPROC(ptregs_clone) | ||
765 | |||
766 | .macro FIXUP_ESPFIX_STACK | 747 | .macro FIXUP_ESPFIX_STACK |
767 | /* | 748 | /* |
768 | * Switch back for ESPFIX stack to the normal zerobased stack | 749 | * Switch back for ESPFIX stack to the normal zerobased stack |
@@ -1084,7 +1065,6 @@ ENTRY(xen_failsafe_callback) | |||
1084 | lea 16(%esp),%esp | 1065 | lea 16(%esp),%esp |
1085 | CFI_ADJUST_CFA_OFFSET -16 | 1066 | CFI_ADJUST_CFA_OFFSET -16 |
1086 | jz 5f | 1067 | jz 5f |
1087 | addl $16,%esp | ||
1088 | jmp iret_exc | 1068 | jmp iret_exc |
1089 | 5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */ | 1069 | 5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */ |
1090 | SAVE_ALL | 1070 | SAVE_ALL |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b51b2c7ee51f..07a7a04529bc 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -56,7 +56,7 @@ | |||
56 | #include <asm/ftrace.h> | 56 | #include <asm/ftrace.h> |
57 | #include <asm/percpu.h> | 57 | #include <asm/percpu.h> |
58 | #include <asm/asm.h> | 58 | #include <asm/asm.h> |
59 | #include <asm/rcu.h> | 59 | #include <asm/context_tracking.h> |
60 | #include <asm/smap.h> | 60 | #include <asm/smap.h> |
61 | #include <linux/err.h> | 61 | #include <linux/err.h> |
62 | 62 | ||
@@ -845,10 +845,25 @@ ENTRY(\label) | |||
845 | END(\label) | 845 | END(\label) |
846 | .endm | 846 | .endm |
847 | 847 | ||
848 | PTREGSCALL stub_clone, sys_clone, %r8 | 848 | .macro FORK_LIKE func |
849 | PTREGSCALL stub_fork, sys_fork, %rdi | 849 | ENTRY(stub_\func) |
850 | PTREGSCALL stub_vfork, sys_vfork, %rdi | 850 | CFI_STARTPROC |
851 | PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx | 851 | popq %r11 /* save return address */ |
852 | PARTIAL_FRAME 0 | ||
853 | SAVE_REST | ||
854 | pushq %r11 /* put it back on stack */ | ||
855 | FIXUP_TOP_OF_STACK %r11, 8 | ||
856 | DEFAULT_FRAME 0 8 /* offset 8: return address */ | ||
857 | call sys_\func | ||
858 | RESTORE_TOP_OF_STACK %r11, 8 | ||
859 | ret $REST_SKIP /* pop extended registers */ | ||
860 | CFI_ENDPROC | ||
861 | END(stub_\func) | ||
862 | .endm | ||
863 | |||
864 | FORK_LIKE clone | ||
865 | FORK_LIKE fork | ||
866 | FORK_LIKE vfork | ||
852 | PTREGSCALL stub_iopl, sys_iopl, %rsi | 867 | PTREGSCALL stub_iopl, sys_iopl, %rsi |
853 | 868 | ||
854 | ENTRY(ptregscall_common) | 869 | ENTRY(ptregscall_common) |
@@ -897,8 +912,6 @@ ENTRY(stub_rt_sigreturn) | |||
897 | END(stub_rt_sigreturn) | 912 | END(stub_rt_sigreturn) |
898 | 913 | ||
899 | #ifdef CONFIG_X86_X32_ABI | 914 | #ifdef CONFIG_X86_X32_ABI |
900 | PTREGSCALL stub_x32_sigaltstack, sys32_sigaltstack, %rdx | ||
901 | |||
902 | ENTRY(stub_x32_rt_sigreturn) | 915 | ENTRY(stub_x32_rt_sigreturn) |
903 | CFI_STARTPROC | 916 | CFI_STARTPROC |
904 | addq $8, %rsp | 917 | addq $8, %rsp |
@@ -995,8 +1008,8 @@ END(interrupt) | |||
995 | */ | 1008 | */ |
996 | .p2align CONFIG_X86_L1_CACHE_SHIFT | 1009 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
997 | common_interrupt: | 1010 | common_interrupt: |
998 | ASM_CLAC | ||
999 | XCPT_FRAME | 1011 | XCPT_FRAME |
1012 | ASM_CLAC | ||
1000 | addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ | 1013 | addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ |
1001 | interrupt do_IRQ | 1014 | interrupt do_IRQ |
1002 | /* 0(%rsp): old_rsp-ARGOFFSET */ | 1015 | /* 0(%rsp): old_rsp-ARGOFFSET */ |
@@ -1135,8 +1148,8 @@ END(common_interrupt) | |||
1135 | */ | 1148 | */ |
1136 | .macro apicinterrupt num sym do_sym | 1149 | .macro apicinterrupt num sym do_sym |
1137 | ENTRY(\sym) | 1150 | ENTRY(\sym) |
1138 | ASM_CLAC | ||
1139 | INTR_FRAME | 1151 | INTR_FRAME |
1152 | ASM_CLAC | ||
1140 | pushq_cfi $~(\num) | 1153 | pushq_cfi $~(\num) |
1141 | .Lcommon_\sym: | 1154 | .Lcommon_\sym: |
1142 | interrupt \do_sym | 1155 | interrupt \do_sym |
@@ -1190,8 +1203,8 @@ apicinterrupt IRQ_WORK_VECTOR \ | |||
1190 | */ | 1203 | */ |
1191 | .macro zeroentry sym do_sym | 1204 | .macro zeroentry sym do_sym |
1192 | ENTRY(\sym) | 1205 | ENTRY(\sym) |
1193 | ASM_CLAC | ||
1194 | INTR_FRAME | 1206 | INTR_FRAME |
1207 | ASM_CLAC | ||
1195 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1208 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1196 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1209 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
1197 | subq $ORIG_RAX-R15, %rsp | 1210 | subq $ORIG_RAX-R15, %rsp |
@@ -1208,8 +1221,8 @@ END(\sym) | |||
1208 | 1221 | ||
1209 | .macro paranoidzeroentry sym do_sym | 1222 | .macro paranoidzeroentry sym do_sym |
1210 | ENTRY(\sym) | 1223 | ENTRY(\sym) |
1211 | ASM_CLAC | ||
1212 | INTR_FRAME | 1224 | INTR_FRAME |
1225 | ASM_CLAC | ||
1213 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1226 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1214 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1227 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
1215 | subq $ORIG_RAX-R15, %rsp | 1228 | subq $ORIG_RAX-R15, %rsp |
@@ -1227,8 +1240,8 @@ END(\sym) | |||
1227 | #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) | 1240 | #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) |
1228 | .macro paranoidzeroentry_ist sym do_sym ist | 1241 | .macro paranoidzeroentry_ist sym do_sym ist |
1229 | ENTRY(\sym) | 1242 | ENTRY(\sym) |
1230 | ASM_CLAC | ||
1231 | INTR_FRAME | 1243 | INTR_FRAME |
1244 | ASM_CLAC | ||
1232 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1245 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1233 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1246 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
1234 | subq $ORIG_RAX-R15, %rsp | 1247 | subq $ORIG_RAX-R15, %rsp |
@@ -1247,8 +1260,8 @@ END(\sym) | |||
1247 | 1260 | ||
1248 | .macro errorentry sym do_sym | 1261 | .macro errorentry sym do_sym |
1249 | ENTRY(\sym) | 1262 | ENTRY(\sym) |
1250 | ASM_CLAC | ||
1251 | XCPT_FRAME | 1263 | XCPT_FRAME |
1264 | ASM_CLAC | ||
1252 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1265 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1253 | subq $ORIG_RAX-R15, %rsp | 1266 | subq $ORIG_RAX-R15, %rsp |
1254 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1267 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
@@ -1266,8 +1279,8 @@ END(\sym) | |||
1266 | /* error code is on the stack already */ | 1279 | /* error code is on the stack already */ |
1267 | .macro paranoiderrorentry sym do_sym | 1280 | .macro paranoiderrorentry sym do_sym |
1268 | ENTRY(\sym) | 1281 | ENTRY(\sym) |
1269 | ASM_CLAC | ||
1270 | XCPT_FRAME | 1282 | XCPT_FRAME |
1283 | ASM_CLAC | ||
1271 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1284 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1272 | subq $ORIG_RAX-R15, %rsp | 1285 | subq $ORIG_RAX-R15, %rsp |
1273 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1286 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
@@ -1699,9 +1712,10 @@ nested_nmi: | |||
1699 | 1712 | ||
1700 | 1: | 1713 | 1: |
1701 | /* Set up the interrupted NMIs stack to jump to repeat_nmi */ | 1714 | /* Set up the interrupted NMIs stack to jump to repeat_nmi */ |
1702 | leaq -6*8(%rsp), %rdx | 1715 | leaq -1*8(%rsp), %rdx |
1703 | movq %rdx, %rsp | 1716 | movq %rdx, %rsp |
1704 | CFI_ADJUST_CFA_OFFSET 6*8 | 1717 | CFI_ADJUST_CFA_OFFSET 1*8 |
1718 | leaq -10*8(%rsp), %rdx | ||
1705 | pushq_cfi $__KERNEL_DS | 1719 | pushq_cfi $__KERNEL_DS |
1706 | pushq_cfi %rdx | 1720 | pushq_cfi %rdx |
1707 | pushfq_cfi | 1721 | pushfq_cfi |
@@ -1709,8 +1723,8 @@ nested_nmi: | |||
1709 | pushq_cfi $repeat_nmi | 1723 | pushq_cfi $repeat_nmi |
1710 | 1724 | ||
1711 | /* Put stack back */ | 1725 | /* Put stack back */ |
1712 | addq $(11*8), %rsp | 1726 | addq $(6*8), %rsp |
1713 | CFI_ADJUST_CFA_OFFSET -11*8 | 1727 | CFI_ADJUST_CFA_OFFSET -6*8 |
1714 | 1728 | ||
1715 | nested_nmi_out: | 1729 | nested_nmi_out: |
1716 | popq_cfi %rdx | 1730 | popq_cfi %rdx |
@@ -1736,18 +1750,18 @@ first_nmi: | |||
1736 | * +-------------------------+ | 1750 | * +-------------------------+ |
1737 | * | NMI executing variable | | 1751 | * | NMI executing variable | |
1738 | * +-------------------------+ | 1752 | * +-------------------------+ |
1739 | * | Saved SS | | ||
1740 | * | Saved Return RSP | | ||
1741 | * | Saved RFLAGS | | ||
1742 | * | Saved CS | | ||
1743 | * | Saved RIP | | ||
1744 | * +-------------------------+ | ||
1745 | * | copied SS | | 1753 | * | copied SS | |
1746 | * | copied Return RSP | | 1754 | * | copied Return RSP | |
1747 | * | copied RFLAGS | | 1755 | * | copied RFLAGS | |
1748 | * | copied CS | | 1756 | * | copied CS | |
1749 | * | copied RIP | | 1757 | * | copied RIP | |
1750 | * +-------------------------+ | 1758 | * +-------------------------+ |
1759 | * | Saved SS | | ||
1760 | * | Saved Return RSP | | ||
1761 | * | Saved RFLAGS | | ||
1762 | * | Saved CS | | ||
1763 | * | Saved RIP | | ||
1764 | * +-------------------------+ | ||
1751 | * | pt_regs | | 1765 | * | pt_regs | |
1752 | * +-------------------------+ | 1766 | * +-------------------------+ |
1753 | * | 1767 | * |
@@ -1763,9 +1777,14 @@ first_nmi: | |||
1763 | /* Set the NMI executing variable on the stack. */ | 1777 | /* Set the NMI executing variable on the stack. */ |
1764 | pushq_cfi $1 | 1778 | pushq_cfi $1 |
1765 | 1779 | ||
1780 | /* | ||
1781 | * Leave room for the "copied" frame | ||
1782 | */ | ||
1783 | subq $(5*8), %rsp | ||
1784 | |||
1766 | /* Copy the stack frame to the Saved frame */ | 1785 | /* Copy the stack frame to the Saved frame */ |
1767 | .rept 5 | 1786 | .rept 5 |
1768 | pushq_cfi 6*8(%rsp) | 1787 | pushq_cfi 11*8(%rsp) |
1769 | .endr | 1788 | .endr |
1770 | CFI_DEF_CFA_OFFSET SS+8-RIP | 1789 | CFI_DEF_CFA_OFFSET SS+8-RIP |
1771 | 1790 | ||
@@ -1786,12 +1805,15 @@ repeat_nmi: | |||
1786 | * is benign for the non-repeat case, where 1 was pushed just above | 1805 | * is benign for the non-repeat case, where 1 was pushed just above |
1787 | * to this very stack slot). | 1806 | * to this very stack slot). |
1788 | */ | 1807 | */ |
1789 | movq $1, 5*8(%rsp) | 1808 | movq $1, 10*8(%rsp) |
1790 | 1809 | ||
1791 | /* Make another copy, this one may be modified by nested NMIs */ | 1810 | /* Make another copy, this one may be modified by nested NMIs */ |
1811 | addq $(10*8), %rsp | ||
1812 | CFI_ADJUST_CFA_OFFSET -10*8 | ||
1792 | .rept 5 | 1813 | .rept 5 |
1793 | pushq_cfi 4*8(%rsp) | 1814 | pushq_cfi -6*8(%rsp) |
1794 | .endr | 1815 | .endr |
1816 | subq $(5*8), %rsp | ||
1795 | CFI_DEF_CFA_OFFSET SS+8-RIP | 1817 | CFI_DEF_CFA_OFFSET SS+8-RIP |
1796 | end_repeat_nmi: | 1818 | end_repeat_nmi: |
1797 | 1819 | ||
@@ -1842,8 +1864,12 @@ nmi_swapgs: | |||
1842 | SWAPGS_UNSAFE_STACK | 1864 | SWAPGS_UNSAFE_STACK |
1843 | nmi_restore: | 1865 | nmi_restore: |
1844 | RESTORE_ALL 8 | 1866 | RESTORE_ALL 8 |
1867 | |||
1868 | /* Pop the extra iret frame */ | ||
1869 | addq $(5*8), %rsp | ||
1870 | |||
1845 | /* Clear the NMI executing stack variable */ | 1871 | /* Clear the NMI executing stack variable */ |
1846 | movq $0, 10*8(%rsp) | 1872 | movq $0, 5*8(%rsp) |
1847 | jmp irq_return | 1873 | jmp irq_return |
1848 | CFI_ENDPROC | 1874 | CFI_ENDPROC |
1849 | END(nmi) | 1875 | END(nmi) |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 957a47aec64e..8e7f6556028f 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -266,6 +266,19 @@ num_subarch_entries = (. - subarch_entries) / 4 | |||
266 | jmp default_entry | 266 | jmp default_entry |
267 | #endif /* CONFIG_PARAVIRT */ | 267 | #endif /* CONFIG_PARAVIRT */ |
268 | 268 | ||
269 | #ifdef CONFIG_HOTPLUG_CPU | ||
270 | /* | ||
271 | * Boot CPU0 entry point. It's called from play_dead(). Everything has been set | ||
272 | * up already except stack. We just set up stack here. Then call | ||
273 | * start_secondary(). | ||
274 | */ | ||
275 | ENTRY(start_cpu0) | ||
276 | movl stack_start, %ecx | ||
277 | movl %ecx, %esp | ||
278 | jmp *(initial_code) | ||
279 | ENDPROC(start_cpu0) | ||
280 | #endif | ||
281 | |||
269 | /* | 282 | /* |
270 | * Non-boot CPU entry point; entered from trampoline.S | 283 | * Non-boot CPU entry point; entered from trampoline.S |
271 | * We can't lgdt here, because lgdt itself uses a data segment, but | 284 | * We can't lgdt here, because lgdt itself uses a data segment, but |
@@ -292,8 +305,8 @@ default_entry: | |||
292 | * be using the global pages. | 305 | * be using the global pages. |
293 | * | 306 | * |
294 | * NOTE! If we are on a 486 we may have no cr4 at all! | 307 | * NOTE! If we are on a 486 we may have no cr4 at all! |
295 | * Specifically, cr4 exists if and only if CPUID exists, | 308 | * Specifically, cr4 exists if and only if CPUID exists |
296 | * which in turn exists if and only if EFLAGS.ID exists. | 309 | * and has flags other than the FPU flag set. |
297 | */ | 310 | */ |
298 | movl $X86_EFLAGS_ID,%ecx | 311 | movl $X86_EFLAGS_ID,%ecx |
299 | pushl %ecx | 312 | pushl %ecx |
@@ -308,6 +321,11 @@ default_entry: | |||
308 | testl %ecx,%eax | 321 | testl %ecx,%eax |
309 | jz 6f # No ID flag = no CPUID = no CR4 | 322 | jz 6f # No ID flag = no CPUID = no CR4 |
310 | 323 | ||
324 | movl $1,%eax | ||
325 | cpuid | ||
326 | andl $~1,%edx # Ignore CPUID.FPU | ||
327 | jz 6f # No flags or only CPUID.FPU = no CR4 | ||
328 | |||
311 | movl pa(mmu_cr4_features),%eax | 329 | movl pa(mmu_cr4_features),%eax |
312 | movl %eax,%cr4 | 330 | movl %eax,%cr4 |
313 | 331 | ||
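The added CPUID(1) check covers CPUs that do implement CPUID but report no feature bits other than FPU, which the code treats as "no %cr4 either". Expressed as C-style pseudocode of the assembly logic above, purely for illustration (this path runs before C is available; eflags_id_toggles() is a placeholder for the EFLAGS.ID push/pop test, cpuid() and write_cr4() stand in for the raw instructions):

unsigned int eax, ebx, ecx, edx;

if (!eflags_id_toggles())	/* EFLAGS.ID stuck => no CPUID => no CR4 */
	return;

cpuid(1, &eax, &ebx, &ecx, &edx);
if (!(edx & ~1U))		/* nothing set besides CPUID.FPU (bit 0) */
	return;			/* => assume there is no CR4 */

write_cr4(mmu_cr4_features);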
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 94bf9cc2c7ee..980053c4b9cc 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -252,6 +252,22 @@ ENTRY(secondary_startup_64) | |||
252 | pushq %rax # target address in negative space | 252 | pushq %rax # target address in negative space |
253 | lretq | 253 | lretq |
254 | 254 | ||
255 | #ifdef CONFIG_HOTPLUG_CPU | ||
256 | /* | ||
257 | * Boot CPU0 entry point. It's called from play_dead(). Everything has been set | ||
258 | * up already except stack. We just set up stack here. Then call | ||
259 | * start_secondary(). | ||
260 | */ | ||
261 | ENTRY(start_cpu0) | ||
262 | movq stack_start(%rip),%rsp | ||
263 | movq initial_code(%rip),%rax | ||
264 | pushq $0 # fake return address to stop unwinder | ||
265 | pushq $__KERNEL_CS # set correct cs | ||
266 | pushq %rax # target address in negative space | ||
267 | lretq | ||
268 | ENDPROC(start_cpu0) | ||
269 | #endif | ||
270 | |||
255 | /* SMP bootup changes these two */ | 271 | /* SMP bootup changes these two */ |
256 | __REFDATA | 272 | __REFDATA |
257 | .align 8 | 273 | .align 8 |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 1460a5df92f7..e28670f9a589 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -434,7 +434,7 @@ void hpet_msi_unmask(struct irq_data *data) | |||
434 | 434 | ||
435 | /* unmask it */ | 435 | /* unmask it */ |
436 | cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); | 436 | cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); |
437 | cfg |= HPET_TN_FSB; | 437 | cfg |= HPET_TN_ENABLE | HPET_TN_FSB; |
438 | hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); | 438 | hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); |
439 | } | 439 | } |
440 | 440 | ||
@@ -445,7 +445,7 @@ void hpet_msi_mask(struct irq_data *data) | |||
445 | 445 | ||
446 | /* mask it */ | 446 | /* mask it */ |
447 | cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); | 447 | cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); |
448 | cfg &= ~HPET_TN_FSB; | 448 | cfg &= ~(HPET_TN_ENABLE | HPET_TN_FSB); |
449 | hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); | 449 | hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); |
450 | } | 450 | } |
451 | 451 | ||
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 675a05012449..245a71db401a 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -175,7 +175,11 @@ void __cpuinit fpu_init(void) | |||
175 | cr0 |= X86_CR0_EM; | 175 | cr0 |= X86_CR0_EM; |
176 | write_cr0(cr0); | 176 | write_cr0(cr0); |
177 | 177 | ||
178 | if (!smp_processor_id()) | 178 | /* |
179 | * init_thread_xstate is only called once to avoid overriding | ||
180 | * xstate_size during boot time or during CPU hotplug. | ||
181 | */ | ||
182 | if (xstate_size == 0) | ||
179 | init_thread_xstate(); | 183 | init_thread_xstate(); |
180 | 184 | ||
181 | mxcsr_feature_mask_init(); | 185 | mxcsr_feature_mask_init(); |
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 6e03b0d69138..7dc4e459c2b3 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -42,39 +42,6 @@ | |||
42 | * (these are usually mapped into the 0x30-0xff vector range) | 42 | * (these are usually mapped into the 0x30-0xff vector range) |
43 | */ | 43 | */ |
44 | 44 | ||
45 | #ifdef CONFIG_X86_32 | ||
46 | /* | ||
47 | * Note that on a 486, we don't want to do a SIGFPE on an irq13 | ||
48 | * as the irq is unreliable, and exception 16 works correctly | ||
49 | * (ie as explained in the intel literature). On a 386, you | ||
50 | * can't use exception 16 due to bad IBM design, so we have to | ||
51 | * rely on the less exact irq13. | ||
52 | * | ||
53 | * Careful.. Not only is IRQ13 unreliable, but it is also | ||
54 | * leads to races. IBM designers who came up with it should | ||
55 | * be shot. | ||
56 | */ | ||
57 | |||
58 | static irqreturn_t math_error_irq(int cpl, void *dev_id) | ||
59 | { | ||
60 | outb(0, 0xF0); | ||
61 | if (ignore_fpu_irq || !boot_cpu_data.hard_math) | ||
62 | return IRQ_NONE; | ||
63 | math_error(get_irq_regs(), 0, X86_TRAP_MF); | ||
64 | return IRQ_HANDLED; | ||
65 | } | ||
66 | |||
67 | /* | ||
68 | * New motherboards sometimes make IRQ 13 be a PCI interrupt, | ||
69 | * so allow interrupt sharing. | ||
70 | */ | ||
71 | static struct irqaction fpu_irq = { | ||
72 | .handler = math_error_irq, | ||
73 | .name = "fpu", | ||
74 | .flags = IRQF_NO_THREAD, | ||
75 | }; | ||
76 | #endif | ||
77 | |||
78 | /* | 45 | /* |
79 | * IRQ2 is cascade interrupt to second interrupt controller | 46 | * IRQ2 is cascade interrupt to second interrupt controller |
80 | */ | 47 | */ |
@@ -242,13 +209,6 @@ void __init native_init_IRQ(void) | |||
242 | setup_irq(2, &irq2); | 209 | setup_irq(2, &irq2); |
243 | 210 | ||
244 | #ifdef CONFIG_X86_32 | 211 | #ifdef CONFIG_X86_32 |
245 | /* | ||
246 | * External FPU? Set up irq13 if so, for | ||
247 | * original braindamaged IBM FERR coupling. | ||
248 | */ | ||
249 | if (boot_cpu_data.hard_math && !cpu_has_fpu) | ||
250 | setup_irq(FPU_IRQ, &fpu_irq); | ||
251 | |||
252 | irq_ctx_init(smp_processor_id()); | 212 | irq_ctx_init(smp_processor_id()); |
253 | #endif | 213 | #endif |
254 | } | 214 | } |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 4180a874c764..9c2bd8bd4b4c 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -42,6 +42,8 @@ | |||
42 | #include <asm/apic.h> | 42 | #include <asm/apic.h> |
43 | #include <asm/apicdef.h> | 43 | #include <asm/apicdef.h> |
44 | #include <asm/hypervisor.h> | 44 | #include <asm/hypervisor.h> |
45 | #include <asm/kvm_guest.h> | ||
46 | #include <asm/context_tracking.h> | ||
45 | 47 | ||
46 | static int kvmapf = 1; | 48 | static int kvmapf = 1; |
47 | 49 | ||
@@ -62,6 +64,15 @@ static int parse_no_stealacc(char *arg) | |||
62 | 64 | ||
63 | early_param("no-steal-acc", parse_no_stealacc); | 65 | early_param("no-steal-acc", parse_no_stealacc); |
64 | 66 | ||
67 | static int kvmclock_vsyscall = 1; | ||
68 | static int parse_no_kvmclock_vsyscall(char *arg) | ||
69 | { | ||
70 | kvmclock_vsyscall = 0; | ||
71 | return 0; | ||
72 | } | ||
73 | |||
74 | early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall); | ||
75 | |||
65 | static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); | 76 | static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); |
66 | static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); | 77 | static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); |
67 | static int has_steal_clock = 0; | 78 | static int has_steal_clock = 0; |
@@ -110,11 +121,8 @@ void kvm_async_pf_task_wait(u32 token) | |||
110 | struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; | 121 | struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; |
111 | struct kvm_task_sleep_node n, *e; | 122 | struct kvm_task_sleep_node n, *e; |
112 | DEFINE_WAIT(wait); | 123 | DEFINE_WAIT(wait); |
113 | int cpu, idle; | ||
114 | 124 | ||
115 | cpu = get_cpu(); | 125 | rcu_irq_enter(); |
116 | idle = idle_cpu(cpu); | ||
117 | put_cpu(); | ||
118 | 126 | ||
119 | spin_lock(&b->lock); | 127 | spin_lock(&b->lock); |
120 | e = _find_apf_task(b, token); | 128 | e = _find_apf_task(b, token); |
@@ -123,12 +131,14 @@ void kvm_async_pf_task_wait(u32 token) | |||
123 | hlist_del(&e->link); | 131 | hlist_del(&e->link); |
124 | kfree(e); | 132 | kfree(e); |
125 | spin_unlock(&b->lock); | 133 | spin_unlock(&b->lock); |
134 | |||
135 | rcu_irq_exit(); | ||
126 | return; | 136 | return; |
127 | } | 137 | } |
128 | 138 | ||
129 | n.token = token; | 139 | n.token = token; |
130 | n.cpu = smp_processor_id(); | 140 | n.cpu = smp_processor_id(); |
131 | n.halted = idle || preempt_count() > 1; | 141 | n.halted = is_idle_task(current) || preempt_count() > 1; |
132 | init_waitqueue_head(&n.wq); | 142 | init_waitqueue_head(&n.wq); |
133 | hlist_add_head(&n.link, &b->list); | 143 | hlist_add_head(&n.link, &b->list); |
134 | spin_unlock(&b->lock); | 144 | spin_unlock(&b->lock); |
@@ -147,13 +157,16 @@ void kvm_async_pf_task_wait(u32 token) | |||
147 | /* | 157 | /* |
148 | * We cannot reschedule. So halt. | 158 | * We cannot reschedule. So halt. |
149 | */ | 159 | */ |
160 | rcu_irq_exit(); | ||
150 | native_safe_halt(); | 161 | native_safe_halt(); |
162 | rcu_irq_enter(); | ||
151 | local_irq_disable(); | 163 | local_irq_disable(); |
152 | } | 164 | } |
153 | } | 165 | } |
154 | if (!n.halted) | 166 | if (!n.halted) |
155 | finish_wait(&n.wq, &wait); | 167 | finish_wait(&n.wq, &wait); |
156 | 168 | ||
169 | rcu_irq_exit(); | ||
157 | return; | 170 | return; |
158 | } | 171 | } |
159 | EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait); | 172 | EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait); |
@@ -247,10 +260,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
247 | break; | 260 | break; |
248 | case KVM_PV_REASON_PAGE_NOT_PRESENT: | 261 | case KVM_PV_REASON_PAGE_NOT_PRESENT: |
249 | /* page is swapped out by the host. */ | 262 | /* page is swapped out by the host. */ |
250 | rcu_irq_enter(); | 263 | exception_enter(regs); |
251 | exit_idle(); | 264 | exit_idle(); |
252 | kvm_async_pf_task_wait((u32)read_cr2()); | 265 | kvm_async_pf_task_wait((u32)read_cr2()); |
253 | rcu_irq_exit(); | 266 | exception_exit(regs); |
254 | break; | 267 | break; |
255 | case KVM_PV_REASON_PAGE_READY: | 268 | case KVM_PV_REASON_PAGE_READY: |
256 | rcu_irq_enter(); | 269 | rcu_irq_enter(); |
@@ -471,6 +484,9 @@ void __init kvm_guest_init(void) | |||
471 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) | 484 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) |
472 | apic_set_eoi_write(kvm_guest_apic_eoi_write); | 485 | apic_set_eoi_write(kvm_guest_apic_eoi_write); |
473 | 486 | ||
487 | if (kvmclock_vsyscall) | ||
488 | kvm_setup_vsyscall_timeinfo(); | ||
489 | |||
474 | #ifdef CONFIG_SMP | 490 | #ifdef CONFIG_SMP |
475 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; | 491 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; |
476 | register_cpu_notifier(&kvm_cpu_notifier); | 492 | register_cpu_notifier(&kvm_cpu_notifier); |
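[Editor's note] The kvm.c hunks above drop the idle_cpu() bookkeeping in favour of is_idle_task(current) and bracket the halt in the async page-fault wait path with rcu_irq_exit()/rcu_irq_enter(). A minimal sketch of that bracketing pattern follows; it is an illustration only, not part of the patch, and event_pending() is a hypothetical wakeup predicate.

    /*
     * The async-PF exception can arrive while RCU considers the CPU idle,
     * so the handler announces itself with rcu_irq_enter().  Before each
     * potentially unbounded halt it steps back out, letting RCU treat the
     * CPU as quiescent, and re-enters once an interrupt wakes it.
     */
    rcu_irq_enter();
    while (!event_pending()) {          /* hypothetical predicate */
            rcu_irq_exit();             /* the halt may last a long time */
            native_safe_halt();         /* sti; hlt */
            rcu_irq_enter();
            local_irq_disable();
    }
    rcu_irq_exit();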
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index f1b42b3a186c..220a360010f8 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <asm/apic.h> | 23 | #include <asm/apic.h> |
24 | #include <linux/percpu.h> | 24 | #include <linux/percpu.h> |
25 | #include <linux/hardirq.h> | 25 | #include <linux/hardirq.h> |
26 | #include <linux/memblock.h> | ||
26 | 27 | ||
27 | #include <asm/x86_init.h> | 28 | #include <asm/x86_init.h> |
28 | #include <asm/reboot.h> | 29 | #include <asm/reboot.h> |
@@ -39,7 +40,7 @@ static int parse_no_kvmclock(char *arg) | |||
39 | early_param("no-kvmclock", parse_no_kvmclock); | 40 | early_param("no-kvmclock", parse_no_kvmclock); |
40 | 41 | ||
41 | /* The hypervisor will put information about time periodically here */ | 42 | /* The hypervisor will put information about time periodically here */ |
42 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock); | 43 | static struct pvclock_vsyscall_time_info *hv_clock; |
43 | static struct pvclock_wall_clock wall_clock; | 44 | static struct pvclock_wall_clock wall_clock; |
44 | 45 | ||
45 | /* | 46 | /* |
@@ -52,15 +53,20 @@ static unsigned long kvm_get_wallclock(void) | |||
52 | struct pvclock_vcpu_time_info *vcpu_time; | 53 | struct pvclock_vcpu_time_info *vcpu_time; |
53 | struct timespec ts; | 54 | struct timespec ts; |
54 | int low, high; | 55 | int low, high; |
56 | int cpu; | ||
55 | 57 | ||
56 | low = (int)__pa_symbol(&wall_clock); | 58 | low = (int)__pa_symbol(&wall_clock); |
57 | high = ((u64)__pa_symbol(&wall_clock) >> 32); | 59 | high = ((u64)__pa_symbol(&wall_clock) >> 32); |
58 | 60 | ||
59 | native_write_msr(msr_kvm_wall_clock, low, high); | 61 | native_write_msr(msr_kvm_wall_clock, low, high); |
60 | 62 | ||
61 | vcpu_time = &get_cpu_var(hv_clock); | 63 | preempt_disable(); |
64 | cpu = smp_processor_id(); | ||
65 | |||
66 | vcpu_time = &hv_clock[cpu].pvti; | ||
62 | pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); | 67 | pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); |
63 | put_cpu_var(hv_clock); | 68 | |
69 | preempt_enable(); | ||
64 | 70 | ||
65 | return ts.tv_sec; | 71 | return ts.tv_sec; |
66 | } | 72 | } |
@@ -74,9 +80,11 @@ static cycle_t kvm_clock_read(void) | |||
74 | { | 80 | { |
75 | struct pvclock_vcpu_time_info *src; | 81 | struct pvclock_vcpu_time_info *src; |
76 | cycle_t ret; | 82 | cycle_t ret; |
83 | int cpu; | ||
77 | 84 | ||
78 | preempt_disable_notrace(); | 85 | preempt_disable_notrace(); |
79 | src = &__get_cpu_var(hv_clock); | 86 | cpu = smp_processor_id(); |
87 | src = &hv_clock[cpu].pvti; | ||
80 | ret = pvclock_clocksource_read(src); | 88 | ret = pvclock_clocksource_read(src); |
81 | preempt_enable_notrace(); | 89 | preempt_enable_notrace(); |
82 | return ret; | 90 | return ret; |
@@ -99,8 +107,15 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs) | |||
99 | static unsigned long kvm_get_tsc_khz(void) | 107 | static unsigned long kvm_get_tsc_khz(void) |
100 | { | 108 | { |
101 | struct pvclock_vcpu_time_info *src; | 109 | struct pvclock_vcpu_time_info *src; |
102 | src = &per_cpu(hv_clock, 0); | 110 | int cpu; |
103 | return pvclock_tsc_khz(src); | 111 | unsigned long tsc_khz; |
112 | |||
113 | preempt_disable(); | ||
114 | cpu = smp_processor_id(); | ||
115 | src = &hv_clock[cpu].pvti; | ||
116 | tsc_khz = pvclock_tsc_khz(src); | ||
117 | preempt_enable(); | ||
118 | return tsc_khz; | ||
104 | } | 119 | } |
105 | 120 | ||
106 | static void kvm_get_preset_lpj(void) | 121 | static void kvm_get_preset_lpj(void) |
@@ -119,10 +134,14 @@ bool kvm_check_and_clear_guest_paused(void) | |||
119 | { | 134 | { |
120 | bool ret = false; | 135 | bool ret = false; |
121 | struct pvclock_vcpu_time_info *src; | 136 | struct pvclock_vcpu_time_info *src; |
137 | int cpu = smp_processor_id(); | ||
122 | 138 | ||
123 | src = &__get_cpu_var(hv_clock); | 139 | if (!hv_clock) |
140 | return ret; | ||
141 | |||
142 | src = &hv_clock[cpu].pvti; | ||
124 | if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) { | 143 | if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) { |
125 | __this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED); | 144 | src->flags &= ~PVCLOCK_GUEST_STOPPED; |
126 | ret = true; | 145 | ret = true; |
127 | } | 146 | } |
128 | 147 | ||
@@ -141,9 +160,10 @@ int kvm_register_clock(char *txt) | |||
141 | { | 160 | { |
142 | int cpu = smp_processor_id(); | 161 | int cpu = smp_processor_id(); |
143 | int low, high, ret; | 162 | int low, high, ret; |
163 | struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti; | ||
144 | 164 | ||
145 | low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; | 165 | low = (int)__pa(src) | 1; |
146 | high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); | 166 | high = ((u64)__pa(src) >> 32); |
147 | ret = native_write_msr_safe(msr_kvm_system_time, low, high); | 167 | ret = native_write_msr_safe(msr_kvm_system_time, low, high); |
148 | printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", | 168 | printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", |
149 | cpu, high, low, txt); | 169 | cpu, high, low, txt); |
@@ -197,6 +217,8 @@ static void kvm_shutdown(void) | |||
197 | 217 | ||
198 | void __init kvmclock_init(void) | 218 | void __init kvmclock_init(void) |
199 | { | 219 | { |
220 | unsigned long mem; | ||
221 | |||
200 | if (!kvm_para_available()) | 222 | if (!kvm_para_available()) |
201 | return; | 223 | return; |
202 | 224 | ||
@@ -209,8 +231,18 @@ void __init kvmclock_init(void) | |||
209 | printk(KERN_INFO "kvm-clock: Using msrs %x and %x", | 231 | printk(KERN_INFO "kvm-clock: Using msrs %x and %x", |
210 | msr_kvm_system_time, msr_kvm_wall_clock); | 232 | msr_kvm_system_time, msr_kvm_wall_clock); |
211 | 233 | ||
212 | if (kvm_register_clock("boot clock")) | 234 | mem = memblock_alloc(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS, |
235 | PAGE_SIZE); | ||
236 | if (!mem) | ||
237 | return; | ||
238 | hv_clock = __va(mem); | ||
239 | |||
240 | if (kvm_register_clock("boot clock")) { | ||
241 | hv_clock = NULL; | ||
242 | memblock_free(mem, | ||
243 | sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); | ||
213 | return; | 244 | return; |
245 | } | ||
214 | pv_time_ops.sched_clock = kvm_clock_read; | 246 | pv_time_ops.sched_clock = kvm_clock_read; |
215 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; | 247 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; |
216 | x86_platform.get_wallclock = kvm_get_wallclock; | 248 | x86_platform.get_wallclock = kvm_get_wallclock; |
@@ -233,3 +265,37 @@ void __init kvmclock_init(void) | |||
233 | if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) | 265 | if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) |
234 | pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); | 266 | pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); |
235 | } | 267 | } |
268 | |||
269 | int __init kvm_setup_vsyscall_timeinfo(void) | ||
270 | { | ||
271 | #ifdef CONFIG_X86_64 | ||
272 | int cpu; | ||
273 | int ret; | ||
274 | u8 flags; | ||
275 | struct pvclock_vcpu_time_info *vcpu_time; | ||
276 | unsigned int size; | ||
277 | |||
278 | size = sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS; | ||
279 | |||
280 | preempt_disable(); | ||
281 | cpu = smp_processor_id(); | ||
282 | |||
283 | vcpu_time = &hv_clock[cpu].pvti; | ||
284 | flags = pvclock_read_flags(vcpu_time); | ||
285 | |||
286 | if (!(flags & PVCLOCK_TSC_STABLE_BIT)) { | ||
287 | preempt_enable(); | ||
288 | return 1; | ||
289 | } | ||
290 | |||
291 | if ((ret = pvclock_init_vsyscall(hv_clock, size))) { | ||
292 | preempt_enable(); | ||
293 | return ret; | ||
294 | } | ||
295 | |||
296 | preempt_enable(); | ||
297 | |||
298 | kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; | ||
299 | #endif | ||
300 | return 0; | ||
301 | } | ||
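[Editor's note] The kvmclock.c hunk above replaces the per-CPU hv_clock variable with one contiguous, page-aligned array allocated from memblock, so the same memory can later be mapped into the vsyscall fixmap. A hedged sketch of that allocate-then-index pattern, reusing the structure name from the patch and trimming error reporting:

    static struct pvclock_vsyscall_time_info *hv_clock;

    /* boot-time allocation: one slot per possible CPU, page aligned */
    static int __init hv_clock_alloc(void)
    {
            unsigned long mem;

            mem = memblock_alloc(sizeof(*hv_clock) * NR_CPUS, PAGE_SIZE);
            if (!mem)                   /* allocation failed */
                    return -ENOMEM;
            hv_clock = __va(mem);       /* memblock handed back a physical address */
            return 0;
    }

    /* readers index the array by CPU, with preemption disabled */
    static struct pvclock_vcpu_time_info *this_cpu_pvti(void)
    {
            return &hv_clock[smp_processor_id()].pvti;
    }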
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 7720ff5a9ee2..efdec7cd8e01 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
@@ -8,8 +8,8 @@ | |||
8 | * Tigran Aivazian <tigran@aivazian.fsnet.co.uk> | 8 | * Tigran Aivazian <tigran@aivazian.fsnet.co.uk> |
9 | * | 9 | * |
10 | * Maintainers: | 10 | * Maintainers: |
11 | * Andreas Herrmann <andreas.herrmann3@amd.com> | 11 | * Andreas Herrmann <herrmann.der.user@googlemail.com> |
12 | * Borislav Petkov <borislav.petkov@amd.com> | 12 | * Borislav Petkov <bp@alien8.de> |
13 | * | 13 | * |
14 | * This driver allows to upgrade microcode on F10h AMD | 14 | * This driver allows to upgrade microcode on F10h AMD |
15 | * CPUs and later. | 15 | * CPUs and later. |
@@ -190,6 +190,7 @@ static unsigned int verify_patch_size(int cpu, u32 patch_size, | |||
190 | #define F1XH_MPB_MAX_SIZE 2048 | 190 | #define F1XH_MPB_MAX_SIZE 2048 |
191 | #define F14H_MPB_MAX_SIZE 1824 | 191 | #define F14H_MPB_MAX_SIZE 1824 |
192 | #define F15H_MPB_MAX_SIZE 4096 | 192 | #define F15H_MPB_MAX_SIZE 4096 |
193 | #define F16H_MPB_MAX_SIZE 3458 | ||
193 | 194 | ||
194 | switch (c->x86) { | 195 | switch (c->x86) { |
195 | case 0x14: | 196 | case 0x14: |
@@ -198,6 +199,9 @@ static unsigned int verify_patch_size(int cpu, u32 patch_size, | |||
198 | case 0x15: | 199 | case 0x15: |
199 | max_size = F15H_MPB_MAX_SIZE; | 200 | max_size = F15H_MPB_MAX_SIZE; |
200 | break; | 201 | break; |
202 | case 0x16: | ||
203 | max_size = F16H_MPB_MAX_SIZE; | ||
204 | break; | ||
201 | default: | 205 | default: |
202 | max_size = F1XH_MPB_MAX_SIZE; | 206 | max_size = F1XH_MPB_MAX_SIZE; |
203 | break; | 207 | break; |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index de2b7ad70273..0f5dec5c80e0 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -265,7 +265,7 @@ rootfs_initcall(pci_iommu_init); | |||
265 | #ifdef CONFIG_PCI | 265 | #ifdef CONFIG_PCI |
266 | /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ | 266 | /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ |
267 | 267 | ||
268 | static __devinit void via_no_dac(struct pci_dev *dev) | 268 | static void via_no_dac(struct pci_dev *dev) |
269 | { | 269 | { |
270 | if (forbid_dac == 0) { | 270 | if (forbid_dac == 0) { |
271 | dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n"); | 271 | dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n"); |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index b644e1c765dc..2ed787f15bf0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -262,36 +262,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
262 | propagate_user_return_notify(prev_p, next_p); | 262 | propagate_user_return_notify(prev_p, next_p); |
263 | } | 263 | } |
264 | 264 | ||
265 | int sys_fork(struct pt_regs *regs) | ||
266 | { | ||
267 | return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); | ||
268 | } | ||
269 | |||
270 | /* | ||
271 | * This is trivial, and on the face of it looks like it | ||
272 | * could equally well be done in user mode. | ||
273 | * | ||
274 | * Not so, for quite unobvious reasons - register pressure. | ||
275 | * In user mode vfork() cannot have a stack frame, and if | ||
276 | * done by calling the "clone()" system call directly, you | ||
277 | * do not have enough call-clobbered registers to hold all | ||
278 | * the information you need. | ||
279 | */ | ||
280 | int sys_vfork(struct pt_regs *regs) | ||
281 | { | ||
282 | return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, | ||
283 | NULL, NULL); | ||
284 | } | ||
285 | |||
286 | long | ||
287 | sys_clone(unsigned long clone_flags, unsigned long newsp, | ||
288 | void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) | ||
289 | { | ||
290 | if (!newsp) | ||
291 | newsp = regs->sp; | ||
292 | return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); | ||
293 | } | ||
294 | |||
295 | /* | 265 | /* |
296 | * Idle related variables and functions | 266 | * Idle related variables and functions |
297 | */ | 267 | */ |
@@ -306,11 +276,6 @@ void (*pm_idle)(void); | |||
306 | EXPORT_SYMBOL(pm_idle); | 276 | EXPORT_SYMBOL(pm_idle); |
307 | #endif | 277 | #endif |
308 | 278 | ||
309 | static inline int hlt_use_halt(void) | ||
310 | { | ||
311 | return 1; | ||
312 | } | ||
313 | |||
314 | #ifndef CONFIG_SMP | 279 | #ifndef CONFIG_SMP |
315 | static inline void play_dead(void) | 280 | static inline void play_dead(void) |
316 | { | 281 | { |
@@ -410,28 +375,22 @@ void cpu_idle(void) | |||
410 | */ | 375 | */ |
411 | void default_idle(void) | 376 | void default_idle(void) |
412 | { | 377 | { |
413 | if (hlt_use_halt()) { | 378 | trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); |
414 | trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); | 379 | trace_cpu_idle_rcuidle(1, smp_processor_id()); |
415 | trace_cpu_idle_rcuidle(1, smp_processor_id()); | 380 | current_thread_info()->status &= ~TS_POLLING; |
416 | current_thread_info()->status &= ~TS_POLLING; | 381 | /* |
417 | /* | 382 | * TS_POLLING-cleared state must be visible before we |
418 | * TS_POLLING-cleared state must be visible before we | 383 | * test NEED_RESCHED: |
419 | * test NEED_RESCHED: | 384 | */ |
420 | */ | 385 | smp_mb(); |
421 | smp_mb(); | ||
422 | 386 | ||
423 | if (!need_resched()) | 387 | if (!need_resched()) |
424 | safe_halt(); /* enables interrupts racelessly */ | 388 | safe_halt(); /* enables interrupts racelessly */ |
425 | else | 389 | else |
426 | local_irq_enable(); | ||
427 | current_thread_info()->status |= TS_POLLING; | ||
428 | trace_power_end_rcuidle(smp_processor_id()); | ||
429 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); | ||
430 | } else { | ||
431 | local_irq_enable(); | 390 | local_irq_enable(); |
432 | /* loop is done by the caller */ | 391 | current_thread_info()->status |= TS_POLLING; |
433 | cpu_relax(); | 392 | trace_power_end_rcuidle(smp_processor_id()); |
434 | } | 393 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); |
435 | } | 394 | } |
436 | #ifdef CONFIG_APM_MODULE | 395 | #ifdef CONFIG_APM_MODULE |
437 | EXPORT_SYMBOL(default_idle); | 396 | EXPORT_SYMBOL(default_idle); |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 44e0bff38e72..b5a8905785e6 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -128,8 +128,7 @@ void release_thread(struct task_struct *dead_task) | |||
128 | } | 128 | } |
129 | 129 | ||
130 | int copy_thread(unsigned long clone_flags, unsigned long sp, | 130 | int copy_thread(unsigned long clone_flags, unsigned long sp, |
131 | unsigned long arg, | 131 | unsigned long arg, struct task_struct *p) |
132 | struct task_struct *p, struct pt_regs *regs) | ||
133 | { | 132 | { |
134 | struct pt_regs *childregs = task_pt_regs(p); | 133 | struct pt_regs *childregs = task_pt_regs(p); |
135 | struct task_struct *tsk; | 134 | struct task_struct *tsk; |
@@ -138,7 +137,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
138 | p->thread.sp = (unsigned long) childregs; | 137 | p->thread.sp = (unsigned long) childregs; |
139 | p->thread.sp0 = (unsigned long) (childregs+1); | 138 | p->thread.sp0 = (unsigned long) (childregs+1); |
140 | 139 | ||
141 | if (unlikely(!regs)) { | 140 | if (unlikely(p->flags & PF_KTHREAD)) { |
142 | /* kernel thread */ | 141 | /* kernel thread */ |
143 | memset(childregs, 0, sizeof(struct pt_regs)); | 142 | memset(childregs, 0, sizeof(struct pt_regs)); |
144 | p->thread.ip = (unsigned long) ret_from_kernel_thread; | 143 | p->thread.ip = (unsigned long) ret_from_kernel_thread; |
@@ -156,12 +155,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
156 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | 155 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); |
157 | return 0; | 156 | return 0; |
158 | } | 157 | } |
159 | *childregs = *regs; | 158 | *childregs = *current_pt_regs(); |
160 | childregs->ax = 0; | 159 | childregs->ax = 0; |
161 | childregs->sp = sp; | 160 | if (sp) |
161 | childregs->sp = sp; | ||
162 | 162 | ||
163 | p->thread.ip = (unsigned long) ret_from_fork; | 163 | p->thread.ip = (unsigned long) ret_from_fork; |
164 | task_user_gs(p) = get_user_gs(regs); | 164 | task_user_gs(p) = get_user_gs(current_pt_regs()); |
165 | 165 | ||
166 | p->fpu_counter = 0; | 166 | p->fpu_counter = 0; |
167 | p->thread.io_bitmap_ptr = NULL; | 167 | p->thread.io_bitmap_ptr = NULL; |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 16c6365e2b86..6e68a6194965 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -146,8 +146,7 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls) | |||
146 | } | 146 | } |
147 | 147 | ||
148 | int copy_thread(unsigned long clone_flags, unsigned long sp, | 148 | int copy_thread(unsigned long clone_flags, unsigned long sp, |
149 | unsigned long arg, | 149 | unsigned long arg, struct task_struct *p) |
150 | struct task_struct *p, struct pt_regs *regs) | ||
151 | { | 150 | { |
152 | int err; | 151 | int err; |
153 | struct pt_regs *childregs; | 152 | struct pt_regs *childregs; |
@@ -169,7 +168,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
169 | savesegment(ds, p->thread.ds); | 168 | savesegment(ds, p->thread.ds); |
170 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | 169 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); |
171 | 170 | ||
172 | if (unlikely(!regs)) { | 171 | if (unlikely(p->flags & PF_KTHREAD)) { |
173 | /* kernel thread */ | 172 | /* kernel thread */ |
174 | memset(childregs, 0, sizeof(struct pt_regs)); | 173 | memset(childregs, 0, sizeof(struct pt_regs)); |
175 | childregs->sp = (unsigned long)childregs; | 174 | childregs->sp = (unsigned long)childregs; |
@@ -181,10 +180,11 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
181 | childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; | 180 | childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; |
182 | return 0; | 181 | return 0; |
183 | } | 182 | } |
184 | *childregs = *regs; | 183 | *childregs = *current_pt_regs(); |
185 | 184 | ||
186 | childregs->ax = 0; | 185 | childregs->ax = 0; |
187 | childregs->sp = sp; | 186 | if (sp) |
187 | childregs->sp = sp; | ||
188 | 188 | ||
189 | err = -ENOMEM; | 189 | err = -ENOMEM; |
190 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | 190 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); |
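[Editor's note] With sys_fork/sys_vfork/sys_clone gone from process.c, the copy_thread() rework in process_32.c and process_64.c above derives the child's frame from current_pt_regs() and treats a zero sp as "inherit the parent's stack". A short sketch of the user-thread branch; the wrapper function is the editor's, only the three assignments mirror the patch:

    static void sketch_user_child_frame(struct pt_regs *childregs, unsigned long sp)
    {
            *childregs = *current_pt_regs();    /* start from the parent's frame    */
            childregs->ax = 0;                  /* the child sees 0 from fork/clone */
            if (sp)                             /* clone() supplied a new stack ... */
                    childregs->sp = sp;         /* ... otherwise keep the parent's  */
    }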
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index b00b33a18390..b629bbe0d9bd 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -22,6 +22,8 @@ | |||
22 | #include <linux/perf_event.h> | 22 | #include <linux/perf_event.h> |
23 | #include <linux/hw_breakpoint.h> | 23 | #include <linux/hw_breakpoint.h> |
24 | #include <linux/rcupdate.h> | 24 | #include <linux/rcupdate.h> |
25 | #include <linux/module.h> | ||
26 | #include <linux/context_tracking.h> | ||
25 | 27 | ||
26 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
27 | #include <asm/pgtable.h> | 29 | #include <asm/pgtable.h> |
@@ -166,6 +168,35 @@ static inline bool invalid_selector(u16 value) | |||
166 | 168 | ||
167 | #define FLAG_MASK FLAG_MASK_32 | 169 | #define FLAG_MASK FLAG_MASK_32 |
168 | 170 | ||
171 | /* | ||
172 | * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode | ||
173 | * when it traps. The previous stack will be directly underneath the saved | ||
174 | * registers, and 'sp/ss' won't even have been saved. Thus the '&regs->sp'. | ||
175 | * | ||
176 | * Now, if the stack is empty, '&regs->sp' is out of range. In this | ||
177 | * case we try to take the previous stack. To always return a non-null | ||
178 | * stack pointer we fall back to regs as stack if no previous stack | ||
179 | * exists. | ||
180 | * | ||
181 | * This is valid only for kernel mode traps. | ||
182 | */ | ||
183 | unsigned long kernel_stack_pointer(struct pt_regs *regs) | ||
184 | { | ||
185 | unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1); | ||
186 | unsigned long sp = (unsigned long)&regs->sp; | ||
187 | struct thread_info *tinfo; | ||
188 | |||
189 | if (context == (sp & ~(THREAD_SIZE - 1))) | ||
190 | return sp; | ||
191 | |||
192 | tinfo = (struct thread_info *)context; | ||
193 | if (tinfo->previous_esp) | ||
194 | return tinfo->previous_esp; | ||
195 | |||
196 | return (unsigned long)regs; | ||
197 | } | ||
198 | EXPORT_SYMBOL_GPL(kernel_stack_pointer); | ||
199 | |||
169 | static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) | 200 | static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) |
170 | { | 201 | { |
171 | BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); | 202 | BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); |
@@ -1461,7 +1492,7 @@ long syscall_trace_enter(struct pt_regs *regs) | |||
1461 | { | 1492 | { |
1462 | long ret = 0; | 1493 | long ret = 0; |
1463 | 1494 | ||
1464 | rcu_user_exit(); | 1495 | user_exit(); |
1465 | 1496 | ||
1466 | /* | 1497 | /* |
1467 | * If we stepped into a sysenter/syscall insn, it trapped in | 1498 | * If we stepped into a sysenter/syscall insn, it trapped in |
@@ -1511,6 +1542,13 @@ void syscall_trace_leave(struct pt_regs *regs) | |||
1511 | { | 1542 | { |
1512 | bool step; | 1543 | bool step; |
1513 | 1544 | ||
1545 | /* | ||
1546 | * We may come here right after calling schedule_user() | ||
1547 | * or do_notify_resume(), in which case we can be in RCU | ||
1548 | * user mode. | ||
1549 | */ | ||
1550 | user_exit(); | ||
1551 | |||
1514 | audit_syscall_exit(regs); | 1552 | audit_syscall_exit(regs); |
1515 | 1553 | ||
1516 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) | 1554 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) |
@@ -1527,5 +1565,5 @@ void syscall_trace_leave(struct pt_regs *regs) | |||
1527 | if (step || test_thread_flag(TIF_SYSCALL_TRACE)) | 1565 | if (step || test_thread_flag(TIF_SYSCALL_TRACE)) |
1528 | tracehook_report_syscall_exit(regs, step); | 1566 | tracehook_report_syscall_exit(regs, step); |
1529 | 1567 | ||
1530 | rcu_user_enter(); | 1568 | user_enter(); |
1531 | } | 1569 | } |
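[Editor's note] kernel_stack_pointer(), added above, decides whether &regs->sp still lies on the kernel stack that regs itself sits on by comparing THREAD_SIZE-aligned bases. A one-line sketch of that masking check (helper name is the editor's; THREAD_SIZE is a power of two):

    static inline bool on_same_kernel_stack(unsigned long a, unsigned long b)
    {
            /* equal once the low THREAD_SIZE-1 bits are cleared */
            return (a & ~(THREAD_SIZE - 1)) == (b & ~(THREAD_SIZE - 1));
    }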
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 42eb3300dfc6..85c39590c1a4 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c | |||
@@ -17,23 +17,13 @@ | |||
17 | 17 | ||
18 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
19 | #include <linux/percpu.h> | 19 | #include <linux/percpu.h> |
20 | #include <linux/notifier.h> | ||
21 | #include <linux/sched.h> | ||
22 | #include <linux/gfp.h> | ||
23 | #include <linux/bootmem.h> | ||
24 | #include <asm/fixmap.h> | ||
20 | #include <asm/pvclock.h> | 25 | #include <asm/pvclock.h> |
21 | 26 | ||
22 | /* | ||
23 | * These are perodically updated | ||
24 | * xen: magic shared_info page | ||
25 | * kvm: gpa registered via msr | ||
26 | * and then copied here. | ||
27 | */ | ||
28 | struct pvclock_shadow_time { | ||
29 | u64 tsc_timestamp; /* TSC at last update of time vals. */ | ||
30 | u64 system_timestamp; /* Time, in nanosecs, since boot. */ | ||
31 | u32 tsc_to_nsec_mul; | ||
32 | int tsc_shift; | ||
33 | u32 version; | ||
34 | u8 flags; | ||
35 | }; | ||
36 | |||
37 | static u8 valid_flags __read_mostly = 0; | 27 | static u8 valid_flags __read_mostly = 0; |
38 | 28 | ||
39 | void pvclock_set_flags(u8 flags) | 29 | void pvclock_set_flags(u8 flags) |
@@ -41,34 +31,6 @@ void pvclock_set_flags(u8 flags) | |||
41 | valid_flags = flags; | 31 | valid_flags = flags; |
42 | } | 32 | } |
43 | 33 | ||
44 | static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow) | ||
45 | { | ||
46 | u64 delta = native_read_tsc() - shadow->tsc_timestamp; | ||
47 | return pvclock_scale_delta(delta, shadow->tsc_to_nsec_mul, | ||
48 | shadow->tsc_shift); | ||
49 | } | ||
50 | |||
51 | /* | ||
52 | * Reads a consistent set of time-base values from hypervisor, | ||
53 | * into a shadow data area. | ||
54 | */ | ||
55 | static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, | ||
56 | struct pvclock_vcpu_time_info *src) | ||
57 | { | ||
58 | do { | ||
59 | dst->version = src->version; | ||
60 | rmb(); /* fetch version before data */ | ||
61 | dst->tsc_timestamp = src->tsc_timestamp; | ||
62 | dst->system_timestamp = src->system_time; | ||
63 | dst->tsc_to_nsec_mul = src->tsc_to_system_mul; | ||
64 | dst->tsc_shift = src->tsc_shift; | ||
65 | dst->flags = src->flags; | ||
66 | rmb(); /* test version after fetching data */ | ||
67 | } while ((src->version & 1) || (dst->version != src->version)); | ||
68 | |||
69 | return dst->version; | ||
70 | } | ||
71 | |||
72 | unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) | 34 | unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) |
73 | { | 35 | { |
74 | u64 pv_tsc_khz = 1000000ULL << 32; | 36 | u64 pv_tsc_khz = 1000000ULL << 32; |
@@ -88,23 +50,32 @@ void pvclock_resume(void) | |||
88 | atomic64_set(&last_value, 0); | 50 | atomic64_set(&last_value, 0); |
89 | } | 51 | } |
90 | 52 | ||
53 | u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src) | ||
54 | { | ||
55 | unsigned version; | ||
56 | cycle_t ret; | ||
57 | u8 flags; | ||
58 | |||
59 | do { | ||
60 | version = __pvclock_read_cycles(src, &ret, &flags); | ||
61 | } while ((src->version & 1) || version != src->version); | ||
62 | |||
63 | return flags & valid_flags; | ||
64 | } | ||
65 | |||
91 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) | 66 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) |
92 | { | 67 | { |
93 | struct pvclock_shadow_time shadow; | ||
94 | unsigned version; | 68 | unsigned version; |
95 | cycle_t ret, offset; | 69 | cycle_t ret; |
96 | u64 last; | 70 | u64 last; |
71 | u8 flags; | ||
97 | 72 | ||
98 | do { | 73 | do { |
99 | version = pvclock_get_time_values(&shadow, src); | 74 | version = __pvclock_read_cycles(src, &ret, &flags); |
100 | barrier(); | 75 | } while ((src->version & 1) || version != src->version); |
101 | offset = pvclock_get_nsec_offset(&shadow); | ||
102 | ret = shadow.system_timestamp + offset; | ||
103 | barrier(); | ||
104 | } while (version != src->version); | ||
105 | 76 | ||
106 | if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) && | 77 | if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) && |
107 | (shadow.flags & PVCLOCK_TSC_STABLE_BIT)) | 78 | (flags & PVCLOCK_TSC_STABLE_BIT)) |
108 | return ret; | 79 | return ret; |
109 | 80 | ||
110 | /* | 81 | /* |
@@ -156,3 +127,71 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, | |||
156 | 127 | ||
157 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); | 128 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); |
158 | } | 129 | } |
130 | |||
131 | static struct pvclock_vsyscall_time_info *pvclock_vdso_info; | ||
132 | |||
133 | static struct pvclock_vsyscall_time_info * | ||
134 | pvclock_get_vsyscall_user_time_info(int cpu) | ||
135 | { | ||
136 | if (!pvclock_vdso_info) { | ||
137 | BUG(); | ||
138 | return NULL; | ||
139 | } | ||
140 | |||
141 | return &pvclock_vdso_info[cpu]; | ||
142 | } | ||
143 | |||
144 | struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu) | ||
145 | { | ||
146 | return &pvclock_get_vsyscall_user_time_info(cpu)->pvti; | ||
147 | } | ||
148 | |||
149 | #ifdef CONFIG_X86_64 | ||
150 | static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l, | ||
151 | void *v) | ||
152 | { | ||
153 | struct task_migration_notifier *mn = v; | ||
154 | struct pvclock_vsyscall_time_info *pvti; | ||
155 | |||
156 | pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu); | ||
157 | |||
158 | /* this is NULL when pvclock vsyscall is not initialized */ | ||
159 | if (unlikely(pvti == NULL)) | ||
160 | return NOTIFY_DONE; | ||
161 | |||
162 | pvti->migrate_count++; | ||
163 | |||
164 | return NOTIFY_DONE; | ||
165 | } | ||
166 | |||
167 | static struct notifier_block pvclock_migrate = { | ||
168 | .notifier_call = pvclock_task_migrate, | ||
169 | }; | ||
170 | |||
171 | /* | ||
172 | * Initialize the generic pvclock vsyscall state. This will allocate | ||
173 | * a/some page(s) for the per-vcpu pvclock information, set up a | ||
174 | * fixmap mapping for the page(s) | ||
175 | */ | ||
176 | |||
177 | int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, | ||
178 | int size) | ||
179 | { | ||
180 | int idx; | ||
181 | |||
182 | WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); | ||
183 | |||
184 | pvclock_vdso_info = i; | ||
185 | |||
186 | for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { | ||
187 | __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, | ||
188 | __pa_symbol(i) + (idx*PAGE_SIZE), | ||
189 | PAGE_KERNEL_VVAR); | ||
190 | } | ||
191 | |||
192 | |||
193 | register_task_migration_notifier(&pvclock_migrate); | ||
194 | |||
195 | return 0; | ||
196 | } | ||
197 | #endif | ||
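[Editor's note] pvclock_read_flags() and the reworked pvclock_clocksource_read() above share one seqcount-style protocol: the hypervisor makes src->version odd while it updates the time info and even when it is done, so a reader retries until it observes a stable, even version. A simplified sketch of that read loop; the payload is reduced to a single field and rmb() is the kernel read barrier from <asm/barrier.h>:

    struct pv_time_sample {
            u32 version;            /* odd while the host is updating */
            u64 system_time;        /* payload, updated together      */
    };

    static u64 read_stable_time(struct pv_time_sample *src)
    {
            u32 version;
            u64 value;

            do {
                    version = src->version;
                    rmb();                  /* version before payload   */
                    value = src->system_time;
                    rmb();                  /* payload before re-check  */
            } while ((src->version & 1) || version != src->version);

            return value;
    }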
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 1b27de563561..26ee48a33dc4 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -8,7 +8,7 @@ | |||
8 | 8 | ||
9 | #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) | 9 | #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) |
10 | 10 | ||
11 | static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) | 11 | static void quirk_intel_irqbalance(struct pci_dev *dev) |
12 | { | 12 | { |
13 | u8 config; | 13 | u8 config; |
14 | u16 word; | 14 | u16 word; |
@@ -512,7 +512,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, | |||
512 | 512 | ||
513 | #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) | 513 | #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) |
514 | /* Set correct numa_node information for AMD NB functions */ | 514 | /* Set correct numa_node information for AMD NB functions */ |
515 | static void __devinit quirk_amd_nb_node(struct pci_dev *dev) | 515 | static void quirk_amd_nb_node(struct pci_dev *dev) |
516 | { | 516 | { |
517 | struct pci_dev *nb_ht; | 517 | struct pci_dev *nb_ht; |
518 | unsigned int devfn; | 518 | unsigned int devfn; |
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 4929c1be0ac0..801602b5d745 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c | |||
@@ -195,12 +195,6 @@ void read_persistent_clock(struct timespec *ts) | |||
195 | ts->tv_nsec = 0; | 195 | ts->tv_nsec = 0; |
196 | } | 196 | } |
197 | 197 | ||
198 | unsigned long long native_read_tsc(void) | ||
199 | { | ||
200 | return __native_read_tsc(); | ||
201 | } | ||
202 | EXPORT_SYMBOL(native_read_tsc); | ||
203 | |||
204 | 198 | ||
205 | static struct resource rtc_resources[] = { | 199 | static struct resource rtc_resources[] = { |
206 | [0] = { | 200 | [0] = { |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2702c5d4acd2..8354399b3aae 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -143,11 +143,7 @@ int default_check_phys_apicid_present(int phys_apicid) | |||
143 | } | 143 | } |
144 | #endif | 144 | #endif |
145 | 145 | ||
146 | #ifndef CONFIG_DEBUG_BOOT_PARAMS | ||
147 | struct boot_params __initdata boot_params; | ||
148 | #else | ||
149 | struct boot_params boot_params; | 146 | struct boot_params boot_params; |
150 | #endif | ||
151 | 147 | ||
152 | /* | 148 | /* |
153 | * Machine setup.. | 149 | * Machine setup.. |
@@ -614,6 +610,83 @@ static __init void reserve_ibft_region(void) | |||
614 | 610 | ||
615 | static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; | 611 | static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; |
616 | 612 | ||
613 | static bool __init snb_gfx_workaround_needed(void) | ||
614 | { | ||
615 | #ifdef CONFIG_PCI | ||
616 | int i; | ||
617 | u16 vendor, devid; | ||
618 | static const __initconst u16 snb_ids[] = { | ||
619 | 0x0102, | ||
620 | 0x0112, | ||
621 | 0x0122, | ||
622 | 0x0106, | ||
623 | 0x0116, | ||
624 | 0x0126, | ||
625 | 0x010a, | ||
626 | }; | ||
627 | |||
628 | /* Assume no if something weird is going on with PCI */ | ||
629 | if (!early_pci_allowed()) | ||
630 | return false; | ||
631 | |||
632 | vendor = read_pci_config_16(0, 2, 0, PCI_VENDOR_ID); | ||
633 | if (vendor != 0x8086) | ||
634 | return false; | ||
635 | |||
636 | devid = read_pci_config_16(0, 2, 0, PCI_DEVICE_ID); | ||
637 | for (i = 0; i < ARRAY_SIZE(snb_ids); i++) | ||
638 | if (devid == snb_ids[i]) | ||
639 | return true; | ||
640 | #endif | ||
641 | |||
642 | return false; | ||
643 | } | ||
644 | |||
645 | /* | ||
646 | * Sandy Bridge graphics has trouble with certain ranges, exclude | ||
647 | * them from allocation. | ||
648 | */ | ||
649 | static void __init trim_snb_memory(void) | ||
650 | { | ||
651 | static const __initconst unsigned long bad_pages[] = { | ||
652 | 0x20050000, | ||
653 | 0x20110000, | ||
654 | 0x20130000, | ||
655 | 0x20138000, | ||
656 | 0x40004000, | ||
657 | }; | ||
658 | int i; | ||
659 | |||
660 | if (!snb_gfx_workaround_needed()) | ||
661 | return; | ||
662 | |||
663 | printk(KERN_DEBUG "reserving inaccessible SNB gfx pages\n"); | ||
664 | |||
665 | /* | ||
666 | * Reserve all memory below the 1 MB mark that has not | ||
667 | * already been reserved. | ||
668 | */ | ||
669 | memblock_reserve(0, 1<<20); | ||
670 | |||
671 | for (i = 0; i < ARRAY_SIZE(bad_pages); i++) { | ||
672 | if (memblock_reserve(bad_pages[i], PAGE_SIZE)) | ||
673 | printk(KERN_WARNING "failed to reserve 0x%08lx\n", | ||
674 | bad_pages[i]); | ||
675 | } | ||
676 | } | ||
677 | |||
678 | /* | ||
679 | * Here we put platform-specific memory range workarounds, i.e. | ||
680 | * memory known to be corrupt or otherwise in need to be reserved on | ||
681 | * specific platforms. | ||
682 | * | ||
683 | * If this gets used more widely it could use a real dispatch mechanism. | ||
684 | */ | ||
685 | static void __init trim_platform_memory_ranges(void) | ||
686 | { | ||
687 | trim_snb_memory(); | ||
688 | } | ||
689 | |||
617 | static void __init trim_bios_range(void) | 690 | static void __init trim_bios_range(void) |
618 | { | 691 | { |
619 | /* | 692 | /* |
@@ -634,6 +707,7 @@ static void __init trim_bios_range(void) | |||
634 | * take them out. | 707 | * take them out. |
635 | */ | 708 | */ |
636 | e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1); | 709 | e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1); |
710 | |||
637 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 711 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
638 | } | 712 | } |
639 | 713 | ||
@@ -912,6 +986,8 @@ void __init setup_arch(char **cmdline_p) | |||
912 | 986 | ||
913 | setup_real_mode(); | 987 | setup_real_mode(); |
914 | 988 | ||
989 | trim_platform_memory_ranges(); | ||
990 | |||
915 | init_gbpages(); | 991 | init_gbpages(); |
916 | 992 | ||
917 | /* max_pfn_mapped is updated here */ | 993 | /* max_pfn_mapped is updated here */ |
@@ -956,6 +1032,10 @@ void __init setup_arch(char **cmdline_p) | |||
956 | 1032 | ||
957 | reserve_initrd(); | 1033 | reserve_initrd(); |
958 | 1034 | ||
1035 | #if defined(CONFIG_ACPI) && defined(CONFIG_BLK_DEV_INITRD) | ||
1036 | acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start); | ||
1037 | #endif | ||
1038 | |||
959 | reserve_crashkernel(); | 1039 | reserve_crashkernel(); |
960 | 1040 | ||
961 | vsmp_init(); | 1041 | vsmp_init(); |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 70b27ee6118e..d6bf1f34a6e9 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/uaccess.h> | 22 | #include <linux/uaccess.h> |
23 | #include <linux/user-return-notifier.h> | 23 | #include <linux/user-return-notifier.h> |
24 | #include <linux/uprobes.h> | 24 | #include <linux/uprobes.h> |
25 | #include <linux/context_tracking.h> | ||
25 | 26 | ||
26 | #include <asm/processor.h> | 27 | #include <asm/processor.h> |
27 | #include <asm/ucontext.h> | 28 | #include <asm/ucontext.h> |
@@ -363,10 +364,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
363 | else | 364 | else |
364 | put_user_ex(0, &frame->uc.uc_flags); | 365 | put_user_ex(0, &frame->uc.uc_flags); |
365 | put_user_ex(0, &frame->uc.uc_link); | 366 | put_user_ex(0, &frame->uc.uc_link); |
366 | put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | 367 | err |= __save_altstack(&frame->uc.uc_stack, regs->sp); |
367 | put_user_ex(sas_ss_flags(regs->sp), | ||
368 | &frame->uc.uc_stack.ss_flags); | ||
369 | put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
370 | 368 | ||
371 | /* Set up to return from userspace. */ | 369 | /* Set up to return from userspace. */ |
372 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); | 370 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); |
@@ -413,7 +411,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
413 | struct rt_sigframe __user *frame; | 411 | struct rt_sigframe __user *frame; |
414 | void __user *fp = NULL; | 412 | void __user *fp = NULL; |
415 | int err = 0; | 413 | int err = 0; |
416 | struct task_struct *me = current; | ||
417 | 414 | ||
418 | frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp); | 415 | frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp); |
419 | 416 | ||
@@ -432,10 +429,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
432 | else | 429 | else |
433 | put_user_ex(0, &frame->uc.uc_flags); | 430 | put_user_ex(0, &frame->uc.uc_flags); |
434 | put_user_ex(0, &frame->uc.uc_link); | 431 | put_user_ex(0, &frame->uc.uc_link); |
435 | put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | 432 | err |= __save_altstack(&frame->uc.uc_stack, regs->sp); |
436 | put_user_ex(sas_ss_flags(regs->sp), | ||
437 | &frame->uc.uc_stack.ss_flags); | ||
438 | put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
439 | 433 | ||
440 | /* Set up to return from userspace. If provided, use a stub | 434 | /* Set up to return from userspace. If provided, use a stub |
441 | already in userspace. */ | 435 | already in userspace. */ |
@@ -502,10 +496,7 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, | |||
502 | else | 496 | else |
503 | put_user_ex(0, &frame->uc.uc_flags); | 497 | put_user_ex(0, &frame->uc.uc_flags); |
504 | put_user_ex(0, &frame->uc.uc_link); | 498 | put_user_ex(0, &frame->uc.uc_link); |
505 | put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | 499 | err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp); |
506 | put_user_ex(sas_ss_flags(regs->sp), | ||
507 | &frame->uc.uc_stack.ss_flags); | ||
508 | put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
509 | put_user_ex(0, &frame->uc.uc__pad0); | 500 | put_user_ex(0, &frame->uc.uc__pad0); |
510 | 501 | ||
511 | if (ka->sa.sa_flags & SA_RESTORER) { | 502 | if (ka->sa.sa_flags & SA_RESTORER) { |
@@ -602,13 +593,6 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, | |||
602 | } | 593 | } |
603 | #endif /* CONFIG_X86_32 */ | 594 | #endif /* CONFIG_X86_32 */ |
604 | 595 | ||
605 | long | ||
606 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | ||
607 | struct pt_regs *regs) | ||
608 | { | ||
609 | return do_sigaltstack(uss, uoss, regs->sp); | ||
610 | } | ||
611 | |||
612 | /* | 596 | /* |
613 | * Do a signal return; undo the signal stack. | 597 | * Do a signal return; undo the signal stack. |
614 | */ | 598 | */ |
@@ -658,7 +642,7 @@ long sys_rt_sigreturn(struct pt_regs *regs) | |||
658 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | 642 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) |
659 | goto badframe; | 643 | goto badframe; |
660 | 644 | ||
661 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) | 645 | if (restore_altstack(&frame->uc.uc_stack)) |
662 | goto badframe; | 646 | goto badframe; |
663 | 647 | ||
664 | return ax; | 648 | return ax; |
@@ -816,7 +800,7 @@ static void do_signal(struct pt_regs *regs) | |||
816 | void | 800 | void |
817 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | 801 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) |
818 | { | 802 | { |
819 | rcu_user_exit(); | 803 | user_exit(); |
820 | 804 | ||
821 | #ifdef CONFIG_X86_MCE | 805 | #ifdef CONFIG_X86_MCE |
822 | /* notify userspace of pending MCEs */ | 806 | /* notify userspace of pending MCEs */ |
@@ -838,7 +822,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
838 | if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) | 822 | if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) |
839 | fire_user_return_notifiers(); | 823 | fire_user_return_notifiers(); |
840 | 824 | ||
841 | rcu_user_enter(); | 825 | user_enter(); |
842 | } | 826 | } |
843 | 827 | ||
844 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | 828 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) |
@@ -864,7 +848,6 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) | |||
864 | struct rt_sigframe_x32 __user *frame; | 848 | struct rt_sigframe_x32 __user *frame; |
865 | sigset_t set; | 849 | sigset_t set; |
866 | unsigned long ax; | 850 | unsigned long ax; |
867 | struct pt_regs tregs; | ||
868 | 851 | ||
869 | frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); | 852 | frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); |
870 | 853 | ||
@@ -878,8 +861,7 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) | |||
878 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | 861 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) |
879 | goto badframe; | 862 | goto badframe; |
880 | 863 | ||
881 | tregs = *regs; | 864 | if (compat_restore_altstack(&frame->uc.uc_stack)) |
882 | if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT) | ||
883 | goto badframe; | 865 | goto badframe; |
884 | 866 | ||
885 | return ax; | 867 | return ax; |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c80a33bc528b..ed0fe385289d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -68,6 +68,8 @@ | |||
68 | #include <asm/mwait.h> | 68 | #include <asm/mwait.h> |
69 | #include <asm/apic.h> | 69 | #include <asm/apic.h> |
70 | #include <asm/io_apic.h> | 70 | #include <asm/io_apic.h> |
71 | #include <asm/i387.h> | ||
72 | #include <asm/fpu-internal.h> | ||
71 | #include <asm/setup.h> | 73 | #include <asm/setup.h> |
72 | #include <asm/uv/uv.h> | 74 | #include <asm/uv/uv.h> |
73 | #include <linux/mc146818rtc.h> | 75 | #include <linux/mc146818rtc.h> |
@@ -125,8 +127,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); | |||
125 | atomic_t init_deasserted; | 127 | atomic_t init_deasserted; |
126 | 128 | ||
127 | /* | 129 | /* |
128 | * Report back to the Boot Processor. | 130 | * Report back to the Boot Processor during boot time or to the caller processor |
129 | * Running on AP. | 131 | * during CPU online. |
130 | */ | 132 | */ |
131 | static void __cpuinit smp_callin(void) | 133 | static void __cpuinit smp_callin(void) |
132 | { | 134 | { |
@@ -138,15 +140,17 @@ static void __cpuinit smp_callin(void) | |||
138 | * we may get here before an INIT-deassert IPI reaches | 140 | * we may get here before an INIT-deassert IPI reaches |
139 | * our local APIC. We have to wait for the IPI or we'll | 141 | * our local APIC. We have to wait for the IPI or we'll |
140 | * lock up on an APIC access. | 142 | * lock up on an APIC access. |
143 | * | ||
144 | * Since CPU0 is not woken up by INIT, it doesn't wait for the IPI. | ||
141 | */ | 145 | */ |
142 | if (apic->wait_for_init_deassert) | 146 | cpuid = smp_processor_id(); |
147 | if (apic->wait_for_init_deassert && cpuid != 0) | ||
143 | apic->wait_for_init_deassert(&init_deasserted); | 148 | apic->wait_for_init_deassert(&init_deasserted); |
144 | 149 | ||
145 | /* | 150 | /* |
146 | * (This works even if the APIC is not enabled.) | 151 | * (This works even if the APIC is not enabled.) |
147 | */ | 152 | */ |
148 | phys_id = read_apic_id(); | 153 | phys_id = read_apic_id(); |
149 | cpuid = smp_processor_id(); | ||
150 | if (cpumask_test_cpu(cpuid, cpu_callin_mask)) { | 154 | if (cpumask_test_cpu(cpuid, cpu_callin_mask)) { |
151 | panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, | 155 | panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, |
152 | phys_id, cpuid); | 156 | phys_id, cpuid); |
@@ -228,6 +232,8 @@ static void __cpuinit smp_callin(void) | |||
228 | cpumask_set_cpu(cpuid, cpu_callin_mask); | 232 | cpumask_set_cpu(cpuid, cpu_callin_mask); |
229 | } | 233 | } |
230 | 234 | ||
235 | static int cpu0_logical_apicid; | ||
236 | static int enable_start_cpu0; | ||
231 | /* | 237 | /* |
232 | * Activate a secondary processor. | 238 | * Activate a secondary processor. |
233 | */ | 239 | */ |
@@ -243,6 +249,8 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
243 | preempt_disable(); | 249 | preempt_disable(); |
244 | smp_callin(); | 250 | smp_callin(); |
245 | 251 | ||
252 | enable_start_cpu0 = 0; | ||
253 | |||
246 | #ifdef CONFIG_X86_32 | 254 | #ifdef CONFIG_X86_32 |
247 | /* switch away from the initial page table */ | 255 | /* switch away from the initial page table */ |
248 | load_cr3(swapper_pg_dir); | 256 | load_cr3(swapper_pg_dir); |
@@ -279,19 +287,30 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
279 | cpu_idle(); | 287 | cpu_idle(); |
280 | } | 288 | } |
281 | 289 | ||
290 | void __init smp_store_boot_cpu_info(void) | ||
291 | { | ||
292 | int id = 0; /* CPU 0 */ | ||
293 | struct cpuinfo_x86 *c = &cpu_data(id); | ||
294 | |||
295 | *c = boot_cpu_data; | ||
296 | c->cpu_index = id; | ||
297 | } | ||
298 | |||
282 | /* | 299 | /* |
283 | * The bootstrap kernel entry code has set these up. Save them for | 300 | * The bootstrap kernel entry code has set these up. Save them for |
284 | * a given CPU | 301 | * a given CPU |
285 | */ | 302 | */ |
286 | |||
287 | void __cpuinit smp_store_cpu_info(int id) | 303 | void __cpuinit smp_store_cpu_info(int id) |
288 | { | 304 | { |
289 | struct cpuinfo_x86 *c = &cpu_data(id); | 305 | struct cpuinfo_x86 *c = &cpu_data(id); |
290 | 306 | ||
291 | *c = boot_cpu_data; | 307 | *c = boot_cpu_data; |
292 | c->cpu_index = id; | 308 | c->cpu_index = id; |
293 | if (id != 0) | 309 | /* |
294 | identify_secondary_cpu(c); | 310 | * During boot time, CPU0 has this setup already. Save the info when |
311 | * bringing up AP or offlined CPU0. | ||
312 | */ | ||
313 | identify_secondary_cpu(c); | ||
295 | } | 314 | } |
296 | 315 | ||
297 | static bool __cpuinit | 316 | static bool __cpuinit |
@@ -313,7 +332,7 @@ do { \ | |||
313 | 332 | ||
314 | static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) | 333 | static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) |
315 | { | 334 | { |
316 | if (cpu_has(c, X86_FEATURE_TOPOEXT)) { | 335 | if (cpu_has_topoext) { |
317 | int cpu1 = c->cpu_index, cpu2 = o->cpu_index; | 336 | int cpu1 = c->cpu_index, cpu2 = o->cpu_index; |
318 | 337 | ||
319 | if (c->phys_proc_id == o->phys_proc_id && | 338 | if (c->phys_proc_id == o->phys_proc_id && |
@@ -481,7 +500,7 @@ void __inquire_remote_apic(int apicid) | |||
481 | * won't ... remember to clear down the APIC, etc later. | 500 | * won't ... remember to clear down the APIC, etc later. |
482 | */ | 501 | */ |
483 | int __cpuinit | 502 | int __cpuinit |
484 | wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) | 503 | wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip) |
485 | { | 504 | { |
486 | unsigned long send_status, accept_status = 0; | 505 | unsigned long send_status, accept_status = 0; |
487 | int maxlvt; | 506 | int maxlvt; |
@@ -489,7 +508,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) | |||
489 | /* Target chip */ | 508 | /* Target chip */ |
490 | /* Boot on the stack */ | 509 | /* Boot on the stack */ |
491 | /* Kick the second */ | 510 | /* Kick the second */ |
492 | apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid); | 511 | apic_icr_write(APIC_DM_NMI | apic->dest_logical, apicid); |
493 | 512 | ||
494 | pr_debug("Waiting for send to finish...\n"); | 513 | pr_debug("Waiting for send to finish...\n"); |
495 | send_status = safe_apic_wait_icr_idle(); | 514 | send_status = safe_apic_wait_icr_idle(); |
@@ -649,6 +668,63 @@ static void __cpuinit announce_cpu(int cpu, int apicid) | |||
649 | node, cpu, apicid); | 668 | node, cpu, apicid); |
650 | } | 669 | } |
651 | 670 | ||
671 | static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs) | ||
672 | { | ||
673 | int cpu; | ||
674 | |||
675 | cpu = smp_processor_id(); | ||
676 | if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0) | ||
677 | return NMI_HANDLED; | ||
678 | |||
679 | return NMI_DONE; | ||
680 | } | ||
681 | |||
682 | /* | ||
683 | * Wake up AP by INIT, INIT, STARTUP sequence. | ||
684 | * | ||
685 | * Instead of waiting for STARTUP after INITs, BSP will execute the BIOS | ||
686 | * boot-strap code which is not a desired behavior for waking up BSP. To | ||
687 | * avoid the boot-strap code, wake up CPU0 by NMI instead. | ||
688 | * | ||
689 | * This works to wake up soft offlined CPU0 only. If CPU0 is hard offlined | ||
690 | * (i.e. physically hot removed and then hot added), NMI won't wake it up. | ||
691 | * We'll change this code in the future to wake up hard offlined CPU0 if | ||
692 | * real platform and request are available. | ||
693 | */ | ||
694 | static int __cpuinit | ||
695 | wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid, | ||
696 | int *cpu0_nmi_registered) | ||
697 | { | ||
698 | int id; | ||
699 | int boot_error; | ||
700 | |||
701 | /* | ||
702 | * Wake up AP by INIT, INIT, STARTUP sequence. | ||
703 | */ | ||
704 | if (cpu) | ||
705 | return wakeup_secondary_cpu_via_init(apicid, start_ip); | ||
706 | |||
707 | /* | ||
708 | * Wake up BSP by nmi. | ||
709 | * | ||
710 | * Register a NMI handler to help wake up CPU0. | ||
711 | */ | ||
712 | boot_error = register_nmi_handler(NMI_LOCAL, | ||
713 | wakeup_cpu0_nmi, 0, "wake_cpu0"); | ||
714 | |||
715 | if (!boot_error) { | ||
716 | enable_start_cpu0 = 1; | ||
717 | *cpu0_nmi_registered = 1; | ||
718 | if (apic->dest_logical == APIC_DEST_LOGICAL) | ||
719 | id = cpu0_logical_apicid; | ||
720 | else | ||
721 | id = apicid; | ||
722 | boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip); | ||
723 | } | ||
724 | |||
725 | return boot_error; | ||
726 | } | ||
727 | |||
652 | /* | 728 | /* |
653 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad | 729 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad |
654 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. | 730 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. |
@@ -664,6 +740,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) | |||
664 | 740 | ||
665 | unsigned long boot_error = 0; | 741 | unsigned long boot_error = 0; |
666 | int timeout; | 742 | int timeout; |
743 | int cpu0_nmi_registered = 0; | ||
667 | 744 | ||
668 | /* Just in case we booted with a single CPU. */ | 745 | /* Just in case we booted with a single CPU. */ |
669 | alternatives_enable_smp(); | 746 | alternatives_enable_smp(); |
@@ -711,13 +788,16 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) | |||
711 | } | 788 | } |
712 | 789 | ||
713 | /* | 790 | /* |
714 | * Kick the secondary CPU. Use the method in the APIC driver | 791 | * Wake up a CPU in different cases: |
715 | * if it's defined - or use an INIT boot APIC message otherwise: | 792 | * - Use the method in the APIC driver if it's defined |
793 | * Otherwise, | ||
794 | * - Use an INIT boot APIC message for APs or NMI for BSP. | ||
716 | */ | 795 | */ |
717 | if (apic->wakeup_secondary_cpu) | 796 | if (apic->wakeup_secondary_cpu) |
718 | boot_error = apic->wakeup_secondary_cpu(apicid, start_ip); | 797 | boot_error = apic->wakeup_secondary_cpu(apicid, start_ip); |
719 | else | 798 | else |
720 | boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip); | 799 | boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid, |
800 | &cpu0_nmi_registered); | ||
721 | 801 | ||
722 | if (!boot_error) { | 802 | if (!boot_error) { |
723 | /* | 803 | /* |
@@ -782,6 +862,13 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) | |||
782 | */ | 862 | */ |
783 | smpboot_restore_warm_reset_vector(); | 863 | smpboot_restore_warm_reset_vector(); |
784 | } | 864 | } |
865 | /* | ||
866 | * Clean up the nmi handler. Do this after the callin and callout sync | ||
867 | * to avoid impact of possible long unregister time. | ||
868 | */ | ||
869 | if (cpu0_nmi_registered) | ||
870 | unregister_nmi_handler(NMI_LOCAL, "wake_cpu0"); | ||
871 | |||
785 | return boot_error; | 872 | return boot_error; |
786 | } | 873 | } |
787 | 874 | ||
@@ -795,7 +882,7 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle) | |||
795 | 882 | ||
796 | pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); | 883 | pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); |
797 | 884 | ||
798 | if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || | 885 | if (apicid == BAD_APICID || |
799 | !physid_isset(apicid, phys_cpu_present_map) || | 886 | !physid_isset(apicid, phys_cpu_present_map) || |
800 | !apic->apic_id_valid(apicid)) { | 887 | !apic->apic_id_valid(apicid)) { |
801 | pr_err("%s: bad cpu %d\n", __func__, cpu); | 888 | pr_err("%s: bad cpu %d\n", __func__, cpu); |
@@ -818,6 +905,9 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle) | |||
818 | 905 | ||
819 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; | 906 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; |
820 | 907 | ||
908 | /* the FPU context is blank, nobody can own it */ | ||
909 | __cpu_disable_lazy_restore(cpu); | ||
910 | |||
821 | err = do_boot_cpu(apicid, cpu, tidle); | 911 | err = do_boot_cpu(apicid, cpu, tidle); |
822 | if (err) { | 912 | if (err) { |
823 | pr_debug("do_boot_cpu failed %d\n", err); | 913 | pr_debug("do_boot_cpu failed %d\n", err); |
@@ -990,7 +1080,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
990 | /* | 1080 | /* |
991 | * Setup boot CPU information | 1081 | * Setup boot CPU information |
992 | */ | 1082 | */ |
993 | smp_store_cpu_info(0); /* Final full version of the data */ | 1083 | smp_store_boot_cpu_info(); /* Final full version of the data */ |
994 | cpumask_copy(cpu_callin_mask, cpumask_of(0)); | 1084 | cpumask_copy(cpu_callin_mask, cpumask_of(0)); |
995 | mb(); | 1085 | mb(); |
996 | 1086 | ||
@@ -1026,6 +1116,11 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1026 | */ | 1116 | */ |
1027 | setup_local_APIC(); | 1117 | setup_local_APIC(); |
1028 | 1118 | ||
1119 | if (x2apic_mode) | ||
1120 | cpu0_logical_apicid = apic_read(APIC_LDR); | ||
1121 | else | ||
1122 | cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); | ||
1123 | |||
1029 | /* | 1124 | /* |
1030 | * Enable IO APIC before setting up error vector | 1125 | * Enable IO APIC before setting up error vector |
1031 | */ | 1126 | */ |
@@ -1214,19 +1309,6 @@ void cpu_disable_common(void) | |||
1214 | 1309 | ||
1215 | int native_cpu_disable(void) | 1310 | int native_cpu_disable(void) |
1216 | { | 1311 | { |
1217 | int cpu = smp_processor_id(); | ||
1218 | |||
1219 | /* | ||
1220 | * Perhaps use cpufreq to drop frequency, but that could go | ||
1221 | * into generic code. | ||
1222 | * | ||
1223 | * We won't take down the boot processor on i386 due to some | ||
1224 | * interrupts only being able to be serviced by the BSP. | ||
1225 | * Especially so if we're not using an IOAPIC -zwane | ||
1226 | */ | ||
1227 | if (cpu == 0) | ||
1228 | return -EBUSY; | ||
1229 | |||
1230 | clear_local_APIC(); | 1312 | clear_local_APIC(); |
1231 | 1313 | ||
1232 | cpu_disable_common(); | 1314 | cpu_disable_common(); |
@@ -1266,6 +1348,14 @@ void play_dead_common(void) | |||
1266 | local_irq_disable(); | 1348 | local_irq_disable(); |
1267 | } | 1349 | } |
1268 | 1350 | ||
1351 | static bool wakeup_cpu0(void) | ||
1352 | { | ||
1353 | if (smp_processor_id() == 0 && enable_start_cpu0) | ||
1354 | return true; | ||
1355 | |||
1356 | return false; | ||
1357 | } | ||
1358 | |||
1269 | /* | 1359 | /* |
1270 | * We need to flush the caches before going to sleep, lest we have | 1360 | * We need to flush the caches before going to sleep, lest we have |
1271 | * dirty data in our caches when we come back up. | 1361 | * dirty data in our caches when we come back up. |
@@ -1329,6 +1419,11 @@ static inline void mwait_play_dead(void) | |||
1329 | __monitor(mwait_ptr, 0, 0); | 1419 | __monitor(mwait_ptr, 0, 0); |
1330 | mb(); | 1420 | mb(); |
1331 | __mwait(eax, 0); | 1421 | __mwait(eax, 0); |
1422 | /* | ||
1423 | * If NMI wants to wake up CPU0, start CPU0. | ||
1424 | */ | ||
1425 | if (wakeup_cpu0()) | ||
1426 | start_cpu0(); | ||
1332 | } | 1427 | } |
1333 | } | 1428 | } |
1334 | 1429 | ||
@@ -1339,6 +1434,11 @@ static inline void hlt_play_dead(void) | |||
1339 | 1434 | ||
1340 | while (1) { | 1435 | while (1) { |
1341 | native_halt(); | 1436 | native_halt(); |
1437 | /* | ||
1438 | * If NMI wants to wake up CPU0, start CPU0. | ||
1439 | */ | ||
1440 | if (wakeup_cpu0()) | ||
1441 | start_cpu0(); | ||
1342 | } | 1442 | } |
1343 | } | 1443 | } |
1344 | 1444 | ||
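Taken together, the smpboot.c hunks above split CPU wake-up into two paths: APs still get the INIT/INIT/STARTUP sequence, while an offlined BSP parks in hlt/mwait and is revived by NMI. Below is a simplified sketch of that round trip, paraphrasing the hunks rather than quoting literal kernel code; wake_bsp() and bsp_play_dead() are illustrative names only, and the APIC-driver override and error paths are omitted.

	/* Waker side: bring CPU0 back with an NMI instead of INIT/SIPI. */
	static int wake_bsp(int apicid, unsigned long start_ip, int *nmi_registered)
	{
		int err = register_nmi_handler(NMI_LOCAL, wakeup_cpu0_nmi, 0, "wake_cpu0");
		if (err)
			return err;
		enable_start_cpu0 = 1;	/* lets the parked BSP leave its play_dead loop */
		*nmi_registered = 1;	/* caller unregisters after the callin/callout sync */
		return wakeup_secondary_cpu_via_nmi(apicid, start_ip);
	}

	/* Parked side: CPU0 sits in hlt (or mwait) and re-checks the flag on every wakeup. */
	static void bsp_play_dead(void)
	{
		while (1) {
			native_halt();
			if (wakeup_cpu0())	/* smp_processor_id() == 0 && enable_start_cpu0 */
				start_cpu0();	/* re-enters the boot path; does not return */
		}
	}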
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index cd3b2438a980..9b4d51d0c0d0 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
@@ -165,10 +165,11 @@ void set_task_blockstep(struct task_struct *task, bool on) | |||
165 | * Ensure irq/preemption can't change debugctl in between. | 165 | * Ensure irq/preemption can't change debugctl in between. |
166 | * Note also that both TIF_BLOCKSTEP and debugctl should | 166 | * Note also that both TIF_BLOCKSTEP and debugctl should |
167 | * be changed atomically wrt preemption. | 167 | * be changed atomically wrt preemption. |
168 | * FIXME: this means that set/clear TIF_BLOCKSTEP is simply | 168 | * |
169 | * wrong if task != current, SIGKILL can wakeup the stopped | 169 | * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if |
170 | * tracee and set/clear can play with the running task, this | 170 | * task is current or it can't be running, otherwise we can race |
171 | * can confuse the next __switch_to_xtra(). | 171 | * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but |
172 | * PTRACE_KILL is not safe. | ||
172 | */ | 173 | */ |
173 | local_irq_disable(); | 174 | local_irq_disable(); |
174 | debugctl = get_debugctlmsr(); | 175 | debugctl = get_debugctlmsr(); |
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index b4d3c3927dd8..97ef74b88e0f 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c | |||
@@ -21,37 +21,23 @@ | |||
21 | 21 | ||
22 | /* | 22 | /* |
23 | * Align a virtual address to avoid aliasing in the I$ on AMD F15h. | 23 | * Align a virtual address to avoid aliasing in the I$ on AMD F15h. |
24 | * | ||
25 | * @flags denotes the allocation direction - bottomup or topdown - | ||
26 | * or vDSO; see call sites below. | ||
27 | */ | 24 | */ |
28 | unsigned long align_addr(unsigned long addr, struct file *filp, | 25 | static unsigned long get_align_mask(void) |
29 | enum align_flags flags) | ||
30 | { | 26 | { |
31 | unsigned long tmp_addr; | ||
32 | |||
33 | /* handle 32- and 64-bit case with a single conditional */ | 27 | /* handle 32- and 64-bit case with a single conditional */ |
34 | if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32()))) | 28 | if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32()))) |
35 | return addr; | 29 | return 0; |
36 | 30 | ||
37 | if (!(current->flags & PF_RANDOMIZE)) | 31 | if (!(current->flags & PF_RANDOMIZE)) |
38 | return addr; | 32 | return 0; |
39 | |||
40 | if (!((flags & ALIGN_VDSO) || filp)) | ||
41 | return addr; | ||
42 | |||
43 | tmp_addr = addr; | ||
44 | |||
45 | /* | ||
46 | * We need an address which is <= than the original | ||
47 | * one only when in topdown direction. | ||
48 | */ | ||
49 | if (!(flags & ALIGN_TOPDOWN)) | ||
50 | tmp_addr += va_align.mask; | ||
51 | 33 | ||
52 | tmp_addr &= ~va_align.mask; | 34 | return va_align.mask; |
35 | } | ||
53 | 36 | ||
54 | return tmp_addr; | 37 | unsigned long align_vdso_addr(unsigned long addr) |
38 | { | ||
39 | unsigned long align_mask = get_align_mask(); | ||
40 | return (addr + align_mask) & ~align_mask; | ||
55 | } | 41 | } |
56 | 42 | ||
57 | static int __init control_va_addr_alignment(char *str) | 43 | static int __init control_va_addr_alignment(char *str) |
@@ -126,7 +112,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, | |||
126 | { | 112 | { |
127 | struct mm_struct *mm = current->mm; | 113 | struct mm_struct *mm = current->mm; |
128 | struct vm_area_struct *vma; | 114 | struct vm_area_struct *vma; |
129 | unsigned long start_addr; | 115 | struct vm_unmapped_area_info info; |
130 | unsigned long begin, end; | 116 | unsigned long begin, end; |
131 | 117 | ||
132 | if (flags & MAP_FIXED) | 118 | if (flags & MAP_FIXED) |
@@ -144,50 +130,16 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, | |||
144 | (!vma || addr + len <= vma->vm_start)) | 130 | (!vma || addr + len <= vma->vm_start)) |
145 | return addr; | 131 | return addr; |
146 | } | 132 | } |
147 | if (((flags & MAP_32BIT) || test_thread_flag(TIF_ADDR32)) | ||
148 | && len <= mm->cached_hole_size) { | ||
149 | mm->cached_hole_size = 0; | ||
150 | mm->free_area_cache = begin; | ||
151 | } | ||
152 | addr = mm->free_area_cache; | ||
153 | if (addr < begin) | ||
154 | addr = begin; | ||
155 | start_addr = addr; | ||
156 | |||
157 | full_search: | ||
158 | |||
159 | addr = align_addr(addr, filp, 0); | ||
160 | |||
161 | for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { | ||
162 | /* At this point: (!vma || addr < vma->vm_end). */ | ||
163 | if (end - len < addr) { | ||
164 | /* | ||
165 | * Start a new search - just in case we missed | ||
166 | * some holes. | ||
167 | */ | ||
168 | if (start_addr != begin) { | ||
169 | start_addr = addr = begin; | ||
170 | mm->cached_hole_size = 0; | ||
171 | goto full_search; | ||
172 | } | ||
173 | return -ENOMEM; | ||
174 | } | ||
175 | if (!vma || addr + len <= vma->vm_start) { | ||
176 | /* | ||
177 | * Remember the place where we stopped the search: | ||
178 | */ | ||
179 | mm->free_area_cache = addr + len; | ||
180 | return addr; | ||
181 | } | ||
182 | if (addr + mm->cached_hole_size < vma->vm_start) | ||
183 | mm->cached_hole_size = vma->vm_start - addr; | ||
184 | 133 | ||
185 | addr = vma->vm_end; | 134 | info.flags = 0; |
186 | addr = align_addr(addr, filp, 0); | 135 | info.length = len; |
187 | } | 136 | info.low_limit = begin; |
137 | info.high_limit = end; | ||
138 | info.align_mask = filp ? get_align_mask() : 0; | ||
139 | info.align_offset = pgoff << PAGE_SHIFT; | ||
140 | return vm_unmapped_area(&info); | ||
188 | } | 141 | } |
189 | 142 | ||
190 | |||
191 | unsigned long | 143 | unsigned long |
192 | arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | 144 | arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, |
193 | const unsigned long len, const unsigned long pgoff, | 145 | const unsigned long len, const unsigned long pgoff, |
@@ -195,7 +147,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
195 | { | 147 | { |
196 | struct vm_area_struct *vma; | 148 | struct vm_area_struct *vma; |
197 | struct mm_struct *mm = current->mm; | 149 | struct mm_struct *mm = current->mm; |
198 | unsigned long addr = addr0, start_addr; | 150 | unsigned long addr = addr0; |
151 | struct vm_unmapped_area_info info; | ||
199 | 152 | ||
200 | /* requested length too big for entire address space */ | 153 | /* requested length too big for entire address space */ |
201 | if (len > TASK_SIZE) | 154 | if (len > TASK_SIZE) |
@@ -217,51 +170,16 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
217 | return addr; | 170 | return addr; |
218 | } | 171 | } |
219 | 172 | ||
220 | /* check if free_area_cache is useful for us */ | 173 | info.flags = VM_UNMAPPED_AREA_TOPDOWN; |
221 | if (len <= mm->cached_hole_size) { | 174 | info.length = len; |
222 | mm->cached_hole_size = 0; | 175 | info.low_limit = PAGE_SIZE; |
223 | mm->free_area_cache = mm->mmap_base; | 176 | info.high_limit = mm->mmap_base; |
224 | } | 177 | info.align_mask = filp ? get_align_mask() : 0; |
225 | 178 | info.align_offset = pgoff << PAGE_SHIFT; | |
226 | try_again: | 179 | addr = vm_unmapped_area(&info); |
227 | /* either no address requested or can't fit in requested address hole */ | 180 | if (!(addr & ~PAGE_MASK)) |
228 | start_addr = addr = mm->free_area_cache; | 181 | return addr; |
229 | 182 | VM_BUG_ON(addr != -ENOMEM); | |
230 | if (addr < len) | ||
231 | goto fail; | ||
232 | |||
233 | addr -= len; | ||
234 | do { | ||
235 | addr = align_addr(addr, filp, ALIGN_TOPDOWN); | ||
236 | |||
237 | /* | ||
238 | * Lookup failure means no vma is above this address, | ||
239 | * else if new region fits below vma->vm_start, | ||
240 | * return with success: | ||
241 | */ | ||
242 | vma = find_vma(mm, addr); | ||
243 | if (!vma || addr+len <= vma->vm_start) | ||
244 | /* remember the address as a hint for next time */ | ||
245 | return mm->free_area_cache = addr; | ||
246 | |||
247 | /* remember the largest hole we saw so far */ | ||
248 | if (addr + mm->cached_hole_size < vma->vm_start) | ||
249 | mm->cached_hole_size = vma->vm_start - addr; | ||
250 | |||
251 | /* try just below the current vma->vm_start */ | ||
252 | addr = vma->vm_start-len; | ||
253 | } while (len < vma->vm_start); | ||
254 | |||
255 | fail: | ||
256 | /* | ||
257 | * if hint left us with no space for the requested | ||
258 | * mapping then try again: | ||
259 | */ | ||
260 | if (start_addr != mm->mmap_base) { | ||
261 | mm->free_area_cache = mm->mmap_base; | ||
262 | mm->cached_hole_size = 0; | ||
263 | goto try_again; | ||
264 | } | ||
265 | 183 | ||
266 | bottomup: | 184 | bottomup: |
267 | /* | 185 | /* |
@@ -270,14 +188,5 @@ bottomup: | |||
270 | * can happen with large stack limits and large mmap() | 188 | * can happen with large stack limits and large mmap() |
271 | * allocations. | 189 | * allocations. |
272 | */ | 190 | */ |
273 | mm->cached_hole_size = ~0UL; | 191 | return arch_get_unmapped_area(filp, addr0, len, pgoff, flags); |
274 | mm->free_area_cache = TASK_UNMAPPED_BASE; | ||
275 | addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); | ||
276 | /* | ||
277 | * Restore the topdown base: | ||
278 | */ | ||
279 | mm->free_area_cache = mm->mmap_base; | ||
280 | mm->cached_hole_size = ~0UL; | ||
281 | |||
282 | return addr; | ||
283 | } | 192 | } |
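The two arch_get_unmapped_area* hunks above drop the open-coded free_area_cache walk and instead describe the search window to the generic vm_unmapped_area() helper. As a restatement of the fields the hunks fill in (a sketch of the caller's view, not new kernel code):

	struct vm_unmapped_area_info info;

	info.flags        = 0;				/* or VM_UNMAPPED_AREA_TOPDOWN */
	info.length       = len;			/* size of the mapping needed */
	info.low_limit    = begin;			/* lowest address to consider */
	info.high_limit   = end;			/* highest address to consider */
	info.align_mask   = filp ? get_align_mask() : 0; /* AMD F15h I$ anti-aliasing */
	info.align_offset = pgoff << PAGE_SHIFT;	/* keep the file-page colouring */
	addr = vm_unmapped_area(&info);			/* an address, or -ENOMEM */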
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index 76ee97709a00..6e60b5fe2244 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c | |||
@@ -30,23 +30,110 @@ | |||
30 | #include <linux/mmzone.h> | 30 | #include <linux/mmzone.h> |
31 | #include <linux/init.h> | 31 | #include <linux/init.h> |
32 | #include <linux/smp.h> | 32 | #include <linux/smp.h> |
33 | #include <linux/irq.h> | ||
33 | #include <asm/cpu.h> | 34 | #include <asm/cpu.h> |
34 | 35 | ||
35 | static DEFINE_PER_CPU(struct x86_cpu, cpu_devices); | 36 | static DEFINE_PER_CPU(struct x86_cpu, cpu_devices); |
36 | 37 | ||
37 | #ifdef CONFIG_HOTPLUG_CPU | 38 | #ifdef CONFIG_HOTPLUG_CPU |
39 | |||
40 | #ifdef CONFIG_BOOTPARAM_HOTPLUG_CPU0 | ||
41 | static int cpu0_hotpluggable = 1; | ||
42 | #else | ||
43 | static int cpu0_hotpluggable; | ||
44 | static int __init enable_cpu0_hotplug(char *str) | ||
45 | { | ||
46 | cpu0_hotpluggable = 1; | ||
47 | return 1; | ||
48 | } | ||
49 | |||
50 | __setup("cpu0_hotplug", enable_cpu0_hotplug); | ||
51 | #endif | ||
52 | |||
53 | #ifdef CONFIG_DEBUG_HOTPLUG_CPU0 | ||
54 | /* | ||
55 | * This function offlines a CPU as early as possible and allows userspace to | ||
56 | * boot up without the CPU. The CPU can be brought back online by the user after boot. | ||
57 | * | ||
58 | * This is only called for debugging the CPU offline/online feature. | ||
59 | */ | ||
60 | int __ref _debug_hotplug_cpu(int cpu, int action) | ||
61 | { | ||
62 | struct device *dev = get_cpu_device(cpu); | ||
63 | int ret; | ||
64 | |||
65 | if (!cpu_is_hotpluggable(cpu)) | ||
66 | return -EINVAL; | ||
67 | |||
68 | cpu_hotplug_driver_lock(); | ||
69 | |||
70 | switch (action) { | ||
71 | case 0: | ||
72 | ret = cpu_down(cpu); | ||
73 | if (!ret) { | ||
74 | pr_info("CPU %u is now offline\n", cpu); | ||
75 | kobject_uevent(&dev->kobj, KOBJ_OFFLINE); | ||
76 | } else | ||
77 | pr_debug("Can't offline CPU%d.\n", cpu); | ||
78 | break; | ||
79 | case 1: | ||
80 | ret = cpu_up(cpu); | ||
81 | if (!ret) | ||
82 | kobject_uevent(&dev->kobj, KOBJ_ONLINE); | ||
83 | else | ||
84 | pr_debug("Can't online CPU%d.\n", cpu); | ||
85 | break; | ||
86 | default: | ||
87 | ret = -EINVAL; | ||
88 | } | ||
89 | |||
90 | cpu_hotplug_driver_unlock(); | ||
91 | |||
92 | return ret; | ||
93 | } | ||
94 | |||
95 | static int __init debug_hotplug_cpu(void) | ||
96 | { | ||
97 | _debug_hotplug_cpu(0, 0); | ||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | late_initcall_sync(debug_hotplug_cpu); | ||
102 | #endif /* CONFIG_DEBUG_HOTPLUG_CPU0 */ | ||
103 | |||
38 | int __ref arch_register_cpu(int num) | 104 | int __ref arch_register_cpu(int num) |
39 | { | 105 | { |
106 | struct cpuinfo_x86 *c = &cpu_data(num); | ||
107 | |||
108 | /* | ||
109 | * Currently CPU0 is only hotpluggable on Intel platforms. Other | ||
110 | * vendors can add hotplug support later. | ||
111 | */ | ||
112 | if (c->x86_vendor != X86_VENDOR_INTEL) | ||
113 | cpu0_hotpluggable = 0; | ||
114 | |||
40 | /* | 115 | /* |
41 | * CPU0 cannot be offlined due to several | 116 | * Two known BSP/CPU0 dependencies: Resume from suspend/hibernate |
42 | * restrictions and assumptions in kernel. This basically | 117 | * depends on BSP. PIC interrupts depend on BSP. |
43 | * doesn't add a control file, one cannot attempt to offline | ||
44 | * BSP. | ||
45 | * | 118 | * |
46 | * Also certain PCI quirks require not to enable hotplug control | 119 | * If the BSP dependencies are under control, one can tell the kernel to |
47 | * for all CPU's. | 120 | * enable BSP hotplug. This basically adds a control file so |
121 | * one can attempt to offline the BSP. | ||
48 | */ | 122 | */ |
49 | if (num) | 123 | if (num == 0 && cpu0_hotpluggable) { |
124 | unsigned int irq; | ||
125 | /* | ||
126 | * We won't take down the boot processor on i386 if some | ||
127 | * interrupts can only be serviced by the BSP in PIC mode. | ||
128 | */ | ||
129 | for_each_active_irq(irq) { | ||
130 | if (!IO_APIC_IRQ(irq) && irq_has_action(irq)) { | ||
131 | cpu0_hotpluggable = 0; | ||
132 | break; | ||
133 | } | ||
134 | } | ||
135 | } | ||
136 | if (num || cpu0_hotpluggable) | ||
50 | per_cpu(cpu_devices, num).cpu.hotpluggable = 1; | 137 | per_cpu(cpu_devices, num).cpu.hotpluggable = 1; |
51 | 138 | ||
52 | return register_cpu(&per_cpu(cpu_devices, num).cpu, num); | 139 | return register_cpu(&per_cpu(cpu_devices, num).cpu, num); |
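For context: once CPU0 is registered as hotpluggable (either via CONFIG_BOOTPARAM_HOTPLUG_CPU0 or the cpu0_hotplug command-line parameter added above), it gets the usual sysfs control file like any other CPU, so it can be taken down and brought back by writing 0 and 1 to /sys/devices/system/cpu/cpu0/online - assuming an Intel platform and no active PIC-only interrupts, per the checks in arch_register_cpu() above.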
diff --git a/arch/x86/kernel/trace_clock.c b/arch/x86/kernel/trace_clock.c new file mode 100644 index 000000000000..25b993729f9b --- /dev/null +++ b/arch/x86/kernel/trace_clock.c | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * X86 trace clocks | ||
3 | */ | ||
4 | #include <asm/trace_clock.h> | ||
5 | #include <asm/barrier.h> | ||
6 | #include <asm/msr.h> | ||
7 | |||
8 | /* | ||
9 | * trace_clock_x86_tsc(): A clock that is just the cycle counter. | ||
10 | * | ||
11 | * Unlike the other clocks, this is not in nanoseconds. | ||
12 | */ | ||
13 | u64 notrace trace_clock_x86_tsc(void) | ||
14 | { | ||
15 | u64 ret; | ||
16 | |||
17 | rdtsc_barrier(); | ||
18 | rdtscll(ret); | ||
19 | |||
20 | return ret; | ||
21 | } | ||
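The new trace_clock_x86_tsc() (built when CONFIG_X86_TSC is set, per the Makefile hunk in this series) gives ftrace a raw, non-nanosecond cycle-counter clock; the rdtsc_barrier() keeps the RDTSC read from being reordered with earlier loads so per-CPU timestamps stay monotonic. Assuming it is exposed under the usual ftrace clock name "x86-tsc", it would be selected with something like echo x86-tsc > /sys/kernel/debug/tracing/trace_clock.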
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 8276dc6794cc..ecffca11f4e9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -55,7 +55,7 @@ | |||
55 | #include <asm/i387.h> | 55 | #include <asm/i387.h> |
56 | #include <asm/fpu-internal.h> | 56 | #include <asm/fpu-internal.h> |
57 | #include <asm/mce.h> | 57 | #include <asm/mce.h> |
58 | #include <asm/rcu.h> | 58 | #include <asm/context_tracking.h> |
59 | 59 | ||
60 | #include <asm/mach_traps.h> | 60 | #include <asm/mach_traps.h> |
61 | 61 | ||
@@ -69,9 +69,6 @@ | |||
69 | 69 | ||
70 | asmlinkage int system_call(void); | 70 | asmlinkage int system_call(void); |
71 | 71 | ||
72 | /* Do we ignore FPU interrupts ? */ | ||
73 | char ignore_fpu_irq; | ||
74 | |||
75 | /* | 72 | /* |
76 | * The IDT has to be page-aligned to simplify the Pentium | 73 | * The IDT has to be page-aligned to simplify the Pentium |
77 | * F0 0F bug workaround. | 74 | * F0 0F bug workaround. |
@@ -564,9 +561,6 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr) | |||
564 | 561 | ||
565 | dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) | 562 | dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) |
566 | { | 563 | { |
567 | #ifdef CONFIG_X86_32 | ||
568 | ignore_fpu_irq = 1; | ||
569 | #endif | ||
570 | exception_enter(regs); | 564 | exception_enter(regs); |
571 | math_error(regs, error_code, X86_TRAP_MF); | 565 | math_error(regs, error_code, X86_TRAP_MF); |
572 | exception_exit(regs); | 566 | exception_exit(regs); |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index cfa5d4f7ca56..06ccb5073a3f 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -77,6 +77,12 @@ unsigned long long | |||
77 | sched_clock(void) __attribute__((alias("native_sched_clock"))); | 77 | sched_clock(void) __attribute__((alias("native_sched_clock"))); |
78 | #endif | 78 | #endif |
79 | 79 | ||
80 | unsigned long long native_read_tsc(void) | ||
81 | { | ||
82 | return __native_read_tsc(); | ||
83 | } | ||
84 | EXPORT_SYMBOL(native_read_tsc); | ||
85 | |||
80 | int check_tsc_unstable(void) | 86 | int check_tsc_unstable(void) |
81 | { | 87 | { |
82 | return tsc_unstable; | 88 | return tsc_unstable; |
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index aafa5557b396..c71025b67462 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c | |||
@@ -478,6 +478,11 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) | |||
478 | regs->ip = current->utask->xol_vaddr; | 478 | regs->ip = current->utask->xol_vaddr; |
479 | pre_xol_rip_insn(auprobe, regs, autask); | 479 | pre_xol_rip_insn(auprobe, regs, autask); |
480 | 480 | ||
481 | autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF); | ||
482 | regs->flags |= X86_EFLAGS_TF; | ||
483 | if (test_tsk_thread_flag(current, TIF_BLOCKSTEP)) | ||
484 | set_task_blockstep(current, false); | ||
485 | |||
481 | return 0; | 486 | return 0; |
482 | } | 487 | } |
483 | 488 | ||
@@ -603,6 +608,16 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) | |||
603 | if (auprobe->fixups & UPROBE_FIX_CALL) | 608 | if (auprobe->fixups & UPROBE_FIX_CALL) |
604 | result = adjust_ret_addr(regs->sp, correction); | 609 | result = adjust_ret_addr(regs->sp, correction); |
605 | 610 | ||
611 | /* | ||
612 | * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP | ||
613 | * so we can get an extra SIGTRAP if we do not clear TF. We need | ||
614 | * to examine the opcode to make it right. | ||
615 | */ | ||
616 | if (utask->autask.saved_tf) | ||
617 | send_sig(SIGTRAP, current, 0); | ||
618 | else if (!(auprobe->fixups & UPROBE_FIX_SETF)) | ||
619 | regs->flags &= ~X86_EFLAGS_TF; | ||
620 | |||
606 | return result; | 621 | return result; |
607 | } | 622 | } |
608 | 623 | ||
@@ -647,6 +662,10 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) | |||
647 | current->thread.trap_nr = utask->autask.saved_trap_nr; | 662 | current->thread.trap_nr = utask->autask.saved_trap_nr; |
648 | handle_riprel_post_xol(auprobe, regs, NULL); | 663 | handle_riprel_post_xol(auprobe, regs, NULL); |
649 | instruction_pointer_set(regs, utask->vaddr); | 664 | instruction_pointer_set(regs, utask->vaddr); |
665 | |||
666 | /* clear TF if it was set by us in arch_uprobe_pre_xol() */ | ||
667 | if (!utask->autask.saved_tf) | ||
668 | regs->flags &= ~X86_EFLAGS_TF; | ||
650 | } | 669 | } |
651 | 670 | ||
652 | /* | 671 | /* |
@@ -676,38 +695,3 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) | |||
676 | send_sig(SIGTRAP, current, 0); | 695 | send_sig(SIGTRAP, current, 0); |
677 | return ret; | 696 | return ret; |
678 | } | 697 | } |
679 | |||
680 | void arch_uprobe_enable_step(struct arch_uprobe *auprobe) | ||
681 | { | ||
682 | struct task_struct *task = current; | ||
683 | struct arch_uprobe_task *autask = &task->utask->autask; | ||
684 | struct pt_regs *regs = task_pt_regs(task); | ||
685 | |||
686 | autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF); | ||
687 | |||
688 | regs->flags |= X86_EFLAGS_TF; | ||
689 | if (test_tsk_thread_flag(task, TIF_BLOCKSTEP)) | ||
690 | set_task_blockstep(task, false); | ||
691 | } | ||
692 | |||
693 | void arch_uprobe_disable_step(struct arch_uprobe *auprobe) | ||
694 | { | ||
695 | struct task_struct *task = current; | ||
696 | struct arch_uprobe_task *autask = &task->utask->autask; | ||
697 | bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED); | ||
698 | struct pt_regs *regs = task_pt_regs(task); | ||
699 | /* | ||
700 | * The state of TIF_BLOCKSTEP was not saved so we can get an extra | ||
701 | * SIGTRAP if we do not clear TF. We need to examine the opcode to | ||
702 | * make it right. | ||
703 | */ | ||
704 | if (unlikely(trapped)) { | ||
705 | if (!autask->saved_tf) | ||
706 | regs->flags &= ~X86_EFLAGS_TF; | ||
707 | } else { | ||
708 | if (autask->saved_tf) | ||
709 | send_sig(SIGTRAP, task, 0); | ||
710 | else if (!(auprobe->fixups & UPROBE_FIX_SETF)) | ||
711 | regs->flags &= ~X86_EFLAGS_TF; | ||
712 | } | ||
713 | } | ||
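The deleted arch_uprobe_enable_step()/arch_uprobe_disable_step() pair is folded into the pre/post/abort XOL hooks earlier in this file. A condensed view of the invariant they now keep, paraphrasing the hunks above rather than quoting literal kernel code:

	/*
	 * pre_xol:   saved_tf = !!(regs->flags & X86_EFLAGS_TF);
	 *            regs->flags |= X86_EFLAGS_TF; clear TIF_BLOCKSTEP if set
	 * post_xol:  if (saved_tf) send SIGTRAP;
	 *            else if (!(fixups & UPROBE_FIX_SETF)) clear X86_EFLAGS_TF
	 * abort_xol: if (!saved_tf) clear X86_EFLAGS_TF
	 */

In other words, TF is only left set (or a SIGTRAP forwarded) when the task was already single-stepping before the probe hit, so a debugger attached to the task keeps seeing its own traps.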
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 5c9687b1bde6..1dfe69cc78a8 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -182,7 +182,7 @@ static void mark_screen_rdonly(struct mm_struct *mm) | |||
182 | if (pud_none_or_clear_bad(pud)) | 182 | if (pud_none_or_clear_bad(pud)) |
183 | goto out; | 183 | goto out; |
184 | pmd = pmd_offset(pud, 0xA0000); | 184 | pmd = pmd_offset(pud, 0xA0000); |
185 | split_huge_page_pmd(mm, pmd); | 185 | split_huge_page_pmd_mm(mm, 0xA0000, pmd); |
186 | if (pmd_none_or_clear_bad(pmd)) | 186 | if (pmd_none_or_clear_bad(pmd)) |
187 | goto out; | 187 | goto out; |
188 | pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl); | 188 | pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl); |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 3a3e8c9e280d..9a907a67be8f 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -145,19 +145,6 @@ static int addr_to_vsyscall_nr(unsigned long addr) | |||
145 | return nr; | 145 | return nr; |
146 | } | 146 | } |
147 | 147 | ||
148 | #ifdef CONFIG_SECCOMP | ||
149 | static int vsyscall_seccomp(struct task_struct *tsk, int syscall_nr) | ||
150 | { | ||
151 | if (!seccomp_mode(&tsk->seccomp)) | ||
152 | return 0; | ||
153 | task_pt_regs(tsk)->orig_ax = syscall_nr; | ||
154 | task_pt_regs(tsk)->ax = syscall_nr; | ||
155 | return __secure_computing(syscall_nr); | ||
156 | } | ||
157 | #else | ||
158 | #define vsyscall_seccomp(_tsk, _nr) 0 | ||
159 | #endif | ||
160 | |||
161 | static bool write_ok_or_segv(unsigned long ptr, size_t size) | 148 | static bool write_ok_or_segv(unsigned long ptr, size_t size) |
162 | { | 149 | { |
163 | /* | 150 | /* |
@@ -190,10 +177,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
190 | { | 177 | { |
191 | struct task_struct *tsk; | 178 | struct task_struct *tsk; |
192 | unsigned long caller; | 179 | unsigned long caller; |
193 | int vsyscall_nr; | 180 | int vsyscall_nr, syscall_nr, tmp; |
194 | int prev_sig_on_uaccess_error; | 181 | int prev_sig_on_uaccess_error; |
195 | long ret; | 182 | long ret; |
196 | int skip; | ||
197 | 183 | ||
198 | /* | 184 | /* |
199 | * No point in checking CS -- the only way to get here is a user mode | 185 | * No point in checking CS -- the only way to get here is a user mode |
@@ -225,56 +211,84 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
225 | } | 211 | } |
226 | 212 | ||
227 | tsk = current; | 213 | tsk = current; |
228 | /* | ||
229 | * With a real vsyscall, page faults cause SIGSEGV. We want to | ||
230 | * preserve that behavior to make writing exploits harder. | ||
231 | */ | ||
232 | prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; | ||
233 | current_thread_info()->sig_on_uaccess_error = 1; | ||
234 | 214 | ||
235 | /* | 215 | /* |
216 | * Check for access_ok violations and find the syscall nr. | ||
217 | * | ||
236 | * NULL is a valid user pointer (in the access_ok sense) on 32-bit and | 218 | * NULL is a valid user pointer (in the access_ok sense) on 32-bit and |
237 | * 64-bit, so we don't need to special-case it here. For all the | 219 | * 64-bit, so we don't need to special-case it here. For all the |
238 | * vsyscalls, NULL means "don't write anything" not "write it at | 220 | * vsyscalls, NULL means "don't write anything" not "write it at |
239 | * address 0". | 221 | * address 0". |
240 | */ | 222 | */ |
241 | ret = -EFAULT; | ||
242 | skip = 0; | ||
243 | switch (vsyscall_nr) { | 223 | switch (vsyscall_nr) { |
244 | case 0: | 224 | case 0: |
245 | skip = vsyscall_seccomp(tsk, __NR_gettimeofday); | ||
246 | if (skip) | ||
247 | break; | ||
248 | |||
249 | if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || | 225 | if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || |
250 | !write_ok_or_segv(regs->si, sizeof(struct timezone))) | 226 | !write_ok_or_segv(regs->si, sizeof(struct timezone))) { |
251 | break; | 227 | ret = -EFAULT; |
228 | goto check_fault; | ||
229 | } | ||
230 | |||
231 | syscall_nr = __NR_gettimeofday; | ||
232 | break; | ||
233 | |||
234 | case 1: | ||
235 | if (!write_ok_or_segv(regs->di, sizeof(time_t))) { | ||
236 | ret = -EFAULT; | ||
237 | goto check_fault; | ||
238 | } | ||
239 | |||
240 | syscall_nr = __NR_time; | ||
241 | break; | ||
242 | |||
243 | case 2: | ||
244 | if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || | ||
245 | !write_ok_or_segv(regs->si, sizeof(unsigned))) { | ||
246 | ret = -EFAULT; | ||
247 | goto check_fault; | ||
248 | } | ||
249 | |||
250 | syscall_nr = __NR_getcpu; | ||
251 | break; | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * Handle seccomp. regs->ip must be the original value. | ||
256 | * See seccomp_send_sigsys and Documentation/prctl/seccomp_filter.txt. | ||
257 | * | ||
258 | * We could optimize the seccomp disabled case, but performance | ||
259 | * here doesn't matter. | ||
260 | */ | ||
261 | regs->orig_ax = syscall_nr; | ||
262 | regs->ax = -ENOSYS; | ||
263 | tmp = secure_computing(syscall_nr); | ||
264 | if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { | ||
265 | warn_bad_vsyscall(KERN_DEBUG, regs, | ||
266 | "seccomp tried to change syscall nr or ip"); | ||
267 | do_exit(SIGSYS); | ||
268 | } | ||
269 | if (tmp) | ||
270 | goto do_ret; /* skip requested */ | ||
252 | 271 | ||
272 | /* | ||
273 | * With a real vsyscall, page faults cause SIGSEGV. We want to | ||
274 | * preserve that behavior to make writing exploits harder. | ||
275 | */ | ||
276 | prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; | ||
277 | current_thread_info()->sig_on_uaccess_error = 1; | ||
278 | |||
279 | ret = -EFAULT; | ||
280 | switch (vsyscall_nr) { | ||
281 | case 0: | ||
253 | ret = sys_gettimeofday( | 282 | ret = sys_gettimeofday( |
254 | (struct timeval __user *)regs->di, | 283 | (struct timeval __user *)regs->di, |
255 | (struct timezone __user *)regs->si); | 284 | (struct timezone __user *)regs->si); |
256 | break; | 285 | break; |
257 | 286 | ||
258 | case 1: | 287 | case 1: |
259 | skip = vsyscall_seccomp(tsk, __NR_time); | ||
260 | if (skip) | ||
261 | break; | ||
262 | |||
263 | if (!write_ok_or_segv(regs->di, sizeof(time_t))) | ||
264 | break; | ||
265 | |||
266 | ret = sys_time((time_t __user *)regs->di); | 288 | ret = sys_time((time_t __user *)regs->di); |
267 | break; | 289 | break; |
268 | 290 | ||
269 | case 2: | 291 | case 2: |
270 | skip = vsyscall_seccomp(tsk, __NR_getcpu); | ||
271 | if (skip) | ||
272 | break; | ||
273 | |||
274 | if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || | ||
275 | !write_ok_or_segv(regs->si, sizeof(unsigned))) | ||
276 | break; | ||
277 | |||
278 | ret = sys_getcpu((unsigned __user *)regs->di, | 292 | ret = sys_getcpu((unsigned __user *)regs->di, |
279 | (unsigned __user *)regs->si, | 293 | (unsigned __user *)regs->si, |
280 | NULL); | 294 | NULL); |
@@ -283,12 +297,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
283 | 297 | ||
284 | current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; | 298 | current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; |
285 | 299 | ||
286 | if (skip) { | 300 | check_fault: |
287 | if ((long)regs->ax <= 0L) /* seccomp errno emulation */ | ||
288 | goto do_ret; | ||
289 | goto done; /* seccomp trace/trap */ | ||
290 | } | ||
291 | |||
292 | if (ret == -EFAULT) { | 301 | if (ret == -EFAULT) { |
293 | /* Bad news -- userspace fed a bad pointer to a vsyscall. */ | 302 | /* Bad news -- userspace fed a bad pointer to a vsyscall. */ |
294 | warn_bad_vsyscall(KERN_INFO, regs, | 303 | warn_bad_vsyscall(KERN_INFO, regs, |
@@ -311,7 +320,6 @@ do_ret: | |||
311 | /* Emulate a ret instruction. */ | 320 | /* Emulate a ret instruction. */ |
312 | regs->ip = caller; | 321 | regs->ip = caller; |
313 | regs->sp += 8; | 322 | regs->sp += 8; |
314 | done: | ||
315 | return true; | 323 | return true; |
316 | 324 | ||
317 | sigsegv: | 325 | sigsegv: |