author     Ingo Molnar <mingo@kernel.org>  2016-06-08 07:02:16 -0400
committer  Ingo Molnar <mingo@kernel.org>  2016-06-08 07:02:16 -0400
commit     8e8c668927b029f6ccc350eb1aa936864cc4eb6f (patch)
tree       f91ec6d49e2b01de5b4b3d517209d13b216a3f0f /arch/x86/kernel
parent     f0133acc7d4835cfbb86393b7d2a4fba7519585b (diff)
parent     970442c599b22ccd644ebfe94d1d303bf6f87c05 (diff)
Merge branch 'x86/urgent' into x86/cpu, to pick up dependency
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile | 7
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 18
-rw-r--r--  arch/x86/kernel/alternative.c | 1
-rw-r--r--  arch/x86/kernel/apic/apic.c | 30
-rw-r--r--  arch/x86/kernel/apic/apic_noop.c | 4
-rw-r--r--  arch/x86/kernel/apic/hw_nmi.c | 1
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 2
-rw-r--r--  arch/x86/kernel/apic/ipi.c | 2
-rw-r--r--  arch/x86/kernel/apic/vector.c | 2
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 852
-rw-r--r--  arch/x86/kernel/apm_32.c | 2
-rw-r--r--  arch/x86/kernel/asm-offsets.c | 1
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 18
-rw-r--r--  arch/x86/kernel/cpu/common.c | 99
-rw-r--r--  arch/x86/kernel/cpu/cyrix.c | 2
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 43
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-genpool.c | 46
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-internal.h | 15
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-severity.c | 30
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 160
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c | 94
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c | 2
-rw-r--r--  arch/x86/kernel/cpu/microcode/intel.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 24
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 13
-rw-r--r--  arch/x86/kernel/cpu/mtrr/mtrr.h | 1
-rw-r--r--  arch/x86/kernel/devicetree.c | 2
-rw-r--r--  arch/x86/kernel/dumpstack.c | 19
-rw-r--r--  arch/x86/kernel/ebda.c (renamed from arch/x86/kernel/head.c) | 2
-rw-r--r--  arch/x86/kernel/fpu/bugs.c | 16
-rw-r--r--  arch/x86/kernel/fpu/core.c | 50
-rw-r--r--  arch/x86/kernel/fpu/init.c | 16
-rw-r--r--  arch/x86/kernel/fpu/regset.c | 25
-rw-r--r--  arch/x86/kernel/fpu/xstate.c | 18
-rw-r--r--  arch/x86/kernel/head32.c | 2
-rw-r--r--  arch/x86/kernel/head64.c | 1
-rw-r--r--  arch/x86/kernel/head_32.S | 116
-rw-r--r--  arch/x86/kernel/head_64.S | 103
-rw-r--r--  arch/x86/kernel/hpet.c | 3
-rw-r--r--  arch/x86/kernel/jump_label.c | 1
-rw-r--r--  arch/x86/kernel/kexec-bzimage64.c | 18
-rw-r--r--  arch/x86/kernel/kgdb.c | 1
-rw-r--r--  arch/x86/kernel/kprobes/core.c | 1
-rw-r--r--  arch/x86/kernel/kprobes/opt.c | 1
-rw-r--r--  arch/x86/kernel/kvm.c | 8
-rw-r--r--  arch/x86/kernel/livepatch.c | 70
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 45
-rw-r--r--  arch/x86/kernel/mcount_64.S | 3
-rw-r--r--  arch/x86/kernel/module.c | 1
-rw-r--r--  arch/x86/kernel/paravirt.c | 7
-rw-r--r--  arch/x86/kernel/pci-iommu_table.c | 2
-rw-r--r--  arch/x86/kernel/platform-quirks.c | 35
-rw-r--r--  arch/x86/kernel/process.c | 5
-rw-r--r--  arch/x86/kernel/process_64.c | 245
-rw-r--r--  arch/x86/kernel/ptrace.c | 54
-rw-r--r--  arch/x86/kernel/reboot.c | 9
-rw-r--r--  arch/x86/kernel/rtc.c | 18
-rw-r--r--  arch/x86/kernel/setup.c | 12
-rw-r--r--  arch/x86/kernel/signal.c | 29
-rw-r--r--  arch/x86/kernel/smpboot.c | 7
-rw-r--r--  arch/x86/kernel/sysfb_efi.c | 29
-rw-r--r--  arch/x86/kernel/tboot.c | 6
-rw-r--r--  arch/x86/kernel/tls.c | 42
-rw-r--r--  arch/x86/kernel/traps.c | 1
-rw-r--r--  arch/x86/kernel/tsc.c | 33
-rw-r--r--  arch/x86/kernel/tsc_msr.c | 5
-rw-r--r--  arch/x86/kernel/uprobes.c | 6
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S | 2
69 files changed, 1633 insertions(+), 909 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 616ebd22ef9a..0503f5bfb18d 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -2,7 +2,11 @@
2# Makefile for the linux kernel. 2# Makefile for the linux kernel.
3# 3#
4 4
5extra-y := head_$(BITS).o head$(BITS).o head.o vmlinux.lds 5extra-y := head_$(BITS).o
6extra-y += head$(BITS).o
7extra-y += ebda.o
8extra-y += platform-quirks.o
9extra-y += vmlinux.lds
6 10
7CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) 11CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
8 12
@@ -79,7 +83,6 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
79obj-y += apic/ 83obj-y += apic/
80obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 84obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
81obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o 85obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
82obj-$(CONFIG_LIVEPATCH) += livepatch.o
83obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o 86obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
84obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o 87obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
85obj-$(CONFIG_X86_TSC) += trace_clock.o 88obj-$(CONFIG_X86_TSC) += trace_clock.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8c2f1ef6ca23..9414f84584e4 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -136,7 +136,7 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
136{ 136{
137 struct acpi_table_madt *madt = NULL; 137 struct acpi_table_madt *madt = NULL;
138 138
139 if (!cpu_has_apic) 139 if (!boot_cpu_has(X86_FEATURE_APIC))
140 return -EINVAL; 140 return -EINVAL;
141 141
142 madt = (struct acpi_table_madt *)table; 142 madt = (struct acpi_table_madt *)table;
@@ -445,7 +445,6 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
445 polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; 445 polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
446 446
447 mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); 447 mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
448 acpi_penalize_sci_irq(bus_irq, trigger, polarity);
449 448
450 /* 449 /*
451 * stash over-ride to indicate we've been here 450 * stash over-ride to indicate we've been here
@@ -913,6 +912,15 @@ late_initcall(hpet_insert_resource);
913 912
914static int __init acpi_parse_fadt(struct acpi_table_header *table) 913static int __init acpi_parse_fadt(struct acpi_table_header *table)
915{ 914{
915 if (!(acpi_gbl_FADT.boot_flags & ACPI_FADT_LEGACY_DEVICES)) {
916 pr_debug("ACPI: no legacy devices present\n");
917 x86_platform.legacy.devices.pnpbios = 0;
918 }
919
920 if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) {
921 pr_debug("ACPI: not registering RTC platform device\n");
922 x86_platform.legacy.rtc = 0;
923 }
916 924
917#ifdef CONFIG_X86_PM_TIMER 925#ifdef CONFIG_X86_PM_TIMER
918 /* detect the location of the ACPI PM Timer */ 926 /* detect the location of the ACPI PM Timer */
@@ -951,7 +959,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
951{ 959{
952 int count; 960 int count;
953 961
954 if (!cpu_has_apic) 962 if (!boot_cpu_has(X86_FEATURE_APIC))
955 return -ENODEV; 963 return -ENODEV;
956 964
957 /* 965 /*
@@ -979,7 +987,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
979 int ret; 987 int ret;
980 struct acpi_subtable_proc madt_proc[2]; 988 struct acpi_subtable_proc madt_proc[2];
981 989
982 if (!cpu_has_apic) 990 if (!boot_cpu_has(X86_FEATURE_APIC))
983 return -ENODEV; 991 return -ENODEV;
984 992
985 /* 993 /*
@@ -1125,7 +1133,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
1125 if (acpi_disabled || acpi_noirq) 1133 if (acpi_disabled || acpi_noirq)
1126 return -ENODEV; 1134 return -ENODEV;
1127 1135
1128 if (!cpu_has_apic) 1136 if (!boot_cpu_has(X86_FEATURE_APIC))
1129 return -ENODEV; 1137 return -ENODEV;
1130 1138
1131 /* 1139 /*
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 25f909362b7a..5cb272a7a5a3 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -11,6 +11,7 @@
11#include <linux/stop_machine.h> 11#include <linux/stop_machine.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/kdebug.h> 13#include <linux/kdebug.h>
14#include <asm/text-patching.h>
14#include <asm/alternative.h> 15#include <asm/alternative.h>
15#include <asm/sections.h> 16#include <asm/sections.h>
16#include <asm/pgtable.h> 17#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d7867c885bf8..60078a67d7e3 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -607,7 +607,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
607 long tapic = apic_read(APIC_TMCCT); 607 long tapic = apic_read(APIC_TMCCT);
608 unsigned long pm = acpi_pm_read_early(); 608 unsigned long pm = acpi_pm_read_early();
609 609
610 if (cpu_has_tsc) 610 if (boot_cpu_has(X86_FEATURE_TSC))
611 tsc = rdtsc(); 611 tsc = rdtsc();
612 612
613 switch (lapic_cal_loops++) { 613 switch (lapic_cal_loops++) {
@@ -668,7 +668,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
668 *delta = (long)res; 668 *delta = (long)res;
669 669
670 /* Correct the tsc counter value */ 670 /* Correct the tsc counter value */
671 if (cpu_has_tsc) { 671 if (boot_cpu_has(X86_FEATURE_TSC)) {
672 res = (((u64)(*deltatsc)) * pm_100ms); 672 res = (((u64)(*deltatsc)) * pm_100ms);
673 do_div(res, deltapm); 673 do_div(res, deltapm);
674 apic_printk(APIC_VERBOSE, "TSC delta adjusted to " 674 apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
@@ -760,7 +760,7 @@ static int __init calibrate_APIC_clock(void)
760 apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", 760 apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
761 lapic_timer_frequency); 761 lapic_timer_frequency);
762 762
763 if (cpu_has_tsc) { 763 if (boot_cpu_has(X86_FEATURE_TSC)) {
764 apic_printk(APIC_VERBOSE, "..... CPU clock speed is " 764 apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
765 "%ld.%04ld MHz.\n", 765 "%ld.%04ld MHz.\n",
766 (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ), 766 (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
@@ -1085,7 +1085,7 @@ void lapic_shutdown(void)
1085{ 1085{
1086 unsigned long flags; 1086 unsigned long flags;
1087 1087
1088 if (!cpu_has_apic && !apic_from_smp_config()) 1088 if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
1089 return; 1089 return;
1090 1090
1091 local_irq_save(flags); 1091 local_irq_save(flags);
@@ -1134,7 +1134,7 @@ void __init init_bsp_APIC(void)
1134 * Don't do the setup now if we have a SMP BIOS as the 1134 * Don't do the setup now if we have a SMP BIOS as the
1135 * through-I/O-APIC virtual wire mode might be active. 1135 * through-I/O-APIC virtual wire mode might be active.
1136 */ 1136 */
1137 if (smp_found_config || !cpu_has_apic) 1137 if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
1138 return; 1138 return;
1139 1139
1140 /* 1140 /*
@@ -1227,7 +1227,7 @@ void setup_local_APIC(void)
1227 unsigned long long tsc = 0, ntsc; 1227 unsigned long long tsc = 0, ntsc;
1228 long long max_loops = cpu_khz ? cpu_khz : 1000000; 1228 long long max_loops = cpu_khz ? cpu_khz : 1000000;
1229 1229
1230 if (cpu_has_tsc) 1230 if (boot_cpu_has(X86_FEATURE_TSC))
1231 tsc = rdtsc(); 1231 tsc = rdtsc();
1232 1232
1233 if (disable_apic) { 1233 if (disable_apic) {
@@ -1311,7 +1311,7 @@ void setup_local_APIC(void)
1311 break; 1311 break;
1312 } 1312 }
1313 if (queued) { 1313 if (queued) {
1314 if (cpu_has_tsc && cpu_khz) { 1314 if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
1315 ntsc = rdtsc(); 1315 ntsc = rdtsc();
1316 max_loops = (cpu_khz << 10) - (ntsc - tsc); 1316 max_loops = (cpu_khz << 10) - (ntsc - tsc);
1317 } else 1317 } else
@@ -1445,7 +1445,7 @@ static void __x2apic_disable(void)
1445{ 1445{
1446 u64 msr; 1446 u64 msr;
1447 1447
1448 if (!cpu_has_apic) 1448 if (!boot_cpu_has(X86_FEATURE_APIC))
1449 return; 1449 return;
1450 1450
1451 rdmsrl(MSR_IA32_APICBASE, msr); 1451 rdmsrl(MSR_IA32_APICBASE, msr);
@@ -1632,7 +1632,7 @@ void __init enable_IR_x2apic(void)
1632 */ 1632 */
1633static int __init detect_init_APIC(void) 1633static int __init detect_init_APIC(void)
1634{ 1634{
1635 if (!cpu_has_apic) { 1635 if (!boot_cpu_has(X86_FEATURE_APIC)) {
1636 pr_info("No local APIC present\n"); 1636 pr_info("No local APIC present\n");
1637 return -1; 1637 return -1;
1638 } 1638 }
@@ -1711,14 +1711,14 @@ static int __init detect_init_APIC(void)
1711 goto no_apic; 1711 goto no_apic;
1712 case X86_VENDOR_INTEL: 1712 case X86_VENDOR_INTEL:
1713 if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || 1713 if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
1714 (boot_cpu_data.x86 == 5 && cpu_has_apic)) 1714 (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)))
1715 break; 1715 break;
1716 goto no_apic; 1716 goto no_apic;
1717 default: 1717 default:
1718 goto no_apic; 1718 goto no_apic;
1719 } 1719 }
1720 1720
1721 if (!cpu_has_apic) { 1721 if (!boot_cpu_has(X86_FEATURE_APIC)) {
1722 /* 1722 /*
1723 * Over-ride BIOS and try to enable the local APIC only if 1723 * Over-ride BIOS and try to enable the local APIC only if
1724 * "lapic" specified. 1724 * "lapic" specified.
@@ -2233,19 +2233,19 @@ int __init APIC_init_uniprocessor(void)
2233 return -1; 2233 return -1;
2234 } 2234 }
2235#ifdef CONFIG_X86_64 2235#ifdef CONFIG_X86_64
2236 if (!cpu_has_apic) { 2236 if (!boot_cpu_has(X86_FEATURE_APIC)) {
2237 disable_apic = 1; 2237 disable_apic = 1;
2238 pr_info("Apic disabled by BIOS\n"); 2238 pr_info("Apic disabled by BIOS\n");
2239 return -1; 2239 return -1;
2240 } 2240 }
2241#else 2241#else
2242 if (!smp_found_config && !cpu_has_apic) 2242 if (!smp_found_config && !boot_cpu_has(X86_FEATURE_APIC))
2243 return -1; 2243 return -1;
2244 2244
2245 /* 2245 /*
2246 * Complain if the BIOS pretends there is one. 2246 * Complain if the BIOS pretends there is one.
2247 */ 2247 */
2248 if (!cpu_has_apic && 2248 if (!boot_cpu_has(X86_FEATURE_APIC) &&
2249 APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { 2249 APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
2250 pr_err("BIOS bug, local APIC 0x%x not detected!...\n", 2250 pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
2251 boot_cpu_physical_apicid); 2251 boot_cpu_physical_apicid);
@@ -2426,7 +2426,7 @@ static void apic_pm_activate(void)
2426static int __init init_lapic_sysfs(void) 2426static int __init init_lapic_sysfs(void)
2427{ 2427{
2428 /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ 2428 /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
2429 if (cpu_has_apic) 2429 if (boot_cpu_has(X86_FEATURE_APIC))
2430 register_syscore_ops(&lapic_syscore_ops); 2430 register_syscore_ops(&lapic_syscore_ops);
2431 2431
2432 return 0; 2432 return 0;
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 331a7a07c48f..13d19ed58514 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -100,13 +100,13 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask,
100 100
101static u32 noop_apic_read(u32 reg) 101static u32 noop_apic_read(u32 reg)
102{ 102{
103 WARN_ON_ONCE((cpu_has_apic && !disable_apic)); 103 WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
104 return 0; 104 return 0;
105} 105}
106 106
107static void noop_apic_write(u32 reg, u32 v) 107static void noop_apic_write(u32 reg, u32 v)
108{ 108{
109 WARN_ON_ONCE(cpu_has_apic && !disable_apic); 109 WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
110} 110}
111 111
112struct apic apic_noop = { 112struct apic apic_noop = {
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 045e424fb368..7788ce643bf4 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -18,7 +18,6 @@
18#include <linux/nmi.h> 18#include <linux/nmi.h>
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/delay.h> 20#include <linux/delay.h>
21#include <linux/seq_buf.h>
22 21
23#ifdef CONFIG_HARDLOCKUP_DETECTOR 22#ifdef CONFIG_HARDLOCKUP_DETECTOR
24u64 hw_nmi_get_sample_period(int watchdog_thresh) 23u64 hw_nmi_get_sample_period(int watchdog_thresh)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index fdb0fbfb1197..84e33ff5a6d5 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1454,7 +1454,7 @@ void native_disable_io_apic(void)
1454 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); 1454 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
1455 } 1455 }
1456 1456
1457 if (cpu_has_apic || apic_from_smp_config()) 1457 if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config())
1458 disconnect_bsp_APIC(ioapic_i8259.pin != -1); 1458 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1459} 1459}
1460 1460
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 28bde88b0085..2a0f225afebd 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -230,7 +230,7 @@ int safe_smp_processor_id(void)
230{ 230{
231 int apicid, cpuid; 231 int apicid, cpuid;
232 232
233 if (!cpu_has_apic) 233 if (!boot_cpu_has(X86_FEATURE_APIC))
234 return 0; 234 return 0;
235 235
236 apicid = hard_smp_processor_id(); 236 apicid = hard_smp_processor_id();
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index ef495511f019..a5e400afc563 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -944,7 +944,7 @@ static int __init print_ICs(void)
944 print_PIC(); 944 print_PIC();
945 945
946 /* don't print out if apic is not there */ 946 /* don't print out if apic is not there */
947 if (!cpu_has_apic && !apic_from_smp_config()) 947 if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
948 return 0; 948 return 0;
949 949
950 print_local_APICs(show_lapic); 950 print_local_APICs(show_lapic);
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 8f4942e2bcbb..29003154fafd 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -48,12 +48,35 @@ static u64 gru_start_paddr, gru_end_paddr;
48static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr; 48static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr;
49static u64 gru_dist_lmask, gru_dist_umask; 49static u64 gru_dist_lmask, gru_dist_umask;
50static union uvh_apicid uvh_apicid; 50static union uvh_apicid uvh_apicid;
51
52/* info derived from CPUID */
53static struct {
54 unsigned int apicid_shift;
55 unsigned int apicid_mask;
56 unsigned int socketid_shift; /* aka pnode_shift for UV1/2/3 */
57 unsigned int pnode_mask;
58 unsigned int gpa_shift;
59} uv_cpuid;
60
51int uv_min_hub_revision_id; 61int uv_min_hub_revision_id;
52EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); 62EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
53unsigned int uv_apicid_hibits; 63unsigned int uv_apicid_hibits;
54EXPORT_SYMBOL_GPL(uv_apicid_hibits); 64EXPORT_SYMBOL_GPL(uv_apicid_hibits);
55 65
56static struct apic apic_x2apic_uv_x; 66static struct apic apic_x2apic_uv_x;
67static struct uv_hub_info_s uv_hub_info_node0;
68
69/* Set this to use hardware error handler instead of kernel panic */
70static int disable_uv_undefined_panic = 1;
71unsigned long uv_undefined(char *str)
72{
73 if (likely(!disable_uv_undefined_panic))
74 panic("UV: error: undefined MMR: %s\n", str);
75 else
76 pr_crit("UV: error: undefined MMR: %s\n", str);
77 return ~0ul; /* cause a machine fault */
78}
79EXPORT_SYMBOL(uv_undefined);
57 80
58static unsigned long __init uv_early_read_mmr(unsigned long addr) 81static unsigned long __init uv_early_read_mmr(unsigned long addr)
59{ 82{
@@ -108,21 +131,71 @@ static int __init early_get_pnodeid(void)
108 case UV3_HUB_PART_NUMBER_X: 131 case UV3_HUB_PART_NUMBER_X:
109 uv_min_hub_revision_id += UV3_HUB_REVISION_BASE; 132 uv_min_hub_revision_id += UV3_HUB_REVISION_BASE;
110 break; 133 break;
134 case UV4_HUB_PART_NUMBER:
135 uv_min_hub_revision_id += UV4_HUB_REVISION_BASE - 1;
136 break;
111 } 137 }
112 138
113 uv_hub_info->hub_revision = uv_min_hub_revision_id; 139 uv_hub_info->hub_revision = uv_min_hub_revision_id;
114 pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1); 140 uv_cpuid.pnode_mask = (1 << m_n_config.s.n_skt) - 1;
141 pnode = (node_id.s.node_id >> 1) & uv_cpuid.pnode_mask;
142 uv_cpuid.gpa_shift = 46; /* default unless changed */
143
144 pr_info("UV: rev:%d part#:%x nodeid:%04x n_skt:%d pnmsk:%x pn:%x\n",
145 node_id.s.revision, node_id.s.part_number, node_id.s.node_id,
146 m_n_config.s.n_skt, uv_cpuid.pnode_mask, pnode);
115 return pnode; 147 return pnode;
116} 148}
117 149
118static void __init early_get_apic_pnode_shift(void) 150/* [copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */
151#define SMT_LEVEL 0 /* leaf 0xb SMT level */
152#define INVALID_TYPE 0 /* leaf 0xb sub-leaf types */
153#define SMT_TYPE 1
154#define CORE_TYPE 2
155#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff)
156#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f)
157
158static void set_x2apic_bits(void)
159{
160 unsigned int eax, ebx, ecx, edx, sub_index;
161 unsigned int sid_shift;
162
163 cpuid(0, &eax, &ebx, &ecx, &edx);
164 if (eax < 0xb) {
165 pr_info("UV: CPU does not have CPUID.11\n");
166 return;
167 }
168 cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
169 if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) {
170 pr_info("UV: CPUID.11 not implemented\n");
171 return;
172 }
173 sid_shift = BITS_SHIFT_NEXT_LEVEL(eax);
174 sub_index = 1;
175 do {
176 cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);
177 if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
178 sid_shift = BITS_SHIFT_NEXT_LEVEL(eax);
179 break;
180 }
181 sub_index++;
182 } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
183 uv_cpuid.apicid_shift = 0;
184 uv_cpuid.apicid_mask = (~(-1 << sid_shift));
185 uv_cpuid.socketid_shift = sid_shift;
186}
187
188static void __init early_get_apic_socketid_shift(void)
119{ 189{
120 uvh_apicid.v = uv_early_read_mmr(UVH_APICID); 190 if (is_uv2_hub() || is_uv3_hub())
121 if (!uvh_apicid.v) 191 uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
122 /* 192
123 * Old bios, use default value 193 set_x2apic_bits();
124 */ 194
125 uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT; 195 pr_info("UV: apicid_shift:%d apicid_mask:0x%x\n",
196 uv_cpuid.apicid_shift, uv_cpuid.apicid_mask);
197 pr_info("UV: socketid_shift:%d pnode_mask:0x%x\n",
198 uv_cpuid.socketid_shift, uv_cpuid.pnode_mask);
126} 199}
127 200
128/* 201/*
@@ -150,13 +223,18 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
150 if (strncmp(oem_id, "SGI", 3) != 0) 223 if (strncmp(oem_id, "SGI", 3) != 0)
151 return 0; 224 return 0;
152 225
226 /* Setup early hub type field in uv_hub_info for Node 0 */
227 uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
228
153 /* 229 /*
154 * Determine UV arch type. 230 * Determine UV arch type.
155 * SGI: UV100/1000 231 * SGI: UV100/1000
156 * SGI2: UV2000/3000 232 * SGI2: UV2000/3000
157 * SGI3: UV300 (truncated to 4 chars because of different varieties) 233 * SGI3: UV300 (truncated to 4 chars because of different varieties)
234 * SGI4: UV400 (truncated to 4 chars because of different varieties)
158 */ 235 */
159 uv_hub_info->hub_revision = 236 uv_hub_info->hub_revision =
237 !strncmp(oem_id, "SGI4", 4) ? UV4_HUB_REVISION_BASE :
160 !strncmp(oem_id, "SGI3", 4) ? UV3_HUB_REVISION_BASE : 238 !strncmp(oem_id, "SGI3", 4) ? UV3_HUB_REVISION_BASE :
161 !strcmp(oem_id, "SGI2") ? UV2_HUB_REVISION_BASE : 239 !strcmp(oem_id, "SGI2") ? UV2_HUB_REVISION_BASE :
162 !strcmp(oem_id, "SGI") ? UV1_HUB_REVISION_BASE : 0; 240 !strcmp(oem_id, "SGI") ? UV1_HUB_REVISION_BASE : 0;
@@ -165,7 +243,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
165 goto badbios; 243 goto badbios;
166 244
167 pnodeid = early_get_pnodeid(); 245 pnodeid = early_get_pnodeid();
168 early_get_apic_pnode_shift(); 246 early_get_apic_socketid_shift();
169 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; 247 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
170 x86_platform.nmi_init = uv_nmi_init; 248 x86_platform.nmi_init = uv_nmi_init;
171 249
@@ -211,17 +289,11 @@ int is_uv_system(void)
211} 289}
212EXPORT_SYMBOL_GPL(is_uv_system); 290EXPORT_SYMBOL_GPL(is_uv_system);
213 291
214DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); 292void **__uv_hub_info_list;
215EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); 293EXPORT_SYMBOL_GPL(__uv_hub_info_list);
216
217struct uv_blade_info *uv_blade_info;
218EXPORT_SYMBOL_GPL(uv_blade_info);
219
220short *uv_node_to_blade;
221EXPORT_SYMBOL_GPL(uv_node_to_blade);
222 294
223short *uv_cpu_to_blade; 295DEFINE_PER_CPU(struct uv_cpu_info_s, __uv_cpu_info);
224EXPORT_SYMBOL_GPL(uv_cpu_to_blade); 296EXPORT_PER_CPU_SYMBOL_GPL(__uv_cpu_info);
225 297
226short uv_possible_blades; 298short uv_possible_blades;
227EXPORT_SYMBOL_GPL(uv_possible_blades); 299EXPORT_SYMBOL_GPL(uv_possible_blades);
@@ -229,6 +301,115 @@ EXPORT_SYMBOL_GPL(uv_possible_blades);
229unsigned long sn_rtc_cycles_per_second; 301unsigned long sn_rtc_cycles_per_second;
230EXPORT_SYMBOL(sn_rtc_cycles_per_second); 302EXPORT_SYMBOL(sn_rtc_cycles_per_second);
231 303
304/* the following values are used for the per node hub info struct */
305static __initdata unsigned short *_node_to_pnode;
306static __initdata unsigned short _min_socket, _max_socket;
307static __initdata unsigned short _min_pnode, _max_pnode, _gr_table_len;
308static __initdata struct uv_gam_range_entry *uv_gre_table;
309static __initdata struct uv_gam_parameters *uv_gp_table;
310static __initdata unsigned short *_socket_to_node;
311static __initdata unsigned short *_socket_to_pnode;
312static __initdata unsigned short *_pnode_to_socket;
313static __initdata struct uv_gam_range_s *_gr_table;
314#define SOCK_EMPTY ((unsigned short)~0)
315
316extern int uv_hub_info_version(void)
317{
318 return UV_HUB_INFO_VERSION;
319}
320EXPORT_SYMBOL(uv_hub_info_version);
321
322/* Build GAM range lookup table */
323static __init void build_uv_gr_table(void)
324{
325 struct uv_gam_range_entry *gre = uv_gre_table;
326 struct uv_gam_range_s *grt;
327 unsigned long last_limit = 0, ram_limit = 0;
328 int bytes, i, sid, lsid = -1;
329
330 if (!gre)
331 return;
332
333 bytes = _gr_table_len * sizeof(struct uv_gam_range_s);
334 grt = kzalloc(bytes, GFP_KERNEL);
335 BUG_ON(!grt);
336 _gr_table = grt;
337
338 for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
339 if (gre->type == UV_GAM_RANGE_TYPE_HOLE) {
340 if (!ram_limit) { /* mark hole between ram/non-ram */
341 ram_limit = last_limit;
342 last_limit = gre->limit;
343 lsid++;
344 continue;
345 }
346 last_limit = gre->limit;
347 pr_info("UV: extra hole in GAM RE table @%d\n",
348 (int)(gre - uv_gre_table));
349 continue;
350 }
351 if (_max_socket < gre->sockid) {
352 pr_err("UV: GAM table sockid(%d) too large(>%d) @%d\n",
353 gre->sockid, _max_socket,
354 (int)(gre - uv_gre_table));
355 continue;
356 }
357 sid = gre->sockid - _min_socket;
358 if (lsid < sid) { /* new range */
359 grt = &_gr_table[sid];
360 grt->base = lsid;
361 grt->nasid = gre->nasid;
362 grt->limit = last_limit = gre->limit;
363 lsid = sid;
364 continue;
365 }
366 if (lsid == sid && !ram_limit) { /* update range */
367 if (grt->limit == last_limit) { /* .. if contiguous */
368 grt->limit = last_limit = gre->limit;
369 continue;
370 }
371 }
372 if (!ram_limit) { /* non-contiguous ram range */
373 grt++;
374 grt->base = sid - 1;
375 grt->nasid = gre->nasid;
376 grt->limit = last_limit = gre->limit;
377 continue;
378 }
379 grt++; /* non-contiguous/non-ram */
380 grt->base = grt - _gr_table; /* base is this entry */
381 grt->nasid = gre->nasid;
382 grt->limit = last_limit = gre->limit;
383 lsid++;
384 }
385
386 /* shorten table if possible */
387 grt++;
388 i = grt - _gr_table;
389 if (i < _gr_table_len) {
390 void *ret;
391
392 bytes = i * sizeof(struct uv_gam_range_s);
393 ret = krealloc(_gr_table, bytes, GFP_KERNEL);
394 if (ret) {
395 _gr_table = ret;
396 _gr_table_len = i;
397 }
398 }
399
400 /* display resultant gam range table */
401 for (i = 0, grt = _gr_table; i < _gr_table_len; i++, grt++) {
402 int gb = grt->base;
403 unsigned long start = gb < 0 ? 0 :
404 (unsigned long)_gr_table[gb].limit << UV_GAM_RANGE_SHFT;
405 unsigned long end =
406 (unsigned long)grt->limit << UV_GAM_RANGE_SHFT;
407
408 pr_info("UV: GAM Range %2d %04x 0x%013lx-0x%013lx (%d)\n",
409 i, grt->nasid, start, end, gb);
410 }
411}
412
232static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) 413static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
233{ 414{
234 unsigned long val; 415 unsigned long val;
@@ -355,7 +536,6 @@ static unsigned long set_apic_id(unsigned int id)
355 536
356static unsigned int uv_read_apic_id(void) 537static unsigned int uv_read_apic_id(void)
357{ 538{
358
359 return x2apic_get_apic_id(apic_read(APIC_ID)); 539 return x2apic_get_apic_id(apic_read(APIC_ID));
360} 540}
361 541
@@ -430,58 +610,38 @@ static void set_x2apic_extra_bits(int pnode)
430 __this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift); 610 __this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift);
431} 611}
432 612
433/* 613#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_LENGTH 3
434 * Called on boot cpu.
435 */
436static __init int boot_pnode_to_blade(int pnode)
437{
438 int blade;
439
440 for (blade = 0; blade < uv_num_possible_blades(); blade++)
441 if (pnode == uv_blade_info[blade].pnode)
442 return blade;
443 BUG();
444}
445
446struct redir_addr {
447 unsigned long redirect;
448 unsigned long alias;
449};
450
451#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 614#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
452 615
453static __initdata struct redir_addr redir_addrs[] = {
454 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR},
455 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR},
456 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR},
457};
458
459static unsigned char get_n_lshift(int m_val)
460{
461 union uv3h_gr0_gam_gr_config_u m_gr_config;
462
463 if (is_uv1_hub())
464 return m_val;
465
466 if (is_uv2_hub())
467 return m_val == 40 ? 40 : 39;
468
469 m_gr_config.v = uv_read_local_mmr(UV3H_GR0_GAM_GR_CONFIG);
470 return m_gr_config.s3.m_skt;
471}
472
473static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) 616static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
474{ 617{
475 union uvh_rh_gam_alias210_overlay_config_2_mmr_u alias; 618 union uvh_rh_gam_alias210_overlay_config_2_mmr_u alias;
476 union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect; 619 union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
620 unsigned long m_redirect;
621 unsigned long m_overlay;
477 int i; 622 int i;
478 623
479 for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) { 624 for (i = 0; i < UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_LENGTH; i++) {
480 alias.v = uv_read_local_mmr(redir_addrs[i].alias); 625 switch (i) {
626 case 0:
627 m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR;
628 m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR;
629 break;
630 case 1:
631 m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR;
632 m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR;
633 break;
634 case 2:
635 m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR;
636 m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR;
637 break;
638 }
639 alias.v = uv_read_local_mmr(m_overlay);
481 if (alias.s.enable && alias.s.base == 0) { 640 if (alias.s.enable && alias.s.base == 0) {
482 *size = (1UL << alias.s.m_alias); 641 *size = (1UL << alias.s.m_alias);
483 redirect.v = uv_read_local_mmr(redir_addrs[i].redirect); 642 redirect.v = uv_read_local_mmr(m_redirect);
484 *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT; 643 *base = (unsigned long)redirect.s.dest_base
644 << DEST_SHIFT;
485 return; 645 return;
486 } 646 }
487 } 647 }
@@ -544,6 +704,8 @@ static __init void map_gru_high(int max_pnode)
544{ 704{
545 union uvh_rh_gam_gru_overlay_config_mmr_u gru; 705 union uvh_rh_gam_gru_overlay_config_mmr_u gru;
546 int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; 706 int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
707 unsigned long mask = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK;
708 unsigned long base;
547 709
548 gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); 710 gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
549 if (!gru.s.enable) { 711 if (!gru.s.enable) {
@@ -555,8 +717,9 @@ static __init void map_gru_high(int max_pnode)
555 map_gru_distributed(gru.v); 717 map_gru_distributed(gru.v);
556 return; 718 return;
557 } 719 }
558 map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb); 720 base = (gru.v & mask) >> shift;
559 gru_start_paddr = ((u64)gru.s.base << shift); 721 map_high("GRU", base, shift, shift, max_pnode, map_wb);
722 gru_start_paddr = ((u64)base << shift);
560 gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); 723 gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
561} 724}
562 725
@@ -595,6 +758,7 @@ static __initdata struct mmioh_config mmiohs[] = {
595 }, 758 },
596}; 759};
597 760
761/* UV3 & UV4 have identical MMIOH overlay configs */
598static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode) 762static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
599{ 763{
600 union uv3h_rh_gam_mmioh_overlay_config0_mmr_u overlay; 764 union uv3h_rh_gam_mmioh_overlay_config0_mmr_u overlay;
@@ -674,7 +838,7 @@ static __init void map_mmioh_high(int min_pnode, int max_pnode)
674 unsigned long mmr, base; 838 unsigned long mmr, base;
675 int shift, enable, m_io, n_io; 839 int shift, enable, m_io, n_io;
676 840
677 if (is_uv3_hub()) { 841 if (is_uv3_hub() || is_uv4_hub()) {
678 /* Map both MMIOH Regions */ 842 /* Map both MMIOH Regions */
679 map_mmioh_high_uv3(0, min_pnode, max_pnode); 843 map_mmioh_high_uv3(0, min_pnode, max_pnode);
680 map_mmioh_high_uv3(1, min_pnode, max_pnode); 844 map_mmioh_high_uv3(1, min_pnode, max_pnode);
@@ -739,8 +903,8 @@ static __init void uv_rtc_init(void)
739 */ 903 */
740static void uv_heartbeat(unsigned long ignored) 904static void uv_heartbeat(unsigned long ignored)
741{ 905{
742 struct timer_list *timer = &uv_hub_info->scir.timer; 906 struct timer_list *timer = &uv_scir_info->timer;
743 unsigned char bits = uv_hub_info->scir.state; 907 unsigned char bits = uv_scir_info->state;
744 908
745 /* flip heartbeat bit */ 909 /* flip heartbeat bit */
746 bits ^= SCIR_CPU_HEARTBEAT; 910 bits ^= SCIR_CPU_HEARTBEAT;
@@ -760,14 +924,14 @@ static void uv_heartbeat(unsigned long ignored)
760 924
761static void uv_heartbeat_enable(int cpu) 925static void uv_heartbeat_enable(int cpu)
762{ 926{
763 while (!uv_cpu_hub_info(cpu)->scir.enabled) { 927 while (!uv_cpu_scir_info(cpu)->enabled) {
764 struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer; 928 struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer;
765 929
766 uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); 930 uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
767 setup_timer(timer, uv_heartbeat, cpu); 931 setup_timer(timer, uv_heartbeat, cpu);
768 timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; 932 timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
769 add_timer_on(timer, cpu); 933 add_timer_on(timer, cpu);
770 uv_cpu_hub_info(cpu)->scir.enabled = 1; 934 uv_cpu_scir_info(cpu)->enabled = 1;
771 935
772 /* also ensure that boot cpu is enabled */ 936 /* also ensure that boot cpu is enabled */
773 cpu = 0; 937 cpu = 0;
@@ -777,9 +941,9 @@ static void uv_heartbeat_enable(int cpu)
777#ifdef CONFIG_HOTPLUG_CPU 941#ifdef CONFIG_HOTPLUG_CPU
778static void uv_heartbeat_disable(int cpu) 942static void uv_heartbeat_disable(int cpu)
779{ 943{
780 if (uv_cpu_hub_info(cpu)->scir.enabled) { 944 if (uv_cpu_scir_info(cpu)->enabled) {
781 uv_cpu_hub_info(cpu)->scir.enabled = 0; 945 uv_cpu_scir_info(cpu)->enabled = 0;
782 del_timer(&uv_cpu_hub_info(cpu)->scir.timer); 946 del_timer(&uv_cpu_scir_info(cpu)->timer);
783 } 947 }
784 uv_set_cpu_scir_bits(cpu, 0xff); 948 uv_set_cpu_scir_bits(cpu, 0xff);
785} 949}
@@ -862,157 +1026,475 @@ int uv_set_vga_state(struct pci_dev *pdev, bool decode,
862void uv_cpu_init(void) 1026void uv_cpu_init(void)
863{ 1027{
864 /* CPU 0 initialization will be done via uv_system_init. */ 1028 /* CPU 0 initialization will be done via uv_system_init. */
865 if (!uv_blade_info) 1029 if (smp_processor_id() == 0)
866 return; 1030 return;
867 1031
868 uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; 1032 uv_hub_info->nr_online_cpus++;
869 1033
870 if (get_uv_system_type() == UV_NON_UNIQUE_APIC) 1034 if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
871 set_x2apic_extra_bits(uv_hub_info->pnode); 1035 set_x2apic_extra_bits(uv_hub_info->pnode);
872} 1036}
873 1037
874void __init uv_system_init(void) 1038struct mn {
1039 unsigned char m_val;
1040 unsigned char n_val;
1041 unsigned char m_shift;
1042 unsigned char n_lshift;
1043};
1044
1045static void get_mn(struct mn *mnp)
875{ 1046{
876 union uvh_rh_gam_config_mmr_u m_n_config; 1047 union uvh_rh_gam_config_mmr_u m_n_config;
877 union uvh_node_id_u node_id; 1048 union uv3h_gr0_gam_gr_config_u m_gr_config;
878 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
879 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
880 int gnode_extra, min_pnode = 999999, max_pnode = -1;
881 unsigned long mmr_base, present, paddr;
882 unsigned short pnode_mask;
883 unsigned char n_lshift;
884 char *hub = (is_uv1_hub() ? "UV100/1000" :
885 (is_uv2_hub() ? "UV2000/3000" :
886 (is_uv3_hub() ? "UV300" : NULL)));
887 1049
888 if (!hub) { 1050 m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR);
889 pr_err("UV: Unknown/unsupported UV hub\n"); 1051 mnp->n_val = m_n_config.s.n_skt;
890 return; 1052 if (is_uv4_hub()) {
1053 mnp->m_val = 0;
1054 mnp->n_lshift = 0;
1055 } else if (is_uv3_hub()) {
1056 mnp->m_val = m_n_config.s3.m_skt;
1057 m_gr_config.v = uv_read_local_mmr(UV3H_GR0_GAM_GR_CONFIG);
1058 mnp->n_lshift = m_gr_config.s3.m_skt;
1059 } else if (is_uv2_hub()) {
1060 mnp->m_val = m_n_config.s2.m_skt;
1061 mnp->n_lshift = mnp->m_val == 40 ? 40 : 39;
1062 } else if (is_uv1_hub()) {
1063 mnp->m_val = m_n_config.s1.m_skt;
1064 mnp->n_lshift = mnp->m_val;
891 } 1065 }
892 pr_info("UV: Found %s hub\n", hub); 1066 mnp->m_shift = mnp->m_val ? 64 - mnp->m_val : 0;
1067}
893 1068
894 /* We now only need to map the MMRs on UV1 */ 1069void __init uv_init_hub_info(struct uv_hub_info_s *hub_info)
895 if (is_uv1_hub()) 1070{
896 map_low_mmrs(); 1071 struct mn mn = {0}; /* avoid unitialized warnings */
1072 union uvh_node_id_u node_id;
897 1073
898 m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); 1074 get_mn(&mn);
899 m_val = m_n_config.s.m_skt; 1075 hub_info->m_val = mn.m_val;
900 n_val = m_n_config.s.n_skt; 1076 hub_info->n_val = mn.n_val;
901 pnode_mask = (1 << n_val) - 1; 1077 hub_info->m_shift = mn.m_shift;
902 n_lshift = get_n_lshift(m_val); 1078 hub_info->n_lshift = mn.n_lshift ? mn.n_lshift : 0;
903 mmr_base = 1079
904 uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & 1080 hub_info->hub_revision = uv_hub_info->hub_revision;
905 ~UV_MMR_ENABLE; 1081 hub_info->pnode_mask = uv_cpuid.pnode_mask;
1082 hub_info->min_pnode = _min_pnode;
1083 hub_info->min_socket = _min_socket;
1084 hub_info->pnode_to_socket = _pnode_to_socket;
1085 hub_info->socket_to_node = _socket_to_node;
1086 hub_info->socket_to_pnode = _socket_to_pnode;
1087 hub_info->gr_table_len = _gr_table_len;
1088 hub_info->gr_table = _gr_table;
1089 hub_info->gpa_mask = mn.m_val ?
1090 (1UL << (mn.m_val + mn.n_val)) - 1 :
1091 (1UL << uv_cpuid.gpa_shift) - 1;
906 1092
907 node_id.v = uv_read_local_mmr(UVH_NODE_ID); 1093 node_id.v = uv_read_local_mmr(UVH_NODE_ID);
908 gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; 1094 hub_info->gnode_extra =
909 gnode_upper = ((unsigned long)gnode_extra << m_val); 1095 (node_id.s.node_id & ~((1 << mn.n_val) - 1)) >> 1;
910 pr_info("UV: N:%d M:%d pnode_mask:0x%x gnode_upper/extra:0x%lx/0x%x n_lshift 0x%x\n", 1096
911 n_val, m_val, pnode_mask, gnode_upper, gnode_extra, 1097 hub_info->gnode_upper =
912 n_lshift); 1098 ((unsigned long)hub_info->gnode_extra << mn.m_val);
1099
1100 if (uv_gp_table) {
1101 hub_info->global_mmr_base = uv_gp_table->mmr_base;
1102 hub_info->global_mmr_shift = uv_gp_table->mmr_shift;
1103 hub_info->global_gru_base = uv_gp_table->gru_base;
1104 hub_info->global_gru_shift = uv_gp_table->gru_shift;
1105 hub_info->gpa_shift = uv_gp_table->gpa_shift;
1106 hub_info->gpa_mask = (1UL << hub_info->gpa_shift) - 1;
1107 } else {
1108 hub_info->global_mmr_base =
1109 uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
1110 ~UV_MMR_ENABLE;
1111 hub_info->global_mmr_shift = _UV_GLOBAL_MMR64_PNODE_SHIFT;
1112 }
913 1113
914 pr_info("UV: global MMR base 0x%lx\n", mmr_base); 1114 get_lowmem_redirect(
1115 &hub_info->lowmem_remap_base, &hub_info->lowmem_remap_top);
915 1116
916 for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) 1117 hub_info->apic_pnode_shift = uv_cpuid.socketid_shift;
917 uv_possible_blades +=
918 hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8));
919 1118
920 /* uv_num_possible_blades() is really the hub count */ 1119 /* show system specific info */
921 pr_info("UV: Found %d blades, %d hubs\n", 1120 pr_info("UV: N:%d M:%d m_shift:%d n_lshift:%d\n",
922 is_uv1_hub() ? uv_num_possible_blades() : 1121 hub_info->n_val, hub_info->m_val,
923 (uv_num_possible_blades() + 1) / 2, 1122 hub_info->m_shift, hub_info->n_lshift);
924 uv_num_possible_blades());
925 1123
926 bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); 1124 pr_info("UV: gpa_mask/shift:0x%lx/%d pnode_mask:0x%x apic_pns:%d\n",
927 uv_blade_info = kzalloc(bytes, GFP_KERNEL); 1125 hub_info->gpa_mask, hub_info->gpa_shift,
928 BUG_ON(!uv_blade_info); 1126 hub_info->pnode_mask, hub_info->apic_pnode_shift);
929 1127
930 for (blade = 0; blade < uv_num_possible_blades(); blade++) 1128 pr_info("UV: mmr_base/shift:0x%lx/%ld gru_base/shift:0x%lx/%ld\n",
931 uv_blade_info[blade].memory_nid = -1; 1129 hub_info->global_mmr_base, hub_info->global_mmr_shift,
1130 hub_info->global_gru_base, hub_info->global_gru_shift);
932 1131
933 get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); 1132 pr_info("UV: gnode_upper:0x%lx gnode_extra:0x%x\n",
1133 hub_info->gnode_upper, hub_info->gnode_extra);
1134}
1135
1136static void __init decode_gam_params(unsigned long ptr)
1137{
1138 uv_gp_table = (struct uv_gam_parameters *)ptr;
1139
1140 pr_info("UV: GAM Params...\n");
1141 pr_info("UV: mmr_base/shift:0x%llx/%d gru_base/shift:0x%llx/%d gpa_shift:%d\n",
1142 uv_gp_table->mmr_base, uv_gp_table->mmr_shift,
1143 uv_gp_table->gru_base, uv_gp_table->gru_shift,
1144 uv_gp_table->gpa_shift);
1145}
1146
1147static void __init decode_gam_rng_tbl(unsigned long ptr)
1148{
1149 struct uv_gam_range_entry *gre = (struct uv_gam_range_entry *)ptr;
1150 unsigned long lgre = 0;
1151 int index = 0;
1152 int sock_min = 999999, pnode_min = 99999;
1153 int sock_max = -1, pnode_max = -1;
1154
1155 uv_gre_table = gre;
1156 for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
1157 if (!index) {
1158 pr_info("UV: GAM Range Table...\n");
1159 pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s %3s\n",
1160 "Range", "", "Size", "Type", "NASID",
1161 "SID", "PN", "PXM");
1162 }
1163 pr_info(
1164 "UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x %3d\n",
1165 index++,
1166 (unsigned long)lgre << UV_GAM_RANGE_SHFT,
1167 (unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
1168 ((unsigned long)(gre->limit - lgre)) >>
1169 (30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
1170 gre->type, gre->nasid, gre->sockid,
1171 gre->pnode, gre->pxm);
1172
1173 lgre = gre->limit;
1174 if (sock_min > gre->sockid)
1175 sock_min = gre->sockid;
1176 if (sock_max < gre->sockid)
1177 sock_max = gre->sockid;
1178 if (pnode_min > gre->pnode)
1179 pnode_min = gre->pnode;
1180 if (pnode_max < gre->pnode)
1181 pnode_max = gre->pnode;
1182 }
1183 _min_socket = sock_min;
1184 _max_socket = sock_max;
1185 _min_pnode = pnode_min;
1186 _max_pnode = pnode_max;
1187 _gr_table_len = index;
1188 pr_info(
1189 "UV: GRT: %d entries, sockets(min:%x,max:%x) pnodes(min:%x,max:%x)\n",
1190 index, _min_socket, _max_socket, _min_pnode, _max_pnode);
1191}
1192
1193static void __init decode_uv_systab(void)
1194{
1195 struct uv_systab *st;
1196 int i;
1197
1198 st = uv_systab;
1199 if ((!st || st->revision < UV_SYSTAB_VERSION_UV4) && !is_uv4_hub())
1200 return;
1201 if (st->revision != UV_SYSTAB_VERSION_UV4_LATEST) {
1202 pr_crit(
1203 "UV: BIOS UVsystab version(%x) mismatch, expecting(%x)\n",
1204 st->revision, UV_SYSTAB_VERSION_UV4_LATEST);
1205 BUG();
1206 }
1207
1208 for (i = 0; st->entry[i].type != UV_SYSTAB_TYPE_UNUSED; i++) {
1209 unsigned long ptr = st->entry[i].offset;
934 1210
935 bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes(); 1211 if (!ptr)
936 uv_node_to_blade = kmalloc(bytes, GFP_KERNEL); 1212 continue;
937 BUG_ON(!uv_node_to_blade); 1213
938 memset(uv_node_to_blade, 255, bytes); 1214 ptr = ptr + (unsigned long)st;
939 1215
940 bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus(); 1216 switch (st->entry[i].type) {
941 uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL); 1217 case UV_SYSTAB_TYPE_GAM_PARAMS:
942 BUG_ON(!uv_cpu_to_blade); 1218 decode_gam_params(ptr);
943 memset(uv_cpu_to_blade, 255, bytes); 1219 break;
944 1220
945 blade = 0; 1221 case UV_SYSTAB_TYPE_GAM_RNG_TBL:
1222 decode_gam_rng_tbl(ptr);
1223 break;
1224 }
1225 }
1226}
1227
1228/*
1229 * Setup physical blade translations from UVH_NODE_PRESENT_TABLE
1230 * .. NB: UVH_NODE_PRESENT_TABLE is going away,
1231 * .. being replaced by GAM Range Table
1232 */
1233static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info)
1234{
1235 int i, uv_pb = 0;
1236
1237 pr_info("UV: NODE_PRESENT_DEPTH = %d\n", UVH_NODE_PRESENT_TABLE_DEPTH);
946 for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) { 1238 for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
947 present = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8); 1239 unsigned long np;
948 for (j = 0; j < 64; j++) { 1240
949 if (!test_bit(j, &present)) 1241 np = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
950 continue; 1242 if (np)
951 pnode = (i * 64 + j) & pnode_mask; 1243 pr_info("UV: NODE_PRESENT(%d) = 0x%016lx\n", i, np);
952 uv_blade_info[blade].pnode = pnode; 1244
953 uv_blade_info[blade].nr_possible_cpus = 0; 1245 uv_pb += hweight64(np);
954 uv_blade_info[blade].nr_online_cpus = 0; 1246 }
955 spin_lock_init(&uv_blade_info[blade].nmi_lock); 1247 if (uv_possible_blades != uv_pb)
956 min_pnode = min(pnode, min_pnode); 1248 uv_possible_blades = uv_pb;
957 max_pnode = max(pnode, max_pnode); 1249}
958 blade++; 1250
1251static void __init build_socket_tables(void)
1252{
1253 struct uv_gam_range_entry *gre = uv_gre_table;
1254 int num, nump;
1255 int cpu, i, lnid;
1256 int minsock = _min_socket;
1257 int maxsock = _max_socket;
1258 int minpnode = _min_pnode;
1259 int maxpnode = _max_pnode;
1260 size_t bytes;
1261
1262 if (!gre) {
1263 if (is_uv1_hub() || is_uv2_hub() || is_uv3_hub()) {
1264 pr_info("UV: No UVsystab socket table, ignoring\n");
1265 return; /* not required */
1266 }
1267 pr_crit(
1268 "UV: Error: UVsystab address translations not available!\n");
1269 BUG();
1270 }
1271
1272 /* build socket id -> node id, pnode */
1273 num = maxsock - minsock + 1;
1274 bytes = num * sizeof(_socket_to_node[0]);
1275 _socket_to_node = kmalloc(bytes, GFP_KERNEL);
1276 _socket_to_pnode = kmalloc(bytes, GFP_KERNEL);
1277
1278 nump = maxpnode - minpnode + 1;
1279 bytes = nump * sizeof(_pnode_to_socket[0]);
1280 _pnode_to_socket = kmalloc(bytes, GFP_KERNEL);
1281 BUG_ON(!_socket_to_node || !_socket_to_pnode || !_pnode_to_socket);
1282
1283 for (i = 0; i < num; i++)
1284 _socket_to_node[i] = _socket_to_pnode[i] = SOCK_EMPTY;
1285
1286 for (i = 0; i < nump; i++)
1287 _pnode_to_socket[i] = SOCK_EMPTY;
1288
1289 /* fill in pnode/node/addr conversion list values */
1290 pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n");
1291 for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
1292 if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
1293 continue;
1294 i = gre->sockid - minsock;
1295 if (_socket_to_pnode[i] != SOCK_EMPTY)
1296 continue; /* duplicate */
1297 _socket_to_pnode[i] = gre->pnode;
1298 _socket_to_node[i] = gre->pxm;
1299
1300 i = gre->pnode - minpnode;
1301 _pnode_to_socket[i] = gre->sockid;
1302
1303 pr_info(
1304 "UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n",
1305 gre->sockid, gre->type, gre->nasid,
1306 _socket_to_pnode[gre->sockid - minsock],
1307 _socket_to_node[gre->sockid - minsock],
1308 _pnode_to_socket[gre->pnode - minpnode]);
1309 }
1310
1311 /* check socket -> node values */
1312 lnid = -1;
1313 for_each_present_cpu(cpu) {
1314 int nid = cpu_to_node(cpu);
1315 int apicid, sockid;
1316
1317 if (lnid == nid)
1318 continue;
1319 lnid = nid;
1320 apicid = per_cpu(x86_cpu_to_apicid, cpu);
1321 sockid = apicid >> uv_cpuid.socketid_shift;
1322 i = sockid - minsock;
1323
1324 if (nid != _socket_to_node[i]) {
1325 pr_warn(
1326 "UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n",
1327 i, sockid, gre->type, _socket_to_node[i], nid);
1328 _socket_to_node[i] = nid;
1329 }
1330 }
1331
1332 /* Setup physical blade to pnode translation from GAM Range Table */
1333 bytes = num_possible_nodes() * sizeof(_node_to_pnode[0]);
1334 _node_to_pnode = kmalloc(bytes, GFP_KERNEL);
1335 BUG_ON(!_node_to_pnode);
1336
1337 for (lnid = 0; lnid < num_possible_nodes(); lnid++) {
1338 unsigned short sockid;
1339
1340 for (sockid = minsock; sockid <= maxsock; sockid++) {
1341 if (lnid == _socket_to_node[sockid - minsock]) {
1342 _node_to_pnode[lnid] =
1343 _socket_to_pnode[sockid - minsock];
1344 break;
1345 }
1346 }
1347 if (sockid > maxsock) {
1348 pr_err("UV: socket for node %d not found!\n", lnid);
1349 BUG();
1350 }
1351 }
1352
1353 /*
1354 * If socket id == pnode or socket id == node for all nodes,
1355 * system runs faster by removing corresponding conversion table.
1356 */
1357 pr_info("UV: Checking socket->node/pnode for identity maps\n");
1358 if (minsock == 0) {
1359 for (i = 0; i < num; i++)
1360 if (_socket_to_node[i] == SOCK_EMPTY ||
1361 i != _socket_to_node[i])
1362 break;
1363 if (i >= num) {
1364 kfree(_socket_to_node);
1365 _socket_to_node = NULL;
1366 pr_info("UV: 1:1 socket_to_node table removed\n");
959 } 1367 }
960 } 1368 }
1369 if (minsock == minpnode) {
1370 for (i = 0; i < num; i++)
1371 if (_socket_to_pnode[i] != SOCK_EMPTY &&
1372 _socket_to_pnode[i] != i + minpnode)
1373 break;
1374 if (i >= num) {
1375 kfree(_socket_to_pnode);
1376 _socket_to_pnode = NULL;
1377 pr_info("UV: 1:1 socket_to_pnode table removed\n");
1378 }
1379 }
1380}
1381
1382void __init uv_system_init(void)
1383{
1384 struct uv_hub_info_s hub_info = {0};
1385 int bytes, cpu, nodeid;
1386 unsigned short min_pnode = 9999, max_pnode = 0;
1387 char *hub = is_uv4_hub() ? "UV400" :
1388 is_uv3_hub() ? "UV300" :
1389 is_uv2_hub() ? "UV2000/3000" :
1390 is_uv1_hub() ? "UV100/1000" : NULL;
1391
1392 if (!hub) {
1393 pr_err("UV: Unknown/unsupported UV hub\n");
1394 return;
1395 }
1396 pr_info("UV: Found %s hub\n", hub);
1397
1398 map_low_mmrs();
1399
1400 uv_bios_init(); /* get uv_systab for decoding */
1401 decode_uv_systab();
1402 build_socket_tables();
1403 build_uv_gr_table();
1404 uv_init_hub_info(&hub_info);
1405 uv_possible_blades = num_possible_nodes();
1406 if (!_node_to_pnode)
1407 boot_init_possible_blades(&hub_info);
1408
1409 /* uv_num_possible_blades() is really the hub count */
1410 pr_info("UV: Found %d hubs, %d nodes, %d cpus\n",
1411 uv_num_possible_blades(),
1412 num_possible_nodes(),
1413 num_possible_cpus());
961 1414
962 uv_bios_init();
963 uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id, 1415 uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id,
964 &sn_region_size, &system_serial_number); 1416 &sn_region_size, &system_serial_number);
1417 hub_info.coherency_domain_number = sn_coherency_id;
965 uv_rtc_init(); 1418 uv_rtc_init();
966 1419
967 for_each_present_cpu(cpu) { 1420 bytes = sizeof(void *) * uv_num_possible_blades();
968 int apicid = per_cpu(x86_cpu_to_apicid, cpu); 1421 __uv_hub_info_list = kzalloc(bytes, GFP_KERNEL);
1422 BUG_ON(!__uv_hub_info_list);
969 1423
970 nid = cpu_to_node(cpu); 1424 bytes = sizeof(struct uv_hub_info_s);
971 /* 1425 for_each_node(nodeid) {
972 * apic_pnode_shift must be set before calling uv_apicid_to_pnode(); 1426 struct uv_hub_info_s *new_hub;
973 */
974 uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
975 uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
976 uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision;
977 1427
978 uv_cpu_hub_info(cpu)->m_shift = 64 - m_val; 1428 if (__uv_hub_info_list[nodeid]) {
979 uv_cpu_hub_info(cpu)->n_lshift = n_lshift; 1429 pr_err("UV: Node %d UV HUB already initialized!?\n",
1430 nodeid);
1431 BUG();
1432 }
1433
1434 /* Allocate new per hub info list */
1435 new_hub = (nodeid == 0) ?
1436 &uv_hub_info_node0 :
1437 kzalloc_node(bytes, GFP_KERNEL, nodeid);
1438 BUG_ON(!new_hub);
1439 __uv_hub_info_list[nodeid] = new_hub;
1440 new_hub = uv_hub_info_list(nodeid);
1441 BUG_ON(!new_hub);
1442 *new_hub = hub_info;
1443
1444 /* Use information from GAM table if available */
1445 if (_node_to_pnode)
1446 new_hub->pnode = _node_to_pnode[nodeid];
1447 else /* Fill in during cpu loop */
1448 new_hub->pnode = 0xffff;
1449 new_hub->numa_blade_id = uv_node_to_blade_id(nodeid);
1450 new_hub->memory_nid = -1;
1451 new_hub->nr_possible_cpus = 0;
1452 new_hub->nr_online_cpus = 0;
1453 }
980 1454
1455 /* Initialize per cpu info */
1456 for_each_possible_cpu(cpu) {
1457 int apicid = per_cpu(x86_cpu_to_apicid, cpu);
1458 int numa_node_id;
1459 unsigned short pnode;
1460
1461 nodeid = cpu_to_node(cpu);
1462 numa_node_id = numa_cpu_node(cpu);
981 pnode = uv_apicid_to_pnode(apicid); 1463 pnode = uv_apicid_to_pnode(apicid);
982 blade = boot_pnode_to_blade(pnode); 1464
983 lcpu = uv_blade_info[blade].nr_possible_cpus; 1465 uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid);
984 uv_blade_info[blade].nr_possible_cpus++; 1466 uv_cpu_info_per(cpu)->blade_cpu_id =
985 1467 uv_cpu_hub_info(cpu)->nr_possible_cpus++;
986 /* Any node on the blade, else will contain -1. */ 1468 if (uv_cpu_hub_info(cpu)->memory_nid == -1)
987 uv_blade_info[blade].memory_nid = nid; 1469 uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu);
988 1470 if (nodeid != numa_node_id && /* init memoryless node */
989 uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; 1471 uv_hub_info_list(numa_node_id)->pnode == 0xffff)
990 uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; 1472 uv_hub_info_list(numa_node_id)->pnode = pnode;
991 uv_cpu_hub_info(cpu)->m_val = m_val; 1473 else if (uv_cpu_hub_info(cpu)->pnode == 0xffff)
992 uv_cpu_hub_info(cpu)->n_val = n_val; 1474 uv_cpu_hub_info(cpu)->pnode = pnode;
993 uv_cpu_hub_info(cpu)->numa_blade_id = blade; 1475 uv_cpu_scir_info(cpu)->offset = uv_scir_offset(apicid);
994 uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
995 uv_cpu_hub_info(cpu)->pnode = pnode;
996 uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
997 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
998 uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
999 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
1000 uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
1001 uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid);
1002 uv_node_to_blade[nid] = blade;
1003 uv_cpu_to_blade[cpu] = blade;
1004 } 1476 }
1005 1477
1006 /* Add blade/pnode info for nodes without cpus */ 1478 for_each_node(nodeid) {
1007 for_each_online_node(nid) { 1479 unsigned short pnode = uv_hub_info_list(nodeid)->pnode;
1008 if (uv_node_to_blade[nid] >= 0) 1480
1009 continue; 1481 /* Add pnode info for pre-GAM list nodes without cpus */
1010 paddr = node_start_pfn(nid) << PAGE_SHIFT; 1482 if (pnode == 0xffff) {
1011 pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr)); 1483 unsigned long paddr;
1012 blade = boot_pnode_to_blade(pnode); 1484
1013 uv_node_to_blade[nid] = blade; 1485 paddr = node_start_pfn(nodeid) << PAGE_SHIFT;
1486 pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr));
1487 uv_hub_info_list(nodeid)->pnode = pnode;
1488 }
1489 min_pnode = min(pnode, min_pnode);
1490 max_pnode = max(pnode, max_pnode);
1491 pr_info("UV: UVHUB node:%2d pn:%02x nrcpus:%d\n",
1492 nodeid,
1493 uv_hub_info_list(nodeid)->pnode,
1494 uv_hub_info_list(nodeid)->nr_possible_cpus);
1014 } 1495 }
1015 1496
1497 pr_info("UV: min_pnode:%02x max_pnode:%02x\n", min_pnode, max_pnode);
1016 map_gru_high(max_pnode); 1498 map_gru_high(max_pnode);
1017 map_mmr_high(max_pnode); 1499 map_mmr_high(max_pnode);
1018 map_mmioh_high(min_pnode, max_pnode); 1500 map_mmioh_high(min_pnode, max_pnode);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 9307f182fe30..c7364bd633e1 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -2267,7 +2267,7 @@ static int __init apm_init(void)
2267 2267
2268 dmi_check_system(apm_dmi_table); 2268 dmi_check_system(apm_dmi_table);
2269 2269
2270 if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) { 2270 if (apm_info.bios.version == 0 || machine_is_olpc()) {
2271 printk(KERN_INFO "apm: BIOS not found.\n"); 2271 printk(KERN_INFO "apm: BIOS not found.\n");
2272 return -ENODEV; 2272 return -ENODEV;
2273 } 2273 }
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 5c042466f274..674134e9f5e5 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -80,6 +80,7 @@ void common(void) {
80 OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); 80 OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
81 OFFSET(BP_version, boot_params, hdr.version); 81 OFFSET(BP_version, boot_params, hdr.version);
82 OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); 82 OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
83 OFFSET(BP_init_size, boot_params, hdr.init_size);
83 OFFSET(BP_pref_address, boot_params, hdr.pref_address); 84 OFFSET(BP_pref_address, boot_params, hdr.pref_address);
84 OFFSET(BP_code32_start, boot_params, hdr.code32_start); 85 OFFSET(BP_code32_start, boot_params, hdr.code32_start);
85 86
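
BP_init_size joins the BP_* constants that asm-offsets.c exports so assembly code can address boot_params fields by name. The real machinery emits the offsetof() values through an asm marker that the build scrapes into a generated header; the toy below only prints them, and the trimmed struct setup_header is a stand-in for the real one in the boot protocol headers.

#include <stddef.h>
#include <stdio.h>

/* toy stand-in for the boot_params header fields referenced above */
struct setup_header { unsigned int kernel_alignment, init_size, pref_address; };
struct boot_params { struct setup_header hdr; };

#define OFFSET(sym, str, mem) \
	printf("#define " #sym " %zu\n", offsetof(struct str, mem))

int main(void)
{
	/* what the real asm-offsets machinery turns into generated constants */
	OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
	OFFSET(BP_init_size,        boot_params, hdr.init_size);
	OFFSET(BP_pref_address,     boot_params, hdr.pref_address);
	return 0;
}
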
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 2fec875392cc..c343a54bed39 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -565,14 +565,17 @@ static void early_init_amd(struct cpuinfo_x86 *c)
565 * can safely set X86_FEATURE_EXTD_APICID unconditionally for families 565 * can safely set X86_FEATURE_EXTD_APICID unconditionally for families
566 * after 16h. 566 * after 16h.
567 */ 567 */
568 if (cpu_has_apic && c->x86 > 0x16) { 568 if (boot_cpu_has(X86_FEATURE_APIC)) {
569 set_cpu_cap(c, X86_FEATURE_EXTD_APICID); 569 if (c->x86 > 0x16)
570 } else if (cpu_has_apic && c->x86 >= 0xf) {
571 /* check CPU config space for extended APIC ID */
572 unsigned int val;
573 val = read_pci_config(0, 24, 0, 0x68);
574 if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
575 set_cpu_cap(c, X86_FEATURE_EXTD_APICID); 570 set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
571 else if (c->x86 >= 0xf) {
572 /* check CPU config space for extended APIC ID */
573 unsigned int val;
574
575 val = read_pci_config(0, 24, 0, 0x68);
576 if ((val >> 17 & 0x3) == 0x3)
577 set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
578 }
576 } 579 }
577#endif 580#endif
578 581
@@ -628,6 +631,7 @@ static void init_amd_k8(struct cpuinfo_x86 *c)
628 */ 631 */
629 msr_set_bit(MSR_K7_HWCR, 6); 632 msr_set_bit(MSR_K7_HWCR, 6);
630#endif 633#endif
634 set_cpu_bug(c, X86_BUG_SWAPGS_FENCE);
631} 635}
632 636
633static void init_amd_gh(struct cpuinfo_x86 *c) 637static void init_amd_gh(struct cpuinfo_x86 *c)
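
The first hunk above restructures the extended-APIC-ID detection so the PCI config read only happens when the APIC feature bit is set, and rewrites the two-bit test: bits 17 and 18 of config register 0x68 are now read as a 2-bit field. A standalone check that the old and new expressions agree (the sample value is arbitrary):

#include <stdio.h>

int main(void)
{
	unsigned int val = 0x00060000;	/* example value of PCI config reg 0x68 */

	/* old test: bits 17 and 18 both set */
	int old_way = (val & ((1u << 17) | (1u << 18))) == ((1u << 17) | (1u << 18));
	/* new test: the same two bits read as a 2-bit field */
	int new_way = ((val >> 17) & 0x3) == 0x3;

	printf("old=%d new=%d\n", old_way, new_way);	/* prints old=1 new=1 */
	return 0;
}
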
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8394b3d1f94f..0fe6953f421c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -37,6 +37,7 @@
37#include <asm/mtrr.h> 37#include <asm/mtrr.h>
38#include <linux/numa.h> 38#include <linux/numa.h>
39#include <asm/asm.h> 39#include <asm/asm.h>
40#include <asm/bugs.h>
40#include <asm/cpu.h> 41#include <asm/cpu.h>
41#include <asm/mce.h> 42#include <asm/mce.h>
42#include <asm/msr.h> 43#include <asm/msr.h>
@@ -270,6 +271,8 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
270static __init int setup_disable_smep(char *arg) 271static __init int setup_disable_smep(char *arg)
271{ 272{
272 setup_clear_cpu_cap(X86_FEATURE_SMEP); 273 setup_clear_cpu_cap(X86_FEATURE_SMEP);
274 /* Check for things that depend on SMEP being enabled: */
275 check_mpx_erratum(&boot_cpu_data);
273 return 1; 276 return 1;
274} 277}
275__setup("nosmep", setup_disable_smep); 278__setup("nosmep", setup_disable_smep);
@@ -310,6 +313,10 @@ static bool pku_disabled;
310 313
311static __always_inline void setup_pku(struct cpuinfo_x86 *c) 314static __always_inline void setup_pku(struct cpuinfo_x86 *c)
312{ 315{
316 /* check the boot processor, plus compile options for PKU: */
317 if (!cpu_feature_enabled(X86_FEATURE_PKU))
318 return;
319 /* checks the actual processor's cpuid bits: */
313 if (!cpu_has(c, X86_FEATURE_PKU)) 320 if (!cpu_has(c, X86_FEATURE_PKU))
314 return; 321 return;
315 if (pku_disabled) 322 if (pku_disabled)
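
The added guard distinguishes two layers: cpu_feature_enabled() folds in the kernel's compile-time disabled-features mask (so a build without PKU support bails out before touching hardware state), while the existing cpu_has() line checks the bit the actual CPU reports. A rough standalone model of that two-level check; the names and the CONFIG_HAS_PKU knob are illustrative, not the kernel's:

#include <stdio.h>

/* toy model: a compile-time "compiled out" switch plus a runtime CPUID bit */
#define CONFIG_HAS_PKU 0		/* e.g. kernel built without PKU support */

static int cpuid_says_pku = 1;		/* what the hardware reports */

static int feature_enabled_pku(void)
{
	if (!CONFIG_HAS_PKU)		/* compile-time: the whole branch folds away */
		return 0;
	return cpuid_says_pku;		/* runtime check only if compiled in */
}

int main(void)
{
	if (!feature_enabled_pku()) {
		printf("PKU compiled out or absent, skipping setup\n");
		return 0;
	}
	printf("setting up PKU\n");
	return 0;
}
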
@@ -430,7 +437,7 @@ void load_percpu_segment(int cpu)
430#ifdef CONFIG_X86_32 437#ifdef CONFIG_X86_32
431 loadsegment(fs, __KERNEL_PERCPU); 438 loadsegment(fs, __KERNEL_PERCPU);
432#else 439#else
433 loadsegment(gs, 0); 440 __loadsegment_simple(gs, 0);
434 wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); 441 wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
435#endif 442#endif
436 load_stack_canary_segment(); 443 load_stack_canary_segment();
@@ -717,6 +724,13 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
717 } 724 }
718 } 725 }
719 726
727 if (c->extended_cpuid_level >= 0x80000007) {
728 cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
729
730 c->x86_capability[CPUID_8000_0007_EBX] = ebx;
731 c->x86_power = edx;
732 }
733
720 if (c->extended_cpuid_level >= 0x80000008) { 734 if (c->extended_cpuid_level >= 0x80000008) {
721 cpuid(0x80000008, &eax, &ebx, &ecx, &edx); 735 cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
722 736
@@ -729,9 +743,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
729 c->x86_phys_bits = 36; 743 c->x86_phys_bits = 36;
730#endif 744#endif
731 745
732 if (c->extended_cpuid_level >= 0x80000007)
733 c->x86_power = cpuid_edx(0x80000007);
734
735 if (c->extended_cpuid_level >= 0x8000000a) 746 if (c->extended_cpuid_level >= 0x8000000a)
736 c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); 747 c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
737 748
@@ -862,30 +873,34 @@ static void detect_nopl(struct cpuinfo_x86 *c)
862#else 873#else
863 set_cpu_cap(c, X86_FEATURE_NOPL); 874 set_cpu_cap(c, X86_FEATURE_NOPL);
864#endif 875#endif
876}
865 877
878static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
879{
880#ifdef CONFIG_X86_64
866 /* 881 /*
867 * ESPFIX is a strange bug. All real CPUs have it. Paravirt 882 * Empirically, writing zero to a segment selector on AMD does
868 * systems that run Linux at CPL > 0 may or may not have the 883 * not clear the base, whereas writing zero to a segment
869 * issue, but, even if they have the issue, there's absolutely 884 * selector on Intel does clear the base. Intel's behavior
870 * nothing we can do about it because we can't use the real IRET 885 * allows slightly faster context switches in the common case
871 * instruction. 886 * where GS is unused by the prev and next threads.
872 * 887 *
873 * NB: For the time being, only 32-bit kernels support 888 * Since neither vendor documents this anywhere that I can see,
874 * X86_BUG_ESPFIX as such. 64-bit kernels directly choose 889 * detect it directly instead of hardcoding the choice by
875 * whether to apply espfix using paravirt hooks. If any 890 * vendor.
876 * non-paravirt system ever shows up that does *not* have the 891 *
877 * ESPFIX issue, we can change this. 892 * I've designated AMD's behavior as the "bug" because it's
893 * counterintuitive and less friendly.
878 */ 894 */
879#ifdef CONFIG_X86_32 895
880#ifdef CONFIG_PARAVIRT 896 unsigned long old_base, tmp;
881 do { 897 rdmsrl(MSR_FS_BASE, old_base);
882 extern void native_iret(void); 898 wrmsrl(MSR_FS_BASE, 1);
883 if (pv_cpu_ops.iret == native_iret) 899 loadsegment(fs, 0);
884 set_cpu_bug(c, X86_BUG_ESPFIX); 900 rdmsrl(MSR_FS_BASE, tmp);
885 } while (0); 901 if (tmp != 0)
886#else 902 set_cpu_bug(c, X86_BUG_NULL_SEG);
887 set_cpu_bug(c, X86_BUG_ESPFIX); 903 wrmsrl(MSR_FS_BASE, old_base);
888#endif
889#endif 904#endif
890} 905}
891 906
@@ -921,6 +936,33 @@ static void generic_identify(struct cpuinfo_x86 *c)
921 get_model_name(c); /* Default name */ 936 get_model_name(c); /* Default name */
922 937
923 detect_nopl(c); 938 detect_nopl(c);
939
940 detect_null_seg_behavior(c);
941
942 /*
943 * ESPFIX is a strange bug. All real CPUs have it. Paravirt
944 * systems that run Linux at CPL > 0 may or may not have the
945 * issue, but, even if they have the issue, there's absolutely
946 * nothing we can do about it because we can't use the real IRET
947 * instruction.
948 *
949 * NB: For the time being, only 32-bit kernels support
950 * X86_BUG_ESPFIX as such. 64-bit kernels directly choose
951 * whether to apply espfix using paravirt hooks. If any
952 * non-paravirt system ever shows up that does *not* have the
953 * ESPFIX issue, we can change this.
954 */
955#ifdef CONFIG_X86_32
956# ifdef CONFIG_PARAVIRT
957 do {
958 extern void native_iret(void);
959 if (pv_cpu_ops.iret == native_iret)
960 set_cpu_bug(c, X86_BUG_ESPFIX);
961 } while (0);
962# else
963 set_cpu_bug(c, X86_BUG_ESPFIX);
964# endif
965#endif
924} 966}
925 967
926static void x86_init_cache_qos(struct cpuinfo_x86 *c) 968static void x86_init_cache_qos(struct cpuinfo_x86 *c)
@@ -1076,12 +1118,12 @@ void enable_sep_cpu(void)
1076 struct tss_struct *tss; 1118 struct tss_struct *tss;
1077 int cpu; 1119 int cpu;
1078 1120
1121 if (!boot_cpu_has(X86_FEATURE_SEP))
1122 return;
1123
1079 cpu = get_cpu(); 1124 cpu = get_cpu();
1080 tss = &per_cpu(cpu_tss, cpu); 1125 tss = &per_cpu(cpu_tss, cpu);
1081 1126
1082 if (!boot_cpu_has(X86_FEATURE_SEP))
1083 goto out;
1084
1085 /* 1127 /*
1086 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- 1128 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
1087 * see the big comment in struct x86_hw_tss's definition. 1129 * see the big comment in struct x86_hw_tss's definition.
@@ -1096,7 +1138,6 @@ void enable_sep_cpu(void)
1096 1138
1097 wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); 1139 wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
1098 1140
1099out:
1100 put_cpu(); 1141 put_cpu();
1101} 1142}
1102#endif 1143#endif
@@ -1528,7 +1569,7 @@ void cpu_init(void)
1528 pr_info("Initializing CPU#%d\n", cpu); 1569 pr_info("Initializing CPU#%d\n", cpu);
1529 1570
1530 if (cpu_feature_enabled(X86_FEATURE_VME) || 1571 if (cpu_feature_enabled(X86_FEATURE_VME) ||
1531 cpu_has_tsc || 1572 boot_cpu_has(X86_FEATURE_TSC) ||
1532 boot_cpu_has(X86_FEATURE_DE)) 1573 boot_cpu_has(X86_FEATURE_DE))
1533 cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); 1574 cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
1534 1575
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 6adef9cac23e..bd9dcd6b712d 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -333,7 +333,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
333 switch (dir0_lsn) { 333 switch (dir0_lsn) {
334 case 0xd: /* either a 486SLC or DLC w/o DEVID */ 334 case 0xd: /* either a 486SLC or DLC w/o DEVID */
335 dir0_msn = 0; 335 dir0_msn = 0;
336 p = Cx486_name[(cpu_has_fpu ? 1 : 0)]; 336 p = Cx486_name[!!boot_cpu_has(X86_FEATURE_FPU)];
337 break; 337 break;
338 338
339 case 0xe: /* a 486S A step */ 339 case 0xe: /* a 486S A step */
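
The cpu_has_fpu ? 1 : 0 ternary becomes !!boot_cpu_has(X86_FEATURE_FPU); both normalise a possibly multi-bit truth value to exactly 0 or 1 before using it as an array index. A trivial standalone illustration with a made-up feature word and toy name table:

#include <stdio.h>

static const char *const Cx486_name[] = { "SLC", "DLC" };	/* toy table */

int main(void)
{
	unsigned int feature_word = 0x10;	/* pretend the FPU bit is bit 4 */
	int has_fpu = feature_word & 0x10;	/* nonzero, but not necessarily 1 */

	/* !! collapses any nonzero value to exactly 1, making it a safe index */
	printf("%s\n", Cx486_name[!!has_fpu]);	/* prints DLC */
	return 0;
}
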
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 5354080f76c3..c1a89bc026ac 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -25,6 +25,41 @@
25#include <asm/apic.h> 25#include <asm/apic.h>
26#endif 26#endif
27 27
28/*
29 * Just in case our CPU detection goes bad, or you have a weird system,
30 * allow a way to override the automatic disabling of MPX.
31 */
32static int forcempx;
33
34static int __init forcempx_setup(char *__unused)
35{
36 forcempx = 1;
37
38 return 1;
39}
40__setup("intel-skd-046-workaround=disable", forcempx_setup);
41
42void check_mpx_erratum(struct cpuinfo_x86 *c)
43{
44 if (forcempx)
45 return;
46 /*
47 * Turn off the MPX feature on CPUs where SMEP is not
48 * available or disabled.
49 *
50 * Works around Intel Erratum SKD046: "Branch Instructions
51 * May Initialize MPX Bound Registers Incorrectly".
52 *
53 * This might falsely disable MPX on systems without
54 * SMEP, like Atom processors without SMEP. But there
55 * is no such hardware known at the moment.
56 */
57 if (cpu_has(c, X86_FEATURE_MPX) && !cpu_has(c, X86_FEATURE_SMEP)) {
58 setup_clear_cpu_cap(X86_FEATURE_MPX);
59 pr_warn("x86/mpx: Disabling MPX since SMEP not present\n");
60 }
61}
62
28static void early_init_intel(struct cpuinfo_x86 *c) 63static void early_init_intel(struct cpuinfo_x86 *c)
29{ 64{
30 u64 misc_enable; 65 u64 misc_enable;
@@ -173,6 +208,8 @@ static void early_init_intel(struct cpuinfo_x86 *c)
173 if (edx & (1U << 28)) 208 if (edx & (1U << 28))
174 c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); 209 c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
175 } 210 }
211
212 check_mpx_erratum(c);
176} 213}
177 214
178#ifdef CONFIG_X86_32 215#ifdef CONFIG_X86_32
@@ -233,7 +270,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
233 * The Quark is also family 5, but does not have the same bug. 270 * The Quark is also family 5, but does not have the same bug.
234 */ 271 */
235 clear_cpu_bug(c, X86_BUG_F00F); 272 clear_cpu_bug(c, X86_BUG_F00F);
236 if (!paravirt_enabled() && c->x86 == 5 && c->x86_model < 9) { 273 if (c->x86 == 5 && c->x86_model < 9) {
237 static int f00f_workaround_enabled; 274 static int f00f_workaround_enabled;
238 275
239 set_cpu_bug(c, X86_BUG_F00F); 276 set_cpu_bug(c, X86_BUG_F00F);
@@ -280,7 +317,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
280 * integrated APIC (see 11AP erratum in "Pentium Processor 317 * integrated APIC (see 11AP erratum in "Pentium Processor
281 * Specification Update"). 318 * Specification Update").
282 */ 319 */
283 if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && 320 if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
284 (c->x86_mask < 0x6 || c->x86_mask == 0xb)) 321 (c->x86_mask < 0x6 || c->x86_mask == 0xb))
285 set_cpu_bug(c, X86_BUG_11AP); 322 set_cpu_bug(c, X86_BUG_11AP);
286 323
@@ -335,7 +372,7 @@ static int intel_num_cpu_cores(struct cpuinfo_x86 *c)
335{ 372{
336 unsigned int eax, ebx, ecx, edx; 373 unsigned int eax, ebx, ecx, edx;
337 374
338 if (c->cpuid_level < 4) 375 if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
339 return 1; 376 return 1;
340 377
341 /* Intel has a non-standard dependency on %ecx for this CPUID level. */ 378 /* Intel has a non-standard dependency on %ecx for this CPUID level. */
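
check_mpx_erratum() is now called from both the Intel early-init path (above) and from the nosmep handler added in common.c, so MPX is cleared whichever way SMEP ends up unavailable, unless the boot option intel-skd-046-workaround=disable was given. A standalone condensation of that decision with the feature tests reduced to plain flags:

#include <stdio.h>

static int forcempx;	/* set by the intel-skd-046-workaround=disable option */

static void check_mpx_erratum(int has_mpx, int has_smep)
{
	if (forcempx)
		return;
	if (has_mpx && !has_smep)
		printf("x86/mpx: Disabling MPX since SMEP not present\n");
}

int main(void)
{
	check_mpx_erratum(1, 0);	/* MPX present, SMEP absent -> MPX gets cleared */
	forcempx = 1;
	check_mpx_erratum(1, 0);	/* override keeps MPX enabled */
	return 0;
}
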
diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
index 2658e2af74ec..93d824ec3120 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
@@ -26,6 +26,52 @@ static struct gen_pool *mce_evt_pool;
26static LLIST_HEAD(mce_event_llist); 26static LLIST_HEAD(mce_event_llist);
27static char gen_pool_buf[MCE_POOLSZ]; 27static char gen_pool_buf[MCE_POOLSZ];
28 28
29/*
30 * Compare the record "t" with each of the records on list "l" to see if
31 * an equivalent one is present in the list.
32 */
33static bool is_duplicate_mce_record(struct mce_evt_llist *t, struct mce_evt_llist *l)
34{
35 struct mce_evt_llist *node;
36 struct mce *m1, *m2;
37
38 m1 = &t->mce;
39
40 llist_for_each_entry(node, &l->llnode, llnode) {
41 m2 = &node->mce;
42
43 if (!mce_cmp(m1, m2))
44 return true;
45 }
46 return false;
47}
48
49/*
50 * The system has panicked - we'd like to peruse the list of MCE records
51 * that have been queued, but not seen by anyone yet. The list is in
52 * reverse time order, so we need to reverse it. While doing that we can
53 * also drop duplicate records (these were logged because some banks are
54 * shared between cores or by all threads on a socket).
55 */
56struct llist_node *mce_gen_pool_prepare_records(void)
57{
58 struct llist_node *head;
59 LLIST_HEAD(new_head);
60 struct mce_evt_llist *node, *t;
61
62 head = llist_del_all(&mce_event_llist);
63 if (!head)
64 return NULL;
65
66 /* squeeze out duplicates while reversing order */
67 llist_for_each_entry_safe(node, t, head, llnode) {
68 if (!is_duplicate_mce_record(node, t))
69 llist_add(&node->llnode, &new_head);
70 }
71
72 return new_head.first;
73}
74
29void mce_gen_pool_process(void) 75void mce_gen_pool_process(void)
30{ 76{
31 struct llist_node *head; 77 struct llist_node *head;
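
mce_gen_pool_prepare_records() leans on llist being a LIFO: llist_del_all() hands back the queued records newest-first, and re-adding each non-duplicate to a fresh llist flips them back into time order. The standalone sketch below reproduces the effect with an ordinary singly-linked list (the duplicate check here is against the output list rather than the remainder of the input, which keeps the same set of records):

#include <stdio.h>
#include <stdlib.h>

struct rec { int id; struct rec *next; };

static struct rec *push(struct rec *head, int id)
{
	struct rec *r = malloc(sizeof(*r));

	r->id = id;
	r->next = head;		/* LIFO: the newest record sits at the head */
	return r;
}

static int seen(struct rec *list, int id)
{
	for (; list; list = list->next)
		if (list->id == id)
			return 1;
	return 0;
}

int main(void)
{
	struct rec *queued = NULL, *prepared = NULL, *r, *next;
	int log[] = { 1, 2, 2, 3 };	/* arrival order; record 2 logged twice */

	for (unsigned int i = 0; i < sizeof(log) / sizeof(log[0]); i++)
		queued = push(queued, log[i]);

	/* walk newest-first, drop duplicates, relink to restore time order */
	for (r = queued; r; r = next) {
		next = r->next;
		if (!seen(prepared, r->id)) {
			r->next = prepared;
			prepared = r;
		} else {
			free(r);
		}
	}

	for (r = prepared; r; r = r->next)
		printf("%d ", r->id);	/* prints: 1 2 3 */
	printf("\n");
	return 0;
}
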
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 547720efd923..cd74a3f00aea 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -35,6 +35,7 @@ void mce_gen_pool_process(void);
35bool mce_gen_pool_empty(void); 35bool mce_gen_pool_empty(void);
36int mce_gen_pool_add(struct mce *mce); 36int mce_gen_pool_add(struct mce *mce);
37int mce_gen_pool_init(void); 37int mce_gen_pool_init(void);
38struct llist_node *mce_gen_pool_prepare_records(void);
38 39
39extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp); 40extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
40struct dentry *mce_get_debugfs_dir(void); 41struct dentry *mce_get_debugfs_dir(void);
@@ -81,3 +82,17 @@ static inline int apei_clear_mce(u64 record_id)
81#endif 82#endif
82 83
83void mce_inject_log(struct mce *m); 84void mce_inject_log(struct mce *m);
85
86/*
87 * We consider records to be equivalent if bank+status+addr+misc all match.
88 * This is only used when the system is going down because of a fatal error
89 * to avoid cluttering the console log with essentially repeated information.
90 * In normal processing all errors seen are logged.
91 */
92static inline bool mce_cmp(struct mce *m1, struct mce *m2)
93{
94 return m1->bank != m2->bank ||
95 m1->status != m2->status ||
96 m1->addr != m2->addr ||
97 m1->misc != m2->misc;
98}
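
Note the memcmp()-like sense of mce_cmp(): it returns false when the records match, so the genpool code tests !mce_cmp() to drop a duplicate and mce_panic() tests mce_cmp() to print everything except the final record. A standalone restatement with a trimmed toy struct mce (the real one has more fields and narrower types):

#include <stdio.h>
#include <stdbool.h>

struct mce { unsigned long long bank, status, addr, misc; };

/* same convention as memcmp(): returns false (0) when the records match */
static bool mce_cmp(struct mce *m1, struct mce *m2)
{
	return m1->bank != m2->bank || m1->status != m2->status ||
	       m1->addr != m2->addr || m1->misc != m2->misc;
}

int main(void)
{
	struct mce a = { 4, 0xb000000000000001ULL, 0x1000, 0 };
	struct mce b = a;

	if (!mce_cmp(&a, &b))
		printf("duplicate record\n");	/* this branch is taken */
	return 0;
}
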
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 5119766d9889..631356c8cca4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -204,6 +204,33 @@ static int error_context(struct mce *m)
204 return IN_KERNEL; 204 return IN_KERNEL;
205} 205}
206 206
207static int mce_severity_amd_smca(struct mce *m, int err_ctx)
208{
209 u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
210 u32 low, high;
211
212 /*
213 * We need to look at the following bits:
214 * - "succor" bit (data poisoning support), and
215 * - TCC bit (Task Context Corrupt)
216 * in MCi_STATUS to determine error severity.
217 */
218 if (!mce_flags.succor)
219 return MCE_PANIC_SEVERITY;
220
221 if (rdmsr_safe(addr, &low, &high))
222 return MCE_PANIC_SEVERITY;
223
224 /* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
225 if ((low & MCI_CONFIG_MCAX) &&
226 (m->status & MCI_STATUS_TCC) &&
227 (err_ctx == IN_KERNEL))
228 return MCE_PANIC_SEVERITY;
229
230 /* ...otherwise invoke hwpoison handler. */
231 return MCE_AR_SEVERITY;
232}
233
207/* 234/*
208 * See AMD Error Scope Hierarchy table in a newer BKDG. For example 235 * See AMD Error Scope Hierarchy table in a newer BKDG. For example
209 * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features" 236 * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
@@ -225,6 +252,9 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
225 * to at least kill process to prolong system operation. 252 * to at least kill process to prolong system operation.
226 */ 253 */
227 if (mce_flags.overflow_recov) { 254 if (mce_flags.overflow_recov) {
255 if (mce_flags.smca)
256 return mce_severity_amd_smca(m, ctx);
257
228 /* software can try to contain */ 258 /* software can try to contain */
229 if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL)) 259 if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL))
230 return MCE_PANIC_SEVERITY; 260 return MCE_PANIC_SEVERITY;
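
mce_severity_amd_smca() boils down to three outcomes: no succor capability (or a failed MCx_CONFIG read) means panic; a task-context-corrupt error taken in kernel context with MCAX active means panic; everything else is MCE_AR_SEVERITY and is handed to the hwpoison path. A standalone condensation of that decision with the MSR and flag reads replaced by plain parameters:

#include <stdio.h>

enum { SEV_AR = 1, SEV_PANIC = 2 };

static int smca_severity(int succor, int mcax, int tcc, int in_kernel)
{
	if (!succor)
		return SEV_PANIC;	/* no recovery support at all */
	if (mcax && tcc && in_kernel)
		return SEV_PANIC;	/* kernel context is corrupt */
	return SEV_AR;			/* recoverable: hwpoison handles it */
}

int main(void)
{
	printf("%d\n", smca_severity(1, 1, 1, 1));	/* 2: panic */
	printf("%d\n", smca_severity(1, 1, 1, 0));	/* 1: user context, recoverable */
	return 0;
}
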
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index f0c921b03e42..92e5e37d97bf 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -161,7 +161,6 @@ void mce_log(struct mce *mce)
161 if (!mce_gen_pool_add(mce)) 161 if (!mce_gen_pool_add(mce))
162 irq_work_queue(&mce_irq_work); 162 irq_work_queue(&mce_irq_work);
163 163
164 mce->finished = 0;
165 wmb(); 164 wmb();
166 for (;;) { 165 for (;;) {
167 entry = mce_log_get_idx_check(mcelog.next); 166 entry = mce_log_get_idx_check(mcelog.next);
@@ -194,7 +193,6 @@ void mce_log(struct mce *mce)
194 mcelog.entry[entry].finished = 1; 193 mcelog.entry[entry].finished = 1;
195 wmb(); 194 wmb();
196 195
197 mce->finished = 1;
198 set_bit(0, &mce_need_notify); 196 set_bit(0, &mce_need_notify);
199} 197}
200 198
@@ -224,6 +222,53 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
224} 222}
225EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); 223EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
226 224
225static inline u32 ctl_reg(int bank)
226{
227 return MSR_IA32_MCx_CTL(bank);
228}
229
230static inline u32 status_reg(int bank)
231{
232 return MSR_IA32_MCx_STATUS(bank);
233}
234
235static inline u32 addr_reg(int bank)
236{
237 return MSR_IA32_MCx_ADDR(bank);
238}
239
240static inline u32 misc_reg(int bank)
241{
242 return MSR_IA32_MCx_MISC(bank);
243}
244
245static inline u32 smca_ctl_reg(int bank)
246{
247 return MSR_AMD64_SMCA_MCx_CTL(bank);
248}
249
250static inline u32 smca_status_reg(int bank)
251{
252 return MSR_AMD64_SMCA_MCx_STATUS(bank);
253}
254
255static inline u32 smca_addr_reg(int bank)
256{
257 return MSR_AMD64_SMCA_MCx_ADDR(bank);
258}
259
260static inline u32 smca_misc_reg(int bank)
261{
262 return MSR_AMD64_SMCA_MCx_MISC(bank);
263}
264
265struct mca_msr_regs msr_ops = {
266 .ctl = ctl_reg,
267 .status = status_reg,
268 .addr = addr_reg,
269 .misc = misc_reg
270};
271
227static void print_mce(struct mce *m) 272static void print_mce(struct mce *m)
228{ 273{
229 int ret = 0; 274 int ret = 0;
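
The msr_ops table gives every MSR access in the MCE core one level of indirection, so the AMD vendor init later in this patch can swap in the SMCA register layout without touching any call site (all reads become mce_rdmsrl(msr_ops.status(i)) and so on). A standalone sketch of the pattern; the register arithmetic mirrors the legacy and SMCA layouts, but treat the exact numbers as illustrative:

#include <stdio.h>

/* toy versions of MSR_IA32_MCx_STATUS() and MSR_AMD64_SMCA_MCx_STATUS() */
static unsigned int legacy_status(int bank) { return 0x401 + 4 * bank; }
static unsigned int smca_status(int bank)   { return 0xc0002001 + 0x10 * bank; }

struct mca_msr_regs { unsigned int (*status)(int bank); };

static struct mca_msr_regs msr_ops = { .status = legacy_status };

int main(void)
{
	printf("bank 3 status MSR: %#x\n", msr_ops.status(3));
	msr_ops.status = smca_status;	/* what the SMCA vendor init does */
	printf("bank 3 status MSR: %#x\n", msr_ops.status(3));
	return 0;
}
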
@@ -290,7 +335,9 @@ static void wait_for_panic(void)
290 335
291static void mce_panic(const char *msg, struct mce *final, char *exp) 336static void mce_panic(const char *msg, struct mce *final, char *exp)
292{ 337{
293 int i, apei_err = 0; 338 int apei_err = 0;
339 struct llist_node *pending;
340 struct mce_evt_llist *l;
294 341
295 if (!fake_panic) { 342 if (!fake_panic) {
296 /* 343 /*
@@ -307,11 +354,10 @@ static void mce_panic(const char *msg, struct mce *final, char *exp)
307 if (atomic_inc_return(&mce_fake_panicked) > 1) 354 if (atomic_inc_return(&mce_fake_panicked) > 1)
308 return; 355 return;
309 } 356 }
357 pending = mce_gen_pool_prepare_records();
310 /* First print corrected ones that are still unlogged */ 358 /* First print corrected ones that are still unlogged */
311 for (i = 0; i < MCE_LOG_LEN; i++) { 359 llist_for_each_entry(l, pending, llnode) {
312 struct mce *m = &mcelog.entry[i]; 360 struct mce *m = &l->mce;
313 if (!(m->status & MCI_STATUS_VAL))
314 continue;
315 if (!(m->status & MCI_STATUS_UC)) { 361 if (!(m->status & MCI_STATUS_UC)) {
316 print_mce(m); 362 print_mce(m);
317 if (!apei_err) 363 if (!apei_err)
@@ -319,13 +365,11 @@ static void mce_panic(const char *msg, struct mce *final, char *exp)
319 } 365 }
320 } 366 }
321 /* Now print uncorrected but with the final one last */ 367 /* Now print uncorrected but with the final one last */
322 for (i = 0; i < MCE_LOG_LEN; i++) { 368 llist_for_each_entry(l, pending, llnode) {
323 struct mce *m = &mcelog.entry[i]; 369 struct mce *m = &l->mce;
324 if (!(m->status & MCI_STATUS_VAL))
325 continue;
326 if (!(m->status & MCI_STATUS_UC)) 370 if (!(m->status & MCI_STATUS_UC))
327 continue; 371 continue;
328 if (!final || memcmp(m, final, sizeof(struct mce))) { 372 if (!final || mce_cmp(m, final)) {
329 print_mce(m); 373 print_mce(m);
330 if (!apei_err) 374 if (!apei_err)
331 apei_err = apei_write_mce(m); 375 apei_err = apei_write_mce(m);
@@ -356,11 +400,11 @@ static int msr_to_offset(u32 msr)
356 400
357 if (msr == mca_cfg.rip_msr) 401 if (msr == mca_cfg.rip_msr)
358 return offsetof(struct mce, ip); 402 return offsetof(struct mce, ip);
359 if (msr == MSR_IA32_MCx_STATUS(bank)) 403 if (msr == msr_ops.status(bank))
360 return offsetof(struct mce, status); 404 return offsetof(struct mce, status);
361 if (msr == MSR_IA32_MCx_ADDR(bank)) 405 if (msr == msr_ops.addr(bank))
362 return offsetof(struct mce, addr); 406 return offsetof(struct mce, addr);
363 if (msr == MSR_IA32_MCx_MISC(bank)) 407 if (msr == msr_ops.misc(bank))
364 return offsetof(struct mce, misc); 408 return offsetof(struct mce, misc);
365 if (msr == MSR_IA32_MCG_STATUS) 409 if (msr == MSR_IA32_MCG_STATUS)
366 return offsetof(struct mce, mcgstatus); 410 return offsetof(struct mce, mcgstatus);
@@ -523,9 +567,9 @@ static struct notifier_block mce_srao_nb = {
523static void mce_read_aux(struct mce *m, int i) 567static void mce_read_aux(struct mce *m, int i)
524{ 568{
525 if (m->status & MCI_STATUS_MISCV) 569 if (m->status & MCI_STATUS_MISCV)
526 m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); 570 m->misc = mce_rdmsrl(msr_ops.misc(i));
527 if (m->status & MCI_STATUS_ADDRV) { 571 if (m->status & MCI_STATUS_ADDRV) {
528 m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); 572 m->addr = mce_rdmsrl(msr_ops.addr(i));
529 573
530 /* 574 /*
531 * Mask the reported address by the reported granularity. 575 * Mask the reported address by the reported granularity.
@@ -607,7 +651,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
607 m.tsc = 0; 651 m.tsc = 0;
608 652
609 barrier(); 653 barrier();
610 m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); 654 m.status = mce_rdmsrl(msr_ops.status(i));
611 if (!(m.status & MCI_STATUS_VAL)) 655 if (!(m.status & MCI_STATUS_VAL))
612 continue; 656 continue;
613 657
@@ -654,7 +698,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
654 /* 698 /*
655 * Clear state for this bank. 699 * Clear state for this bank.
656 */ 700 */
657 mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); 701 mce_wrmsrl(msr_ops.status(i), 0);
658 } 702 }
659 703
660 /* 704 /*
@@ -679,7 +723,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
679 char *tmp; 723 char *tmp;
680 724
681 for (i = 0; i < mca_cfg.banks; i++) { 725 for (i = 0; i < mca_cfg.banks; i++) {
682 m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); 726 m->status = mce_rdmsrl(msr_ops.status(i));
683 if (m->status & MCI_STATUS_VAL) { 727 if (m->status & MCI_STATUS_VAL) {
684 __set_bit(i, validp); 728 __set_bit(i, validp);
685 if (quirk_no_way_out) 729 if (quirk_no_way_out)
@@ -830,9 +874,9 @@ static int mce_start(int *no_way_out)
830 874
831 atomic_add(*no_way_out, &global_nwo); 875 atomic_add(*no_way_out, &global_nwo);
832 /* 876 /*
833 * global_nwo should be updated before mce_callin 877 * Rely on the implied barrier below, such that global_nwo
878 * is updated before mce_callin.
834 */ 879 */
835 smp_wmb();
836 order = atomic_inc_return(&mce_callin); 880 order = atomic_inc_return(&mce_callin);
837 881
838 /* 882 /*
@@ -957,7 +1001,7 @@ static void mce_clear_state(unsigned long *toclear)
957 1001
958 for (i = 0; i < mca_cfg.banks; i++) { 1002 for (i = 0; i < mca_cfg.banks; i++) {
959 if (test_bit(i, toclear)) 1003 if (test_bit(i, toclear))
960 mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); 1004 mce_wrmsrl(msr_ops.status(i), 0);
961 } 1005 }
962} 1006}
963 1007
@@ -994,11 +1038,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
994 int i; 1038 int i;
995 int worst = 0; 1039 int worst = 0;
996 int severity; 1040 int severity;
1041
997 /* 1042 /*
998 * Establish sequential order between the CPUs entering the machine 1043 * Establish sequential order between the CPUs entering the machine
999 * check handler. 1044 * check handler.
1000 */ 1045 */
1001 int order; 1046 int order = -1;
1002 /* 1047 /*
1003 * If no_way_out gets set, there is no safe way to recover from this 1048 * If no_way_out gets set, there is no safe way to recover from this
1004 * MCE. If mca_cfg.tolerant is cranked up, we'll try anyway. 1049 * MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
@@ -1012,7 +1057,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1012 DECLARE_BITMAP(toclear, MAX_NR_BANKS); 1057 DECLARE_BITMAP(toclear, MAX_NR_BANKS);
1013 DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); 1058 DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
1014 char *msg = "Unknown"; 1059 char *msg = "Unknown";
1015 int lmce = 0; 1060
1061 /*
1062 * MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES
1063 * on Intel.
1064 */
1065 int lmce = 1;
1016 1066
1017 /* If this CPU is offline, just bail out. */ 1067 /* If this CPU is offline, just bail out. */
1018 if (cpu_is_offline(smp_processor_id())) { 1068 if (cpu_is_offline(smp_processor_id())) {
@@ -1051,19 +1101,20 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1051 kill_it = 1; 1101 kill_it = 1;
1052 1102
1053 /* 1103 /*
1054 * Check if this MCE is signaled to only this logical processor 1104 * Check if this MCE is signaled to only this logical processor,
1105 * on Intel only.
1055 */ 1106 */
1056 if (m.mcgstatus & MCG_STATUS_LMCES) 1107 if (m.cpuvendor == X86_VENDOR_INTEL)
1057 lmce = 1; 1108 lmce = m.mcgstatus & MCG_STATUS_LMCES;
1058 else { 1109
1059 /* 1110 /*
1060 * Go through all the banks in exclusion of the other CPUs. 1111 * Go through all banks in exclusion of the other CPUs. This way we
1061 * This way we don't report duplicated events on shared banks 1112 * don't report duplicated events on shared banks because the first one
1062 * because the first one to see it will clear it. 1113 * to see it will clear it. If this is a Local MCE, then no need to
1063 * If this is a Local MCE, then no need to perform rendezvous. 1114 * perform rendezvous.
1064 */ 1115 */
1116 if (!lmce)
1065 order = mce_start(&no_way_out); 1117 order = mce_start(&no_way_out);
1066 }
1067 1118
1068 for (i = 0; i < cfg->banks; i++) { 1119 for (i = 0; i < cfg->banks; i++) {
1069 __clear_bit(i, toclear); 1120 __clear_bit(i, toclear);
@@ -1076,7 +1127,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1076 m.addr = 0; 1127 m.addr = 0;
1077 m.bank = i; 1128 m.bank = i;
1078 1129
1079 m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); 1130 m.status = mce_rdmsrl(msr_ops.status(i));
1080 if ((m.status & MCI_STATUS_VAL) == 0) 1131 if ((m.status & MCI_STATUS_VAL) == 0)
1081 continue; 1132 continue;
1082 1133
@@ -1420,7 +1471,6 @@ static void __mcheck_cpu_init_generic(void)
1420 enum mcp_flags m_fl = 0; 1471 enum mcp_flags m_fl = 0;
1421 mce_banks_t all_banks; 1472 mce_banks_t all_banks;
1422 u64 cap; 1473 u64 cap;
1423 int i;
1424 1474
1425 if (!mca_cfg.bootlog) 1475 if (!mca_cfg.bootlog)
1426 m_fl = MCP_DONTLOG; 1476 m_fl = MCP_DONTLOG;
@@ -1436,14 +1486,19 @@ static void __mcheck_cpu_init_generic(void)
1436 rdmsrl(MSR_IA32_MCG_CAP, cap); 1486 rdmsrl(MSR_IA32_MCG_CAP, cap);
1437 if (cap & MCG_CTL_P) 1487 if (cap & MCG_CTL_P)
1438 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 1488 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
1489}
1490
1491static void __mcheck_cpu_init_clear_banks(void)
1492{
1493 int i;
1439 1494
1440 for (i = 0; i < mca_cfg.banks; i++) { 1495 for (i = 0; i < mca_cfg.banks; i++) {
1441 struct mce_bank *b = &mce_banks[i]; 1496 struct mce_bank *b = &mce_banks[i];
1442 1497
1443 if (!b->init) 1498 if (!b->init)
1444 continue; 1499 continue;
1445 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); 1500 wrmsrl(msr_ops.ctl(i), b->ctl);
1446 wrmsrl(MSR_IA32_MCx_STATUS(i), 0); 1501 wrmsrl(msr_ops.status(i), 0);
1447 } 1502 }
1448} 1503}
1449 1504
@@ -1495,7 +1550,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1495 */ 1550 */
1496 clear_bit(10, (unsigned long *)&mce_banks[4].ctl); 1551 clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
1497 } 1552 }
1498 if (c->x86 <= 17 && cfg->bootlog < 0) { 1553 if (c->x86 < 17 && cfg->bootlog < 0) {
1499 /* 1554 /*
1500 * Lots of broken BIOS around that don't clear them 1555 * Lots of broken BIOS around that don't clear them
1501 * by default and leave crap in there. Don't log: 1556 * by default and leave crap in there. Don't log:
@@ -1628,11 +1683,19 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
1628 break; 1683 break;
1629 1684
1630 case X86_VENDOR_AMD: { 1685 case X86_VENDOR_AMD: {
1631 u32 ebx = cpuid_ebx(0x80000007); 1686 mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
1687 mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
1688 mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
1632 1689
1633 mce_flags.overflow_recov = !!(ebx & BIT(0)); 1690 /*
1634 mce_flags.succor = !!(ebx & BIT(1)); 1691 * Install proper ops for Scalable MCA enabled processors
1635 mce_flags.smca = !!(ebx & BIT(3)); 1692 */
1693 if (mce_flags.smca) {
1694 msr_ops.ctl = smca_ctl_reg;
1695 msr_ops.status = smca_status_reg;
1696 msr_ops.addr = smca_addr_reg;
1697 msr_ops.misc = smca_misc_reg;
1698 }
1636 mce_amd_feature_init(c); 1699 mce_amd_feature_init(c);
1637 1700
1638 break; 1701 break;
@@ -1717,6 +1780,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
1717 1780
1718 __mcheck_cpu_init_generic(); 1781 __mcheck_cpu_init_generic();
1719 __mcheck_cpu_init_vendor(c); 1782 __mcheck_cpu_init_vendor(c);
1783 __mcheck_cpu_init_clear_banks();
1720 __mcheck_cpu_init_timer(); 1784 __mcheck_cpu_init_timer();
1721} 1785}
1722 1786
@@ -2082,7 +2146,7 @@ static void mce_disable_error_reporting(void)
2082 struct mce_bank *b = &mce_banks[i]; 2146 struct mce_bank *b = &mce_banks[i];
2083 2147
2084 if (b->init) 2148 if (b->init)
2085 wrmsrl(MSR_IA32_MCx_CTL(i), 0); 2149 wrmsrl(msr_ops.ctl(i), 0);
2086 } 2150 }
2087 return; 2151 return;
2088} 2152}
@@ -2121,6 +2185,7 @@ static void mce_syscore_resume(void)
2121{ 2185{
2122 __mcheck_cpu_init_generic(); 2186 __mcheck_cpu_init_generic();
2123 __mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info)); 2187 __mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
2188 __mcheck_cpu_init_clear_banks();
2124} 2189}
2125 2190
2126static struct syscore_ops mce_syscore_ops = { 2191static struct syscore_ops mce_syscore_ops = {
@@ -2138,6 +2203,7 @@ static void mce_cpu_restart(void *data)
2138 if (!mce_available(raw_cpu_ptr(&cpu_info))) 2203 if (!mce_available(raw_cpu_ptr(&cpu_info)))
2139 return; 2204 return;
2140 __mcheck_cpu_init_generic(); 2205 __mcheck_cpu_init_generic();
2206 __mcheck_cpu_init_clear_banks();
2141 __mcheck_cpu_init_timer(); 2207 __mcheck_cpu_init_timer();
2142} 2208}
2143 2209
@@ -2413,7 +2479,7 @@ static void mce_reenable_cpu(void *h)
2413 struct mce_bank *b = &mce_banks[i]; 2479 struct mce_bank *b = &mce_banks[i];
2414 2480
2415 if (b->init) 2481 if (b->init)
2416 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); 2482 wrmsrl(msr_ops.ctl(i), b->ctl);
2417 } 2483 }
2418} 2484}
2419 2485
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 9d656fd436ef..10b0661651e0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -54,14 +54,6 @@
54/* Threshold LVT offset is at MSR0xC0000410[15:12] */ 54/* Threshold LVT offset is at MSR0xC0000410[15:12] */
55#define SMCA_THR_LVT_OFF 0xF000 55#define SMCA_THR_LVT_OFF 0xF000
56 56
57/*
58 * OS is required to set the MCAX bit to acknowledge that it is now using the
59 * new MSR ranges and new registers under each bank. It also means that the OS
60 * will configure deferred errors in the new MCx_CONFIG register. If the bit is
61 * not set, uncorrectable errors will cause a system panic.
62 */
63#define SMCA_MCAX_EN_OFF 0x1
64
65static const char * const th_names[] = { 57static const char * const th_names[] = {
66 "load_store", 58 "load_store",
67 "insn_fetch", 59 "insn_fetch",
@@ -333,7 +325,7 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
333 /* Fall back to method we used for older processors: */ 325 /* Fall back to method we used for older processors: */
334 switch (block) { 326 switch (block) {
335 case 0: 327 case 0:
336 addr = MSR_IA32_MCx_MISC(bank); 328 addr = msr_ops.misc(bank);
337 break; 329 break;
338 case 1: 330 case 1:
339 offset = ((low & MASK_BLKPTR_LO) >> 21); 331 offset = ((low & MASK_BLKPTR_LO) >> 21);
@@ -351,6 +343,7 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
351 int offset, u32 misc_high) 343 int offset, u32 misc_high)
352{ 344{
353 unsigned int cpu = smp_processor_id(); 345 unsigned int cpu = smp_processor_id();
346 u32 smca_low, smca_high, smca_addr;
354 struct threshold_block b; 347 struct threshold_block b;
355 int new; 348 int new;
356 349
@@ -369,24 +362,49 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
369 362
370 b.interrupt_enable = 1; 363 b.interrupt_enable = 1;
371 364
372 if (mce_flags.smca) { 365 if (!mce_flags.smca) {
373 u32 smca_low, smca_high; 366 new = (misc_high & MASK_LVTOFF_HI) >> 20;
374 u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank); 367 goto set_offset;
368 }
375 369
376 if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) { 370 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
377 smca_high |= SMCA_MCAX_EN_OFF;
378 wrmsr(smca_addr, smca_low, smca_high);
379 }
380 371
381 /* Gather LVT offset for thresholding: */ 372 if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
382 if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high)) 373 /*
383 goto out; 374 * OS is required to set the MCAX bit to acknowledge that it is
375 * now using the new MSR ranges and new registers under each
376 * bank. It also means that the OS will configure deferred
377 * errors in the new MCx_CONFIG register. If the bit is not set,
378 * uncorrectable errors will cause a system panic.
379 *
380 * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the MSR.)
381 */
382 smca_high |= BIT(0);
384 383
385 new = (smca_low & SMCA_THR_LVT_OFF) >> 12; 384 /*
386 } else { 385 * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR}
387 new = (misc_high & MASK_LVTOFF_HI) >> 20; 386 * registers with the option of additionally logging to
387 * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set.
388 *
389 * This bit is usually set by BIOS to retain the old behavior
390 * for OSes that don't use the new registers. Linux supports the
391 * new registers so let's disable that additional logging here.
392 *
393 * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high
394 * portion of the MSR).
395 */
396 smca_high &= ~BIT(2);
397
398 wrmsr(smca_addr, smca_low, smca_high);
388 } 399 }
389 400
401 /* Gather LVT offset for thresholding: */
402 if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
403 goto out;
404
405 new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
406
407set_offset:
390 offset = setup_APIC_mce_threshold(offset, new); 408 offset = setup_APIC_mce_threshold(offset, new);
391 409
392 if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt)) 410 if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
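
The comments above translate 64-bit MCA_CONFIG bit positions into the (low, high) 32-bit halves that rdmsr_safe()/wrmsr() operate on: bit 32 of the MSR is BIT(0) of smca_high and bit 34 is BIT(2), which is exactly what the code sets and clears. A standalone check of that arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned long long config = 0;
	unsigned int lo, hi;

	config |= 1ULL << 32;		/* MCAX enable */
	config &= ~(1ULL << 34);	/* LogDeferredInMcaStat off */

	lo = (unsigned int)config;
	hi = (unsigned int)(config >> 32);

	/* bit 32 -> bit 0 of hi, bit 34 -> bit 2 of hi */
	printf("lo=%#x hi=%#x\n", lo, hi);	/* prints lo=0 hi=0x1 */
	return 0;
}
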
@@ -430,12 +448,23 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
430 deferred_error_interrupt_enable(c); 448 deferred_error_interrupt_enable(c);
431} 449}
432 450
433static void __log_error(unsigned int bank, bool threshold_err, u64 misc) 451static void
452__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
434{ 453{
454 u32 msr_status = msr_ops.status(bank);
455 u32 msr_addr = msr_ops.addr(bank);
435 struct mce m; 456 struct mce m;
436 u64 status; 457 u64 status;
437 458
438 rdmsrl(MSR_IA32_MCx_STATUS(bank), status); 459 WARN_ON_ONCE(deferred_err && threshold_err);
460
461 if (deferred_err && mce_flags.smca) {
462 msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
463 msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
464 }
465
466 rdmsrl(msr_status, status);
467
439 if (!(status & MCI_STATUS_VAL)) 468 if (!(status & MCI_STATUS_VAL))
440 return; 469 return;
441 470
@@ -448,10 +477,11 @@ static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
448 m.misc = misc; 477 m.misc = misc;
449 478
450 if (m.status & MCI_STATUS_ADDRV) 479 if (m.status & MCI_STATUS_ADDRV)
451 rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr); 480 rdmsrl(msr_addr, m.addr);
452 481
453 mce_log(&m); 482 mce_log(&m);
454 wrmsrl(MSR_IA32_MCx_STATUS(bank), 0); 483
484 wrmsrl(msr_status, 0);
455} 485}
456 486
457static inline void __smp_deferred_error_interrupt(void) 487static inline void __smp_deferred_error_interrupt(void)
@@ -479,17 +509,21 @@ asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
479/* APIC interrupt handler for deferred errors */ 509/* APIC interrupt handler for deferred errors */
480static void amd_deferred_error_interrupt(void) 510static void amd_deferred_error_interrupt(void)
481{ 511{
482 u64 status;
483 unsigned int bank; 512 unsigned int bank;
513 u32 msr_status;
514 u64 status;
484 515
485 for (bank = 0; bank < mca_cfg.banks; ++bank) { 516 for (bank = 0; bank < mca_cfg.banks; ++bank) {
486 rdmsrl(MSR_IA32_MCx_STATUS(bank), status); 517 msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
518 : msr_ops.status(bank);
519
520 rdmsrl(msr_status, status);
487 521
488 if (!(status & MCI_STATUS_VAL) || 522 if (!(status & MCI_STATUS_VAL) ||
489 !(status & MCI_STATUS_DEFERRED)) 523 !(status & MCI_STATUS_DEFERRED))
490 continue; 524 continue;
491 525
492 __log_error(bank, false, 0); 526 __log_error(bank, true, false, 0);
493 break; 527 break;
494 } 528 }
495} 529}
@@ -544,7 +578,7 @@ static void amd_threshold_interrupt(void)
544 return; 578 return;
545 579
546log: 580log:
547 __log_error(bank, true, ((u64)high << 32) | low); 581 __log_error(bank, false, true, ((u64)high << 32) | low);
548} 582}
549 583
550/* 584/*
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 1e8bb6c94f14..1defb8ea882c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -84,7 +84,7 @@ static int cmci_supported(int *banks)
84 */ 84 */
85 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) 85 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
86 return 0; 86 return 0;
87 if (!cpu_has_apic || lapic_get_maxlvt() < 6) 87 if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
88 return 0; 88 return 0;
89 rdmsrl(MSR_IA32_MCG_CAP, cap); 89 rdmsrl(MSR_IA32_MCG_CAP, cap);
90 *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); 90 *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index ac780cad3b86..6b9dc4d18ccc 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -450,7 +450,7 @@ asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs)
450/* Thermal monitoring depends on APIC, ACPI and clock modulation */ 450/* Thermal monitoring depends on APIC, ACPI and clock modulation */
451static int intel_thermal_supported(struct cpuinfo_x86 *c) 451static int intel_thermal_supported(struct cpuinfo_x86 *c)
452{ 452{
453 if (!cpu_has_apic) 453 if (!boot_cpu_has(X86_FEATURE_APIC))
454 return 0; 454 return 0;
455 if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) 455 if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
456 return 0; 456 return 0;
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index cbb3cf09b065..65cbbcd48fe4 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -422,7 +422,7 @@ static void show_saved_mc(void)
422 data_size = get_datasize(mc_saved_header); 422 data_size = get_datasize(mc_saved_header);
423 date = mc_saved_header->date; 423 date = mc_saved_header->date;
424 424
425 pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n", 425 pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, total size=0x%x, date = %04x-%02x-%02x\n",
426 i, sig, pf, rev, total_size, 426 i, sig, pf, rev, total_size,
427 date & 0xffff, 427 date & 0xffff,
428 date >> 24, 428 date >> 24,
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index f1bed301bdb2..16e37a2581ac 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -444,11 +444,24 @@ static void __init print_mtrr_state(void)
444 pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20); 444 pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
445} 445}
446 446
447/* PAT setup for BP. We need to go through sync steps here */
448void __init mtrr_bp_pat_init(void)
449{
450 unsigned long flags;
451
452 local_irq_save(flags);
453 prepare_set();
454
455 pat_init();
456
457 post_set();
458 local_irq_restore(flags);
459}
460
447/* Grab all of the MTRR state for this CPU into *state */ 461/* Grab all of the MTRR state for this CPU into *state */
448bool __init get_mtrr_state(void) 462bool __init get_mtrr_state(void)
449{ 463{
450 struct mtrr_var_range *vrs; 464 struct mtrr_var_range *vrs;
451 unsigned long flags;
452 unsigned lo, dummy; 465 unsigned lo, dummy;
453 unsigned int i; 466 unsigned int i;
454 467
@@ -481,15 +494,6 @@ bool __init get_mtrr_state(void)
481 494
482 mtrr_state_set = 1; 495 mtrr_state_set = 1;
483 496
484 /* PAT setup for BP. We need to go through sync steps here */
485 local_irq_save(flags);
486 prepare_set();
487
488 pat_init();
489
490 post_set();
491 local_irq_restore(flags);
492
493 return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED); 497 return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED);
494} 498}
495 499
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 10f8d4796240..7d393ecdeee6 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -752,6 +752,9 @@ void __init mtrr_bp_init(void)
752 /* BIOS may override */ 752 /* BIOS may override */
753 __mtrr_enabled = get_mtrr_state(); 753 __mtrr_enabled = get_mtrr_state();
754 754
755 if (mtrr_enabled())
756 mtrr_bp_pat_init();
757
755 if (mtrr_cleanup(phys_addr)) { 758 if (mtrr_cleanup(phys_addr)) {
756 changed_by_mtrr_cleanup = 1; 759 changed_by_mtrr_cleanup = 1;
757 mtrr_if->set_all(); 760 mtrr_if->set_all();
@@ -759,8 +762,16 @@ void __init mtrr_bp_init(void)
759 } 762 }
760 } 763 }
761 764
762 if (!mtrr_enabled()) 765 if (!mtrr_enabled()) {
763 pr_info("MTRR: Disabled\n"); 766 pr_info("MTRR: Disabled\n");
767
768 /*
769 * PAT initialization relies on MTRR's rendezvous handler.
770 * Skip PAT init until the handler can initialize both
771 * features independently.
772 */
773 pat_disable("MTRRs disabled, skipping PAT initialization too.");
774 }
764} 775}
765 776
766void mtrr_ap_init(void) 777void mtrr_ap_init(void)
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 951884dcc433..6c7ced07d16d 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -52,6 +52,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
52void fill_mtrr_var_range(unsigned int index, 52void fill_mtrr_var_range(unsigned int index,
53 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); 53 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
54bool get_mtrr_state(void); 54bool get_mtrr_state(void);
55void mtrr_bp_pat_init(void);
55 56
56extern void set_mtrr_ops(const struct mtrr_ops *ops); 57extern void set_mtrr_ops(const struct mtrr_ops *ops);
57 58
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 1f4acd68b98b..3fe45f84ced4 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -151,7 +151,7 @@ static void __init dtb_lapic_setup(void)
151 return; 151 return;
152 152
153 /* Did the boot loader setup the local APIC ? */ 153 /* Did the boot loader setup the local APIC ? */
154 if (!cpu_has_apic) { 154 if (!boot_cpu_has(X86_FEATURE_APIC)) {
155 if (apic_force_enable(r.start)) 155 if (apic_force_enable(r.start))
156 return; 156 return;
157 } 157 }
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 8efa57a5f29e..2bb25c3fe2e8 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -260,19 +260,12 @@ int __die(const char *str, struct pt_regs *regs, long err)
260 unsigned long sp; 260 unsigned long sp;
261#endif 261#endif
262 printk(KERN_DEFAULT 262 printk(KERN_DEFAULT
263 "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); 263 "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter,
264#ifdef CONFIG_PREEMPT 264 IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
265 printk("PREEMPT "); 265 IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
266#endif 266 debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
267#ifdef CONFIG_SMP 267 IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "");
268 printk("SMP "); 268
269#endif
270 if (debug_pagealloc_enabled())
271 printk("DEBUG_PAGEALLOC ");
272#ifdef CONFIG_KASAN
273 printk("KASAN");
274#endif
275 printk("\n");
276 if (notify_die(DIE_OOPS, str, regs, err, 269 if (notify_die(DIE_OOPS, str, regs, err,
277 current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) 270 current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
278 return 1; 271 return 1;
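
The rewritten oops banner replaces a run of #ifdef'd printk() calls with a single printk() whose arguments use IS_ENABLED(), which expands to a compile-time 1 or 0 and lets the disabled strings drop out without preprocessor blocks. A userspace-flavoured sketch of the same idiom; MY_ENABLED_* stand in for the kernel's CONFIG_* tests:

#include <stdio.h>

/* stand-ins; the kernel's IS_ENABLED(CONFIG_FOO) comes from its kconfig headers */
#define MY_ENABLED_PREEMPT 1
#define MY_ENABLED_SMP     0

int main(void)
{
	int die_counter = 1;

	printf("oops: %04lx [#%d]%s%s\n", 0x0010UL, die_counter,
	       MY_ENABLED_PREEMPT ? " PREEMPT" : "",
	       MY_ENABLED_SMP ? " SMP" : "");
	return 0;
}
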
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/ebda.c
index 992f442ca155..afe65dffee80 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/ebda.c
@@ -38,7 +38,7 @@ void __init reserve_ebda_region(void)
38 * that the paravirt case can handle memory setup 38 * that the paravirt case can handle memory setup
39 * correctly, without our help. 39 * correctly, without our help.
40 */ 40 */
41 if (paravirt_enabled()) 41 if (!x86_platform.legacy.ebda_search)
42 return; 42 return;
43 43
44 /* end of low (conventional) memory */ 44 /* end of low (conventional) memory */
diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c
index dd9ca9b60ff3..aad34aafc0e0 100644
--- a/arch/x86/kernel/fpu/bugs.c
+++ b/arch/x86/kernel/fpu/bugs.c
@@ -21,11 +21,15 @@ static double __initdata y = 3145727.0;
21 * We should really only care about bugs here 21 * We should really only care about bugs here
22 * anyway. Not features. 22 * anyway. Not features.
23 */ 23 */
24static void __init check_fpu(void) 24void __init fpu__init_check_bugs(void)
25{ 25{
26 u32 cr0_saved; 26 u32 cr0_saved;
27 s32 fdiv_bug; 27 s32 fdiv_bug;
28 28
29 /* kernel_fpu_begin/end() relies on patched alternative instructions. */
30 if (!boot_cpu_has(X86_FEATURE_FPU))
31 return;
32
29 /* We might have CR0::TS set already, clear it: */ 33 /* We might have CR0::TS set already, clear it: */
30 cr0_saved = read_cr0(); 34 cr0_saved = read_cr0();
31 write_cr0(cr0_saved & ~X86_CR0_TS); 35 write_cr0(cr0_saved & ~X86_CR0_TS);
@@ -59,13 +63,3 @@ static void __init check_fpu(void)
59 pr_warn("Hmm, FPU with FDIV bug\n"); 63 pr_warn("Hmm, FPU with FDIV bug\n");
60 } 64 }
61} 65}
62
63void __init fpu__init_check_bugs(void)
64{
65 /*
66 * kernel_fpu_begin/end() in check_fpu() relies on the patched
67 * alternative instructions.
68 */
69 if (cpu_has_fpu)
70 check_fpu();
71}
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 8e37cc8a539a..97027545a72d 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -217,14 +217,14 @@ static inline void fpstate_init_fstate(struct fregs_state *fp)
217 217
218void fpstate_init(union fpregs_state *state) 218void fpstate_init(union fpregs_state *state)
219{ 219{
220 if (!cpu_has_fpu) { 220 if (!static_cpu_has(X86_FEATURE_FPU)) {
221 fpstate_init_soft(&state->soft); 221 fpstate_init_soft(&state->soft);
222 return; 222 return;
223 } 223 }
224 224
225 memset(state, 0, xstate_size); 225 memset(state, 0, xstate_size);
226 226
227 if (cpu_has_fxsr) 227 if (static_cpu_has(X86_FEATURE_FXSR))
228 fpstate_init_fxstate(&state->fxsave); 228 fpstate_init_fxstate(&state->fxsave);
229 else 229 else
230 fpstate_init_fstate(&state->fsave); 230 fpstate_init_fstate(&state->fsave);
@@ -237,7 +237,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
237 dst_fpu->fpregs_active = 0; 237 dst_fpu->fpregs_active = 0;
238 dst_fpu->last_cpu = -1; 238 dst_fpu->last_cpu = -1;
239 239
240 if (!src_fpu->fpstate_active || !cpu_has_fpu) 240 if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
241 return 0; 241 return 0;
242 242
243 WARN_ON_FPU(src_fpu != &current->thread.fpu); 243 WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -506,33 +506,6 @@ void fpu__clear(struct fpu *fpu)
506 * x87 math exception handling: 506 * x87 math exception handling:
507 */ 507 */
508 508
509static inline unsigned short get_fpu_cwd(struct fpu *fpu)
510{
511 if (cpu_has_fxsr) {
512 return fpu->state.fxsave.cwd;
513 } else {
514 return (unsigned short)fpu->state.fsave.cwd;
515 }
516}
517
518static inline unsigned short get_fpu_swd(struct fpu *fpu)
519{
520 if (cpu_has_fxsr) {
521 return fpu->state.fxsave.swd;
522 } else {
523 return (unsigned short)fpu->state.fsave.swd;
524 }
525}
526
527static inline unsigned short get_fpu_mxcsr(struct fpu *fpu)
528{
529 if (cpu_has_xmm) {
530 return fpu->state.fxsave.mxcsr;
531 } else {
532 return MXCSR_DEFAULT;
533 }
534}
535
536int fpu__exception_code(struct fpu *fpu, int trap_nr) 509int fpu__exception_code(struct fpu *fpu, int trap_nr)
537{ 510{
538 int err; 511 int err;
@@ -547,10 +520,15 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
547 * so if this combination doesn't produce any single exception, 520 * so if this combination doesn't produce any single exception,
548 * then we have a bad program that isn't synchronizing its FPU usage 521 * then we have a bad program that isn't synchronizing its FPU usage
549 * and it will suffer the consequences since we won't be able to 522 * and it will suffer the consequences since we won't be able to
550 * fully reproduce the context of the exception 523 * fully reproduce the context of the exception.
551 */ 524 */
552 cwd = get_fpu_cwd(fpu); 525 if (boot_cpu_has(X86_FEATURE_FXSR)) {
553 swd = get_fpu_swd(fpu); 526 cwd = fpu->state.fxsave.cwd;
527 swd = fpu->state.fxsave.swd;
528 } else {
529 cwd = (unsigned short)fpu->state.fsave.cwd;
530 swd = (unsigned short)fpu->state.fsave.swd;
531 }
554 532
555 err = swd & ~cwd; 533 err = swd & ~cwd;
556 } else { 534 } else {
@@ -560,7 +538,11 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
560 * unmasked exception was caught we must mask the exception mask bits 538 * unmasked exception was caught we must mask the exception mask bits
561 * at 0x1f80, and then use these to mask the exception bits at 0x3f. 539 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
562 */ 540 */
563 unsigned short mxcsr = get_fpu_mxcsr(fpu); 541 unsigned short mxcsr = MXCSR_DEFAULT;
542
543 if (boot_cpu_has(X86_FEATURE_XMM))
544 mxcsr = fpu->state.fxsave.mxcsr;
545
564 err = ~(mxcsr >> 7) & mxcsr; 546 err = ~(mxcsr >> 7) & mxcsr;
565 } 547 }
566 548
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 54c86fffbf9f..aacfd7a82cec 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -29,22 +29,22 @@ static void fpu__init_cpu_generic(void)
29 unsigned long cr0; 29 unsigned long cr0;
30 unsigned long cr4_mask = 0; 30 unsigned long cr4_mask = 0;
31 31
32 if (cpu_has_fxsr) 32 if (boot_cpu_has(X86_FEATURE_FXSR))
33 cr4_mask |= X86_CR4_OSFXSR; 33 cr4_mask |= X86_CR4_OSFXSR;
34 if (cpu_has_xmm) 34 if (boot_cpu_has(X86_FEATURE_XMM))
35 cr4_mask |= X86_CR4_OSXMMEXCPT; 35 cr4_mask |= X86_CR4_OSXMMEXCPT;
36 if (cr4_mask) 36 if (cr4_mask)
37 cr4_set_bits(cr4_mask); 37 cr4_set_bits(cr4_mask);
38 38
39 cr0 = read_cr0(); 39 cr0 = read_cr0();
40 cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ 40 cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
41 if (!cpu_has_fpu) 41 if (!boot_cpu_has(X86_FEATURE_FPU))
42 cr0 |= X86_CR0_EM; 42 cr0 |= X86_CR0_EM;
43 write_cr0(cr0); 43 write_cr0(cr0);
44 44
45 /* Flush out any pending x87 state: */ 45 /* Flush out any pending x87 state: */
46#ifdef CONFIG_MATH_EMULATION 46#ifdef CONFIG_MATH_EMULATION
47 if (!cpu_has_fpu) 47 if (!boot_cpu_has(X86_FEATURE_FPU))
48 fpstate_init_soft(&current->thread.fpu.state.soft); 48 fpstate_init_soft(&current->thread.fpu.state.soft);
49 else 49 else
50#endif 50#endif
@@ -89,7 +89,7 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
89 } 89 }
90 90
91#ifndef CONFIG_MATH_EMULATION 91#ifndef CONFIG_MATH_EMULATION
92 if (!cpu_has_fpu) { 92 if (!boot_cpu_has(X86_FEATURE_FPU)) {
93 pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n"); 93 pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n");
94 for (;;) 94 for (;;)
95 asm volatile("hlt"); 95 asm volatile("hlt");
@@ -106,7 +106,7 @@ static void __init fpu__init_system_mxcsr(void)
106{ 106{
107 unsigned int mask = 0; 107 unsigned int mask = 0;
108 108
109 if (cpu_has_fxsr) { 109 if (boot_cpu_has(X86_FEATURE_FXSR)) {
110 /* Static because GCC does not get 16-byte stack alignment right: */ 110 /* Static because GCC does not get 16-byte stack alignment right: */
111 static struct fxregs_state fxregs __initdata; 111 static struct fxregs_state fxregs __initdata;
112 112
@@ -212,7 +212,7 @@ static void __init fpu__init_system_xstate_size_legacy(void)
212 * fpu__init_system_xstate(). 212 * fpu__init_system_xstate().
213 */ 213 */
214 214
215 if (!cpu_has_fpu) { 215 if (!boot_cpu_has(X86_FEATURE_FPU)) {
216 /* 216 /*
217 * Disable xsave as we do not support it if i387 217 * Disable xsave as we do not support it if i387
218 * emulation is enabled. 218 * emulation is enabled.
@@ -221,7 +221,7 @@ static void __init fpu__init_system_xstate_size_legacy(void)
221 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); 221 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
222 xstate_size = sizeof(struct swregs_state); 222 xstate_size = sizeof(struct swregs_state);
223 } else { 223 } else {
224 if (cpu_has_fxsr) 224 if (boot_cpu_has(X86_FEATURE_FXSR))
225 xstate_size = sizeof(struct fxregs_state); 225 xstate_size = sizeof(struct fxregs_state);
226 else 226 else
227 xstate_size = sizeof(struct fregs_state); 227 xstate_size = sizeof(struct fregs_state);
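The pattern applied throughout this file (and the rest of the series) replaces the per-feature cpu_has_* convenience macros with explicit boot_cpu_has(X86_FEATURE_*) tests against the boot CPU's capability bits. A hypothetical in-kernel consumer would follow the same shape; this sketch is not part of the patch, only an illustration of the idiom:

#include <linux/errno.h>
#include <asm/cpufeature.h>
#include <asm/fpu/types.h>

/* Hypothetical helper: gate an FXSAVE on the boot CPU's capability bit
 * instead of the old cpu_has_fxsr wrapper. */
static int example_save_fx(struct fxregs_state *fx)
{
	if (!boot_cpu_has(X86_FEATURE_FXSR))
		return -ENODEV;

	asm volatile("fxsave %[fx]" : [fx] "=m" (*fx));
	return 0;
}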
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 8bd1c003942a..81422dfb152b 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -21,7 +21,10 @@ int regset_xregset_fpregs_active(struct task_struct *target, const struct user_r
21{ 21{
22 struct fpu *target_fpu = &target->thread.fpu; 22 struct fpu *target_fpu = &target->thread.fpu;
23 23
24 return (cpu_has_fxsr && target_fpu->fpstate_active) ? regset->n : 0; 24 if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active)
25 return regset->n;
26 else
27 return 0;
25} 28}
26 29
27int xfpregs_get(struct task_struct *target, const struct user_regset *regset, 30int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
@@ -30,7 +33,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
30{ 33{
31 struct fpu *fpu = &target->thread.fpu; 34 struct fpu *fpu = &target->thread.fpu;
32 35
33 if (!cpu_has_fxsr) 36 if (!boot_cpu_has(X86_FEATURE_FXSR))
34 return -ENODEV; 37 return -ENODEV;
35 38
36 fpu__activate_fpstate_read(fpu); 39 fpu__activate_fpstate_read(fpu);
@@ -47,7 +50,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
47 struct fpu *fpu = &target->thread.fpu; 50 struct fpu *fpu = &target->thread.fpu;
48 int ret; 51 int ret;
49 52
50 if (!cpu_has_fxsr) 53 if (!boot_cpu_has(X86_FEATURE_FXSR))
51 return -ENODEV; 54 return -ENODEV;
52 55
53 fpu__activate_fpstate_write(fpu); 56 fpu__activate_fpstate_write(fpu);
@@ -65,7 +68,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
65 * update the header bits in the xsave header, indicating the 68 * update the header bits in the xsave header, indicating the
66 * presence of FP and SSE state. 69 * presence of FP and SSE state.
67 */ 70 */
68 if (cpu_has_xsave) 71 if (boot_cpu_has(X86_FEATURE_XSAVE))
69 fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; 72 fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
70 73
71 return ret; 74 return ret;
@@ -79,7 +82,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
79 struct xregs_state *xsave; 82 struct xregs_state *xsave;
80 int ret; 83 int ret;
81 84
82 if (!cpu_has_xsave) 85 if (!boot_cpu_has(X86_FEATURE_XSAVE))
83 return -ENODEV; 86 return -ENODEV;
84 87
85 fpu__activate_fpstate_read(fpu); 88 fpu__activate_fpstate_read(fpu);
@@ -108,7 +111,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
108 struct xregs_state *xsave; 111 struct xregs_state *xsave;
109 int ret; 112 int ret;
110 113
111 if (!cpu_has_xsave) 114 if (!boot_cpu_has(X86_FEATURE_XSAVE))
112 return -ENODEV; 115 return -ENODEV;
113 116
114 fpu__activate_fpstate_write(fpu); 117 fpu__activate_fpstate_write(fpu);
@@ -275,10 +278,10 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
275 278
276 fpu__activate_fpstate_read(fpu); 279 fpu__activate_fpstate_read(fpu);
277 280
278 if (!static_cpu_has(X86_FEATURE_FPU)) 281 if (!boot_cpu_has(X86_FEATURE_FPU))
279 return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); 282 return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
280 283
281 if (!cpu_has_fxsr) 284 if (!boot_cpu_has(X86_FEATURE_FXSR))
282 return user_regset_copyout(&pos, &count, &kbuf, &ubuf, 285 return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
283 &fpu->state.fsave, 0, 286 &fpu->state.fsave, 0,
284 -1); 287 -1);
@@ -306,10 +309,10 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
306 fpu__activate_fpstate_write(fpu); 309 fpu__activate_fpstate_write(fpu);
307 fpstate_sanitize_xstate(fpu); 310 fpstate_sanitize_xstate(fpu);
308 311
309 if (!static_cpu_has(X86_FEATURE_FPU)) 312 if (!boot_cpu_has(X86_FEATURE_FPU))
310 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); 313 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
311 314
312 if (!cpu_has_fxsr) 315 if (!boot_cpu_has(X86_FEATURE_FXSR))
313 return user_regset_copyin(&pos, &count, &kbuf, &ubuf, 316 return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
314 &fpu->state.fsave, 0, 317 &fpu->state.fsave, 0,
315 -1); 318 -1);
@@ -325,7 +328,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
325 * update the header bit in the xsave header, indicating the 328 * update the header bit in the xsave header, indicating the
326 * presence of FP. 329 * presence of FP.
327 */ 330 */
328 if (cpu_has_xsave) 331 if (boot_cpu_has(X86_FEATURE_XSAVE))
329 fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP; 332 fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP;
330 return ret; 333 return ret;
331} 334}
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index b48ef35b28d4..4ea2a59483c7 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -190,7 +190,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
190 */ 190 */
191void fpu__init_cpu_xstate(void) 191void fpu__init_cpu_xstate(void)
192{ 192{
193 if (!cpu_has_xsave || !xfeatures_mask) 193 if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask)
194 return; 194 return;
195 195
196 cr4_set_bits(X86_CR4_OSXSAVE); 196 cr4_set_bits(X86_CR4_OSXSAVE);
@@ -280,7 +280,7 @@ static void __init setup_xstate_comp(void)
280 xstate_comp_offsets[0] = 0; 280 xstate_comp_offsets[0] = 0;
281 xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space); 281 xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space);
282 282
283 if (!cpu_has_xsaves) { 283 if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
284 for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { 284 for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
285 if (xfeature_enabled(i)) { 285 if (xfeature_enabled(i)) {
286 xstate_comp_offsets[i] = xstate_offsets[i]; 286 xstate_comp_offsets[i] = xstate_offsets[i];
@@ -316,13 +316,13 @@ static void __init setup_init_fpu_buf(void)
316 WARN_ON_FPU(!on_boot_cpu); 316 WARN_ON_FPU(!on_boot_cpu);
317 on_boot_cpu = 0; 317 on_boot_cpu = 0;
318 318
319 if (!cpu_has_xsave) 319 if (!boot_cpu_has(X86_FEATURE_XSAVE))
320 return; 320 return;
321 321
322 setup_xstate_features(); 322 setup_xstate_features();
323 print_xstate_features(); 323 print_xstate_features();
324 324
325 if (cpu_has_xsaves) { 325 if (boot_cpu_has(X86_FEATURE_XSAVES)) {
326 init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; 326 init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask;
327 init_fpstate.xsave.header.xfeatures = xfeatures_mask; 327 init_fpstate.xsave.header.xfeatures = xfeatures_mask;
328 } 328 }
@@ -417,7 +417,7 @@ static int xfeature_size(int xfeature_nr)
417 */ 417 */
418static int using_compacted_format(void) 418static int using_compacted_format(void)
419{ 419{
420 return cpu_has_xsaves; 420 return boot_cpu_has(X86_FEATURE_XSAVES);
421} 421}
422 422
423static void __xstate_dump_leaves(void) 423static void __xstate_dump_leaves(void)
@@ -549,7 +549,7 @@ static unsigned int __init calculate_xstate_size(void)
549 unsigned int eax, ebx, ecx, edx; 549 unsigned int eax, ebx, ecx, edx;
550 unsigned int calculated_xstate_size; 550 unsigned int calculated_xstate_size;
551 551
552 if (!cpu_has_xsaves) { 552 if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
553 /* 553 /*
554 * - CPUID function 0DH, sub-function 0: 554 * - CPUID function 0DH, sub-function 0:
555 * EBX enumerates the size (in bytes) required by 555 * EBX enumerates the size (in bytes) required by
@@ -630,7 +630,7 @@ void __init fpu__init_system_xstate(void)
630 WARN_ON_FPU(!on_boot_cpu); 630 WARN_ON_FPU(!on_boot_cpu);
631 on_boot_cpu = 0; 631 on_boot_cpu = 0;
632 632
633 if (!cpu_has_xsave) { 633 if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
634 pr_info("x86/fpu: Legacy x87 FPU detected.\n"); 634 pr_info("x86/fpu: Legacy x87 FPU detected.\n");
635 return; 635 return;
636 } 636 }
@@ -667,7 +667,7 @@ void __init fpu__init_system_xstate(void)
667 pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", 667 pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
668 xfeatures_mask, 668 xfeatures_mask,
669 xstate_size, 669 xstate_size,
670 cpu_has_xsaves ? "compacted" : "standard"); 670 boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
671} 671}
672 672
673/* 673/*
@@ -678,7 +678,7 @@ void fpu__resume_cpu(void)
678 /* 678 /*
679 * Restore XCR0 on xsave capable CPUs: 679 * Restore XCR0 on xsave capable CPUs:
680 */ 680 */
681 if (cpu_has_xsave) 681 if (boot_cpu_has(X86_FEATURE_XSAVE))
682 xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); 682 xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
683} 683}
684 684
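The calculate_xstate_size() change above picks between two CPUID leaves: sub-leaf 0 of leaf 0xD (EBX) gives the standard-format XSAVE area size, sub-leaf 1 (EBX) gives the compacted size that XSAVES writes. The same leaves can be queried from user space on an XSAVE-capable CPU; a small sketch, assuming a compiler that ships cpuid.h:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Leaf 0xD, sub-leaf 0: EBX = standard-format XSAVE area size
	 * for the currently enabled features. */
	__cpuid_count(0x0d, 0, eax, ebx, ecx, edx);
	printf("standard xstate size:  %u bytes\n", ebx);

	/* Sub-leaf 1: EBX = size of the compacted area used by XSAVES. */
	__cpuid_count(0x0d, 1, eax, ebx, ecx, edx);
	printf("compacted xstate size: %u bytes\n", ebx);

	return 0;
}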
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 2911ef3a9f1c..d784bb547a9d 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -34,6 +34,8 @@ asmlinkage __visible void __init i386_start_kernel(void)
34 cr4_init_shadow(); 34 cr4_init_shadow();
35 sanitize_boot_params(&boot_params); 35 sanitize_boot_params(&boot_params);
36 36
37 x86_early_init_platform_quirks();
38
37 /* Call the subarch specific early setup function */ 39 /* Call the subarch specific early setup function */
38 switch (boot_params.hdr.hardware_subarch) { 40 switch (boot_params.hdr.hardware_subarch) {
39 case X86_SUBARCH_INTEL_MID: 41 case X86_SUBARCH_INTEL_MID:
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 1f4422d5c8d0..b72fb0b71dd1 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -182,6 +182,7 @@ void __init x86_64_start_reservations(char *real_mode_data)
182 if (!boot_params.hdr.version) 182 if (!boot_params.hdr.version)
183 copy_bootdata(__va(real_mode_data)); 183 copy_bootdata(__va(real_mode_data));
184 184
185 x86_early_init_platform_quirks();
185 reserve_ebda_region(); 186 reserve_ebda_region();
186 187
187 switch (boot_params.hdr.hardware_subarch) { 188 switch (boot_params.hdr.hardware_subarch) {
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index af1112980dd4..6f8902b0d151 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -555,62 +555,53 @@ early_idt_handler_common:
555 */ 555 */
556 cld 556 cld
557 557
558 cmpl $2,(%esp) # X86_TRAP_NMI
559 je .Lis_nmi # Ignore NMI
560
561 cmpl $2,%ss:early_recursion_flag
562 je hlt_loop
563 incl %ss:early_recursion_flag 558 incl %ss:early_recursion_flag
564 559
565 push %eax # 16(%esp) 560 /* The vector number is in pt_regs->gs */
566 push %ecx # 12(%esp)
567 push %edx # 8(%esp)
568 push %ds # 4(%esp)
569 push %es # 0(%esp)
570 movl $(__KERNEL_DS),%eax
571 movl %eax,%ds
572 movl %eax,%es
573
574 cmpl $(__KERNEL_CS),32(%esp)
575 jne 10f
576 561
577 leal 28(%esp),%eax # Pointer to %eip 562 cld
578 call early_fixup_exception 563 pushl %fs /* pt_regs->fs */
579 andl %eax,%eax 564 movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */
580 jnz ex_entry /* found an exception entry */ 565 pushl %es /* pt_regs->es */
581 566 movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */
58210: 567 pushl %ds /* pt_regs->ds */
583#ifdef CONFIG_PRINTK 568 movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */
584 xorl %eax,%eax 569 pushl %eax /* pt_regs->ax */
585 movw %ax,2(%esp) /* clean up the segment values on some cpus */ 570 pushl %ebp /* pt_regs->bp */
586 movw %ax,6(%esp) 571 pushl %edi /* pt_regs->di */
587 movw %ax,34(%esp) 572 pushl %esi /* pt_regs->si */
588 leal 40(%esp),%eax 573 pushl %edx /* pt_regs->dx */
589 pushl %eax /* %esp before the exception */ 574 pushl %ecx /* pt_regs->cx */
590 pushl %ebx 575 pushl %ebx /* pt_regs->bx */
591 pushl %ebp 576
592 pushl %esi 577 /* Fix up DS and ES */
593 pushl %edi 578 movl $(__KERNEL_DS), %ecx
594 movl %cr2,%eax 579 movl %ecx, %ds
595 pushl %eax 580 movl %ecx, %es
596 pushl (20+6*4)(%esp) /* trapno */ 581
597 pushl $fault_msg 582 /* Load the vector number into EDX */
598 call printk 583 movl PT_GS(%esp), %edx
599#endif 584
600 call dump_stack 585 /* Load GS into pt_regs->gs and clear high bits */
601hlt_loop: 586 movw %gs, PT_GS(%esp)
602 hlt 587 movw $0, PT_GS+2(%esp)
603 jmp hlt_loop 588
604 589 movl %esp, %eax /* args are pt_regs (EAX), trapnr (EDX) */
605ex_entry: 590 call early_fixup_exception
606 pop %es 591
607 pop %ds 592 popl %ebx /* pt_regs->bx */
608 pop %edx 593 popl %ecx /* pt_regs->cx */
609 pop %ecx 594 popl %edx /* pt_regs->dx */
610 pop %eax 595 popl %esi /* pt_regs->si */
611 decl %ss:early_recursion_flag 596 popl %edi /* pt_regs->di */
612.Lis_nmi: 597 popl %ebp /* pt_regs->bp */
613 addl $8,%esp /* drop vector number and error code */ 598 popl %eax /* pt_regs->ax */
599 popl %ds /* pt_regs->ds */
600 popl %es /* pt_regs->es */
601 popl %fs /* pt_regs->fs */
602 popl %gs /* pt_regs->gs */
603 decl %ss:early_recursion_flag
604 addl $4, %esp /* pop pt_regs->orig_ax */
614 iret 605 iret
615ENDPROC(early_idt_handler_common) 606ENDPROC(early_idt_handler_common)
616 607
@@ -647,10 +638,14 @@ ignore_int:
647 popl %eax 638 popl %eax
648#endif 639#endif
649 iret 640 iret
641
642hlt_loop:
643 hlt
644 jmp hlt_loop
650ENDPROC(ignore_int) 645ENDPROC(ignore_int)
651__INITDATA 646__INITDATA
652 .align 4 647 .align 4
653early_recursion_flag: 648GLOBAL(early_recursion_flag)
654 .long 0 649 .long 0
655 650
656__REFDATA 651__REFDATA
@@ -715,19 +710,6 @@ __INITRODATA
715int_msg: 710int_msg:
716 .asciz "Unknown interrupt or fault at: %p %p %p\n" 711 .asciz "Unknown interrupt or fault at: %p %p %p\n"
717 712
718fault_msg:
719/* fault info: */
720 .ascii "BUG: Int %d: CR2 %p\n"
721/* regs pushed in early_idt_handler: */
722 .ascii " EDI %p ESI %p EBP %p EBX %p\n"
723 .ascii " ESP %p ES %p DS %p\n"
724 .ascii " EDX %p ECX %p EAX %p\n"
725/* fault frame: */
726 .ascii " vec %p err %p EIP %p CS %p flg %p\n"
727 .ascii "Stack: %p %p %p %p %p %p %p %p\n"
728 .ascii " %p %p %p %p %p %p %p %p\n"
729 .asciz " %p %p %p %p %p %p %p %p\n"
730
731#include "../../x86/xen/xen-head.S" 713#include "../../x86/xen/xen-head.S"
732 714
733/* 715/*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 22fbf9df61bb..5df831ef1442 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -20,6 +20,7 @@
20#include <asm/processor-flags.h> 20#include <asm/processor-flags.h>
21#include <asm/percpu.h> 21#include <asm/percpu.h>
22#include <asm/nops.h> 22#include <asm/nops.h>
23#include "../entry/calling.h"
23 24
24#ifdef CONFIG_PARAVIRT 25#ifdef CONFIG_PARAVIRT
25#include <asm/asm-offsets.h> 26#include <asm/asm-offsets.h>
@@ -64,6 +65,14 @@ startup_64:
64 * tables and then reload them. 65 * tables and then reload them.
65 */ 66 */
66 67
68 /*
69 * Setup stack for verify_cpu(). "-8" because stack_start is defined
70 * this way, see below. Our best guess is a NULL ptr for stack
71 * termination heuristics and we don't want to break anything which
72 * might depend on it (kgdb, ...).
73 */
74 leaq (__end_init_task - 8)(%rip), %rsp
75
67 /* Sanitize CPU configuration */ 76 /* Sanitize CPU configuration */
68 call verify_cpu 77 call verify_cpu
69 78
@@ -350,90 +359,48 @@ early_idt_handler_common:
350 */ 359 */
351 cld 360 cld
352 361
353 cmpl $2,(%rsp) # X86_TRAP_NMI
354 je .Lis_nmi # Ignore NMI
355
356 cmpl $2,early_recursion_flag(%rip)
357 jz 1f
358 incl early_recursion_flag(%rip) 362 incl early_recursion_flag(%rip)
359 363
360 pushq %rax # 64(%rsp) 364 /* The vector number is currently in the pt_regs->di slot. */
361 pushq %rcx # 56(%rsp) 365 pushq %rsi /* pt_regs->si */
362 pushq %rdx # 48(%rsp) 366 movq 8(%rsp), %rsi /* RSI = vector number */
363 pushq %rsi # 40(%rsp) 367 movq %rdi, 8(%rsp) /* pt_regs->di = RDI */
364 pushq %rdi # 32(%rsp) 368 pushq %rdx /* pt_regs->dx */
365 pushq %r8 # 24(%rsp) 369 pushq %rcx /* pt_regs->cx */
366 pushq %r9 # 16(%rsp) 370 pushq %rax /* pt_regs->ax */
367 pushq %r10 # 8(%rsp) 371 pushq %r8 /* pt_regs->r8 */
368 pushq %r11 # 0(%rsp) 372 pushq %r9 /* pt_regs->r9 */
369 373 pushq %r10 /* pt_regs->r10 */
370 cmpl $__KERNEL_CS,96(%rsp) 374 pushq %r11 /* pt_regs->r11 */
371 jne 11f 375 pushq %rbx /* pt_regs->bx */
372 376 pushq %rbp /* pt_regs->bp */
373 cmpl $14,72(%rsp) # Page fault? 377 pushq %r12 /* pt_regs->r12 */
378 pushq %r13 /* pt_regs->r13 */
379 pushq %r14 /* pt_regs->r14 */
380 pushq %r15 /* pt_regs->r15 */
381
382 cmpq $14,%rsi /* Page fault? */
374 jnz 10f 383 jnz 10f
375 GET_CR2_INTO(%rdi) # can clobber any volatile register if pv 384 GET_CR2_INTO(%rdi) /* Can clobber any volatile register if pv */
376 call early_make_pgtable 385 call early_make_pgtable
377 andl %eax,%eax 386 andl %eax,%eax
378 jz 20f # All good 387 jz 20f /* All good */
379 388
38010: 38910:
381 leaq 88(%rsp),%rdi # Pointer to %rip 390 movq %rsp,%rdi /* RDI = pt_regs; RSI is already trapnr */
382 call early_fixup_exception 391 call early_fixup_exception
383 andl %eax,%eax 392
384 jnz 20f # Found an exception entry 39320:
385
38611:
387#ifdef CONFIG_EARLY_PRINTK
388 GET_CR2_INTO(%r9) # can clobber any volatile register if pv
389 movl 80(%rsp),%r8d # error code
390 movl 72(%rsp),%esi # vector number
391 movl 96(%rsp),%edx # %cs
392 movq 88(%rsp),%rcx # %rip
393 xorl %eax,%eax
394 leaq early_idt_msg(%rip),%rdi
395 call early_printk
396 cmpl $2,early_recursion_flag(%rip)
397 jz 1f
398 call dump_stack
399#ifdef CONFIG_KALLSYMS
400 leaq early_idt_ripmsg(%rip),%rdi
401 movq 40(%rsp),%rsi # %rip again
402 call __print_symbol
403#endif
404#endif /* EARLY_PRINTK */
4051: hlt
406 jmp 1b
407
40820: # Exception table entry found or page table generated
409 popq %r11
410 popq %r10
411 popq %r9
412 popq %r8
413 popq %rdi
414 popq %rsi
415 popq %rdx
416 popq %rcx
417 popq %rax
418 decl early_recursion_flag(%rip) 394 decl early_recursion_flag(%rip)
419.Lis_nmi: 395 jmp restore_regs_and_iret
420 addq $16,%rsp # drop vector number and error code
421 INTERRUPT_RETURN
422ENDPROC(early_idt_handler_common) 396ENDPROC(early_idt_handler_common)
423 397
424 __INITDATA 398 __INITDATA
425 399
426 .balign 4 400 .balign 4
427early_recursion_flag: 401GLOBAL(early_recursion_flag)
428 .long 0 402 .long 0
429 403
430#ifdef CONFIG_EARLY_PRINTK
431early_idt_msg:
432 .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
433early_idt_ripmsg:
434 .asciz "RIP %s\n"
435#endif /* CONFIG_EARLY_PRINTK */
436
437#define NEXT_PAGE(name) \ 404#define NEXT_PAGE(name) \
438 .balign PAGE_SIZE; \ 405 .balign PAGE_SIZE; \
439GLOBAL(name) 406GLOBAL(name)
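Both the 32-bit and 64-bit early IDT stubs above now build a full pt_regs frame and hand it, together with the vector number, to a single C entry point. The following is only a sketch of what that C side looks like; the real prototype and body are in the extable changes that accompany this series, so treat every call below as illustrative rather than authoritative:

/* Sketch of the C handler the rewritten stubs call. */
void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
{
	/* Ignore NMIs delivered this early in boot. */
	if (trapnr == X86_TRAP_NMI)
		return;

	/* Let the exception fixup table handle expected faults ... */
	if (fixup_exception(regs, trapnr))
		return;

	/* ... and die loudly on anything else. */
	early_printk("PANIC: early exception %02x at %04lx:%lx\n",
		     trapnr, regs->cs, regs->ip);
	for (;;)
		halt();
}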
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index a1f0e4a5c47e..f112af7aa62e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -54,7 +54,7 @@ struct hpet_dev {
54 char name[10]; 54 char name[10];
55}; 55};
56 56
57inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev) 57static inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev)
58{ 58{
59 return container_of(evtdev, struct hpet_dev, evt); 59 return container_of(evtdev, struct hpet_dev, evt);
60} 60}
@@ -773,7 +773,6 @@ static struct clocksource clocksource_hpet = {
773 .mask = HPET_MASK, 773 .mask = HPET_MASK,
774 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 774 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
775 .resume = hpet_resume_counter, 775 .resume = hpet_resume_counter,
776 .archdata = { .vclock_mode = VCLOCK_HPET },
777}; 776};
778 777
779static int hpet_clocksource_register(void) 778static int hpet_clocksource_register(void)
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index e565e0e4d216..fc25f698d792 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -13,6 +13,7 @@
13#include <linux/cpu.h> 13#include <linux/cpu.h>
14#include <asm/kprobes.h> 14#include <asm/kprobes.h>
15#include <asm/alternative.h> 15#include <asm/alternative.h>
16#include <asm/text-patching.h>
16 17
17#ifdef HAVE_JUMP_LABEL 18#ifdef HAVE_JUMP_LABEL
18 19
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 2af478e3fd4e..f2356bda2b05 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -19,8 +19,7 @@
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/mm.h> 20#include <linux/mm.h>
21#include <linux/efi.h> 21#include <linux/efi.h>
22#include <linux/verify_pefile.h> 22#include <linux/verification.h>
23#include <keys/system_keyring.h>
24 23
25#include <asm/bootparam.h> 24#include <asm/bootparam.h>
26#include <asm/setup.h> 25#include <asm/setup.h>
@@ -529,18 +528,9 @@ static int bzImage64_cleanup(void *loader_data)
529#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG 528#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG
530static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len) 529static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len)
531{ 530{
532 bool trusted; 531 return verify_pefile_signature(kernel, kernel_len,
533 int ret; 532 NULL,
534 533 VERIFYING_KEXEC_PE_SIGNATURE);
535 ret = verify_pefile_signature(kernel, kernel_len,
536 system_trusted_keyring,
537 VERIFYING_KEXEC_PE_SIGNATURE,
538 &trusted);
539 if (ret < 0)
540 return ret;
541 if (!trusted)
542 return -EKEYREJECTED;
543 return 0;
544} 534}
545#endif 535#endif
546 536
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 2da6ee9ae69b..04cde527d728 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -45,6 +45,7 @@
45#include <linux/uaccess.h> 45#include <linux/uaccess.h>
46#include <linux/memory.h> 46#include <linux/memory.h>
47 47
48#include <asm/text-patching.h>
48#include <asm/debugreg.h> 49#include <asm/debugreg.h>
49#include <asm/apicdef.h> 50#include <asm/apicdef.h>
50#include <asm/apic.h> 51#include <asm/apic.h>
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index ae703acb85c1..38cf7a741250 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -51,6 +51,7 @@
51#include <linux/ftrace.h> 51#include <linux/ftrace.h>
52#include <linux/frame.h> 52#include <linux/frame.h>
53 53
54#include <asm/text-patching.h>
54#include <asm/cacheflush.h> 55#include <asm/cacheflush.h>
55#include <asm/desc.h> 56#include <asm/desc.h>
56#include <asm/pgtable.h> 57#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 7b3b9d15c47a..4425f593f0ec 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -29,6 +29,7 @@
29#include <linux/kallsyms.h> 29#include <linux/kallsyms.h>
30#include <linux/ftrace.h> 30#include <linux/ftrace.h>
31 31
32#include <asm/text-patching.h>
32#include <asm/cacheflush.h> 33#include <asm/cacheflush.h>
33#include <asm/desc.h> 34#include <asm/desc.h>
34#include <asm/pgtable.h> 35#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index dc1207e2f193..eea2a6f72b31 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -285,14 +285,6 @@ static void __init paravirt_ops_setup(void)
285{ 285{
286 pv_info.name = "KVM"; 286 pv_info.name = "KVM";
287 287
288 /*
289 * KVM isn't paravirt in the sense of paravirt_enabled. A KVM
290 * guest kernel works like a bare metal kernel with additional
291 * features, and paravirt_enabled is about features that are
292 * missing.
293 */
294 pv_info.paravirt_enabled = 0;
295
296 if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) 288 if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
297 pv_cpu_ops.io_delay = kvm_io_delay; 289 pv_cpu_ops.io_delay = kvm_io_delay;
298 290
diff --git a/arch/x86/kernel/livepatch.c b/arch/x86/kernel/livepatch.c
deleted file mode 100644
index 92fc1a51f994..000000000000
--- a/arch/x86/kernel/livepatch.c
+++ /dev/null
@@ -1,70 +0,0 @@
1/*
2 * livepatch.c - x86-specific Kernel Live Patching Core
3 *
4 * Copyright (C) 2014 Seth Jennings <sjenning@redhat.com>
5 * Copyright (C) 2014 SUSE
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20
21#include <linux/module.h>
22#include <linux/uaccess.h>
23#include <asm/elf.h>
24#include <asm/livepatch.h>
25
26/**
27 * klp_write_module_reloc() - write a relocation in a module
28 * @mod: module in which the section to be modified is found
29 * @type: ELF relocation type (see asm/elf.h)
30 * @loc: address that the relocation should be written to
31 * @value: relocation value (sym address + addend)
32 *
33 * This function writes a relocation to the specified location for
34 * a particular module.
35 */
36int klp_write_module_reloc(struct module *mod, unsigned long type,
37 unsigned long loc, unsigned long value)
38{
39 size_t size = 4;
40 unsigned long val;
41 unsigned long core = (unsigned long)mod->core_layout.base;
42 unsigned long core_size = mod->core_layout.size;
43
44 switch (type) {
45 case R_X86_64_NONE:
46 return 0;
47 case R_X86_64_64:
48 val = value;
49 size = 8;
50 break;
51 case R_X86_64_32:
52 val = (u32)value;
53 break;
54 case R_X86_64_32S:
55 val = (s32)value;
56 break;
57 case R_X86_64_PC32:
58 val = (u32)(value - loc);
59 break;
60 default:
61 /* unsupported relocation type */
62 return -EINVAL;
63 }
64
65 if (loc < core || loc >= core + core_size)
66 /* loc does not point to any symbol inside the module */
67 return -EINVAL;
68
69 return probe_kernel_write((void *)loc, &val, size);
70}
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index ba7fbba9831b..5a294e48b185 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -538,3 +538,48 @@ overflow:
538 return -ENOEXEC; 538 return -ENOEXEC;
539} 539}
540#endif /* CONFIG_KEXEC_FILE */ 540#endif /* CONFIG_KEXEC_FILE */
541
542static int
543kexec_mark_range(unsigned long start, unsigned long end, bool protect)
544{
545 struct page *page;
546 unsigned int nr_pages;
547
548 /*
549 * For physical range: [start, end]. We must skip the unassigned
550 * crashk resource with zero-valued "end" member.
551 */
552 if (!end || start > end)
553 return 0;
554
555 page = pfn_to_page(start >> PAGE_SHIFT);
556 nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
557 if (protect)
558 return set_pages_ro(page, nr_pages);
559 else
560 return set_pages_rw(page, nr_pages);
561}
562
563static void kexec_mark_crashkres(bool protect)
564{
565 unsigned long control;
566
567 kexec_mark_range(crashk_low_res.start, crashk_low_res.end, protect);
568
569 /* Don't touch the control code page used in crash_kexec().*/
570 control = PFN_PHYS(page_to_pfn(kexec_crash_image->control_code_page));
571 /* Control code page is located in the 2nd page. */
572 kexec_mark_range(crashk_res.start, control + PAGE_SIZE - 1, protect);
573 control += KEXEC_CONTROL_PAGE_SIZE;
574 kexec_mark_range(control, crashk_res.end, protect);
575}
576
577void arch_kexec_protect_crashkres(void)
578{
579 kexec_mark_crashkres(true);
580}
581
582void arch_kexec_unprotect_crashkres(void)
583{
584 kexec_mark_crashkres(false);
585}
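kexec_mark_range() converts an inclusive physical byte range into whole pages before flipping them read-only or read-write. The page-count arithmetic is easy to sanity-check on its own; a user-space sketch with an illustrative 4 KiB page size:

#include <stdio.h>

#define PAGE_SHIFT 12

/* Same arithmetic as kexec_mark_range(): inclusive [start, end] range. */
static unsigned int range_to_pages(unsigned long start, unsigned long end)
{
	return (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
}

int main(void)
{
	/* A 16 KiB region starting at 1 MiB spans exactly four pages. */
	printf("%u pages\n", range_to_pages(0x100000, 0x100000 + 0x4000 - 1));
	return 0;
}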
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index ed48a9f465f8..61924222a9e1 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -182,7 +182,8 @@ GLOBAL(ftrace_graph_call)
182 jmp ftrace_stub 182 jmp ftrace_stub
183#endif 183#endif
184 184
185GLOBAL(ftrace_stub) 185/* This is weak to keep gas from relaxing the jumps */
186WEAK(ftrace_stub)
186 retq 187 retq
187END(ftrace_caller) 188END(ftrace_caller)
188 189
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 005c03e93fc5..477ae806c2fa 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -31,6 +31,7 @@
31#include <linux/jump_label.h> 31#include <linux/jump_label.h>
32#include <linux/random.h> 32#include <linux/random.h>
33 33
34#include <asm/text-patching.h>
34#include <asm/page.h> 35#include <asm/page.h>
35#include <asm/pgtable.h> 36#include <asm/pgtable.h>
36#include <asm/setup.h> 37#include <asm/setup.h>
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index f08ac28b8136..7b3b3f24c3ea 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -294,7 +294,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
294 294
295struct pv_info pv_info = { 295struct pv_info pv_info = {
296 .name = "bare hardware", 296 .name = "bare hardware",
297 .paravirt_enabled = 0,
298 .kernel_rpl = 0, 297 .kernel_rpl = 0,
299 .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ 298 .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
300 299
@@ -339,8 +338,10 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
339 .write_cr8 = native_write_cr8, 338 .write_cr8 = native_write_cr8,
340#endif 339#endif
341 .wbinvd = native_wbinvd, 340 .wbinvd = native_wbinvd,
342 .read_msr = native_read_msr_safe, 341 .read_msr = native_read_msr,
343 .write_msr = native_write_msr_safe, 342 .write_msr = native_write_msr,
343 .read_msr_safe = native_read_msr_safe,
344 .write_msr_safe = native_write_msr_safe,
344 .read_pmc = native_read_pmc, 345 .read_pmc = native_read_pmc,
345 .load_tr_desc = native_load_tr_desc, 346 .load_tr_desc = native_load_tr_desc,
346 .set_ldt = native_set_ldt, 347 .set_ldt = native_set_ldt,
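With this split, .read_msr/.write_msr now point at the plain (faulting) natives while the _safe variants are exposed as separate ops, so callers that probe possibly-absent MSRs are expected to use the _safe accessors explicitly. A hypothetical consumer, with the MSR constants chosen only for illustration:

#include <linux/kernel.h>
#include <asm/msr.h>

static void example_msr_probe(void)
{
	u64 val;

	/* Possibly-absent MSR: the _safe accessor returns an error
	 * instead of leaving an unhandled #GP behind. */
	if (rdmsrl_safe(MSR_IA32_FEATURE_CONTROL, &val))
		pr_info("feature-control MSR not implemented\n");
	else
		pr_info("feature-control MSR = 0x%llx\n", val);

	/* Architectural MSR known to exist: the plain accessor is fine. */
	rdmsrl(MSR_IA32_TSC, val);
	pr_info("TSC = %llu\n", val);
}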
diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c
index 35ccf75696eb..f712dfdf1357 100644
--- a/arch/x86/kernel/pci-iommu_table.c
+++ b/arch/x86/kernel/pci-iommu_table.c
@@ -72,7 +72,7 @@ void __init check_iommu_entries(struct iommu_table_entry *start,
72 } 72 }
73} 73}
74#else 74#else
75inline void check_iommu_entries(struct iommu_table_entry *start, 75void __init check_iommu_entries(struct iommu_table_entry *start,
76 struct iommu_table_entry *finish) 76 struct iommu_table_entry *finish)
77{ 77{
78} 78}
diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c
new file mode 100644
index 000000000000..b2f8a33b36ff
--- /dev/null
+++ b/arch/x86/kernel/platform-quirks.c
@@ -0,0 +1,35 @@
1#include <linux/kernel.h>
2#include <linux/init.h>
3
4#include <asm/setup.h>
5#include <asm/bios_ebda.h>
6
7void __init x86_early_init_platform_quirks(void)
8{
9 x86_platform.legacy.rtc = 1;
10 x86_platform.legacy.ebda_search = 0;
11 x86_platform.legacy.devices.pnpbios = 1;
12
13 switch (boot_params.hdr.hardware_subarch) {
14 case X86_SUBARCH_PC:
15 x86_platform.legacy.ebda_search = 1;
16 break;
17 case X86_SUBARCH_XEN:
18 case X86_SUBARCH_LGUEST:
19 case X86_SUBARCH_INTEL_MID:
20 case X86_SUBARCH_CE4100:
21 x86_platform.legacy.devices.pnpbios = 0;
22 x86_platform.legacy.rtc = 0;
23 break;
24 }
25
26 if (x86_platform.set_legacy_features)
27 x86_platform.set_legacy_features();
28}
29
30#if defined(CONFIG_PNPBIOS)
31bool __init arch_pnpbios_disabled(void)
32{
33 return x86_platform.legacy.devices.pnpbios == 0;
34}
35#endif
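Downstream code is expected to consult the new x86_platform.legacy flags instead of sniffing the subarch or paravirt state itself; the rtc.c hunk later in this diff does exactly that. A hypothetical legacy-device probe following the same shape (helper name is made up for illustration):

#include <linux/errno.h>
#include <asm/x86_init.h>

/* Hypothetical probe: bail out on platforms whose quirks say the
 * legacy ioport CMOS RTC does not exist. */
static int example_legacy_rtc_probe(void)
{
	if (!x86_platform.legacy.rtc)
		return -ENODEV;

	/* ... register the ioport-based RTC device here ... */
	return 0;
}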
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 2915d54e9dd5..96becbbb52e0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -97,10 +97,9 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
97/* 97/*
98 * Free current thread data structures etc.. 98 * Free current thread data structures etc..
99 */ 99 */
100void exit_thread(void) 100void exit_thread(struct task_struct *tsk)
101{ 101{
102 struct task_struct *me = current; 102 struct thread_struct *t = &tsk->thread;
103 struct thread_struct *t = &me->thread;
104 unsigned long *bp = t->io_bitmap_ptr; 103 unsigned long *bp = t->io_bitmap_ptr;
105 struct fpu *fpu = &t->fpu; 104 struct fpu *fpu = &t->fpu;
106 105
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6cbab31ac23a..6e789ca1f841 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -136,25 +136,6 @@ void release_thread(struct task_struct *dead_task)
136 } 136 }
137} 137}
138 138
139static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
140{
141 struct user_desc ud = {
142 .base_addr = addr,
143 .limit = 0xfffff,
144 .seg_32bit = 1,
145 .limit_in_pages = 1,
146 .useable = 1,
147 };
148 struct desc_struct *desc = t->thread.tls_array;
149 desc += tls;
150 fill_ldt(desc, &ud);
151}
152
153static inline u32 read_32bit_tls(struct task_struct *t, int tls)
154{
155 return get_desc_base(&t->thread.tls_array[tls]);
156}
157
158int copy_thread_tls(unsigned long clone_flags, unsigned long sp, 139int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
159 unsigned long arg, struct task_struct *p, unsigned long tls) 140 unsigned long arg, struct task_struct *p, unsigned long tls)
160{ 141{
@@ -169,9 +150,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
169 p->thread.io_bitmap_ptr = NULL; 150 p->thread.io_bitmap_ptr = NULL;
170 151
171 savesegment(gs, p->thread.gsindex); 152 savesegment(gs, p->thread.gsindex);
172 p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs; 153 p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
173 savesegment(fs, p->thread.fsindex); 154 savesegment(fs, p->thread.fsindex);
174 p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs; 155 p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
175 savesegment(es, p->thread.es); 156 savesegment(es, p->thread.es);
176 savesegment(ds, p->thread.ds); 157 savesegment(ds, p->thread.ds);
177 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); 158 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
@@ -210,7 +191,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
210 */ 191 */
211 if (clone_flags & CLONE_SETTLS) { 192 if (clone_flags & CLONE_SETTLS) {
212#ifdef CONFIG_IA32_EMULATION 193#ifdef CONFIG_IA32_EMULATION
213 if (is_ia32_task()) 194 if (in_ia32_syscall())
214 err = do_set_thread_area(p, -1, 195 err = do_set_thread_area(p, -1,
215 (struct user_desc __user *)tls, 0); 196 (struct user_desc __user *)tls, 0);
216 else 197 else
@@ -282,7 +263,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
282 struct fpu *next_fpu = &next->fpu; 263 struct fpu *next_fpu = &next->fpu;
283 int cpu = smp_processor_id(); 264 int cpu = smp_processor_id();
284 struct tss_struct *tss = &per_cpu(cpu_tss, cpu); 265 struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
285 unsigned fsindex, gsindex; 266 unsigned prev_fsindex, prev_gsindex;
286 fpu_switch_t fpu_switch; 267 fpu_switch_t fpu_switch;
287 268
288 fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu); 269 fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
@@ -292,8 +273,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
292 * 273 *
293 * (e.g. xen_load_tls()) 274 * (e.g. xen_load_tls())
294 */ 275 */
295 savesegment(fs, fsindex); 276 savesegment(fs, prev_fsindex);
296 savesegment(gs, gsindex); 277 savesegment(gs, prev_gsindex);
297 278
298 /* 279 /*
299 * Load TLS before restoring any segments so that segment loads 280 * Load TLS before restoring any segments so that segment loads
@@ -336,66 +317,104 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
336 * Switch FS and GS. 317 * Switch FS and GS.
337 * 318 *
338 * These are even more complicated than DS and ES: they have 319 * These are even more complicated than DS and ES: they have
339 * 64-bit bases are that controlled by arch_prctl. Those bases 320 * 64-bit bases are that controlled by arch_prctl. The bases
340 * only differ from the values in the GDT or LDT if the selector 321 * don't necessarily match the selectors, as user code can do
341 * is 0. 322 * any number of things to cause them to be inconsistent.
342 *
343 * Loading the segment register resets the hidden base part of
344 * the register to 0 or the value from the GDT / LDT. If the
345 * next base address zero, writing 0 to the segment register is
346 * much faster than using wrmsr to explicitly zero the base.
347 *
348 * The thread_struct.fs and thread_struct.gs values are 0
349 * if the fs and gs bases respectively are not overridden
350 * from the values implied by fsindex and gsindex. They
351 * are nonzero, and store the nonzero base addresses, if
352 * the bases are overridden.
353 *
354 * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should
355 * be impossible.
356 *
357 * Therefore we need to reload the segment registers if either
358 * the old or new selector is nonzero, and we need to override
359 * the base address if next thread expects it to be overridden.
360 * 323 *
361 * This code is unnecessarily slow in the case where the old and 324 * We don't promise to preserve the bases if the selectors are
362 * new indexes are zero and the new base is nonzero -- it will 325 * nonzero. We also don't promise to preserve the base if the
363 * unnecessarily write 0 to the selector before writing the new 326 * selector is zero and the base doesn't match whatever was
364 * base address. 327 * most recently passed to ARCH_SET_FS/GS. (If/when the
328 * FSGSBASE instructions are enabled, we'll need to offer
329 * stronger guarantees.)
365 * 330 *
366 * Note: This all depends on arch_prctl being the only way that 331 * As an invariant,
367 * user code can override the segment base. Once wrfsbase and 332 * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is
368 * wrgsbase are enabled, most of this code will need to change. 333 * impossible.
369 */ 334 */
370 if (unlikely(fsindex | next->fsindex | prev->fs)) { 335 if (next->fsindex) {
336 /* Loading a nonzero value into FS sets the index and base. */
371 loadsegment(fs, next->fsindex); 337 loadsegment(fs, next->fsindex);
372 338 } else {
373 /* 339 if (next->fsbase) {
374 * If user code wrote a nonzero value to FS, then it also 340 /* Next index is zero but next base is nonzero. */
375 * cleared the overridden base address. 341 if (prev_fsindex)
376 * 342 loadsegment(fs, 0);
377 * XXX: if user code wrote 0 to FS and cleared the base 343 wrmsrl(MSR_FS_BASE, next->fsbase);
378 * address itself, we won't notice and we'll incorrectly 344 } else {
379 * restore the prior base address next time we reschdule 345 /* Next base and index are both zero. */
380 * the process. 346 if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
381 */ 347 /*
382 if (fsindex) 348 * We don't know the previous base and can't
383 prev->fs = 0; 349 * find out without RDMSR. Forcibly clear it.
350 */
351 loadsegment(fs, __USER_DS);
352 loadsegment(fs, 0);
353 } else {
354 /*
355 * If the previous index is zero and ARCH_SET_FS
356 * didn't change the base, then the base is
357 * also zero and we don't need to do anything.
358 */
359 if (prev->fsbase || prev_fsindex)
360 loadsegment(fs, 0);
361 }
362 }
384 } 363 }
385 if (next->fs) 364 /*
386 wrmsrl(MSR_FS_BASE, next->fs); 365 * Save the old state and preserve the invariant.
387 prev->fsindex = fsindex; 366 * NB: if prev_fsindex == 0, then we can't reliably learn the base
367 * without RDMSR because Intel user code can zero it without telling
368 * us and AMD user code can program any 32-bit value without telling
369 * us.
370 */
371 if (prev_fsindex)
372 prev->fsbase = 0;
373 prev->fsindex = prev_fsindex;
388 374
389 if (unlikely(gsindex | next->gsindex | prev->gs)) { 375 if (next->gsindex) {
376 /* Loading a nonzero value into GS sets the index and base. */
390 load_gs_index(next->gsindex); 377 load_gs_index(next->gsindex);
391 378 } else {
392 /* This works (and fails) the same way as fsindex above. */ 379 if (next->gsbase) {
393 if (gsindex) 380 /* Next index is zero but next base is nonzero. */
394 prev->gs = 0; 381 if (prev_gsindex)
382 load_gs_index(0);
383 wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase);
384 } else {
385 /* Next base and index are both zero. */
386 if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
387 /*
388 * We don't know the previous base and can't
389 * find out without RDMSR. Forcibly clear it.
390 *
391 * This contains a pointless SWAPGS pair.
392 * Fixing it would involve an explicit check
393 * for Xen or a new pvop.
394 */
395 load_gs_index(__USER_DS);
396 load_gs_index(0);
397 } else {
398 /*
399 * If the previous index is zero and ARCH_SET_GS
400 * didn't change the base, then the base is
401 * also zero and we don't need to do anything.
402 */
403 if (prev->gsbase || prev_gsindex)
404 load_gs_index(0);
405 }
406 }
395 } 407 }
396 if (next->gs) 408 /*
397 wrmsrl(MSR_KERNEL_GS_BASE, next->gs); 409 * Save the old state and preserve the invariant.
398 prev->gsindex = gsindex; 410 * NB: if prev_gsindex == 0, then we can't reliably learn the base
411 * without RDMSR because Intel user code can zero it without telling
412 * us and AMD user code can program any 32-bit value without telling
413 * us.
414 */
415 if (prev_gsindex)
416 prev->gsbase = 0;
417 prev->gsindex = prev_gsindex;
399 418
400 switch_fpu_finish(next_fpu, fpu_switch); 419 switch_fpu_finish(next_fpu, fpu_switch);
401 420
@@ -513,81 +532,47 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
513 532
514 switch (code) { 533 switch (code) {
515 case ARCH_SET_GS: 534 case ARCH_SET_GS:
516 if (addr >= TASK_SIZE_OF(task)) 535 if (addr >= TASK_SIZE_MAX)
517 return -EPERM; 536 return -EPERM;
518 cpu = get_cpu(); 537 cpu = get_cpu();
519 /* handle small bases via the GDT because that's faster to 538 task->thread.gsindex = 0;
520 switch. */ 539 task->thread.gsbase = addr;
521 if (addr <= 0xffffffff) { 540 if (doit) {
522 set_32bit_tls(task, GS_TLS, addr); 541 load_gs_index(0);
523 if (doit) { 542 ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
524 load_TLS(&task->thread, cpu);
525 load_gs_index(GS_TLS_SEL);
526 }
527 task->thread.gsindex = GS_TLS_SEL;
528 task->thread.gs = 0;
529 } else {
530 task->thread.gsindex = 0;
531 task->thread.gs = addr;
532 if (doit) {
533 load_gs_index(0);
534 ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
535 }
536 } 543 }
537 put_cpu(); 544 put_cpu();
538 break; 545 break;
539 case ARCH_SET_FS: 546 case ARCH_SET_FS:
540 /* Not strictly needed for fs, but do it for symmetry 547 /* Not strictly needed for fs, but do it for symmetry
541 with gs */ 548 with gs */
542 if (addr >= TASK_SIZE_OF(task)) 549 if (addr >= TASK_SIZE_MAX)
543 return -EPERM; 550 return -EPERM;
544 cpu = get_cpu(); 551 cpu = get_cpu();
545 /* handle small bases via the GDT because that's faster to 552 task->thread.fsindex = 0;
546 switch. */ 553 task->thread.fsbase = addr;
547 if (addr <= 0xffffffff) { 554 if (doit) {
548 set_32bit_tls(task, FS_TLS, addr); 555 /* set the selector to 0 to not confuse __switch_to */
549 if (doit) { 556 loadsegment(fs, 0);
550 load_TLS(&task->thread, cpu); 557 ret = wrmsrl_safe(MSR_FS_BASE, addr);
551 loadsegment(fs, FS_TLS_SEL);
552 }
553 task->thread.fsindex = FS_TLS_SEL;
554 task->thread.fs = 0;
555 } else {
556 task->thread.fsindex = 0;
557 task->thread.fs = addr;
558 if (doit) {
559 /* set the selector to 0 to not confuse
560 __switch_to */
561 loadsegment(fs, 0);
562 ret = wrmsrl_safe(MSR_FS_BASE, addr);
563 }
564 } 558 }
565 put_cpu(); 559 put_cpu();
566 break; 560 break;
567 case ARCH_GET_FS: { 561 case ARCH_GET_FS: {
568 unsigned long base; 562 unsigned long base;
569 if (task->thread.fsindex == FS_TLS_SEL) 563 if (doit)
570 base = read_32bit_tls(task, FS_TLS);
571 else if (doit)
572 rdmsrl(MSR_FS_BASE, base); 564 rdmsrl(MSR_FS_BASE, base);
573 else 565 else
574 base = task->thread.fs; 566 base = task->thread.fsbase;
575 ret = put_user(base, (unsigned long __user *)addr); 567 ret = put_user(base, (unsigned long __user *)addr);
576 break; 568 break;
577 } 569 }
578 case ARCH_GET_GS: { 570 case ARCH_GET_GS: {
579 unsigned long base; 571 unsigned long base;
580 unsigned gsindex; 572 if (doit)
581 if (task->thread.gsindex == GS_TLS_SEL) 573 rdmsrl(MSR_KERNEL_GS_BASE, base);
582 base = read_32bit_tls(task, GS_TLS); 574 else
583 else if (doit) { 575 base = task->thread.gsbase;
584 savesegment(gs, gsindex);
585 if (gsindex)
586 rdmsrl(MSR_KERNEL_GS_BASE, base);
587 else
588 base = task->thread.gs;
589 } else
590 base = task->thread.gs;
591 ret = put_user(base, (unsigned long __user *)addr); 576 ret = put_user(base, (unsigned long __user *)addr);
592 break; 577 break;
593 } 578 }
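The rework stores user-set bases directly in thread.fsbase/gsbase and drops the old small-address GDT fast path, but the user-visible contract remains the arch_prctl() interface. A user-space sketch of setting and reading back the GS base (x86-64 only):

#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/prctl.h>

int main(void)
{
	static uint64_t tls_block[64];
	unsigned long base = 0;

	/* Point the user GS base at a private block, then read it back. */
	syscall(SYS_arch_prctl, ARCH_SET_GS, (unsigned long)tls_block);
	syscall(SYS_arch_prctl, ARCH_GET_GS, &base);

	printf("GS base %#lx, expected %#lx\n",
	       base, (unsigned long)tls_block);
	return 0;
}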
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 32e9d9cbb884..600edd225e81 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -303,29 +303,11 @@ static int set_segment_reg(struct task_struct *task,
303 303
304 switch (offset) { 304 switch (offset) {
305 case offsetof(struct user_regs_struct,fs): 305 case offsetof(struct user_regs_struct,fs):
306 /*
307 * If this is setting fs as for normal 64-bit use but
308 * setting fs_base has implicitly changed it, leave it.
309 */
310 if ((value == FS_TLS_SEL && task->thread.fsindex == 0 &&
311 task->thread.fs != 0) ||
312 (value == 0 && task->thread.fsindex == FS_TLS_SEL &&
313 task->thread.fs == 0))
314 break;
315 task->thread.fsindex = value; 306 task->thread.fsindex = value;
316 if (task == current) 307 if (task == current)
317 loadsegment(fs, task->thread.fsindex); 308 loadsegment(fs, task->thread.fsindex);
318 break; 309 break;
319 case offsetof(struct user_regs_struct,gs): 310 case offsetof(struct user_regs_struct,gs):
320 /*
321 * If this is setting gs as for normal 64-bit use but
322 * setting gs_base has implicitly changed it, leave it.
323 */
324 if ((value == GS_TLS_SEL && task->thread.gsindex == 0 &&
325 task->thread.gs != 0) ||
326 (value == 0 && task->thread.gsindex == GS_TLS_SEL &&
327 task->thread.gs == 0))
328 break;
329 task->thread.gsindex = value; 311 task->thread.gsindex = value;
330 if (task == current) 312 if (task == current)
331 load_gs_index(task->thread.gsindex); 313 load_gs_index(task->thread.gsindex);
@@ -410,23 +392,23 @@ static int putreg(struct task_struct *child,
410 392
411#ifdef CONFIG_X86_64 393#ifdef CONFIG_X86_64
412 case offsetof(struct user_regs_struct,fs_base): 394 case offsetof(struct user_regs_struct,fs_base):
413 if (value >= TASK_SIZE_OF(child)) 395 if (value >= TASK_SIZE_MAX)
414 return -EIO; 396 return -EIO;
415 /* 397 /*
416 * When changing the segment base, use do_arch_prctl 398 * When changing the segment base, use do_arch_prctl
417 * to set either thread.fs or thread.fsindex and the 399 * to set either thread.fs or thread.fsindex and the
418 * corresponding GDT slot. 400 * corresponding GDT slot.
419 */ 401 */
420 if (child->thread.fs != value) 402 if (child->thread.fsbase != value)
421 return do_arch_prctl(child, ARCH_SET_FS, value); 403 return do_arch_prctl(child, ARCH_SET_FS, value);
422 return 0; 404 return 0;
423 case offsetof(struct user_regs_struct,gs_base): 405 case offsetof(struct user_regs_struct,gs_base):
424 /* 406 /*
425 * Exactly the same here as the %fs handling above. 407 * Exactly the same here as the %fs handling above.
426 */ 408 */
427 if (value >= TASK_SIZE_OF(child)) 409 if (value >= TASK_SIZE_MAX)
428 return -EIO; 410 return -EIO;
429 if (child->thread.gs != value) 411 if (child->thread.gsbase != value)
430 return do_arch_prctl(child, ARCH_SET_GS, value); 412 return do_arch_prctl(child, ARCH_SET_GS, value);
431 return 0; 413 return 0;
432#endif 414#endif
@@ -453,31 +435,17 @@ static unsigned long getreg(struct task_struct *task, unsigned long offset)
453#ifdef CONFIG_X86_64 435#ifdef CONFIG_X86_64
454 case offsetof(struct user_regs_struct, fs_base): { 436 case offsetof(struct user_regs_struct, fs_base): {
455 /* 437 /*
456 * do_arch_prctl may have used a GDT slot instead of 438 * XXX: This will not behave as expected if called on
457 * the MSR. To userland, it appears the same either 439 * current or if fsindex != 0.
458 * way, except the %fs segment selector might not be 0.
459 */ 440 */
460 unsigned int seg = task->thread.fsindex; 441 return task->thread.fsbase;
461 if (task->thread.fs != 0)
462 return task->thread.fs;
463 if (task == current)
464 asm("movl %%fs,%0" : "=r" (seg));
465 if (seg != FS_TLS_SEL)
466 return 0;
467 return get_desc_base(&task->thread.tls_array[FS_TLS]);
468 } 442 }
469 case offsetof(struct user_regs_struct, gs_base): { 443 case offsetof(struct user_regs_struct, gs_base): {
470 /* 444 /*
471 * Exactly the same here as the %fs handling above. 445 * XXX: This will not behave as expected if called on
446 * current or if fsindex != 0.
472 */ 447 */
473 unsigned int seg = task->thread.gsindex; 448 return task->thread.gsbase;
474 if (task->thread.gs != 0)
475 return task->thread.gs;
476 if (task == current)
477 asm("movl %%gs,%0" : "=r" (seg));
478 if (seg != GS_TLS_SEL)
479 return 0;
480 return get_desc_base(&task->thread.tls_array[GS_TLS]);
481 } 449 }
482#endif 450#endif
483 } 451 }
@@ -1266,7 +1234,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
1266 compat_ulong_t caddr, compat_ulong_t cdata) 1234 compat_ulong_t caddr, compat_ulong_t cdata)
1267{ 1235{
1268#ifdef CONFIG_X86_X32_ABI 1236#ifdef CONFIG_X86_X32_ABI
1269 if (!is_ia32_task()) 1237 if (!in_ia32_syscall())
1270 return x32_arch_ptrace(child, request, caddr, cdata); 1238 return x32_arch_ptrace(child, request, caddr, cdata);
1271#endif 1239#endif
1272#ifdef CONFIG_IA32_EMULATION 1240#ifdef CONFIG_IA32_EMULATION
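With fsbase/gsbase stored directly in thread_struct, a debugger reads them through the ordinary user_regs_struct offsets rather than any TLS-slot special case. A minimal ptrace sketch (error handling omitted for brevity):

#include <stdio.h>
#include <stddef.h>
#include <signal.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <sys/wait.h>

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		_exit(0);
	}

	waitpid(pid, NULL, 0);

	/* fs_base/gs_base are plain slots in user_regs_struct. */
	long fsbase = ptrace(PTRACE_PEEKUSER, pid,
			     offsetof(struct user_regs_struct, fs_base), NULL);
	printf("child FS base: %#lx\n", fsbase);

	kill(pid, SIGKILL);
	return 0;
}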
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index ab0adc0fa5db..a9b31eb815f2 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -535,6 +535,15 @@ static void native_machine_emergency_restart(void)
535 mode = reboot_mode == REBOOT_WARM ? 0x1234 : 0; 535 mode = reboot_mode == REBOOT_WARM ? 0x1234 : 0;
536 *((unsigned short *)__va(0x472)) = mode; 536 *((unsigned short *)__va(0x472)) = mode;
537 537
538 /*
539 * If an EFI capsule has been registered with the firmware then
540 * override the reboot= parameter.
541 */
542 if (efi_capsule_pending(NULL)) {
543 pr_info("EFI capsule is pending, forcing EFI reboot.\n");
544 reboot_type = BOOT_EFI;
545 }
546
538 for (;;) { 547 for (;;) {
539 /* Could also try the reset bit in the Hammer NB */ 548 /* Could also try the reset bit in the Hammer NB */
540 switch (reboot_type) { 549 switch (reboot_type) {
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 4af8d063fb36..eceaa082ec3f 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -14,6 +14,7 @@
14#include <asm/time.h> 14#include <asm/time.h>
15#include <asm/intel-mid.h> 15#include <asm/intel-mid.h>
16#include <asm/rtc.h> 16#include <asm/rtc.h>
17#include <asm/setup.h>
17 18
18#ifdef CONFIG_X86_32 19#ifdef CONFIG_X86_32
19/* 20/*
@@ -185,22 +186,7 @@ static __init int add_rtc_cmos(void)
185 } 186 }
186 } 187 }
187#endif 188#endif
188 if (of_have_populated_dt()) 189 if (!x86_platform.legacy.rtc)
189 return 0;
190
191 /* Intel MID platforms don't have ioport rtc */
192 if (intel_mid_identify_cpu())
193 return -ENODEV;
194
195#ifdef CONFIG_ACPI
196 if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) {
197 /* This warning can likely go away again in a year or two. */
198 pr_info("ACPI: not registering RTC platform device\n");
199 return -ENODEV;
200 }
201#endif
202
203 if (paravirt_enabled() && !paravirt_has(RTC))
204 return -ENODEV; 190 return -ENODEV;
205 191
206 platform_device_register(&rtc_device); 192 platform_device_register(&rtc_device);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2367ae07eb76..c4e7b3991b60 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -398,6 +398,11 @@ static void __init reserve_initrd(void)
398 398
399 memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); 399 memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
400} 400}
401
402static void __init early_initrd_acpi_init(void)
403{
404 early_acpi_table_init((void *)initrd_start, initrd_end - initrd_start);
405}
401#else 406#else
402static void __init early_reserve_initrd(void) 407static void __init early_reserve_initrd(void)
403{ 408{
@@ -405,6 +410,9 @@ static void __init early_reserve_initrd(void)
405static void __init reserve_initrd(void) 410static void __init reserve_initrd(void)
406{ 411{
407} 412}
413static void __init early_initrd_acpi_init(void)
414{
415}
408#endif /* CONFIG_BLK_DEV_INITRD */ 416#endif /* CONFIG_BLK_DEV_INITRD */
409 417
410static void __init parse_setup_data(void) 418static void __init parse_setup_data(void)
@@ -1138,9 +1146,7 @@ void __init setup_arch(char **cmdline_p)
1138 1146
1139 reserve_initrd(); 1147 reserve_initrd();
1140 1148
1141#if defined(CONFIG_ACPI) && defined(CONFIG_BLK_DEV_INITRD) 1149 early_initrd_acpi_init();
1142 acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start);
1143#endif
1144 1150
1145 vsmp_init(); 1151 vsmp_init();
1146 1152
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 548ddf7d6fd2..22cc2f9f8aec 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -248,18 +248,17 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
248 if (config_enabled(CONFIG_X86_64)) 248 if (config_enabled(CONFIG_X86_64))
249 sp -= 128; 249 sp -= 128;
250 250
251 if (!onsigstack) { 251 /* This is the X/Open sanctioned signal stack switching. */
252 /* This is the X/Open sanctioned signal stack switching. */ 252 if (ka->sa.sa_flags & SA_ONSTACK) {
253 if (ka->sa.sa_flags & SA_ONSTACK) { 253 if (sas_ss_flags(sp) == 0)
254 if (current->sas_ss_size) 254 sp = current->sas_ss_sp + current->sas_ss_size;
255 sp = current->sas_ss_sp + current->sas_ss_size; 255 } else if (config_enabled(CONFIG_X86_32) &&
256 } else if (config_enabled(CONFIG_X86_32) && 256 !onsigstack &&
257 (regs->ss & 0xffff) != __USER_DS && 257 (regs->ss & 0xffff) != __USER_DS &&
258 !(ka->sa.sa_flags & SA_RESTORER) && 258 !(ka->sa.sa_flags & SA_RESTORER) &&
259 ka->sa.sa_restorer) { 259 ka->sa.sa_restorer) {
260 /* This is the legacy signal stack switching. */ 260 /* This is the legacy signal stack switching. */
261 sp = (unsigned long) ka->sa.sa_restorer; 261 sp = (unsigned long) ka->sa.sa_restorer;
262 }
263 } 262 }
264 263
265 if (fpu->fpstate_active) { 264 if (fpu->fpstate_active) {
@@ -391,7 +390,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
391 put_user_ex(&frame->uc, &frame->puc); 390 put_user_ex(&frame->uc, &frame->puc);
392 391
393 /* Create the ucontext. */ 392 /* Create the ucontext. */
394 if (cpu_has_xsave) 393 if (boot_cpu_has(X86_FEATURE_XSAVE))
395 put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); 394 put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
396 else 395 else
397 put_user_ex(0, &frame->uc.uc_flags); 396 put_user_ex(0, &frame->uc.uc_flags);
@@ -442,7 +441,7 @@ static unsigned long frame_uc_flags(struct pt_regs *regs)
442{ 441{
443 unsigned long flags; 442 unsigned long flags;
444 443
445 if (cpu_has_xsave) 444 if (boot_cpu_has(X86_FEATURE_XSAVE))
446 flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS; 445 flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS;
447 else 446 else
448 flags = UC_SIGCONTEXT_SS; 447 flags = UC_SIGCONTEXT_SS;
@@ -762,7 +761,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
762static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) 761static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
763{ 762{
764#ifdef CONFIG_X86_64 763#ifdef CONFIG_X86_64
765 if (is_ia32_task()) 764 if (in_ia32_syscall())
766 return __NR_ia32_restart_syscall; 765 return __NR_ia32_restart_syscall;
767#endif 766#endif
768#ifdef CONFIG_X86_X32_ABI 767#ifdef CONFIG_X86_X32_ABI
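The get_sigframe() change at the top of this file's hunk ties the switch to the alternate stack to sas_ss_flags(sp) instead of the old "already on the signal stack" heuristic. The user-visible contract is still the usual sigaltstack()/SA_ONSTACK pairing; a small sketch of that contract (demo only, printf in a handler is not async-signal-safe in general):

#include <stdio.h>
#include <signal.h>

static char altstack[64 * 1024];

static void handler(int sig)
{
	char probe;

	/* With SA_ONSTACK the handler's frame lives on the alternate stack. */
	printf("on altstack: %d\n",
	       &probe >= altstack && &probe < altstack + sizeof(altstack));
}

int main(void)
{
	stack_t ss = { .ss_sp = altstack, .ss_size = sizeof(altstack) };
	struct sigaction sa = { .sa_handler = handler, .sa_flags = SA_ONSTACK };

	sigaltstack(&ss, NULL);
	sigaction(SIGUSR1, &sa, NULL);
	raise(SIGUSR1);
	return 0;
}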
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index a2065d3b3b39..fafe8b923cac 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -332,6 +332,11 @@ static void __init smp_init_package_map(void)
332 * primary cores. 332 * primary cores.
333 */ 333 */
334 ncpus = boot_cpu_data.x86_max_cores; 334 ncpus = boot_cpu_data.x86_max_cores;
335 if (!ncpus) {
336 pr_warn("x86_max_cores == zero !?!?");
337 ncpus = 1;
338 }
339
335 __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); 340 __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
336 341
337 /* 342 /*
@@ -1231,7 +1236,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
1231 * If we couldn't find a local APIC, then get out of here now! 1236 * If we couldn't find a local APIC, then get out of here now!
1232 */ 1237 */
1233 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && 1238 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
1234 !cpu_has_apic) { 1239 !boot_cpu_has(X86_FEATURE_APIC)) {
1235 if (!disable_apic) { 1240 if (!disable_apic) {
1236 pr_err("BIOS bug, local APIC #%d not detected!...\n", 1241 pr_err("BIOS bug, local APIC #%d not detected!...\n",
1237 boot_cpu_physical_apicid); 1242 boot_cpu_physical_apicid);
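Note: the smp_init_package_map() guard matters because DIV_ROUND_UP() divides by its second argument, so a zero x86_max_cores reported by broken CPUID/firmware would be a divide-by-zero. A minimal sketch of the same macro and guard, with a made-up CPU count:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int total_cpus = 8;
	unsigned int ncpus = 0;		/* pretend CPUID reported nonsense */

	if (!ncpus) {
		fprintf(stderr, "x86_max_cores == zero, assuming 1\n");
		ncpus = 1;
	}

	printf("max logical packages = %u\n", DIV_ROUND_UP(total_cpus, ncpus));
	return 0;
}
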
diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c
index b285d4e8c68e..623965e86b65 100644
--- a/arch/x86/kernel/sysfb_efi.c
+++ b/arch/x86/kernel/sysfb_efi.c
@@ -68,6 +68,21 @@ struct efifb_dmi_info efifb_dmi_list[] = {
68 [M_UNKNOWN] = { NULL, 0, 0, 0, 0, OVERRIDE_NONE } 68 [M_UNKNOWN] = { NULL, 0, 0, 0, 0, OVERRIDE_NONE }
69}; 69};
70 70
71void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
72{
73 int i;
74
75 for (i = 0; i < M_UNKNOWN; i++) {
76 if (efifb_dmi_list[i].base != 0 &&
77 !strcmp(opt, efifb_dmi_list[i].optname)) {
78 si->lfb_base = efifb_dmi_list[i].base;
79 si->lfb_linelength = efifb_dmi_list[i].stride;
80 si->lfb_width = efifb_dmi_list[i].width;
81 si->lfb_height = efifb_dmi_list[i].height;
82 }
83 }
84}
85
71#define choose_value(dmivalue, fwvalue, field, flags) ({ \ 86#define choose_value(dmivalue, fwvalue, field, flags) ({ \
72 typeof(fwvalue) _ret_ = fwvalue; \ 87 typeof(fwvalue) _ret_ = fwvalue; \
73 if ((flags) & (field)) \ 88 if ((flags) & (field)) \
@@ -106,14 +121,24 @@ static int __init efifb_set_system(const struct dmi_system_id *id)
106 continue; 121 continue;
107 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { 122 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
108 resource_size_t start, end; 123 resource_size_t start, end;
124 unsigned long flags;
125
126 flags = pci_resource_flags(dev, i);
127 if (!(flags & IORESOURCE_MEM))
128 continue;
129
130 if (flags & IORESOURCE_UNSET)
131 continue;
132
133 if (pci_resource_len(dev, i) == 0)
134 continue;
109 135
110 start = pci_resource_start(dev, i); 136 start = pci_resource_start(dev, i);
111 if (start == 0)
112 break;
113 end = pci_resource_end(dev, i); 137 end = pci_resource_end(dev, i);
114 if (screen_info.lfb_base >= start && 138 if (screen_info.lfb_base >= start &&
115 screen_info.lfb_base < end) { 139 screen_info.lfb_base < end) {
116 found_bar = 1; 140 found_bar = 1;
141 break;
117 } 142 }
118 } 143 }
119 } 144 }
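Note: the sysfb_efi hunk stops treating "start == 0" as end-of-list and instead filters each BAR on its flags and length before checking whether the framebuffer base falls inside it. A generic, self-contained version of that filter-and-match loop; the struct and flag names below are illustrative, not the kernel's PCI resource API:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define RES_MEM		0x1	/* memory-backed resource */
#define RES_UNSET	0x2	/* address not assigned yet */

struct res { uint64_t start, len; unsigned int flags; };

static bool covers(const struct res *r, size_t n, uint64_t addr)
{
	for (size_t i = 0; i < n; i++) {
		if (!(r[i].flags & RES_MEM))
			continue;		/* e.g. an I/O port BAR */
		if (r[i].flags & RES_UNSET)
			continue;		/* not programmed */
		if (r[i].len == 0)
			continue;		/* empty BAR */
		if (addr >= r[i].start && addr < r[i].start + r[i].len)
			return true;		/* found the framebuffer BAR */
	}
	return false;
}

int main(void)
{
	struct res bars[] = {
		{ 0,          0,        RES_UNSET },	/* would have ended the old loop */
		{ 0x90000000, 0x800000, RES_MEM },
	};

	printf("lfb inside a BAR: %s\n",
	       covers(bars, 2, 0x90100000) ? "yes" : "no");
	return 0;
}
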
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index e72a07f20b05..9b0185fbe3eb 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -74,12 +74,6 @@ void __init tboot_probe(void)
74 return; 74 return;
75 } 75 }
76 76
77 /* only a natively booted kernel should be using TXT */
78 if (paravirt_enabled()) {
79 pr_warning("non-0 tboot_addr but pv_ops is enabled\n");
80 return;
81 }
82
83 /* Map and check for tboot UUID. */ 77 /* Map and check for tboot UUID. */
84 set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr); 78 set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr);
85 tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE); 79 tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE);
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 7fc5e843f247..9692a5e9fdab 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -114,6 +114,7 @@ int do_set_thread_area(struct task_struct *p, int idx,
114 int can_allocate) 114 int can_allocate)
115{ 115{
116 struct user_desc info; 116 struct user_desc info;
117 unsigned short __maybe_unused sel, modified_sel;
117 118
118 if (copy_from_user(&info, u_info, sizeof(info))) 119 if (copy_from_user(&info, u_info, sizeof(info)))
119 return -EFAULT; 120 return -EFAULT;
@@ -141,6 +142,47 @@ int do_set_thread_area(struct task_struct *p, int idx,
141 142
142 set_tls_desc(p, idx, &info, 1); 143 set_tls_desc(p, idx, &info, 1);
143 144
145 /*
146 * If DS, ES, FS, or GS points to the modified segment, forcibly
147 * refresh it. Only needed on x86_64 because x86_32 reloads them
148 * on return to user mode.
149 */
150 modified_sel = (idx << 3) | 3;
151
152 if (p == current) {
153#ifdef CONFIG_X86_64
154 savesegment(ds, sel);
155 if (sel == modified_sel)
156 loadsegment(ds, sel);
157
158 savesegment(es, sel);
159 if (sel == modified_sel)
160 loadsegment(es, sel);
161
162 savesegment(fs, sel);
163 if (sel == modified_sel)
164 loadsegment(fs, sel);
165
166 savesegment(gs, sel);
167 if (sel == modified_sel)
168 load_gs_index(sel);
169#endif
170
171#ifdef CONFIG_X86_32_LAZY_GS
172 savesegment(gs, sel);
173 if (sel == modified_sel)
174 loadsegment(gs, sel);
175#endif
176 } else {
177#ifdef CONFIG_X86_64
178 if (p->thread.fsindex == modified_sel)
179 p->thread.fsbase = info.base_addr;
180
181 if (p->thread.gsindex == modified_sel)
182 p->thread.gsbase = info.base_addr;
183#endif
184 }
185
144 return 0; 186 return 0;
145} 187}
146 188
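Note: the new tls.c code compares live segment registers against "(idx << 3) | 3". That value is just the x86 selector encoding: bits 15..3 hold the descriptor index, bit 2 the table indicator (0 = GDT), bits 1..0 the RPL. A self-contained sketch of the arithmetic only; no kernel APIs are used and the base index is an illustrative x86_64 value:

#include <stdio.h>

static unsigned short gdt_user_selector(unsigned int idx)
{
	return (unsigned short)((idx << 3) | 3);	/* GDT entry idx, RPL 3 */
}

int main(void)
{
	unsigned int gdt_entry_tls_min = 12;	/* illustrative 64-bit layout value */

	for (unsigned int i = 0; i < 3; i++)
		printf("TLS slot %u -> selector 0x%04x\n",
		       i, gdt_user_selector(gdt_entry_tls_min + i));
	return 0;
}
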
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 06cbe25861f1..d1590486204a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -51,6 +51,7 @@
51#include <asm/processor.h> 51#include <asm/processor.h>
52#include <asm/debugreg.h> 52#include <asm/debugreg.h>
53#include <linux/atomic.h> 53#include <linux/atomic.h>
54#include <asm/text-patching.h>
54#include <asm/ftrace.h> 55#include <asm/ftrace.h>
55#include <asm/traps.h> 56#include <asm/traps.h>
56#include <asm/desc.h> 57#include <asm/desc.h>
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c9c4c7ce3eb2..38ba6de56ede 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -36,7 +36,7 @@ static int __read_mostly tsc_unstable;
36 36
37/* native_sched_clock() is called before tsc_init(), so 37/* native_sched_clock() is called before tsc_init(), so
38 we must start with the TSC soft disabled to prevent 38 we must start with the TSC soft disabled to prevent
39 erroneous rdtsc usage on !cpu_has_tsc processors */ 39 erroneous rdtsc usage on !boot_cpu_has(X86_FEATURE_TSC) processors */
40static int __read_mostly tsc_disabled = -1; 40static int __read_mostly tsc_disabled = -1;
41 41
42static DEFINE_STATIC_KEY_FALSE(__use_tsc); 42static DEFINE_STATIC_KEY_FALSE(__use_tsc);
@@ -834,15 +834,15 @@ int recalibrate_cpu_khz(void)
834#ifndef CONFIG_SMP 834#ifndef CONFIG_SMP
835 unsigned long cpu_khz_old = cpu_khz; 835 unsigned long cpu_khz_old = cpu_khz;
836 836
837 if (cpu_has_tsc) { 837 if (!boot_cpu_has(X86_FEATURE_TSC))
838 tsc_khz = x86_platform.calibrate_tsc();
839 cpu_khz = tsc_khz;
840 cpu_data(0).loops_per_jiffy =
841 cpufreq_scale(cpu_data(0).loops_per_jiffy,
842 cpu_khz_old, cpu_khz);
843 return 0;
844 } else
845 return -ENODEV; 838 return -ENODEV;
839
840 tsc_khz = x86_platform.calibrate_tsc();
841 cpu_khz = tsc_khz;
842 cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
843 cpu_khz_old, cpu_khz);
844
845 return 0;
846#else 846#else
847 return -ENODEV; 847 return -ENODEV;
848#endif 848#endif
@@ -922,9 +922,6 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
922 struct cpufreq_freqs *freq = data; 922 struct cpufreq_freqs *freq = data;
923 unsigned long *lpj; 923 unsigned long *lpj;
924 924
925 if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
926 return 0;
927
928 lpj = &boot_cpu_data.loops_per_jiffy; 925 lpj = &boot_cpu_data.loops_per_jiffy;
929#ifdef CONFIG_SMP 926#ifdef CONFIG_SMP
930 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) 927 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
@@ -954,9 +951,9 @@ static struct notifier_block time_cpufreq_notifier_block = {
954 .notifier_call = time_cpufreq_notifier 951 .notifier_call = time_cpufreq_notifier
955}; 952};
956 953
957static int __init cpufreq_tsc(void) 954static int __init cpufreq_register_tsc_scaling(void)
958{ 955{
959 if (!cpu_has_tsc) 956 if (!boot_cpu_has(X86_FEATURE_TSC))
960 return 0; 957 return 0;
961 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) 958 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
962 return 0; 959 return 0;
@@ -965,7 +962,7 @@ static int __init cpufreq_tsc(void)
965 return 0; 962 return 0;
966} 963}
967 964
968core_initcall(cpufreq_tsc); 965core_initcall(cpufreq_register_tsc_scaling);
969 966
970#endif /* CONFIG_CPU_FREQ */ 967#endif /* CONFIG_CPU_FREQ */
971 968
@@ -1081,7 +1078,7 @@ static void __init check_system_tsc_reliable(void)
1081 */ 1078 */
1082int unsynchronized_tsc(void) 1079int unsynchronized_tsc(void)
1083{ 1080{
1084 if (!cpu_has_tsc || tsc_unstable) 1081 if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_unstable)
1085 return 1; 1082 return 1;
1086 1083
1087#ifdef CONFIG_SMP 1084#ifdef CONFIG_SMP
@@ -1205,7 +1202,7 @@ out:
1205 1202
1206static int __init init_tsc_clocksource(void) 1203static int __init init_tsc_clocksource(void)
1207{ 1204{
1208 if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz) 1205 if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz)
1209 return 0; 1206 return 0;
1210 1207
1211 if (tsc_clocksource_reliable) 1208 if (tsc_clocksource_reliable)
@@ -1242,7 +1239,7 @@ void __init tsc_init(void)
1242 u64 lpj; 1239 u64 lpj;
1243 int cpu; 1240 int cpu;
1244 1241
1245 if (!cpu_has_tsc) { 1242 if (!boot_cpu_has(X86_FEATURE_TSC)) {
1246 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); 1243 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
1247 return; 1244 return;
1248 } 1245 }
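Note: besides converting recalibrate_cpu_khz() to an early-return guard clause, the tsc.c hunks replace cpu_has_tsc with boot_cpu_has(X86_FEATURE_TSC), which ultimately reflects CPUID leaf 1, EDX bit 4. A user-space sketch of the same check followed by a TSC read, using GCC/Clang x86 builtins (x86 only, purely illustrative):

#include <cpuid.h>
#include <stdio.h>
#include <x86intrin.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* guard clause: bail out early if the TSC feature bit is absent */
	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx) || !(edx & (1u << 4))) {
		fprintf(stderr, "no TSC reported by CPUID\n");
		return 1;
	}

	unsigned long long t0 = __rdtsc();
	unsigned long long t1 = __rdtsc();
	printf("back-to-back rdtsc delta: %llu cycles\n", t1 - t0);
	return 0;
}
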
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 92ae6acac8a7..9911a0620f9a 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -23,6 +23,7 @@
23#include <asm/param.h> 23#include <asm/param.h>
24 24
25/* CPU reference clock frequency: in KHz */ 25/* CPU reference clock frequency: in KHz */
26#define FREQ_80 80000
26#define FREQ_83 83200 27#define FREQ_83 83200
27#define FREQ_100 99840 28#define FREQ_100 99840
28#define FREQ_133 133200 29#define FREQ_133 133200
@@ -56,6 +57,8 @@ static struct freq_desc freq_desc_tables[] = {
56 { 6, 0x37, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } }, 57 { 6, 0x37, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } },
57 /* ANN */ 58 /* ANN */
58 { 6, 0x5a, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_100, 0, 0, 0, 0 } }, 59 { 6, 0x5a, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_100, 0, 0, 0, 0 } },
60 /* AIRMONT */
61 { 6, 0x4c, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, FREQ_80, 0, 0, 0 } },
59}; 62};
60 63
61static int match_cpu(u8 family, u8 model) 64static int match_cpu(u8 family, u8 model)
@@ -92,7 +95,7 @@ unsigned long try_msr_calibrate_tsc(void)
92 95
93 if (freq_desc_tables[cpu_index].msr_plat) { 96 if (freq_desc_tables[cpu_index].msr_plat) {
94 rdmsr(MSR_PLATFORM_INFO, lo, hi); 97 rdmsr(MSR_PLATFORM_INFO, lo, hi);
95 ratio = (lo >> 8) & 0x1f; 98 ratio = (lo >> 8) & 0xff;
96 } else { 99 } else {
97 rdmsr(MSR_IA32_PERF_STATUS, lo, hi); 100 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
98 ratio = (hi >> 8) & 0x1f; 101 ratio = (hi >> 8) & 0x1f;
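Note: MSR_PLATFORM_INFO carries the maximum non-turbo ratio in bits 15:8, so the old 0x1f mask silently clipped any ratio above 31; widening it to 0xff keeps the full field. A sketch of the extraction with a made-up MSR low word (no real rdmsr here):

#include <stdio.h>

int main(void)
{
	unsigned int lo = 0x00002400;	/* pretend bits 15:8 hold ratio 0x24 = 36 */

	unsigned int old_ratio = (lo >> 8) & 0x1f;	/* 36 & 31 -> 4, truncated */
	unsigned int new_ratio = (lo >> 8) & 0xff;	/* full 8-bit field -> 36 */

	printf("old mask: %u, new mask: %u\n", old_ratio, new_ratio);
	return 0;
}
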
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index bf4db6eaec8f..6c1ff31d99ff 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -516,7 +516,7 @@ struct uprobe_xol_ops {
516 516
517static inline int sizeof_long(void) 517static inline int sizeof_long(void)
518{ 518{
519 return is_ia32_task() ? 4 : 8; 519 return in_ia32_syscall() ? 4 : 8;
520} 520}
521 521
522static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) 522static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
@@ -578,7 +578,7 @@ static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
578 riprel_post_xol(auprobe, regs); 578 riprel_post_xol(auprobe, regs);
579} 579}
580 580
581static struct uprobe_xol_ops default_xol_ops = { 581static const struct uprobe_xol_ops default_xol_ops = {
582 .pre_xol = default_pre_xol_op, 582 .pre_xol = default_pre_xol_op,
583 .post_xol = default_post_xol_op, 583 .post_xol = default_post_xol_op,
584 .abort = default_abort_op, 584 .abort = default_abort_op,
@@ -695,7 +695,7 @@ static void branch_clear_offset(struct arch_uprobe *auprobe, struct insn *insn)
695 0, insn->immediate.nbytes); 695 0, insn->immediate.nbytes);
696} 696}
697 697
698static struct uprobe_xol_ops branch_xol_ops = { 698static const struct uprobe_xol_ops branch_xol_ops = {
699 .emulate = branch_emulate_op, 699 .emulate = branch_emulate_op,
700 .post_xol = branch_post_xol_op, 700 .post_xol = branch_post_xol_op,
701}; 701};
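Note: const-qualifying an ops table of function pointers, as the uprobes hunk does, lets the linker place it in read-only data so the pointers cannot be silently rewritten at run time. A generic, self-contained sketch of the same idiom (names are illustrative):

#include <stdio.h>

struct ops {
	int  (*pre)(void);
	void (*post)(void);
};

static int  my_pre(void)  { puts("pre");  return 0; }
static void my_post(void) { puts("post"); }

static const struct ops default_ops = {	/* ends up in .rodata */
	.pre  = my_pre,
	.post = my_post,
};

int main(void)
{
	if (default_ops.pre() == 0)
		default_ops.post();
	return 0;
}
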
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 4c941f88d405..9297a002d8e5 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -334,7 +334,7 @@ SECTIONS
334 __brk_limit = .; 334 __brk_limit = .;
335 } 335 }
336 336
337 . = ALIGN(PAGE_SIZE); 337 . = ALIGN(PAGE_SIZE); /* keep VO_INIT_SIZE page aligned */
338 _end = .; 338 _end = .;
339 339
340 STABS_DEBUG 340 STABS_DEBUG