author		Ingo Molnar <mingo@kernel.org>	2016-06-08 07:02:16 -0400
committer	Ingo Molnar <mingo@kernel.org>	2016-06-08 07:02:16 -0400
commit		8e8c668927b029f6ccc350eb1aa936864cc4eb6f (patch)
tree		f91ec6d49e2b01de5b4b3d517209d13b216a3f0f /arch/x86/kernel
parent		f0133acc7d4835cfbb86393b7d2a4fba7519585b (diff)
parent		970442c599b22ccd644ebfe94d1d303bf6f87c05 (diff)
Merge branch 'x86/urgent' into x86/cpu, to pick up dependency
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/kernel')
69 files changed, 1633 insertions, 909 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 616ebd22ef9a..0503f5bfb18d 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -2,7 +2,11 @@
 # Makefile for the linux kernel.
 #
 
-extra-y := head_$(BITS).o head$(BITS).o head.o vmlinux.lds
+extra-y	:= head_$(BITS).o
+extra-y	+= head$(BITS).o
+extra-y	+= ebda.o
+extra-y	+= platform-quirks.o
+extra-y	+= vmlinux.lds
 
 CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 
@@ -79,7 +83,6 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
 obj-y				+= apic/
 obj-$(CONFIG_X86_REBOOTFIXUPS)	+= reboot_fixups_32.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
-obj-$(CONFIG_LIVEPATCH)	+= livepatch.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
 obj-$(CONFIG_FTRACE_SYSCALLS)	+= ftrace.o
 obj-$(CONFIG_X86_TSC)		+= trace_clock.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8c2f1ef6ca23..9414f84584e4 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -136,7 +136,7 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
 {
 	struct acpi_table_madt *madt = NULL;
 
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return -EINVAL;
 
 	madt = (struct acpi_table_madt *)table;
@@ -445,7 +445,6 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
 		polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
 
 	mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
-	acpi_penalize_sci_irq(bus_irq, trigger, polarity);
 
 	/*
 	 * stash over-ride to indicate we've been here
@@ -913,6 +912,15 @@ late_initcall(hpet_insert_resource);
 
 static int __init acpi_parse_fadt(struct acpi_table_header *table)
 {
+	if (!(acpi_gbl_FADT.boot_flags & ACPI_FADT_LEGACY_DEVICES)) {
+		pr_debug("ACPI: no legacy devices present\n");
+		x86_platform.legacy.devices.pnpbios = 0;
+	}
+
+	if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) {
+		pr_debug("ACPI: not registering RTC platform device\n");
+		x86_platform.legacy.rtc = 0;
+	}
 
 #ifdef CONFIG_X86_PM_TIMER
 	/* detect the location of the ACPI PM Timer */
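Aside: the two FADT checks added above only record what the firmware advertises; the new x86_platform.legacy flags are consumed later by platform-device setup paths. A minimal sketch of such a consumer, with a hypothetical example_add_rtc_cmos() initcall (illustrative only, not part of this commit):

    static int __init example_add_rtc_cmos(void)
    {
    	/* FADT declared no CMOS RTC: skip registering the device */
    	if (!x86_platform.legacy.rtc)
    		return -ENODEV;

    	/* ... register the rtc_cmos platform device here ... */
    	return 0;
    }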
@@ -951,7 +959,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
 {
 	int count;
 
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return -ENODEV;
 
 	/*
@@ -979,7 +987,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
 	int ret;
 	struct acpi_subtable_proc madt_proc[2];
 
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return -ENODEV;
 
 	/*
@@ -1125,7 +1133,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 	if (acpi_disabled || acpi_noirq)
 		return -ENODEV;
 
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return -ENODEV;
 
 	/*
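Aside: every cpu_has_apic test in this file (and in the apic/ files below) now goes through boot_cpu_has(X86_FEATURE_APIC), an explicit bit test against the boot CPU's capability bitmap. A simplified sketch of that test, assuming the usual x86_capability layout (the real <asm/cpufeature.h> adds compile-time folding and static-key fast paths):

    static inline int example_boot_cpu_has(u16 bit)
    {
    	/* one bit per feature in boot_cpu_data.x86_capability[] */
    	return test_bit(bit, (unsigned long *)boot_cpu_data.x86_capability);
    }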
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 25f909362b7a..5cb272a7a5a3 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -11,6 +11,7 @@
 #include <linux/stop_machine.h>
 #include <linux/slab.h>
 #include <linux/kdebug.h>
+#include <asm/text-patching.h>
 #include <asm/alternative.h>
 #include <asm/sections.h>
 #include <asm/pgtable.h>
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d7867c885bf8..60078a67d7e3 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -607,7 +607,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
 	long tapic = apic_read(APIC_TMCCT);
 	unsigned long pm = acpi_pm_read_early();
 
-	if (cpu_has_tsc)
+	if (boot_cpu_has(X86_FEATURE_TSC))
 		tsc = rdtsc();
 
 	switch (lapic_cal_loops++) {
@@ -668,7 +668,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
 	*delta = (long)res;
 
 	/* Correct the tsc counter value */
-	if (cpu_has_tsc) {
+	if (boot_cpu_has(X86_FEATURE_TSC)) {
 		res = (((u64)(*deltatsc)) * pm_100ms);
 		do_div(res, deltapm);
 		apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
@@ -760,7 +760,7 @@ static int __init calibrate_APIC_clock(void)
 	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
 		    lapic_timer_frequency);
 
-	if (cpu_has_tsc) {
+	if (boot_cpu_has(X86_FEATURE_TSC)) {
 		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
 			    "%ld.%04ld MHz.\n",
 			    (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
@@ -1085,7 +1085,7 @@ void lapic_shutdown(void)
 {
 	unsigned long flags;
 
-	if (!cpu_has_apic && !apic_from_smp_config())
+	if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
 		return;
 
 	local_irq_save(flags);
@@ -1134,7 +1134,7 @@ void __init init_bsp_APIC(void)
 	 * Don't do the setup now if we have a SMP BIOS as the
 	 * through-I/O-APIC virtual wire mode might be active.
 	 */
-	if (smp_found_config || !cpu_has_apic)
+	if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
 		return;
 
 	/*
@@ -1227,7 +1227,7 @@ void setup_local_APIC(void)
 	unsigned long long tsc = 0, ntsc;
 	long long max_loops = cpu_khz ? cpu_khz : 1000000;
 
-	if (cpu_has_tsc)
+	if (boot_cpu_has(X86_FEATURE_TSC))
 		tsc = rdtsc();
 
 	if (disable_apic) {
@@ -1311,7 +1311,7 @@ void setup_local_APIC(void)
 			break;
 	}
 	if (queued) {
-		if (cpu_has_tsc && cpu_khz) {
+		if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
 			ntsc = rdtsc();
 			max_loops = (cpu_khz << 10) - (ntsc - tsc);
 		} else
@@ -1445,7 +1445,7 @@ static void __x2apic_disable(void)
 {
 	u64 msr;
 
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return;
 
 	rdmsrl(MSR_IA32_APICBASE, msr);
@@ -1632,7 +1632,7 @@ void __init enable_IR_x2apic(void)
  */
 static int __init detect_init_APIC(void)
 {
-	if (!cpu_has_apic) {
+	if (!boot_cpu_has(X86_FEATURE_APIC)) {
 		pr_info("No local APIC present\n");
 		return -1;
 	}
@@ -1711,14 +1711,14 @@ static int __init detect_init_APIC(void)
 		goto no_apic;
 	case X86_VENDOR_INTEL:
 		if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
-		    (boot_cpu_data.x86 == 5 && cpu_has_apic))
+		    (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)))
 			break;
 		goto no_apic;
 	default:
 		goto no_apic;
 	}
 
-	if (!cpu_has_apic) {
+	if (!boot_cpu_has(X86_FEATURE_APIC)) {
 		/*
 		 * Over-ride BIOS and try to enable the local APIC only if
 		 * "lapic" specified.
@@ -2233,19 +2233,19 @@ int __init APIC_init_uniprocessor(void)
 		return -1;
 	}
 #ifdef CONFIG_X86_64
-	if (!cpu_has_apic) {
+	if (!boot_cpu_has(X86_FEATURE_APIC)) {
 		disable_apic = 1;
 		pr_info("Apic disabled by BIOS\n");
 		return -1;
 	}
 #else
-	if (!smp_found_config && !cpu_has_apic)
+	if (!smp_found_config && !boot_cpu_has(X86_FEATURE_APIC))
 		return -1;
 
 	/*
 	 * Complain if the BIOS pretends there is one.
 	 */
-	if (!cpu_has_apic &&
+	if (!boot_cpu_has(X86_FEATURE_APIC) &&
 	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
 		pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
 			boot_cpu_physical_apicid);
@@ -2426,7 +2426,7 @@ static void apic_pm_activate(void)
 static int __init init_lapic_sysfs(void)
 {
 	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
-	if (cpu_has_apic)
+	if (boot_cpu_has(X86_FEATURE_APIC))
 		register_syscore_ops(&lapic_syscore_ops);
 
 	return 0;
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 331a7a07c48f..13d19ed58514 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -100,13 +100,13 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask,
 
 static u32 noop_apic_read(u32 reg)
 {
-	WARN_ON_ONCE((cpu_has_apic && !disable_apic));
+	WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
 	return 0;
 }
 
 static void noop_apic_write(u32 reg, u32 v)
 {
-	WARN_ON_ONCE(cpu_has_apic && !disable_apic);
+	WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
 }
 
 struct apic apic_noop = {
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 045e424fb368..7788ce643bf4 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -18,7 +18,6 @@
 #include <linux/nmi.h>
 #include <linux/module.h>
 #include <linux/delay.h>
-#include <linux/seq_buf.h>
 
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 u64 hw_nmi_get_sample_period(int watchdog_thresh)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index fdb0fbfb1197..84e33ff5a6d5 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1454,7 +1454,7 @@ void native_disable_io_apic(void)
 		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
 	}
 
-	if (cpu_has_apic || apic_from_smp_config())
+	if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config())
 		disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 }
 
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 28bde88b0085..2a0f225afebd 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -230,7 +230,7 @@ int safe_smp_processor_id(void)
 {
 	int apicid, cpuid;
 
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return 0;
 
 	apicid = hard_smp_processor_id();
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index ef495511f019..a5e400afc563 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -944,7 +944,7 @@ static int __init print_ICs(void)
 	print_PIC();
 
 	/* don't print out if apic is not there */
-	if (!cpu_has_apic && !apic_from_smp_config())
+	if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
 		return 0;
 
 	print_local_APICs(show_lapic);
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 8f4942e2bcbb..29003154fafd 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -48,12 +48,35 @@ static u64 gru_start_paddr, gru_end_paddr;
 static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr;
 static u64 gru_dist_lmask, gru_dist_umask;
 static union uvh_apicid uvh_apicid;
+
+/* info derived from CPUID */
+static struct {
+	unsigned int apicid_shift;
+	unsigned int apicid_mask;
+	unsigned int socketid_shift;	/* aka pnode_shift for UV1/2/3 */
+	unsigned int pnode_mask;
+	unsigned int gpa_shift;
+} uv_cpuid;
+
 int uv_min_hub_revision_id;
 EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
 unsigned int uv_apicid_hibits;
 EXPORT_SYMBOL_GPL(uv_apicid_hibits);
 
 static struct apic apic_x2apic_uv_x;
+static struct uv_hub_info_s uv_hub_info_node0;
+
+/* Set this to use hardware error handler instead of kernel panic */
+static int disable_uv_undefined_panic = 1;
+unsigned long uv_undefined(char *str)
+{
+	if (likely(!disable_uv_undefined_panic))
+		panic("UV: error: undefined MMR: %s\n", str);
+	else
+		pr_crit("UV: error: undefined MMR: %s\n", str);
+	return ~0ul;	/* cause a machine fault */
+}
+EXPORT_SYMBOL(uv_undefined);
 
 static unsigned long __init uv_early_read_mmr(unsigned long addr)
 {
@@ -108,21 +131,71 @@ static int __init early_get_pnodeid(void)
 	case UV3_HUB_PART_NUMBER_X:
 		uv_min_hub_revision_id += UV3_HUB_REVISION_BASE;
 		break;
+	case UV4_HUB_PART_NUMBER:
+		uv_min_hub_revision_id += UV4_HUB_REVISION_BASE - 1;
+		break;
 	}
 
 	uv_hub_info->hub_revision = uv_min_hub_revision_id;
-	pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
+	uv_cpuid.pnode_mask = (1 << m_n_config.s.n_skt) - 1;
+	pnode = (node_id.s.node_id >> 1) & uv_cpuid.pnode_mask;
+	uv_cpuid.gpa_shift = 46;	/* default unless changed */
+
+	pr_info("UV: rev:%d part#:%x nodeid:%04x n_skt:%d pnmsk:%x pn:%x\n",
+		node_id.s.revision, node_id.s.part_number, node_id.s.node_id,
+		m_n_config.s.n_skt, uv_cpuid.pnode_mask, pnode);
 	return pnode;
 }
 
-static void __init early_get_apic_pnode_shift(void)
+/* [copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */
+#define SMT_LEVEL	0	/* leaf 0xb SMT level */
+#define INVALID_TYPE	0	/* leaf 0xb sub-leaf types */
+#define SMT_TYPE	1
+#define CORE_TYPE	2
+#define LEAFB_SUBTYPE(ecx)		(((ecx) >> 8) & 0xff)
+#define BITS_SHIFT_NEXT_LEVEL(eax)	((eax) & 0x1f)
+
+static void set_x2apic_bits(void)
+{
+	unsigned int eax, ebx, ecx, edx, sub_index;
+	unsigned int sid_shift;
+
+	cpuid(0, &eax, &ebx, &ecx, &edx);
+	if (eax < 0xb) {
+		pr_info("UV: CPU does not have CPUID.11\n");
+		return;
+	}
+	cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+	if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) {
+		pr_info("UV: CPUID.11 not implemented\n");
+		return;
+	}
+	sid_shift = BITS_SHIFT_NEXT_LEVEL(eax);
+	sub_index = 1;
+	do {
+		cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);
+		if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
+			sid_shift = BITS_SHIFT_NEXT_LEVEL(eax);
+			break;
+		}
+		sub_index++;
+	} while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
+	uv_cpuid.apicid_shift = 0;
+	uv_cpuid.apicid_mask = (~(-1 << sid_shift));
+	uv_cpuid.socketid_shift = sid_shift;
+}
+
+static void __init early_get_apic_socketid_shift(void)
 {
-	uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
-	if (!uvh_apicid.v)
-		/*
-		 * Old bios, use default value
-		 */
-		uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT;
+	if (is_uv2_hub() || is_uv3_hub())
+		uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
+
+	set_x2apic_bits();
+
+	pr_info("UV: apicid_shift:%d apicid_mask:0x%x\n",
+		uv_cpuid.apicid_shift, uv_cpuid.apicid_mask);
+	pr_info("UV: socketid_shift:%d pnode_mask:0x%x\n",
+		uv_cpuid.socketid_shift, uv_cpuid.pnode_mask);
 }
 
 /*
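Aside: set_x2apic_bits() above walks CPUID leaf 0xb sub-leaves until it reaches the CORE level and takes that level's bit-shift as the socket-id shift. A user-space sketch of the same walk, using GCC's <cpuid.h> instead of the kernel's cpuid_count() helper (illustrative only; it assumes leaf 0xb is supported):

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
    	unsigned int eax, ebx, ecx, edx, sub = 0, shift = 0;

    	do {
    		__cpuid_count(0xb, sub, eax, ebx, ecx, edx);
    		shift = eax & 0x1f;		/* BITS_SHIFT_NEXT_LEVEL */
    		if (((ecx >> 8) & 0xff) == 2)	/* CORE_TYPE: stop here */
    			break;
    		sub++;
    	} while (((ecx >> 8) & 0xff) != 0);	/* INVALID_TYPE ends walk */

    	printf("socket-id shift: %u\n", shift);
    	return 0;
    }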
@@ -150,13 +223,18 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	if (strncmp(oem_id, "SGI", 3) != 0)
 		return 0;
 
+	/* Setup early hub type field in uv_hub_info for Node 0 */
+	uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
+
 	/*
 	 * Determine UV arch type.
 	 *   SGI: UV100/1000
 	 *   SGI2: UV2000/3000
 	 *   SGI3: UV300 (truncated to 4 chars because of different varieties)
+	 *   SGI4: UV400 (truncated to 4 chars because of different varieties)
 	 */
 	uv_hub_info->hub_revision =
+		!strncmp(oem_id, "SGI4", 4) ? UV4_HUB_REVISION_BASE :
 		!strncmp(oem_id, "SGI3", 4) ? UV3_HUB_REVISION_BASE :
 		!strcmp(oem_id, "SGI2") ? UV2_HUB_REVISION_BASE :
 		!strcmp(oem_id, "SGI") ? UV1_HUB_REVISION_BASE : 0;
@@ -165,7 +243,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 		goto badbios;
 
 	pnodeid = early_get_pnodeid();
-	early_get_apic_pnode_shift();
+	early_get_apic_socketid_shift();
 	x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
 	x86_platform.nmi_init = uv_nmi_init;
 
@@ -211,17 +289,11 @@ int is_uv_system(void)
 }
 EXPORT_SYMBOL_GPL(is_uv_system);
 
-DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
-EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
-
-struct uv_blade_info *uv_blade_info;
-EXPORT_SYMBOL_GPL(uv_blade_info);
-
-short *uv_node_to_blade;
-EXPORT_SYMBOL_GPL(uv_node_to_blade);
+void **__uv_hub_info_list;
+EXPORT_SYMBOL_GPL(__uv_hub_info_list);
 
-short *uv_cpu_to_blade;
-EXPORT_SYMBOL_GPL(uv_cpu_to_blade);
+DEFINE_PER_CPU(struct uv_cpu_info_s, __uv_cpu_info);
+EXPORT_PER_CPU_SYMBOL_GPL(__uv_cpu_info);
 
 short uv_possible_blades;
 EXPORT_SYMBOL_GPL(uv_possible_blades);
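Aside: the export block above is the core of the rework: the old per-cpu __uv_hub_info copy and the uv_blade_info/uv_*_to_blade arrays give way to one hub-info structure per node plus a small per-cpu record that points at it. A sketch of the resulting lookup, with simplified field layouts (the real definitions and accessors live in asm/uv/uv_hub.h):

    /* each CPU keeps only a pointer to its node's shared hub info */
    struct uv_cpu_info_sketch {
    	void		*p_uv_hub_info;	/* -> per-node uv_hub_info_s */
    	unsigned char	blade_cpu_id;	/* cpu's index within the node */
    };

    /* cpu -> hub info is one dereference instead of a per-cpu copy */
    #define uv_cpu_hub_info_sketch(cpu) \
    	((struct uv_hub_info_s *)uv_cpu_info_per(cpu)->p_uv_hub_info)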
@@ -229,6 +301,115 @@ EXPORT_SYMBOL_GPL(uv_possible_blades);
 unsigned long sn_rtc_cycles_per_second;
 EXPORT_SYMBOL(sn_rtc_cycles_per_second);
 
+/* the following values are used for the per node hub info struct */
+static __initdata unsigned short *_node_to_pnode;
+static __initdata unsigned short _min_socket, _max_socket;
+static __initdata unsigned short _min_pnode, _max_pnode, _gr_table_len;
+static __initdata struct uv_gam_range_entry *uv_gre_table;
+static __initdata struct uv_gam_parameters *uv_gp_table;
+static __initdata unsigned short *_socket_to_node;
+static __initdata unsigned short *_socket_to_pnode;
+static __initdata unsigned short *_pnode_to_socket;
+static __initdata struct uv_gam_range_s *_gr_table;
+#define	SOCK_EMPTY	((unsigned short)~0)
+
+extern int uv_hub_info_version(void)
+{
+	return UV_HUB_INFO_VERSION;
+}
+EXPORT_SYMBOL(uv_hub_info_version);
+
+/* Build GAM range lookup table */
+static __init void build_uv_gr_table(void)
+{
+	struct uv_gam_range_entry *gre = uv_gre_table;
+	struct uv_gam_range_s *grt;
+	unsigned long last_limit = 0, ram_limit = 0;
+	int bytes, i, sid, lsid = -1;
+
+	if (!gre)
+		return;
+
+	bytes = _gr_table_len * sizeof(struct uv_gam_range_s);
+	grt = kzalloc(bytes, GFP_KERNEL);
+	BUG_ON(!grt);
+	_gr_table = grt;
+
+	for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+		if (gre->type == UV_GAM_RANGE_TYPE_HOLE) {
+			if (!ram_limit) {   /* mark hole between ram/non-ram */
+				ram_limit = last_limit;
+				last_limit = gre->limit;
+				lsid++;
+				continue;
+			}
+			last_limit = gre->limit;
+			pr_info("UV: extra hole in GAM RE table @%d\n",
+				(int)(gre - uv_gre_table));
+			continue;
+		}
+		if (_max_socket < gre->sockid) {
+			pr_err("UV: GAM table sockid(%d) too large(>%d) @%d\n",
+				gre->sockid, _max_socket,
+				(int)(gre - uv_gre_table));
+			continue;
+		}
+		sid = gre->sockid - _min_socket;
+		if (lsid < sid) {		/* new range */
+			grt = &_gr_table[sid];
+			grt->base = lsid;
+			grt->nasid = gre->nasid;
+			grt->limit = last_limit = gre->limit;
+			lsid = sid;
+			continue;
+		}
+		if (lsid == sid && !ram_limit) {	/* update range */
+			if (grt->limit == last_limit) {	/* .. if contiguous */
+				grt->limit = last_limit = gre->limit;
+				continue;
+			}
+		}
+		if (!ram_limit) {		/* non-contiguous ram range */
+			grt++;
+			grt->base = sid - 1;
+			grt->nasid = gre->nasid;
+			grt->limit = last_limit = gre->limit;
+			continue;
+		}
+		grt++;				/* non-contiguous/non-ram */
+		grt->base = grt - _gr_table;	/* base is this entry */
+		grt->nasid = gre->nasid;
+		grt->limit = last_limit = gre->limit;
+		lsid++;
+	}
+
+	/* shorten table if possible */
+	grt++;
+	i = grt - _gr_table;
+	if (i < _gr_table_len) {
+		void *ret;
+
+		bytes = i * sizeof(struct uv_gam_range_s);
+		ret = krealloc(_gr_table, bytes, GFP_KERNEL);
+		if (ret) {
+			_gr_table = ret;
+			_gr_table_len = i;
+		}
+	}
+
+	/* display resultant gam range table */
+	for (i = 0, grt = _gr_table; i < _gr_table_len; i++, grt++) {
+		int gb = grt->base;
+		unsigned long start = gb < 0 ? 0 :
+			(unsigned long)_gr_table[gb].limit << UV_GAM_RANGE_SHFT;
+		unsigned long end =
+			(unsigned long)grt->limit << UV_GAM_RANGE_SHFT;
+
+		pr_info("UV: GAM Range %2d %04x 0x%013lx-0x%013lx (%d)\n",
+			i, grt->nasid, start, end, gb);
+	}
+}
+
 static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
 {
 	unsigned long val;
@@ -355,7 +536,6 @@ static unsigned long set_apic_id(unsigned int id)
 
 static unsigned int uv_read_apic_id(void)
 {
-
 	return x2apic_get_apic_id(apic_read(APIC_ID));
 }
 
@@ -430,58 +610,38 @@ static void set_x2apic_extra_bits(int pnode)
 	__this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift);
 }
 
-/*
- * Called on boot cpu.
- */
-static __init int boot_pnode_to_blade(int pnode)
-{
-	int blade;
-
-	for (blade = 0; blade < uv_num_possible_blades(); blade++)
-		if (pnode == uv_blade_info[blade].pnode)
-			return blade;
-	BUG();
-}
-
-struct redir_addr {
-	unsigned long redirect;
-	unsigned long alias;
-};
-
+#define	UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_LENGTH	3
 #define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
 
-static __initdata struct redir_addr redir_addrs[] = {
-	{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR},
-	{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR},
-	{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR},
-};
-
-static unsigned char get_n_lshift(int m_val)
-{
-	union uv3h_gr0_gam_gr_config_u m_gr_config;
-
-	if (is_uv1_hub())
-		return m_val;
-
-	if (is_uv2_hub())
-		return m_val == 40 ? 40 : 39;
-
-	m_gr_config.v = uv_read_local_mmr(UV3H_GR0_GAM_GR_CONFIG);
-	return m_gr_config.s3.m_skt;
-}
-
 static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
 {
 	union uvh_rh_gam_alias210_overlay_config_2_mmr_u alias;
 	union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
+	unsigned long m_redirect;
+	unsigned long m_overlay;
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) {
-		alias.v = uv_read_local_mmr(redir_addrs[i].alias);
+	for (i = 0; i < UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_LENGTH; i++) {
+		switch (i) {
+		case 0:
+			m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR;
+			m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR;
+			break;
+		case 1:
+			m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR;
+			m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR;
+			break;
+		case 2:
+			m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR;
+			m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR;
+			break;
+		}
+		alias.v = uv_read_local_mmr(m_overlay);
 		if (alias.s.enable && alias.s.base == 0) {
 			*size = (1UL << alias.s.m_alias);
-			redirect.v = uv_read_local_mmr(redir_addrs[i].redirect);
-			*base = (unsigned long)redirect.s.dest_base << DEST_SHIFT;
+			redirect.v = uv_read_local_mmr(m_redirect);
+			*base = (unsigned long)redirect.s.dest_base
+							<< DEST_SHIFT;
 			return;
 		}
 	}
@@ -544,6 +704,8 @@ static __init void map_gru_high(int max_pnode)
 {
 	union uvh_rh_gam_gru_overlay_config_mmr_u gru;
 	int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
+	unsigned long mask = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK;
+	unsigned long base;
 
 	gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
 	if (!gru.s.enable) {
@@ -555,8 +717,9 @@ static __init void map_gru_high(int max_pnode)
 		map_gru_distributed(gru.v);
 		return;
 	}
-	map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb);
-	gru_start_paddr = ((u64)gru.s.base << shift);
+	base = (gru.v & mask) >> shift;
+	map_high("GRU", base, shift, shift, max_pnode, map_wb);
+	gru_start_paddr = ((u64)base << shift);
 	gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
 }
 
@@ -595,6 +758,7 @@ static __initdata struct mmioh_config mmiohs[] = {
 	},
 };
 
+/* UV3 & UV4 have identical MMIOH overlay configs */
 static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
 {
 	union uv3h_rh_gam_mmioh_overlay_config0_mmr_u overlay;
@@ -674,7 +838,7 @@ static __init void map_mmioh_high(int min_pnode, int max_pnode)
 	unsigned long mmr, base;
 	int shift, enable, m_io, n_io;
 
-	if (is_uv3_hub()) {
+	if (is_uv3_hub() || is_uv4_hub()) {
 		/* Map both MMIOH Regions */
 		map_mmioh_high_uv3(0, min_pnode, max_pnode);
 		map_mmioh_high_uv3(1, min_pnode, max_pnode);
@@ -739,8 +903,8 @@ static __init void uv_rtc_init(void)
  */
 static void uv_heartbeat(unsigned long ignored)
 {
-	struct timer_list *timer = &uv_hub_info->scir.timer;
-	unsigned char bits = uv_hub_info->scir.state;
+	struct timer_list *timer = &uv_scir_info->timer;
+	unsigned char bits = uv_scir_info->state;
 
 	/* flip heartbeat bit */
 	bits ^= SCIR_CPU_HEARTBEAT;
@@ -760,14 +924,14 @@
 
 static void uv_heartbeat_enable(int cpu)
 {
-	while (!uv_cpu_hub_info(cpu)->scir.enabled) {
-		struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
+	while (!uv_cpu_scir_info(cpu)->enabled) {
+		struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer;
 
 		uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
 		setup_timer(timer, uv_heartbeat, cpu);
 		timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
 		add_timer_on(timer, cpu);
-		uv_cpu_hub_info(cpu)->scir.enabled = 1;
+		uv_cpu_scir_info(cpu)->enabled = 1;
 
 		/* also ensure that boot cpu is enabled */
 		cpu = 0;
@@ -777,9 +941,9 @@ static void uv_heartbeat_enable(int cpu)
 #ifdef CONFIG_HOTPLUG_CPU
 static void uv_heartbeat_disable(int cpu)
 {
-	if (uv_cpu_hub_info(cpu)->scir.enabled) {
-		uv_cpu_hub_info(cpu)->scir.enabled = 0;
-		del_timer(&uv_cpu_hub_info(cpu)->scir.timer);
+	if (uv_cpu_scir_info(cpu)->enabled) {
+		uv_cpu_scir_info(cpu)->enabled = 0;
+		del_timer(&uv_cpu_scir_info(cpu)->timer);
 	}
 	uv_set_cpu_scir_bits(cpu, 0xff);
 }
@@ -862,157 +1026,475 @@ int uv_set_vga_state(struct pci_dev *pdev, bool decode,
 void uv_cpu_init(void)
 {
 	/* CPU 0 initialization will be done via uv_system_init. */
-	if (!uv_blade_info)
+	if (smp_processor_id() == 0)
 		return;
 
-	uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
+	uv_hub_info->nr_online_cpus++;
 
 	if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
 		set_x2apic_extra_bits(uv_hub_info->pnode);
 }
 
-void __init uv_system_init(void)
+struct mn {
+	unsigned char	m_val;
+	unsigned char	n_val;
+	unsigned char	m_shift;
+	unsigned char	n_lshift;
+};
+
+static void get_mn(struct mn *mnp)
 {
 	union uvh_rh_gam_config_mmr_u m_n_config;
-	union uvh_node_id_u node_id;
-	unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
-	int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
-	int gnode_extra, min_pnode = 999999, max_pnode = -1;
-	unsigned long mmr_base, present, paddr;
-	unsigned short pnode_mask;
-	unsigned char n_lshift;
-	char *hub = (is_uv1_hub() ? "UV100/1000" :
-		    (is_uv2_hub() ? "UV2000/3000" :
-		    (is_uv3_hub() ? "UV300" : NULL)));
+	union uv3h_gr0_gam_gr_config_u m_gr_config;
 
-	if (!hub) {
-		pr_err("UV: Unknown/unsupported UV hub\n");
-		return;
+	m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR);
+	mnp->n_val = m_n_config.s.n_skt;
+	if (is_uv4_hub()) {
+		mnp->m_val = 0;
+		mnp->n_lshift = 0;
+	} else if (is_uv3_hub()) {
+		mnp->m_val = m_n_config.s3.m_skt;
+		m_gr_config.v = uv_read_local_mmr(UV3H_GR0_GAM_GR_CONFIG);
+		mnp->n_lshift = m_gr_config.s3.m_skt;
+	} else if (is_uv2_hub()) {
+		mnp->m_val = m_n_config.s2.m_skt;
+		mnp->n_lshift = mnp->m_val == 40 ? 40 : 39;
+	} else if (is_uv1_hub()) {
+		mnp->m_val = m_n_config.s1.m_skt;
+		mnp->n_lshift = mnp->m_val;
 	}
-	pr_info("UV: Found %s hub\n", hub);
+	mnp->m_shift = mnp->m_val ? 64 - mnp->m_val : 0;
+}
 
-	/* We now only need to map the MMRs on UV1 */
-	if (is_uv1_hub())
-		map_low_mmrs();
+void __init uv_init_hub_info(struct uv_hub_info_s *hub_info)
+{
+	struct mn mn = {0};	/* avoid unitialized warnings */
+	union uvh_node_id_u node_id;
 
-	m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
-	m_val = m_n_config.s.m_skt;
-	n_val = m_n_config.s.n_skt;
-	pnode_mask = (1 << n_val) - 1;
-	n_lshift = get_n_lshift(m_val);
-	mmr_base =
-	    uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
-	    ~UV_MMR_ENABLE;
+	get_mn(&mn);
+	hub_info->m_val = mn.m_val;
+	hub_info->n_val = mn.n_val;
+	hub_info->m_shift = mn.m_shift;
+	hub_info->n_lshift = mn.n_lshift ? mn.n_lshift : 0;
+
+	hub_info->hub_revision = uv_hub_info->hub_revision;
+	hub_info->pnode_mask = uv_cpuid.pnode_mask;
+	hub_info->min_pnode = _min_pnode;
+	hub_info->min_socket = _min_socket;
+	hub_info->pnode_to_socket = _pnode_to_socket;
+	hub_info->socket_to_node = _socket_to_node;
+	hub_info->socket_to_pnode = _socket_to_pnode;
+	hub_info->gr_table_len = _gr_table_len;
+	hub_info->gr_table = _gr_table;
+	hub_info->gpa_mask = mn.m_val ?
+		(1UL << (mn.m_val + mn.n_val)) - 1 :
+		(1UL << uv_cpuid.gpa_shift) - 1;
 
 	node_id.v = uv_read_local_mmr(UVH_NODE_ID);
-	gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
-	gnode_upper = ((unsigned long)gnode_extra << m_val);
-	pr_info("UV: N:%d M:%d pnode_mask:0x%x gnode_upper/extra:0x%lx/0x%x n_lshift 0x%x\n",
-			n_val, m_val, pnode_mask, gnode_upper, gnode_extra,
-			n_lshift);
+	hub_info->gnode_extra =
+		(node_id.s.node_id & ~((1 << mn.n_val) - 1)) >> 1;
+
+	hub_info->gnode_upper =
+		((unsigned long)hub_info->gnode_extra << mn.m_val);
+
+	if (uv_gp_table) {
+		hub_info->global_mmr_base = uv_gp_table->mmr_base;
+		hub_info->global_mmr_shift = uv_gp_table->mmr_shift;
+		hub_info->global_gru_base = uv_gp_table->gru_base;
+		hub_info->global_gru_shift = uv_gp_table->gru_shift;
+		hub_info->gpa_shift = uv_gp_table->gpa_shift;
+		hub_info->gpa_mask = (1UL << hub_info->gpa_shift) - 1;
+	} else {
+		hub_info->global_mmr_base =
+			uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
+			~UV_MMR_ENABLE;
+		hub_info->global_mmr_shift = _UV_GLOBAL_MMR64_PNODE_SHIFT;
+	}
 
-	pr_info("UV: global MMR base 0x%lx\n", mmr_base);
+	get_lowmem_redirect(
+		&hub_info->lowmem_remap_base, &hub_info->lowmem_remap_top);
 
-	for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
-		uv_possible_blades +=
-		  hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8));
+	hub_info->apic_pnode_shift = uv_cpuid.socketid_shift;
 
-	/* uv_num_possible_blades() is really the hub count */
-	pr_info("UV: Found %d blades, %d hubs\n",
-			is_uv1_hub() ? uv_num_possible_blades() :
-			(uv_num_possible_blades() + 1) / 2,
-			uv_num_possible_blades());
+	/* show system specific info */
+	pr_info("UV: N:%d M:%d m_shift:%d n_lshift:%d\n",
+		hub_info->n_val, hub_info->m_val,
+		hub_info->m_shift, hub_info->n_lshift);
 
-	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
-	uv_blade_info = kzalloc(bytes, GFP_KERNEL);
-	BUG_ON(!uv_blade_info);
+	pr_info("UV: gpa_mask/shift:0x%lx/%d pnode_mask:0x%x apic_pns:%d\n",
+		hub_info->gpa_mask, hub_info->gpa_shift,
+		hub_info->pnode_mask, hub_info->apic_pnode_shift);
 
-	for (blade = 0; blade < uv_num_possible_blades(); blade++)
-		uv_blade_info[blade].memory_nid = -1;
+	pr_info("UV: mmr_base/shift:0x%lx/%ld gru_base/shift:0x%lx/%ld\n",
+		hub_info->global_mmr_base, hub_info->global_mmr_shift,
+		hub_info->global_gru_base, hub_info->global_gru_shift);
 
-	get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
+	pr_info("UV: gnode_upper:0x%lx gnode_extra:0x%x\n",
+		hub_info->gnode_upper, hub_info->gnode_extra);
+}
+
+static void __init decode_gam_params(unsigned long ptr)
+{
+	uv_gp_table = (struct uv_gam_parameters *)ptr;
+
+	pr_info("UV: GAM Params...\n");
+	pr_info("UV: mmr_base/shift:0x%llx/%d gru_base/shift:0x%llx/%d gpa_shift:%d\n",
+		uv_gp_table->mmr_base, uv_gp_table->mmr_shift,
+		uv_gp_table->gru_base, uv_gp_table->gru_shift,
+		uv_gp_table->gpa_shift);
+}
+
+static void __init decode_gam_rng_tbl(unsigned long ptr)
+{
+	struct uv_gam_range_entry *gre = (struct uv_gam_range_entry *)ptr;
+	unsigned long lgre = 0;
+	int index = 0;
+	int sock_min = 999999, pnode_min = 99999;
+	int sock_max = -1, pnode_max = -1;
+
+	uv_gre_table = gre;
+	for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+		if (!index) {
+			pr_info("UV: GAM Range Table...\n");
+			pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s %3s\n",
+				"Range", "", "Size", "Type", "NASID",
+				"SID", "PN", "PXM");
+		}
+		pr_info(
+		"UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x %3d\n",
+			index++,
+			(unsigned long)lgre << UV_GAM_RANGE_SHFT,
+			(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
+			((unsigned long)(gre->limit - lgre)) >>
+				(30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
+			gre->type, gre->nasid, gre->sockid,
+			gre->pnode, gre->pxm);
+
+		lgre = gre->limit;
+		if (sock_min > gre->sockid)
+			sock_min = gre->sockid;
+		if (sock_max < gre->sockid)
+			sock_max = gre->sockid;
+		if (pnode_min > gre->pnode)
+			pnode_min = gre->pnode;
+		if (pnode_max < gre->pnode)
+			pnode_max = gre->pnode;
+	}
+	_min_socket = sock_min;
+	_max_socket = sock_max;
+	_min_pnode = pnode_min;
+	_max_pnode = pnode_max;
+	_gr_table_len = index;
+	pr_info(
+	"UV: GRT: %d entries, sockets(min:%x,max:%x) pnodes(min:%x,max:%x)\n",
+		index, _min_socket, _max_socket, _min_pnode, _max_pnode);
+}
+
+static void __init decode_uv_systab(void)
+{
+	struct uv_systab *st;
+	int i;
+
+	st = uv_systab;
+	if ((!st || st->revision < UV_SYSTAB_VERSION_UV4) && !is_uv4_hub())
+		return;
+	if (st->revision != UV_SYSTAB_VERSION_UV4_LATEST) {
+		pr_crit(
+		"UV: BIOS UVsystab version(%x) mismatch, expecting(%x)\n",
+			st->revision, UV_SYSTAB_VERSION_UV4_LATEST);
+		BUG();
+	}
+
+	for (i = 0; st->entry[i].type != UV_SYSTAB_TYPE_UNUSED; i++) {
+		unsigned long ptr = st->entry[i].offset;
 
-	bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
-	uv_node_to_blade = kmalloc(bytes, GFP_KERNEL);
-	BUG_ON(!uv_node_to_blade);
-	memset(uv_node_to_blade, 255, bytes);
-
-	bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
-	uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL);
-	BUG_ON(!uv_cpu_to_blade);
-	memset(uv_cpu_to_blade, 255, bytes);
-
-	blade = 0;
+		if (!ptr)
+			continue;
+
+		ptr = ptr + (unsigned long)st;
+
+		switch (st->entry[i].type) {
+		case UV_SYSTAB_TYPE_GAM_PARAMS:
+			decode_gam_params(ptr);
+			break;
+
+		case UV_SYSTAB_TYPE_GAM_RNG_TBL:
+			decode_gam_rng_tbl(ptr);
+			break;
+		}
+	}
+}
+
+/*
+ * Setup physical blade translations from UVH_NODE_PRESENT_TABLE
+ * .. NB: UVH_NODE_PRESENT_TABLE is going away,
+ * .. being replaced by GAM Range Table
+ */
+static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info)
+{
+	int i, uv_pb = 0;
+
+	pr_info("UV: NODE_PRESENT_DEPTH = %d\n", UVH_NODE_PRESENT_TABLE_DEPTH);
 	for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
-		present = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
-		for (j = 0; j < 64; j++) {
-			if (!test_bit(j, &present))
-				continue;
-			pnode = (i * 64 + j) & pnode_mask;
-			uv_blade_info[blade].pnode = pnode;
-			uv_blade_info[blade].nr_possible_cpus = 0;
-			uv_blade_info[blade].nr_online_cpus = 0;
-			spin_lock_init(&uv_blade_info[blade].nmi_lock);
-			min_pnode = min(pnode, min_pnode);
-			max_pnode = max(pnode, max_pnode);
-			blade++;
+		unsigned long np;
+
+		np = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
+		if (np)
+			pr_info("UV: NODE_PRESENT(%d) = 0x%016lx\n", i, np);
+
+		uv_pb += hweight64(np);
+	}
+	if (uv_possible_blades != uv_pb)
+		uv_possible_blades = uv_pb;
+}
+
+static void __init build_socket_tables(void)
+{
+	struct uv_gam_range_entry *gre = uv_gre_table;
+	int num, nump;
+	int cpu, i, lnid;
+	int minsock = _min_socket;
+	int maxsock = _max_socket;
+	int minpnode = _min_pnode;
+	int maxpnode = _max_pnode;
+	size_t bytes;
+
+	if (!gre) {
+		if (is_uv1_hub() || is_uv2_hub() || is_uv3_hub()) {
+			pr_info("UV: No UVsystab socket table, ignoring\n");
+			return;		/* not required */
+		}
+		pr_crit(
+		"UV: Error: UVsystab address translations not available!\n");
+		BUG();
+	}
+
+	/* build socket id -> node id, pnode */
+	num = maxsock - minsock + 1;
+	bytes = num * sizeof(_socket_to_node[0]);
+	_socket_to_node = kmalloc(bytes, GFP_KERNEL);
+	_socket_to_pnode = kmalloc(bytes, GFP_KERNEL);
+
+	nump = maxpnode - minpnode + 1;
+	bytes = nump * sizeof(_pnode_to_socket[0]);
+	_pnode_to_socket = kmalloc(bytes, GFP_KERNEL);
+	BUG_ON(!_socket_to_node || !_socket_to_pnode || !_pnode_to_socket);
+
+	for (i = 0; i < num; i++)
+		_socket_to_node[i] = _socket_to_pnode[i] = SOCK_EMPTY;
+
+	for (i = 0; i < nump; i++)
+		_pnode_to_socket[i] = SOCK_EMPTY;
+
+	/* fill in pnode/node/addr conversion list values */
+	pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n");
+	for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+		if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
+			continue;
+		i = gre->sockid - minsock;
+		if (_socket_to_pnode[i] != SOCK_EMPTY)
+			continue;	/* duplicate */
+		_socket_to_pnode[i] = gre->pnode;
+		_socket_to_node[i] = gre->pxm;
+
+		i = gre->pnode - minpnode;
+		_pnode_to_socket[i] = gre->sockid;
+
+		pr_info(
+		"UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n",
+			gre->sockid, gre->type, gre->nasid,
+			_socket_to_pnode[gre->sockid - minsock],
+			_socket_to_node[gre->sockid - minsock],
+			_pnode_to_socket[gre->pnode - minpnode]);
+	}
+
+	/* check socket -> node values */
+	lnid = -1;
+	for_each_present_cpu(cpu) {
+		int nid = cpu_to_node(cpu);
+		int apicid, sockid;
+
+		if (lnid == nid)
+			continue;
+		lnid = nid;
+		apicid = per_cpu(x86_cpu_to_apicid, cpu);
+		sockid = apicid >> uv_cpuid.socketid_shift;
+		i = sockid - minsock;
+
+		if (nid != _socket_to_node[i]) {
+			pr_warn(
+			"UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n",
+				i, sockid, gre->type, _socket_to_node[i], nid);
+			_socket_to_node[i] = nid;
+		}
+	}
+
+	/* Setup physical blade to pnode translation from GAM Range Table */
+	bytes = num_possible_nodes() * sizeof(_node_to_pnode[0]);
+	_node_to_pnode = kmalloc(bytes, GFP_KERNEL);
+	BUG_ON(!_node_to_pnode);
+
+	for (lnid = 0; lnid < num_possible_nodes(); lnid++) {
+		unsigned short sockid;
+
+		for (sockid = minsock; sockid <= maxsock; sockid++) {
+			if (lnid == _socket_to_node[sockid - minsock]) {
+				_node_to_pnode[lnid] =
+					_socket_to_pnode[sockid - minsock];
+				break;
+			}
+		}
+		if (sockid > maxsock) {
+			pr_err("UV: socket for node %d not found!\n", lnid);
+			BUG();
+		}
+	}
+
+	/*
+	 * If socket id == pnode or socket id == node for all nodes,
+	 * system runs faster by removing corresponding conversion table.
+	 */
+	pr_info("UV: Checking socket->node/pnode for identity maps\n");
+	if (minsock == 0) {
+		for (i = 0; i < num; i++)
+			if (_socket_to_node[i] == SOCK_EMPTY ||
+				i != _socket_to_node[i])
+				break;
+		if (i >= num) {
+			kfree(_socket_to_node);
+			_socket_to_node = NULL;
+			pr_info("UV: 1:1 socket_to_node table removed\n");
 		}
 	}
+	if (minsock == minpnode) {
+		for (i = 0; i < num; i++)
+			if (_socket_to_pnode[i] != SOCK_EMPTY &&
+				_socket_to_pnode[i] != i + minpnode)
+				break;
+		if (i >= num) {
+			kfree(_socket_to_pnode);
+			_socket_to_pnode = NULL;
+			pr_info("UV: 1:1 socket_to_pnode table removed\n");
+		}
+	}
+}
+
+void __init uv_system_init(void)
+{
+	struct uv_hub_info_s hub_info = {0};
+	int bytes, cpu, nodeid;
+	unsigned short min_pnode = 9999, max_pnode = 0;
+	char *hub = is_uv4_hub() ? "UV400" :
+		    is_uv3_hub() ? "UV300" :
+		    is_uv2_hub() ? "UV2000/3000" :
+		    is_uv1_hub() ? "UV100/1000" : NULL;
+
+	if (!hub) {
+		pr_err("UV: Unknown/unsupported UV hub\n");
+		return;
+	}
+	pr_info("UV: Found %s hub\n", hub);
+
+	map_low_mmrs();
+
+	uv_bios_init();			/* get uv_systab for decoding */
+	decode_uv_systab();
+	build_socket_tables();
+	build_uv_gr_table();
+	uv_init_hub_info(&hub_info);
+	uv_possible_blades = num_possible_nodes();
+	if (!_node_to_pnode)
+		boot_init_possible_blades(&hub_info);
+
+	/* uv_num_possible_blades() is really the hub count */
+	pr_info("UV: Found %d hubs, %d nodes, %d cpus\n",
+			uv_num_possible_blades(),
+			num_possible_nodes(),
+			num_possible_cpus());
 
-	uv_bios_init();
 	uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id,
 			    &sn_region_size, &system_serial_number);
+	hub_info.coherency_domain_number = sn_coherency_id;
 	uv_rtc_init();
 
-	for_each_present_cpu(cpu) {
-		int apicid = per_cpu(x86_cpu_to_apicid, cpu);
+	bytes = sizeof(void *) * uv_num_possible_blades();
+	__uv_hub_info_list = kzalloc(bytes, GFP_KERNEL);
+	BUG_ON(!__uv_hub_info_list);
 
-		nid = cpu_to_node(cpu);
-		/*
-		 * apic_pnode_shift must be set before calling uv_apicid_to_pnode();
-		 */
-		uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
-		uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
-		uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision;
+	bytes = sizeof(struct uv_hub_info_s);
+	for_each_node(nodeid) {
+		struct uv_hub_info_s *new_hub;
 
-		uv_cpu_hub_info(cpu)->m_shift = 64 - m_val;
-		uv_cpu_hub_info(cpu)->n_lshift = n_lshift;
+		if (__uv_hub_info_list[nodeid]) {
+			pr_err("UV: Node %d UV HUB already initialized!?\n",
+				nodeid);
+			BUG();
+		}
+
+		/* Allocate new per hub info list */
+		new_hub = (nodeid == 0) ?
+			&uv_hub_info_node0 :
+			kzalloc_node(bytes, GFP_KERNEL, nodeid);
+		BUG_ON(!new_hub);
+		__uv_hub_info_list[nodeid] = new_hub;
+		new_hub = uv_hub_info_list(nodeid);
+		BUG_ON(!new_hub);
+		*new_hub = hub_info;
+
+		/* Use information from GAM table if available */
+		if (_node_to_pnode)
+			new_hub->pnode = _node_to_pnode[nodeid];
+		else /* Fill in during cpu loop */
+			new_hub->pnode = 0xffff;
+		new_hub->numa_blade_id = uv_node_to_blade_id(nodeid);
+		new_hub->memory_nid = -1;
+		new_hub->nr_possible_cpus = 0;
+		new_hub->nr_online_cpus = 0;
+	}
 
+	/* Initialize per cpu info */
+	for_each_possible_cpu(cpu) {
+		int apicid = per_cpu(x86_cpu_to_apicid, cpu);
+		int numa_node_id;
+		unsigned short pnode;
+
+		nodeid = cpu_to_node(cpu);
+		numa_node_id = numa_cpu_node(cpu);
 		pnode = uv_apicid_to_pnode(apicid);
-		blade = boot_pnode_to_blade(pnode);
-		lcpu = uv_blade_info[blade].nr_possible_cpus;
-		uv_blade_info[blade].nr_possible_cpus++;
-
-		/* Any node on the blade, else will contain -1. */
-		uv_blade_info[blade].memory_nid = nid;
-
-		uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
-		uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
-		uv_cpu_hub_info(cpu)->m_val = m_val;
-		uv_cpu_hub_info(cpu)->n_val = n_val;
-		uv_cpu_hub_info(cpu)->numa_blade_id = blade;
+
+		uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid);
+		uv_cpu_info_per(cpu)->blade_cpu_id =
+			uv_cpu_hub_info(cpu)->nr_possible_cpus++;
+		if (uv_cpu_hub_info(cpu)->memory_nid == -1)
+			uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu);
+		if (nodeid != numa_node_id &&	/* init memoryless node */
+		    uv_hub_info_list(numa_node_id)->pnode == 0xffff)
+			uv_hub_info_list(numa_node_id)->pnode = pnode;
+		else if (uv_cpu_hub_info(cpu)->pnode == 0xffff)
+			uv_cpu_hub_info(cpu)->pnode = pnode;
+		uv_cpu_scir_info(cpu)->offset = uv_scir_offset(apicid);
994 | uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; | ||
995 | uv_cpu_hub_info(cpu)->pnode = pnode; | ||
996 | uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1; | ||
997 | uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; | ||
998 | uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; | ||
999 | uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; | ||
1000 | uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; | ||
1001 | uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); | ||
1002 | uv_node_to_blade[nid] = blade; | ||
1003 | uv_cpu_to_blade[cpu] = blade; | ||
1004 | } | 1476 | } |
1005 | 1477 | ||
1006 | /* Add blade/pnode info for nodes without cpus */ | 1478 | for_each_node(nodeid) { |
1007 | for_each_online_node(nid) { | 1479 | unsigned short pnode = uv_hub_info_list(nodeid)->pnode; |
1008 | if (uv_node_to_blade[nid] >= 0) | 1480 | |
1009 | continue; | 1481 | /* Add pnode info for pre-GAM list nodes without cpus */ |
1010 | paddr = node_start_pfn(nid) << PAGE_SHIFT; | 1482 | if (pnode == 0xffff) { |
1011 | pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr)); | 1483 | unsigned long paddr; |
1012 | blade = boot_pnode_to_blade(pnode); | 1484 | |
1013 | uv_node_to_blade[nid] = blade; | 1485 | paddr = node_start_pfn(nodeid) << PAGE_SHIFT; |
1486 | pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr)); | ||
1487 | uv_hub_info_list(nodeid)->pnode = pnode; | ||
1488 | } | ||
1489 | min_pnode = min(pnode, min_pnode); | ||
1490 | max_pnode = max(pnode, max_pnode); | ||
1491 | pr_info("UV: UVHUB node:%2d pn:%02x nrcpus:%d\n", | ||
1492 | nodeid, | ||
1493 | uv_hub_info_list(nodeid)->pnode, | ||
1494 | uv_hub_info_list(nodeid)->nr_possible_cpus); | ||
1014 | } | 1495 | } |
1015 | 1496 | ||
1497 | pr_info("UV: min_pnode:%02x max_pnode:%02x\n", min_pnode, max_pnode); | ||
1016 | map_gru_high(max_pnode); | 1498 | map_gru_high(max_pnode); |
1017 | map_mmr_high(max_pnode); | 1499 | map_mmr_high(max_pnode); |
1018 | map_mmioh_high(min_pnode, max_pnode); | 1500 | map_mmioh_high(min_pnode, max_pnode); |
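The hub-info loop earlier in uv_system_init() shows a common NUMA allocation shape: each per-node descriptor is placed on its own node with kzalloc_node(), except node 0, which uses a static object because it is needed before the allocator is fully initialized. A hedged sketch of that shape (names here are illustrative, not the UV code's; error unwinding omitted):

/* Sketch: per-node state allocated node-locally, node 0 static. */
struct hub_state {
	unsigned short pnode;
};

static struct hub_state hub_state_node0;	/* usable before kmalloc is up */
static struct hub_state **hub_states;

static int __init alloc_hub_states(void)
{
	int node;

	hub_states = kzalloc(nr_node_ids * sizeof(*hub_states), GFP_KERNEL);
	if (!hub_states)
		return -ENOMEM;

	for_each_node(node) {
		struct hub_state *s = (node == 0) ? &hub_state_node0 :
				      kzalloc_node(sizeof(*s), GFP_KERNEL, node);

		if (!s)
			return -ENOMEM;	/* unwinding omitted in this sketch */
		hub_states[node] = s;
	}
	return 0;
}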
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 9307f182fe30..c7364bd633e1 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -2267,7 +2267,7 @@ static int __init apm_init(void) | |||
2267 | 2267 | ||
2268 | dmi_check_system(apm_dmi_table); | 2268 | dmi_check_system(apm_dmi_table); |
2269 | 2269 | ||
2270 | if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) { | 2270 | if (apm_info.bios.version == 0 || machine_is_olpc()) { |
2271 | printk(KERN_INFO "apm: BIOS not found.\n"); | 2271 | printk(KERN_INFO "apm: BIOS not found.\n"); |
2272 | return -ENODEV; | 2272 | return -ENODEV; |
2273 | } | 2273 | } |
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 5c042466f274..674134e9f5e5 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c | |||
@@ -80,6 +80,7 @@ void common(void) { | |||
80 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); | 80 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); |
81 | OFFSET(BP_version, boot_params, hdr.version); | 81 | OFFSET(BP_version, boot_params, hdr.version); |
82 | OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); | 82 | OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); |
83 | OFFSET(BP_init_size, boot_params, hdr.init_size); | ||
83 | OFFSET(BP_pref_address, boot_params, hdr.pref_address); | 84 | OFFSET(BP_pref_address, boot_params, hdr.pref_address); |
84 | OFFSET(BP_code32_start, boot_params, hdr.code32_start); | 85 | OFFSET(BP_code32_start, boot_params, hdr.code32_start); |
85 | 86 | ||
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2fec875392cc..c343a54bed39 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -565,14 +565,17 @@ static void early_init_amd(struct cpuinfo_x86 *c) | |||
565 | * can safely set X86_FEATURE_EXTD_APICID unconditionally for families | 565 | * can safely set X86_FEATURE_EXTD_APICID unconditionally for families |
566 | * after 16h. | 566 | * after 16h. |
567 | */ | 567 | */ |
568 | if (cpu_has_apic && c->x86 > 0x16) { | 568 | if (boot_cpu_has(X86_FEATURE_APIC)) { |
569 | set_cpu_cap(c, X86_FEATURE_EXTD_APICID); | 569 | if (c->x86 > 0x16) |
570 | } else if (cpu_has_apic && c->x86 >= 0xf) { | ||
571 | /* check CPU config space for extended APIC ID */ | ||
572 | unsigned int val; | ||
573 | val = read_pci_config(0, 24, 0, 0x68); | ||
574 | if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18))) | ||
575 | set_cpu_cap(c, X86_FEATURE_EXTD_APICID); | 570 | set_cpu_cap(c, X86_FEATURE_EXTD_APICID); |
571 | else if (c->x86 >= 0xf) { | ||
572 | /* check CPU config space for extended APIC ID */ | ||
573 | unsigned int val; | ||
574 | |||
575 | val = read_pci_config(0, 24, 0, 0x68); | ||
576 | if ((val >> 17 & 0x3) == 0x3) | ||
577 | set_cpu_cap(c, X86_FEATURE_EXTD_APICID); | ||
578 | } | ||
576 | } | 579 | } |
577 | #endif | 580 | #endif |
578 | 581 | ||
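The rewritten extended-APIC-ID test is behavior-preserving: masking bits 17 and 18 individually and comparing against the same mask accepts exactly the same values as shifting the two-bit field down and comparing it with 0x3. A stand-alone check in plain C:

#include <assert.h>

/* Both tests accept exactly the values with bits 17 and 18 set. */
static int old_test(unsigned int val)
{
	return (val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18));
}

static int new_test(unsigned int val)
{
	return (val >> 17 & 0x3) == 0x3;
}

int main(void)
{
	unsigned int v;

	/* Only bits 17 and 18 influence either test, so this is exhaustive. */
	for (v = 0; v < (1u << 20); v++)
		assert(old_test(v) == new_test(v));
	return 0;
}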
@@ -628,6 +631,7 @@ static void init_amd_k8(struct cpuinfo_x86 *c) | |||
628 | */ | 631 | */ |
629 | msr_set_bit(MSR_K7_HWCR, 6); | 632 | msr_set_bit(MSR_K7_HWCR, 6); |
630 | #endif | 633 | #endif |
634 | set_cpu_bug(c, X86_BUG_SWAPGS_FENCE); | ||
631 | } | 635 | } |
632 | 636 | ||
633 | static void init_amd_gh(struct cpuinfo_x86 *c) | 637 | static void init_amd_gh(struct cpuinfo_x86 *c) |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8394b3d1f94f..0fe6953f421c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <asm/mtrr.h> | 37 | #include <asm/mtrr.h> |
38 | #include <linux/numa.h> | 38 | #include <linux/numa.h> |
39 | #include <asm/asm.h> | 39 | #include <asm/asm.h> |
40 | #include <asm/bugs.h> | ||
40 | #include <asm/cpu.h> | 41 | #include <asm/cpu.h> |
41 | #include <asm/mce.h> | 42 | #include <asm/mce.h> |
42 | #include <asm/msr.h> | 43 | #include <asm/msr.h> |
@@ -270,6 +271,8 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) | |||
270 | static __init int setup_disable_smep(char *arg) | 271 | static __init int setup_disable_smep(char *arg) |
271 | { | 272 | { |
272 | setup_clear_cpu_cap(X86_FEATURE_SMEP); | 273 | setup_clear_cpu_cap(X86_FEATURE_SMEP); |
274 | /* Check for things that depend on SMEP being enabled: */ | ||
275 | check_mpx_erratum(&boot_cpu_data); | ||
273 | return 1; | 276 | return 1; |
274 | } | 277 | } |
275 | __setup("nosmep", setup_disable_smep); | 278 | __setup("nosmep", setup_disable_smep); |
@@ -310,6 +313,10 @@ static bool pku_disabled; | |||
310 | 313 | ||
311 | static __always_inline void setup_pku(struct cpuinfo_x86 *c) | 314 | static __always_inline void setup_pku(struct cpuinfo_x86 *c) |
312 | { | 315 | { |
316 | /* check the boot processor, plus compile options for PKU: */ | ||
317 | if (!cpu_feature_enabled(X86_FEATURE_PKU)) | ||
318 | return; | ||
319 | /* checks the actual processor's cpuid bits: */ | ||
313 | if (!cpu_has(c, X86_FEATURE_PKU)) | 320 | if (!cpu_has(c, X86_FEATURE_PKU)) |
314 | return; | 321 | return; |
315 | if (pku_disabled) | 322 | if (pku_disabled) |
@@ -430,7 +437,7 @@ void load_percpu_segment(int cpu) | |||
430 | #ifdef CONFIG_X86_32 | 437 | #ifdef CONFIG_X86_32 |
431 | loadsegment(fs, __KERNEL_PERCPU); | 438 | loadsegment(fs, __KERNEL_PERCPU); |
432 | #else | 439 | #else |
433 | loadsegment(gs, 0); | 440 | __loadsegment_simple(gs, 0); |
434 | wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); | 441 | wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); |
435 | #endif | 442 | #endif |
436 | load_stack_canary_segment(); | 443 | load_stack_canary_segment(); |
@@ -717,6 +724,13 @@ void get_cpu_cap(struct cpuinfo_x86 *c) | |||
717 | } | 724 | } |
718 | } | 725 | } |
719 | 726 | ||
727 | if (c->extended_cpuid_level >= 0x80000007) { | ||
728 | cpuid(0x80000007, &eax, &ebx, &ecx, &edx); | ||
729 | |||
730 | c->x86_capability[CPUID_8000_0007_EBX] = ebx; | ||
731 | c->x86_power = edx; | ||
732 | } | ||
733 | |||
720 | if (c->extended_cpuid_level >= 0x80000008) { | 734 | if (c->extended_cpuid_level >= 0x80000008) { |
721 | cpuid(0x80000008, &eax, &ebx, &ecx, &edx); | 735 | cpuid(0x80000008, &eax, &ebx, &ecx, &edx); |
722 | 736 | ||
@@ -729,9 +743,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c) | |||
729 | c->x86_phys_bits = 36; | 743 | c->x86_phys_bits = 36; |
730 | #endif | 744 | #endif |
731 | 745 | ||
732 | if (c->extended_cpuid_level >= 0x80000007) | ||
733 | c->x86_power = cpuid_edx(0x80000007); | ||
734 | |||
735 | if (c->extended_cpuid_level >= 0x8000000a) | 746 | if (c->extended_cpuid_level >= 0x8000000a) |
736 | c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); | 747 | c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); |
737 | 748 | ||
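The hunk above hoists the 0x80000007 read so that both EBX (RAS capability flags, stored as a capability word) and EDX (power management flags) come from a single cpuid() call instead of a separate cpuid_edx(). A user-space analogue using the compiler's <cpuid.h> helpers (a hypothetical test program, not kernel code):

/* Sketch: read extended leaf 0x80000007 (RAS/power flags) from user space. */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int max_ext = __get_cpuid_max(0x80000000, NULL);

	if (max_ext < 0x80000007)
		return 1;	/* leaf not implemented on this CPU */

	__cpuid(0x80000007, eax, ebx, ecx, edx);
	printf("RAS caps (ebx): %#x, power flags (edx): %#x\n", ebx, edx);
	return 0;
}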
@@ -862,30 +873,34 @@ static void detect_nopl(struct cpuinfo_x86 *c) | |||
862 | #else | 873 | #else |
863 | set_cpu_cap(c, X86_FEATURE_NOPL); | 874 | set_cpu_cap(c, X86_FEATURE_NOPL); |
864 | #endif | 875 | #endif |
876 | } | ||
865 | 877 | ||
878 | static void detect_null_seg_behavior(struct cpuinfo_x86 *c) | ||
879 | { | ||
880 | #ifdef CONFIG_X86_64 | ||
866 | /* | 881 | /* |
867 | * ESPFIX is a strange bug. All real CPUs have it. Paravirt | 882 | * Empirically, writing zero to a segment selector on AMD does |
868 | * systems that run Linux at CPL > 0 may or may not have the | 883 | * not clear the base, whereas writing zero to a segment |
869 | * issue, but, even if they have the issue, there's absolutely | 884 | * selector on Intel does clear the base. Intel's behavior |
870 | * nothing we can do about it because we can't use the real IRET | 885 | * allows slightly faster context switches in the common case |
871 | * instruction. | 886 | * where GS is unused by the prev and next threads. |
872 | * | 887 | * |
873 | * NB: For the time being, only 32-bit kernels support | 888 | * Since neither vendor documents this anywhere that I can see, |
874 | * X86_BUG_ESPFIX as such. 64-bit kernels directly choose | 889 | * detect it directly instead of hardcoding the choice by |
875 | * whether to apply espfix using paravirt hooks. If any | 890 | * vendor. |
876 | * non-paravirt system ever shows up that does *not* have the | 891 | * |
877 | * ESPFIX issue, we can change this. | 892 | * I've designated AMD's behavior as the "bug" because it's |
893 | * counterintuitive and less friendly. | ||
878 | */ | 894 | */ |
879 | #ifdef CONFIG_X86_32 | 895 | |
880 | #ifdef CONFIG_PARAVIRT | 896 | unsigned long old_base, tmp; |
881 | do { | 897 | rdmsrl(MSR_FS_BASE, old_base); |
882 | extern void native_iret(void); | 898 | wrmsrl(MSR_FS_BASE, 1); |
883 | if (pv_cpu_ops.iret == native_iret) | 899 | loadsegment(fs, 0); |
884 | set_cpu_bug(c, X86_BUG_ESPFIX); | 900 | rdmsrl(MSR_FS_BASE, tmp); |
885 | } while (0); | 901 | if (tmp != 0) |
886 | #else | 902 | set_cpu_bug(c, X86_BUG_NULL_SEG); |
887 | set_cpu_bug(c, X86_BUG_ESPFIX); | 903 | wrmsrl(MSR_FS_BASE, old_base); |
888 | #endif | ||
889 | #endif | 904 | #endif |
890 | } | 905 | } |
891 | 906 | ||
@@ -921,6 +936,33 @@ static void generic_identify(struct cpuinfo_x86 *c) | |||
921 | get_model_name(c); /* Default name */ | 936 | get_model_name(c); /* Default name */ |
922 | 937 | ||
923 | detect_nopl(c); | 938 | detect_nopl(c); |
939 | |||
940 | detect_null_seg_behavior(c); | ||
941 | |||
942 | /* | ||
943 | * ESPFIX is a strange bug. All real CPUs have it. Paravirt | ||
944 | * systems that run Linux at CPL > 0 may or may not have the | ||
945 | * issue, but, even if they have the issue, there's absolutely | ||
946 | * nothing we can do about it because we can't use the real IRET | ||
947 | * instruction. | ||
948 | * | ||
949 | * NB: For the time being, only 32-bit kernels support | ||
950 | * X86_BUG_ESPFIX as such. 64-bit kernels directly choose | ||
951 | * whether to apply espfix using paravirt hooks. If any | ||
952 | * non-paravirt system ever shows up that does *not* have the | ||
953 | * ESPFIX issue, we can change this. | ||
954 | */ | ||
955 | #ifdef CONFIG_X86_32 | ||
956 | # ifdef CONFIG_PARAVIRT | ||
957 | do { | ||
958 | extern void native_iret(void); | ||
959 | if (pv_cpu_ops.iret == native_iret) | ||
960 | set_cpu_bug(c, X86_BUG_ESPFIX); | ||
961 | } while (0); | ||
962 | # else | ||
963 | set_cpu_bug(c, X86_BUG_ESPFIX); | ||
964 | # endif | ||
965 | #endif | ||
924 | } | 966 | } |
925 | 967 | ||
926 | static void x86_init_cache_qos(struct cpuinfo_x86 *c) | 968 | static void x86_init_cache_qos(struct cpuinfo_x86 *c) |
@@ -1076,12 +1118,12 @@ void enable_sep_cpu(void) | |||
1076 | struct tss_struct *tss; | 1118 | struct tss_struct *tss; |
1077 | int cpu; | 1119 | int cpu; |
1078 | 1120 | ||
1121 | if (!boot_cpu_has(X86_FEATURE_SEP)) | ||
1122 | return; | ||
1123 | |||
1079 | cpu = get_cpu(); | 1124 | cpu = get_cpu(); |
1080 | tss = &per_cpu(cpu_tss, cpu); | 1125 | tss = &per_cpu(cpu_tss, cpu); |
1081 | 1126 | ||
1082 | if (!boot_cpu_has(X86_FEATURE_SEP)) | ||
1083 | goto out; | ||
1084 | |||
1085 | /* | 1127 | /* |
1086 | * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- | 1128 | * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- |
1087 | * see the big comment in struct x86_hw_tss's definition. | 1129 | * see the big comment in struct x86_hw_tss's definition. |
@@ -1096,7 +1138,6 @@ void enable_sep_cpu(void) | |||
1096 | 1138 | ||
1097 | wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); | 1139 | wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); |
1098 | 1140 | ||
1099 | out: | ||
1100 | put_cpu(); | 1141 | put_cpu(); |
1101 | } | 1142 | } |
1102 | #endif | 1143 | #endif |
@@ -1528,7 +1569,7 @@ void cpu_init(void) | |||
1528 | pr_info("Initializing CPU#%d\n", cpu); | 1569 | pr_info("Initializing CPU#%d\n", cpu); |
1529 | 1570 | ||
1530 | if (cpu_feature_enabled(X86_FEATURE_VME) || | 1571 | if (cpu_feature_enabled(X86_FEATURE_VME) || |
1531 | cpu_has_tsc || | 1572 | boot_cpu_has(X86_FEATURE_TSC) || |
1532 | boot_cpu_has(X86_FEATURE_DE)) | 1573 | boot_cpu_has(X86_FEATURE_DE)) |
1533 | cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | 1574 | cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); |
1534 | 1575 | ||
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 6adef9cac23e..bd9dcd6b712d 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c | |||
@@ -333,7 +333,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) | |||
333 | switch (dir0_lsn) { | 333 | switch (dir0_lsn) { |
334 | case 0xd: /* either a 486SLC or DLC w/o DEVID */ | 334 | case 0xd: /* either a 486SLC or DLC w/o DEVID */ |
335 | dir0_msn = 0; | 335 | dir0_msn = 0; |
336 | p = Cx486_name[(cpu_has_fpu ? 1 : 0)]; | 336 | p = Cx486_name[!!boot_cpu_has(X86_FEATURE_FPU)]; |
337 | break; | 337 | break; |
338 | 338 | ||
339 | case 0xe: /* a 486S A step */ | 339 | case 0xe: /* a 486S A step */ |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 5354080f76c3..c1a89bc026ac 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -25,6 +25,41 @@ | |||
25 | #include <asm/apic.h> | 25 | #include <asm/apic.h> |
26 | #endif | 26 | #endif |
27 | 27 | ||
28 | /* | ||
29 | * Just in case our CPU detection goes bad, or you have a weird system, | ||
30 | * allow a way to override the automatic disabling of MPX. | ||
31 | */ | ||
32 | static int forcempx; | ||
33 | |||
34 | static int __init forcempx_setup(char *__unused) | ||
35 | { | ||
36 | forcempx = 1; | ||
37 | |||
38 | return 1; | ||
39 | } | ||
40 | __setup("intel-skd-046-workaround=disable", forcempx_setup); | ||
41 | |||
42 | void check_mpx_erratum(struct cpuinfo_x86 *c) | ||
43 | { | ||
44 | if (forcempx) | ||
45 | return; | ||
46 | /* | ||
47 | * Turn off the MPX feature on CPUs where SMEP is not | ||
48 | * available or disabled. | ||
49 | * | ||
50 | * Works around Intel Erratum SKD046: "Branch Instructions | ||
51 | * May Initialize MPX Bound Registers Incorrectly". | ||
52 | * | ||
53 | * This might falsely disable MPX on systems without | ||
54 | * SMEP, like Atom processors without SMEP. But there | ||
55 | * is no such hardware known at the moment. | ||
56 | */ | ||
57 | if (cpu_has(c, X86_FEATURE_MPX) && !cpu_has(c, X86_FEATURE_SMEP)) { | ||
58 | setup_clear_cpu_cap(X86_FEATURE_MPX); | ||
59 | pr_warn("x86/mpx: Disabling MPX since SMEP not present\n"); | ||
60 | } | ||
61 | } | ||
62 | |||
28 | static void early_init_intel(struct cpuinfo_x86 *c) | 63 | static void early_init_intel(struct cpuinfo_x86 *c) |
29 | { | 64 | { |
30 | u64 misc_enable; | 65 | u64 misc_enable; |
@@ -173,6 +208,8 @@ static void early_init_intel(struct cpuinfo_x86 *c) | |||
173 | if (edx & (1U << 28)) | 208 | if (edx & (1U << 28)) |
174 | c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); | 209 | c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); |
175 | } | 210 | } |
211 | |||
212 | check_mpx_erratum(c); | ||
176 | } | 213 | } |
177 | 214 | ||
178 | #ifdef CONFIG_X86_32 | 215 | #ifdef CONFIG_X86_32 |
@@ -233,7 +270,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) | |||
233 | * The Quark is also family 5, but does not have the same bug. | 270 | * The Quark is also family 5, but does not have the same bug. |
234 | */ | 271 | */ |
235 | clear_cpu_bug(c, X86_BUG_F00F); | 272 | clear_cpu_bug(c, X86_BUG_F00F); |
236 | if (!paravirt_enabled() && c->x86 == 5 && c->x86_model < 9) { | 273 | if (c->x86 == 5 && c->x86_model < 9) { |
237 | static int f00f_workaround_enabled; | 274 | static int f00f_workaround_enabled; |
238 | 275 | ||
239 | set_cpu_bug(c, X86_BUG_F00F); | 276 | set_cpu_bug(c, X86_BUG_F00F); |
@@ -280,7 +317,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) | |||
280 | * integrated APIC (see 11AP erratum in "Pentium Processor | 317 | * integrated APIC (see 11AP erratum in "Pentium Processor |
281 | * Specification Update"). | 318 | * Specification Update"). |
282 | */ | 319 | */ |
283 | if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && | 320 | if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 && |
284 | (c->x86_mask < 0x6 || c->x86_mask == 0xb)) | 321 | (c->x86_mask < 0x6 || c->x86_mask == 0xb)) |
285 | set_cpu_bug(c, X86_BUG_11AP); | 322 | set_cpu_bug(c, X86_BUG_11AP); |
286 | 323 | ||
@@ -335,7 +372,7 @@ static int intel_num_cpu_cores(struct cpuinfo_x86 *c) | |||
335 | { | 372 | { |
336 | unsigned int eax, ebx, ecx, edx; | 373 | unsigned int eax, ebx, ecx, edx; |
337 | 374 | ||
338 | if (c->cpuid_level < 4) | 375 | if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4) |
339 | return 1; | 376 | return 1; |
340 | 377 | ||
341 | /* Intel has a non-standard dependency on %ecx for this CPUID level. */ | 378 | /* Intel has a non-standard dependency on %ecx for this CPUID level. */ |
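IS_ENABLED(CONFIG_SMP) evaluates to a compile-time constant, so on !SMP builds the early return lets the compiler discard the CPUID query entirely; it is a cleaner replacement for wrapping the function body in #ifdef. A sketch of the idiom (the helper name is hypothetical):

/* Sketch: compile-time gate folded into ordinary C control flow. */
static int widget_count(void)
{
	if (!IS_ENABLED(CONFIG_SMP))
		return 1;	/* constant-folded away on UP builds */

	return query_hardware_for_count();	/* hypothetical helper */
}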
diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c index 2658e2af74ec..93d824ec3120 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c +++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c | |||
@@ -26,6 +26,52 @@ static struct gen_pool *mce_evt_pool; | |||
26 | static LLIST_HEAD(mce_event_llist); | 26 | static LLIST_HEAD(mce_event_llist); |
27 | static char gen_pool_buf[MCE_POOLSZ]; | 27 | static char gen_pool_buf[MCE_POOLSZ]; |
28 | 28 | ||
29 | /* | ||
30 | * Compare the record "t" with each of the records on list "l" to see if | ||
31 | * an equivalent one is present in the list. | ||
32 | */ | ||
33 | static bool is_duplicate_mce_record(struct mce_evt_llist *t, struct mce_evt_llist *l) | ||
34 | { | ||
35 | struct mce_evt_llist *node; | ||
36 | struct mce *m1, *m2; | ||
37 | |||
38 | m1 = &t->mce; | ||
39 | |||
40 | llist_for_each_entry(node, &l->llnode, llnode) { | ||
41 | m2 = &node->mce; | ||
42 | |||
43 | if (!mce_cmp(m1, m2)) | ||
44 | return true; | ||
45 | } | ||
46 | return false; | ||
47 | } | ||
48 | |||
49 | /* | ||
50 | * The system has panicked - we'd like to peruse the list of MCE records | ||
51 | * that have been queued, but not seen by anyone yet. The list is in | ||
52 | * reverse time order, so we need to reverse it. While doing that we can | ||
53 | * also drop duplicate records (these were logged because some banks are | ||
54 | * shared between cores or by all threads on a socket). | ||
55 | */ | ||
56 | struct llist_node *mce_gen_pool_prepare_records(void) | ||
57 | { | ||
58 | struct llist_node *head; | ||
59 | LLIST_HEAD(new_head); | ||
60 | struct mce_evt_llist *node, *t; | ||
61 | |||
62 | head = llist_del_all(&mce_event_llist); | ||
63 | if (!head) | ||
64 | return NULL; | ||
65 | |||
66 | /* squeeze out duplicates while reversing order */ | ||
67 | llist_for_each_entry_safe(node, t, head, llnode) { | ||
68 | if (!is_duplicate_mce_record(node, t)) | ||
69 | llist_add(&node->llnode, &new_head); | ||
70 | } | ||
71 | |||
72 | return new_head.first; | ||
73 | } | ||
74 | |||
29 | void mce_gen_pool_process(void) | 75 | void mce_gen_pool_process(void) |
30 | { | 76 | { |
31 | struct llist_node *head; | 77 | struct llist_node *head; |
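llist_del_all() hands the nodes back newest-first, and llist_add() pushes onto the head of the new list, so a single pass both restores time order and lets is_duplicate_mce_record() drop a record whenever an equivalent one still sits later in the unprocessed input. The same reverse-while-filtering idea as a plain C sketch:

#include <stdbool.h>
#include <stddef.h>

struct rec {
	struct rec *next;
	int key;			/* stands in for bank/status/addr/misc */
};

static bool seen_later(const struct rec *r, const struct rec *rest)
{
	for (; rest; rest = rest->next)
		if (rest->key == r->key)
			return true;
	return false;
}

static struct rec *reverse_dedup(struct rec *head)
{
	struct rec *out = NULL;

	while (head) {
		struct rec *next = head->next;

		if (!seen_later(head, next)) {	/* keep only the last copy */
			head->next = out;	/* push onto out: reverses order */
			out = head;
		}
		head = next;
	}
	return out;
}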
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 547720efd923..cd74a3f00aea 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -35,6 +35,7 @@ void mce_gen_pool_process(void); | |||
35 | bool mce_gen_pool_empty(void); | 35 | bool mce_gen_pool_empty(void); |
36 | int mce_gen_pool_add(struct mce *mce); | 36 | int mce_gen_pool_add(struct mce *mce); |
37 | int mce_gen_pool_init(void); | 37 | int mce_gen_pool_init(void); |
38 | struct llist_node *mce_gen_pool_prepare_records(void); | ||
38 | 39 | ||
39 | extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp); | 40 | extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp); |
40 | struct dentry *mce_get_debugfs_dir(void); | 41 | struct dentry *mce_get_debugfs_dir(void); |
@@ -81,3 +82,17 @@ static inline int apei_clear_mce(u64 record_id) | |||
81 | #endif | 82 | #endif |
82 | 83 | ||
83 | void mce_inject_log(struct mce *m); | 84 | void mce_inject_log(struct mce *m); |
85 | |||
86 | /* | ||
87 | * We consider records to be equivalent if bank+status+addr+misc all match. | ||
88 | * This is only used when the system is going down because of a fatal error | ||
89 | * to avoid cluttering the console log with essentially repeated information. | ||
90 | * In normal processing all errors seen are logged. | ||
91 | */ | ||
92 | static inline bool mce_cmp(struct mce *m1, struct mce *m2) | ||
93 | { | ||
94 | return m1->bank != m2->bank || | ||
95 | m1->status != m2->status || | ||
96 | m1->addr != m2->addr || | ||
97 | m1->misc != m2->misc; | ||
98 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 5119766d9889..631356c8cca4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c | |||
@@ -204,6 +204,33 @@ static int error_context(struct mce *m) | |||
204 | return IN_KERNEL; | 204 | return IN_KERNEL; |
205 | } | 205 | } |
206 | 206 | ||
207 | static int mce_severity_amd_smca(struct mce *m, int err_ctx) | ||
208 | { | ||
209 | u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank); | ||
210 | u32 low, high; | ||
211 | |||
212 | /* | ||
213 | * We need to look at the following bits: | ||
214 | * - "succor" bit (data poisoning support), and | ||
215 | * - TCC bit (Task Context Corrupt) | ||
216 | * in MCi_STATUS to determine error severity. | ||
217 | */ | ||
218 | if (!mce_flags.succor) | ||
219 | return MCE_PANIC_SEVERITY; | ||
220 | |||
221 | if (rdmsr_safe(addr, &low, &high)) | ||
222 | return MCE_PANIC_SEVERITY; | ||
223 | |||
224 | /* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */ | ||
225 | if ((low & MCI_CONFIG_MCAX) && | ||
226 | (m->status & MCI_STATUS_TCC) && | ||
227 | (err_ctx == IN_KERNEL)) | ||
228 | return MCE_PANIC_SEVERITY; | ||
229 | |||
230 | /* ...otherwise invoke hwpoison handler. */ | ||
231 | return MCE_AR_SEVERITY; | ||
232 | } | ||
233 | |||
207 | /* | 234 | /* |
208 | * See AMD Error Scope Hierarchy table in a newer BKDG. For example | 235 | * See AMD Error Scope Hierarchy table in a newer BKDG. For example |
209 | * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features" | 236 | * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features" |
@@ -225,6 +252,9 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc | |||
225 | * to at least kill process to prolong system operation. | 252 | * to at least kill process to prolong system operation. |
226 | */ | 253 | */ |
227 | if (mce_flags.overflow_recov) { | 254 | if (mce_flags.overflow_recov) { |
255 | if (mce_flags.smca) | ||
256 | return mce_severity_amd_smca(m, ctx); | ||
257 | |||
228 | /* software can try to contain */ | 258 | /* software can try to contain */ |
229 | if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL)) | 259 | if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL)) |
230 | return MCE_PANIC_SEVERITY; | 260 | return MCE_PANIC_SEVERITY; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index f0c921b03e42..92e5e37d97bf 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -161,7 +161,6 @@ void mce_log(struct mce *mce) | |||
161 | if (!mce_gen_pool_add(mce)) | 161 | if (!mce_gen_pool_add(mce)) |
162 | irq_work_queue(&mce_irq_work); | 162 | irq_work_queue(&mce_irq_work); |
163 | 163 | ||
164 | mce->finished = 0; | ||
165 | wmb(); | 164 | wmb(); |
166 | for (;;) { | 165 | for (;;) { |
167 | entry = mce_log_get_idx_check(mcelog.next); | 166 | entry = mce_log_get_idx_check(mcelog.next); |
@@ -194,7 +193,6 @@ void mce_log(struct mce *mce) | |||
194 | mcelog.entry[entry].finished = 1; | 193 | mcelog.entry[entry].finished = 1; |
195 | wmb(); | 194 | wmb(); |
196 | 195 | ||
197 | mce->finished = 1; | ||
198 | set_bit(0, &mce_need_notify); | 196 | set_bit(0, &mce_need_notify); |
199 | } | 197 | } |
200 | 198 | ||
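The wmb() calls that remain in mce_log() are the writer half of a flag-guarded publish: the record fields must be visible to other CPUs before the finished flag that marks the slot readable. The shape, as a kernel-style sketch (not the mcelog code itself):

/* Sketch: ordered publish of a record behind a 'finished' flag. */
struct slot {
	u64 payload;		/* stands in for the mce record fields */
	int finished;
};

static void publish(struct slot *s, u64 val)
{
	s->payload = val;
	wmb();			/* payload becomes visible before the flag */
	s->finished = 1;
}

static bool consume(struct slot *s, u64 *out)
{
	if (!READ_ONCE(s->finished))
		return false;
	rmb();			/* flag read ordered before payload read */
	*out = s->payload;
	return true;
}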
@@ -224,6 +222,53 @@ void mce_unregister_decode_chain(struct notifier_block *nb) | |||
224 | } | 222 | } |
225 | EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); | 223 | EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); |
226 | 224 | ||
225 | static inline u32 ctl_reg(int bank) | ||
226 | { | ||
227 | return MSR_IA32_MCx_CTL(bank); | ||
228 | } | ||
229 | |||
230 | static inline u32 status_reg(int bank) | ||
231 | { | ||
232 | return MSR_IA32_MCx_STATUS(bank); | ||
233 | } | ||
234 | |||
235 | static inline u32 addr_reg(int bank) | ||
236 | { | ||
237 | return MSR_IA32_MCx_ADDR(bank); | ||
238 | } | ||
239 | |||
240 | static inline u32 misc_reg(int bank) | ||
241 | { | ||
242 | return MSR_IA32_MCx_MISC(bank); | ||
243 | } | ||
244 | |||
245 | static inline u32 smca_ctl_reg(int bank) | ||
246 | { | ||
247 | return MSR_AMD64_SMCA_MCx_CTL(bank); | ||
248 | } | ||
249 | |||
250 | static inline u32 smca_status_reg(int bank) | ||
251 | { | ||
252 | return MSR_AMD64_SMCA_MCx_STATUS(bank); | ||
253 | } | ||
254 | |||
255 | static inline u32 smca_addr_reg(int bank) | ||
256 | { | ||
257 | return MSR_AMD64_SMCA_MCx_ADDR(bank); | ||
258 | } | ||
259 | |||
260 | static inline u32 smca_misc_reg(int bank) | ||
261 | { | ||
262 | return MSR_AMD64_SMCA_MCx_MISC(bank); | ||
263 | } | ||
264 | |||
265 | struct mca_msr_regs msr_ops = { | ||
266 | .ctl = ctl_reg, | ||
267 | .status = status_reg, | ||
268 | .addr = addr_reg, | ||
269 | .misc = misc_reg | ||
270 | }; | ||
271 | |||
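msr_ops turns the legacy-vs-SMCA register layout into data: both layouts supply the same four accessors, so callers index banks through the table instead of branching on mce_flags.smca at every site (the vendor init code further down swaps in the smca_* variants). An illustrative caller, not taken from this file:

/* Sketch: vendor-neutral bank access through the ops table. */
static void clear_bank_if_valid(int bank)
{
	u64 status;

	rdmsrl(msr_ops.status(bank), status);	/* legacy or SMCA MSR */
	if (status & MCI_STATUS_VAL)
		wrmsrl(msr_ops.status(bank), 0);
}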
227 | static void print_mce(struct mce *m) | 272 | static void print_mce(struct mce *m) |
228 | { | 273 | { |
229 | int ret = 0; | 274 | int ret = 0; |
@@ -290,7 +335,9 @@ static void wait_for_panic(void) | |||
290 | 335 | ||
291 | static void mce_panic(const char *msg, struct mce *final, char *exp) | 336 | static void mce_panic(const char *msg, struct mce *final, char *exp) |
292 | { | 337 | { |
293 | int i, apei_err = 0; | 338 | int apei_err = 0; |
339 | struct llist_node *pending; | ||
340 | struct mce_evt_llist *l; | ||
294 | 341 | ||
295 | if (!fake_panic) { | 342 | if (!fake_panic) { |
296 | /* | 343 | /* |
@@ -307,11 +354,10 @@ static void mce_panic(const char *msg, struct mce *final, char *exp) | |||
307 | if (atomic_inc_return(&mce_fake_panicked) > 1) | 354 | if (atomic_inc_return(&mce_fake_panicked) > 1) |
308 | return; | 355 | return; |
309 | } | 356 | } |
357 | pending = mce_gen_pool_prepare_records(); | ||
310 | /* First print corrected ones that are still unlogged */ | 358 | /* First print corrected ones that are still unlogged */ |
311 | for (i = 0; i < MCE_LOG_LEN; i++) { | 359 | llist_for_each_entry(l, pending, llnode) { |
312 | struct mce *m = &mcelog.entry[i]; | 360 | struct mce *m = &l->mce; |
313 | if (!(m->status & MCI_STATUS_VAL)) | ||
314 | continue; | ||
315 | if (!(m->status & MCI_STATUS_UC)) { | 361 | if (!(m->status & MCI_STATUS_UC)) { |
316 | print_mce(m); | 362 | print_mce(m); |
317 | if (!apei_err) | 363 | if (!apei_err) |
@@ -319,13 +365,11 @@ static void mce_panic(const char *msg, struct mce *final, char *exp) | |||
319 | } | 365 | } |
320 | } | 366 | } |
321 | /* Now print uncorrected but with the final one last */ | 367 | /* Now print uncorrected but with the final one last */ |
322 | for (i = 0; i < MCE_LOG_LEN; i++) { | 368 | llist_for_each_entry(l, pending, llnode) { |
323 | struct mce *m = &mcelog.entry[i]; | 369 | struct mce *m = &l->mce; |
324 | if (!(m->status & MCI_STATUS_VAL)) | ||
325 | continue; | ||
326 | if (!(m->status & MCI_STATUS_UC)) | 370 | if (!(m->status & MCI_STATUS_UC)) |
327 | continue; | 371 | continue; |
328 | if (!final || memcmp(m, final, sizeof(struct mce))) { | 372 | if (!final || mce_cmp(m, final)) { |
329 | print_mce(m); | 373 | print_mce(m); |
330 | if (!apei_err) | 374 | if (!apei_err) |
331 | apei_err = apei_write_mce(m); | 375 | apei_err = apei_write_mce(m); |
@@ -356,11 +400,11 @@ static int msr_to_offset(u32 msr) | |||
356 | 400 | ||
357 | if (msr == mca_cfg.rip_msr) | 401 | if (msr == mca_cfg.rip_msr) |
358 | return offsetof(struct mce, ip); | 402 | return offsetof(struct mce, ip); |
359 | if (msr == MSR_IA32_MCx_STATUS(bank)) | 403 | if (msr == msr_ops.status(bank)) |
360 | return offsetof(struct mce, status); | 404 | return offsetof(struct mce, status); |
361 | if (msr == MSR_IA32_MCx_ADDR(bank)) | 405 | if (msr == msr_ops.addr(bank)) |
362 | return offsetof(struct mce, addr); | 406 | return offsetof(struct mce, addr); |
363 | if (msr == MSR_IA32_MCx_MISC(bank)) | 407 | if (msr == msr_ops.misc(bank)) |
364 | return offsetof(struct mce, misc); | 408 | return offsetof(struct mce, misc); |
365 | if (msr == MSR_IA32_MCG_STATUS) | 409 | if (msr == MSR_IA32_MCG_STATUS) |
366 | return offsetof(struct mce, mcgstatus); | 410 | return offsetof(struct mce, mcgstatus); |
@@ -523,9 +567,9 @@ static struct notifier_block mce_srao_nb = { | |||
523 | static void mce_read_aux(struct mce *m, int i) | 567 | static void mce_read_aux(struct mce *m, int i) |
524 | { | 568 | { |
525 | if (m->status & MCI_STATUS_MISCV) | 569 | if (m->status & MCI_STATUS_MISCV) |
526 | m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); | 570 | m->misc = mce_rdmsrl(msr_ops.misc(i)); |
527 | if (m->status & MCI_STATUS_ADDRV) { | 571 | if (m->status & MCI_STATUS_ADDRV) { |
528 | m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); | 572 | m->addr = mce_rdmsrl(msr_ops.addr(i)); |
529 | 573 | ||
530 | /* | 574 | /* |
531 | * Mask the reported address by the reported granularity. | 575 | * Mask the reported address by the reported granularity. |
@@ -607,7 +651,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
607 | m.tsc = 0; | 651 | m.tsc = 0; |
608 | 652 | ||
609 | barrier(); | 653 | barrier(); |
610 | m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); | 654 | m.status = mce_rdmsrl(msr_ops.status(i)); |
611 | if (!(m.status & MCI_STATUS_VAL)) | 655 | if (!(m.status & MCI_STATUS_VAL)) |
612 | continue; | 656 | continue; |
613 | 657 | ||
@@ -654,7 +698,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
654 | /* | 698 | /* |
655 | * Clear state for this bank. | 699 | * Clear state for this bank. |
656 | */ | 700 | */ |
657 | mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); | 701 | mce_wrmsrl(msr_ops.status(i), 0); |
658 | } | 702 | } |
659 | 703 | ||
660 | /* | 704 | /* |
@@ -679,7 +723,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, | |||
679 | char *tmp; | 723 | char *tmp; |
680 | 724 | ||
681 | for (i = 0; i < mca_cfg.banks; i++) { | 725 | for (i = 0; i < mca_cfg.banks; i++) { |
682 | m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); | 726 | m->status = mce_rdmsrl(msr_ops.status(i)); |
683 | if (m->status & MCI_STATUS_VAL) { | 727 | if (m->status & MCI_STATUS_VAL) { |
684 | __set_bit(i, validp); | 728 | __set_bit(i, validp); |
685 | if (quirk_no_way_out) | 729 | if (quirk_no_way_out) |
@@ -830,9 +874,9 @@ static int mce_start(int *no_way_out) | |||
830 | 874 | ||
831 | atomic_add(*no_way_out, &global_nwo); | 875 | atomic_add(*no_way_out, &global_nwo); |
832 | /* | 876 | /* |
833 | * global_nwo should be updated before mce_callin | 877 | * Rely on the full barrier implied by atomic_inc_return() below, |
878 | * such that global_nwo is updated before mce_callin. | ||
834 | */ | 879 | */ |
835 | smp_wmb(); | ||
836 | order = atomic_inc_return(&mce_callin); | 880 | order = atomic_inc_return(&mce_callin); |
837 | 881 | ||
838 | /* | 882 | /* |
@@ -957,7 +1001,7 @@ static void mce_clear_state(unsigned long *toclear) | |||
957 | 1001 | ||
958 | for (i = 0; i < mca_cfg.banks; i++) { | 1002 | for (i = 0; i < mca_cfg.banks; i++) { |
959 | if (test_bit(i, toclear)) | 1003 | if (test_bit(i, toclear)) |
960 | mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); | 1004 | mce_wrmsrl(msr_ops.status(i), 0); |
961 | } | 1005 | } |
962 | } | 1006 | } |
963 | 1007 | ||
@@ -994,11 +1038,12 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
994 | int i; | 1038 | int i; |
995 | int worst = 0; | 1039 | int worst = 0; |
996 | int severity; | 1040 | int severity; |
1041 | |||
997 | /* | 1042 | /* |
998 | * Establish sequential order between the CPUs entering the machine | 1043 | * Establish sequential order between the CPUs entering the machine |
999 | * check handler. | 1044 | * check handler. |
1000 | */ | 1045 | */ |
1001 | int order; | 1046 | int order = -1; |
1002 | /* | 1047 | /* |
1003 | * If no_way_out gets set, there is no safe way to recover from this | 1048 | * If no_way_out gets set, there is no safe way to recover from this |
1004 | * MCE. If mca_cfg.tolerant is cranked up, we'll try anyway. | 1049 | * MCE. If mca_cfg.tolerant is cranked up, we'll try anyway. |
@@ -1012,7 +1057,12 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1012 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); | 1057 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); |
1013 | DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); | 1058 | DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); |
1014 | char *msg = "Unknown"; | 1059 | char *msg = "Unknown"; |
1015 | int lmce = 0; | 1060 | |
1061 | /* | ||
1062 | * MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES | ||
1063 | * on Intel. | ||
1064 | */ | ||
1065 | int lmce = 1; | ||
1016 | 1066 | ||
1017 | /* If this CPU is offline, just bail out. */ | 1067 | /* If this CPU is offline, just bail out. */ |
1018 | if (cpu_is_offline(smp_processor_id())) { | 1068 | if (cpu_is_offline(smp_processor_id())) { |
@@ -1051,19 +1101,20 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1051 | kill_it = 1; | 1101 | kill_it = 1; |
1052 | 1102 | ||
1053 | /* | 1103 | /* |
1054 | * Check if this MCE is signaled to only this logical processor | 1104 | * Check if this MCE is signaled to only this logical processor, |
1105 | * on Intel only. | ||
1055 | */ | 1106 | */ |
1056 | if (m.mcgstatus & MCG_STATUS_LMCES) | 1107 | if (m.cpuvendor == X86_VENDOR_INTEL) |
1057 | lmce = 1; | 1108 | lmce = m.mcgstatus & MCG_STATUS_LMCES; |
1058 | else { | 1109 | |
1059 | /* | 1110 | /* |
1060 | * Go through all the banks in exclusion of the other CPUs. | 1111 | * Go through all banks in exclusion of the other CPUs. This way we |
1061 | * This way we don't report duplicated events on shared banks | 1112 | * don't report duplicated events on shared banks because the first one |
1062 | * because the first one to see it will clear it. | 1113 | * to see it will clear it. If this is a Local MCE, then no need to |
1063 | * If this is a Local MCE, then no need to perform rendezvous. | 1114 | * perform rendezvous. |
1064 | */ | 1115 | */ |
1116 | if (!lmce) | ||
1065 | order = mce_start(&no_way_out); | 1117 | order = mce_start(&no_way_out); |
1066 | } | ||
1067 | 1118 | ||
1068 | for (i = 0; i < cfg->banks; i++) { | 1119 | for (i = 0; i < cfg->banks; i++) { |
1069 | __clear_bit(i, toclear); | 1120 | __clear_bit(i, toclear); |
@@ -1076,7 +1127,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1076 | m.addr = 0; | 1127 | m.addr = 0; |
1077 | m.bank = i; | 1128 | m.bank = i; |
1078 | 1129 | ||
1079 | m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); | 1130 | m.status = mce_rdmsrl(msr_ops.status(i)); |
1080 | if ((m.status & MCI_STATUS_VAL) == 0) | 1131 | if ((m.status & MCI_STATUS_VAL) == 0) |
1081 | continue; | 1132 | continue; |
1082 | 1133 | ||
@@ -1420,7 +1471,6 @@ static void __mcheck_cpu_init_generic(void) | |||
1420 | enum mcp_flags m_fl = 0; | 1471 | enum mcp_flags m_fl = 0; |
1421 | mce_banks_t all_banks; | 1472 | mce_banks_t all_banks; |
1422 | u64 cap; | 1473 | u64 cap; |
1423 | int i; | ||
1424 | 1474 | ||
1425 | if (!mca_cfg.bootlog) | 1475 | if (!mca_cfg.bootlog) |
1426 | m_fl = MCP_DONTLOG; | 1476 | m_fl = MCP_DONTLOG; |
@@ -1436,14 +1486,19 @@ static void __mcheck_cpu_init_generic(void) | |||
1436 | rdmsrl(MSR_IA32_MCG_CAP, cap); | 1486 | rdmsrl(MSR_IA32_MCG_CAP, cap); |
1437 | if (cap & MCG_CTL_P) | 1487 | if (cap & MCG_CTL_P) |
1438 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | 1488 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); |
1489 | } | ||
1490 | |||
1491 | static void __mcheck_cpu_init_clear_banks(void) | ||
1492 | { | ||
1493 | int i; | ||
1439 | 1494 | ||
1440 | for (i = 0; i < mca_cfg.banks; i++) { | 1495 | for (i = 0; i < mca_cfg.banks; i++) { |
1441 | struct mce_bank *b = &mce_banks[i]; | 1496 | struct mce_bank *b = &mce_banks[i]; |
1442 | 1497 | ||
1443 | if (!b->init) | 1498 | if (!b->init) |
1444 | continue; | 1499 | continue; |
1445 | wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); | 1500 | wrmsrl(msr_ops.ctl(i), b->ctl); |
1446 | wrmsrl(MSR_IA32_MCx_STATUS(i), 0); | 1501 | wrmsrl(msr_ops.status(i), 0); |
1447 | } | 1502 | } |
1448 | } | 1503 | } |
1449 | 1504 | ||
@@ -1495,7 +1550,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) | |||
1495 | */ | 1550 | */ |
1496 | clear_bit(10, (unsigned long *)&mce_banks[4].ctl); | 1551 | clear_bit(10, (unsigned long *)&mce_banks[4].ctl); |
1497 | } | 1552 | } |
1498 | if (c->x86 <= 17 && cfg->bootlog < 0) { | 1553 | if (c->x86 < 17 && cfg->bootlog < 0) { |
1499 | /* | 1554 | /* |
1500 | * Lots of broken BIOS around that don't clear them | 1555 | * Lots of broken BIOS around that don't clear them |
1501 | * by default and leave crap in there. Don't log: | 1556 | * by default and leave crap in there. Don't log: |
@@ -1628,11 +1683,19 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) | |||
1628 | break; | 1683 | break; |
1629 | 1684 | ||
1630 | case X86_VENDOR_AMD: { | 1685 | case X86_VENDOR_AMD: { |
1631 | u32 ebx = cpuid_ebx(0x80000007); | 1686 | mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV); |
1687 | mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR); | ||
1688 | mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA); | ||
1632 | 1689 | ||
1633 | mce_flags.overflow_recov = !!(ebx & BIT(0)); | 1690 | /* |
1634 | mce_flags.succor = !!(ebx & BIT(1)); | 1691 | * Install proper ops for Scalable MCA enabled processors |
1635 | mce_flags.smca = !!(ebx & BIT(3)); | 1692 | */ |
1693 | if (mce_flags.smca) { | ||
1694 | msr_ops.ctl = smca_ctl_reg; | ||
1695 | msr_ops.status = smca_status_reg; | ||
1696 | msr_ops.addr = smca_addr_reg; | ||
1697 | msr_ops.misc = smca_misc_reg; | ||
1698 | } | ||
1636 | mce_amd_feature_init(c); | 1699 | mce_amd_feature_init(c); |
1637 | 1700 | ||
1638 | break; | 1701 | break; |
@@ -1717,6 +1780,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) | |||
1717 | 1780 | ||
1718 | __mcheck_cpu_init_generic(); | 1781 | __mcheck_cpu_init_generic(); |
1719 | __mcheck_cpu_init_vendor(c); | 1782 | __mcheck_cpu_init_vendor(c); |
1783 | __mcheck_cpu_init_clear_banks(); | ||
1720 | __mcheck_cpu_init_timer(); | 1784 | __mcheck_cpu_init_timer(); |
1721 | } | 1785 | } |
1722 | 1786 | ||
@@ -2082,7 +2146,7 @@ static void mce_disable_error_reporting(void) | |||
2082 | struct mce_bank *b = &mce_banks[i]; | 2146 | struct mce_bank *b = &mce_banks[i]; |
2083 | 2147 | ||
2084 | if (b->init) | 2148 | if (b->init) |
2085 | wrmsrl(MSR_IA32_MCx_CTL(i), 0); | 2149 | wrmsrl(msr_ops.ctl(i), 0); |
2086 | } | 2150 | } |
2087 | return; | 2151 | return; |
2088 | } | 2152 | } |
@@ -2121,6 +2185,7 @@ static void mce_syscore_resume(void) | |||
2121 | { | 2185 | { |
2122 | __mcheck_cpu_init_generic(); | 2186 | __mcheck_cpu_init_generic(); |
2123 | __mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info)); | 2187 | __mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info)); |
2188 | __mcheck_cpu_init_clear_banks(); | ||
2124 | } | 2189 | } |
2125 | 2190 | ||
2126 | static struct syscore_ops mce_syscore_ops = { | 2191 | static struct syscore_ops mce_syscore_ops = { |
@@ -2138,6 +2203,7 @@ static void mce_cpu_restart(void *data) | |||
2138 | if (!mce_available(raw_cpu_ptr(&cpu_info))) | 2203 | if (!mce_available(raw_cpu_ptr(&cpu_info))) |
2139 | return; | 2204 | return; |
2140 | __mcheck_cpu_init_generic(); | 2205 | __mcheck_cpu_init_generic(); |
2206 | __mcheck_cpu_init_clear_banks(); | ||
2141 | __mcheck_cpu_init_timer(); | 2207 | __mcheck_cpu_init_timer(); |
2142 | } | 2208 | } |
2143 | 2209 | ||
@@ -2413,7 +2479,7 @@ static void mce_reenable_cpu(void *h) | |||
2413 | struct mce_bank *b = &mce_banks[i]; | 2479 | struct mce_bank *b = &mce_banks[i]; |
2414 | 2480 | ||
2415 | if (b->init) | 2481 | if (b->init) |
2416 | wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); | 2482 | wrmsrl(msr_ops.ctl(i), b->ctl); |
2417 | } | 2483 | } |
2418 | } | 2484 | } |
2419 | 2485 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 9d656fd436ef..10b0661651e0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -54,14 +54,6 @@ | |||
54 | /* Threshold LVT offset is at MSR 0xC0000410[15:12] */ | 54 | /* Threshold LVT offset is at MSR 0xC0000410[15:12] */ |
55 | #define SMCA_THR_LVT_OFF 0xF000 | 55 | #define SMCA_THR_LVT_OFF 0xF000 |
56 | 56 | ||
57 | /* | ||
58 | * OS is required to set the MCAX bit to acknowledge that it is now using the | ||
59 | * new MSR ranges and new registers under each bank. It also means that the OS | ||
60 | * will configure deferred errors in the new MCx_CONFIG register. If the bit is | ||
61 | * not set, uncorrectable errors will cause a system panic. | ||
62 | */ | ||
63 | #define SMCA_MCAX_EN_OFF 0x1 | ||
64 | |||
65 | static const char * const th_names[] = { | 57 | static const char * const th_names[] = { |
66 | "load_store", | 58 | "load_store", |
67 | "insn_fetch", | 59 | "insn_fetch", |
@@ -333,7 +325,7 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high, | |||
333 | /* Fall back to method we used for older processors: */ | 325 | /* Fall back to method we used for older processors: */ |
334 | switch (block) { | 326 | switch (block) { |
335 | case 0: | 327 | case 0: |
336 | addr = MSR_IA32_MCx_MISC(bank); | 328 | addr = msr_ops.misc(bank); |
337 | break; | 329 | break; |
338 | case 1: | 330 | case 1: |
339 | offset = ((low & MASK_BLKPTR_LO) >> 21); | 331 | offset = ((low & MASK_BLKPTR_LO) >> 21); |
@@ -351,6 +343,7 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, | |||
351 | int offset, u32 misc_high) | 343 | int offset, u32 misc_high) |
352 | { | 344 | { |
353 | unsigned int cpu = smp_processor_id(); | 345 | unsigned int cpu = smp_processor_id(); |
346 | u32 smca_low, smca_high, smca_addr; | ||
354 | struct threshold_block b; | 347 | struct threshold_block b; |
355 | int new; | 348 | int new; |
356 | 349 | ||
@@ -369,24 +362,49 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, | |||
369 | 362 | ||
370 | b.interrupt_enable = 1; | 363 | b.interrupt_enable = 1; |
371 | 364 | ||
372 | if (mce_flags.smca) { | 365 | if (!mce_flags.smca) { |
373 | u32 smca_low, smca_high; | 366 | new = (misc_high & MASK_LVTOFF_HI) >> 20; |
374 | u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank); | 367 | goto set_offset; |
368 | } | ||
375 | 369 | ||
376 | if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) { | 370 | smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank); |
377 | smca_high |= SMCA_MCAX_EN_OFF; | ||
378 | wrmsr(smca_addr, smca_low, smca_high); | ||
379 | } | ||
380 | 371 | ||
381 | /* Gather LVT offset for thresholding: */ | 372 | if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) { |
382 | if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high)) | 373 | /* |
383 | goto out; | 374 | * OS is required to set the MCAX bit to acknowledge that it is |
375 | * now using the new MSR ranges and new registers under each | ||
376 | * bank. It also means that the OS will configure deferred | ||
377 | * errors in the new MCx_CONFIG register. If the bit is not set, | ||
378 | * uncorrectable errors will cause a system panic. | ||
379 | * | ||
380 | * MCA_CONFIG[MCAX] is bit 32 (bit 0 in the high portion of the MSR). | ||
381 | */ | ||
382 | smca_high |= BIT(0); | ||
384 | 383 | ||
385 | new = (smca_low & SMCA_THR_LVT_OFF) >> 12; | 384 | /* |
386 | } else { | 385 | * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR} |
387 | new = (misc_high & MASK_LVTOFF_HI) >> 20; | 386 | * registers with the option of additionally logging to |
387 | * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set. | ||
388 | * | ||
389 | * This bit is usually set by BIOS to retain the old behavior | ||
390 | * for OSes that don't use the new registers. Linux supports the | ||
391 | * new registers so let's disable that additional logging here. | ||
392 | * | ||
393 | * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high | ||
394 | * portion of the MSR). | ||
395 | */ | ||
396 | smca_high &= ~BIT(2); | ||
397 | |||
398 | wrmsr(smca_addr, smca_low, smca_high); | ||
388 | } | 399 | } |
389 | 400 | ||
401 | /* Gather LVT offset for thresholding: */ | ||
402 | if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high)) | ||
403 | goto out; | ||
404 | |||
405 | new = (smca_low & SMCA_THR_LVT_OFF) >> 12; | ||
406 | |||
407 | set_offset: | ||
390 | offset = setup_APIC_mce_threshold(offset, new); | 408 | offset = setup_APIC_mce_threshold(offset, new); |
391 | 409 | ||
392 | if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt)) | 410 | if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt)) |
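The BIT() arithmetic in the comments above follows from rdmsr/wrmsr splitting a 64-bit MSR into two 32-bit halves: MSR bit N for N >= 32 is bit (N - 32) of the high word, so MCA_CONFIG[32] is BIT(0) and MCA_CONFIG[34] is BIT(2) in smca_high. A worked check in plain C:

/* Sketch: 64-bit MSR bit N maps to bit (N - 32) of the high word. */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t msr = 0;
	uint32_t lo = msr & 0xffffffff, hi = msr >> 32;

	hi |= 1u << (32 - 32);		/* set MSR bit 32 (MCAX) */
	hi &= ~(1u << (34 - 32));	/* clear MSR bit 34 (LogDeferredInMcaStat) */

	msr = ((uint64_t)hi << 32) | lo;
	assert(msr & (1ULL << 32));
	assert(!(msr & (1ULL << 34)));
	return 0;
}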
@@ -430,12 +448,23 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
430 | deferred_error_interrupt_enable(c); | 448 | deferred_error_interrupt_enable(c); |
431 | } | 449 | } |
432 | 450 | ||
433 | static void __log_error(unsigned int bank, bool threshold_err, u64 misc) | 451 | static void |
452 | __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) | ||
434 | { | 453 | { |
454 | u32 msr_status = msr_ops.status(bank); | ||
455 | u32 msr_addr = msr_ops.addr(bank); | ||
435 | struct mce m; | 456 | struct mce m; |
436 | u64 status; | 457 | u64 status; |
437 | 458 | ||
438 | rdmsrl(MSR_IA32_MCx_STATUS(bank), status); | 459 | WARN_ON_ONCE(deferred_err && threshold_err); |
460 | |||
461 | if (deferred_err && mce_flags.smca) { | ||
462 | msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank); | ||
463 | msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank); | ||
464 | } | ||
465 | |||
466 | rdmsrl(msr_status, status); | ||
467 | |||
439 | if (!(status & MCI_STATUS_VAL)) | 468 | if (!(status & MCI_STATUS_VAL)) |
440 | return; | 469 | return; |
441 | 470 | ||
@@ -448,10 +477,11 @@ static void __log_error(unsigned int bank, bool threshold_err, u64 misc) | |||
448 | m.misc = misc; | 477 | m.misc = misc; |
449 | 478 | ||
450 | if (m.status & MCI_STATUS_ADDRV) | 479 | if (m.status & MCI_STATUS_ADDRV) |
451 | rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr); | 480 | rdmsrl(msr_addr, m.addr); |
452 | 481 | ||
453 | mce_log(&m); | 482 | mce_log(&m); |
454 | wrmsrl(MSR_IA32_MCx_STATUS(bank), 0); | 483 | |
484 | wrmsrl(msr_status, 0); | ||
455 | } | 485 | } |
456 | 486 | ||
457 | static inline void __smp_deferred_error_interrupt(void) | 487 | static inline void __smp_deferred_error_interrupt(void) |
@@ -479,17 +509,21 @@ asmlinkage __visible void smp_trace_deferred_error_interrupt(void) | |||
479 | /* APIC interrupt handler for deferred errors */ | 509 | /* APIC interrupt handler for deferred errors */ |
480 | static void amd_deferred_error_interrupt(void) | 510 | static void amd_deferred_error_interrupt(void) |
481 | { | 511 | { |
482 | u64 status; | ||
483 | unsigned int bank; | 512 | unsigned int bank; |
513 | u32 msr_status; | ||
514 | u64 status; | ||
484 | 515 | ||
485 | for (bank = 0; bank < mca_cfg.banks; ++bank) { | 516 | for (bank = 0; bank < mca_cfg.banks; ++bank) { |
486 | rdmsrl(MSR_IA32_MCx_STATUS(bank), status); | 517 | msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank) |
518 | : msr_ops.status(bank); | ||
519 | |||
520 | rdmsrl(msr_status, status); | ||
487 | 521 | ||
488 | if (!(status & MCI_STATUS_VAL) || | 522 | if (!(status & MCI_STATUS_VAL) || |
489 | !(status & MCI_STATUS_DEFERRED)) | 523 | !(status & MCI_STATUS_DEFERRED)) |
490 | continue; | 524 | continue; |
491 | 525 | ||
492 | __log_error(bank, false, 0); | 526 | __log_error(bank, true, false, 0); |
493 | break; | 527 | break; |
494 | } | 528 | } |
495 | } | 529 | } |
@@ -544,7 +578,7 @@ static void amd_threshold_interrupt(void) | |||
544 | return; | 578 | return; |
545 | 579 | ||
546 | log: | 580 | log: |
547 | __log_error(bank, true, ((u64)high << 32) | low); | 581 | __log_error(bank, false, true, ((u64)high << 32) | low); |
548 | } | 582 | } |
549 | 583 | ||
550 | /* | 584 | /* |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 1e8bb6c94f14..1defb8ea882c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -84,7 +84,7 @@ static int cmci_supported(int *banks) | |||
84 | */ | 84 | */ |
85 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) | 85 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) |
86 | return 0; | 86 | return 0; |
87 | if (!cpu_has_apic || lapic_get_maxlvt() < 6) | 87 | if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6) |
88 | return 0; | 88 | return 0; |
89 | rdmsrl(MSR_IA32_MCG_CAP, cap); | 89 | rdmsrl(MSR_IA32_MCG_CAP, cap); |
90 | *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); | 90 | *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index ac780cad3b86..6b9dc4d18ccc 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -450,7 +450,7 @@ asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs) | |||
450 | /* Thermal monitoring depends on APIC, ACPI and clock modulation */ | 450 | /* Thermal monitoring depends on APIC, ACPI and clock modulation */ |
451 | static int intel_thermal_supported(struct cpuinfo_x86 *c) | 451 | static int intel_thermal_supported(struct cpuinfo_x86 *c) |
452 | { | 452 | { |
453 | if (!cpu_has_apic) | 453 | if (!boot_cpu_has(X86_FEATURE_APIC)) |
454 | return 0; | 454 | return 0; |
455 | if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) | 455 | if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) |
456 | return 0; | 456 | return 0; |
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index cbb3cf09b065..65cbbcd48fe4 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c | |||
@@ -422,7 +422,7 @@ static void show_saved_mc(void) | |||
422 | data_size = get_datasize(mc_saved_header); | 422 | data_size = get_datasize(mc_saved_header); |
423 | date = mc_saved_header->date; | 423 | date = mc_saved_header->date; |
424 | 424 | ||
425 | pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n", | 425 | pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, total size=0x%x, date = %04x-%02x-%02x\n", |
426 | i, sig, pf, rev, total_size, | 426 | i, sig, pf, rev, total_size, |
427 | date & 0xffff, | 427 | date & 0xffff, |
428 | date >> 24, | 428 | date >> 24, |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index f1bed301bdb2..16e37a2581ac 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -444,11 +444,24 @@ static void __init print_mtrr_state(void) | |||
444 | pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20); | 444 | pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20); |
445 | } | 445 | } |
446 | 446 | ||
447 | /* PAT setup for BP. We need to go through sync steps here */ | ||
448 | void __init mtrr_bp_pat_init(void) | ||
449 | { | ||
450 | unsigned long flags; | ||
451 | |||
452 | local_irq_save(flags); | ||
453 | prepare_set(); | ||
454 | |||
455 | pat_init(); | ||
456 | |||
457 | post_set(); | ||
458 | local_irq_restore(flags); | ||
459 | } | ||
460 | |||
447 | /* Grab all of the MTRR state for this CPU into *state */ | 461 | /* Grab all of the MTRR state for this CPU into *state */ |
448 | bool __init get_mtrr_state(void) | 462 | bool __init get_mtrr_state(void) |
449 | { | 463 | { |
450 | struct mtrr_var_range *vrs; | 464 | struct mtrr_var_range *vrs; |
451 | unsigned long flags; | ||
452 | unsigned lo, dummy; | 465 | unsigned lo, dummy; |
453 | unsigned int i; | 466 | unsigned int i; |
454 | 467 | ||
@@ -481,15 +494,6 @@ bool __init get_mtrr_state(void) | |||
481 | 494 | ||
482 | mtrr_state_set = 1; | 495 | mtrr_state_set = 1; |
483 | 496 | ||
484 | /* PAT setup for BP. We need to go through sync steps here */ | ||
485 | local_irq_save(flags); | ||
486 | prepare_set(); | ||
487 | |||
488 | pat_init(); | ||
489 | |||
490 | post_set(); | ||
491 | local_irq_restore(flags); | ||
492 | |||
493 | return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED); | 497 | return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED); |
494 | } | 498 | } |
495 | 499 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 10f8d4796240..7d393ecdeee6 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -752,6 +752,9 @@ void __init mtrr_bp_init(void) | |||
752 | /* BIOS may override */ | 752 | /* BIOS may override */ |
753 | __mtrr_enabled = get_mtrr_state(); | 753 | __mtrr_enabled = get_mtrr_state(); |
754 | 754 | ||
755 | if (mtrr_enabled()) | ||
756 | mtrr_bp_pat_init(); | ||
757 | |||
755 | if (mtrr_cleanup(phys_addr)) { | 758 | if (mtrr_cleanup(phys_addr)) { |
756 | changed_by_mtrr_cleanup = 1; | 759 | changed_by_mtrr_cleanup = 1; |
757 | mtrr_if->set_all(); | 760 | mtrr_if->set_all(); |
@@ -759,8 +762,16 @@ void __init mtrr_bp_init(void) | |||
759 | } | 762 | } |
760 | } | 763 | } |
761 | 764 | ||
762 | if (!mtrr_enabled()) | 765 | if (!mtrr_enabled()) { |
763 | pr_info("MTRR: Disabled\n"); | 766 | pr_info("MTRR: Disabled\n"); |
767 | |||
768 | /* | ||
769 | * PAT initialization relies on MTRR's rendezvous handler. | ||
770 | * Skip PAT init until the handler can initialize both | ||
771 | * features independently. | ||
772 | */ | ||
773 | pat_disable("MTRRs disabled, skipping PAT initialization too."); | ||
774 | } | ||
764 | } | 775 | } |
765 | 776 | ||
766 | void mtrr_ap_init(void) | 777 | void mtrr_ap_init(void) |
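Taken together, the two MTRR hunks move pat_init() out of get_mtrr_state() and gate it on MTRRs being usable. A condensed sketch of the resulting boot-CPU flow (the wrapper name is hypothetical; the calls are from the diff):

	static void __init mtrr_pat_boot_flow_sketch(void)
	{
		/* get_mtrr_state() no longer runs pat_init() itself. */
		__mtrr_enabled = get_mtrr_state();

		if (mtrr_enabled())
			mtrr_bp_pat_init();	/* prepare_set(); pat_init(); post_set(); */
		else
			pat_disable("MTRRs disabled, skipping PAT initialization too.");
	}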
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 951884dcc433..6c7ced07d16d 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h | |||
@@ -52,6 +52,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); | |||
52 | void fill_mtrr_var_range(unsigned int index, | 52 | void fill_mtrr_var_range(unsigned int index, |
53 | u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); | 53 | u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); |
54 | bool get_mtrr_state(void); | 54 | bool get_mtrr_state(void); |
55 | void mtrr_bp_pat_init(void); | ||
55 | 56 | ||
56 | extern void set_mtrr_ops(const struct mtrr_ops *ops); | 57 | extern void set_mtrr_ops(const struct mtrr_ops *ops); |
57 | 58 | ||
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 1f4acd68b98b..3fe45f84ced4 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c | |||
@@ -151,7 +151,7 @@ static void __init dtb_lapic_setup(void) | |||
151 | return; | 151 | return; |
152 | 152 | ||
153 | /* Did the boot loader set up the local APIC? */ | 153 | /* Did the boot loader set up the local APIC? */ |
154 | if (!cpu_has_apic) { | 154 | if (!boot_cpu_has(X86_FEATURE_APIC)) { |
155 | if (apic_force_enable(r.start)) | 155 | if (apic_force_enable(r.start)) |
156 | return; | 156 | return; |
157 | } | 157 | } |
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 8efa57a5f29e..2bb25c3fe2e8 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -260,19 +260,12 @@ int __die(const char *str, struct pt_regs *regs, long err) | |||
260 | unsigned long sp; | 260 | unsigned long sp; |
261 | #endif | 261 | #endif |
262 | printk(KERN_DEFAULT | 262 | printk(KERN_DEFAULT |
263 | "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | 263 | "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter, |
264 | #ifdef CONFIG_PREEMPT | 264 | IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", |
265 | printk("PREEMPT "); | 265 | IS_ENABLED(CONFIG_SMP) ? " SMP" : "", |
266 | #endif | 266 | debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", |
267 | #ifdef CONFIG_SMP | 267 | IS_ENABLED(CONFIG_KASAN) ? " KASAN" : ""); |
268 | printk("SMP "); | 268 | |
269 | #endif | ||
270 | if (debug_pagealloc_enabled()) | ||
271 | printk("DEBUG_PAGEALLOC "); | ||
272 | #ifdef CONFIG_KASAN | ||
273 | printk("KASAN"); | ||
274 | #endif | ||
275 | printk("\n"); | ||
276 | if (notify_die(DIE_OOPS, str, regs, err, | 269 | if (notify_die(DIE_OOPS, str, regs, err, |
277 | current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) | 270 | current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) |
278 | return 1; | 271 | return 1; |
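IS_ENABLED(CONFIG_FOO) expands to a compile-time 1 or 0, so the ternaries in the rewritten __die() cost nothing when an option is off; the idiom in miniature:

	/* One printk() replaces an #ifdef ladder; a disabled option
	 * contributes an empty string the compiler folds away. */
	printk(KERN_DEFAULT "%s: %04lx [#%d]%s%s\n", str, err & 0xffff, counter,
	       IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
	       IS_ENABLED(CONFIG_SMP)     ? " SMP"     : "");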
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/ebda.c index 992f442ca155..afe65dffee80 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/ebda.c | |||
@@ -38,7 +38,7 @@ void __init reserve_ebda_region(void) | |||
38 | * that the paravirt case can handle memory setup | 38 | * that the paravirt case can handle memory setup |
39 | * correctly, without our help. | 39 | * correctly, without our help. |
40 | */ | 40 | */ |
41 | if (paravirt_enabled()) | 41 | if (!x86_platform.legacy.ebda_search) |
42 | return; | 42 | return; |
43 | 43 | ||
44 | /* end of low (conventional) memory */ | 44 | /* end of low (conventional) memory */ |
diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c index dd9ca9b60ff3..aad34aafc0e0 100644 --- a/arch/x86/kernel/fpu/bugs.c +++ b/arch/x86/kernel/fpu/bugs.c | |||
@@ -21,11 +21,15 @@ static double __initdata y = 3145727.0; | |||
21 | * We should really only care about bugs here | 21 | * We should really only care about bugs here |
22 | * anyway. Not features. | 22 | * anyway. Not features. |
23 | */ | 23 | */ |
24 | static void __init check_fpu(void) | 24 | void __init fpu__init_check_bugs(void) |
25 | { | 25 | { |
26 | u32 cr0_saved; | 26 | u32 cr0_saved; |
27 | s32 fdiv_bug; | 27 | s32 fdiv_bug; |
28 | 28 | ||
29 | /* kernel_fpu_begin/end() relies on patched alternative instructions. */ | ||
30 | if (!boot_cpu_has(X86_FEATURE_FPU)) | ||
31 | return; | ||
32 | |||
29 | /* We might have CR0::TS set already, clear it: */ | 33 | /* We might have CR0::TS set already, clear it: */ |
30 | cr0_saved = read_cr0(); | 34 | cr0_saved = read_cr0(); |
31 | write_cr0(cr0_saved & ~X86_CR0_TS); | 35 | write_cr0(cr0_saved & ~X86_CR0_TS); |
@@ -59,13 +63,3 @@ static void __init check_fpu(void) | |||
59 | pr_warn("Hmm, FPU with FDIV bug\n"); | 63 | pr_warn("Hmm, FPU with FDIV bug\n"); |
60 | } | 64 | } |
61 | } | 65 | } |
62 | |||
63 | void __init fpu__init_check_bugs(void) | ||
64 | { | ||
65 | /* | ||
66 | * kernel_fpu_begin/end() in check_fpu() relies on the patched | ||
67 | * alternative instructions. | ||
68 | */ | ||
69 | if (cpu_has_fpu) | ||
70 | check_fpu(); | ||
71 | } | ||
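The probe that fpu__init_check_bugs() now runs inline is the classic Pentium FDIV test. A sketch of the arithmetic only — the 4195835.0 operand is the traditional partner of the 3145727.0 visible in the hunk header, and the real code brackets this with CR0.TS handling and kernel_fpu_begin()/end():

	static int __init fdiv_bug_probe_sketch(void)
	{
		volatile double x = 4195835.0, y = 3145727.0;

		/* A flawed divider makes y * (x / y) differ from x. */
		return y * (x / y) != x;
	}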
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 8e37cc8a539a..97027545a72d 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c | |||
@@ -217,14 +217,14 @@ static inline void fpstate_init_fstate(struct fregs_state *fp) | |||
217 | 217 | ||
218 | void fpstate_init(union fpregs_state *state) | 218 | void fpstate_init(union fpregs_state *state) |
219 | { | 219 | { |
220 | if (!cpu_has_fpu) { | 220 | if (!static_cpu_has(X86_FEATURE_FPU)) { |
221 | fpstate_init_soft(&state->soft); | 221 | fpstate_init_soft(&state->soft); |
222 | return; | 222 | return; |
223 | } | 223 | } |
224 | 224 | ||
225 | memset(state, 0, xstate_size); | 225 | memset(state, 0, xstate_size); |
226 | 226 | ||
227 | if (cpu_has_fxsr) | 227 | if (static_cpu_has(X86_FEATURE_FXSR)) |
228 | fpstate_init_fxstate(&state->fxsave); | 228 | fpstate_init_fxstate(&state->fxsave); |
229 | else | 229 | else |
230 | fpstate_init_fstate(&state->fsave); | 230 | fpstate_init_fstate(&state->fsave); |
@@ -237,7 +237,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) | |||
237 | dst_fpu->fpregs_active = 0; | 237 | dst_fpu->fpregs_active = 0; |
238 | dst_fpu->last_cpu = -1; | 238 | dst_fpu->last_cpu = -1; |
239 | 239 | ||
240 | if (!src_fpu->fpstate_active || !cpu_has_fpu) | 240 | if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU)) |
241 | return 0; | 241 | return 0; |
242 | 242 | ||
243 | WARN_ON_FPU(src_fpu != ¤t->thread.fpu); | 243 | WARN_ON_FPU(src_fpu != ¤t->thread.fpu); |
@@ -506,33 +506,6 @@ void fpu__clear(struct fpu *fpu) | |||
506 | * x87 math exception handling: | 506 | * x87 math exception handling: |
507 | */ | 507 | */ |
508 | 508 | ||
509 | static inline unsigned short get_fpu_cwd(struct fpu *fpu) | ||
510 | { | ||
511 | if (cpu_has_fxsr) { | ||
512 | return fpu->state.fxsave.cwd; | ||
513 | } else { | ||
514 | return (unsigned short)fpu->state.fsave.cwd; | ||
515 | } | ||
516 | } | ||
517 | |||
518 | static inline unsigned short get_fpu_swd(struct fpu *fpu) | ||
519 | { | ||
520 | if (cpu_has_fxsr) { | ||
521 | return fpu->state.fxsave.swd; | ||
522 | } else { | ||
523 | return (unsigned short)fpu->state.fsave.swd; | ||
524 | } | ||
525 | } | ||
526 | |||
527 | static inline unsigned short get_fpu_mxcsr(struct fpu *fpu) | ||
528 | { | ||
529 | if (cpu_has_xmm) { | ||
530 | return fpu->state.fxsave.mxcsr; | ||
531 | } else { | ||
532 | return MXCSR_DEFAULT; | ||
533 | } | ||
534 | } | ||
535 | |||
536 | int fpu__exception_code(struct fpu *fpu, int trap_nr) | 509 | int fpu__exception_code(struct fpu *fpu, int trap_nr) |
537 | { | 510 | { |
538 | int err; | 511 | int err; |
@@ -547,10 +520,15 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr) | |||
547 | * so if this combination doesn't produce any single exception, | 520 | * so if this combination doesn't produce any single exception, |
548 | * then we have a bad program that isn't synchronizing its FPU usage | 521 | * then we have a bad program that isn't synchronizing its FPU usage |
549 | * and it will suffer the consequences since we won't be able to | 522 | * and it will suffer the consequences since we won't be able to |
550 | * fully reproduce the context of the exception | 523 | * fully reproduce the context of the exception. |
551 | */ | 524 | */ |
552 | cwd = get_fpu_cwd(fpu); | 525 | if (boot_cpu_has(X86_FEATURE_FXSR)) { |
553 | swd = get_fpu_swd(fpu); | 526 | cwd = fpu->state.fxsave.cwd; |
527 | swd = fpu->state.fxsave.swd; | ||
528 | } else { | ||
529 | cwd = (unsigned short)fpu->state.fsave.cwd; | ||
530 | swd = (unsigned short)fpu->state.fsave.swd; | ||
531 | } | ||
554 | 532 | ||
555 | err = swd & ~cwd; | 533 | err = swd & ~cwd; |
556 | } else { | 534 | } else { |
@@ -560,7 +538,11 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr) | |||
560 | * unmasked exception was caught we must mask the exception mask bits | 538 | * unmasked exception was caught we must mask the exception mask bits |
561 | * at 0x1f80, and then use these to mask the exception bits at 0x3f. | 539 | * at 0x1f80, and then use these to mask the exception bits at 0x3f. |
562 | */ | 540 | */ |
563 | unsigned short mxcsr = get_fpu_mxcsr(fpu); | 541 | unsigned short mxcsr = MXCSR_DEFAULT; |
542 | |||
543 | if (boot_cpu_has(X86_FEATURE_XMM)) | ||
544 | mxcsr = fpu->state.fxsave.mxcsr; | ||
545 | |||
564 | err = ~(mxcsr >> 7) & mxcsr; | 546 | err = ~(mxcsr >> 7) & mxcsr; |
565 | } | 547 | } |
566 | 548 | ||
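The SIMD branch decodes unmasked exceptions straight from MXCSR: the status flags sit in bits 0-5 (0x3f) and the matching mask bits in bits 7-12 (0x1f80), so shifting the masks right by 7 lines them up with the flags. A worked example with a hypothetical register value:

	/*
	 * mxcsr = 0x1f01: every exception masked except invalid-op
	 * (IM, bit 7, clear), and the invalid-op flag (IE, bit 0) set.
	 *
	 *   mxcsr >> 7  = 0x3e              (masks aligned with flags)
	 *   err         = ~(mxcsr >> 7) & mxcsr;
	 *   err & 0x3f  = 0x01              -> an unmasked #I fired
	 */
	unsigned short mxcsr = 0x1f01;
	unsigned short err = ~(mxcsr >> 7) & mxcsr;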
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 54c86fffbf9f..aacfd7a82cec 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c | |||
@@ -29,22 +29,22 @@ static void fpu__init_cpu_generic(void) | |||
29 | unsigned long cr0; | 29 | unsigned long cr0; |
30 | unsigned long cr4_mask = 0; | 30 | unsigned long cr4_mask = 0; |
31 | 31 | ||
32 | if (cpu_has_fxsr) | 32 | if (boot_cpu_has(X86_FEATURE_FXSR)) |
33 | cr4_mask |= X86_CR4_OSFXSR; | 33 | cr4_mask |= X86_CR4_OSFXSR; |
34 | if (cpu_has_xmm) | 34 | if (boot_cpu_has(X86_FEATURE_XMM)) |
35 | cr4_mask |= X86_CR4_OSXMMEXCPT; | 35 | cr4_mask |= X86_CR4_OSXMMEXCPT; |
36 | if (cr4_mask) | 36 | if (cr4_mask) |
37 | cr4_set_bits(cr4_mask); | 37 | cr4_set_bits(cr4_mask); |
38 | 38 | ||
39 | cr0 = read_cr0(); | 39 | cr0 = read_cr0(); |
40 | cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ | 40 | cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ |
41 | if (!cpu_has_fpu) | 41 | if (!boot_cpu_has(X86_FEATURE_FPU)) |
42 | cr0 |= X86_CR0_EM; | 42 | cr0 |= X86_CR0_EM; |
43 | write_cr0(cr0); | 43 | write_cr0(cr0); |
44 | 44 | ||
45 | /* Flush out any pending x87 state: */ | 45 | /* Flush out any pending x87 state: */ |
46 | #ifdef CONFIG_MATH_EMULATION | 46 | #ifdef CONFIG_MATH_EMULATION |
47 | if (!cpu_has_fpu) | 47 | if (!boot_cpu_has(X86_FEATURE_FPU)) |
48 | fpstate_init_soft(¤t->thread.fpu.state.soft); | 48 | fpstate_init_soft(¤t->thread.fpu.state.soft); |
49 | else | 49 | else |
50 | #endif | 50 | #endif |
@@ -89,7 +89,7 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) | |||
89 | } | 89 | } |
90 | 90 | ||
91 | #ifndef CONFIG_MATH_EMULATION | 91 | #ifndef CONFIG_MATH_EMULATION |
92 | if (!cpu_has_fpu) { | 92 | if (!boot_cpu_has(X86_FEATURE_FPU)) { |
93 | pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n"); | 93 | pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n"); |
94 | for (;;) | 94 | for (;;) |
95 | asm volatile("hlt"); | 95 | asm volatile("hlt"); |
@@ -106,7 +106,7 @@ static void __init fpu__init_system_mxcsr(void) | |||
106 | { | 106 | { |
107 | unsigned int mask = 0; | 107 | unsigned int mask = 0; |
108 | 108 | ||
109 | if (cpu_has_fxsr) { | 109 | if (boot_cpu_has(X86_FEATURE_FXSR)) { |
110 | /* Static because GCC does not get 16-byte stack alignment right: */ | 110 | /* Static because GCC does not get 16-byte stack alignment right: */ |
111 | static struct fxregs_state fxregs __initdata; | 111 | static struct fxregs_state fxregs __initdata; |
112 | 112 | ||
@@ -212,7 +212,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) | |||
212 | * fpu__init_system_xstate(). | 212 | * fpu__init_system_xstate(). |
213 | */ | 213 | */ |
214 | 214 | ||
215 | if (!cpu_has_fpu) { | 215 | if (!boot_cpu_has(X86_FEATURE_FPU)) { |
216 | /* | 216 | /* |
217 | * Disable xsave as we do not support it if i387 | 217 | * Disable xsave as we do not support it if i387 |
218 | * emulation is enabled. | 218 | * emulation is enabled. |
@@ -221,7 +221,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) | |||
221 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | 221 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); |
222 | xstate_size = sizeof(struct swregs_state); | 222 | xstate_size = sizeof(struct swregs_state); |
223 | } else { | 223 | } else { |
224 | if (cpu_has_fxsr) | 224 | if (boot_cpu_has(X86_FEATURE_FXSR)) |
225 | xstate_size = sizeof(struct fxregs_state); | 225 | xstate_size = sizeof(struct fxregs_state); |
226 | else | 226 | else |
227 | xstate_size = sizeof(struct fregs_state); | 227 | xstate_size = sizeof(struct fregs_state); |
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 8bd1c003942a..81422dfb152b 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c | |||
@@ -21,7 +21,10 @@ int regset_xregset_fpregs_active(struct task_struct *target, const struct user_r | |||
21 | { | 21 | { |
22 | struct fpu *target_fpu = &target->thread.fpu; | 22 | struct fpu *target_fpu = &target->thread.fpu; |
23 | 23 | ||
24 | return (cpu_has_fxsr && target_fpu->fpstate_active) ? regset->n : 0; | 24 | if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active) |
25 | return regset->n; | ||
26 | else | ||
27 | return 0; | ||
25 | } | 28 | } |
26 | 29 | ||
27 | int xfpregs_get(struct task_struct *target, const struct user_regset *regset, | 30 | int xfpregs_get(struct task_struct *target, const struct user_regset *regset, |
@@ -30,7 +33,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
30 | { | 33 | { |
31 | struct fpu *fpu = &target->thread.fpu; | 34 | struct fpu *fpu = &target->thread.fpu; |
32 | 35 | ||
33 | if (!cpu_has_fxsr) | 36 | if (!boot_cpu_has(X86_FEATURE_FXSR)) |
34 | return -ENODEV; | 37 | return -ENODEV; |
35 | 38 | ||
36 | fpu__activate_fpstate_read(fpu); | 39 | fpu__activate_fpstate_read(fpu); |
@@ -47,7 +50,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
47 | struct fpu *fpu = &target->thread.fpu; | 50 | struct fpu *fpu = &target->thread.fpu; |
48 | int ret; | 51 | int ret; |
49 | 52 | ||
50 | if (!cpu_has_fxsr) | 53 | if (!boot_cpu_has(X86_FEATURE_FXSR)) |
51 | return -ENODEV; | 54 | return -ENODEV; |
52 | 55 | ||
53 | fpu__activate_fpstate_write(fpu); | 56 | fpu__activate_fpstate_write(fpu); |
@@ -65,7 +68,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
65 | * update the header bits in the xsave header, indicating the | 68 | * update the header bits in the xsave header, indicating the |
66 | * presence of FP and SSE state. | 69 | * presence of FP and SSE state. |
67 | */ | 70 | */ |
68 | if (cpu_has_xsave) | 71 | if (boot_cpu_has(X86_FEATURE_XSAVE)) |
69 | fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; | 72 | fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; |
70 | 73 | ||
71 | return ret; | 74 | return ret; |
@@ -79,7 +82,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, | |||
79 | struct xregs_state *xsave; | 82 | struct xregs_state *xsave; |
80 | int ret; | 83 | int ret; |
81 | 84 | ||
82 | if (!cpu_has_xsave) | 85 | if (!boot_cpu_has(X86_FEATURE_XSAVE)) |
83 | return -ENODEV; | 86 | return -ENODEV; |
84 | 87 | ||
85 | fpu__activate_fpstate_read(fpu); | 88 | fpu__activate_fpstate_read(fpu); |
@@ -108,7 +111,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, | |||
108 | struct xregs_state *xsave; | 111 | struct xregs_state *xsave; |
109 | int ret; | 112 | int ret; |
110 | 113 | ||
111 | if (!cpu_has_xsave) | 114 | if (!boot_cpu_has(X86_FEATURE_XSAVE)) |
112 | return -ENODEV; | 115 | return -ENODEV; |
113 | 116 | ||
114 | fpu__activate_fpstate_write(fpu); | 117 | fpu__activate_fpstate_write(fpu); |
@@ -275,10 +278,10 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
275 | 278 | ||
276 | fpu__activate_fpstate_read(fpu); | 279 | fpu__activate_fpstate_read(fpu); |
277 | 280 | ||
278 | if (!static_cpu_has(X86_FEATURE_FPU)) | 281 | if (!boot_cpu_has(X86_FEATURE_FPU)) |
279 | return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); | 282 | return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); |
280 | 283 | ||
281 | if (!cpu_has_fxsr) | 284 | if (!boot_cpu_has(X86_FEATURE_FXSR)) |
282 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 285 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
283 | &fpu->state.fsave, 0, | 286 | &fpu->state.fsave, 0, |
284 | -1); | 287 | -1); |
@@ -306,10 +309,10 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
306 | fpu__activate_fpstate_write(fpu); | 309 | fpu__activate_fpstate_write(fpu); |
307 | fpstate_sanitize_xstate(fpu); | 310 | fpstate_sanitize_xstate(fpu); |
308 | 311 | ||
309 | if (!static_cpu_has(X86_FEATURE_FPU)) | 312 | if (!boot_cpu_has(X86_FEATURE_FPU)) |
310 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); | 313 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); |
311 | 314 | ||
312 | if (!cpu_has_fxsr) | 315 | if (!boot_cpu_has(X86_FEATURE_FXSR)) |
313 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 316 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
314 | &fpu->state.fsave, 0, | 317 | &fpu->state.fsave, 0, |
315 | -1); | 318 | -1); |
@@ -325,7 +328,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
325 | * update the header bit in the xsave header, indicating the | 328 | * update the header bit in the xsave header, indicating the |
326 | * presence of FP. | 329 | * presence of FP. |
327 | */ | 330 | */ |
328 | if (cpu_has_xsave) | 331 | if (boot_cpu_has(X86_FEATURE_XSAVE)) |
329 | fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP; | 332 | fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP; |
330 | return ret; | 333 | return ret; |
331 | } | 334 | } |
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index b48ef35b28d4..4ea2a59483c7 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c | |||
@@ -190,7 +190,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu) | |||
190 | */ | 190 | */ |
191 | void fpu__init_cpu_xstate(void) | 191 | void fpu__init_cpu_xstate(void) |
192 | { | 192 | { |
193 | if (!cpu_has_xsave || !xfeatures_mask) | 193 | if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask) |
194 | return; | 194 | return; |
195 | 195 | ||
196 | cr4_set_bits(X86_CR4_OSXSAVE); | 196 | cr4_set_bits(X86_CR4_OSXSAVE); |
@@ -280,7 +280,7 @@ static void __init setup_xstate_comp(void) | |||
280 | xstate_comp_offsets[0] = 0; | 280 | xstate_comp_offsets[0] = 0; |
281 | xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space); | 281 | xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space); |
282 | 282 | ||
283 | if (!cpu_has_xsaves) { | 283 | if (!boot_cpu_has(X86_FEATURE_XSAVES)) { |
284 | for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { | 284 | for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { |
285 | if (xfeature_enabled(i)) { | 285 | if (xfeature_enabled(i)) { |
286 | xstate_comp_offsets[i] = xstate_offsets[i]; | 286 | xstate_comp_offsets[i] = xstate_offsets[i]; |
@@ -316,13 +316,13 @@ static void __init setup_init_fpu_buf(void) | |||
316 | WARN_ON_FPU(!on_boot_cpu); | 316 | WARN_ON_FPU(!on_boot_cpu); |
317 | on_boot_cpu = 0; | 317 | on_boot_cpu = 0; |
318 | 318 | ||
319 | if (!cpu_has_xsave) | 319 | if (!boot_cpu_has(X86_FEATURE_XSAVE)) |
320 | return; | 320 | return; |
321 | 321 | ||
322 | setup_xstate_features(); | 322 | setup_xstate_features(); |
323 | print_xstate_features(); | 323 | print_xstate_features(); |
324 | 324 | ||
325 | if (cpu_has_xsaves) { | 325 | if (boot_cpu_has(X86_FEATURE_XSAVES)) { |
326 | init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; | 326 | init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; |
327 | init_fpstate.xsave.header.xfeatures = xfeatures_mask; | 327 | init_fpstate.xsave.header.xfeatures = xfeatures_mask; |
328 | } | 328 | } |
@@ -417,7 +417,7 @@ static int xfeature_size(int xfeature_nr) | |||
417 | */ | 417 | */ |
418 | static int using_compacted_format(void) | 418 | static int using_compacted_format(void) |
419 | { | 419 | { |
420 | return cpu_has_xsaves; | 420 | return boot_cpu_has(X86_FEATURE_XSAVES); |
421 | } | 421 | } |
422 | 422 | ||
423 | static void __xstate_dump_leaves(void) | 423 | static void __xstate_dump_leaves(void) |
@@ -549,7 +549,7 @@ static unsigned int __init calculate_xstate_size(void) | |||
549 | unsigned int eax, ebx, ecx, edx; | 549 | unsigned int eax, ebx, ecx, edx; |
550 | unsigned int calculated_xstate_size; | 550 | unsigned int calculated_xstate_size; |
551 | 551 | ||
552 | if (!cpu_has_xsaves) { | 552 | if (!boot_cpu_has(X86_FEATURE_XSAVES)) { |
553 | /* | 553 | /* |
554 | * - CPUID function 0DH, sub-function 0: | 554 | * - CPUID function 0DH, sub-function 0: |
555 | * EBX enumerates the size (in bytes) required by | 555 | * EBX enumerates the size (in bytes) required by |
@@ -630,7 +630,7 @@ void __init fpu__init_system_xstate(void) | |||
630 | WARN_ON_FPU(!on_boot_cpu); | 630 | WARN_ON_FPU(!on_boot_cpu); |
631 | on_boot_cpu = 0; | 631 | on_boot_cpu = 0; |
632 | 632 | ||
633 | if (!cpu_has_xsave) { | 633 | if (!boot_cpu_has(X86_FEATURE_XSAVE)) { |
634 | pr_info("x86/fpu: Legacy x87 FPU detected.\n"); | 634 | pr_info("x86/fpu: Legacy x87 FPU detected.\n"); |
635 | return; | 635 | return; |
636 | } | 636 | } |
@@ -667,7 +667,7 @@ void __init fpu__init_system_xstate(void) | |||
667 | pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", | 667 | pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", |
668 | xfeatures_mask, | 668 | xfeatures_mask, |
669 | xstate_size, | 669 | xstate_size, |
670 | cpu_has_xsaves ? "compacted" : "standard"); | 670 | boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard"); |
671 | } | 671 | } |
672 | 672 | ||
673 | /* | 673 | /* |
@@ -678,7 +678,7 @@ void fpu__resume_cpu(void) | |||
678 | /* | 678 | /* |
679 | * Restore XCR0 on xsave capable CPUs: | 679 | * Restore XCR0 on xsave capable CPUs: |
680 | */ | 680 | */ |
681 | if (cpu_has_xsave) | 681 | if (boot_cpu_has(X86_FEATURE_XSAVE)) |
682 | xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); | 682 | xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); |
683 | } | 683 | } |
684 | 684 | ||
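In the XSAVES path above, bit 63 of xcomp_bv tags the save area as using the compacted layout, with the enabled-feature mask in the low bits; spelled out (the macro name is illustrative, not defined in this tree):

	#define XCOMP_BV_COMPACTED_FORMAT	((u64)1 << 63)

	init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
					     xfeatures_mask;
	init_fpstate.xsave.header.xfeatures = xfeatures_mask;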
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 2911ef3a9f1c..d784bb547a9d 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -34,6 +34,8 @@ asmlinkage __visible void __init i386_start_kernel(void) | |||
34 | cr4_init_shadow(); | 34 | cr4_init_shadow(); |
35 | sanitize_boot_params(&boot_params); | 35 | sanitize_boot_params(&boot_params); |
36 | 36 | ||
37 | x86_early_init_platform_quirks(); | ||
38 | |||
37 | /* Call the subarch specific early setup function */ | 39 | /* Call the subarch specific early setup function */ |
38 | switch (boot_params.hdr.hardware_subarch) { | 40 | switch (boot_params.hdr.hardware_subarch) { |
39 | case X86_SUBARCH_INTEL_MID: | 41 | case X86_SUBARCH_INTEL_MID: |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 1f4422d5c8d0..b72fb0b71dd1 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -182,6 +182,7 @@ void __init x86_64_start_reservations(char *real_mode_data) | |||
182 | if (!boot_params.hdr.version) | 182 | if (!boot_params.hdr.version) |
183 | copy_bootdata(__va(real_mode_data)); | 183 | copy_bootdata(__va(real_mode_data)); |
184 | 184 | ||
185 | x86_early_init_platform_quirks(); | ||
185 | reserve_ebda_region(); | 186 | reserve_ebda_region(); |
186 | 187 | ||
187 | switch (boot_params.hdr.hardware_subarch) { | 188 | switch (boot_params.hdr.hardware_subarch) { |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index af1112980dd4..6f8902b0d151 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -555,62 +555,53 @@ early_idt_handler_common: | |||
555 | */ | 555 | */ |
556 | cld | 556 | cld |
557 | 557 | ||
558 | cmpl $2,(%esp) # X86_TRAP_NMI | ||
559 | je .Lis_nmi # Ignore NMI | ||
560 | |||
561 | cmpl $2,%ss:early_recursion_flag | ||
562 | je hlt_loop | ||
563 | incl %ss:early_recursion_flag | 558 | incl %ss:early_recursion_flag |
564 | 559 | ||
565 | push %eax # 16(%esp) | 560 | /* The vector number is in pt_regs->gs */ |
566 | push %ecx # 12(%esp) | ||
567 | push %edx # 8(%esp) | ||
568 | push %ds # 4(%esp) | ||
569 | push %es # 0(%esp) | ||
570 | movl $(__KERNEL_DS),%eax | ||
571 | movl %eax,%ds | ||
572 | movl %eax,%es | ||
573 | |||
574 | cmpl $(__KERNEL_CS),32(%esp) | ||
575 | jne 10f | ||
576 | 561 | ||
577 | leal 28(%esp),%eax # Pointer to %eip | 562 | cld |
578 | call early_fixup_exception | 563 | pushl %fs /* pt_regs->fs */ |
579 | andl %eax,%eax | 564 | movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */ |
580 | jnz ex_entry /* found an exception entry */ | 565 | pushl %es /* pt_regs->es */ |
581 | 566 | movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */ | |
582 | 10: | 567 | pushl %ds /* pt_regs->ds */ |
583 | #ifdef CONFIG_PRINTK | 568 | movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */ |
584 | xorl %eax,%eax | 569 | pushl %eax /* pt_regs->ax */ |
585 | movw %ax,2(%esp) /* clean up the segment values on some cpus */ | 570 | pushl %ebp /* pt_regs->bp */ |
586 | movw %ax,6(%esp) | 571 | pushl %edi /* pt_regs->di */ |
587 | movw %ax,34(%esp) | 572 | pushl %esi /* pt_regs->si */ |
588 | leal 40(%esp),%eax | 573 | pushl %edx /* pt_regs->dx */ |
589 | pushl %eax /* %esp before the exception */ | 574 | pushl %ecx /* pt_regs->cx */ |
590 | pushl %ebx | 575 | pushl %ebx /* pt_regs->bx */ |
591 | pushl %ebp | 576 | |
592 | pushl %esi | 577 | /* Fix up DS and ES */ |
593 | pushl %edi | 578 | movl $(__KERNEL_DS), %ecx |
594 | movl %cr2,%eax | 579 | movl %ecx, %ds |
595 | pushl %eax | 580 | movl %ecx, %es |
596 | pushl (20+6*4)(%esp) /* trapno */ | 581 | |
597 | pushl $fault_msg | 582 | /* Load the vector number into EDX */ |
598 | call printk | 583 | movl PT_GS(%esp), %edx |
599 | #endif | 584 | |
600 | call dump_stack | 585 | /* Load GS into pt_regs->gs and clear high bits */ |
601 | hlt_loop: | 586 | movw %gs, PT_GS(%esp) |
602 | hlt | 587 | movw $0, PT_GS+2(%esp) |
603 | jmp hlt_loop | 588 | |
604 | 589 | movl %esp, %eax /* args are pt_regs (EAX), trapnr (EDX) */ | |
605 | ex_entry: | 590 | call early_fixup_exception |
606 | pop %es | 591 | |
607 | pop %ds | 592 | popl %ebx /* pt_regs->bx */ |
608 | pop %edx | 593 | popl %ecx /* pt_regs->cx */ |
609 | pop %ecx | 594 | popl %edx /* pt_regs->dx */ |
610 | pop %eax | 595 | popl %esi /* pt_regs->si */ |
611 | decl %ss:early_recursion_flag | 596 | popl %edi /* pt_regs->di */ |
612 | .Lis_nmi: | 597 | popl %ebp /* pt_regs->bp */ |
613 | addl $8,%esp /* drop vector number and error code */ | 598 | popl %eax /* pt_regs->ax */ |
599 | popl %ds /* pt_regs->ds */ | ||
600 | popl %es /* pt_regs->es */ | ||
601 | popl %fs /* pt_regs->fs */ | ||
602 | popl %gs /* pt_regs->gs */ | ||
603 | decl %ss:early_recursion_flag | ||
604 | addl $4, %esp /* pop pt_regs->orig_ax */ | ||
614 | iret | 605 | iret |
615 | ENDPROC(early_idt_handler_common) | 606 | ENDPROC(early_idt_handler_common) |
616 | 607 | ||
@@ -647,10 +638,14 @@ ignore_int: | |||
647 | popl %eax | 638 | popl %eax |
648 | #endif | 639 | #endif |
649 | iret | 640 | iret |
641 | |||
642 | hlt_loop: | ||
643 | hlt | ||
644 | jmp hlt_loop | ||
650 | ENDPROC(ignore_int) | 645 | ENDPROC(ignore_int) |
651 | __INITDATA | 646 | __INITDATA |
652 | .align 4 | 647 | .align 4 |
653 | early_recursion_flag: | 648 | GLOBAL(early_recursion_flag) |
654 | .long 0 | 649 | .long 0 |
655 | 650 | ||
656 | __REFDATA | 651 | __REFDATA |
@@ -715,19 +710,6 @@ __INITRODATA | |||
715 | int_msg: | 710 | int_msg: |
716 | .asciz "Unknown interrupt or fault at: %p %p %p\n" | 711 | .asciz "Unknown interrupt or fault at: %p %p %p\n" |
717 | 712 | ||
718 | fault_msg: | ||
719 | /* fault info: */ | ||
720 | .ascii "BUG: Int %d: CR2 %p\n" | ||
721 | /* regs pushed in early_idt_handler: */ | ||
722 | .ascii " EDI %p ESI %p EBP %p EBX %p\n" | ||
723 | .ascii " ESP %p ES %p DS %p\n" | ||
724 | .ascii " EDX %p ECX %p EAX %p\n" | ||
725 | /* fault frame: */ | ||
726 | .ascii " vec %p err %p EIP %p CS %p flg %p\n" | ||
727 | .ascii "Stack: %p %p %p %p %p %p %p %p\n" | ||
728 | .ascii " %p %p %p %p %p %p %p %p\n" | ||
729 | .asciz " %p %p %p %p %p %p %p %p\n" | ||
730 | |||
731 | #include "../../x86/xen/xen-head.S" | 713 | #include "../../x86/xen/xen-head.S" |
732 | 714 | ||
733 | /* | 715 | /* |
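The rewritten 32-bit stub builds a genuine struct pt_regs (stashing the vector number in the pt_regs->gs slot on entry) and hands it to C. Per the register setup in the assembly above, the C-side contract is:

	/* regs arrives in %eax, trapnr in %edx (32-bit regparm
	 * convention); prototype per the new common fixup path: */
	void early_fixup_exception(struct pt_regs *regs, int trapnr);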
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 22fbf9df61bb..5df831ef1442 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <asm/processor-flags.h> | 20 | #include <asm/processor-flags.h> |
21 | #include <asm/percpu.h> | 21 | #include <asm/percpu.h> |
22 | #include <asm/nops.h> | 22 | #include <asm/nops.h> |
23 | #include "../entry/calling.h" | ||
23 | 24 | ||
24 | #ifdef CONFIG_PARAVIRT | 25 | #ifdef CONFIG_PARAVIRT |
25 | #include <asm/asm-offsets.h> | 26 | #include <asm/asm-offsets.h> |
@@ -64,6 +65,14 @@ startup_64: | |||
64 | * tables and then reload them. | 65 | * tables and then reload them. |
65 | */ | 66 | */ |
66 | 67 | ||
68 | /* | ||
69 | * Set up the stack for verify_cpu(). "-8" because stack_start is defined | ||
70 | * this way, see below. Our best guess is a NULL ptr for stack | ||
71 | * termination heuristics and we don't want to break anything which | ||
72 | * might depend on it (kgdb, ...). | ||
73 | */ | ||
74 | leaq (__end_init_task - 8)(%rip), %rsp | ||
75 | |||
67 | /* Sanitize CPU configuration */ | 76 | /* Sanitize CPU configuration */ |
68 | call verify_cpu | 77 | call verify_cpu |
69 | 78 | ||
@@ -350,90 +359,48 @@ early_idt_handler_common: | |||
350 | */ | 359 | */ |
351 | cld | 360 | cld |
352 | 361 | ||
353 | cmpl $2,(%rsp) # X86_TRAP_NMI | ||
354 | je .Lis_nmi # Ignore NMI | ||
355 | |||
356 | cmpl $2,early_recursion_flag(%rip) | ||
357 | jz 1f | ||
358 | incl early_recursion_flag(%rip) | 362 | incl early_recursion_flag(%rip) |
359 | 363 | ||
360 | pushq %rax # 64(%rsp) | 364 | /* The vector number is currently in the pt_regs->di slot. */ |
361 | pushq %rcx # 56(%rsp) | 365 | pushq %rsi /* pt_regs->si */ |
362 | pushq %rdx # 48(%rsp) | 366 | movq 8(%rsp), %rsi /* RSI = vector number */ |
363 | pushq %rsi # 40(%rsp) | 367 | movq %rdi, 8(%rsp) /* pt_regs->di = RDI */ |
364 | pushq %rdi # 32(%rsp) | 368 | pushq %rdx /* pt_regs->dx */ |
365 | pushq %r8 # 24(%rsp) | 369 | pushq %rcx /* pt_regs->cx */ |
366 | pushq %r9 # 16(%rsp) | 370 | pushq %rax /* pt_regs->ax */ |
367 | pushq %r10 # 8(%rsp) | 371 | pushq %r8 /* pt_regs->r8 */ |
368 | pushq %r11 # 0(%rsp) | 372 | pushq %r9 /* pt_regs->r9 */ |
369 | 373 | pushq %r10 /* pt_regs->r10 */ | |
370 | cmpl $__KERNEL_CS,96(%rsp) | 374 | pushq %r11 /* pt_regs->r11 */ |
371 | jne 11f | 375 | pushq %rbx /* pt_regs->bx */ |
372 | 376 | pushq %rbp /* pt_regs->bp */ | |
373 | cmpl $14,72(%rsp) # Page fault? | 377 | pushq %r12 /* pt_regs->r12 */ |
378 | pushq %r13 /* pt_regs->r13 */ | ||
379 | pushq %r14 /* pt_regs->r14 */ | ||
380 | pushq %r15 /* pt_regs->r15 */ | ||
381 | |||
382 | cmpq $14,%rsi /* Page fault? */ | ||
374 | jnz 10f | 383 | jnz 10f |
375 | GET_CR2_INTO(%rdi) # can clobber any volatile register if pv | 384 | GET_CR2_INTO(%rdi) /* Can clobber any volatile register if pv */ |
376 | call early_make_pgtable | 385 | call early_make_pgtable |
377 | andl %eax,%eax | 386 | andl %eax,%eax |
378 | jz 20f # All good | 387 | jz 20f /* All good */ |
379 | 388 | ||
380 | 10: | 389 | 10: |
381 | leaq 88(%rsp),%rdi # Pointer to %rip | 390 | movq %rsp,%rdi /* RDI = pt_regs; RSI is already trapnr */ |
382 | call early_fixup_exception | 391 | call early_fixup_exception |
383 | andl %eax,%eax | 392 | |
384 | jnz 20f # Found an exception entry | 393 | 20: |
385 | |||
386 | 11: | ||
387 | #ifdef CONFIG_EARLY_PRINTK | ||
388 | GET_CR2_INTO(%r9) # can clobber any volatile register if pv | ||
389 | movl 80(%rsp),%r8d # error code | ||
390 | movl 72(%rsp),%esi # vector number | ||
391 | movl 96(%rsp),%edx # %cs | ||
392 | movq 88(%rsp),%rcx # %rip | ||
393 | xorl %eax,%eax | ||
394 | leaq early_idt_msg(%rip),%rdi | ||
395 | call early_printk | ||
396 | cmpl $2,early_recursion_flag(%rip) | ||
397 | jz 1f | ||
398 | call dump_stack | ||
399 | #ifdef CONFIG_KALLSYMS | ||
400 | leaq early_idt_ripmsg(%rip),%rdi | ||
401 | movq 40(%rsp),%rsi # %rip again | ||
402 | call __print_symbol | ||
403 | #endif | ||
404 | #endif /* EARLY_PRINTK */ | ||
405 | 1: hlt | ||
406 | jmp 1b | ||
407 | |||
408 | 20: # Exception table entry found or page table generated | ||
409 | popq %r11 | ||
410 | popq %r10 | ||
411 | popq %r9 | ||
412 | popq %r8 | ||
413 | popq %rdi | ||
414 | popq %rsi | ||
415 | popq %rdx | ||
416 | popq %rcx | ||
417 | popq %rax | ||
418 | decl early_recursion_flag(%rip) | 394 | decl early_recursion_flag(%rip) |
419 | .Lis_nmi: | 395 | jmp restore_regs_and_iret |
420 | addq $16,%rsp # drop vector number and error code | ||
421 | INTERRUPT_RETURN | ||
422 | ENDPROC(early_idt_handler_common) | 396 | ENDPROC(early_idt_handler_common) |
423 | 397 | ||
424 | __INITDATA | 398 | __INITDATA |
425 | 399 | ||
426 | .balign 4 | 400 | .balign 4 |
427 | early_recursion_flag: | 401 | GLOBAL(early_recursion_flag) |
428 | .long 0 | 402 | .long 0 |
429 | 403 | ||
430 | #ifdef CONFIG_EARLY_PRINTK | ||
431 | early_idt_msg: | ||
432 | .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n" | ||
433 | early_idt_ripmsg: | ||
434 | .asciz "RIP %s\n" | ||
435 | #endif /* CONFIG_EARLY_PRINTK */ | ||
436 | |||
437 | #define NEXT_PAGE(name) \ | 404 | #define NEXT_PAGE(name) \ |
438 | .balign PAGE_SIZE; \ | 405 | .balign PAGE_SIZE; \ |
439 | GLOBAL(name) | 406 | GLOBAL(name) |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index a1f0e4a5c47e..f112af7aa62e 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -54,7 +54,7 @@ struct hpet_dev { | |||
54 | char name[10]; | 54 | char name[10]; |
55 | }; | 55 | }; |
56 | 56 | ||
57 | inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev) | 57 | static inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev) |
58 | { | 58 | { |
59 | return container_of(evtdev, struct hpet_dev, evt); | 59 | return container_of(evtdev, struct hpet_dev, evt); |
60 | } | 60 | } |
@@ -773,7 +773,6 @@ static struct clocksource clocksource_hpet = { | |||
773 | .mask = HPET_MASK, | 773 | .mask = HPET_MASK, |
774 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 774 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
775 | .resume = hpet_resume_counter, | 775 | .resume = hpet_resume_counter, |
776 | .archdata = { .vclock_mode = VCLOCK_HPET }, | ||
777 | }; | 776 | }; |
778 | 777 | ||
779 | static int hpet_clocksource_register(void) | 778 | static int hpet_clocksource_register(void) |
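Marking EVT_TO_HPET_DEV() static inline silences the missing-prototype warning without changing code generation; container_of() is plain pointer arithmetic from an embedded member back to its parent:

	/* Equivalent to the container_of() above, spelled out: */
	struct hpet_dev *hdev = (struct hpet_dev *)
		((char *)evtdev - offsetof(struct hpet_dev, evt));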
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index e565e0e4d216..fc25f698d792 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/cpu.h> | 13 | #include <linux/cpu.h> |
14 | #include <asm/kprobes.h> | 14 | #include <asm/kprobes.h> |
15 | #include <asm/alternative.h> | 15 | #include <asm/alternative.h> |
16 | #include <asm/text-patching.h> | ||
16 | 17 | ||
17 | #ifdef HAVE_JUMP_LABEL | 18 | #ifdef HAVE_JUMP_LABEL |
18 | 19 | ||
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 2af478e3fd4e..f2356bda2b05 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c | |||
@@ -19,8 +19,7 @@ | |||
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/mm.h> | 20 | #include <linux/mm.h> |
21 | #include <linux/efi.h> | 21 | #include <linux/efi.h> |
22 | #include <linux/verify_pefile.h> | 22 | #include <linux/verification.h> |
23 | #include <keys/system_keyring.h> | ||
24 | 23 | ||
25 | #include <asm/bootparam.h> | 24 | #include <asm/bootparam.h> |
26 | #include <asm/setup.h> | 25 | #include <asm/setup.h> |
@@ -529,18 +528,9 @@ static int bzImage64_cleanup(void *loader_data) | |||
529 | #ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG | 528 | #ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG |
530 | static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len) | 529 | static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len) |
531 | { | 530 | { |
532 | bool trusted; | 531 | return verify_pefile_signature(kernel, kernel_len, |
533 | int ret; | 532 | NULL, |
534 | 533 | VERIFYING_KEXEC_PE_SIGNATURE); | |
535 | ret = verify_pefile_signature(kernel, kernel_len, | ||
536 | system_trusted_keyring, | ||
537 | VERIFYING_KEXEC_PE_SIGNATURE, | ||
538 | &trusted); | ||
539 | if (ret < 0) | ||
540 | return ret; | ||
541 | if (!trusted) | ||
542 | return -EKEYREJECTED; | ||
543 | return 0; | ||
544 | } | 534 | } |
545 | #endif | 535 | #endif |
546 | 536 | ||
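The reworked <linux/verification.h> interface folds the trust decision into the verifier itself, so there is no separate "trusted" out-parameter to check; judging from this hunk, a NULL keyring selects the default trusted keys:

	/* New-style call: one return code carries the whole verdict. */
	int ret = verify_pefile_signature(kernel, kernel_len,
					  NULL, /* default trusted keyring */
					  VERIFYING_KEXEC_PE_SIGNATURE);
	if (ret < 0)
		return ret;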
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 2da6ee9ae69b..04cde527d728 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/uaccess.h> | 45 | #include <linux/uaccess.h> |
46 | #include <linux/memory.h> | 46 | #include <linux/memory.h> |
47 | 47 | ||
48 | #include <asm/text-patching.h> | ||
48 | #include <asm/debugreg.h> | 49 | #include <asm/debugreg.h> |
49 | #include <asm/apicdef.h> | 50 | #include <asm/apicdef.h> |
50 | #include <asm/apic.h> | 51 | #include <asm/apic.h> |
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index ae703acb85c1..38cf7a741250 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <linux/ftrace.h> | 51 | #include <linux/ftrace.h> |
52 | #include <linux/frame.h> | 52 | #include <linux/frame.h> |
53 | 53 | ||
54 | #include <asm/text-patching.h> | ||
54 | #include <asm/cacheflush.h> | 55 | #include <asm/cacheflush.h> |
55 | #include <asm/desc.h> | 56 | #include <asm/desc.h> |
56 | #include <asm/pgtable.h> | 57 | #include <asm/pgtable.h> |
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 7b3b9d15c47a..4425f593f0ec 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/kallsyms.h> | 29 | #include <linux/kallsyms.h> |
30 | #include <linux/ftrace.h> | 30 | #include <linux/ftrace.h> |
31 | 31 | ||
32 | #include <asm/text-patching.h> | ||
32 | #include <asm/cacheflush.h> | 33 | #include <asm/cacheflush.h> |
33 | #include <asm/desc.h> | 34 | #include <asm/desc.h> |
34 | #include <asm/pgtable.h> | 35 | #include <asm/pgtable.h> |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index dc1207e2f193..eea2a6f72b31 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -285,14 +285,6 @@ static void __init paravirt_ops_setup(void) | |||
285 | { | 285 | { |
286 | pv_info.name = "KVM"; | 286 | pv_info.name = "KVM"; |
287 | 287 | ||
288 | /* | ||
289 | * KVM isn't paravirt in the sense of paravirt_enabled. A KVM | ||
290 | * guest kernel works like a bare metal kernel with additional | ||
291 | * features, and paravirt_enabled is about features that are | ||
292 | * missing. | ||
293 | */ | ||
294 | pv_info.paravirt_enabled = 0; | ||
295 | |||
296 | if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) | 288 | if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) |
297 | pv_cpu_ops.io_delay = kvm_io_delay; | 289 | pv_cpu_ops.io_delay = kvm_io_delay; |
298 | 290 | ||
diff --git a/arch/x86/kernel/livepatch.c b/arch/x86/kernel/livepatch.c deleted file mode 100644 index 92fc1a51f994..000000000000 --- a/arch/x86/kernel/livepatch.c +++ /dev/null | |||
@@ -1,70 +0,0 @@ | |||
1 | /* | ||
2 | * livepatch.c - x86-specific Kernel Live Patching Core | ||
3 | * | ||
4 | * Copyright (C) 2014 Seth Jennings <sjenning@redhat.com> | ||
5 | * Copyright (C) 2014 SUSE | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version 2 | ||
10 | * of the License, or (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | #include <linux/module.h> | ||
22 | #include <linux/uaccess.h> | ||
23 | #include <asm/elf.h> | ||
24 | #include <asm/livepatch.h> | ||
25 | |||
26 | /** | ||
27 | * klp_write_module_reloc() - write a relocation in a module | ||
28 | * @mod: module in which the section to be modified is found | ||
29 | * @type: ELF relocation type (see asm/elf.h) | ||
30 | * @loc: address that the relocation should be written to | ||
31 | * @value: relocation value (sym address + addend) | ||
32 | * | ||
33 | * This function writes a relocation to the specified location for | ||
34 | * a particular module. | ||
35 | */ | ||
36 | int klp_write_module_reloc(struct module *mod, unsigned long type, | ||
37 | unsigned long loc, unsigned long value) | ||
38 | { | ||
39 | size_t size = 4; | ||
40 | unsigned long val; | ||
41 | unsigned long core = (unsigned long)mod->core_layout.base; | ||
42 | unsigned long core_size = mod->core_layout.size; | ||
43 | |||
44 | switch (type) { | ||
45 | case R_X86_64_NONE: | ||
46 | return 0; | ||
47 | case R_X86_64_64: | ||
48 | val = value; | ||
49 | size = 8; | ||
50 | break; | ||
51 | case R_X86_64_32: | ||
52 | val = (u32)value; | ||
53 | break; | ||
54 | case R_X86_64_32S: | ||
55 | val = (s32)value; | ||
56 | break; | ||
57 | case R_X86_64_PC32: | ||
58 | val = (u32)(value - loc); | ||
59 | break; | ||
60 | default: | ||
61 | /* unsupported relocation type */ | ||
62 | return -EINVAL; | ||
63 | } | ||
64 | |||
65 | if (loc < core || loc >= core + core_size) | ||
66 | /* loc does not point to any symbol inside the module */ | ||
67 | return -EINVAL; | ||
68 | |||
69 | return probe_kernel_write((void *)loc, &val, size); | ||
70 | } | ||
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index ba7fbba9831b..5a294e48b185 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -538,3 +538,48 @@ overflow: | |||
538 | return -ENOEXEC; | 538 | return -ENOEXEC; |
539 | } | 539 | } |
540 | #endif /* CONFIG_KEXEC_FILE */ | 540 | #endif /* CONFIG_KEXEC_FILE */ |
541 | |||
542 | static int | ||
543 | kexec_mark_range(unsigned long start, unsigned long end, bool protect) | ||
544 | { | ||
545 | struct page *page; | ||
546 | unsigned int nr_pages; | ||
547 | |||
548 | /* | ||
549 | * For physical range: [start, end]. We must skip the unassigned | ||
550 | * crashk resource with zero-valued "end" member. | ||
551 | */ | ||
552 | if (!end || start > end) | ||
553 | return 0; | ||
554 | |||
555 | page = pfn_to_page(start >> PAGE_SHIFT); | ||
556 | nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1; | ||
557 | if (protect) | ||
558 | return set_pages_ro(page, nr_pages); | ||
559 | else | ||
560 | return set_pages_rw(page, nr_pages); | ||
561 | } | ||
562 | |||
563 | static void kexec_mark_crashkres(bool protect) | ||
564 | { | ||
565 | unsigned long control; | ||
566 | |||
567 | kexec_mark_range(crashk_low_res.start, crashk_low_res.end, protect); | ||
568 | |||
569 | /* Don't touch the control code page used in crash_kexec(). */ | ||
570 | control = PFN_PHYS(page_to_pfn(kexec_crash_image->control_code_page)); | ||
571 | /* Control code page is located in the 2nd page. */ | ||
572 | kexec_mark_range(crashk_res.start, control + PAGE_SIZE - 1, protect); | ||
573 | control += KEXEC_CONTROL_PAGE_SIZE; | ||
574 | kexec_mark_range(control, crashk_res.end, protect); | ||
575 | } | ||
576 | |||
577 | void arch_kexec_protect_crashkres(void) | ||
578 | { | ||
579 | kexec_mark_crashkres(true); | ||
580 | } | ||
581 | |||
582 | void arch_kexec_unprotect_crashkres(void) | ||
583 | { | ||
584 | kexec_mark_crashkres(false); | ||
585 | } | ||
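kexec_mark_range() treats [start, end] as an inclusive physical range, so the page count includes both endpoints. A worked example assuming 4 KiB pages:

	/*
	 * start = 0x1000, end = 0x3fff, PAGE_SHIFT = 12:
	 *   start >> PAGE_SHIFT = 1
	 *   end   >> PAGE_SHIFT = 3
	 *   nr_pages = 3 - 1 + 1 = 3	(pages 1, 2 and 3)
	 */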
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index ed48a9f465f8..61924222a9e1 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S | |||
@@ -182,7 +182,8 @@ GLOBAL(ftrace_graph_call) | |||
182 | jmp ftrace_stub | 182 | jmp ftrace_stub |
183 | #endif | 183 | #endif |
184 | 184 | ||
185 | GLOBAL(ftrace_stub) | 185 | /* This is weak to keep gas from relaxing the jumps */ |
186 | WEAK(ftrace_stub) | ||
186 | retq | 187 | retq |
187 | END(ftrace_caller) | 188 | END(ftrace_caller) |
188 | 189 | ||
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 005c03e93fc5..477ae806c2fa 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/jump_label.h> | 31 | #include <linux/jump_label.h> |
32 | #include <linux/random.h> | 32 | #include <linux/random.h> |
33 | 33 | ||
34 | #include <asm/text-patching.h> | ||
34 | #include <asm/page.h> | 35 | #include <asm/page.h> |
35 | #include <asm/pgtable.h> | 36 | #include <asm/pgtable.h> |
36 | #include <asm/setup.h> | 37 | #include <asm/setup.h> |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index f08ac28b8136..7b3b3f24c3ea 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -294,7 +294,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) | |||
294 | 294 | ||
295 | struct pv_info pv_info = { | 295 | struct pv_info pv_info = { |
296 | .name = "bare hardware", | 296 | .name = "bare hardware", |
297 | .paravirt_enabled = 0, | ||
298 | .kernel_rpl = 0, | 297 | .kernel_rpl = 0, |
299 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ | 298 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ |
300 | 299 | ||
@@ -339,8 +338,10 @@ __visible struct pv_cpu_ops pv_cpu_ops = { | |||
339 | .write_cr8 = native_write_cr8, | 338 | .write_cr8 = native_write_cr8, |
340 | #endif | 339 | #endif |
341 | .wbinvd = native_wbinvd, | 340 | .wbinvd = native_wbinvd, |
342 | .read_msr = native_read_msr_safe, | 341 | .read_msr = native_read_msr, |
343 | .write_msr = native_write_msr_safe, | 342 | .write_msr = native_write_msr, |
343 | .read_msr_safe = native_read_msr_safe, | ||
344 | .write_msr_safe = native_write_msr_safe, | ||
344 | .read_pmc = native_read_pmc, | 345 | .read_pmc = native_read_pmc, |
345 | .load_tr_desc = native_load_tr_desc, | 346 | .load_tr_desc = native_load_tr_desc, |
346 | .set_ldt = native_set_ldt, | 347 | .set_ldt = native_set_ldt, |
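With read_msr/write_msr now wired to the faulting native accessors, code that probes a possibly-absent MSR must opt into the *_safe entry points explicitly. A minimal sketch (the MSR constant is hypothetical):

	u64 val;

	if (rdmsrl_safe(MSR_HYPOTHETICAL_FEATURE, &val))
		pr_debug("MSR not implemented on this CPU\n");
	else
		pr_debug("MSR value: 0x%llx\n", val);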
diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c index 35ccf75696eb..f712dfdf1357 100644 --- a/arch/x86/kernel/pci-iommu_table.c +++ b/arch/x86/kernel/pci-iommu_table.c | |||
@@ -72,7 +72,7 @@ void __init check_iommu_entries(struct iommu_table_entry *start, | |||
72 | } | 72 | } |
73 | } | 73 | } |
74 | #else | 74 | #else |
75 | inline void check_iommu_entries(struct iommu_table_entry *start, | 75 | void __init check_iommu_entries(struct iommu_table_entry *start, |
76 | struct iommu_table_entry *finish) | 76 | struct iommu_table_entry *finish) |
77 | { | 77 | { |
78 | } | 78 | } |
diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c new file mode 100644 index 000000000000..b2f8a33b36ff --- /dev/null +++ b/arch/x86/kernel/platform-quirks.c | |||
@@ -0,0 +1,35 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/init.h> | ||
3 | |||
4 | #include <asm/setup.h> | ||
5 | #include <asm/bios_ebda.h> | ||
6 | |||
7 | void __init x86_early_init_platform_quirks(void) | ||
8 | { | ||
9 | x86_platform.legacy.rtc = 1; | ||
10 | x86_platform.legacy.ebda_search = 0; | ||
11 | x86_platform.legacy.devices.pnpbios = 1; | ||
12 | |||
13 | switch (boot_params.hdr.hardware_subarch) { | ||
14 | case X86_SUBARCH_PC: | ||
15 | x86_platform.legacy.ebda_search = 1; | ||
16 | break; | ||
17 | case X86_SUBARCH_XEN: | ||
18 | case X86_SUBARCH_LGUEST: | ||
19 | case X86_SUBARCH_INTEL_MID: | ||
20 | case X86_SUBARCH_CE4100: | ||
21 | x86_platform.legacy.devices.pnpbios = 0; | ||
22 | x86_platform.legacy.rtc = 0; | ||
23 | break; | ||
24 | } | ||
25 | |||
26 | if (x86_platform.set_legacy_features) | ||
27 | x86_platform.set_legacy_features(); | ||
28 | } | ||
29 | |||
30 | #if defined(CONFIG_PNPBIOS) | ||
31 | bool __init arch_pnpbios_disabled(void) | ||
32 | { | ||
33 | return x86_platform.legacy.devices.pnpbios == 0; | ||
34 | } | ||
35 | #endif | ||
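The flags initialized in x86_early_init_platform_quirks() replace paravirt_enabled() tests at each consumer; the ebda.c hunk earlier in this diff is the canonical example:

	/* Consumer-side pattern (mirrors the ebda.c change above): */
	if (!x86_platform.legacy.ebda_search)
		return;		/* this subarch has no EBDA to reserve */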
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2915d54e9dd5..96becbbb52e0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -97,10 +97,9 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | |||
97 | /* | 97 | /* |
98 | * Free current thread data structures etc.. | 98 | * Free current thread data structures etc.. |
99 | */ | 99 | */ |
100 | void exit_thread(void) | 100 | void exit_thread(struct task_struct *tsk) |
101 | { | 101 | { |
102 | struct task_struct *me = current; | 102 | struct thread_struct *t = &tsk->thread; |
103 | struct thread_struct *t = &me->thread; | ||
104 | unsigned long *bp = t->io_bitmap_ptr; | 103 | unsigned long *bp = t->io_bitmap_ptr; |
105 | struct fpu *fpu = &t->fpu; | 104 | struct fpu *fpu = &t->fpu; |
106 | 105 | ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6cbab31ac23a..6e789ca1f841 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -136,25 +136,6 @@ void release_thread(struct task_struct *dead_task) | |||
136 | } | 136 | } |
137 | } | 137 | } |
138 | 138 | ||
139 | static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) | ||
140 | { | ||
141 | struct user_desc ud = { | ||
142 | .base_addr = addr, | ||
143 | .limit = 0xfffff, | ||
144 | .seg_32bit = 1, | ||
145 | .limit_in_pages = 1, | ||
146 | .useable = 1, | ||
147 | }; | ||
148 | struct desc_struct *desc = t->thread.tls_array; | ||
149 | desc += tls; | ||
150 | fill_ldt(desc, &ud); | ||
151 | } | ||
152 | |||
153 | static inline u32 read_32bit_tls(struct task_struct *t, int tls) | ||
154 | { | ||
155 | return get_desc_base(&t->thread.tls_array[tls]); | ||
156 | } | ||
157 | |||
158 | int copy_thread_tls(unsigned long clone_flags, unsigned long sp, | 139 | int copy_thread_tls(unsigned long clone_flags, unsigned long sp, |
159 | unsigned long arg, struct task_struct *p, unsigned long tls) | 140 | unsigned long arg, struct task_struct *p, unsigned long tls) |
160 | { | 141 | { |
@@ -169,9 +150,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, | |||
169 | p->thread.io_bitmap_ptr = NULL; | 150 | p->thread.io_bitmap_ptr = NULL; |
170 | 151 | ||
171 | savesegment(gs, p->thread.gsindex); | 152 | savesegment(gs, p->thread.gsindex); |
172 | p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs; | 153 | p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase; |
173 | savesegment(fs, p->thread.fsindex); | 154 | savesegment(fs, p->thread.fsindex); |
174 | p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs; | 155 | p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase; |
175 | savesegment(es, p->thread.es); | 156 | savesegment(es, p->thread.es); |
176 | savesegment(ds, p->thread.ds); | 157 | savesegment(ds, p->thread.ds); |
177 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | 158 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); |
@@ -210,7 +191,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, | |||
210 | */ | 191 | */ |
211 | if (clone_flags & CLONE_SETTLS) { | 192 | if (clone_flags & CLONE_SETTLS) { |
212 | #ifdef CONFIG_IA32_EMULATION | 193 | #ifdef CONFIG_IA32_EMULATION |
213 | if (is_ia32_task()) | 194 | if (in_ia32_syscall()) |
214 | err = do_set_thread_area(p, -1, | 195 | err = do_set_thread_area(p, -1, |
215 | (struct user_desc __user *)tls, 0); | 196 | (struct user_desc __user *)tls, 0); |
216 | else | 197 | else |
@@ -282,7 +263,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
282 | struct fpu *next_fpu = &next->fpu; | 263 | struct fpu *next_fpu = &next->fpu; |
283 | int cpu = smp_processor_id(); | 264 | int cpu = smp_processor_id(); |
284 | struct tss_struct *tss = &per_cpu(cpu_tss, cpu); | 265 | struct tss_struct *tss = &per_cpu(cpu_tss, cpu); |
285 | unsigned fsindex, gsindex; | 266 | unsigned prev_fsindex, prev_gsindex; |
286 | fpu_switch_t fpu_switch; | 267 | fpu_switch_t fpu_switch; |
287 | 268 | ||
288 | fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu); | 269 | fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu); |
@@ -292,8 +273,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
292 | * | 273 | * |
293 | * (e.g. xen_load_tls()) | 274 | * (e.g. xen_load_tls()) |
294 | */ | 275 | */ |
295 | savesegment(fs, fsindex); | 276 | savesegment(fs, prev_fsindex); |
296 | savesegment(gs, gsindex); | 277 | savesegment(gs, prev_gsindex); |
297 | 278 | ||
298 | /* | 279 | /* |
299 | * Load TLS before restoring any segments so that segment loads | 280 | * Load TLS before restoring any segments so that segment loads |
@@ -336,66 +317,104 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
336 | * Switch FS and GS. | 317 | * Switch FS and GS. |
337 | * | 318 | * |
338 | * These are even more complicated than DS and ES: they have | 319 | * These are even more complicated than DS and ES: they have |
339 | * 64-bit bases that are controlled by arch_prctl. Those bases | 320 | * 64-bit bases that are controlled by arch_prctl. The bases |
340 | * only differ from the values in the GDT or LDT if the selector | 321 | * don't necessarily match the selectors, as user code can do |
341 | * is 0. | 322 | * any number of things to cause them to be inconsistent. |
342 | * | ||
343 | * Loading the segment register resets the hidden base part of | ||
344 | * the register to 0 or the value from the GDT / LDT. If the | ||
345 | * next base address is zero, writing 0 to the segment register is | ||
346 | * much faster than using wrmsr to explicitly zero the base. | ||
347 | * | ||
348 | * The thread_struct.fs and thread_struct.gs values are 0 | ||
349 | * if the fs and gs bases respectively are not overridden | ||
350 | * from the values implied by fsindex and gsindex. They | ||
351 | * are nonzero, and store the nonzero base addresses, if | ||
352 | * the bases are overridden. | ||
353 | * | ||
354 | * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should | ||
355 | * be impossible. | ||
356 | * | ||
357 | * Therefore we need to reload the segment registers if either | ||
358 | * the old or new selector is nonzero, and we need to override | ||
359 | * the base address if next thread expects it to be overridden. | ||
360 | * | 323 | * |
361 | * This code is unnecessarily slow in the case where the old and | 324 | * We don't promise to preserve the bases if the selectors are |
362 | * new indexes are zero and the new base is nonzero -- it will | 325 | * nonzero. We also don't promise to preserve the base if the |
363 | * unnecessarily write 0 to the selector before writing the new | 326 | * selector is zero and the base doesn't match whatever was |
364 | * base address. | 327 | * most recently passed to ARCH_SET_FS/GS. (If/when the |
328 | * FSGSBASE instructions are enabled, we'll need to offer | ||
329 | * stronger guarantees.) | ||
365 | * | 330 | * |
366 | * Note: This all depends on arch_prctl being the only way that | 331 | * As an invariant, |
367 | * user code can override the segment base. Once wrfsbase and | 332 | * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is |
368 | * wrgsbase are enabled, most of this code will need to change. | 333 | * impossible. |
369 | */ | 334 | */ |
370 | if (unlikely(fsindex | next->fsindex | prev->fs)) { | 335 | if (next->fsindex) { |
336 | /* Loading a nonzero value into FS sets the index and base. */ | ||
371 | loadsegment(fs, next->fsindex); | 337 | loadsegment(fs, next->fsindex); |
372 | 338 | } else { | |
373 | /* | 339 | if (next->fsbase) { |
374 | * If user code wrote a nonzero value to FS, then it also | 340 | /* Next index is zero but next base is nonzero. */ |
375 | * cleared the overridden base address. | 341 | if (prev_fsindex) |
376 | * | 342 | loadsegment(fs, 0); |
377 | * XXX: if user code wrote 0 to FS and cleared the base | 343 | wrmsrl(MSR_FS_BASE, next->fsbase); |
378 | * address itself, we won't notice and we'll incorrectly | 344 | } else { |
379 | * restore the prior base address next time we reschedule | 345 | /* Next base and index are both zero. */ |
380 | * the process. | 346 | if (static_cpu_has_bug(X86_BUG_NULL_SEG)) { |
381 | */ | 347 | /* |
382 | if (fsindex) | 348 | * We don't know the previous base and can't |
383 | prev->fs = 0; | 349 | * find out without RDMSR. Forcibly clear it. |
350 | */ | ||
351 | loadsegment(fs, __USER_DS); | ||
352 | loadsegment(fs, 0); | ||
353 | } else { | ||
354 | /* | ||
355 | * If the previous index is zero and ARCH_SET_FS | ||
356 | * didn't change the base, then the base is | ||
357 | * also zero and we don't need to do anything. | ||
358 | */ | ||
359 | if (prev->fsbase || prev_fsindex) | ||
360 | loadsegment(fs, 0); | ||
361 | } | ||
362 | } | ||
384 | } | 363 | } |
385 | if (next->fs) | 364 | /* |
386 | wrmsrl(MSR_FS_BASE, next->fs); | 365 | * Save the old state and preserve the invariant. |
387 | prev->fsindex = fsindex; | 366 | * NB: if prev_fsindex == 0, then we can't reliably learn the base |
367 | * without RDMSR because Intel user code can zero it without telling | ||
368 | * us and AMD user code can program any 32-bit value without telling | ||
369 | * us. | ||
370 | */ | ||
371 | if (prev_fsindex) | ||
372 | prev->fsbase = 0; | ||
373 | prev->fsindex = prev_fsindex; | ||
388 | 374 | ||
389 | if (unlikely(gsindex | next->gsindex | prev->gs)) { | 375 | if (next->gsindex) { |
376 | /* Loading a nonzero value into GS sets the index and base. */ | ||
390 | load_gs_index(next->gsindex); | 377 | load_gs_index(next->gsindex); |
391 | 378 | } else { | |
392 | /* This works (and fails) the same way as fsindex above. */ | 379 | if (next->gsbase) { |
393 | if (gsindex) | 380 | /* Next index is zero but next base is nonzero. */ |
394 | prev->gs = 0; | 381 | if (prev_gsindex) |
382 | load_gs_index(0); | ||
383 | wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase); | ||
384 | } else { | ||
385 | /* Next base and index are both zero. */ | ||
386 | if (static_cpu_has_bug(X86_BUG_NULL_SEG)) { | ||
387 | /* | ||
388 | * We don't know the previous base and can't | ||
389 | * find out without RDMSR. Forcibly clear it. | ||
390 | * | ||
391 | * This contains a pointless SWAPGS pair. | ||
392 | * Fixing it would involve an explicit check | ||
393 | * for Xen or a new pvop. | ||
394 | */ | ||
395 | load_gs_index(__USER_DS); | ||
396 | load_gs_index(0); | ||
397 | } else { | ||
398 | /* | ||
399 | * If the previous index is zero and ARCH_SET_GS | ||
400 | * didn't change the base, then the base is | ||
401 | * also zero and we don't need to do anything. | ||
402 | */ | ||
403 | if (prev->gsbase || prev_gsindex) | ||
404 | load_gs_index(0); | ||
405 | } | ||
406 | } | ||
395 | } | 407 | } |
396 | if (next->gs) | 408 | /* |
397 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); | 409 | * Save the old state and preserve the invariant. |
398 | prev->gsindex = gsindex; | 410 | * NB: if prev_gsindex == 0, then we can't reliably learn the base |
411 | * without RDMSR because Intel user code can zero it without telling | ||
412 | * us and AMD user code can program any 32-bit value without telling | ||
413 | * us. | ||
414 | */ | ||
415 | if (prev_gsindex) | ||
416 | prev->gsbase = 0; | ||
417 | prev->gsindex = prev_gsindex; | ||
399 | 418 | ||
400 | switch_fpu_finish(next_fpu, fpu_switch); | 419 | switch_fpu_finish(next_fpu, fpu_switch); |
401 | 420 | ||
@@ -513,81 +532,47 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
513 | 532 | ||
514 | switch (code) { | 533 | switch (code) { |
515 | case ARCH_SET_GS: | 534 | case ARCH_SET_GS: |
516 | if (addr >= TASK_SIZE_OF(task)) | 535 | if (addr >= TASK_SIZE_MAX) |
517 | return -EPERM; | 536 | return -EPERM; |
518 | cpu = get_cpu(); | 537 | cpu = get_cpu(); |
519 | /* handle small bases via the GDT because that's faster to | 538 | task->thread.gsindex = 0; |
520 | switch. */ | 539 | task->thread.gsbase = addr; |
521 | if (addr <= 0xffffffff) { | 540 | if (doit) { |
522 | set_32bit_tls(task, GS_TLS, addr); | 541 | load_gs_index(0); |
523 | if (doit) { | 542 | ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); |
524 | load_TLS(&task->thread, cpu); | ||
525 | load_gs_index(GS_TLS_SEL); | ||
526 | } | ||
527 | task->thread.gsindex = GS_TLS_SEL; | ||
528 | task->thread.gs = 0; | ||
529 | } else { | ||
530 | task->thread.gsindex = 0; | ||
531 | task->thread.gs = addr; | ||
532 | if (doit) { | ||
533 | load_gs_index(0); | ||
534 | ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); | ||
535 | } | ||
536 | } | 543 | } |
537 | put_cpu(); | 544 | put_cpu(); |
538 | break; | 545 | break; |
539 | case ARCH_SET_FS: | 546 | case ARCH_SET_FS: |
540 | /* Not strictly needed for fs, but do it for symmetry | 547 | /* Not strictly needed for fs, but do it for symmetry |
541 | with gs */ | 548 | with gs */ |
542 | if (addr >= TASK_SIZE_OF(task)) | 549 | if (addr >= TASK_SIZE_MAX) |
543 | return -EPERM; | 550 | return -EPERM; |
544 | cpu = get_cpu(); | 551 | cpu = get_cpu(); |
545 | /* handle small bases via the GDT because that's faster to | 552 | task->thread.fsindex = 0; |
546 | switch. */ | 553 | task->thread.fsbase = addr; |
547 | if (addr <= 0xffffffff) { | 554 | if (doit) { |
548 | set_32bit_tls(task, FS_TLS, addr); | 555 | /* set the selector to 0 to not confuse __switch_to */ |
549 | if (doit) { | 556 | loadsegment(fs, 0); |
550 | load_TLS(&task->thread, cpu); | 557 | ret = wrmsrl_safe(MSR_FS_BASE, addr); |
551 | loadsegment(fs, FS_TLS_SEL); | ||
552 | } | ||
553 | task->thread.fsindex = FS_TLS_SEL; | ||
554 | task->thread.fs = 0; | ||
555 | } else { | ||
556 | task->thread.fsindex = 0; | ||
557 | task->thread.fs = addr; | ||
558 | if (doit) { | ||
559 | /* set the selector to 0 to not confuse | ||
560 | __switch_to */ | ||
561 | loadsegment(fs, 0); | ||
562 | ret = wrmsrl_safe(MSR_FS_BASE, addr); | ||
563 | } | ||
564 | } | 558 | } |
565 | put_cpu(); | 559 | put_cpu(); |
566 | break; | 560 | break; |
567 | case ARCH_GET_FS: { | 561 | case ARCH_GET_FS: { |
568 | unsigned long base; | 562 | unsigned long base; |
569 | if (task->thread.fsindex == FS_TLS_SEL) | 563 | if (doit) |
570 | base = read_32bit_tls(task, FS_TLS); | ||
571 | else if (doit) | ||
572 | rdmsrl(MSR_FS_BASE, base); | 564 | rdmsrl(MSR_FS_BASE, base); |
573 | else | 565 | else |
574 | base = task->thread.fs; | 566 | base = task->thread.fsbase; |
575 | ret = put_user(base, (unsigned long __user *)addr); | 567 | ret = put_user(base, (unsigned long __user *)addr); |
576 | break; | 568 | break; |
577 | } | 569 | } |
578 | case ARCH_GET_GS: { | 570 | case ARCH_GET_GS: { |
579 | unsigned long base; | 571 | unsigned long base; |
580 | unsigned gsindex; | 572 | if (doit) |
581 | if (task->thread.gsindex == GS_TLS_SEL) | 573 | rdmsrl(MSR_KERNEL_GS_BASE, base); |
582 | base = read_32bit_tls(task, GS_TLS); | 574 | else |
583 | else if (doit) { | 575 | base = task->thread.gsbase; |
584 | savesegment(gs, gsindex); | ||
585 | if (gsindex) | ||
586 | rdmsrl(MSR_KERNEL_GS_BASE, base); | ||
587 | else | ||
588 | base = task->thread.gs; | ||
589 | } else | ||
590 | base = task->thread.gs; | ||
591 | ret = put_user(base, (unsigned long __user *)addr); | 576 | ret = put_user(base, (unsigned long __user *)addr); |
592 | break; | 577 | break; |
593 | } | 578 | } |
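The rewritten do_arch_prctl() above drops the 32-bit GDT fast path, so ARCH_SET_FS/GS always go through the MSRs. For reference, a minimal userspace sketch of the GS side of that interface (GS rather than FS, since glibc owns FSBASE for TLS on x86_64); the variable names and the 0xdeadbeef marker are illustrative, while the ARCH_* constants come from asm/prctl.h:

#include <asm/prctl.h>		/* ARCH_SET_GS, ARCH_GET_GS */
#include <sys/syscall.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

static uint64_t slot = 0xdeadbeef;	/* value to locate through %gs */

int main(void)
{
	unsigned long base = 0;
	uint64_t via_gs;

	/* Lands in the ARCH_SET_GS case above: gsindex = 0, gsbase = addr. */
	if (syscall(SYS_arch_prctl, ARCH_SET_GS, &slot))
		return 1;

	/* ARCH_GET_GS now simply reads MSR_KERNEL_GS_BASE back. */
	syscall(SYS_arch_prctl, ARCH_GET_GS, &base);

	/* A %gs-relative load goes through the freshly written base. */
	__asm__ volatile("movq %%gs:0, %0" : "=r"(via_gs));

	printf("base=%#lx via_gs=%#llx\n", base, (unsigned long long)via_gs);
	return via_gs != slot;
}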
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 32e9d9cbb884..600edd225e81 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -303,29 +303,11 @@ static int set_segment_reg(struct task_struct *task, | |||
303 | 303 | ||
304 | switch (offset) { | 304 | switch (offset) { |
305 | case offsetof(struct user_regs_struct,fs): | 305 | case offsetof(struct user_regs_struct,fs): |
306 | /* | ||
307 | * If this is setting fs as for normal 64-bit use but | ||
308 | * setting fs_base has implicitly changed it, leave it. | ||
309 | */ | ||
310 | if ((value == FS_TLS_SEL && task->thread.fsindex == 0 && | ||
311 | task->thread.fs != 0) || | ||
312 | (value == 0 && task->thread.fsindex == FS_TLS_SEL && | ||
313 | task->thread.fs == 0)) | ||
314 | break; | ||
315 | task->thread.fsindex = value; | 306 | task->thread.fsindex = value; |
316 | if (task == current) | 307 | if (task == current) |
317 | loadsegment(fs, task->thread.fsindex); | 308 | loadsegment(fs, task->thread.fsindex); |
318 | break; | 309 | break; |
319 | case offsetof(struct user_regs_struct,gs): | 310 | case offsetof(struct user_regs_struct,gs): |
320 | /* | ||
321 | * If this is setting gs as for normal 64-bit use but | ||
322 | * setting gs_base has implicitly changed it, leave it. | ||
323 | */ | ||
324 | if ((value == GS_TLS_SEL && task->thread.gsindex == 0 && | ||
325 | task->thread.gs != 0) || | ||
326 | (value == 0 && task->thread.gsindex == GS_TLS_SEL && | ||
327 | task->thread.gs == 0)) | ||
328 | break; | ||
329 | task->thread.gsindex = value; | 311 | task->thread.gsindex = value; |
330 | if (task == current) | 312 | if (task == current) |
331 | load_gs_index(task->thread.gsindex); | 313 | load_gs_index(task->thread.gsindex); |
@@ -410,23 +392,23 @@ static int putreg(struct task_struct *child, | |||
410 | 392 | ||
411 | #ifdef CONFIG_X86_64 | 393 | #ifdef CONFIG_X86_64 |
412 | case offsetof(struct user_regs_struct,fs_base): | 394 | case offsetof(struct user_regs_struct,fs_base): |
413 | if (value >= TASK_SIZE_OF(child)) | 395 | if (value >= TASK_SIZE_MAX) |
414 | return -EIO; | 396 | return -EIO; |
415 | /* | 397 | /* |
416 | * When changing the segment base, use do_arch_prctl | 398 | * When changing the segment base, use do_arch_prctl |
417 | * to set either thread.fs or thread.fsindex and the | 399 | * to set either thread.fsbase or thread.fsindex and the |
418 | * corresponding GDT slot. | 400 | * corresponding GDT slot. |
419 | */ | 401 | */ |
420 | if (child->thread.fs != value) | 402 | if (child->thread.fsbase != value) |
421 | return do_arch_prctl(child, ARCH_SET_FS, value); | 403 | return do_arch_prctl(child, ARCH_SET_FS, value); |
422 | return 0; | 404 | return 0; |
423 | case offsetof(struct user_regs_struct,gs_base): | 405 | case offsetof(struct user_regs_struct,gs_base): |
424 | /* | 406 | /* |
425 | * Exactly the same here as the %fs handling above. | 407 | * Exactly the same here as the %fs handling above. |
426 | */ | 408 | */ |
427 | if (value >= TASK_SIZE_OF(child)) | 409 | if (value >= TASK_SIZE_MAX) |
428 | return -EIO; | 410 | return -EIO; |
429 | if (child->thread.gs != value) | 411 | if (child->thread.gsbase != value) |
430 | return do_arch_prctl(child, ARCH_SET_GS, value); | 412 | return do_arch_prctl(child, ARCH_SET_GS, value); |
431 | return 0; | 413 | return 0; |
432 | #endif | 414 | #endif |
@@ -453,31 +435,17 @@ static unsigned long getreg(struct task_struct *task, unsigned long offset) | |||
453 | #ifdef CONFIG_X86_64 | 435 | #ifdef CONFIG_X86_64 |
454 | case offsetof(struct user_regs_struct, fs_base): { | 436 | case offsetof(struct user_regs_struct, fs_base): { |
455 | /* | 437 | /* |
456 | * do_arch_prctl may have used a GDT slot instead of | 438 | * XXX: This will not behave as expected if called on |
457 | * the MSR. To userland, it appears the same either | 439 | * current or if fsindex != 0. |
458 | * way, except the %fs segment selector might not be 0. | ||
459 | */ | 440 | */ |
460 | unsigned int seg = task->thread.fsindex; | 441 | return task->thread.fsbase; |
461 | if (task->thread.fs != 0) | ||
462 | return task->thread.fs; | ||
463 | if (task == current) | ||
464 | asm("movl %%fs,%0" : "=r" (seg)); | ||
465 | if (seg != FS_TLS_SEL) | ||
466 | return 0; | ||
467 | return get_desc_base(&task->thread.tls_array[FS_TLS]); | ||
468 | } | 442 | } |
469 | case offsetof(struct user_regs_struct, gs_base): { | 443 | case offsetof(struct user_regs_struct, gs_base): { |
470 | /* | 444 | /* |
471 | * Exactly the same here as the %fs handling above. | 445 | * XXX: This will not behave as expected if called on |
446 | * current or if gsindex != 0. | ||
472 | */ | 447 | */ |
473 | unsigned int seg = task->thread.gsindex; | 448 | return task->thread.gsbase; |
474 | if (task->thread.gs != 0) | ||
475 | return task->thread.gs; | ||
476 | if (task == current) | ||
477 | asm("movl %%gs,%0" : "=r" (seg)); | ||
478 | if (seg != GS_TLS_SEL) | ||
479 | return 0; | ||
480 | return get_desc_base(&task->thread.tls_array[GS_TLS]); | ||
481 | } | 449 | } |
482 | #endif | 450 | #endif |
483 | } | 451 | } |
@@ -1266,7 +1234,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, | |||
1266 | compat_ulong_t caddr, compat_ulong_t cdata) | 1234 | compat_ulong_t caddr, compat_ulong_t cdata) |
1267 | { | 1235 | { |
1268 | #ifdef CONFIG_X86_X32_ABI | 1236 | #ifdef CONFIG_X86_X32_ABI |
1269 | if (!is_ia32_task()) | 1237 | if (!in_ia32_syscall()) |
1270 | return x32_arch_ptrace(child, request, caddr, cdata); | 1238 | return x32_arch_ptrace(child, request, caddr, cdata); |
1271 | #endif | 1239 | #endif |
1272 | #ifdef CONFIG_IA32_EMULATION | 1240 | #ifdef CONFIG_IA32_EMULATION |
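The simplified getreg()/putreg() cases above are what PTRACE_PEEKUSER and PTRACE_POKEUSER hit for the fs_base/gs_base pseudo-registers. A minimal tracer sketch, with error handling trimmed; it assumes only the standard ptrace(2) interface and struct user_regs_struct:

#include <sys/ptrace.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <stddef.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {			/* child: become traceable, stop */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		_exit(0);
	}

	waitpid(pid, NULL, 0);		/* wait for the SIGSTOP */

	/* Reads child->thread.fsbase via the getreg() case above. */
	long base = ptrace(PTRACE_PEEKUSER, pid,
			   offsetof(struct user_regs_struct, fs_base), NULL);
	printf("child fs_base = %#lx\n", (unsigned long)base);

	ptrace(PTRACE_CONT, pid, NULL, NULL);
	waitpid(pid, NULL, 0);
	return 0;
}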
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index ab0adc0fa5db..a9b31eb815f2 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -535,6 +535,15 @@ static void native_machine_emergency_restart(void) | |||
535 | mode = reboot_mode == REBOOT_WARM ? 0x1234 : 0; | 535 | mode = reboot_mode == REBOOT_WARM ? 0x1234 : 0; |
536 | *((unsigned short *)__va(0x472)) = mode; | 536 | *((unsigned short *)__va(0x472)) = mode; |
537 | 537 | ||
538 | /* | ||
539 | * If an EFI capsule has been registered with the firmware then | ||
540 | * override the reboot= parameter. | ||
541 | */ | ||
542 | if (efi_capsule_pending(NULL)) { | ||
543 | pr_info("EFI capsule is pending, forcing EFI reboot.\n"); | ||
544 | reboot_type = BOOT_EFI; | ||
545 | } | ||
546 | |||
538 | for (;;) { | 547 | for (;;) { |
539 | /* Could also try the reset bit in the Hammer NB */ | 548 | /* Could also try the reset bit in the Hammer NB */ |
540 | switch (reboot_type) { | 549 | switch (reboot_type) { |
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 4af8d063fb36..eceaa082ec3f 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <asm/time.h> | 14 | #include <asm/time.h> |
15 | #include <asm/intel-mid.h> | 15 | #include <asm/intel-mid.h> |
16 | #include <asm/rtc.h> | 16 | #include <asm/rtc.h> |
17 | #include <asm/setup.h> | ||
17 | 18 | ||
18 | #ifdef CONFIG_X86_32 | 19 | #ifdef CONFIG_X86_32 |
19 | /* | 20 | /* |
@@ -185,22 +186,7 @@ static __init int add_rtc_cmos(void) | |||
185 | } | 186 | } |
186 | } | 187 | } |
187 | #endif | 188 | #endif |
188 | if (of_have_populated_dt()) | 189 | if (!x86_platform.legacy.rtc) |
189 | return 0; | ||
190 | |||
191 | /* Intel MID platforms don't have ioport rtc */ | ||
192 | if (intel_mid_identify_cpu()) | ||
193 | return -ENODEV; | ||
194 | |||
195 | #ifdef CONFIG_ACPI | ||
196 | if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) { | ||
197 | /* This warning can likely go away again in a year or two. */ | ||
198 | pr_info("ACPI: not registering RTC platform device\n"); | ||
199 | return -ENODEV; | ||
200 | } | ||
201 | #endif | ||
202 | |||
203 | if (paravirt_enabled() && !paravirt_has(RTC)) | ||
204 | return -ENODEV; | 190 | return -ENODEV; |
205 | 191 | ||
206 | platform_device_register(&rtc_device); | 192 | platform_device_register(&rtc_device); |
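The rtc.c hunk collapses four independent checks (device tree, Intel MID, the ACPI FADT flag, paravirt) into one x86_platform.legacy.rtc bit computed once at boot. A generic, self-contained sketch of that centralization; struct legacy_quirks and the function names are illustrative, not the kernel's:

#include <stdio.h>
#include <stdbool.h>

/* Collect per-platform quirks in one place instead of testing
 * DT/ACPI/paravirt conditions at every call site. */
struct legacy_quirks {
	bool rtc;
	bool pnpbios;
};

static struct legacy_quirks platform_legacy = { .rtc = true, .pnpbios = true };

/* Hypothetical early-boot hook that downgrades the defaults. */
static void platform_quirks_init(bool has_dt, bool acpi_no_cmos_rtc)
{
	if (has_dt || acpi_no_cmos_rtc)
		platform_legacy.rtc = false;
}

static int add_rtc_device(void)
{
	if (!platform_legacy.rtc)	/* single check at the call site */
		return -1;
	printf("registering RTC platform device\n");
	return 0;
}

int main(void)
{
	platform_quirks_init(false, false);
	return add_rtc_device();
}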
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2367ae07eb76..c4e7b3991b60 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -398,6 +398,11 @@ static void __init reserve_initrd(void) | |||
398 | 398 | ||
399 | memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); | 399 | memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); |
400 | } | 400 | } |
401 | |||
402 | static void __init early_initrd_acpi_init(void) | ||
403 | { | ||
404 | early_acpi_table_init((void *)initrd_start, initrd_end - initrd_start); | ||
405 | } | ||
401 | #else | 406 | #else |
402 | static void __init early_reserve_initrd(void) | 407 | static void __init early_reserve_initrd(void) |
403 | { | 408 | { |
@@ -405,6 +410,9 @@ static void __init early_reserve_initrd(void) | |||
405 | static void __init reserve_initrd(void) | 410 | static void __init reserve_initrd(void) |
406 | { | 411 | { |
407 | } | 412 | } |
413 | static void __init early_initrd_acpi_init(void) | ||
414 | { | ||
415 | } | ||
408 | #endif /* CONFIG_BLK_DEV_INITRD */ | 416 | #endif /* CONFIG_BLK_DEV_INITRD */ |
409 | 417 | ||
410 | static void __init parse_setup_data(void) | 418 | static void __init parse_setup_data(void) |
@@ -1138,9 +1146,7 @@ void __init setup_arch(char **cmdline_p) | |||
1138 | 1146 | ||
1139 | reserve_initrd(); | 1147 | reserve_initrd(); |
1140 | 1148 | ||
1141 | #if defined(CONFIG_ACPI) && defined(CONFIG_BLK_DEV_INITRD) | 1149 | early_initrd_acpi_init(); |
1142 | acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start); | ||
1143 | #endif | ||
1144 | 1150 | ||
1145 | vsmp_init(); | 1151 | vsmp_init(); |
1146 | 1152 | ||
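The setup.c change applies a standard kernel idiom: instead of an #if defined(CONFIG_ACPI) && defined(CONFIG_BLK_DEV_INITRD) block at the call site, each configuration supplies a definition of early_initrd_acpi_init() (one real, one empty stub) and the caller stays unconditional. A generic sketch of the idiom, using a hypothetical FEATURE_FOO switch:

#include <stdio.h>

/* One real implementation and one empty stub, selected at build time.
 * FEATURE_FOO stands in for a Kconfig option such as CONFIG_BLK_DEV_INITRD. */
#ifdef FEATURE_FOO
static void foo_init(void)
{
	printf("feature initialized\n");
}
#else
static void foo_init(void) { }		/* compiled out: do nothing */
#endif

int main(void)
{
	foo_init();	/* call site needs no #ifdef either way */
	return 0;
}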
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 548ddf7d6fd2..22cc2f9f8aec 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -248,18 +248,17 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, | |||
248 | if (config_enabled(CONFIG_X86_64)) | 248 | if (config_enabled(CONFIG_X86_64)) |
249 | sp -= 128; | 249 | sp -= 128; |
250 | 250 | ||
251 | if (!onsigstack) { | 251 | /* This is the X/Open sanctioned signal stack switching. */ |
252 | /* This is the X/Open sanctioned signal stack switching. */ | 252 | if (ka->sa.sa_flags & SA_ONSTACK) { |
253 | if (ka->sa.sa_flags & SA_ONSTACK) { | 253 | if (sas_ss_flags(sp) == 0) |
254 | if (current->sas_ss_size) | 254 | sp = current->sas_ss_sp + current->sas_ss_size; |
255 | sp = current->sas_ss_sp + current->sas_ss_size; | 255 | } else if (config_enabled(CONFIG_X86_32) && |
256 | } else if (config_enabled(CONFIG_X86_32) && | 256 | !onsigstack && |
257 | (regs->ss & 0xffff) != __USER_DS && | 257 | (regs->ss & 0xffff) != __USER_DS && |
258 | !(ka->sa.sa_flags & SA_RESTORER) && | 258 | !(ka->sa.sa_flags & SA_RESTORER) && |
259 | ka->sa.sa_restorer) { | 259 | ka->sa.sa_restorer) { |
260 | /* This is the legacy signal stack switching. */ | 260 | /* This is the legacy signal stack switching. */ |
261 | sp = (unsigned long) ka->sa.sa_restorer; | 261 | sp = (unsigned long) ka->sa.sa_restorer; |
262 | } | ||
263 | } | 262 | } |
264 | 263 | ||
265 | if (fpu->fpstate_active) { | 264 | if (fpu->fpstate_active) { |
@@ -391,7 +390,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, | |||
391 | put_user_ex(&frame->uc, &frame->puc); | 390 | put_user_ex(&frame->uc, &frame->puc); |
392 | 391 | ||
393 | /* Create the ucontext. */ | 392 | /* Create the ucontext. */ |
394 | if (cpu_has_xsave) | 393 | if (boot_cpu_has(X86_FEATURE_XSAVE)) |
395 | put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); | 394 | put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); |
396 | else | 395 | else |
397 | put_user_ex(0, &frame->uc.uc_flags); | 396 | put_user_ex(0, &frame->uc.uc_flags); |
@@ -442,7 +441,7 @@ static unsigned long frame_uc_flags(struct pt_regs *regs) | |||
442 | { | 441 | { |
443 | unsigned long flags; | 442 | unsigned long flags; |
444 | 443 | ||
445 | if (cpu_has_xsave) | 444 | if (boot_cpu_has(X86_FEATURE_XSAVE)) |
446 | flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS; | 445 | flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS; |
447 | else | 446 | else |
448 | flags = UC_SIGCONTEXT_SS; | 447 | flags = UC_SIGCONTEXT_SS; |
@@ -762,7 +761,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) | |||
762 | static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) | 761 | static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) |
763 | { | 762 | { |
764 | #ifdef CONFIG_X86_64 | 763 | #ifdef CONFIG_X86_64 |
765 | if (is_ia32_task()) | 764 | if (in_ia32_syscall()) |
766 | return __NR_ia32_restart_syscall; | 765 | return __NR_ia32_restart_syscall; |
767 | #endif | 766 | #endif |
768 | #ifdef CONFIG_X86_X32_ABI | 767 | #ifdef CONFIG_X86_X32_ABI |
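The get_sigframe() hunk switches the alternate-stack decision to sas_ss_flags(sp), so a handler already running on the alternate stack keeps its current sp instead of being rewound to the top. The userspace contract is plain sigaltstack(2) plus SA_ONSTACK; a minimal sketch (printf in a handler is not async-signal-safe, but fine for a demo):

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

static void handler(int sig)
{
	stack_t ss;

	/* SS_ONSTACK confirms the kernel switched us to the alt stack. */
	sigaltstack(NULL, &ss);
	printf("signal %d, on altstack: %s\n", sig,
	       (ss.ss_flags & SS_ONSTACK) ? "yes" : "no");
}

int main(void)
{
	stack_t ss = {
		.ss_sp    = malloc(SIGSTKSZ),
		.ss_size  = SIGSTKSZ,
		.ss_flags = 0,
	};
	struct sigaction sa = {
		.sa_handler = handler,
		.sa_flags   = SA_ONSTACK,	/* the X/Open switching path */
	};

	sigaltstack(&ss, NULL);
	sigaction(SIGUSR1, &sa, NULL);
	raise(SIGUSR1);
	return 0;
}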
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a2065d3b3b39..fafe8b923cac 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -332,6 +332,11 @@ static void __init smp_init_package_map(void) | |||
332 | * primary cores. | 332 | * primary cores. |
333 | */ | 333 | */ |
334 | ncpus = boot_cpu_data.x86_max_cores; | 334 | ncpus = boot_cpu_data.x86_max_cores; |
335 | if (!ncpus) { | ||
336 | pr_warn("x86_max_cores == zero !?!?"); | ||
337 | ncpus = 1; | ||
338 | } | ||
339 | |||
335 | __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); | 340 | __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); |
336 | 341 | ||
337 | /* | 342 | /* |
@@ -1231,7 +1236,7 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1231 | * If we couldn't find a local APIC, then get out of here now! | 1236 | * If we couldn't find a local APIC, then get out of here now! |
1232 | */ | 1237 | */ |
1233 | if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && | 1238 | if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && |
1234 | !cpu_has_apic) { | 1239 | !boot_cpu_has(X86_FEATURE_APIC)) { |
1235 | if (!disable_apic) { | 1240 | if (!disable_apic) { |
1236 | pr_err("BIOS bug, local APIC #%d not detected!...\n", | 1241 | pr_err("BIOS bug, local APIC #%d not detected!...\n", |
1237 | boot_cpu_physical_apicid); | 1242 | boot_cpu_physical_apicid); |
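The smp_init_package_map() hunk guards against firmware reporting x86_max_cores == 0, which would make DIV_ROUND_UP() divide by zero. A tiny sketch of the guard, using the kernel's DIV_ROUND_UP definition; max_packages() is an illustrative name:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

static unsigned int max_packages(unsigned int total_cpus, unsigned int ncpus)
{
	if (!ncpus) {		/* avoid a divide-by-zero on bad firmware data */
		fprintf(stderr, "ncpus == 0, assuming 1\n");
		ncpus = 1;
	}
	return DIV_ROUND_UP(total_cpus, ncpus);
}

int main(void)
{
	printf("%u\n", max_packages(8, 0));
	return 0;
}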
diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c index b285d4e8c68e..623965e86b65 100644 --- a/arch/x86/kernel/sysfb_efi.c +++ b/arch/x86/kernel/sysfb_efi.c | |||
@@ -68,6 +68,21 @@ struct efifb_dmi_info efifb_dmi_list[] = { | |||
68 | [M_UNKNOWN] = { NULL, 0, 0, 0, 0, OVERRIDE_NONE } | 68 | [M_UNKNOWN] = { NULL, 0, 0, 0, 0, OVERRIDE_NONE } |
69 | }; | 69 | }; |
70 | 70 | ||
71 | void efifb_setup_from_dmi(struct screen_info *si, const char *opt) | ||
72 | { | ||
73 | int i; | ||
74 | |||
75 | for (i = 0; i < M_UNKNOWN; i++) { | ||
76 | if (efifb_dmi_list[i].base != 0 && | ||
77 | !strcmp(opt, efifb_dmi_list[i].optname)) { | ||
78 | si->lfb_base = efifb_dmi_list[i].base; | ||
79 | si->lfb_linelength = efifb_dmi_list[i].stride; | ||
80 | si->lfb_width = efifb_dmi_list[i].width; | ||
81 | si->lfb_height = efifb_dmi_list[i].height; | ||
82 | } | ||
83 | } | ||
84 | } | ||
85 | |||
71 | #define choose_value(dmivalue, fwvalue, field, flags) ({ \ | 86 | #define choose_value(dmivalue, fwvalue, field, flags) ({ \ |
72 | typeof(fwvalue) _ret_ = fwvalue; \ | 87 | typeof(fwvalue) _ret_ = fwvalue; \ |
73 | if ((flags) & (field)) \ | 88 | if ((flags) & (field)) \ |
@@ -106,14 +121,24 @@ static int __init efifb_set_system(const struct dmi_system_id *id) | |||
106 | continue; | 121 | continue; |
107 | for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { | 122 | for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { |
108 | resource_size_t start, end; | 123 | resource_size_t start, end; |
124 | unsigned long flags; | ||
125 | |||
126 | flags = pci_resource_flags(dev, i); | ||
127 | if (!(flags & IORESOURCE_MEM)) | ||
128 | continue; | ||
129 | |||
130 | if (flags & IORESOURCE_UNSET) | ||
131 | continue; | ||
132 | |||
133 | if (pci_resource_len(dev, i) == 0) | ||
134 | continue; | ||
109 | 135 | ||
110 | start = pci_resource_start(dev, i); | 136 | start = pci_resource_start(dev, i); |
111 | if (start == 0) | ||
112 | break; | ||
113 | end = pci_resource_end(dev, i); | 137 | end = pci_resource_end(dev, i); |
114 | if (screen_info.lfb_base >= start && | 138 | if (screen_info.lfb_base >= start && |
115 | screen_info.lfb_base < end) { | 139 | screen_info.lfb_base < end) { |
116 | found_bar = 1; | 140 | found_bar = 1; |
141 | break; | ||
117 | } | 142 | } |
118 | } | 143 | } |
119 | } | 144 | } |
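The efifb_set_system() hunk is a correctness fix: a BAR whose start is 0 no longer terminates the scan; instead, non-memory, unset, and zero-length resources are skipped and the loop breaks only on a match. A self-contained sketch of that filtering using exclusive-end ranges; the RES_* flags merely mimic the kernel's IORESOURCE_* bits:

#include <stdio.h>

#define RES_MEM		0x1	/* stands in for IORESOURCE_MEM */
#define RES_UNSET	0x2	/* stands in for IORESOURCE_UNSET */

struct resource_like {
	unsigned long start, end, flags;
};

/* Return 1 if addr falls inside any valid memory resource. */
static int addr_in_resources(const struct resource_like *res, int n,
			     unsigned long addr)
{
	for (int i = 0; i < n; i++) {
		if (!(res[i].flags & RES_MEM))		/* not memory */
			continue;
		if (res[i].flags & RES_UNSET)		/* not assigned */
			continue;
		if (res[i].end <= res[i].start)		/* zero length */
			continue;
		if (addr >= res[i].start && addr < res[i].end)
			return 1;
	}
	return 0;
}

int main(void)
{
	struct resource_like bars[] = {
		{ 0, 0, RES_MEM | RES_UNSET },	/* would wrongly end the old scan */
		{ 0x80000000, 0x81000000, RES_MEM },
	};
	printf("%d\n", addr_in_resources(bars, 2, 0x80001000));
	return 0;
}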
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index e72a07f20b05..9b0185fbe3eb 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c | |||
@@ -74,12 +74,6 @@ void __init tboot_probe(void) | |||
74 | return; | 74 | return; |
75 | } | 75 | } |
76 | 76 | ||
77 | /* only a natively booted kernel should be using TXT */ | ||
78 | if (paravirt_enabled()) { | ||
79 | pr_warning("non-0 tboot_addr but pv_ops is enabled\n"); | ||
80 | return; | ||
81 | } | ||
82 | |||
83 | /* Map and check for tboot UUID. */ | 77 | /* Map and check for tboot UUID. */ |
84 | set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr); | 78 | set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr); |
85 | tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE); | 79 | tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE); |
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 7fc5e843f247..9692a5e9fdab 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c | |||
@@ -114,6 +114,7 @@ int do_set_thread_area(struct task_struct *p, int idx, | |||
114 | int can_allocate) | 114 | int can_allocate) |
115 | { | 115 | { |
116 | struct user_desc info; | 116 | struct user_desc info; |
117 | unsigned short __maybe_unused sel, modified_sel; | ||
117 | 118 | ||
118 | if (copy_from_user(&info, u_info, sizeof(info))) | 119 | if (copy_from_user(&info, u_info, sizeof(info))) |
119 | return -EFAULT; | 120 | return -EFAULT; |
@@ -141,6 +142,47 @@ int do_set_thread_area(struct task_struct *p, int idx, | |||
141 | 142 | ||
142 | set_tls_desc(p, idx, &info, 1); | 143 | set_tls_desc(p, idx, &info, 1); |
143 | 144 | ||
145 | /* | ||
146 | * If DS, ES, FS, or GS points to the modified segment, forcibly | ||
147 | * refresh it. Only needed on x86_64 because x86_32 reloads them | ||
148 | * on return to user mode. | ||
149 | */ | ||
150 | modified_sel = (idx << 3) | 3; | ||
151 | |||
152 | if (p == current) { | ||
153 | #ifdef CONFIG_X86_64 | ||
154 | savesegment(ds, sel); | ||
155 | if (sel == modified_sel) | ||
156 | loadsegment(ds, sel); | ||
157 | |||
158 | savesegment(es, sel); | ||
159 | if (sel == modified_sel) | ||
160 | loadsegment(es, sel); | ||
161 | |||
162 | savesegment(fs, sel); | ||
163 | if (sel == modified_sel) | ||
164 | loadsegment(fs, sel); | ||
165 | |||
166 | savesegment(gs, sel); | ||
167 | if (sel == modified_sel) | ||
168 | load_gs_index(sel); | ||
169 | #endif | ||
170 | |||
171 | #ifdef CONFIG_X86_32_LAZY_GS | ||
172 | savesegment(gs, sel); | ||
173 | if (sel == modified_sel) | ||
174 | loadsegment(gs, sel); | ||
175 | #endif | ||
176 | } else { | ||
177 | #ifdef CONFIG_X86_64 | ||
178 | if (p->thread.fsindex == modified_sel) | ||
179 | p->thread.fsbase = info.base_addr; | ||
180 | |||
181 | if (p->thread.gsindex == modified_sel) | ||
182 | p->thread.gsbase = info.base_addr; | ||
183 | #endif | ||
184 | } | ||
185 | |||
144 | return 0; | 186 | return 0; |
145 | } | 187 | } |
146 | 188 | ||
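do_set_thread_area() above backs set_thread_area(2); the new hunk additionally refreshes any live segment register whose selector ((idx << 3) | 3) points at the rewritten GDT entry. A userspace sketch that installs a TLS descriptor, mainly exercised by 32-bit runtimes and assuming the syscall is wired on the running kernel; entry_number = -1 asks the kernel to pick a free slot, and the 0x1000 base is an arbitrary illustrative address never dereferenced here:

#include <asm/ldt.h>		/* struct user_desc */
#include <sys/syscall.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct user_desc ud;

	memset(&ud, 0, sizeof(ud));
	ud.entry_number = -1;		/* let the kernel allocate a TLS slot */
	ud.base_addr = 0x1000;		/* arbitrary base, never dereferenced */
	ud.limit = 0xfffff;
	ud.seg_32bit = 1;
	ud.limit_in_pages = 1;
	ud.useable = 1;

	if (syscall(SYS_set_thread_area, &ud))
		return 1;

	/* Matches the modified_sel = (idx << 3) | 3 computation above. */
	printf("entry %u -> selector %#x\n", ud.entry_number,
	       (ud.entry_number << 3) | 3);
	return 0;
}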
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 06cbe25861f1..d1590486204a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <asm/processor.h> | 51 | #include <asm/processor.h> |
52 | #include <asm/debugreg.h> | 52 | #include <asm/debugreg.h> |
53 | #include <linux/atomic.h> | 53 | #include <linux/atomic.h> |
54 | #include <asm/text-patching.h> | ||
54 | #include <asm/ftrace.h> | 55 | #include <asm/ftrace.h> |
55 | #include <asm/traps.h> | 56 | #include <asm/traps.h> |
56 | #include <asm/desc.h> | 57 | #include <asm/desc.h> |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c9c4c7ce3eb2..38ba6de56ede 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -36,7 +36,7 @@ static int __read_mostly tsc_unstable; | |||
36 | 36 | ||
37 | /* native_sched_clock() is called before tsc_init(), so | 37 | /* native_sched_clock() is called before tsc_init(), so |
38 | we must start with the TSC soft disabled to prevent | 38 | we must start with the TSC soft disabled to prevent |
39 | erroneous rdtsc usage on !cpu_has_tsc processors */ | 39 | erroneous rdtsc usage on !boot_cpu_has(X86_FEATURE_TSC) processors */ |
40 | static int __read_mostly tsc_disabled = -1; | 40 | static int __read_mostly tsc_disabled = -1; |
41 | 41 | ||
42 | static DEFINE_STATIC_KEY_FALSE(__use_tsc); | 42 | static DEFINE_STATIC_KEY_FALSE(__use_tsc); |
@@ -834,15 +834,15 @@ int recalibrate_cpu_khz(void) | |||
834 | #ifndef CONFIG_SMP | 834 | #ifndef CONFIG_SMP |
835 | unsigned long cpu_khz_old = cpu_khz; | 835 | unsigned long cpu_khz_old = cpu_khz; |
836 | 836 | ||
837 | if (cpu_has_tsc) { | 837 | if (!boot_cpu_has(X86_FEATURE_TSC)) |
838 | tsc_khz = x86_platform.calibrate_tsc(); | ||
839 | cpu_khz = tsc_khz; | ||
840 | cpu_data(0).loops_per_jiffy = | ||
841 | cpufreq_scale(cpu_data(0).loops_per_jiffy, | ||
842 | cpu_khz_old, cpu_khz); | ||
843 | return 0; | ||
844 | } else | ||
845 | return -ENODEV; | 838 | return -ENODEV; |
839 | |||
840 | tsc_khz = x86_platform.calibrate_tsc(); | ||
841 | cpu_khz = tsc_khz; | ||
842 | cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy, | ||
843 | cpu_khz_old, cpu_khz); | ||
844 | |||
845 | return 0; | ||
846 | #else | 846 | #else |
847 | return -ENODEV; | 847 | return -ENODEV; |
848 | #endif | 848 | #endif |
@@ -922,9 +922,6 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | |||
922 | struct cpufreq_freqs *freq = data; | 922 | struct cpufreq_freqs *freq = data; |
923 | unsigned long *lpj; | 923 | unsigned long *lpj; |
924 | 924 | ||
925 | if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) | ||
926 | return 0; | ||
927 | |||
928 | lpj = &boot_cpu_data.loops_per_jiffy; | 925 | lpj = &boot_cpu_data.loops_per_jiffy; |
929 | #ifdef CONFIG_SMP | 926 | #ifdef CONFIG_SMP |
930 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | 927 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) |
@@ -954,9 +951,9 @@ static struct notifier_block time_cpufreq_notifier_block = { | |||
954 | .notifier_call = time_cpufreq_notifier | 951 | .notifier_call = time_cpufreq_notifier |
955 | }; | 952 | }; |
956 | 953 | ||
957 | static int __init cpufreq_tsc(void) | 954 | static int __init cpufreq_register_tsc_scaling(void) |
958 | { | 955 | { |
959 | if (!cpu_has_tsc) | 956 | if (!boot_cpu_has(X86_FEATURE_TSC)) |
960 | return 0; | 957 | return 0; |
961 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | 958 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) |
962 | return 0; | 959 | return 0; |
@@ -965,7 +962,7 @@ static int __init cpufreq_tsc(void) | |||
965 | return 0; | 962 | return 0; |
966 | } | 963 | } |
967 | 964 | ||
968 | core_initcall(cpufreq_tsc); | 965 | core_initcall(cpufreq_register_tsc_scaling); |
969 | 966 | ||
970 | #endif /* CONFIG_CPU_FREQ */ | 967 | #endif /* CONFIG_CPU_FREQ */ |
971 | 968 | ||
@@ -1081,7 +1078,7 @@ static void __init check_system_tsc_reliable(void) | |||
1081 | */ | 1078 | */ |
1082 | int unsynchronized_tsc(void) | 1079 | int unsynchronized_tsc(void) |
1083 | { | 1080 | { |
1084 | if (!cpu_has_tsc || tsc_unstable) | 1081 | if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_unstable) |
1085 | return 1; | 1082 | return 1; |
1086 | 1083 | ||
1087 | #ifdef CONFIG_SMP | 1084 | #ifdef CONFIG_SMP |
@@ -1205,7 +1202,7 @@ out: | |||
1205 | 1202 | ||
1206 | static int __init init_tsc_clocksource(void) | 1203 | static int __init init_tsc_clocksource(void) |
1207 | { | 1204 | { |
1208 | if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz) | 1205 | if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz) |
1209 | return 0; | 1206 | return 0; |
1210 | 1207 | ||
1211 | if (tsc_clocksource_reliable) | 1208 | if (tsc_clocksource_reliable) |
@@ -1242,7 +1239,7 @@ void __init tsc_init(void) | |||
1242 | u64 lpj; | 1239 | u64 lpj; |
1243 | int cpu; | 1240 | int cpu; |
1244 | 1241 | ||
1245 | if (!cpu_has_tsc) { | 1242 | if (!boot_cpu_has(X86_FEATURE_TSC)) { |
1246 | setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); | 1243 | setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); |
1247 | return; | 1244 | return; |
1248 | } | 1245 | } |
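Besides the cpu_has_tsc -> boot_cpu_has(X86_FEATURE_TSC) conversion, the recalibrate_cpu_khz() hunk flips a nested if/else into an early-return guard, keeping the calibration path at one indentation level. A generic before/after sketch of that refactor; the function names are illustrative:

#include <stdio.h>

/* Before: the useful work is nested inside the capability check. */
static int calibrate_nested(int has_feature)
{
	if (has_feature) {
		/* ... real work ... */
		return 0;
	} else
		return -1;
}

/* After: reject the unsupported case up front, then do the work. */
static int calibrate_early_return(int has_feature)
{
	if (!has_feature)
		return -1;

	/* ... real work, no extra indentation ... */
	return 0;
}

int main(void)
{
	printf("%d %d\n", calibrate_nested(1), calibrate_early_return(1));
	return 0;
}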
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 92ae6acac8a7..9911a0620f9a 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <asm/param.h> | 23 | #include <asm/param.h> |
24 | 24 | ||
25 | /* CPU reference clock frequency: in KHz */ | 25 | /* CPU reference clock frequency: in KHz */ |
26 | #define FREQ_80 80000 | ||
26 | #define FREQ_83 83200 | 27 | #define FREQ_83 83200 |
27 | #define FREQ_100 99840 | 28 | #define FREQ_100 99840 |
28 | #define FREQ_133 133200 | 29 | #define FREQ_133 133200 |
@@ -56,6 +57,8 @@ static struct freq_desc freq_desc_tables[] = { | |||
56 | { 6, 0x37, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } }, | 57 | { 6, 0x37, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } }, |
57 | /* ANN */ | 58 | /* ANN */ |
58 | { 6, 0x5a, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_100, 0, 0, 0, 0 } }, | 59 | { 6, 0x5a, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_100, 0, 0, 0, 0 } }, |
60 | /* AIRMONT */ | ||
61 | { 6, 0x4c, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, FREQ_80, 0, 0, 0 } }, | ||
59 | }; | 62 | }; |
60 | 63 | ||
61 | static int match_cpu(u8 family, u8 model) | 64 | static int match_cpu(u8 family, u8 model) |
@@ -92,7 +95,7 @@ unsigned long try_msr_calibrate_tsc(void) | |||
92 | 95 | ||
93 | if (freq_desc_tables[cpu_index].msr_plat) { | 96 | if (freq_desc_tables[cpu_index].msr_plat) { |
94 | rdmsr(MSR_PLATFORM_INFO, lo, hi); | 97 | rdmsr(MSR_PLATFORM_INFO, lo, hi); |
95 | ratio = (lo >> 8) & 0x1f; | 98 | ratio = (lo >> 8) & 0xff; |
96 | } else { | 99 | } else { |
97 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | 100 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); |
98 | ratio = (hi >> 8) & 0x1f; | 101 | ratio = (hi >> 8) & 0x1f; |
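The mask change in try_msr_calibrate_tsc() matters because the non-turbo ratio occupies bits 15:8 of the low MSR_PLATFORM_INFO word: the old 5-bit 0x1f mask silently truncated any ratio above 31. A tiny sketch of the difference; the MSR value is made up for illustration:

#include <stdint.h>
#include <stdio.h>

/* Bits 15:8 of the low MSR_PLATFORM_INFO word hold the bus ratio. */
static unsigned int ratio_8bit(uint32_t lo) { return (lo >> 8) & 0xff; }
static unsigned int ratio_5bit(uint32_t lo) { return (lo >> 8) & 0x1f; }

int main(void)
{
	uint32_t lo = 0x2a00;	/* hypothetical MSR low word: ratio 42 */

	/* 42 & 0x1f == 10, so the old mask understated the frequency. */
	printf("fixed mask: %u, old mask: %u\n",
	       ratio_8bit(lo), ratio_5bit(lo));
	return 0;
}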
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index bf4db6eaec8f..6c1ff31d99ff 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c | |||
@@ -516,7 +516,7 @@ struct uprobe_xol_ops { | |||
516 | 516 | ||
517 | static inline int sizeof_long(void) | 517 | static inline int sizeof_long(void) |
518 | { | 518 | { |
519 | return is_ia32_task() ? 4 : 8; | 519 | return in_ia32_syscall() ? 4 : 8; |
520 | } | 520 | } |
521 | 521 | ||
522 | static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) | 522 | static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) |
@@ -578,7 +578,7 @@ static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs) | |||
578 | riprel_post_xol(auprobe, regs); | 578 | riprel_post_xol(auprobe, regs); |
579 | } | 579 | } |
580 | 580 | ||
581 | static struct uprobe_xol_ops default_xol_ops = { | 581 | static const struct uprobe_xol_ops default_xol_ops = { |
582 | .pre_xol = default_pre_xol_op, | 582 | .pre_xol = default_pre_xol_op, |
583 | .post_xol = default_post_xol_op, | 583 | .post_xol = default_post_xol_op, |
584 | .abort = default_abort_op, | 584 | .abort = default_abort_op, |
@@ -695,7 +695,7 @@ static void branch_clear_offset(struct arch_uprobe *auprobe, struct insn *insn) | |||
695 | 0, insn->immediate.nbytes); | 695 | 0, insn->immediate.nbytes); |
696 | } | 696 | } |
697 | 697 | ||
698 | static struct uprobe_xol_ops branch_xol_ops = { | 698 | static const struct uprobe_xol_ops branch_xol_ops = { |
699 | .emulate = branch_emulate_op, | 699 | .emulate = branch_emulate_op, |
700 | .post_xol = branch_post_xol_op, | 700 | .post_xol = branch_post_xol_op, |
701 | }; | 701 | }; |
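Constifying the uprobe_xol_ops tables lets the linker place them in read-only data, so their function pointers can no longer be overwritten at runtime. A generic sketch of the pattern; the struct and function names are illustrative:

#include <stdio.h>

struct xol_ops_like {
	int (*pre)(int);
	int (*post)(int);
};

static int pre_op(int x)  { return x + 1; }
static int post_op(int x) { return x * 2; }

/* const places the table in .rodata; writes through it now fault
 * instead of silently redirecting the callbacks. */
static const struct xol_ops_like ops = {
	.pre  = pre_op,
	.post = post_op,
};

int main(void)
{
	printf("%d\n", ops.post(ops.pre(20)));	/* (20 + 1) * 2 = 42 */
	return 0;
}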
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 4c941f88d405..9297a002d8e5 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -334,7 +334,7 @@ SECTIONS | |||
334 | __brk_limit = .; | 334 | __brk_limit = .; |
335 | } | 335 | } |
336 | 336 | ||
337 | . = ALIGN(PAGE_SIZE); | 337 | . = ALIGN(PAGE_SIZE); /* keep VO_INIT_SIZE page aligned */ |
338 | _end = .; | 338 | _end = .; |
339 | 339 | ||
340 | STABS_DEBUG | 340 | STABS_DEBUG |