path: root/arch/x86/kernel
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile                    |    2
-rw-r--r--  arch/x86/kernel/alternative.c               |   28
-rw-r--r--  arch/x86/kernel/amd_gart_64.c               |    1
-rw-r--r--  arch/x86/kernel/aperture_64.c               |   46
-rw-r--r--  arch/x86/kernel/apic/apic_flat_64.c         |   24
-rw-r--r--  arch/x86/kernel/apic/io_apic.c              |   20
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c          |   84
-rw-r--r--  arch/x86/kernel/apm_32.c                    |    3
-rw-r--r--  arch/x86/kernel/cpu/bugs.c                  |  134
-rw-r--r--  arch/x86/kernel/cpu/centaur.c               |    4
-rw-r--r--  arch/x86/kernel/cpu/common.c                |   70
-rw-r--r--  arch/x86/kernel/cpu/hypervisor.c            |    4
-rw-r--r--  arch/x86/kernel/cpu/intel.c                 |   66
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt.c             |   68
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt.h             |    5
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt_rdtgroup.c    |  117
-rw-r--r--  arch/x86/kernel/cpu/mcheck/dev-mcelog.c     |    2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-severity.c   |   26
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c            |   26
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c        |   29
-rw-r--r--  arch/x86/kernel/cpu/microcode/core.c        |    2
-rw-r--r--  arch/x86/kernel/cpu/scattered.c             |    3
-rw-r--r--  arch/x86/kernel/devicetree.c                |    6
-rw-r--r--  arch/x86/kernel/dumpstack.c                 |    2
-rw-r--r--  arch/x86/kernel/early-quirks.c              |   87
-rw-r--r--  arch/x86/kernel/itmt.c                      |    1
-rw-r--r--  arch/x86/kernel/jailhouse.c                 |  211
-rw-r--r--  arch/x86/kernel/mpparse.c                   |   23
-rw-r--r--  arch/x86/kernel/pci-dma.c                   |   23
-rw-r--r--  arch/x86/kernel/pci-nommu.c                 |    2
-rw-r--r--  arch/x86/kernel/pci-swiotlb.c               |    8
-rw-r--r--  arch/x86/kernel/platform-quirks.c           |    1
-rw-r--r--  arch/x86/kernel/process.c                   |    1
-rw-r--r--  arch/x86/kernel/process_64.c                |    4
-rw-r--r--  arch/x86/kernel/ptrace.c                    |    2
-rw-r--r--  arch/x86/kernel/relocate_kernel_64.S        |    8
-rw-r--r--  arch/x86/kernel/setup.c                     |    1
-rw-r--r--  arch/x86/kernel/signal.c                    |    2
-rw-r--r--  arch/x86/kernel/signal_compat.c             |  123
-rw-r--r--  arch/x86/kernel/smpboot.c                   |    5
-rw-r--r--  arch/x86/kernel/time.c                      |    9
-rw-r--r--  arch/x86/kernel/tsc.c                       |   61
-rw-r--r--  arch/x86/kernel/uprobes.c                   |  107
43 files changed, 1055 insertions(+), 396 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 7e2baf7304ae..29786c87e864 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -115,6 +115,8 @@ obj-$(CONFIG_PARAVIRT_SPINLOCKS) += paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)            += pvclock.o
 obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE)    += pmem.o
 
+obj-$(CONFIG_JAILHOUSE_GUEST)           += jailhouse.o
+
 obj-$(CONFIG_EISA)                      += eisa.o
 obj-$(CONFIG_PCSPKR_PLATFORM)           += pcspeaker.o
 
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 4817d743c263..a481763a3776 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(char *str)
 }
 __setup("noreplace-smp", setup_noreplace_smp);
 
-#ifdef CONFIG_PARAVIRT
-static int __initdata_or_module noreplace_paravirt = 0;
-
-static int __init setup_noreplace_paravirt(char *str)
-{
-    noreplace_paravirt = 1;
-    return 1;
-}
-__setup("noreplace-paravirt", setup_noreplace_paravirt);
-#endif
-
 #define DPRINTK(fmt, args...)                                           \
 do {                                                                    \
     if (debug_alternative)                                              \
@@ -298,7 +287,7 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
     tgt_rip = next_rip + o_dspl;
     n_dspl = tgt_rip - orig_insn;
 
-    DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl);
+    DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl);
 
     if (tgt_rip - orig_insn >= 0) {
         if (n_dspl - 2 <= 127)
@@ -355,7 +344,7 @@ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *ins
     add_nops(instr + (a->instrlen - a->padlen), a->padlen);
     local_irq_restore(flags);
 
-    DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
+    DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
            instr, a->instrlen - a->padlen, a->padlen);
 }
 
@@ -376,7 +365,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
     u8 *instr, *replacement;
     u8 insnbuf[MAX_PATCH_LEN];
 
-    DPRINTK("alt table %p -> %p", start, end);
+    DPRINTK("alt table %px, -> %px", start, end);
     /*
      * The scan order should be from start to end. A later scanned
      * alternative code can overwrite previously scanned alternative code.
@@ -400,14 +389,14 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
             continue;
         }
 
-        DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d",
+        DPRINTK("feat: %d*32+%d, old: (%px len: %d), repl: (%px, len: %d), pad: %d",
             a->cpuid >> 5,
             a->cpuid & 0x1f,
             instr, a->instrlen,
             replacement, a->replacementlen, a->padlen);
 
-        DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
-        DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);
+        DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
+        DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
 
         memcpy(insnbuf, replacement, a->replacementlen);
         insnbuf_sz = a->replacementlen;
@@ -433,7 +422,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
                    a->instrlen - a->replacementlen);
         insnbuf_sz += a->instrlen - a->replacementlen;
     }
-    DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr);
+    DUMP_BYTES(insnbuf, insnbuf_sz, "%px: final_insn: ", instr);
 
     text_poke_early(instr, insnbuf, insnbuf_sz);
     }
@@ -599,9 +588,6 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
     struct paravirt_patch_site *p;
     char insnbuf[MAX_PATCH_LEN];
 
-    if (noreplace_paravirt)
-        return;
-
     for (p = start; p < end; p++) {
         unsigned int used;
 
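Note on the %p -> %px conversions above: since Linux 4.15, %p hashes pointer values before printing, so debug output that needs the real address must opt in with %px. A minimal illustration (not part of this patch):

    /* %p hashes the pointer; %px prints the raw value. The DPRINTK and
     * DUMP_BYTES sites patched above are debug-only and need the real
     * patch addresses, hence the switch to %px. */
    pr_debug("patch site: %p\n", instr);   /* e.g. 000000006acb54cd (hashed) */
    pr_debug("patch site: %px\n", instr);  /* e.g. ffffffff81000000 (raw)   */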
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index cc0e8bc0ea3f..ecd486cb06ab 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -31,6 +31,7 @@
 #include <linux/io.h>
 #include <linux/gfp.h>
 #include <linux/atomic.h>
+#include <linux/dma-direct.h>
 #include <asm/mtrr.h>
 #include <asm/pgtable.h>
 #include <asm/proto.h>
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index f5d92bc3b884..2c4d5ece7456 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -30,6 +30,7 @@
 #include <asm/dma.h>
 #include <asm/amd_nb.h>
 #include <asm/x86_init.h>
+#include <linux/crash_dump.h>
 
 /*
  * Using 512M as goal, in case kexec will load kernel_big
@@ -56,6 +57,33 @@ int fallback_aper_force __initdata;
 
 int fix_aperture __initdata = 1;
 
+#ifdef CONFIG_PROC_VMCORE
+/*
+ * If the first kernel maps the aperture over e820 RAM, the kdump kernel will
+ * use the same range because it will remain configured in the northbridge.
+ * Trying to dump this area via /proc/vmcore may crash the machine, so exclude
+ * it from vmcore.
+ */
+static unsigned long aperture_pfn_start, aperture_page_count;
+
+static int gart_oldmem_pfn_is_ram(unsigned long pfn)
+{
+    return likely((pfn < aperture_pfn_start) ||
+                  (pfn >= aperture_pfn_start + aperture_page_count));
+}
+
+static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
+{
+    aperture_pfn_start = aper_base >> PAGE_SHIFT;
+    aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT;
+    WARN_ON(register_oldmem_pfn_is_ram(&gart_oldmem_pfn_is_ram));
+}
+#else
+static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
+{
+}
+#endif
+
 /* This code runs before the PCI subsystem is initialized, so just
    access the northbridge directly. */
 
@@ -435,8 +463,16 @@ int __init gart_iommu_hole_init(void)
 
 out:
     if (!fix && !fallback_aper_force) {
-        if (last_aper_base)
+        if (last_aper_base) {
+            /*
+             * If this is the kdump kernel, the first kernel
+             * may have allocated the range over its e820 RAM
+             * and fixed up the northbridge
+             */
+            exclude_from_vmcore(last_aper_base, last_aper_order);
+
             return 1;
+        }
         return 0;
     }
 
@@ -473,6 +509,14 @@ out:
         return 0;
     }
 
+    /*
+     * If this is the kdump kernel _and_ the first kernel did not
+     * configure the aperture in the northbridge, this range may
+     * overlap with the first kernel's memory. We can't access the
+     * range through vmcore even though it should be part of the dump.
+     */
+    exclude_from_vmcore(aper_alloc, aper_order);
+
     /* Fix up the north bridges */
     for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
         int bus, dev_base, dev_limit;
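register_oldmem_pfn_is_ram() lets the kdump kernel veto /proc/vmcore reads of specific PFNs. A simplified sketch of the consumer side (abbreviated from fs/proc/vmcore.c; details elided):

    /* The registered hook is consulted before each oldmem access; pages
     * it rejects are zero-filled instead of being read, which is what
     * keeps /proc/vmcore away from the (possibly fatal) GART aperture. */
    static int (*oldmem_pfn_is_ram)(unsigned long pfn);

    static int pfn_is_ram(unsigned long pfn)
    {
        return oldmem_pfn_is_ram ? oldmem_pfn_is_ram(pfn) : 1;
    }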
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 25a87028cb3f..e84c9eb4e5b4 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -19,6 +19,7 @@
 #include <asm/smp.h>
 #include <asm/apic.h>
 #include <asm/ipi.h>
+#include <asm/jailhouse_para.h>
 
 #include <linux/acpi.h>
 
@@ -84,12 +85,8 @@ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
 static void flat_send_IPI_allbutself(int vector)
 {
     int cpu = smp_processor_id();
-#ifdef CONFIG_HOTPLUG_CPU
-    int hotplug = 1;
-#else
-    int hotplug = 0;
-#endif
-    if (hotplug || vector == NMI_VECTOR) {
+
+    if (IS_ENABLED(CONFIG_HOTPLUG_CPU) || vector == NMI_VECTOR) {
         if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
             unsigned long mask = cpumask_bits(cpu_online_mask)[0];
 
@@ -218,6 +215,15 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
     return 0;
 }
 
+static void physflat_init_apic_ldr(void)
+{
+    /*
+     * LDR and DFR are not involved in physflat mode, rather:
+     * "In physical destination mode, the destination processor is
+     * specified by its local APIC ID [...]." (Intel SDM, 10.6.2.1)
+     */
+}
+
 static void physflat_send_IPI_allbutself(int vector)
 {
     default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
@@ -230,7 +236,8 @@ static void physflat_send_IPI_all(int vector)
 
 static int physflat_probe(void)
 {
-    if (apic == &apic_physflat || num_possible_cpus() > 8)
+    if (apic == &apic_physflat || num_possible_cpus() > 8 ||
+        jailhouse_paravirt())
         return 1;
 
     return 0;
@@ -251,8 +258,7 @@ static struct apic apic_physflat __ro_after_init = {
     .dest_logical           = 0,
     .check_apicid_used      = NULL,
 
-    /* not needed, but shouldn't hurt: */
-    .init_apic_ldr          = flat_init_apic_ldr,
+    .init_apic_ldr          = physflat_init_apic_ldr,
 
     .ioapic_phys_id_map     = NULL,
     .setup_apic_routing     = NULL,
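The flat_send_IPI_allbutself() change swaps an #ifdef block for the IS_ENABLED() idiom. A generic illustration (the two helpers are hypothetical, for shape only):

    /* IS_ENABLED(CONFIG_FOO) evaluates to 1 for =y and =m, 0 otherwise;
     * unlike #ifdef, the dead branch is still parsed and type-checked,
     * and the compiler then discards it as a constant condition. */
    if (IS_ENABLED(CONFIG_HOTPLUG_CPU) || vector == NMI_VECTOR)
        slow_but_safe_path();
    else
        fast_shortcut_path();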
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8a7963421460..8ad2e410974f 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -800,18 +800,18 @@ static int irq_polarity(int idx)
     /*
      * Determine IRQ line polarity (high active or low active):
      */
-    switch (mp_irqs[idx].irqflag & 0x03) {
-    case 0:
+    switch (mp_irqs[idx].irqflag & MP_IRQPOL_MASK) {
+    case MP_IRQPOL_DEFAULT:
         /* conforms to spec, ie. bus-type dependent polarity */
         if (test_bit(bus, mp_bus_not_pci))
             return default_ISA_polarity(idx);
         else
             return default_PCI_polarity(idx);
-    case 1:
+    case MP_IRQPOL_ACTIVE_HIGH:
         return IOAPIC_POL_HIGH;
-    case 2:
+    case MP_IRQPOL_RESERVED:
         pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n");
-    case 3:
+    case MP_IRQPOL_ACTIVE_LOW:
     default: /* Pointless default required due to do gcc stupidity */
         return IOAPIC_POL_LOW;
     }
@@ -845,8 +845,8 @@ static int irq_trigger(int idx)
     /*
      * Determine IRQ trigger mode (edge or level sensitive):
      */
-    switch ((mp_irqs[idx].irqflag >> 2) & 0x03) {
-    case 0:
+    switch (mp_irqs[idx].irqflag & MP_IRQTRIG_MASK) {
+    case MP_IRQTRIG_DEFAULT:
         /* conforms to spec, ie. bus-type dependent trigger mode */
         if (test_bit(bus, mp_bus_not_pci))
             trigger = default_ISA_trigger(idx);
@@ -854,11 +854,11 @@ static int irq_trigger(int idx)
             trigger = default_PCI_trigger(idx);
         /* Take EISA into account */
         return eisa_irq_trigger(idx, bus, trigger);
-    case 1:
+    case MP_IRQTRIG_EDGE:
         return IOAPIC_EDGE;
-    case 2:
+    case MP_IRQTRIG_RESERVED:
         pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n");
-    case 3:
+    case MP_IRQTRIG_LEVEL:
     default: /* Pointless default required due to do gcc stupidity */
         return IOAPIC_LEVEL;
     }
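The MP_IRQPOL_*/MP_IRQTRIG_* names replace the bare masks and magic case values. Their plausible definitions (introduced alongside this change in asm/mpspec_def.h; note the trigger constants absorb the former ">> 2" shift by masking bits 3:2 directly):

    #define MP_IRQPOL_DEFAULT       0x0
    #define MP_IRQPOL_ACTIVE_HIGH   0x1
    #define MP_IRQPOL_RESERVED      0x2
    #define MP_IRQPOL_ACTIVE_LOW    0x3
    #define MP_IRQPOL_MASK          0x3

    #define MP_IRQTRIG_DEFAULT      0x0
    #define MP_IRQTRIG_EDGE         0x4
    #define MP_IRQTRIG_RESERVED     0x8
    #define MP_IRQTRIG_LEVEL        0xc
    #define MP_IRQTRIG_MASK         0xc

With these values, "irqflag & MP_IRQTRIG_MASK" is equivalent to the old "(irqflag >> 2) & 0x03" compared against shifted case values.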
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index e1b8e8bf6b3c..46b675aaf20b 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -137,6 +137,8 @@ static int __init early_get_pnodeid(void)
     case UV3_HUB_PART_NUMBER_X:
         uv_min_hub_revision_id += UV3_HUB_REVISION_BASE;
         break;
+
+    /* Update: UV4A has only a modified revision to indicate HUB fixes */
     case UV4_HUB_PART_NUMBER:
         uv_min_hub_revision_id += UV4_HUB_REVISION_BASE - 1;
         uv_cpuid.gnode_shift = 2; /* min partition is 4 sockets */
@@ -316,6 +318,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
     } else if (!strcmp(oem_table_id, "UVH")) {
         /* Only UV1 systems: */
         uv_system_type = UV_NON_UNIQUE_APIC;
+        x86_platform.legacy.warm_reset = 0;
         __this_cpu_write(x2apic_extra_bits, pnodeid << uvh_apicid.s.pnode_shift);
         uv_set_apicid_hibit();
         uv_apic = 1;
@@ -767,6 +770,7 @@ static __init void map_gru_high(int max_pnode)
         return;
     }
 
+    /* Only UV3 has distributed GRU mode */
     if (is_uv3_hub() && gru.s3.mode) {
         map_gru_distributed(gru.v);
         return;
@@ -790,63 +794,61 @@ static __init void map_mmr_high(int max_pnode)
     pr_info("UV: MMR disabled\n");
 }
 
-/*
- * This commonality works because both 0 & 1 versions of the MMIOH OVERLAY
- * and REDIRECT MMR regs are exactly the same on UV3.
- */
-struct mmioh_config {
-    unsigned long overlay;
-    unsigned long redirect;
-    char *id;
-};
-
-static __initdata struct mmioh_config mmiohs[] = {
-    {
-        UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR,
-        UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR,
-        "MMIOH0"
-    },
-    {
-        UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR,
-        UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR,
-        "MMIOH1"
-    },
-};
-
-/* UV3 & UV4 have identical MMIOH overlay configs */
-static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
+/* UV3/4 have identical MMIOH overlay configs, UV4A is slightly different */
+static __init void map_mmioh_high_uv34(int index, int min_pnode, int max_pnode)
 {
-    union uv3h_rh_gam_mmioh_overlay_config0_mmr_u overlay;
+    unsigned long overlay;
     unsigned long mmr;
     unsigned long base;
+    unsigned long nasid_mask;
+    unsigned long m_overlay;
     int i, n, shift, m_io, max_io;
     int nasid, lnasid, fi, li;
     char *id;
 
-    id = mmiohs[index].id;
-    overlay.v = uv_read_local_mmr(mmiohs[index].overlay);
-
-    pr_info("UV: %s overlay 0x%lx base:0x%x m_io:%d\n", id, overlay.v, overlay.s3.base, overlay.s3.m_io);
-    if (!overlay.s3.enable) {
+    if (index == 0) {
+        id = "MMIOH0";
+        m_overlay = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR;
+        overlay = uv_read_local_mmr(m_overlay);
+        base = overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK;
+        mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR;
+        m_io = (overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK)
+            >> UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT;
+        shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT;
+        n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH;
+        nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK;
+    } else {
+        id = "MMIOH1";
+        m_overlay = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR;
+        overlay = uv_read_local_mmr(m_overlay);
+        base = overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK;
+        mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR;
+        m_io = (overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK)
+            >> UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT;
+        shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT;
+        n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH;
+        nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK;
+    }
+    pr_info("UV: %s overlay 0x%lx base:0x%lx m_io:%d\n", id, overlay, base, m_io);
+    if (!(overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK)) {
         pr_info("UV: %s disabled\n", id);
         return;
     }
 
-    shift = UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT;
-    base = (unsigned long)overlay.s3.base;
-    m_io = overlay.s3.m_io;
-    mmr = mmiohs[index].redirect;
-    n = UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH;
     /* Convert to NASID: */
     min_pnode *= 2;
     max_pnode *= 2;
     max_io = lnasid = fi = li = -1;
 
     for (i = 0; i < n; i++) {
-        union uv3h_rh_gam_mmioh_redirect_config0_mmr_u redirect;
+        unsigned long m_redirect = mmr + i * 8;
+        unsigned long redirect = uv_read_local_mmr(m_redirect);
+
+        nasid = redirect & nasid_mask;
+        if (i == 0)
+            pr_info("UV: %s redirect base 0x%lx(@0x%lx) 0x%04x\n",
+                id, redirect, m_redirect, nasid);
 
-        redirect.v = uv_read_local_mmr(mmr + i * 8);
-        nasid = redirect.s3.nasid;
         /* Invalid NASID: */
         if (nasid < min_pnode || max_pnode < nasid)
             nasid = -1;
@@ -894,8 +896,8 @@ static __init void map_mmioh_high(int min_pnode, int max_pnode)
 
     if (is_uv3_hub() || is_uv4_hub()) {
         /* Map both MMIOH regions: */
-        map_mmioh_high_uv3(0, min_pnode, max_pnode);
-        map_mmioh_high_uv3(1, min_pnode, max_pnode);
+        map_mmioh_high_uv34(0, min_pnode, max_pnode);
+        map_mmioh_high_uv34(1, min_pnode, max_pnode);
         return;
     }
 
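The rewrite drops the uv3h bitfield unions in favour of explicit mask/shift arithmetic, which is what lets one function serve the UV3, UV4 and UV4A register layouts. The underlying pattern, in isolation:

    /* Generic MMR field extraction: select the field with its mask,
     * then right-shift it into place. */
    static inline unsigned long mmr_field(unsigned long mmr,
                                          unsigned long mask, int shift)
    {
        return (mmr & mask) >> shift;
    }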
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index e4b0d92b3ae0..dc0ca8e29c75 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1506,7 +1506,7 @@ static ssize_t do_read(struct file *fp, char __user *buf, size_t count, loff_t *
     return 0;
 }
 
-static unsigned int do_poll(struct file *fp, poll_table *wait)
+static __poll_t do_poll(struct file *fp, poll_table *wait)
 {
     struct apm_user *as;
 
@@ -2389,6 +2389,7 @@ static int __init apm_init(void)
     if (HZ != 100)
         idle_period = (idle_period * HZ) / 100;
     if (idle_threshold < 100) {
+        cpuidle_poll_state_init(&apm_idle_driver);
         if (!cpuidle_register_driver(&apm_idle_driver))
             if (cpuidle_register_device(&apm_cpuidle_device))
                 cpuidle_unregister_driver(&apm_idle_driver);
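do_poll() here (and mce_chrdev_poll() further down) change their return type as part of the tree-wide unsigned int -> __poll_t conversion. __poll_t is a sparse "bitwise" type, so returning anything other than EPOLL* masks is flagged by static analysis. The shape of a converted handler (example_wq and data_ready are hypothetical):

    static __poll_t example_poll(struct file *file, poll_table *wait)
    {
        poll_wait(file, &example_wq, wait);
        return data_ready ? EPOLLIN | EPOLLRDNORM : 0;
    }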
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 390b3dc3d438..71949bf2de5a 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/utsname.h>
 #include <linux/cpu.h>
+#include <linux/module.h>
 
 #include <asm/nospec-branch.h>
 #include <asm/cmdline.h>
@@ -90,20 +91,41 @@ static const char *spectre_v2_strings[] = {
 };
 
 #undef pr_fmt
-#define pr_fmt(fmt)     "Spectre V2 mitigation: " fmt
+#define pr_fmt(fmt)     "Spectre V2 : " fmt
 
 static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
 
+#ifdef RETPOLINE
+static bool spectre_v2_bad_module;
+
+bool retpoline_module_ok(bool has_retpoline)
+{
+    if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline)
+        return true;
+
+    pr_err("System may be vulnerable to spectre v2\n");
+    spectre_v2_bad_module = true;
+    return false;
+}
+
+static inline const char *spectre_v2_module_string(void)
+{
+    return spectre_v2_bad_module ? " - vulnerable module loaded" : "";
+}
+#else
+static inline const char *spectre_v2_module_string(void) { return ""; }
+#endif
+
 static void __init spec2_print_if_insecure(const char *reason)
 {
     if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
-        pr_info("%s\n", reason);
+        pr_info("%s selected on command line.\n", reason);
 }
 
 static void __init spec2_print_if_secure(const char *reason)
 {
     if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
-        pr_info("%s\n", reason);
+        pr_info("%s selected on command line.\n", reason);
 }
 
 static inline bool retp_compiler(void)
@@ -118,42 +140,68 @@ static inline bool match_option(const char *arg, int arglen, const char *opt)
     return len == arglen && !strncmp(arg, opt, len);
 }
 
+static const struct {
+    const char *option;
+    enum spectre_v2_mitigation_cmd cmd;
+    bool secure;
+} mitigation_options[] = {
+    { "off",               SPECTRE_V2_CMD_NONE,              false },
+    { "on",                SPECTRE_V2_CMD_FORCE,             true },
+    { "retpoline",         SPECTRE_V2_CMD_RETPOLINE,         false },
+    { "retpoline,amd",     SPECTRE_V2_CMD_RETPOLINE_AMD,     false },
+    { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
+    { "auto",              SPECTRE_V2_CMD_AUTO,              false },
+};
+
 static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
 {
     char arg[20];
-    int ret;
-
-    ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
-                  sizeof(arg));
-    if (ret > 0) {
-        if (match_option(arg, ret, "off")) {
-            goto disable;
-        } else if (match_option(arg, ret, "on")) {
-            spec2_print_if_secure("force enabled on command line.");
-            return SPECTRE_V2_CMD_FORCE;
-        } else if (match_option(arg, ret, "retpoline")) {
-            spec2_print_if_insecure("retpoline selected on command line.");
-            return SPECTRE_V2_CMD_RETPOLINE;
-        } else if (match_option(arg, ret, "retpoline,amd")) {
-            if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
-                pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
-                return SPECTRE_V2_CMD_AUTO;
-            }
-            spec2_print_if_insecure("AMD retpoline selected on command line.");
-            return SPECTRE_V2_CMD_RETPOLINE_AMD;
-        } else if (match_option(arg, ret, "retpoline,generic")) {
-            spec2_print_if_insecure("generic retpoline selected on command line.");
-            return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
-        } else if (match_option(arg, ret, "auto")) {
+    int ret, i;
+    enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
+
+    if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+        return SPECTRE_V2_CMD_NONE;
+    else {
+        ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
+                      sizeof(arg));
+        if (ret < 0)
+            return SPECTRE_V2_CMD_AUTO;
+
+        for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
+            if (!match_option(arg, ret, mitigation_options[i].option))
+                continue;
+            cmd = mitigation_options[i].cmd;
+            break;
+        }
+
+        if (i >= ARRAY_SIZE(mitigation_options)) {
+            pr_err("unknown option (%s). Switching to AUTO select\n",
+                   mitigation_options[i].option);
             return SPECTRE_V2_CMD_AUTO;
         }
     }
 
-    if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+    if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
+         cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
+         cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
+        !IS_ENABLED(CONFIG_RETPOLINE)) {
+        pr_err("%s selected but not compiled in. Switching to AUTO select\n",
+               mitigation_options[i].option);
         return SPECTRE_V2_CMD_AUTO;
-disable:
-    spec2_print_if_insecure("disabled on command line.");
-    return SPECTRE_V2_CMD_NONE;
+    }
+
+    if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD &&
+        boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+        pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
+        return SPECTRE_V2_CMD_AUTO;
+    }
+
+    if (mitigation_options[i].secure)
+        spec2_print_if_secure(mitigation_options[i].option);
+    else
+        spec2_print_if_insecure(mitigation_options[i].option);
+
+    return cmd;
 }
 
 /* Check for Skylake-like CPUs (for RSB handling) */
@@ -191,10 +239,10 @@ static void __init spectre_v2_select_mitigation(void)
         return;
 
     case SPECTRE_V2_CMD_FORCE:
-        /* FALLTRHU */
     case SPECTRE_V2_CMD_AUTO:
-        goto retpoline_auto;
-
+        if (IS_ENABLED(CONFIG_RETPOLINE))
+            goto retpoline_auto;
+        break;
     case SPECTRE_V2_CMD_RETPOLINE_AMD:
         if (IS_ENABLED(CONFIG_RETPOLINE))
             goto retpoline_amd;
@@ -249,6 +297,12 @@ retpoline_auto:
         setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
         pr_info("Filling RSB on context switch\n");
     }
+
+    /* Initialize Indirect Branch Prediction Barrier if supported */
+    if (boot_cpu_has(X86_FEATURE_IBPB)) {
+        setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
+        pr_info("Enabling Indirect Branch Prediction Barrier\n");
+    }
 }
 
 #undef pr_fmt
@@ -269,7 +323,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev,
 {
     if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
         return sprintf(buf, "Not affected\n");
-    return sprintf(buf, "Vulnerable\n");
+    return sprintf(buf, "Mitigation: __user pointer sanitization\n");
 }
 
 ssize_t cpu_show_spectre_v2(struct device *dev,
@@ -278,6 +332,14 @@ ssize_t cpu_show_spectre_v2(struct device *dev,
     if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
         return sprintf(buf, "Not affected\n");
 
-    return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
+    return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+               boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+               spectre_v2_module_string());
 }
 #endif
+
+void __ibp_barrier(void)
+{
+    __wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
+}
+EXPORT_SYMBOL_GPL(__ibp_barrier);
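The cpu_show_spectre_v2() change appends the IBPB and bad-module suffixes to the existing sysfs report. A userspace sketch of how the result surfaces (the sysfs path is the established vulnerabilities interface; the exact string depends on the selected mitigation):

    #include <stdio.h>

    int main(void)
    {
        char line[128];
        FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/spectre_v2", "r");

        /* e.g. "Mitigation: Full generic retpoline, IBPB" */
        if (f && fgets(line, sizeof(line), f))
            fputs(line, stdout);
        if (f)
            fclose(f);
        return 0;
    }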
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 68bc6d9b3132..c578cd29c2d2 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -106,6 +106,10 @@ static void early_init_centaur(struct cpuinfo_x86 *c)
 #ifdef CONFIG_X86_64
     set_cpu_cap(c, X86_FEATURE_SYSENTER32);
 #endif
+    if (c->x86_power & (1 << 8)) {
+        set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+        set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+    }
 }
 
 static void init_centaur(struct cpuinfo_x86 *c)
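For context: c->x86_power caches CPUID.80000007H:EDX, and bit 8 is the architectural invariant-TSC bit, so the Centaur hunk mirrors what the Intel and AMD paths already do. In isolation:

    /* CPUID leaf 0x80000007, EDX bit 8: the TSC rate is invariant
     * across P-, C- and T-state transitions. */
    unsigned int edx = cpuid_edx(0x80000007);
    bool invariant_tsc = edx & (1 << 8);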
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ef29ad001991..d63f4b5706e4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -47,6 +47,8 @@
 #include <asm/pat.h>
 #include <asm/microcode.h>
 #include <asm/microcode_intel.h>
+#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/uv/uv.h>
@@ -748,6 +750,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
     }
 }
 
+static void init_speculation_control(struct cpuinfo_x86 *c)
+{
+    /*
+     * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
+     * and they also have a different bit for STIBP support. Also,
+     * a hypervisor might have set the individual AMD bits even on
+     * Intel CPUs, for finer-grained selection of what's available.
+     *
+     * We use the AMD bits in 0x8000_0008 EBX as the generic hardware
+     * features, which are visible in /proc/cpuinfo and used by the
+     * kernel. So set those accordingly from the Intel bits.
+     */
+    if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
+        set_cpu_cap(c, X86_FEATURE_IBRS);
+        set_cpu_cap(c, X86_FEATURE_IBPB);
+    }
+    if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
+        set_cpu_cap(c, X86_FEATURE_STIBP);
+}
+
 void get_cpu_cap(struct cpuinfo_x86 *c)
 {
     u32 eax, ebx, ecx, edx;
@@ -769,6 +791,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
         cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
         c->x86_capability[CPUID_7_0_EBX] = ebx;
         c->x86_capability[CPUID_7_ECX] = ecx;
+        c->x86_capability[CPUID_7_EDX] = edx;
     }
 
     /* Extended state features: level 0x0000000d */
@@ -841,6 +864,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
         c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
 
     init_scattered_cpuid_features(c);
+    init_speculation_control(c);
 
     /*
      * Clear/Set all flags overridden by options, after probe.
@@ -876,6 +900,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
 #endif
 }
 
+static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
+    { X86_VENDOR_INTEL,   6, INTEL_FAM6_ATOM_CEDARVIEW,  X86_FEATURE_ANY },
+    { X86_VENDOR_INTEL,   6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY },
+    { X86_VENDOR_INTEL,   6, INTEL_FAM6_ATOM_LINCROFT,   X86_FEATURE_ANY },
+    { X86_VENDOR_INTEL,   6, INTEL_FAM6_ATOM_PENWELL,    X86_FEATURE_ANY },
+    { X86_VENDOR_INTEL,   6, INTEL_FAM6_ATOM_PINEVIEW,   X86_FEATURE_ANY },
+    { X86_VENDOR_CENTAUR, 5 },
+    { X86_VENDOR_INTEL,   5 },
+    { X86_VENDOR_NSC,     5 },
+    { X86_VENDOR_ANY,     4 },
+    {}
+};
+
+static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
+    { X86_VENDOR_AMD },
+    {}
+};
+
+static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
+{
+    u64 ia32_cap = 0;
+
+    if (x86_match_cpu(cpu_no_meltdown))
+        return false;
+
+    if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
+        rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+
+    /* Rogue Data Cache Load? No! */
+    if (ia32_cap & ARCH_CAP_RDCL_NO)
+        return false;
+
+    return true;
+}
+
 /*
  * Do minimum CPU detection early.
  * Fields really needed: vendor, cpuid_level, family, model, mask,
@@ -923,11 +982,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 
     setup_force_cpu_cap(X86_FEATURE_ALWAYS);
 
-    if (c->x86_vendor != X86_VENDOR_AMD)
-        setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
-
-    setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
-    setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+    if (!x86_match_cpu(cpu_no_speculation)) {
+        if (cpu_vulnerable_to_meltdown(c))
+            setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+        setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+        setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+    }
 
     fpu__init_system(c);
 
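cpu_no_speculation and cpu_no_meltdown are standard x86_cpu_id tables: entries match on vendor/family/model/feature (vendor- or family-only entries act as wildcards), and the empty initializer terminates the table. A generic usage sketch (table contents hypothetical):

    static const struct x86_cpu_id quirk_table[] = {
        { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_DESKTOP, X86_FEATURE_ANY },
        {}  /* terminator */
    };

    if (x86_match_cpu(quirk_table))
        pr_info("quirk applies to this CPU\n");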
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index bea8d3e24f50..479ca4728de0 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -31,6 +31,7 @@ extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
 extern const struct hypervisor_x86 x86_hyper_xen_pv;
 extern const struct hypervisor_x86 x86_hyper_xen_hvm;
 extern const struct hypervisor_x86 x86_hyper_kvm;
+extern const struct hypervisor_x86 x86_hyper_jailhouse;
 
 static const __initconst struct hypervisor_x86 * const hypervisors[] =
 {
@@ -45,6 +46,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
 #ifdef CONFIG_KVM_GUEST
     &x86_hyper_kvm,
 #endif
+#ifdef CONFIG_JAILHOUSE_GUEST
+    &x86_hyper_jailhouse,
+#endif
 };
 
 enum x86_hypervisor_type x86_hyper_type;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b1af22073e28..319bf989fad1 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -102,6 +102,59 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
         ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
 }
 
+/*
+ * Early microcode releases for the Spectre v2 mitigation were broken.
+ * Information taken from;
+ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf
+ * - https://kb.vmware.com/s/article/52345
+ * - Microcode revisions observed in the wild
+ * - Release note from 20180108 microcode release
+ */
+struct sku_microcode {
+    u8 model;
+    u8 stepping;
+    u32 microcode;
+};
+static const struct sku_microcode spectre_bad_microcodes[] = {
+    { INTEL_FAM6_KABYLAKE_DESKTOP,  0x0B,   0x84 },
+    { INTEL_FAM6_KABYLAKE_DESKTOP,  0x0A,   0x84 },
+    { INTEL_FAM6_KABYLAKE_DESKTOP,  0x09,   0x84 },
+    { INTEL_FAM6_KABYLAKE_MOBILE,   0x0A,   0x84 },
+    { INTEL_FAM6_KABYLAKE_MOBILE,   0x09,   0x84 },
+    { INTEL_FAM6_SKYLAKE_X,         0x03,   0x0100013e },
+    { INTEL_FAM6_SKYLAKE_X,         0x04,   0x0200003c },
+    { INTEL_FAM6_SKYLAKE_MOBILE,    0x03,   0xc2 },
+    { INTEL_FAM6_SKYLAKE_DESKTOP,   0x03,   0xc2 },
+    { INTEL_FAM6_BROADWELL_CORE,    0x04,   0x28 },
+    { INTEL_FAM6_BROADWELL_GT3E,    0x01,   0x1b },
+    { INTEL_FAM6_BROADWELL_XEON_D,  0x02,   0x14 },
+    { INTEL_FAM6_BROADWELL_XEON_D,  0x03,   0x07000011 },
+    { INTEL_FAM6_BROADWELL_X,       0x01,   0x0b000025 },
+    { INTEL_FAM6_HASWELL_ULT,       0x01,   0x21 },
+    { INTEL_FAM6_HASWELL_GT3E,      0x01,   0x18 },
+    { INTEL_FAM6_HASWELL_CORE,      0x03,   0x23 },
+    { INTEL_FAM6_HASWELL_X,         0x02,   0x3b },
+    { INTEL_FAM6_HASWELL_X,         0x04,   0x10 },
+    { INTEL_FAM6_IVYBRIDGE_X,       0x04,   0x42a },
+    /* Updated in the 20180108 release; blacklist until we know otherwise */
+    { INTEL_FAM6_ATOM_GEMINI_LAKE,  0x01,   0x22 },
+    /* Observed in the wild */
+    { INTEL_FAM6_SANDYBRIDGE_X,     0x06,   0x61b },
+    { INTEL_FAM6_SANDYBRIDGE_X,     0x07,   0x712 },
+};
+
+static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
+{
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
+        if (c->x86_model == spectre_bad_microcodes[i].model &&
+            c->x86_mask == spectre_bad_microcodes[i].stepping)
+            return (c->microcode <= spectre_bad_microcodes[i].microcode);
+    }
+    return false;
+}
+
 static void early_init_intel(struct cpuinfo_x86 *c)
 {
     u64 misc_enable;
@@ -122,6 +175,19 @@ static void early_init_intel(struct cpuinfo_x86 *c)
     if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
         c->microcode = intel_get_microcode_revision();
 
+    /* Now if any of them are set, check the blacklist and clear the lot */
+    if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
+         cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
+         cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
+         cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
+        pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
+        setup_clear_cpu_cap(X86_FEATURE_IBRS);
+        setup_clear_cpu_cap(X86_FEATURE_IBPB);
+        setup_clear_cpu_cap(X86_FEATURE_STIBP);
+        setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
+        setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
+    }
+
     /*
      * Atom erratum AAE44/AAF40/AAG38/AAH41:
      *
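bad_spectre_microcode() is a simple (model, stepping, max-bad-revision) lookup. A worked instance of the comparison (values taken from the table above; 0x9E is INTEL_FAM6_KABYLAKE_DESKTOP):

    /* Kaby Lake desktop, stepping 0x0B: any microcode revision at or
     * below 0x84 causes the SPEC_CTRL/IBPB/STIBP bits to be cleared. */
    bool bad = (c->x86_model == 0x9E &&
                c->x86_mask == 0x0B &&
                c->microcode <= 0x84);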
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 99442370de40..410629f10ad3 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -135,6 +135,40 @@ struct rdt_resource rdt_resources_all[] = {
         .format_str             = "%d=%0*x",
         .fflags                 = RFTYPE_RES_CACHE,
     },
+    [RDT_RESOURCE_L2DATA] =
+    {
+        .rid                    = RDT_RESOURCE_L2DATA,
+        .name                   = "L2DATA",
+        .domains                = domain_init(RDT_RESOURCE_L2DATA),
+        .msr_base               = IA32_L2_CBM_BASE,
+        .msr_update             = cat_wrmsr,
+        .cache_level            = 2,
+        .cache = {
+            .min_cbm_bits       = 1,
+            .cbm_idx_mult       = 2,
+            .cbm_idx_offset     = 0,
+        },
+        .parse_ctrlval          = parse_cbm,
+        .format_str             = "%d=%0*x",
+        .fflags                 = RFTYPE_RES_CACHE,
+    },
+    [RDT_RESOURCE_L2CODE] =
+    {
+        .rid                    = RDT_RESOURCE_L2CODE,
+        .name                   = "L2CODE",
+        .domains                = domain_init(RDT_RESOURCE_L2CODE),
+        .msr_base               = IA32_L2_CBM_BASE,
+        .msr_update             = cat_wrmsr,
+        .cache_level            = 2,
+        .cache = {
+            .min_cbm_bits       = 1,
+            .cbm_idx_mult       = 2,
+            .cbm_idx_offset     = 1,
+        },
+        .parse_ctrlval          = parse_cbm,
+        .format_str             = "%d=%0*x",
+        .fflags                 = RFTYPE_RES_CACHE,
+    },
     [RDT_RESOURCE_MBA] =
     {
         .rid                    = RDT_RESOURCE_MBA,
@@ -259,15 +293,15 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
     r->alloc_enabled = true;
 }
 
-static void rdt_get_cdp_l3_config(int type)
+static void rdt_get_cdp_config(int level, int type)
 {
-    struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3];
+    struct rdt_resource *r_l = &rdt_resources_all[level];
     struct rdt_resource *r = &rdt_resources_all[type];
 
-    r->num_closid = r_l3->num_closid / 2;
-    r->cache.cbm_len = r_l3->cache.cbm_len;
-    r->default_ctrl = r_l3->default_ctrl;
-    r->cache.shareable_bits = r_l3->cache.shareable_bits;
+    r->num_closid = r_l->num_closid / 2;
+    r->cache.cbm_len = r_l->cache.cbm_len;
+    r->default_ctrl = r_l->default_ctrl;
+    r->cache.shareable_bits = r_l->cache.shareable_bits;
     r->data_width = (r->cache.cbm_len + 3) / 4;
     r->alloc_capable = true;
     /*
@@ -277,6 +311,18 @@ static void rdt_get_cdp_l3_config(int type)
     r->alloc_enabled = false;
 }
 
+static void rdt_get_cdp_l3_config(void)
+{
+    rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA);
+    rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3CODE);
+}
+
+static void rdt_get_cdp_l2_config(void)
+{
+    rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA);
+    rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE);
+}
+
 static int get_cache_id(int cpu, int level)
 {
     struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
@@ -645,6 +691,7 @@ enum {
     RDT_FLAG_L3_CAT,
     RDT_FLAG_L3_CDP,
     RDT_FLAG_L2_CAT,
+    RDT_FLAG_L2_CDP,
    RDT_FLAG_MBA,
 };
 
@@ -667,6 +714,7 @@ static struct rdt_options rdt_options[] __initdata = {
     RDT_OPT(RDT_FLAG_L3_CAT,    "l3cat",    X86_FEATURE_CAT_L3),
     RDT_OPT(RDT_FLAG_L3_CDP,    "l3cdp",    X86_FEATURE_CDP_L3),
     RDT_OPT(RDT_FLAG_L2_CAT,    "l2cat",    X86_FEATURE_CAT_L2),
+    RDT_OPT(RDT_FLAG_L2_CDP,    "l2cdp",    X86_FEATURE_CDP_L2),
     RDT_OPT(RDT_FLAG_MBA,       "mba",      X86_FEATURE_MBA),
 };
 #define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options)
@@ -729,15 +777,15 @@ static __init bool get_rdt_alloc_resources(void)
 
     if (rdt_cpu_has(X86_FEATURE_CAT_L3)) {
         rdt_get_cache_alloc_cfg(1, &rdt_resources_all[RDT_RESOURCE_L3]);
-        if (rdt_cpu_has(X86_FEATURE_CDP_L3)) {
-            rdt_get_cdp_l3_config(RDT_RESOURCE_L3DATA);
-            rdt_get_cdp_l3_config(RDT_RESOURCE_L3CODE);
-        }
+        if (rdt_cpu_has(X86_FEATURE_CDP_L3))
+            rdt_get_cdp_l3_config();
         ret = true;
     }
     if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
         /* CPUID 0x10.2 fields are same format at 0x10.1 */
         rdt_get_cache_alloc_cfg(2, &rdt_resources_all[RDT_RESOURCE_L2]);
+        if (rdt_cpu_has(X86_FEATURE_CDP_L2))
+            rdt_get_cdp_l2_config();
         ret = true;
     }
 
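With CDP, each CLOSID owns a data/code pair of CBM MSRs, which is what the cbm_idx_mult = 2 and cbm_idx_offset = 0/1 fields in the new L2DATA/L2CODE entries encode. The index computation, in isolation (mirroring the cbm_idx() helper in this file):

    /* CLOSID n programs IA32_L2_CBM_BASE + n*2 for data and
     * IA32_L2_CBM_BASE + n*2 + 1 for code. */
    static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid)
    {
        return closid * r->cache.cbm_idx_mult + r->cache.cbm_idx_offset;
    }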
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 3397244984f5..3fd7a70ee04a 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -7,12 +7,15 @@
 #include <linux/jump_label.h>
 
 #define IA32_L3_QOS_CFG         0xc81
+#define IA32_L2_QOS_CFG         0xc82
 #define IA32_L3_CBM_BASE        0xc90
 #define IA32_L2_CBM_BASE        0xd10
 #define IA32_MBA_THRTL_BASE     0xd50
 
 #define L3_QOS_CDP_ENABLE       0x01ULL
 
+#define L2_QOS_CDP_ENABLE       0x01ULL
+
 /*
  * Event IDs are used to program IA32_QM_EVTSEL before reading event
  * counter from IA32_QM_CTR
@@ -357,6 +360,8 @@ enum {
     RDT_RESOURCE_L3DATA,
     RDT_RESOURCE_L3CODE,
     RDT_RESOURCE_L2,
+    RDT_RESOURCE_L2DATA,
+    RDT_RESOURCE_L2CODE,
     RDT_RESOURCE_MBA,
 
     /* Must be the last */
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 64c5ff97ee0d..bdab7d2f51af 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -990,6 +990,7 @@ out_destroy:
     kernfs_remove(kn);
     return ret;
 }
+
 static void l3_qos_cfg_update(void *arg)
 {
     bool *enable = arg;
@@ -997,8 +998,17 @@ static void l3_qos_cfg_update(void *arg)
     wrmsrl(IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
 }
 
-static int set_l3_qos_cfg(struct rdt_resource *r, bool enable)
+static void l2_qos_cfg_update(void *arg)
 {
+    bool *enable = arg;
+
+    wrmsrl(IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
+}
+
+static int set_cache_qos_cfg(int level, bool enable)
+{
+    void (*update)(void *arg);
+    struct rdt_resource *r_l;
     cpumask_var_t cpu_mask;
     struct rdt_domain *d;
     int cpu;
@@ -1006,16 +1016,24 @@ static int set_l3_qos_cfg(struct rdt_resource *r, bool enable)
     if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
         return -ENOMEM;
 
-    list_for_each_entry(d, &r->domains, list) {
+    if (level == RDT_RESOURCE_L3)
+        update = l3_qos_cfg_update;
+    else if (level == RDT_RESOURCE_L2)
+        update = l2_qos_cfg_update;
+    else
+        return -EINVAL;
+
+    r_l = &rdt_resources_all[level];
+    list_for_each_entry(d, &r_l->domains, list) {
         /* Pick one CPU from each domain instance to update MSR */
         cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
     }
     cpu = get_cpu();
     /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */
     if (cpumask_test_cpu(cpu, cpu_mask))
-        l3_qos_cfg_update(&enable);
+        update(&enable);
     /* Update QOS_CFG MSR on all other cpus in cpu_mask. */
-    smp_call_function_many(cpu_mask, l3_qos_cfg_update, &enable, 1);
+    smp_call_function_many(cpu_mask, update, &enable, 1);
     put_cpu();
 
     free_cpumask_var(cpu_mask);
@@ -1023,52 +1041,99 @@ static int set_l3_qos_cfg(struct rdt_resource *r, bool enable)
     return 0;
 }
 
-static int cdp_enable(void)
+static int cdp_enable(int level, int data_type, int code_type)
 {
-    struct rdt_resource *r_l3data = &rdt_resources_all[RDT_RESOURCE_L3DATA];
-    struct rdt_resource *r_l3code = &rdt_resources_all[RDT_RESOURCE_L3CODE];
-    struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3];
+    struct rdt_resource *r_ldata = &rdt_resources_all[data_type];
+    struct rdt_resource *r_lcode = &rdt_resources_all[code_type];
+    struct rdt_resource *r_l = &rdt_resources_all[level];
     int ret;
 
-    if (!r_l3->alloc_capable || !r_l3data->alloc_capable ||
-        !r_l3code->alloc_capable)
+    if (!r_l->alloc_capable || !r_ldata->alloc_capable ||
+        !r_lcode->alloc_capable)
         return -EINVAL;
 
-    ret = set_l3_qos_cfg(r_l3, true);
+    ret = set_cache_qos_cfg(level, true);
     if (!ret) {
-        r_l3->alloc_enabled = false;
-        r_l3data->alloc_enabled = true;
-        r_l3code->alloc_enabled = true;
+        r_l->alloc_enabled = false;
+        r_ldata->alloc_enabled = true;
+        r_lcode->alloc_enabled = true;
     }
     return ret;
 }
 
-static void cdp_disable(void)
+static int cdpl3_enable(void)
 {
-    struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
+    return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA,
+              RDT_RESOURCE_L3CODE);
+}
+
+static int cdpl2_enable(void)
+{
+    return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA,
+              RDT_RESOURCE_L2CODE);
+}
+
+static void cdp_disable(int level, int data_type, int code_type)
+{
+    struct rdt_resource *r = &rdt_resources_all[level];
 
     r->alloc_enabled = r->alloc_capable;
 
-    if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) {
-        rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled = false;
-        rdt_resources_all[RDT_RESOURCE_L3CODE].alloc_enabled = false;
-        set_l3_qos_cfg(r, false);
+    if (rdt_resources_all[data_type].alloc_enabled) {
+        rdt_resources_all[data_type].alloc_enabled = false;
+        rdt_resources_all[code_type].alloc_enabled = false;
+        set_cache_qos_cfg(level, false);
     }
 }
 
+static void cdpl3_disable(void)
+{
+    cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE);
+}
+
+static void cdpl2_disable(void)
+{
+    cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE);
+}
+
+static void cdp_disable_all(void)
+{
+    if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
+        cdpl3_disable();
+    if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
+        cdpl2_disable();
+}
+
 static int parse_rdtgroupfs_options(char *data)
 {
     char *token, *o = data;
     int ret = 0;
 
     while ((token = strsep(&o, ",")) != NULL) {
-        if (!*token)
-            return -EINVAL;
+        if (!*token) {
+            ret = -EINVAL;
+            goto out;
+        }
 
-        if (!strcmp(token, "cdp"))
-            ret = cdp_enable();
+        if (!strcmp(token, "cdp")) {
+            ret = cdpl3_enable();
+            if (ret)
+                goto out;
+        } else if (!strcmp(token, "cdpl2")) {
+            ret = cdpl2_enable();
+            if (ret)
+                goto out;
+        } else {
+            ret = -EINVAL;
+            goto out;
+        }
     }
 
+    return 0;
+
+out:
+    pr_err("Invalid mount option \"%s\"\n", token);
+
     return ret;
 }
 
@@ -1223,7 +1288,7 @@ out_mongrp:
 out_info:
     kernfs_remove(kn_info);
 out_cdp:
-    cdp_disable();
+    cdp_disable_all();
 out:
     rdt_last_cmd_clear();
     mutex_unlock(&rdtgroup_mutex);
@@ -1383,7 +1448,7 @@ static void rdt_kill_sb(struct super_block *sb)
     /*Put everything back to default values. */
     for_each_alloc_enabled_rdt_resource(r)
         reset_all_ctrls(r);
-    cdp_disable();
+    cdp_disable_all();
     rmdir_all_sub();
     static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
     static_branch_disable_cpuslocked(&rdt_mon_enable_key);
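With cdpl2 wired into parse_rdtgroupfs_options(), L2 code/data prioritization is requested at mount time the same way as the existing L3 option, e.g.

    mount -t resctrl -o cdp,cdpl2 resctrl /sys/fs/resctrl

after which L2DATA and L2CODE resources appear in place of L2 (the exact info-directory layout follows the established resctrl conventions).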
diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
index 7f85b76f43bc..213e8c2ca702 100644
--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
@@ -243,7 +243,7 @@ out:
     return err ? err : buf - ubuf;
 }
 
-static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait)
+static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait)
 {
     poll_wait(file, &mce_chrdev_wait, wait);
     if (READ_ONCE(mcelog.next))
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 4ca632a06e0b..5bbd06f38ff6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -59,6 +59,7 @@ static struct severity {
59#define MCGMASK(x, y) .mcgmask = x, .mcgres = y 59#define MCGMASK(x, y) .mcgmask = x, .mcgres = y
60#define MASK(x, y) .mask = x, .result = y 60#define MASK(x, y) .mask = x, .result = y
61#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) 61#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
62#define MCI_UC_AR (MCI_STATUS_UC|MCI_STATUS_AR)
62#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) 63#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
63#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV) 64#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
64 65
@@ -101,6 +102,22 @@ static struct severity {
101 NOSER, BITCLR(MCI_STATUS_UC) 102 NOSER, BITCLR(MCI_STATUS_UC)
102 ), 103 ),
103 104
105 /*
106 * known AO MCACODs reported via MCE or CMC:
107 *
108 * SRAO could be signaled either via a machine check exception or
 109 * CMCI, with the corresponding bit S set to 1 or 0. So we don't need
 110 * to check bit S for SRAO.
111 */
112 MCESEV(
113 AO, "Action optional: memory scrubbing error",
114 SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB)
115 ),
116 MCESEV(
117 AO, "Action optional: last level cache writeback error",
118 SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
119 ),
120
104 /* ignore OVER for UCNA */ 121 /* ignore OVER for UCNA */
105 MCESEV( 122 MCESEV(
106 UCNA, "Uncorrected no action required", 123 UCNA, "Uncorrected no action required",
@@ -149,15 +166,6 @@ static struct severity {
149 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR) 166 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
150 ), 167 ),
151 168
152 /* known AO MCACODs: */
153 MCESEV(
154 AO, "Action optional: memory scrubbing error",
155 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB)
156 ),
157 MCESEV(
158 AO, "Action optional: last level cache writeback error",
159 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB)
160 ),
161 MCESEV( 169 MCESEV(
162 SOME, "Action optional: unknown MCACOD", 170 SOME, "Action optional: unknown MCACOD",
163 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S) 171 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
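
Each MCESEV() entry boils down to a (mask, result) pair tested against MCi_STATUS: an entry matches when (status & mask) == result. The relocated SRAO entries now mask MCI_UC_AR instead of MCI_UC_SAR, so bit S is simply not consulted. A self-contained sketch of that matching rule, using illustrative bit positions rather than the architectural MSR layout:

#include <stdint.h>
#include <stdio.h>

/* Illustrative bit positions only -- not the real MCi_STATUS layout. */
#define ST_UC  (1ull << 61)
#define ST_S   (1ull << 56)
#define ST_AR  (1ull << 55)

struct sev_rule {
	uint64_t mask;
	uint64_t result;
	const char *name;
};

/* An entry matches when the masked status equals its result pattern. */
static int rule_matches(const struct sev_rule *r, uint64_t status)
{
	return (status & r->mask) == r->result;
}

int main(void)
{
	/* SRAO-style rule: UC set, AR clear; S deliberately not checked. */
	struct sev_rule srao = { ST_UC | ST_AR, ST_UC, "AO" };

	printf("%d\n", rule_matches(&srao, ST_UC | ST_S));  /* 1: S ignored */
	printf("%d\n", rule_matches(&srao, ST_UC | ST_AR)); /* 0: AR set */
	return 0;
}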
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 868e412b4f0c..3a8e88a611eb 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -14,7 +14,6 @@
14#include <linux/capability.h> 14#include <linux/capability.h>
15#include <linux/miscdevice.h> 15#include <linux/miscdevice.h>
16#include <linux/ratelimit.h> 16#include <linux/ratelimit.h>
17#include <linux/kallsyms.h>
18#include <linux/rcupdate.h> 17#include <linux/rcupdate.h>
19#include <linux/kobject.h> 18#include <linux/kobject.h>
20#include <linux/uaccess.h> 19#include <linux/uaccess.h>
@@ -235,7 +234,7 @@ static void __print_mce(struct mce *m)
235 m->cs, m->ip); 234 m->cs, m->ip);
236 235
237 if (m->cs == __KERNEL_CS) 236 if (m->cs == __KERNEL_CS)
238 print_symbol("{%s}", m->ip); 237 pr_cont("{%pS}", (void *)m->ip);
239 pr_cont("\n"); 238 pr_cont("\n");
240 } 239 }
241 240
@@ -503,10 +502,8 @@ static int mce_usable_address(struct mce *m)
503bool mce_is_memory_error(struct mce *m) 502bool mce_is_memory_error(struct mce *m)
504{ 503{
505 if (m->cpuvendor == X86_VENDOR_AMD) { 504 if (m->cpuvendor == X86_VENDOR_AMD) {
506 /* ErrCodeExt[20:16] */ 505 return amd_mce_is_memory_error(m);
507 u8 xec = (m->status >> 16) & 0x1f;
508 506
509 return (xec == 0x0 || xec == 0x8);
510 } else if (m->cpuvendor == X86_VENDOR_INTEL) { 507 } else if (m->cpuvendor == X86_VENDOR_INTEL) {
511 /* 508 /*
512 * Intel SDM Volume 3B - 15.9.2 Compound Error Codes 509 * Intel SDM Volume 3B - 15.9.2 Compound Error Codes
@@ -530,6 +527,17 @@ bool mce_is_memory_error(struct mce *m)
530} 527}
531EXPORT_SYMBOL_GPL(mce_is_memory_error); 528EXPORT_SYMBOL_GPL(mce_is_memory_error);
532 529
530static bool mce_is_correctable(struct mce *m)
531{
532 if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED)
533 return false;
534
535 if (m->status & MCI_STATUS_UC)
536 return false;
537
538 return true;
539}
540
533static bool cec_add_mce(struct mce *m) 541static bool cec_add_mce(struct mce *m)
534{ 542{
535 if (!m) 543 if (!m)
@@ -537,7 +545,7 @@ static bool cec_add_mce(struct mce *m)
537 545
538 /* We eat only correctable DRAM errors with usable addresses. */ 546 /* We eat only correctable DRAM errors with usable addresses. */
539 if (mce_is_memory_error(m) && 547 if (mce_is_memory_error(m) &&
540 !(m->status & MCI_STATUS_UC) && 548 mce_is_correctable(m) &&
541 mce_usable_address(m)) 549 mce_usable_address(m))
542 if (!cec_add_elem(m->addr >> PAGE_SHIFT)) 550 if (!cec_add_elem(m->addr >> PAGE_SHIFT))
543 return true; 551 return true;
@@ -582,7 +590,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
582 590
583 if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) { 591 if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
584 pfn = mce->addr >> PAGE_SHIFT; 592 pfn = mce->addr >> PAGE_SHIFT;
585 memory_failure(pfn, MCE_VECTOR, 0); 593 memory_failure(pfn, 0);
586 } 594 }
587 595
588 return NOTIFY_OK; 596 return NOTIFY_OK;
@@ -1046,7 +1054,7 @@ static int do_memory_failure(struct mce *m)
1046 pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr); 1054 pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);
1047 if (!(m->mcgstatus & MCG_STATUS_RIPV)) 1055 if (!(m->mcgstatus & MCG_STATUS_RIPV))
1048 flags |= MF_MUST_KILL; 1056 flags |= MF_MUST_KILL;
1049 ret = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, flags); 1057 ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
1050 if (ret) 1058 if (ret)
1051 pr_err("Memory error not recovered"); 1059 pr_err("Memory error not recovered");
1052 return ret; 1060 return ret;
@@ -1325,7 +1333,7 @@ out_ist:
1325EXPORT_SYMBOL_GPL(do_machine_check); 1333EXPORT_SYMBOL_GPL(do_machine_check);
1326 1334
1327#ifndef CONFIG_MEMORY_FAILURE 1335#ifndef CONFIG_MEMORY_FAILURE
1328int memory_failure(unsigned long pfn, int vector, int flags) 1336int memory_failure(unsigned long pfn, int flags)
1329{ 1337{
1330 /* mce_severity() should not hand us an ACTION_REQUIRED error */ 1338 /* mce_severity() should not hand us an ACTION_REQUIRED error */
1331 BUG_ON(flags & MF_ACTION_REQUIRED); 1339 BUG_ON(flags & MF_ACTION_REQUIRED);
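
The new mce_is_correctable() gives the CEC filter a named predicate instead of the open-coded UC test, and additionally rejects AMD deferred errors, which are uncorrected even when MCI_STATUS_UC is clear. A standalone restatement under illustrative flag bits:

#include <stdbool.h>
#include <stdint.h>

/* Illustrative bit positions, not guaranteed to match MCi_STATUS. */
#define ST_UC       (1ull << 61)
#define ST_DEFERRED (1ull << 44)

struct mce_rec {
	uint64_t status;
	bool vendor_amd;
};

/* Deferred AMD errors and anything uncorrected are not CEC material. */
bool mce_rec_is_correctable(const struct mce_rec *m)
{
	if (m->vendor_amd && (m->status & ST_DEFERRED))
		return false;
	if (m->status & ST_UC)
		return false;
	return true;
}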
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 486f640b02ef..0f32ad242324 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -110,6 +110,20 @@ const char *smca_get_long_name(enum smca_bank_types t)
110} 110}
111EXPORT_SYMBOL_GPL(smca_get_long_name); 111EXPORT_SYMBOL_GPL(smca_get_long_name);
112 112
113static enum smca_bank_types smca_get_bank_type(struct mce *m)
114{
115 struct smca_bank *b;
116
117 if (m->bank >= N_SMCA_BANK_TYPES)
118 return N_SMCA_BANK_TYPES;
119
120 b = &smca_banks[m->bank];
121 if (!b->hwid)
122 return N_SMCA_BANK_TYPES;
123
124 return b->hwid->bank_type;
125}
126
113static struct smca_hwid smca_hwid_mcatypes[] = { 127static struct smca_hwid smca_hwid_mcatypes[] = {
114 /* { bank_type, hwid_mcatype, xec_bitmap } */ 128 /* { bank_type, hwid_mcatype, xec_bitmap } */
115 129
@@ -407,7 +421,9 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
407 (deferred_error_int_vector != amd_deferred_error_interrupt)) 421 (deferred_error_int_vector != amd_deferred_error_interrupt))
408 deferred_error_int_vector = amd_deferred_error_interrupt; 422 deferred_error_int_vector = amd_deferred_error_interrupt;
409 423
410 low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC; 424 if (!mce_flags.smca)
425 low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
426
411 wrmsr(MSR_CU_DEF_ERR, low, high); 427 wrmsr(MSR_CU_DEF_ERR, low, high);
412} 428}
413 429
@@ -738,6 +754,17 @@ out_err:
738} 754}
739EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr); 755EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr);
740 756
757bool amd_mce_is_memory_error(struct mce *m)
758{
759 /* ErrCodeExt[20:16] */
760 u8 xec = (m->status >> 16) & 0x1f;
761
762 if (mce_flags.smca)
763 return smca_get_bank_type(m) == SMCA_UMC && xec == 0x0;
764
765 return m->bank == 4 && xec == 0x8;
766}
767
741static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) 768static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
742{ 769{
743 struct mce m; 770 struct mce m;
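
amd_mce_is_memory_error() keys off the extended error code in MCi_STATUS bits 20:16: on SMCA systems a DRAM ECC error is xec 0 in a UMC bank, on pre-SMCA systems xec 8 in bank 4. A small sketch of the decode, with the bank-type lookup reduced to a boolean for illustration:

#include <stdbool.h>
#include <stdint.h>

/* ErrCodeExt lives in MCi_STATUS bits 20:16. */
static uint8_t mce_xec(uint64_t status)
{
	return (status >> 16) & 0x1f;
}

/* is_umc_bank stands in for the smca_get_bank_type() == SMCA_UMC test. */
bool is_dram_ecc(uint64_t status, int bank, bool smca, bool is_umc_bank)
{
	if (smca)
		return is_umc_bank && mce_xec(status) == 0x0;
	return bank == 4 && mce_xec(status) == 0x8;
}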
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index e4fc595cd6ea..319dd65f98a2 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -560,7 +560,7 @@ static ssize_t pf_show(struct device *dev,
560 return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); 560 return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
561} 561}
562 562
563static DEVICE_ATTR(reload, 0200, NULL, reload_store); 563static DEVICE_ATTR_WO(reload);
564static DEVICE_ATTR(version, 0400, version_show, NULL); 564static DEVICE_ATTR(version, 0400, version_show, NULL);
565static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL); 565static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL);
566 566
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index d0e69769abfd..4075d2be5357 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -21,11 +21,10 @@ struct cpuid_bit {
21static const struct cpuid_bit cpuid_bits[] = { 21static const struct cpuid_bit cpuid_bits[] = {
22 { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, 22 { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
23 { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, 23 { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
24 { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 },
25 { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 },
26 { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, 24 { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
27 { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 }, 25 { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 },
28 { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, 26 { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 },
27 { X86_FEATURE_CDP_L2, CPUID_ECX, 2, 0x00000010, 2 },
29 { X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 }, 28 { X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 },
30 { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, 29 { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
31 { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, 30 { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
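
Each cpuid_bits[] row names a register, bit number, leaf and sub-leaf; the scattered-feature scanner executes CPUID for that (leaf, sub-leaf) and tests the bit. The new CDP_L2 row can be probed from userspace the same way; a sketch using GCC/clang's __get_cpuid_count() helper (the kernel uses its own cpuid_count() wrappers instead):

#include <cpuid.h>   /* GCC/clang builtin wrapper, x86 only */
#include <stdio.h>

/* Check CPUID.(EAX=0x10, ECX=2):ECX bit 2 -- the CDP_L2 bit added above. */
int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid_count(0x10, 2, &eax, &ebx, &ecx, &edx))
		return 1;                 /* leaf not supported */
	printf("CDP_L2: %s\n", (ecx & (1u << 2)) ? "yes" : "no");
	return 0;
}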
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 76e07698e6d1..25de5f6ca997 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -2,7 +2,6 @@
2/* 2/*
3 * Architecture specific OF callbacks. 3 * Architecture specific OF callbacks.
4 */ 4 */
5#include <linux/bootmem.h>
6#include <linux/export.h> 5#include <linux/export.h>
7#include <linux/io.h> 6#include <linux/io.h>
8#include <linux/interrupt.h> 7#include <linux/interrupt.h>
@@ -39,11 +38,6 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
39 BUG(); 38 BUG();
40} 39}
41 40
42void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
43{
44 return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS));
45}
46
47void __init add_dtb(u64 data) 41void __init add_dtb(u64 data)
48{ 42{
49 initial_dtb = data + offsetof(struct setup_data, data); 43 initial_dtb = data + offsetof(struct setup_data, data);
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index afbecff161d1..a2d8a3908670 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -109,7 +109,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
109 struct stack_info stack_info = {0}; 109 struct stack_info stack_info = {0};
110 unsigned long visit_mask = 0; 110 unsigned long visit_mask = 0;
111 int graph_idx = 0; 111 int graph_idx = 0;
112 bool partial; 112 bool partial = false;
113 113
114 printk("%sCall Trace:\n", log_lvl); 114 printk("%sCall Trace:\n", log_lvl);
115 115
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 1e82f787c160..bae0d32e327b 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -243,7 +243,7 @@ static void __init intel_remapping_check(int num, int slot, int func)
243#define KB(x) ((x) * 1024UL) 243#define KB(x) ((x) * 1024UL)
244#define MB(x) (KB (KB (x))) 244#define MB(x) (KB (KB (x)))
245 245
246static size_t __init i830_tseg_size(void) 246static resource_size_t __init i830_tseg_size(void)
247{ 247{
248 u8 esmramc = read_pci_config_byte(0, 0, 0, I830_ESMRAMC); 248 u8 esmramc = read_pci_config_byte(0, 0, 0, I830_ESMRAMC);
249 249
@@ -256,7 +256,7 @@ static size_t __init i830_tseg_size(void)
256 return KB(512); 256 return KB(512);
257} 257}
258 258
259static size_t __init i845_tseg_size(void) 259static resource_size_t __init i845_tseg_size(void)
260{ 260{
261 u8 esmramc = read_pci_config_byte(0, 0, 0, I845_ESMRAMC); 261 u8 esmramc = read_pci_config_byte(0, 0, 0, I845_ESMRAMC);
262 u8 tseg_size = esmramc & I845_TSEG_SIZE_MASK; 262 u8 tseg_size = esmramc & I845_TSEG_SIZE_MASK;
@@ -273,7 +273,7 @@ static size_t __init i845_tseg_size(void)
273 return 0; 273 return 0;
274} 274}
275 275
276static size_t __init i85x_tseg_size(void) 276static resource_size_t __init i85x_tseg_size(void)
277{ 277{
278 u8 esmramc = read_pci_config_byte(0, 0, 0, I85X_ESMRAMC); 278 u8 esmramc = read_pci_config_byte(0, 0, 0, I85X_ESMRAMC);
279 279
@@ -283,12 +283,12 @@ static size_t __init i85x_tseg_size(void)
283 return MB(1); 283 return MB(1);
284} 284}
285 285
286static size_t __init i830_mem_size(void) 286static resource_size_t __init i830_mem_size(void)
287{ 287{
288 return read_pci_config_byte(0, 0, 0, I830_DRB3) * MB(32); 288 return read_pci_config_byte(0, 0, 0, I830_DRB3) * MB(32);
289} 289}
290 290
291static size_t __init i85x_mem_size(void) 291static resource_size_t __init i85x_mem_size(void)
292{ 292{
293 return read_pci_config_byte(0, 0, 1, I85X_DRB3) * MB(32); 293 return read_pci_config_byte(0, 0, 1, I85X_DRB3) * MB(32);
294} 294}
@@ -297,36 +297,36 @@ static size_t __init i85x_mem_size(void)
297 * On 830/845/85x the stolen memory base isn't available in any 297 * On 830/845/85x the stolen memory base isn't available in any
298 * register. We need to calculate it as TOM-TSEG_SIZE-stolen_size. 298 * register. We need to calculate it as TOM-TSEG_SIZE-stolen_size.
299 */ 299 */
300static phys_addr_t __init i830_stolen_base(int num, int slot, int func, 300static resource_size_t __init i830_stolen_base(int num, int slot, int func,
301 size_t stolen_size) 301 resource_size_t stolen_size)
302{ 302{
303 return (phys_addr_t)i830_mem_size() - i830_tseg_size() - stolen_size; 303 return i830_mem_size() - i830_tseg_size() - stolen_size;
304} 304}
305 305
306static phys_addr_t __init i845_stolen_base(int num, int slot, int func, 306static resource_size_t __init i845_stolen_base(int num, int slot, int func,
307 size_t stolen_size) 307 resource_size_t stolen_size)
308{ 308{
309 return (phys_addr_t)i830_mem_size() - i845_tseg_size() - stolen_size; 309 return i830_mem_size() - i845_tseg_size() - stolen_size;
310} 310}
311 311
312static phys_addr_t __init i85x_stolen_base(int num, int slot, int func, 312static resource_size_t __init i85x_stolen_base(int num, int slot, int func,
313 size_t stolen_size) 313 resource_size_t stolen_size)
314{ 314{
315 return (phys_addr_t)i85x_mem_size() - i85x_tseg_size() - stolen_size; 315 return i85x_mem_size() - i85x_tseg_size() - stolen_size;
316} 316}
317 317
318static phys_addr_t __init i865_stolen_base(int num, int slot, int func, 318static resource_size_t __init i865_stolen_base(int num, int slot, int func,
319 size_t stolen_size) 319 resource_size_t stolen_size)
320{ 320{
321 u16 toud = 0; 321 u16 toud = 0;
322 322
323 toud = read_pci_config_16(0, 0, 0, I865_TOUD); 323 toud = read_pci_config_16(0, 0, 0, I865_TOUD);
324 324
325 return (phys_addr_t)(toud << 16) + i845_tseg_size(); 325 return toud * KB(64) + i845_tseg_size();
326} 326}
327 327
328static phys_addr_t __init gen3_stolen_base(int num, int slot, int func, 328static resource_size_t __init gen3_stolen_base(int num, int slot, int func,
329 size_t stolen_size) 329 resource_size_t stolen_size)
330{ 330{
331 u32 bsm; 331 u32 bsm;
332 332
@@ -337,10 +337,10 @@ static phys_addr_t __init gen3_stolen_base(int num, int slot, int func,
337 */ 337 */
338 bsm = read_pci_config(num, slot, func, INTEL_BSM); 338 bsm = read_pci_config(num, slot, func, INTEL_BSM);
339 339
340 return (phys_addr_t)bsm & INTEL_BSM_MASK; 340 return bsm & INTEL_BSM_MASK;
341} 341}
342 342
343static size_t __init i830_stolen_size(int num, int slot, int func) 343static resource_size_t __init i830_stolen_size(int num, int slot, int func)
344{ 344{
345 u16 gmch_ctrl; 345 u16 gmch_ctrl;
346 u16 gms; 346 u16 gms;
@@ -361,7 +361,7 @@ static size_t __init i830_stolen_size(int num, int slot, int func)
361 return 0; 361 return 0;
362} 362}
363 363
364static size_t __init gen3_stolen_size(int num, int slot, int func) 364static resource_size_t __init gen3_stolen_size(int num, int slot, int func)
365{ 365{
366 u16 gmch_ctrl; 366 u16 gmch_ctrl;
367 u16 gms; 367 u16 gms;
@@ -390,7 +390,7 @@ static size_t __init gen3_stolen_size(int num, int slot, int func)
390 return 0; 390 return 0;
391} 391}
392 392
393static size_t __init gen6_stolen_size(int num, int slot, int func) 393static resource_size_t __init gen6_stolen_size(int num, int slot, int func)
394{ 394{
395 u16 gmch_ctrl; 395 u16 gmch_ctrl;
396 u16 gms; 396 u16 gms;
@@ -398,10 +398,10 @@ static size_t __init gen6_stolen_size(int num, int slot, int func)
398 gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL); 398 gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL);
399 gms = (gmch_ctrl >> SNB_GMCH_GMS_SHIFT) & SNB_GMCH_GMS_MASK; 399 gms = (gmch_ctrl >> SNB_GMCH_GMS_SHIFT) & SNB_GMCH_GMS_MASK;
400 400
401 return (size_t)gms * MB(32); 401 return gms * MB(32);
402} 402}
403 403
404static size_t __init gen8_stolen_size(int num, int slot, int func) 404static resource_size_t __init gen8_stolen_size(int num, int slot, int func)
405{ 405{
406 u16 gmch_ctrl; 406 u16 gmch_ctrl;
407 u16 gms; 407 u16 gms;
@@ -409,10 +409,10 @@ static size_t __init gen8_stolen_size(int num, int slot, int func)
409 gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL); 409 gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL);
410 gms = (gmch_ctrl >> BDW_GMCH_GMS_SHIFT) & BDW_GMCH_GMS_MASK; 410 gms = (gmch_ctrl >> BDW_GMCH_GMS_SHIFT) & BDW_GMCH_GMS_MASK;
411 411
412 return (size_t)gms * MB(32); 412 return gms * MB(32);
413} 413}
414 414
415static size_t __init chv_stolen_size(int num, int slot, int func) 415static resource_size_t __init chv_stolen_size(int num, int slot, int func)
416{ 416{
417 u16 gmch_ctrl; 417 u16 gmch_ctrl;
418 u16 gms; 418 u16 gms;
@@ -426,14 +426,14 @@ static size_t __init chv_stolen_size(int num, int slot, int func)
426 * 0x17 to 0x1d: 4MB increments start at 36MB 426 * 0x17 to 0x1d: 4MB increments start at 36MB
427 */ 427 */
428 if (gms < 0x11) 428 if (gms < 0x11)
429 return (size_t)gms * MB(32); 429 return gms * MB(32);
430 else if (gms < 0x17) 430 else if (gms < 0x17)
431 return (size_t)(gms - 0x11 + 2) * MB(4); 431 return (gms - 0x11) * MB(4) + MB(8);
432 else 432 else
433 return (size_t)(gms - 0x17 + 9) * MB(4); 433 return (gms - 0x17) * MB(4) + MB(36);
434} 434}
435 435
436static size_t __init gen9_stolen_size(int num, int slot, int func) 436static resource_size_t __init gen9_stolen_size(int num, int slot, int func)
437{ 437{
438 u16 gmch_ctrl; 438 u16 gmch_ctrl;
439 u16 gms; 439 u16 gms;
@@ -444,14 +444,15 @@ static size_t __init gen9_stolen_size(int num, int slot, int func)
444 /* 0x0 to 0xef: 32MB increments starting at 0MB */ 444 /* 0x0 to 0xef: 32MB increments starting at 0MB */
445 /* 0xf0 to 0xfe: 4MB increments starting at 4MB */ 445 /* 0xf0 to 0xfe: 4MB increments starting at 4MB */
446 if (gms < 0xf0) 446 if (gms < 0xf0)
447 return (size_t)gms * MB(32); 447 return gms * MB(32);
448 else 448 else
449 return (size_t)(gms - 0xf0 + 1) * MB(4); 449 return (gms - 0xf0) * MB(4) + MB(4);
450} 450}
451 451
452struct intel_early_ops { 452struct intel_early_ops {
453 size_t (*stolen_size)(int num, int slot, int func); 453 resource_size_t (*stolen_size)(int num, int slot, int func);
454 phys_addr_t (*stolen_base)(int num, int slot, int func, size_t size); 454 resource_size_t (*stolen_base)(int num, int slot, int func,
455 resource_size_t size);
455}; 456};
456 457
457static const struct intel_early_ops i830_early_ops __initconst = { 458static const struct intel_early_ops i830_early_ops __initconst = {
@@ -527,16 +528,20 @@ static const struct pci_device_id intel_early_ids[] __initconst = {
527 INTEL_SKL_IDS(&gen9_early_ops), 528 INTEL_SKL_IDS(&gen9_early_ops),
528 INTEL_BXT_IDS(&gen9_early_ops), 529 INTEL_BXT_IDS(&gen9_early_ops),
529 INTEL_KBL_IDS(&gen9_early_ops), 530 INTEL_KBL_IDS(&gen9_early_ops),
531 INTEL_CFL_IDS(&gen9_early_ops),
530 INTEL_GLK_IDS(&gen9_early_ops), 532 INTEL_GLK_IDS(&gen9_early_ops),
531 INTEL_CNL_IDS(&gen9_early_ops), 533 INTEL_CNL_IDS(&gen9_early_ops),
532}; 534};
533 535
536struct resource intel_graphics_stolen_res __ro_after_init = DEFINE_RES_MEM(0, 0);
537EXPORT_SYMBOL(intel_graphics_stolen_res);
538
534static void __init 539static void __init
535intel_graphics_stolen(int num, int slot, int func, 540intel_graphics_stolen(int num, int slot, int func,
536 const struct intel_early_ops *early_ops) 541 const struct intel_early_ops *early_ops)
537{ 542{
538 phys_addr_t base, end; 543 resource_size_t base, size;
539 size_t size; 544 resource_size_t end;
540 545
541 size = early_ops->stolen_size(num, slot, func); 546 size = early_ops->stolen_size(num, slot, func);
542 base = early_ops->stolen_base(num, slot, func, size); 547 base = early_ops->stolen_base(num, slot, func, size);
@@ -545,8 +550,12 @@ intel_graphics_stolen(int num, int slot, int func,
545 return; 550 return;
546 551
547 end = base + size - 1; 552 end = base + size - 1;
548 printk(KERN_INFO "Reserving Intel graphics memory at %pa-%pa\n", 553
549 &base, &end); 554 intel_graphics_stolen_res.start = base;
555 intel_graphics_stolen_res.end = end;
556
557 printk(KERN_INFO "Reserving Intel graphics memory at %pR\n",
558 &intel_graphics_stolen_res);
550 559
551 /* Mark this space as reserved */ 560 /* Mark this space as reserved */
552 e820__range_add(base, size, E820_TYPE_RESERVED); 561 e820__range_add(base, size, E820_TYPE_RESERVED);
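
The rewritten chv/gen9 size helpers make the piecewise GMS decode explicit: below a breakpoint the field counts 32MB units, above it 4MB units from a fixed base, which is exactly what the old "+ N" offsets encoded. The gen9 case as a standalone function:

#include <stdint.h>

#define MB(x) ((uint64_t)(x) * 1024 * 1024)

/* gen9 GMS decode: 0x00-0xef = 32MB units, 0xf0-0xfe = 4MB units from 4MB. */
static uint64_t gen9_stolen_bytes(uint8_t gms)
{
	if (gms < 0xf0)
		return gms * MB(32);
	return (gms - 0xf0) * MB(4) + MB(4);
}

/* e.g. gen9_stolen_bytes(0x02) == 64MB, gen9_stolen_bytes(0xf1) == 8MB */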
diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
index f73f475d0573..d177940aa090 100644
--- a/arch/x86/kernel/itmt.c
+++ b/arch/x86/kernel/itmt.c
@@ -24,7 +24,6 @@
24#include <linux/cpumask.h> 24#include <linux/cpumask.h>
25#include <linux/cpuset.h> 25#include <linux/cpuset.h>
26#include <linux/mutex.h> 26#include <linux/mutex.h>
27#include <linux/sched.h>
28#include <linux/sysctl.h> 27#include <linux/sysctl.h>
29#include <linux/nodemask.h> 28#include <linux/nodemask.h>
30 29
diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c
new file mode 100644
index 000000000000..b68fd895235a
--- /dev/null
+++ b/arch/x86/kernel/jailhouse.c
@@ -0,0 +1,211 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Jailhouse paravirt_ops implementation
4 *
5 * Copyright (c) Siemens AG, 2015-2017
6 *
7 * Authors:
8 * Jan Kiszka <jan.kiszka@siemens.com>
9 */
10
11#include <linux/acpi_pmtmr.h>
12#include <linux/kernel.h>
13#include <linux/reboot.h>
14#include <asm/apic.h>
15#include <asm/cpu.h>
16#include <asm/hypervisor.h>
17#include <asm/i8259.h>
18#include <asm/irqdomain.h>
19#include <asm/pci_x86.h>
20#include <asm/reboot.h>
21#include <asm/setup.h>
22
23static __initdata struct jailhouse_setup_data setup_data;
24static unsigned int precalibrated_tsc_khz;
25
26static uint32_t jailhouse_cpuid_base(void)
27{
28 if (boot_cpu_data.cpuid_level < 0 ||
29 !boot_cpu_has(X86_FEATURE_HYPERVISOR))
30 return 0;
31
32 return hypervisor_cpuid_base("Jailhouse\0\0\0", 0);
33}
34
35static uint32_t __init jailhouse_detect(void)
36{
37 return jailhouse_cpuid_base();
38}
39
40static void jailhouse_get_wallclock(struct timespec *now)
41{
42 memset(now, 0, sizeof(*now));
43}
44
45static void __init jailhouse_timer_init(void)
46{
47 lapic_timer_frequency = setup_data.apic_khz * (1000 / HZ);
48}
49
50static unsigned long jailhouse_get_tsc(void)
51{
52 return precalibrated_tsc_khz;
53}
54
55static void __init jailhouse_x2apic_init(void)
56{
57#ifdef CONFIG_X86_X2APIC
58 if (!x2apic_enabled())
59 return;
60 /*
 61 * We do not have access to interrupt remapping (IR) inside Jailhouse
 62 * non-root cells, so we have to run in physical mode.
63 */
64 x2apic_phys = 1;
65 /*
66 * This will trigger the switch to apic_x2apic_phys. Empty OEM IDs
67 * ensure that only this APIC driver picks up the call.
68 */
69 default_acpi_madt_oem_check("", "");
70#endif
71}
72
73static void __init jailhouse_get_smp_config(unsigned int early)
74{
75 struct ioapic_domain_cfg ioapic_cfg = {
76 .type = IOAPIC_DOMAIN_STRICT,
77 .ops = &mp_ioapic_irqdomain_ops,
78 };
79 struct mpc_intsrc mp_irq = {
80 .type = MP_INTSRC,
81 .irqtype = mp_INT,
82 .irqflag = MP_IRQPOL_ACTIVE_HIGH | MP_IRQTRIG_EDGE,
83 };
84 unsigned int cpu;
85
86 jailhouse_x2apic_init();
87
88 register_lapic_address(0xfee00000);
89
90 for (cpu = 0; cpu < setup_data.num_cpus; cpu++) {
91 generic_processor_info(setup_data.cpu_ids[cpu],
92 boot_cpu_apic_version);
93 }
94
95 smp_found_config = 1;
96
97 if (setup_data.standard_ioapic) {
98 mp_register_ioapic(0, 0xfec00000, gsi_top, &ioapic_cfg);
99
100 /* Register 1:1 mapping for legacy UART IRQs 3 and 4 */
101 mp_irq.srcbusirq = mp_irq.dstirq = 3;
102 mp_save_irq(&mp_irq);
103
104 mp_irq.srcbusirq = mp_irq.dstirq = 4;
105 mp_save_irq(&mp_irq);
106 }
107}
108
109static void jailhouse_no_restart(void)
110{
111 pr_notice("Jailhouse: Restart not supported, halting\n");
112 machine_halt();
113}
114
115static int __init jailhouse_pci_arch_init(void)
116{
117 pci_direct_init(1);
118
119 /*
120 * There are no bridges on the virtual PCI root bus under Jailhouse,
121 * thus no other way to discover all devices than a full scan.
122 * Respect any overrides via the command line, though.
123 */
124 if (pcibios_last_bus < 0)
125 pcibios_last_bus = 0xff;
126
127 return 0;
128}
129
130static void __init jailhouse_init_platform(void)
131{
132 u64 pa_data = boot_params.hdr.setup_data;
133 struct setup_data header;
134 void *mapping;
135
136 x86_init.irqs.pre_vector_init = x86_init_noop;
137 x86_init.timers.timer_init = jailhouse_timer_init;
138 x86_init.mpparse.get_smp_config = jailhouse_get_smp_config;
139 x86_init.pci.arch_init = jailhouse_pci_arch_init;
140
141 x86_platform.calibrate_cpu = jailhouse_get_tsc;
142 x86_platform.calibrate_tsc = jailhouse_get_tsc;
143 x86_platform.get_wallclock = jailhouse_get_wallclock;
144 x86_platform.legacy.rtc = 0;
145 x86_platform.legacy.warm_reset = 0;
146 x86_platform.legacy.i8042 = X86_LEGACY_I8042_PLATFORM_ABSENT;
147
148 legacy_pic = &null_legacy_pic;
149
150 machine_ops.emergency_restart = jailhouse_no_restart;
151
152 while (pa_data) {
153 mapping = early_memremap(pa_data, sizeof(header));
154 memcpy(&header, mapping, sizeof(header));
155 early_memunmap(mapping, sizeof(header));
156
157 if (header.type == SETUP_JAILHOUSE &&
158 header.len >= sizeof(setup_data)) {
159 pa_data += offsetof(struct setup_data, data);
160
161 mapping = early_memremap(pa_data, sizeof(setup_data));
162 memcpy(&setup_data, mapping, sizeof(setup_data));
163 early_memunmap(mapping, sizeof(setup_data));
164
165 break;
166 }
167
168 pa_data = header.next;
169 }
170
171 if (!pa_data)
172 panic("Jailhouse: No valid setup data found");
173
174 if (setup_data.compatible_version > JAILHOUSE_SETUP_REQUIRED_VERSION)
175 panic("Jailhouse: Unsupported setup data structure");
176
177 pmtmr_ioport = setup_data.pm_timer_address;
178 pr_debug("Jailhouse: PM-Timer IO Port: %#x\n", pmtmr_ioport);
179
180 precalibrated_tsc_khz = setup_data.tsc_khz;
181 setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
182
183 pci_probe = 0;
184
185 /*
 186 * Avoid the kernel complaining about missing ACPI tables - there
 187 * are none in a non-root cell.
188 */
189 disable_acpi();
190}
191
192bool jailhouse_paravirt(void)
193{
194 return jailhouse_cpuid_base() != 0;
195}
196
197static bool jailhouse_x2apic_available(void)
198{
199 /*
200 * The x2APIC is only available if the root cell enabled it. Jailhouse
201 * does not support switching between xAPIC and x2APIC.
202 */
203 return x2apic_enabled();
204}
205
206const struct hypervisor_x86 x86_hyper_jailhouse __refconst = {
207 .name = "Jailhouse",
208 .detect = jailhouse_detect,
209 .init.init_platform = jailhouse_init_platform,
210 .init.x2apic_available = jailhouse_x2apic_available,
211};
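
jailhouse_init_platform() walks the boot_params setup_data chain: each node begins with a header carrying the type, the payload length and the physical address of the next node, and the walk stops at the first SETUP_JAILHOUSE entry. A hedged sketch of the traversal over an in-memory list -- the kernel maps each node with early_memremap() instead of dereferencing it directly, and the type value here is assumed for illustration:

#include <stddef.h>
#include <stdint.h>

struct setup_hdr {
	uint64_t next;   /* phys addr of next node, 0 terminates */
	uint32_t type;
	uint32_t len;
	/* payload follows */
};

#define SETUP_JAILHOUSE 6   /* value assumed for illustration */

/* Walk the chain until a node of the wanted type is found. Here "phys
 * addresses" are plain pointers; the kernel would early_memremap() each. */
struct setup_hdr *find_setup_data(uint64_t pa, uint32_t type)
{
	while (pa) {
		struct setup_hdr *h = (struct setup_hdr *)(uintptr_t)pa;

		if (h->type == type)
			return h;
		pa = h->next;
	}
	return NULL;
}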
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 3a4b12809ab5..27d0a1712663 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -281,7 +281,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
281 int ELCR_fallback = 0; 281 int ELCR_fallback = 0;
282 282
283 intsrc.type = MP_INTSRC; 283 intsrc.type = MP_INTSRC;
284 intsrc.irqflag = 0; /* conforming */ 284 intsrc.irqflag = MP_IRQTRIG_DEFAULT | MP_IRQPOL_DEFAULT;
285 intsrc.srcbus = 0; 285 intsrc.srcbus = 0;
286 intsrc.dstapic = mpc_ioapic_id(0); 286 intsrc.dstapic = mpc_ioapic_id(0);
287 287
@@ -324,10 +324,13 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
324 * copy that information over to the MP table in the 324 * copy that information over to the MP table in the
325 * irqflag field (level sensitive, active high polarity). 325 * irqflag field (level sensitive, active high polarity).
326 */ 326 */
327 if (ELCR_trigger(i)) 327 if (ELCR_trigger(i)) {
328 intsrc.irqflag = 13; 328 intsrc.irqflag = MP_IRQTRIG_LEVEL |
329 else 329 MP_IRQPOL_ACTIVE_HIGH;
330 intsrc.irqflag = 0; 330 } else {
331 intsrc.irqflag = MP_IRQTRIG_DEFAULT |
332 MP_IRQPOL_DEFAULT;
333 }
331 } 334 }
332 335
333 intsrc.srcbusirq = i; 336 intsrc.srcbusirq = i;
@@ -419,7 +422,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
419 construct_ioapic_table(mpc_default_type); 422 construct_ioapic_table(mpc_default_type);
420 423
421 lintsrc.type = MP_LINTSRC; 424 lintsrc.type = MP_LINTSRC;
422 lintsrc.irqflag = 0; /* conforming */ 425 lintsrc.irqflag = MP_IRQTRIG_DEFAULT | MP_IRQPOL_DEFAULT;
423 lintsrc.srcbusid = 0; 426 lintsrc.srcbusid = 0;
424 lintsrc.srcbusirq = 0; 427 lintsrc.srcbusirq = 0;
425 lintsrc.destapic = MP_APIC_ALL; 428 lintsrc.destapic = MP_APIC_ALL;
@@ -664,7 +667,7 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m)
664 if (m->irqtype != mp_INT) 667 if (m->irqtype != mp_INT)
665 return 0; 668 return 0;
666 669
667 if (m->irqflag != 0x0f) 670 if (m->irqflag != (MP_IRQTRIG_LEVEL | MP_IRQPOL_ACTIVE_LOW))
668 return 0; 671 return 0;
669 672
670 /* not legacy */ 673 /* not legacy */
@@ -673,7 +676,8 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m)
673 if (mp_irqs[i].irqtype != mp_INT) 676 if (mp_irqs[i].irqtype != mp_INT)
674 continue; 677 continue;
675 678
676 if (mp_irqs[i].irqflag != 0x0f) 679 if (mp_irqs[i].irqflag != (MP_IRQTRIG_LEVEL |
680 MP_IRQPOL_ACTIVE_LOW))
677 continue; 681 continue;
678 682
679 if (mp_irqs[i].srcbus != m->srcbus) 683 if (mp_irqs[i].srcbus != m->srcbus)
@@ -784,7 +788,8 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
784 if (mp_irqs[i].irqtype != mp_INT) 788 if (mp_irqs[i].irqtype != mp_INT)
785 continue; 789 continue;
786 790
787 if (mp_irqs[i].irqflag != 0x0f) 791 if (mp_irqs[i].irqflag != (MP_IRQTRIG_LEVEL |
792 MP_IRQPOL_ACTIVE_LOW))
788 continue; 793 continue;
789 794
790 if (nr_m_spare > 0) { 795 if (nr_m_spare > 0) {
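
The mpparse conversion replaces the magic irqflag literals with named trigger/polarity fields; per the MP spec the flag word packs polarity in bits 0-1 and trigger mode in bits 2-3. A short check that the old constants decode to the names substituted above (values as assumed from the kernel's mpspec_def.h):

#include <stdio.h>

/* MP-spec irqflag layout: polarity in bits 0-1, trigger mode in bits 2-3. */
#define MP_IRQPOL_DEFAULT      0x0
#define MP_IRQPOL_ACTIVE_HIGH  0x1
#define MP_IRQPOL_ACTIVE_LOW   0x3
#define MP_IRQTRIG_DEFAULT     (0x0 << 2)
#define MP_IRQTRIG_EDGE        (0x1 << 2)
#define MP_IRQTRIG_LEVEL       (0x3 << 2)

int main(void)
{
	/* The old magic numbers decode to: */
	printf("13   == level/high: %d\n",
	       13 == (MP_IRQTRIG_LEVEL | MP_IRQPOL_ACTIVE_HIGH));
	printf("0x0f == level/low:  %d\n",
	       0x0f == (MP_IRQTRIG_LEVEL | MP_IRQPOL_ACTIVE_LOW));
	return 0;
}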
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 599d7462eccc..df7ab02f959f 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -1,5 +1,5 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include <linux/dma-mapping.h> 2#include <linux/dma-direct.h>
3#include <linux/dma-debug.h> 3#include <linux/dma-debug.h>
4#include <linux/dmar.h> 4#include <linux/dmar.h>
5#include <linux/export.h> 5#include <linux/export.h>
@@ -87,7 +87,6 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
87 87
88 dma_mask = dma_alloc_coherent_mask(dev, flag); 88 dma_mask = dma_alloc_coherent_mask(dev, flag);
89 89
90 flag &= ~__GFP_ZERO;
91again: 90again:
92 page = NULL; 91 page = NULL;
93 /* CMA can be used only in the context which permits sleeping */ 92 /* CMA can be used only in the context which permits sleeping */
@@ -139,7 +138,6 @@ bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
139 if (!*dev) 138 if (!*dev)
140 *dev = &x86_dma_fallback_dev; 139 *dev = &x86_dma_fallback_dev;
141 140
142 *gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
143 *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp); 141 *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp);
144 142
145 if (!is_device_dma_capable(*dev)) 143 if (!is_device_dma_capable(*dev))
@@ -217,7 +215,7 @@ static __init int iommu_setup(char *p)
217} 215}
218early_param("iommu", iommu_setup); 216early_param("iommu", iommu_setup);
219 217
220int x86_dma_supported(struct device *dev, u64 mask) 218int arch_dma_supported(struct device *dev, u64 mask)
221{ 219{
222#ifdef CONFIG_PCI 220#ifdef CONFIG_PCI
223 if (mask > 0xffffffff && forbid_dac > 0) { 221 if (mask > 0xffffffff && forbid_dac > 0) {
@@ -226,12 +224,6 @@ int x86_dma_supported(struct device *dev, u64 mask)
226 } 224 }
227#endif 225#endif
228 226
229 /* Copied from i386. Doesn't make much sense, because it will
230 only work for pci_alloc_coherent.
231 The caller just has to use GFP_DMA in this case. */
232 if (mask < DMA_BIT_MASK(24))
233 return 0;
234
235 /* Tell the device to use SAC when IOMMU force is on. This 227 /* Tell the device to use SAC when IOMMU force is on. This
236 allows the driver to use cheaper accesses in some cases. 228 allows the driver to use cheaper accesses in some cases.
237 229
@@ -251,6 +243,17 @@ int x86_dma_supported(struct device *dev, u64 mask)
251 243
252 return 1; 244 return 1;
253} 245}
246EXPORT_SYMBOL(arch_dma_supported);
247
248int x86_dma_supported(struct device *dev, u64 mask)
249{
250 /* Copied from i386. Doesn't make much sense, because it will
251 only work for pci_alloc_coherent.
252 The caller just has to use GFP_DMA in this case. */
253 if (mask < DMA_BIT_MASK(24))
254 return 0;
255 return 1;
256}
254 257
255static int __init pci_iommu_init(void) 258static int __init pci_iommu_init(void)
256{ 259{
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index b0caae27e1b7..618285e475c6 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -1,7 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2/* Fallback functions when the main IOMMU code is not compiled in. This 2/* Fallback functions when the main IOMMU code is not compiled in. This
3 code is roughly equivalent to i386. */ 3 code is roughly equivalent to i386. */
4#include <linux/dma-mapping.h> 4#include <linux/dma-direct.h>
5#include <linux/scatterlist.h> 5#include <linux/scatterlist.h>
6#include <linux/string.h> 6#include <linux/string.h>
7#include <linux/gfp.h> 7#include <linux/gfp.h>
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 53bd05ea90d8..0ee0f8f34251 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -6,7 +6,7 @@
6#include <linux/init.h> 6#include <linux/init.h>
7#include <linux/swiotlb.h> 7#include <linux/swiotlb.h>
8#include <linux/bootmem.h> 8#include <linux/bootmem.h>
9#include <linux/dma-mapping.h> 9#include <linux/dma-direct.h>
10#include <linux/mem_encrypt.h> 10#include <linux/mem_encrypt.h>
11 11
12#include <asm/iommu.h> 12#include <asm/iommu.h>
@@ -48,7 +48,7 @@ void x86_swiotlb_free_coherent(struct device *dev, size_t size,
48 dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs); 48 dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
49} 49}
50 50
51static const struct dma_map_ops swiotlb_dma_ops = { 51static const struct dma_map_ops x86_swiotlb_dma_ops = {
52 .mapping_error = swiotlb_dma_mapping_error, 52 .mapping_error = swiotlb_dma_mapping_error,
53 .alloc = x86_swiotlb_alloc_coherent, 53 .alloc = x86_swiotlb_alloc_coherent,
54 .free = x86_swiotlb_free_coherent, 54 .free = x86_swiotlb_free_coherent,
@@ -112,7 +112,7 @@ void __init pci_swiotlb_init(void)
112{ 112{
113 if (swiotlb) { 113 if (swiotlb) {
114 swiotlb_init(0); 114 swiotlb_init(0);
115 dma_ops = &swiotlb_dma_ops; 115 dma_ops = &x86_swiotlb_dma_ops;
116 } 116 }
117} 117}
118 118
@@ -120,7 +120,7 @@ void __init pci_swiotlb_late_init(void)
120{ 120{
121 /* An IOMMU turned us off. */ 121 /* An IOMMU turned us off. */
122 if (!swiotlb) 122 if (!swiotlb)
123 swiotlb_free(); 123 swiotlb_exit();
124 else { 124 else {
125 printk(KERN_INFO "PCI-DMA: " 125 printk(KERN_INFO "PCI-DMA: "
126 "Using software bounce buffering for IO (SWIOTLB)\n"); 126 "Using software bounce buffering for IO (SWIOTLB)\n");
diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c
index 39a59299bfa0..235fe6008ac8 100644
--- a/arch/x86/kernel/platform-quirks.c
+++ b/arch/x86/kernel/platform-quirks.c
@@ -9,6 +9,7 @@ void __init x86_early_init_platform_quirks(void)
9{ 9{
10 x86_platform.legacy.i8042 = X86_LEGACY_I8042_EXPECTED_PRESENT; 10 x86_platform.legacy.i8042 = X86_LEGACY_I8042_EXPECTED_PRESENT;
11 x86_platform.legacy.rtc = 1; 11 x86_platform.legacy.rtc = 1;
12 x86_platform.legacy.warm_reset = 1;
12 x86_platform.legacy.reserve_bios_regions = 0; 13 x86_platform.legacy.reserve_bios_regions = 0;
13 x86_platform.legacy.devices.pnpbios = 1; 14 x86_platform.legacy.devices.pnpbios = 1;
14 15
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index cb368c2a22ab..03408b942adb 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -21,7 +21,6 @@
21#include <linux/dmi.h> 21#include <linux/dmi.h>
22#include <linux/utsname.h> 22#include <linux/utsname.h>
23#include <linux/stackprotector.h> 23#include <linux/stackprotector.h>
24#include <linux/tick.h>
25#include <linux/cpuidle.h> 24#include <linux/cpuidle.h>
26#include <trace/events/power.h> 25#include <trace/events/power.h>
27#include <linux/hw_breakpoint.h> 26#include <linux/hw_breakpoint.h>
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c75466232016..9eb448c7859d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -557,7 +557,7 @@ static void __set_personality_x32(void)
557 * Pretend to come from a x32 execve. 557 * Pretend to come from a x32 execve.
558 */ 558 */
559 task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT; 559 task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT;
560 current->thread.status &= ~TS_COMPAT; 560 current_thread_info()->status &= ~TS_COMPAT;
561#endif 561#endif
562} 562}
563 563
@@ -571,7 +571,7 @@ static void __set_personality_ia32(void)
571 current->personality |= force_personality32; 571 current->personality |= force_personality32;
572 /* Prepare the first "return" to user space */ 572 /* Prepare the first "return" to user space */
573 task_pt_regs(current)->orig_ax = __NR_ia32_execve; 573 task_pt_regs(current)->orig_ax = __NR_ia32_execve;
574 current->thread.status |= TS_COMPAT; 574 current_thread_info()->status |= TS_COMPAT;
575#endif 575#endif
576} 576}
577 577
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index f37d18124648..ed5c4cdf0a34 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -935,7 +935,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
935 */ 935 */
936 regs->orig_ax = value; 936 regs->orig_ax = value;
937 if (syscall_get_nr(child, regs) >= 0) 937 if (syscall_get_nr(child, regs) >= 0)
938 child->thread.status |= TS_I386_REGS_POKED; 938 child->thread_info.status |= TS_I386_REGS_POKED;
939 break; 939 break;
940 940
941 case offsetof(struct user32, regs.eflags): 941 case offsetof(struct user32, regs.eflags):
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 307d3bac5f04..11eda21eb697 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -68,6 +68,9 @@ relocate_kernel:
68 movq %cr4, %rax 68 movq %cr4, %rax
69 movq %rax, CR4(%r11) 69 movq %rax, CR4(%r11)
70 70
71 /* Save CR4. Required to enable the right paging mode later. */
72 movq %rax, %r13
73
71 /* zero out flags, and disable interrupts */ 74 /* zero out flags, and disable interrupts */
72 pushq $0 75 pushq $0
73 popfq 76 popfq
@@ -126,8 +129,13 @@ identity_mapped:
126 /* 129 /*
127 * Set cr4 to a known state: 130 * Set cr4 to a known state:
128 * - physical address extension enabled 131 * - physical address extension enabled
132 * - 5-level paging, if it was enabled before
129 */ 133 */
130 movl $X86_CR4_PAE, %eax 134 movl $X86_CR4_PAE, %eax
135 testq $X86_CR4_LA57, %r13
136 jz 1f
137 orl $X86_CR4_LA57, %eax
1381:
131 movq %rax, %cr4 139 movq %rax, %cr4
132 140
133 jmp 1f 141 jmp 1f
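
The relocate_kernel change preserves the 5-level-paging bit across the CR4 reset: CR4 is saved in %r13 before the identity-mapped stub rebuilds CR4 from scratch, and LA57 is OR'ed back in only if it was set. The equivalent logic expressed in C:

#include <stdint.h>

#define X86_CR4_PAE  (1u << 5)
#define X86_CR4_LA57 (1u << 12)

/* Rebuild CR4 to a known state, keeping 5-level paging if it was on. */
uint64_t rebuild_cr4(uint64_t saved_cr4)
{
	uint64_t cr4 = X86_CR4_PAE;

	if (saved_cr4 & X86_CR4_LA57)
		cr4 |= X86_CR4_LA57;
	return cr4;
}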
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 68d7ab81c62f..1ae67e982af7 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -114,7 +114,6 @@
114#include <asm/alternative.h> 114#include <asm/alternative.h>
115#include <asm/prom.h> 115#include <asm/prom.h>
116#include <asm/microcode.h> 116#include <asm/microcode.h>
117#include <asm/mmu_context.h>
118#include <asm/kaslr.h> 117#include <asm/kaslr.h>
119#include <asm/unwind.h> 118#include <asm/unwind.h>
120 119
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b9e00e8f1c9b..4cdc0b27ec82 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -787,7 +787,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
787 * than the tracee. 787 * than the tracee.
788 */ 788 */
789#ifdef CONFIG_IA32_EMULATION 789#ifdef CONFIG_IA32_EMULATION
790 if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) 790 if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
791 return __NR_ia32_restart_syscall; 791 return __NR_ia32_restart_syscall;
792#endif 792#endif
793#ifdef CONFIG_X86_X32_ABI 793#ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index 8c6da1a643da..ac057f9b0763 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -25,8 +25,8 @@ static inline void signal_compat_build_tests(void)
25 * limits also have to look at this code. Make sure any 25 * limits also have to look at this code. Make sure any
26 * new fields are handled in copy_siginfo_to_user32()! 26 * new fields are handled in copy_siginfo_to_user32()!
27 */ 27 */
28 BUILD_BUG_ON(NSIGILL != 8); 28 BUILD_BUG_ON(NSIGILL != 11);
29 BUILD_BUG_ON(NSIGFPE != 8); 29 BUILD_BUG_ON(NSIGFPE != 13);
30 BUILD_BUG_ON(NSIGSEGV != 4); 30 BUILD_BUG_ON(NSIGSEGV != 4);
31 BUILD_BUG_ON(NSIGBUS != 5); 31 BUILD_BUG_ON(NSIGBUS != 5);
32 BUILD_BUG_ON(NSIGTRAP != 4); 32 BUILD_BUG_ON(NSIGTRAP != 4);
@@ -64,7 +64,7 @@ static inline void signal_compat_build_tests(void)
64 CHECK_SI_SIZE (_kill, 2*sizeof(int)); 64 CHECK_SI_SIZE (_kill, 2*sizeof(int));
65 65
66 CHECK_CSI_OFFSET(_timer); 66 CHECK_CSI_OFFSET(_timer);
67 CHECK_CSI_SIZE (_timer, 5*sizeof(int)); 67 CHECK_CSI_SIZE (_timer, 3*sizeof(int));
68 CHECK_SI_SIZE (_timer, 6*sizeof(int)); 68 CHECK_SI_SIZE (_timer, 6*sizeof(int));
69 69
70 CHECK_CSI_OFFSET(_rt); 70 CHECK_CSI_OFFSET(_rt);
@@ -75,9 +75,11 @@ static inline void signal_compat_build_tests(void)
75 CHECK_CSI_SIZE (_sigchld, 5*sizeof(int)); 75 CHECK_CSI_SIZE (_sigchld, 5*sizeof(int));
76 CHECK_SI_SIZE (_sigchld, 8*sizeof(int)); 76 CHECK_SI_SIZE (_sigchld, 8*sizeof(int));
77 77
78#ifdef CONFIG_X86_X32_ABI
78 CHECK_CSI_OFFSET(_sigchld_x32); 79 CHECK_CSI_OFFSET(_sigchld_x32);
79 CHECK_CSI_SIZE (_sigchld_x32, 7*sizeof(int)); 80 CHECK_CSI_SIZE (_sigchld_x32, 7*sizeof(int));
80 /* no _sigchld_x32 in the generic siginfo_t */ 81 /* no _sigchld_x32 in the generic siginfo_t */
82#endif
81 83
82 CHECK_CSI_OFFSET(_sigfault); 84 CHECK_CSI_OFFSET(_sigfault);
83 CHECK_CSI_SIZE (_sigfault, 4*sizeof(int)); 85 CHECK_CSI_SIZE (_sigfault, 4*sizeof(int));
@@ -96,6 +98,8 @@ static inline void signal_compat_build_tests(void)
96 98
97void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact) 99void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact)
98{ 100{
101 signal_compat_build_tests();
102
99 /* Don't leak in-kernel non-uapi flags to user-space */ 103 /* Don't leak in-kernel non-uapi flags to user-space */
100 if (oact) 104 if (oact)
101 oact->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI); 105 oact->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI);
@@ -111,116 +115,3 @@ void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact)
111 if (in_x32_syscall()) 115 if (in_x32_syscall())
112 act->sa.sa_flags |= SA_X32_ABI; 116 act->sa.sa_flags |= SA_X32_ABI;
113} 117}
114
115int __copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from,
116 bool x32_ABI)
117{
118 int err = 0;
119
120 signal_compat_build_tests();
121
122 if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
123 return -EFAULT;
124
125 put_user_try {
126 /* If you change siginfo_t structure, please make sure that
127 this code is fixed accordingly.
128 It should never copy any pad contained in the structure
129 to avoid security leaks, but must copy the generic
130 3 ints plus the relevant union member. */
131 put_user_ex(from->si_signo, &to->si_signo);
132 put_user_ex(from->si_errno, &to->si_errno);
133 put_user_ex(from->si_code, &to->si_code);
134
135 if (from->si_code < 0) {
136 put_user_ex(from->si_pid, &to->si_pid);
137 put_user_ex(from->si_uid, &to->si_uid);
138 put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr);
139 } else {
140 /*
141 * First 32bits of unions are always present:
142 * si_pid === si_band === si_tid === si_addr(LS half)
143 */
144 put_user_ex(from->_sifields._pad[0],
145 &to->_sifields._pad[0]);
146 switch (siginfo_layout(from->si_signo, from->si_code)) {
147 case SIL_FAULT:
148 if (from->si_signo == SIGBUS &&
149 (from->si_code == BUS_MCEERR_AR ||
150 from->si_code == BUS_MCEERR_AO))
151 put_user_ex(from->si_addr_lsb, &to->si_addr_lsb);
152
153 if (from->si_signo == SIGSEGV) {
154 if (from->si_code == SEGV_BNDERR) {
155 compat_uptr_t lower = (unsigned long)from->si_lower;
156 compat_uptr_t upper = (unsigned long)from->si_upper;
157 put_user_ex(lower, &to->si_lower);
158 put_user_ex(upper, &to->si_upper);
159 }
160 if (from->si_code == SEGV_PKUERR)
161 put_user_ex(from->si_pkey, &to->si_pkey);
162 }
163 break;
164 case SIL_SYS:
165 put_user_ex(from->si_syscall, &to->si_syscall);
166 put_user_ex(from->si_arch, &to->si_arch);
167 break;
168 case SIL_CHLD:
169 if (!x32_ABI) {
170 put_user_ex(from->si_utime, &to->si_utime);
171 put_user_ex(from->si_stime, &to->si_stime);
172 } else {
173 put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime);
174 put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime);
175 }
176 put_user_ex(from->si_status, &to->si_status);
177 /* FALL THROUGH */
178 case SIL_KILL:
179 put_user_ex(from->si_uid, &to->si_uid);
180 break;
181 case SIL_POLL:
182 put_user_ex(from->si_fd, &to->si_fd);
183 break;
184 case SIL_TIMER:
185 put_user_ex(from->si_overrun, &to->si_overrun);
186 put_user_ex(ptr_to_compat(from->si_ptr),
187 &to->si_ptr);
188 break;
189 case SIL_RT:
190 put_user_ex(from->si_uid, &to->si_uid);
191 put_user_ex(from->si_int, &to->si_int);
192 break;
193 }
194 }
195 } put_user_catch(err);
196
197 return err;
198}
199
200/* from syscall's path, where we know the ABI */
201int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
202{
203 return __copy_siginfo_to_user32(to, from, in_x32_syscall());
204}
205
206int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
207{
208 int err = 0;
209 u32 ptr32;
210
211 if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t)))
212 return -EFAULT;
213
214 get_user_try {
215 get_user_ex(to->si_signo, &from->si_signo);
216 get_user_ex(to->si_errno, &from->si_errno);
217 get_user_ex(to->si_code, &from->si_code);
218
219 get_user_ex(to->si_pid, &from->si_pid);
220 get_user_ex(to->si_uid, &from->si_uid);
221 get_user_ex(ptr32, &from->si_ptr);
222 to->si_ptr = compat_ptr(ptr32);
223 } get_user_catch(err);
224
225 return err;
226}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ed556d50d7ed..6f27facbaa9b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -75,7 +75,6 @@
75#include <asm/uv/uv.h> 75#include <asm/uv/uv.h>
76#include <linux/mc146818rtc.h> 76#include <linux/mc146818rtc.h>
77#include <asm/i8259.h> 77#include <asm/i8259.h>
78#include <asm/realmode.h>
79#include <asm/misc.h> 78#include <asm/misc.h>
80#include <asm/qspinlock.h> 79#include <asm/qspinlock.h>
81 80
@@ -934,7 +933,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
934 * the targeted processor. 933 * the targeted processor.
935 */ 934 */
936 935
937 if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { 936 if (x86_platform.legacy.warm_reset) {
938 937
939 pr_debug("Setting warm reset code and vector.\n"); 938 pr_debug("Setting warm reset code and vector.\n");
940 939
@@ -1006,7 +1005,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
1006 /* mark "stuck" area as not stuck */ 1005 /* mark "stuck" area as not stuck */
1007 *trampoline_status = 0; 1006 *trampoline_status = 0;
1008 1007
1009 if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { 1008 if (x86_platform.legacy.warm_reset) {
1010 /* 1009 /*
1011 * Cleanup possible dangling ends... 1010 * Cleanup possible dangling ends...
1012 */ 1011 */
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 749d189f8cd4..774ebafa97c4 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -69,9 +69,12 @@ static struct irqaction irq0 = {
69 69
70static void __init setup_default_timer_irq(void) 70static void __init setup_default_timer_irq(void)
71{ 71{
72 if (!nr_legacy_irqs()) 72 /*
73 return; 73 * Unconditionally register the legacy timer; even without legacy
74 setup_irq(0, &irq0); 74 * PIC/PIT we need this for the HPET0 in legacy replacement mode.
75 */
76 if (setup_irq(0, &irq0))
77 pr_info("Failed to register legacy timer interrupt\n");
75} 78}
76 79
77/* Default timer init function */ 80/* Default timer init function */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index e169e85db434..fb4302738410 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -25,6 +25,7 @@
25#include <asm/geode.h> 25#include <asm/geode.h>
26#include <asm/apic.h> 26#include <asm/apic.h>
27#include <asm/intel-family.h> 27#include <asm/intel-family.h>
28#include <asm/i8259.h>
28 29
29unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ 30unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
30EXPORT_SYMBOL(cpu_khz); 31EXPORT_SYMBOL(cpu_khz);
@@ -363,6 +364,20 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
363 unsigned long tscmin, tscmax; 364 unsigned long tscmin, tscmax;
364 int pitcnt; 365 int pitcnt;
365 366
367 if (!has_legacy_pic()) {
368 /*
 369 * Relies on tsc_early_delay_calibrate() to have given us a semi-
 370 * usable udelay(); wait for the same 50ms we would have with
371 * the PIT loop below.
372 */
373 udelay(10 * USEC_PER_MSEC);
374 udelay(10 * USEC_PER_MSEC);
375 udelay(10 * USEC_PER_MSEC);
376 udelay(10 * USEC_PER_MSEC);
377 udelay(10 * USEC_PER_MSEC);
378 return ULONG_MAX;
379 }
380
366 /* Set the Gate high, disable speaker */ 381 /* Set the Gate high, disable speaker */
367 outb((inb(0x61) & ~0x02) | 0x01, 0x61); 382 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
368 383
@@ -487,6 +502,9 @@ static unsigned long quick_pit_calibrate(void)
487 u64 tsc, delta; 502 u64 tsc, delta;
488 unsigned long d1, d2; 503 unsigned long d1, d2;
489 504
505 if (!has_legacy_pic())
506 return 0;
507
490 /* Set the Gate high, disable speaker */ 508 /* Set the Gate high, disable speaker */
491 outb((inb(0x61) & ~0x02) | 0x01, 0x61); 509 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
492 510
@@ -988,8 +1006,6 @@ static void __init detect_art(void)
988 1006
989/* clocksource code */ 1007/* clocksource code */
990 1008
991static struct clocksource clocksource_tsc;
992
993static void tsc_resume(struct clocksource *cs) 1009static void tsc_resume(struct clocksource *cs)
994{ 1010{
995 tsc_verify_tsc_adjust(true); 1011 tsc_verify_tsc_adjust(true);
@@ -1040,12 +1056,31 @@ static void tsc_cs_tick_stable(struct clocksource *cs)
1040/* 1056/*
1041 * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc() 1057 * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
1042 */ 1058 */
1059static struct clocksource clocksource_tsc_early = {
1060 .name = "tsc-early",
1061 .rating = 299,
1062 .read = read_tsc,
1063 .mask = CLOCKSOURCE_MASK(64),
1064 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
1065 CLOCK_SOURCE_MUST_VERIFY,
1066 .archdata = { .vclock_mode = VCLOCK_TSC },
1067 .resume = tsc_resume,
1068 .mark_unstable = tsc_cs_mark_unstable,
1069 .tick_stable = tsc_cs_tick_stable,
1070};
1071
1072/*
1073 * Must mark VALID_FOR_HRES early such that when we unregister tsc_early
1074 * this one will immediately take over. We will only register if TSC has
1075 * been found good.
1076 */
1043static struct clocksource clocksource_tsc = { 1077static struct clocksource clocksource_tsc = {
1044 .name = "tsc", 1078 .name = "tsc",
1045 .rating = 300, 1079 .rating = 300,
1046 .read = read_tsc, 1080 .read = read_tsc,
1047 .mask = CLOCKSOURCE_MASK(64), 1081 .mask = CLOCKSOURCE_MASK(64),
1048 .flags = CLOCK_SOURCE_IS_CONTINUOUS | 1082 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
1083 CLOCK_SOURCE_VALID_FOR_HRES |
1049 CLOCK_SOURCE_MUST_VERIFY, 1084 CLOCK_SOURCE_MUST_VERIFY,
1050 .archdata = { .vclock_mode = VCLOCK_TSC }, 1085 .archdata = { .vclock_mode = VCLOCK_TSC },
1051 .resume = tsc_resume, 1086 .resume = tsc_resume,
@@ -1169,8 +1204,8 @@ static void tsc_refine_calibration_work(struct work_struct *work)
1169 int cpu; 1204 int cpu;
1170 1205
1171 /* Don't bother refining TSC on unstable systems */ 1206 /* Don't bother refining TSC on unstable systems */
1172 if (check_tsc_unstable()) 1207 if (tsc_unstable)
1173 goto out; 1208 return;
1174 1209
1175 /* 1210 /*
1176 * Since the work is started early in boot, we may be 1211 * Since the work is started early in boot, we may be
@@ -1222,9 +1257,13 @@ static void tsc_refine_calibration_work(struct work_struct *work)
1222 set_cyc2ns_scale(tsc_khz, cpu, tsc_stop); 1257 set_cyc2ns_scale(tsc_khz, cpu, tsc_stop);
1223 1258
1224out: 1259out:
1260 if (tsc_unstable)
1261 return;
1262
1225 if (boot_cpu_has(X86_FEATURE_ART)) 1263 if (boot_cpu_has(X86_FEATURE_ART))
1226 art_related_clocksource = &clocksource_tsc; 1264 art_related_clocksource = &clocksource_tsc;
1227 clocksource_register_khz(&clocksource_tsc, tsc_khz); 1265 clocksource_register_khz(&clocksource_tsc, tsc_khz);
1266 clocksource_unregister(&clocksource_tsc_early);
1228} 1267}
1229 1268
1230 1269
@@ -1233,13 +1272,11 @@ static int __init init_tsc_clocksource(void)
1233 if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz) 1272 if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz)
1234 return 0; 1273 return 0;
1235 1274
1275 if (check_tsc_unstable())
1276 return 0;
1277
1236 if (tsc_clocksource_reliable) 1278 if (tsc_clocksource_reliable)
1237 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; 1279 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
1238 /* lower the rating if we already know its unstable: */
1239 if (check_tsc_unstable()) {
1240 clocksource_tsc.rating = 0;
1241 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
1242 }
1243 1280
1244 if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) 1281 if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
1245 clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; 1282 clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
@@ -1252,6 +1289,7 @@ static int __init init_tsc_clocksource(void)
1252 if (boot_cpu_has(X86_FEATURE_ART)) 1289 if (boot_cpu_has(X86_FEATURE_ART))
1253 art_related_clocksource = &clocksource_tsc; 1290 art_related_clocksource = &clocksource_tsc;
1254 clocksource_register_khz(&clocksource_tsc, tsc_khz); 1291 clocksource_register_khz(&clocksource_tsc, tsc_khz);
1292 clocksource_unregister(&clocksource_tsc_early);
1255 return 0; 1293 return 0;
1256 } 1294 }
1257 1295
@@ -1356,9 +1394,12 @@ void __init tsc_init(void)
 
 	check_system_tsc_reliable();
 
-	if (unsynchronized_tsc())
+	if (unsynchronized_tsc()) {
 		mark_tsc_unstable("TSCs unsynchronized");
+		return;
+	}
 
+	clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
 	detect_art();
 }
 
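Editor's note: taken together, the tsc.c hunks implement a two-stage handoff. tsc_init() now registers a provisional clocksource_tsc_early as soon as a frequency is known, and once refined calibration completes the final clocksource_tsc replaces it; if the TSC turns out unstable, nothing is promoted. The init_tsc_clocksource() change follows the same logic: an already-unstable TSC is simply not registered, rather than being registered with rating 0 as before. A condensed sketch of that flow, with calibration details and error paths elided; the two *_flow function names are invented for illustration, the calls inside are the ones used in the diff:

	/* Hypothetical condensation of the patched flow; not literal tsc.c. */
	void tsc_boot_flow(void)
	{
		if (unsynchronized_tsc()) {
			mark_tsc_unstable("TSCs unsynchronized");
			return;		/* no TSC clocksource at all */
		}
		/* stage 1: provisional source, usable immediately at boot */
		clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
	}

	void tsc_refined_flow(void)
	{
		if (tsc_unstable)
			return;		/* never promote an unstable TSC */
		/* stage 2: refined frequency known, swap in the final source */
		clocksource_register_khz(&clocksource_tsc, tsc_khz);
		clocksource_unregister(&clocksource_tsc_early);
	}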
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index a3755d293a48..85c7ef23d99f 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -528,11 +528,11 @@ static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	return 0;
 }
 
-static int push_ret_address(struct pt_regs *regs, unsigned long ip)
+static int emulate_push_stack(struct pt_regs *regs, unsigned long val)
 {
 	unsigned long new_sp = regs->sp - sizeof_long();
 
-	if (copy_to_user((void __user *)new_sp, &ip, sizeof_long()))
+	if (copy_to_user((void __user *)new_sp, &val, sizeof_long()))
 		return -EFAULT;
 
 	regs->sp = new_sp;
@@ -566,7 +566,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
 		regs->ip += correction;
 	} else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) {
 		regs->sp += sizeof_long(); /* Pop incorrect return address */
-		if (push_ret_address(regs, utask->vaddr + auprobe->defparam.ilen))
+		if (emulate_push_stack(regs, utask->vaddr + auprobe->defparam.ilen))
 			return -ERESTART;
 	}
 	/* popf; tell the caller to not touch TF */
@@ -655,7 +655,7 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 		 *
 		 * But there is corner case, see the comment in ->post_xol().
 		 */
-		if (push_ret_address(regs, new_ip))
+		if (emulate_push_stack(regs, new_ip))
 			return false;
 	} else if (!check_jmp_cond(auprobe, regs)) {
 		offs = 0;
@@ -665,6 +665,16 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	return true;
 }
 
+static bool push_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	unsigned long *src_ptr = (void *)regs + auprobe->push.reg_offset;
+
+	if (emulate_push_stack(regs, *src_ptr))
+		return false;
+	regs->ip += auprobe->push.ilen;
+	return true;
+}
+
 static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	BUG_ON(!branch_is_call(auprobe));
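Editor's note: push_emulate_op() lets a probed push %reg be emulated in-kernel instead of single-stepped out of line: copy the register value to the user stack (via emulate_push_stack() above) and advance the saved instruction pointer past the probed instruction. A self-contained userspace model of that state change follows; the toy_regs type and slot-indexed stack are invented stand-ins for pt_regs and the real user stack:

	#include <stdio.h>

	/* Toy stand-ins for pt_regs and the user stack; illustrative only. */
	struct toy_regs { unsigned long sp, ip, ax; };

	static void toy_emulate_push_ax(struct toy_regs *regs,
					unsigned long stack[],
					unsigned int insn_len)
	{
		regs->sp -= 1;			/* one slot, like sp - sizeof_long() */
		stack[regs->sp] = regs->ax;	/* the copy_to_user() step */
		regs->ip += insn_len;		/* skip the emulated instruction */
	}

	int main(void)
	{
		unsigned long stack[16] = { 0 };
		struct toy_regs regs = { .sp = 16, .ip = 0x1000, .ax = 0xdead };

		toy_emulate_push_ax(&regs, stack, 1);	/* "push %rax" is one byte */
		printf("sp=%lu ip=%#lx top=%#lx\n", regs.sp, regs.ip, stack[regs.sp]);
		return 0;
	}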
@@ -703,6 +713,10 @@ static const struct uprobe_xol_ops branch_xol_ops = {
 	.post_xol = branch_post_xol_op,
 };
 
+static const struct uprobe_xol_ops push_xol_ops = {
+	.emulate  = push_emulate_op,
+};
+
 /* Returns -ENOSYS if branch_xol_ops doesn't handle this insn */
 static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 {
@@ -750,6 +764,87 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 	return 0;
 }
 
+/* Returns -ENOSYS if push_xol_ops doesn't handle this insn */
+static int push_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
+{
+	u8 opc1 = OPCODE1(insn), reg_offset = 0;
+
+	if (opc1 < 0x50 || opc1 > 0x57)
+		return -ENOSYS;
+
+	if (insn->length > 2)
+		return -ENOSYS;
+	if (insn->length == 2) {
+		/* only support rex_prefix 0x41 (x64 only) */
+#ifdef CONFIG_X86_64
+		if (insn->rex_prefix.nbytes != 1 ||
+		    insn->rex_prefix.bytes[0] != 0x41)
+			return -ENOSYS;
+
+		switch (opc1) {
+		case 0x50:
+			reg_offset = offsetof(struct pt_regs, r8);
+			break;
+		case 0x51:
+			reg_offset = offsetof(struct pt_regs, r9);
+			break;
+		case 0x52:
+			reg_offset = offsetof(struct pt_regs, r10);
+			break;
+		case 0x53:
+			reg_offset = offsetof(struct pt_regs, r11);
+			break;
+		case 0x54:
+			reg_offset = offsetof(struct pt_regs, r12);
+			break;
+		case 0x55:
+			reg_offset = offsetof(struct pt_regs, r13);
+			break;
+		case 0x56:
+			reg_offset = offsetof(struct pt_regs, r14);
+			break;
+		case 0x57:
+			reg_offset = offsetof(struct pt_regs, r15);
+			break;
+		}
+#else
+		return -ENOSYS;
+#endif
+	} else {
+		switch (opc1) {
+		case 0x50:
+			reg_offset = offsetof(struct pt_regs, ax);
+			break;
+		case 0x51:
+			reg_offset = offsetof(struct pt_regs, cx);
+			break;
+		case 0x52:
+			reg_offset = offsetof(struct pt_regs, dx);
+			break;
+		case 0x53:
+			reg_offset = offsetof(struct pt_regs, bx);
+			break;
+		case 0x54:
+			reg_offset = offsetof(struct pt_regs, sp);
+			break;
+		case 0x55:
+			reg_offset = offsetof(struct pt_regs, bp);
+			break;
+		case 0x56:
+			reg_offset = offsetof(struct pt_regs, si);
+			break;
+		case 0x57:
+			reg_offset = offsetof(struct pt_regs, di);
+			break;
+		}
+	}
+
+	auprobe->push.reg_offset = reg_offset;
+	auprobe->push.ilen = insn->length;
+	auprobe->ops = &push_xol_ops;
+	return 0;
+}
+
 /**
  * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
  * @mm: the probed address space.
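Editor's note: the hard-coded switch above works because the single-byte push encodings are 0x50+rd: the low three bits of the opcode select the register, and a REX prefix with the B bit set (0x41, the only prefix this helper accepts) switches to the r8–r15 bank. A runnable decode of that rule; the table and function names are illustrative, not kernel code:

	#include <stdio.h>

	static const char *reg_name(unsigned char opc, int rex_b)
	{
		static const char *lo[] = { "ax", "cx", "dx", "bx",
					    "sp", "bp", "si", "di" };
		static const char *hi[] = { "r8",  "r9",  "r10", "r11",
					    "r12", "r13", "r14", "r15" };
		return (rex_b ? hi : lo)[opc & 0x07];	/* low 3 bits = rd */
	}

	int main(void)
	{
		for (unsigned char opc = 0x50; opc <= 0x57; opc++)
			printf("%02x: push %%%s\t41 %02x: push %%%s\n",
			       opc, reg_name(opc, 0), opc, reg_name(opc, 1));
		return 0;
	}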
@@ -771,6 +866,10 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	if (ret != -ENOSYS)
 		return ret;
 
+	ret = push_setup_xol_ops(auprobe, &insn);
+	if (ret != -ENOSYS)
+		return ret;
+
 	/*
 	 * Figure out which fixups default_post_xol_op() will need to perform,
 	 * and annotate defparam->fixups accordingly.
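Editor's note: with this hunk, instruction analysis tries the emulators in order — branch_setup_xol_ops() first, then the new push_setup_xol_ops() — with -ENOSYS meaning "not mine, try the next strategy"; only if both decline does the code fall through to the default single-step fixup annotation. A condensed sketch of that dispatch shape, with an invented wrapper name and validity checks elided:

	/* Hypothetical condensation of the dispatch in arch_uprobe_analyze_insn(). */
	static int setup_xol_ops_sketch(struct arch_uprobe *auprobe, struct insn *insn)
	{
		int ret;

		ret = branch_setup_xol_ops(auprobe, insn);	/* jcc/jmp/call/ret */
		if (ret != -ENOSYS)
			return ret;

		ret = push_setup_xol_ops(auprobe, insn);	/* push %reg (new) */
		if (ret != -ENOSYS)
			return ret;

		/* neither emulator claimed it: caller falls back to
		 * annotating single-step fixups on defparam->fixups */
		return -ENOSYS;
	}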