author     Linus Torvalds <torvalds@linux-foundation.org>  2019-06-29 07:42:30 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2019-06-29 07:42:30 -0400
commit     728254541ebcc7fee869c3c4c3f36f96be791edb (patch)
tree       7a49c04a3a933db926e9492b45106fc7f4cf696e
parent     57103eb7c6cad04c0611b7a5767a381b34b8b0ab (diff)
parent     ae6a45a0868986f69039a2150d3b2b9ca294c378 (diff)
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar:
 "Misc fixes all over the place:

   - might_sleep() atomicity fix in the microcode loader
   - resctrl boundary condition fix
   - APIC arithmetic bug fix for frequencies >= 4.2 GHz
   - three 5-level paging crash fixes
   - two speculation fixes
   - a perf/stacktrace fix"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/unwind/orc: Fall back to using frame pointers for generated code
  perf/x86: Always store regs->ip in perf_callchain_kernel()
  x86/speculation: Allow guests to use SSBD even if host does not
  x86/mm: Handle physical-virtual alignment mismatch in phys_p4d_init()
  x86/boot/64: Add missing fixup_pointer() for next_early_pgt access
  x86/boot/64: Fix crash if kernel image crosses page table boundary
  x86/apic: Fix integer overflow on 10 bit left shift of cpu_khz
  x86/resctrl: Prevent possible overrun during bitmap operations
  x86/microcode: Fix the microcode load on CPU hotplug for real
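The APIC fix in this batch is a plain 32-bit overflow: cpu_khz is an unsigned int, so the expression cpu_khz << 10 wraps once cpu_khz reaches 2^22 kHz (roughly 4.2 GHz). A minimal user-space sketch of the arithmetic (illustrative only; the variable names mirror apic_pending_intr_clear(), but the harness is not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned int cpu_khz = 4300000;           /* ~4.3 GHz CPU */
	long long tsc = 0, ntsc = 1000;

	/* Old expression: the shift happens in 32 bits and wraps, since
	 * 4300000 << 10 is about 4.4e9, which exceeds UINT_MAX (~4.29e9). */
	long long max_loops_old = (cpu_khz << 10) - (ntsc - tsc);

	/* Fixed expression: widen before shifting, then subtract. */
	long long max_loops_new = (long long)cpu_khz << 10;
	max_loops_new -= ntsc - tsc;

	printf("old (wrapped): %lld\n", max_loops_old);
	printf("new (correct): %lld\n", max_loops_new);
	return 0;
}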
-rw-r--r--  arch/x86/events/core.c                    10
-rw-r--r--  arch/x86/kernel/apic/apic.c                3
-rw-r--r--  arch/x86/kernel/cpu/bugs.c                11
-rw-r--r--  arch/x86/kernel/cpu/microcode/core.c      15
-rw-r--r--  arch/x86/kernel/cpu/resctrl/rdtgroup.c    35
-rw-r--r--  arch/x86/kernel/head64.c                  20
-rw-r--r--  arch/x86/kernel/unwind_orc.c              26
-rw-r--r--  arch/x86/mm/init_64.c                     24
8 files changed, 89 insertions, 55 deletions
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 52a97463cb24..3cd94a21bd53 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2402,13 +2402,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
 		return;
 	}
 
-	if (perf_hw_regs(regs)) {
-		if (perf_callchain_store(entry, regs->ip))
-			return;
+	if (perf_callchain_store(entry, regs->ip))
+		return;
+
+	if (perf_hw_regs(regs))
 		unwind_start(&state, current, regs, NULL);
-	} else {
+	else
 		unwind_start(&state, current, NULL, (void *)regs->sp);
-	}
 
 	for (; !unwind_done(&state); unwind_next_frame(&state)) {
 		addr = unwind_get_return_address(&state);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 177aa8ef2afa..85be316665b4 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1464,7 +1464,8 @@ static void apic_pending_intr_clear(void)
 		if (queued) {
 			if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
 				ntsc = rdtsc();
-				max_loops = (cpu_khz << 10) - (ntsc - tsc);
+				max_loops = (long long)cpu_khz << 10;
+				max_loops -= ntsc - tsc;
 			} else {
 				max_loops--;
 			}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 03b4cc0ec3a7..66ca906aa790 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -836,6 +836,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
 	}
 
 	/*
+	 * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
+	 * bit in the mask to allow guests to use the mitigation even in the
+	 * case where the host does not enable it.
+	 */
+	if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+	    static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+		x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
+	}
+
+	/*
 	 * We have three CPU feature flags that are in play here:
 	 *  - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
 	 *  - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
@@ -852,7 +862,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
 			x86_amd_ssb_disable();
 		} else {
 			x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
-			x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
 			wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
 		}
 	}
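The speculation change works because x86_spec_ctrl_mask defines which MSR_IA32_SPEC_CTRL bits a guest is allowed to control; moving the SPEC_CTRL_SSBD update out of the host-enablement branch means the bit is in the mask whenever the CPU supports it, not only when the host itself turned the mitigation on. A simplified sketch of that masking, assuming the usual host-owned-bits-plus-guest-controllable-bits composition (hedged illustration, not the kernel's x86_virt_spec_ctrl() verbatim):

#include <stdint.h>
#include <stdio.h>

#define SPEC_CTRL_IBRS  (1ULL << 0)
#define SPEC_CTRL_STIBP (1ULL << 1)
#define SPEC_CTRL_SSBD  (1ULL << 2)

/* Compute the SPEC_CTRL value in effect while a guest runs: keep the
 * host-owned bits from the base value, let the guest drive the bits
 * that appear in the mask. */
static uint64_t guest_spec_ctrl_value(uint64_t host_base, uint64_t mask,
				      uint64_t guest_req)
{
	uint64_t val = host_base & ~mask;   /* host-owned bits */
	val |= guest_req & mask;            /* guest-controllable bits */
	return val;
}

int main(void)
{
	uint64_t host_base = 0;              /* host did not enable SSBD */
	uint64_t guest_req = SPEC_CTRL_SSBD; /* guest asks for SSBD */

	/* Before the fix: SSBD only entered the mask when the host enabled
	 * the mitigation, so the guest request was filtered out. */
	printf("old: %#llx\n", (unsigned long long)
	       guest_spec_ctrl_value(host_base, 0, guest_req));

	/* After the fix: SSBD is in the mask whenever the MSR supports it. */
	printf("new: %#llx\n", (unsigned long long)
	       guest_spec_ctrl_value(host_base, SPEC_CTRL_SSBD, guest_req));
	return 0;
}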
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index a813987b5552..cb0fdcaf1415 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -789,13 +789,16 @@ static struct syscore_ops mc_syscore_ops = {
 	.resume			= mc_bp_resume,
 };
 
-static int mc_cpu_online(unsigned int cpu)
+static int mc_cpu_starting(unsigned int cpu)
 {
-	struct device *dev;
-
-	dev = get_cpu_device(cpu);
 	microcode_update_cpu(cpu);
 	pr_debug("CPU%d added\n", cpu);
+	return 0;
+}
+
+static int mc_cpu_online(unsigned int cpu)
+{
+	struct device *dev = get_cpu_device(cpu);
 
 	if (sysfs_create_group(&dev->kobj, &mc_attr_group))
 		pr_err("Failed to create group for CPU%d\n", cpu);
@@ -872,7 +875,9 @@ int __init microcode_init(void)
 		goto out_ucode_group;
 
 	register_syscore_ops(&mc_syscore_ops);
-	cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online",
+	cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting",
+				  mc_cpu_starting, NULL);
+	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
 				  mc_cpu_online, mc_cpu_down_prep);
 
 	pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION);
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 2131b8bbaad7..2f4824793798 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -796,8 +796,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
 			      struct seq_file *seq, void *v)
 {
 	struct rdt_resource *r = of->kn->parent->priv;
-	u32 sw_shareable = 0, hw_shareable = 0;
-	u32 exclusive = 0, pseudo_locked = 0;
+	/*
+	 * Use unsigned long even though only 32 bits are used to ensure
+	 * test_bit() is used safely.
+	 */
+	unsigned long sw_shareable = 0, hw_shareable = 0;
+	unsigned long exclusive = 0, pseudo_locked = 0;
 	struct rdt_domain *dom;
 	int i, hwb, swb, excl, psl;
 	enum rdtgrp_mode mode;
@@ -842,10 +846,10 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
 		}
 		for (i = r->cache.cbm_len - 1; i >= 0; i--) {
 			pseudo_locked = dom->plr ? dom->plr->cbm : 0;
-			hwb = test_bit(i, (unsigned long *)&hw_shareable);
-			swb = test_bit(i, (unsigned long *)&sw_shareable);
-			excl = test_bit(i, (unsigned long *)&exclusive);
-			psl = test_bit(i, (unsigned long *)&pseudo_locked);
+			hwb = test_bit(i, &hw_shareable);
+			swb = test_bit(i, &sw_shareable);
+			excl = test_bit(i, &exclusive);
+			psl = test_bit(i, &pseudo_locked);
 			if (hwb && swb)
 				seq_putc(seq, 'X');
 			else if (hwb && !swb)
@@ -2486,26 +2490,19 @@ out_destroy:
  */
 static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
 {
-	/*
-	 * Convert the u32 _val to an unsigned long required by all the bit
-	 * operations within this function. No more than 32 bits of this
-	 * converted value can be accessed because all bit operations are
-	 * additionally provided with cbm_len that is initialized during
-	 * hardware enumeration using five bits from the EAX register and
-	 * thus never can exceed 32 bits.
-	 */
-	unsigned long *val = (unsigned long *)_val;
+	unsigned long val = *_val;
 	unsigned int cbm_len = r->cache.cbm_len;
 	unsigned long first_bit, zero_bit;
 
-	if (*val == 0)
+	if (val == 0)
 		return;
 
-	first_bit = find_first_bit(val, cbm_len);
-	zero_bit = find_next_zero_bit(val, cbm_len, first_bit);
+	first_bit = find_first_bit(&val, cbm_len);
+	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
 
 	/* Clear any remaining bits to ensure contiguous region */
-	bitmap_clear(val, zero_bit, cbm_len - zero_bit);
+	bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
+	*_val = (u32)val;
 }
 
 /*
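Both resctrl hunks exist for the same reason: the generic bitmap helpers (test_bit(), find_first_bit(), bitmap_clear()) operate on unsigned long words, so casting a u32 * to unsigned long * lets them touch eight bytes where only four were allocated on 64-bit builds. A small user-space illustration of the width mismatch (a hedged sketch with stand-in names like load_word(); it deliberately reproduces the overrunning read on x86-64 to show the bug, not good practice):

#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for the kernel helpers: they always load full
 * unsigned long words from the bitmap they are given. */
static unsigned long load_word(const unsigned long *bitmap)
{
	return bitmap[0];
}

int main(void)
{
	struct {
		uint32_t cbm;        /* the 4-byte object the old code passed */
		uint32_t neighbour;  /* whatever happens to sit next to it */
	} s = { 0x0, 0xffffffff };

	/* Old approach in spirit: reinterpret &s.cbm as unsigned long *.
	 * On LP64 the 8-byte load also pulls in s.neighbour's bits. */
	unsigned long bogus = load_word((unsigned long *)&s.cbm);

	/* Fixed approach: copy the value into a real unsigned long first. */
	unsigned long safe = s.cbm;

	printf("aliased load: %#lx\n", bogus); /* nonzero on x86-64 despite cbm == 0 */
	printf("safe copy:    %#lx\n", safe);  /* 0, as expected */
	return 0;
}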
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 16b1cbd3a61e..29ffa495bd1c 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -184,24 +184,25 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
 
 	if (la57) {
-		p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
+		p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++],
+				    physaddr);
 
 		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
 		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
 		pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;
 
-		i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D;
-		p4d[i + 0] = (pgdval_t)pud + pgtable_flags;
-		p4d[i + 1] = (pgdval_t)pud + pgtable_flags;
+		i = physaddr >> P4D_SHIFT;
+		p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
+		p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
 	} else {
 		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
 		pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
 		pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
 	}
 
-	i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD;
-	pud[i + 0] = (pudval_t)pmd + pgtable_flags;
-	pud[i + 1] = (pudval_t)pmd + pgtable_flags;
+	i = physaddr >> PUD_SHIFT;
+	pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
+	pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
 
 	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
 	/* Filter out unsupported __PAGE_KERNEL_* bits: */
@@ -211,8 +212,9 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	pmd_entry += physaddr;
 
 	for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
-		int idx = i + (physaddr >> PMD_SHIFT) % PTRS_PER_PMD;
-		pmd[idx] = pmd_entry + i * PMD_SIZE;
+		int idx = i + (physaddr >> PMD_SHIFT);
+
+		pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
 	}
 
 	/*
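The two head64.c hunks share one idea: when the kernel image is loaded at a physical address whose index lands in the last slot of a page table, computing the index with a single modulo and then adding 1 walks off the end of the 512-entry table, while taking the modulo per entry wraps around instead. A worked example of the index arithmetic (plain C; the constants mirror x86-64 paging, the harness is illustrative):

#include <stdio.h>

#define PUD_SHIFT     30
#define PTRS_PER_PUD  512

int main(void)
{
	/* Physical load address whose PUD index is the last slot in the table:
	 * 0x7fc0000000 >> 30 == 511. */
	unsigned long long physaddr = 0x7fc0000000ULL;

	unsigned long i_old = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD;
	unsigned long i_new = physaddr >> PUD_SHIFT;

	/* Old code: the second entry indexes one past the end of the table. */
	printf("old: pud[%lu], pud[%lu]\n", i_old + 0, i_old + 1);       /* 511, 512 */

	/* Fixed code: each index wraps on its own. */
	printf("new: pud[%lu], pud[%lu]\n",
	       (i_new + 0) % PTRS_PER_PUD, (i_new + 1) % PTRS_PER_PUD);  /* 511, 0 */
	return 0;
}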
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 33b66b5c5aec..72b997eaa1fc 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -82,9 +82,9 @@ static struct orc_entry *orc_find(unsigned long ip);
  * But they are copies of the ftrace entries that are static and
  * defined in ftrace_*.S, which do have orc entries.
  *
- * If the undwinder comes across a ftrace trampoline, then find the
+ * If the unwinder comes across a ftrace trampoline, then find the
  * ftrace function that was used to create it, and use that ftrace
- * function's orc entrie, as the placement of the return code in
+ * function's orc entry, as the placement of the return code in
  * the stack will be identical.
  */
 static struct orc_entry *orc_ftrace_find(unsigned long ip)
@@ -128,6 +128,16 @@ static struct orc_entry null_orc_entry = {
 	.type = ORC_TYPE_CALL
 };
 
+/* Fake frame pointer entry -- used as a fallback for generated code */
+static struct orc_entry orc_fp_entry = {
+	.type		= ORC_TYPE_CALL,
+	.sp_reg		= ORC_REG_BP,
+	.sp_offset	= 16,
+	.bp_reg		= ORC_REG_PREV_SP,
+	.bp_offset	= -16,
+	.end		= 0,
+};
+
 static struct orc_entry *orc_find(unsigned long ip)
 {
 	static struct orc_entry *orc;
@@ -392,8 +402,16 @@ bool unwind_next_frame(struct unwind_state *state)
 	 * calls and calls to noreturn functions.
 	 */
 	orc = orc_find(state->signal ? state->ip : state->ip - 1);
-	if (!orc)
-		goto err;
+	if (!orc) {
+		/*
+		 * As a fallback, try to assume this code uses a frame pointer.
+		 * This is useful for generated code, like BPF, which ORC
+		 * doesn't know about. This is just a guess, so the rest of
+		 * the unwind is no longer considered reliable.
+		 */
+		orc = &orc_fp_entry;
+		state->error = true;
+	}
 
 	/* End-of-stack check for kernel threads: */
 	if (orc->sp_reg == ORC_REG_UNDEFINED) {
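The fake orc_fp_entry describes the frame that a standard "push %rbp; mov %rsp, %rbp" prologue leaves behind: with RBP as the base register, the caller's stack pointer is RBP + 16 (sp_reg = BP, sp_offset = 16) and the saved RBP sits 16 bytes below that, i.e. at RBP itself (bp_offset = -16). A minimal sketch of the frame-pointer walk this fallback assumes (hypothetical code, not the kernel's unwinder, which also validates each frame pointer against the stack bounds):

#include <stdint.h>
#include <stdio.h>

/* Layout after "push %rbp; mov %rsp, %rbp" in the callee:
 *
 *   [rbp + 8]  return address into the caller
 *   [rbp + 0]  caller's saved rbp
 *
 * so the caller's stack pointer is rbp + 16, which is what the fake ORC
 * entry encodes. */
struct fp_frame {
	struct fp_frame *prev_rbp;   /* at [rbp] */
	uint64_t ret_addr;           /* at [rbp + 8] */
};

/* Hypothetical walker: follow the RBP chain and report return addresses. */
static void walk_frames(struct fp_frame *rbp)
{
	while (rbp) {
		printf("return address: %#llx\n",
		       (unsigned long long)rbp->ret_addr);
		rbp = rbp->prev_rbp;
	}
}

int main(void)
{
	/* Fake two-frame chain just to exercise the walker. */
	struct fp_frame outer = { 0, 0xffffffff81000010ULL };
	struct fp_frame inner = { &outer, 0xffffffff81000020ULL };

	walk_frames(&inner);
	return 0;
}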
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 693aaf28d5fe..0f01c7b1d217 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -671,23 +671,25 @@ static unsigned long __meminit
 phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
 	      unsigned long page_size_mask, bool init)
 {
-	unsigned long paddr_next, paddr_last = paddr_end;
-	unsigned long vaddr = (unsigned long)__va(paddr);
-	int i = p4d_index(vaddr);
+	unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last;
+
+	paddr_last = paddr_end;
+	vaddr = (unsigned long)__va(paddr);
+	vaddr_end = (unsigned long)__va(paddr_end);
 
 	if (!pgtable_l5_enabled())
 		return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
 				     page_size_mask, init);
 
-	for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
-		p4d_t *p4d;
+	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
+		p4d_t *p4d = p4d_page + p4d_index(vaddr);
 		pud_t *pud;
 
-		vaddr = (unsigned long)__va(paddr);
-		p4d = p4d_page + p4d_index(vaddr);
-		paddr_next = (paddr & P4D_MASK) + P4D_SIZE;
+		vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE;
+		paddr = __pa(vaddr);
 
 		if (paddr >= paddr_end) {
+			paddr_next = __pa(vaddr_next);
 			if (!after_bootmem &&
 			    !e820__mapped_any(paddr & P4D_MASK, paddr_next,
 					      E820_TYPE_RAM) &&
@@ -699,13 +701,13 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
 
 		if (!p4d_none(*p4d)) {
 			pud = pud_offset(p4d, 0);
-			paddr_last = phys_pud_init(pud, paddr, paddr_end,
+			paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
 						   page_size_mask, init);
 			continue;
 		}
 
 		pud = alloc_low_page();
-		paddr_last = phys_pud_init(pud, paddr, paddr_end,
+		paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
 					   page_size_mask, init);
 
 		spin_lock(&init_mm.page_table_lock);