author     Dave Airlie <airlied@redhat.com>   2014-12-01 19:58:33 -0500
committer  Dave Airlie <airlied@redhat.com>   2014-12-01 19:58:33 -0500
commit     e8115e79aa62b6ebdb3e8e61ca4092cc32938afc (patch)
tree       42b791ab54ef9d5c73dcd49f907b8b37fa2f7e19 /arch/x86/kernel
parent     9be23ae4350bfd71c0cc2ea3494671ee90e5603b (diff)
parent     009d0431c3914de64666bec0d350e54fdd59df6a (diff)
Merge tag 'v3.18-rc7' into drm-next
This fixes a bunch of conflicts prior to merging i915 tree.

Linux 3.18-rc7

Conflicts:
	drivers/gpu/drm/exynos/exynos_drm_drv.c
	drivers/gpu/drm/i915/i915_drv.c
	drivers/gpu/drm/i915/intel_pm.c
	drivers/gpu/drm/tegra/dc.c
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/cpu/common.c                           2
-rw-r--r--  arch/x86/kernel/cpu/microcode/amd_early.c             33
-rw-r--r--  arch/x86/kernel/cpu/microcode/core.c                   8
-rw-r--r--  arch/x86/kernel/cpu/microcode/core_early.c             2
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c   49
-rw-r--r--  arch/x86/kernel/dumpstack_64.c                         1
-rw-r--r--  arch/x86/kernel/entry_64.S                            81
-rw-r--r--  arch/x86/kernel/ptrace.c                               2
-rw-r--r--  arch/x86/kernel/smpboot.c                             15
-rw-r--r--  arch/x86/kernel/traps.c                               71
10 files changed, 165 insertions(+), 99 deletions(-)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4b4f78c9ba19..cfa9b5b2c27a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -146,6 +146,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
 
 static int __init x86_xsave_setup(char *s)
 {
+	if (strlen(s))
+		return 0;
 	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
 	setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
 	setup_clear_cpu_cap(X86_FEATURE_XSAVES);
diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c
index 7aa1acc79789..06674473b0e6 100644
--- a/arch/x86/kernel/cpu/microcode/amd_early.c
+++ b/arch/x86/kernel/cpu/microcode/amd_early.c
@@ -108,12 +108,13 @@ static size_t compute_container_size(u8 *data, u32 total_size)
  * load_microcode_amd() to save equivalent cpu table and microcode patches in
  * kernel heap memory.
  */
-static void apply_ucode_in_initrd(void *ucode, size_t size)
+static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch)
 {
 	struct equiv_cpu_entry *eq;
 	size_t *cont_sz;
 	u32 *header;
 	u8 *data, **cont;
+	u8 (*patch)[PATCH_MAX_SIZE];
 	u16 eq_id = 0;
 	int offset, left;
 	u32 rev, eax, ebx, ecx, edx;
@@ -123,10 +124,12 @@ static void apply_ucode_in_initrd(void *ucode, size_t size)
 	new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
 	cont_sz = (size_t *)__pa_nodebug(&container_size);
 	cont    = (u8 **)__pa_nodebug(&container);
+	patch   = (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch);
 #else
 	new_rev = &ucode_new_rev;
 	cont_sz = &container_size;
 	cont    = &container;
+	patch   = &amd_ucode_patch;
 #endif
 
 	data = ucode;
@@ -213,9 +216,9 @@ static void apply_ucode_in_initrd(void *ucode, size_t size)
 			rev = mc->hdr.patch_id;
 			*new_rev = rev;
 
-			/* save ucode patch */
-			memcpy(amd_ucode_patch, mc,
-			       min_t(u32, header[1], PATCH_MAX_SIZE));
+			if (save_patch)
+				memcpy(patch, mc,
+				       min_t(u32, header[1], PATCH_MAX_SIZE));
 		}
 	}
 
@@ -246,7 +249,7 @@ void __init load_ucode_amd_bsp(void)
 	*data = cp.data;
 	*size = cp.size;
 
-	apply_ucode_in_initrd(cp.data, cp.size);
+	apply_ucode_in_initrd(cp.data, cp.size, true);
 }
 
 #ifdef CONFIG_X86_32
@@ -263,7 +266,7 @@ void load_ucode_amd_ap(void)
 	size_t *usize;
 	void **ucode;
 
-	mc = (struct microcode_amd *)__pa(amd_ucode_patch);
+	mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch);
 	if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
 		__apply_microcode_amd(mc);
 		return;
@@ -275,7 +278,7 @@ void load_ucode_amd_ap(void)
 	if (!*ucode || !*usize)
 		return;
 
-	apply_ucode_in_initrd(*ucode, *usize);
+	apply_ucode_in_initrd(*ucode, *usize, false);
 }
 
 static void __init collect_cpu_sig_on_bsp(void *arg)
@@ -339,7 +342,7 @@ void load_ucode_amd_ap(void)
 		 * AP has a different equivalence ID than BSP, looks like
 		 * mixed-steppings silicon so go through the ucode blob anew.
 		 */
-		apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size);
+		apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size, false);
 	}
 }
 #endif
@@ -347,7 +350,9 @@ void load_ucode_amd_ap(void)
 int __init save_microcode_in_initrd_amd(void)
 {
 	unsigned long cont;
+	int retval = 0;
 	enum ucode_state ret;
+	u8 *cont_va;
 	u32 eax;
 
 	if (!container)
@@ -355,13 +360,15 @@ int __init save_microcode_in_initrd_amd(void)
 
 #ifdef CONFIG_X86_32
 	get_bsp_sig();
 	cont = (unsigned long)container;
+	cont_va = __va(container);
 #else
 	/*
 	 * We need the physical address of the container for both bitness since
 	 * boot_params.hdr.ramdisk_image is a physical address.
 	 */
 	cont = __pa(container);
+	cont_va = container;
 #endif
 
 	/*
@@ -372,6 +379,8 @@ int __init save_microcode_in_initrd_amd(void)
 	if (relocated_ramdisk)
 		container = (u8 *)(__va(relocated_ramdisk) +
 				   (cont - boot_params.hdr.ramdisk_image));
+	else
+		container = cont_va;
 
 	if (ucode_new_rev)
 		pr_info("microcode: updated early to new patch_level=0x%08x\n",
@@ -382,7 +391,7 @@ int __init save_microcode_in_initrd_amd(void)
 
 	ret = load_microcode_amd(eax, container, container_size);
 	if (ret != UCODE_OK)
-		return -EINVAL;
+		retval = -EINVAL;
 
 	/*
 	 * This will be freed any msec now, stash patches for the current
@@ -391,5 +400,5 @@ int __init save_microcode_in_initrd_amd(void)
 	container = NULL;
 	container_size = 0;
 
-	return 0;
+	return retval;
 }
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index dd9d6190b08d..2ce9051174e6 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -465,6 +465,14 @@ static void mc_bp_resume(void)
 
 	if (uci->valid && uci->mc)
 		microcode_ops->apply_microcode(cpu);
+	else if (!uci->mc)
+		/*
+		 * We might resume and not have applied late microcode but still
+		 * have a newer patch stashed from the early loader. We don't
+		 * have it in uci->mc so we have to load it the same way we're
+		 * applying patches early on the APs.
+		 */
+		load_ucode_ap();
 }
 
 static struct syscore_ops mc_syscore_ops = {
diff --git a/arch/x86/kernel/cpu/microcode/core_early.c b/arch/x86/kernel/cpu/microcode/core_early.c
index 5f28a64e71ea..2c017f242a78 100644
--- a/arch/x86/kernel/cpu/microcode/core_early.c
+++ b/arch/x86/kernel/cpu/microcode/core_early.c
@@ -124,7 +124,7 @@ void __init load_ucode_bsp(void)
 static bool check_loader_disabled_ap(void)
 {
 #ifdef CONFIG_X86_32
-	return __pa_nodebug(dis_ucode_ldr);
+	return *((bool *)__pa_nodebug(&dis_ucode_ldr));
 #else
 	return dis_ucode_ldr;
 #endif
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
index adf138eac85c..f9ed429d6e4f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -486,14 +486,17 @@ static struct attribute_group snbep_uncore_qpi_format_group = {
 	.attrs = snbep_uncore_qpi_formats_attr,
 };
 
-#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT()			\
-	.init_box	= snbep_uncore_msr_init_box,		\
+#define __SNBEP_UNCORE_MSR_OPS_COMMON_INIT()			\
 	.disable_box	= snbep_uncore_msr_disable_box,		\
 	.enable_box	= snbep_uncore_msr_enable_box,		\
 	.disable_event	= snbep_uncore_msr_disable_event,	\
 	.enable_event	= snbep_uncore_msr_enable_event,	\
 	.read_counter	= uncore_msr_read_counter
 
+#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT()			\
+	__SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),			\
+	.init_box	= snbep_uncore_msr_init_box		\
+
 static struct intel_uncore_ops snbep_uncore_msr_ops = {
 	SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
 };
@@ -1919,6 +1922,30 @@ static struct intel_uncore_type hswep_uncore_cbox = {
 	.format_group		= &hswep_uncore_cbox_format_group,
 };
 
+/*
+ * Write SBOX Initialization register bit by bit to avoid spurious #GPs
+ */
+static void hswep_uncore_sbox_msr_init_box(struct intel_uncore_box *box)
+{
+	unsigned msr = uncore_msr_box_ctl(box);
+
+	if (msr) {
+		u64 init = SNBEP_PMON_BOX_CTL_INT;
+		u64 flags = 0;
+		int i;
+
+		for_each_set_bit(i, (unsigned long *)&init, 64) {
+			flags |= (1ULL << i);
+			wrmsrl(msr, flags);
+		}
+	}
+}
+
+static struct intel_uncore_ops hswep_uncore_sbox_msr_ops = {
+	__SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+	.init_box		= hswep_uncore_sbox_msr_init_box
+};
+
 static struct attribute *hswep_uncore_sbox_formats_attr[] = {
 	&format_attr_event.attr,
 	&format_attr_umask.attr,
@@ -1944,7 +1971,7 @@ static struct intel_uncore_type hswep_uncore_sbox = {
 	.event_mask		= HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
 	.box_ctl		= HSWEP_S0_MSR_PMON_BOX_CTL,
 	.msr_offset		= HSWEP_SBOX_MSR_OFFSET,
-	.ops			= &snbep_uncore_msr_ops,
+	.ops			= &hswep_uncore_sbox_msr_ops,
 	.format_group		= &hswep_uncore_sbox_format_group,
 };
 
@@ -2025,13 +2052,27 @@ static struct intel_uncore_type hswep_uncore_imc = {
 	SNBEP_UNCORE_PCI_COMMON_INIT(),
 };
 
+static unsigned hswep_uncore_irp_ctrs[] = {0xa0, 0xa8, 0xb0, 0xb8};
+
+static u64 hswep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct pci_dev *pdev = box->pci_dev;
+	struct hw_perf_event *hwc = &event->hw;
+	u64 count = 0;
+
+	pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
+	pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
+
+	return count;
+}
+
 static struct intel_uncore_ops hswep_uncore_irp_ops = {
 	.init_box	= snbep_uncore_pci_init_box,
 	.disable_box	= snbep_uncore_pci_disable_box,
 	.enable_box	= snbep_uncore_pci_enable_box,
 	.disable_event	= ivbep_uncore_irp_disable_event,
 	.enable_event	= ivbep_uncore_irp_enable_event,
-	.read_counter	= ivbep_uncore_irp_read_counter,
+	.read_counter	= hswep_uncore_irp_read_counter,
 };
 
 static struct intel_uncore_type hswep_uncore_irp = {
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 1abcb50b48ae..ff86f19b5758 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -24,7 +24,6 @@ static char x86_stack_ids[][8] = {
 		[ DEBUG_STACK-1			]	= "#DB",
 		[ NMI_STACK-1			]	= "NMI",
 		[ DOUBLEFAULT_STACK-1		]	= "#DF",
-		[ STACKFAULT_STACK-1		]	= "#SS",
 		[ MCE_STACK-1			]	= "#MC",
 #if DEBUG_STKSZ > EXCEPTION_STKSZ
 		[ N_EXCEPTION_STACKS ...
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index df088bb03fb3..c0226ab54106 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -828,9 +828,15 @@ ENTRY(native_iret)
 	jnz native_irq_return_ldt
 #endif
 
+.global native_irq_return_iret
 native_irq_return_iret:
+	/*
+	 * This may fault. Non-paranoid faults on return to userspace are
+	 * handled by fixup_bad_iret. These include #SS, #GP, and #NP.
+	 * Double-faults due to espfix64 are handled in do_double_fault.
+	 * Other faults here are fatal.
+	 */
 	iretq
-	_ASM_EXTABLE(native_irq_return_iret, bad_iret)
 
 #ifdef CONFIG_X86_ESPFIX64
 native_irq_return_ldt:
@@ -858,25 +864,6 @@ native_irq_return_ldt:
 	jmp native_irq_return_iret
 #endif
 
-	.section .fixup,"ax"
-bad_iret:
-	/*
-	 * The iret traps when the %cs or %ss being restored is bogus.
-	 * We've lost the original trap vector and error code.
-	 * #GPF is the most likely one to get for an invalid selector.
-	 * So pretend we completed the iret and took the #GPF in user mode.
-	 *
-	 * We are now running with the kernel GS after exception recovery.
-	 * But error_entry expects us to have user GS to match the user %cs,
-	 * so swap back.
-	 */
-	pushq $0
-
-	SWAPGS
-	jmp general_protection
-
-	.previous
-
 	/* edi: workmask, edx: work */
 retint_careful:
 	CFI_RESTORE_STATE
@@ -922,37 +909,6 @@ ENTRY(retint_kernel)
 	CFI_ENDPROC
 END(common_interrupt)
 
-	/*
-	 * If IRET takes a fault on the espfix stack, then we
-	 * end up promoting it to a doublefault. In that case,
-	 * modify the stack to make it look like we just entered
-	 * the #GP handler from user space, similar to bad_iret.
-	 */
-#ifdef CONFIG_X86_ESPFIX64
-	ALIGN
-__do_double_fault:
-	XCPT_FRAME 1 RDI+8
-	movq RSP(%rdi),%rax		/* Trap on the espfix stack? */
-	sarq $PGDIR_SHIFT,%rax
-	cmpl $ESPFIX_PGD_ENTRY,%eax
-	jne do_double_fault		/* No, just deliver the fault */
-	cmpl $__KERNEL_CS,CS(%rdi)
-	jne do_double_fault
-	movq RIP(%rdi),%rax
-	cmpq $native_irq_return_iret,%rax
-	jne do_double_fault		/* This shouldn't happen... */
-	movq PER_CPU_VAR(kernel_stack),%rax
-	subq $(6*8-KERNEL_STACK_OFFSET),%rax	/* Reset to original stack */
-	movq %rax,RSP(%rdi)
-	movq $0,(%rax)			/* Missing (lost) #GP error code */
-	movq $general_protection,RIP(%rdi)
-	retq
-	CFI_ENDPROC
-END(__do_double_fault)
-#else
-# define __do_double_fault do_double_fault
-#endif
-
 /*
  * APIC interrupts.
  */
@@ -1124,7 +1080,7 @@ idtentry overflow do_overflow has_error_code=0
 idtentry bounds do_bounds has_error_code=0
 idtentry invalid_op do_invalid_op has_error_code=0
 idtentry device_not_available do_device_not_available has_error_code=0
-idtentry double_fault __do_double_fault has_error_code=1 paranoid=1
+idtentry double_fault do_double_fault has_error_code=1 paranoid=1
 idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
 idtentry invalid_TSS do_invalid_TSS has_error_code=1
 idtentry segment_not_present do_segment_not_present has_error_code=1
@@ -1289,7 +1245,7 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
 
 idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
 idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
-idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1
+idtentry stack_segment do_stack_segment has_error_code=1
 #ifdef CONFIG_XEN
 idtentry xen_debug do_debug has_error_code=0
 idtentry xen_int3 do_int3 has_error_code=0
@@ -1399,17 +1355,16 @@ error_sti:
 
 /*
  * There are two places in the kernel that can potentially fault with
- * usergs. Handle them here. The exception handlers after iret run with
- * kernel gs again, so don't set the user space flag. B stepping K8s
- * sometimes report an truncated RIP for IRET exceptions returning to
- * compat mode. Check for these here too.
+ * usergs. Handle them here. B stepping K8s sometimes report a
+ * truncated RIP for IRET exceptions returning to compat mode. Check
+ * for these here too.
  */
 error_kernelspace:
 	CFI_REL_OFFSET rcx, RCX+8
 	incl %ebx
 	leaq native_irq_return_iret(%rip),%rcx
 	cmpq %rcx,RIP+8(%rsp)
-	je error_swapgs
+	je error_bad_iret
 	movl %ecx,%eax	/* zero extend */
 	cmpq %rax,RIP+8(%rsp)
 	je bstep_iret
@@ -1420,7 +1375,15 @@ error_kernelspace:
 bstep_iret:
 	/* Fix truncated RIP */
 	movq %rcx,RIP+8(%rsp)
-	jmp error_swapgs
+	/* fall through */
+
+error_bad_iret:
+	SWAPGS
+	mov %rsp,%rdi
+	call fixup_bad_iret
+	mov %rax,%rsp
+	decl %ebx	/* Return to usergs */
+	jmp error_sti
 	CFI_ENDPROC
 END(error_entry)
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 749b0e423419..e510618b2e91 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1484,7 +1484,7 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
 	 */
 	if (work & _TIF_NOHZ) {
 		user_exit();
-		work &= ~TIF_NOHZ;
+		work &= ~_TIF_NOHZ;
 	}
 
 #ifdef CONFIG_SECCOMP
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 4d2128ac70bd..668d8f2a8781 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1303,10 +1303,14 @@ static void __ref remove_cpu_from_maps(int cpu)
 	numa_remove_cpu(cpu);
 }
 
+static DEFINE_PER_CPU(struct completion, die_complete);
+
 void cpu_disable_common(void)
 {
 	int cpu = smp_processor_id();
 
+	init_completion(&per_cpu(die_complete, smp_processor_id()));
+
 	remove_siblinginfo(cpu);
 
 	/* It's now safe to remove this processor from the online map */
@@ -1316,8 +1320,6 @@ void cpu_disable_common(void)
 	fixup_irqs();
 }
 
-static DEFINE_PER_CPU(struct completion, die_complete);
-
 int native_cpu_disable(void)
 {
 	int ret;
@@ -1327,16 +1329,21 @@ int native_cpu_disable(void)
 		return ret;
 
 	clear_local_APIC();
-	init_completion(&per_cpu(die_complete, smp_processor_id()));
 	cpu_disable_common();
 
 	return 0;
 }
 
+void cpu_die_common(unsigned int cpu)
+{
+	wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ);
+}
+
 void native_cpu_die(unsigned int cpu)
 {
 	/* We don't do anything here: idle task is faking death itself. */
-	wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ);
+
+	cpu_die_common(cpu);
 
 	/* They ack this in play_dead() by setting CPU_DEAD */
 	if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 0d0e922fafc1..de801f22128a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -233,32 +233,40 @@ DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op)
 DO_ERROR(X86_TRAP_OLD_MF, SIGFPE,  "coprocessor segment overrun",coprocessor_segment_overrun)
 DO_ERROR(X86_TRAP_TS,     SIGSEGV, "invalid TSS",		invalid_TSS)
 DO_ERROR(X86_TRAP_NP,     SIGBUS,  "segment not present",	segment_not_present)
-#ifdef CONFIG_X86_32
 DO_ERROR(X86_TRAP_SS,     SIGBUS,  "stack segment",		stack_segment)
-#endif
 DO_ERROR(X86_TRAP_AC,     SIGBUS,  "alignment check",		alignment_check)
 
 #ifdef CONFIG_X86_64
 /* Runs on IST stack */
-dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
-{
-	enum ctx_state prev_state;
-
-	prev_state = exception_enter();
-	if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
-		       X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
-		preempt_conditional_sti(regs);
-		do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
-		preempt_conditional_cli(regs);
-	}
-	exception_exit(prev_state);
-}
-
 dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 {
 	static const char str[] = "double fault";
 	struct task_struct *tsk = current;
 
+#ifdef CONFIG_X86_ESPFIX64
+	extern unsigned char native_irq_return_iret[];
+
+	/*
+	 * If IRET takes a non-IST fault on the espfix64 stack, then we
+	 * end up promoting it to a doublefault. In that case, modify
+	 * the stack to make it look like we just entered the #GP
+	 * handler from user space, similar to bad_iret.
+	 */
+	if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
+		regs->cs == __KERNEL_CS &&
+		regs->ip == (unsigned long)native_irq_return_iret)
+	{
+		struct pt_regs *normal_regs = task_pt_regs(current);
+
+		/* Fake a #GP(0) from userspace. */
+		memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
+		normal_regs->orig_ax = 0;  /* Missing (lost) #GP error code */
+		regs->ip = (unsigned long)general_protection;
+		regs->sp = (unsigned long)&normal_regs->orig_ax;
+		return;
+	}
+#endif
+
 	exception_enter();
 	/* Return not checked because double check cannot be ignored */
 	notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
@@ -399,6 +407,35 @@ asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs)
 	return regs;
 }
 NOKPROBE_SYMBOL(sync_regs);
+
+struct bad_iret_stack {
+	void *error_entry_ret;
+	struct pt_regs regs;
+};
+
+asmlinkage __visible
+struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
+{
+	/*
+	 * This is called from entry_64.S early in handling a fault
+	 * caused by a bad iret to user mode. To handle the fault
+	 * correctly, we want move our stack frame to task_pt_regs
+	 * and we want to pretend that the exception came from the
+	 * iret target.
+	 */
+	struct bad_iret_stack *new_stack =
+		container_of(task_pt_regs(current),
+			     struct bad_iret_stack, regs);
+
+	/* Copy the IRET target to the new stack. */
+	memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
+
+	/* Copy the remainder of the stack from the current stack. */
+	memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
+
+	BUG_ON(!user_mode_vm(&new_stack->regs));
+	return new_stack;
+}
 #endif
 
 /*
@@ -778,7 +815,7 @@ void __init trap_init(void)
 	set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
 	set_intr_gate(X86_TRAP_TS, invalid_TSS);
 	set_intr_gate(X86_TRAP_NP, segment_not_present);
-	set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK);
+	set_intr_gate(X86_TRAP_SS, stack_segment);
 	set_intr_gate(X86_TRAP_GP, general_protection);
 	set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
 	set_intr_gate(X86_TRAP_MF, coprocessor_error);