author     Linus Torvalds <torvalds@linux-foundation.org>  2019-07-20 14:24:49 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2019-07-20 14:24:49 -0400
commit     c6dd78fcb8eefa15dd861889e0f59d301cb5230c (patch)
tree       0ff947de7cd07590d2735f7add164f0bbda1c840
parent     46f5c0cc3af0ecb76224a91d2997d74e35ff7821 (diff)
parent     6879298bd0673840cadd1fb36d7225485504ceb4 (diff)
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Thomas Gleixner:
 "A set of x86 specific fixes and updates:

   - The CR2 corruption fixes which store CR2 early in the entry code
     and hand the stored address to the fault handlers.

   - Revert a forgotten leftover of the dropped FSGSBASE series.

   - Plug a memory leak in the boot code.

   - Make the Hyper-V assist functionality robust by zeroing the shadow
     page.

   - Remove a useless check for dead processes with LDT.

   - Update paravirt and VMware maintainers entries.

   - A few cleanup patches addressing various compiler warnings"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/entry/64: Prevent clobbering of saved CR2 value
  x86/hyper-v: Zero out the VP ASSIST PAGE on allocation
  x86, boot: Remove multiple copy of static function sanitize_boot_params()
  x86/boot/compressed/64: Remove unused variable
  x86/boot/efi: Remove unused variables
  x86/mm, tracing: Fix CR2 corruption
  x86/entry/64: Update comments and sanity tests for create_gap
  x86/entry/64: Simplify idtentry a little
  x86/entry/32: Simplify common_exception
  x86/paravirt: Make read_cr2() CALLEE_SAVE
  MAINTAINERS: Update PARAVIRT_OPS_INTERFACE and VMWARE_HYPERVISOR_INTERFACE
  x86/process: Delete useless check for dead process with LDT
  x86: math-emu: Hide clang warnings for 16-bit overflow
  x86/e820: Use proper booleans instead of 0/1
  x86/apic: Silence -Wtype-limits compiler warnings
  x86/mm: Free sme_early_buffer after init
  x86/boot: Fix memory leak in default_get_smp_config()
  Revert "x86/ptrace: Prevent ptrace from clearing the FS/GS selector" and fix the test
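Editor's note: a minimal, stand-alone C sketch of the CR2 calling-convention change summarised above (not kernel code; fake_cr2 and tracing_hook are invented stand-ins, only the three-argument do_page_fault() shape comes from the diff below). The entry stub reads CR2 before any traceable C code runs and hands the address to the handler, so a nested fault in a tracing hook can no longer corrupt it.

/*
 * Illustrative sketch only. fake_cr2 stands in for the CR2 register,
 * tracing_hook for any tracing/context-tracking code that can itself
 * fault and therefore overwrite CR2.
 */
#include <stdio.h>

static unsigned long fake_cr2;                 /* pretend CR2: latches the last faulting address */

static unsigned long read_cr2(void) { return fake_cr2; }

/* A tracing hook that takes a nested (minor) fault and clobbers CR2. */
static void tracing_hook(void) { fake_cr2 = 0xdeadbeef; }

/* Old convention: the C handler reads CR2 after tracing may have run. */
static void old_do_page_fault(unsigned long error_code)
{
	(void)error_code;
	tracing_hook();
	printf("old: faulting address %#lx (corrupted)\n", read_cr2());
}

/* New convention: the entry code captured CR2 early and hands it over. */
static void new_do_page_fault(unsigned long error_code, unsigned long address)
{
	(void)error_code;
	tracing_hook();
	printf("new: faulting address %#lx (preserved)\n", address);
}

int main(void)
{
	fake_cr2 = 0x1000;                      /* the real faulting address   */
	unsigned long address = read_cr2();     /* entry stub: read CR2 first  */

	old_do_page_fault(0);
	fake_cr2 = 0x1000;
	new_do_page_fault(0, address);
	return 0;
}

Built with any C compiler, the first call prints the clobbered value and the second prints the original address, which is the property the entry_64.S change below enforces by parking CR2 in %r12 before enter_from_user_mode() runs.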
-rw-r--r--  MAINTAINERS | 6
-rw-r--r--  arch/x86/boot/compressed/eboot.c | 10
-rw-r--r--  arch/x86/boot/compressed/misc.c | 1
-rw-r--r--  arch/x86/boot/compressed/misc.h | 1
-rw-r--r--  arch/x86/boot/compressed/pgtable_64.c | 1
-rw-r--r--  arch/x86/entry/calling.h | 6
-rw-r--r--  arch/x86/entry/entry_32.S | 61
-rw-r--r--  arch/x86/entry/entry_64.S | 155
-rw-r--r--  arch/x86/hyperv/hv_init.c | 13
-rw-r--r--  arch/x86/include/asm/apic.h | 2
-rw-r--r--  arch/x86/include/asm/kvm_para.h | 2
-rw-r--r--  arch/x86/include/asm/paravirt.h | 22
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 2
-rw-r--r--  arch/x86/include/asm/traps.h | 4
-rw-r--r--  arch/x86/kernel/apic/apic.c | 2
-rw-r--r--  arch/x86/kernel/asm-offsets.c | 1
-rw-r--r--  arch/x86/kernel/e820.c | 4
-rw-r--r--  arch/x86/kernel/head_64.S | 4
-rw-r--r--  arch/x86/kernel/kvm.c | 8
-rw-r--r--  arch/x86/kernel/mpparse.c | 10
-rw-r--r--  arch/x86/kernel/paravirt.c | 2
-rw-r--r--  arch/x86/kernel/process_64.c | 12
-rw-r--r--  arch/x86/kernel/ptrace.c | 14
-rw-r--r--  arch/x86/kernel/traps.c | 6
-rw-r--r--  arch/x86/math-emu/fpu_emu.h | 2
-rw-r--r--  arch/x86/math-emu/reg_constant.c | 2
-rw-r--r--  arch/x86/mm/fault.c | 30
-rw-r--r--  arch/x86/mm/mem_encrypt.c | 2
-rw-r--r--  arch/x86/xen/enlighten_pv.c | 3
-rw-r--r--  arch/x86/xen/mmu_pv.c | 12
-rw-r--r--  arch/x86/xen/xen-asm.S | 16
-rw-r--r--  arch/x86/xen/xen-ops.h | 3
-rw-r--r--  tools/testing/selftests/x86/fsgsbase.c | 22
33 files changed, 226 insertions, 215 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 8f663ceabd6d..783569e3c4b4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12133,7 +12133,8 @@ F: Documentation/driver-api/parport*.rst
 
 PARAVIRT_OPS INTERFACE
 M:	Juergen Gross <jgross@suse.com>
-M:	Alok Kataria <akataria@vmware.com>
+M:	Thomas Hellstrom <thellstrom@vmware.com>
+M:	"VMware, Inc." <pv-drivers@vmware.com>
 L:	virtualization@lists.linux-foundation.org
 S:	Supported
 F:	Documentation/virtual/paravirt_ops.txt
@@ -17179,7 +17180,8 @@ S: Maintained
 F:	drivers/misc/vmw_balloon.c
 
 VMWARE HYPERVISOR INTERFACE
-M:	Alok Kataria <akataria@vmware.com>
+M:	Thomas Hellstrom <thellstrom@vmware.com>
+M:	"VMware, Inc." <pv-drivers@vmware.com>
 L:	virtualization@lists.linux-foundation.org
 S:	Supported
 F:	arch/x86/kernel/cpu/vmware.c
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 220d1279d0e2..d6662fdef300 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -384,14 +384,11 @@ struct boot_params *make_boot_params(struct efi_config *c)
 	struct apm_bios_info *bi;
 	struct setup_header *hdr;
 	efi_loaded_image_t *image;
-	void *options, *handle;
+	void *handle;
 	efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
 	int options_size = 0;
 	efi_status_t status;
 	char *cmdline_ptr;
-	u16 *s2;
-	u8 *s1;
-	int i;
 	unsigned long ramdisk_addr;
 	unsigned long ramdisk_size;
 
@@ -494,8 +491,6 @@ static void add_e820ext(struct boot_params *params,
 			struct setup_data *e820ext, u32 nr_entries)
 {
 	struct setup_data *data;
-	efi_status_t status;
-	unsigned long size;
 
 	e820ext->type = SETUP_E820_EXT;
 	e820ext->len = nr_entries * sizeof(struct boot_e820_entry);
@@ -677,8 +672,6 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
 			void *priv)
 {
 	const char *signature;
-	__u32 nr_desc;
-	efi_status_t status;
 	struct exit_boot_struct *p = priv;
 
 	signature = efi_is_64bit() ? EFI64_LOADER_SIGNATURE
@@ -747,7 +740,6 @@ struct boot_params *
 efi_main(struct efi_config *c, struct boot_params *boot_params)
 {
 	struct desc_ptr *gdt = NULL;
-	efi_loaded_image_t *image;
 	struct setup_header *hdr = &boot_params->hdr;
 	efi_status_t status;
 	struct desc_struct *desc;
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 24e65a0f756d..53ac0cb2396d 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -17,6 +17,7 @@
 #include "pgtable.h"
 #include "../string.h"
 #include "../voffset.h"
+#include <asm/bootparam_utils.h>
 
 /*
  * WARNING!!
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index d2f184165934..c8181392f70d 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -23,7 +23,6 @@
 #include <asm/page.h>
 #include <asm/boot.h>
 #include <asm/bootparam.h>
-#include <asm/bootparam_utils.h>
 
 #define BOOT_CTYPE_H
 #include <linux/acpi.h>
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index f8debf7aeb4c..5f2d03067ae5 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -40,7 +40,6 @@ int cmdline_find_option_bool(const char *option);
 static unsigned long find_trampoline_placement(void)
 {
 	unsigned long bios_start = 0, ebda_start = 0;
-	unsigned long trampoline_start;
 	struct boot_e820_entry *entry;
 	char *signature;
 	int i;
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 9f1f9e3b8230..830bd984182b 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -343,3 +343,9 @@ For 32-bit we have the following conventions - kernel is built with
 .Lafter_call_\@:
 #endif
 .endm
+
+#ifdef CONFIG_PARAVIRT_XXL
+#define GET_CR2_INTO(reg) GET_CR2_INTO_AX ; _ASM_MOV %_ASM_AX, reg
+#else
+#define GET_CR2_INTO(reg) _ASM_MOV %cr2, reg
+#endif
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 90b473297299..2bb986f305ac 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -294,9 +294,11 @@
 .Lfinished_frame_\@:
 .endm
 
-.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
+.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0
 	cld
+.if \skip_gs == 0
 	PUSH_GS
+.endif
 	FIXUP_FRAME
 	pushl	%fs
 	pushl	%es
@@ -313,13 +315,13 @@
 	movl	%edx, %es
 	movl	$(__KERNEL_PERCPU), %edx
 	movl	%edx, %fs
+.if \skip_gs == 0
 	SET_KERNEL_GS %edx
-
+.endif
 	/* Switch to kernel stack if necessary */
 .if \switch_stacks > 0
 	SWITCH_TO_KERNEL_STACK
 .endif
-
 .endm
 
 .macro SAVE_ALL_NMI cr3_reg:req
@@ -1441,39 +1443,46 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
 
 ENTRY(page_fault)
 	ASM_CLAC
-	pushl	$do_page_fault
-	ALIGN
-	jmp	common_exception
+	pushl	$0; /* %gs's slot on the stack */
+
+	SAVE_ALL switch_stacks=1 skip_gs=1
+
+	ENCODE_FRAME_POINTER
+	UNWIND_ESPFIX_STACK
+
+	/* fixup %gs */
+	GS_TO_REG %ecx
+	REG_TO_PTGS %ecx
+	SET_KERNEL_GS %ecx
+
+	GET_CR2_INTO(%ecx)		# might clobber %eax
+
+	/* fixup orig %eax */
+	movl	PT_ORIG_EAX(%esp), %edx	# get the error code
+	movl	$-1, PT_ORIG_EAX(%esp)	# no syscall to restart
+
+	TRACE_IRQS_OFF
+	movl	%esp, %eax		# pt_regs pointer
+	call	do_page_fault
+	jmp	ret_from_exception
 END(page_fault)
 
 common_exception:
 	/* the function address is in %gs's slot on the stack */
-	FIXUP_FRAME
-	pushl	%fs
-	pushl	%es
-	pushl	%ds
-	pushl	%eax
-	movl	$(__USER_DS), %eax
-	movl	%eax, %ds
-	movl	%eax, %es
-	movl	$(__KERNEL_PERCPU), %eax
-	movl	%eax, %fs
-	pushl	%ebp
-	pushl	%edi
-	pushl	%esi
-	pushl	%edx
-	pushl	%ecx
-	pushl	%ebx
-	SWITCH_TO_KERNEL_STACK
+	SAVE_ALL switch_stacks=1 skip_gs=1
 	ENCODE_FRAME_POINTER
-	cld
 	UNWIND_ESPFIX_STACK
+
+	/* fixup %gs */
 	GS_TO_REG %ecx
 	movl	PT_GS(%esp), %edi	# get the function address
-	movl	PT_ORIG_EAX(%esp), %edx	# get the error code
-	movl	$-1, PT_ORIG_EAX(%esp)	# no syscall to restart
 	REG_TO_PTGS %ecx
 	SET_KERNEL_GS %ecx
+
+	/* fixup orig %eax */
+	movl	PT_ORIG_EAX(%esp), %edx	# get the error code
+	movl	$-1, PT_ORIG_EAX(%esp)	# no syscall to restart
+
 	TRACE_IRQS_OFF
 	movl	%esp, %eax		# pt_regs pointer
 	CALL_NOSPEC %edi
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 35a66fcfcb91..3f5a978a02a7 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -864,18 +864,84 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
  */
 #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)
 
+.macro idtentry_part do_sym, has_error_code:req, read_cr2:req, paranoid:req, shift_ist=-1, ist_offset=0
+
+	.if \paranoid
+	call	paranoid_entry
+	/* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
+	.else
+	call	error_entry
+	.endif
+	UNWIND_HINT_REGS
+
+	.if \read_cr2
+	/*
+	 * Store CR2 early so subsequent faults cannot clobber it. Use R12 as
+	 * intermediate storage as RDX can be clobbered in enter_from_user_mode().
+	 * GET_CR2_INTO can clobber RAX.
+	 */
+	GET_CR2_INTO(%r12);
+	.endif
+
+	.if \shift_ist != -1
+	TRACE_IRQS_OFF_DEBUG		/* reload IDT in case of recursion */
+	.else
+	TRACE_IRQS_OFF
+	.endif
+
+	.if \paranoid == 0
+	testb	$3, CS(%rsp)
+	jz	.Lfrom_kernel_no_context_tracking_\@
+	CALL_enter_from_user_mode
+.Lfrom_kernel_no_context_tracking_\@:
+	.endif
+
+	movq	%rsp, %rdi		/* pt_regs pointer */
+
+	.if \has_error_code
+	movq	ORIG_RAX(%rsp), %rsi	/* get error code */
+	movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */
+	.else
+	xorl	%esi, %esi		/* no error code */
+	.endif
+
+	.if \shift_ist != -1
+	subq	$\ist_offset, CPU_TSS_IST(\shift_ist)
+	.endif
+
+	.if \read_cr2
+	movq	%r12, %rdx		/* Move CR2 into 3rd argument */
+	.endif
+
+	call	\do_sym
+
+	.if \shift_ist != -1
+	addq	$\ist_offset, CPU_TSS_IST(\shift_ist)
+	.endif
+
+	.if \paranoid
+	/* this procedure expect "no swapgs" flag in ebx */
+	jmp	paranoid_exit
+	.else
+	jmp	error_exit
+	.endif
+
+.endm
+
 /**
  * idtentry - Generate an IDT entry stub
  * @sym: Name of the generated entry point
  * @do_sym: C function to be called
  * @has_error_code: True if this IDT vector has an error code on the stack
  * @paranoid: non-zero means that this vector may be invoked from
  *            kernel mode with user GSBASE and/or user CR3.
  *            2 is special -- see below.
  * @shift_ist: Set to an IST index if entries from kernel mode should
  *             decrement the IST stack so that nested entries get a
  *             fresh stack. (This is for #DB, which has a nasty habit
  *             of recursing.)
+ * @create_gap: create a 6-word stack gap when coming from kernel mode.
+ * @read_cr2: load CR2 into the 3rd argument; done before calling any C code
  *
  * idtentry generates an IDT stub that sets up a usable kernel context,
  * creates struct pt_regs, and calls @do_sym. The stub has the following
@@ -900,15 +966,19 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
  * @paranoid == 2 is special: the stub will never switch stacks. This is for
  * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
  */
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0
 ENTRY(\sym)
 	UNWIND_HINT_IRET_REGS offset=\has_error_code*8
 
 	/* Sanity check */
-	.if \shift_ist != -1 && \paranoid == 0
+	.if \shift_ist != -1 && \paranoid != 1
 	.error "using shift_ist requires paranoid=1"
 	.endif
 
+	.if \create_gap && \paranoid
+	.error "using create_gap requires paranoid=0"
+	.endif
+
 	ASM_CLAC
 
 	.if \has_error_code == 0
@@ -934,47 +1004,7 @@ ENTRY(\sym)
 .Lfrom_usermode_no_gap_\@:
 	.endif
 
-	.if \paranoid
-	call	paranoid_entry
-	.else
-	call	error_entry
-	.endif
-	UNWIND_HINT_REGS
-	/* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
-
-	.if \paranoid
-	.if \shift_ist != -1
-	TRACE_IRQS_OFF_DEBUG		/* reload IDT in case of recursion */
-	.else
-	TRACE_IRQS_OFF
-	.endif
-	.endif
-
-	movq	%rsp, %rdi		/* pt_regs pointer */
-
-	.if \has_error_code
-	movq	ORIG_RAX(%rsp), %rsi	/* get error code */
-	movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */
-	.else
-	xorl	%esi, %esi		/* no error code */
-	.endif
-
-	.if \shift_ist != -1
-	subq	$\ist_offset, CPU_TSS_IST(\shift_ist)
-	.endif
-
-	call	\do_sym
-
-	.if \shift_ist != -1
-	addq	$\ist_offset, CPU_TSS_IST(\shift_ist)
-	.endif
-
-	/* these procedures expect "no swapgs" flag in ebx */
-	.if \paranoid
-	jmp	paranoid_exit
-	.else
-	jmp	error_exit
-	.endif
+	idtentry_part \do_sym, \has_error_code, \read_cr2, \paranoid, \shift_ist, \ist_offset
 
 	.if \paranoid == 1
 	/*
@@ -983,21 +1013,9 @@ ENTRY(\sym)
 	 * run in real process context if user_mode(regs).
 	 */
 .Lfrom_usermode_switch_stack_\@:
-	call	error_entry
-
-	movq	%rsp, %rdi		/* pt_regs pointer */
-
-	.if \has_error_code
-	movq	ORIG_RAX(%rsp), %rsi	/* get error code */
-	movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */
-	.else
-	xorl	%esi, %esi		/* no error code */
+	idtentry_part \do_sym, \has_error_code, \read_cr2, paranoid=0
 	.endif
 
-	call	\do_sym
-
-	jmp	error_exit
-	.endif
 _ASM_NOKPROBE(\sym)
 END(\sym)
 .endm
@@ -1007,7 +1025,7 @@ idtentry overflow do_overflow has_error_code=0
 idtentry bounds do_bounds has_error_code=0
 idtentry invalid_op do_invalid_op has_error_code=0
 idtentry device_not_available do_device_not_available has_error_code=0
-idtentry double_fault do_double_fault has_error_code=1 paranoid=2
+idtentry double_fault do_double_fault has_error_code=1 paranoid=2 read_cr2=1
 idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
 idtentry invalid_TSS do_invalid_TSS has_error_code=1
 idtentry segment_not_present do_segment_not_present has_error_code=1
@@ -1179,10 +1197,10 @@ idtentry xendebug do_debug has_error_code=0
 #endif
 
 idtentry general_protection do_general_protection has_error_code=1
-idtentry page_fault do_page_fault has_error_code=1
+idtentry page_fault do_page_fault has_error_code=1 read_cr2=1
 
 #ifdef CONFIG_KVM_GUEST
-idtentry async_page_fault do_async_page_fault has_error_code=1
+idtentry async_page_fault do_async_page_fault has_error_code=1 read_cr2=1
 #endif
 
 #ifdef CONFIG_X86_MCE
@@ -1281,18 +1299,9 @@ ENTRY(error_entry)
 	movq	%rax, %rsp		/* switch stack */
 	ENCODE_FRAME_POINTER
 	pushq	%r12
-
-	/*
-	 * We need to tell lockdep that IRQs are off. We can't do this until
-	 * we fix gsbase, and we should do it before enter_from_user_mode
-	 * (which can take locks).
-	 */
-	TRACE_IRQS_OFF
-	CALL_enter_from_user_mode
 	ret
 
 .Lerror_entry_done:
-	TRACE_IRQS_OFF
 	ret
 
 	/*
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 0e033ef11a9f..0d258688c8cf 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -60,8 +60,17 @@ static int hv_cpu_init(unsigned int cpu)
 	if (!hv_vp_assist_page)
 		return 0;
 
-	if (!*hvp)
-		*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
+	/*
+	 * The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's Section
+	 * 5.2.1 "GPA Overlay Pages"). Here it must be zeroed out to make sure
+	 * we always write the EOI MSR in hv_apic_eoi_write() *after* the
+	 * EOI optimization is disabled in hv_cpu_die(), otherwise a CPU may
+	 * not be stopped in the case of CPU offlining and the VM will hang.
+	 */
+	if (!*hvp) {
+		*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO,
+				 PAGE_KERNEL);
+	}
 
 	if (*hvp) {
 		u64 val;
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 050e5f9ebf81..e647aa095867 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -49,7 +49,7 @@ static inline void generic_apic_probe(void)
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
-extern unsigned int apic_verbosity;
+extern int apic_verbosity;
 extern int local_apic_timer_c2_ok;
 
 extern int disable_apic;
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 5ed3cf1c3934..9b4df6eaa11a 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -92,7 +92,7 @@ void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_pf_reason(void);
 extern void kvm_disable_steal_time(void);
-void do_async_page_fault(struct pt_regs *regs, unsigned long error_code);
+void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
 
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 void __init kvm_spinlock_init(void);
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index d6f5ae2c79ab..dce26f1d13e1 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -116,7 +116,7 @@ static inline void write_cr0(unsigned long x)
 
 static inline unsigned long read_cr2(void)
 {
-	return PVOP_CALL0(unsigned long, mmu.read_cr2);
+	return PVOP_CALLEE0(unsigned long, mmu.read_cr2);
 }
 
 static inline void write_cr2(unsigned long x)
@@ -910,13 +910,7 @@ extern void default_banner(void);
 		  ANNOTATE_RETPOLINE_SAFE;			\
 		  call PARA_INDIRECT(pv_ops+PV_CPU_swapgs);	\
 		 )
-#endif
-
-#define GET_CR2_INTO_RAX				\
-	ANNOTATE_RETPOLINE_SAFE;			\
-	call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2);
 
-#ifdef CONFIG_PARAVIRT_XXL
 #define USERGS_SYSRET64						\
 	PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64),		\
 		  ANNOTATE_RETPOLINE_SAFE;			\
@@ -930,9 +924,19 @@ extern void default_banner(void);
 		  call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);	\
 		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
 #endif
-#endif
+#endif /* CONFIG_PARAVIRT_XXL */
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_PARAVIRT_XXL
+
+#define GET_CR2_INTO_AX						\
+	PARA_SITE(PARA_PATCH(PV_MMU_read_cr2),			\
+		  ANNOTATE_RETPOLINE_SAFE;			\
+		  call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2);	\
+		 )
+
+#endif /* CONFIG_PARAVIRT_XXL */
 
-#endif /* CONFIG_X86_32 */
 
 #endif /* __ASSEMBLY__ */
 #else /* CONFIG_PARAVIRT */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 946f8f1f1efc..639b2df445ee 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -220,7 +220,7 @@ struct pv_mmu_ops {
 	void (*exit_mmap)(struct mm_struct *mm);
 
 #ifdef CONFIG_PARAVIRT_XXL
-	unsigned long (*read_cr2)(void);
+	struct paravirt_callee_save read_cr2;
 	void (*write_cr2)(unsigned long);
 
 	unsigned long (*read_cr3)(void);
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index f2bd284abc16..b25e633033c3 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -74,14 +74,14 @@ dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code);
 dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code);
 dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code);
 #ifdef CONFIG_X86_64
-dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code);
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long address);
 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
 asmlinkage __visible notrace
 struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
 void __init trap_init(void);
 #endif
 dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code);
-dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code);
+dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
 dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code);
 dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code);
 dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 1bd91cb7b320..f5291362da1a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -183,7 +183,7 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 /*
  * Debug level, exported for io_apic.c
  */
-unsigned int apic_verbosity;
+int apic_verbosity;
 
 int pic_mode;
 
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index da64452584b0..5c7ee3df4d0b 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -76,6 +76,7 @@ static void __used common(void)
 	BLANK();
 	OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
 	OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
+	OFFSET(XEN_vcpu_info_arch_cr2, vcpu_info, arch.cr2);
 #endif
 
 	BLANK();
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index e69408bf664b..7da2bcd2b8eb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -86,9 +86,9 @@ static bool _e820__mapped_any(struct e820_table *table,
 			continue;
 		if (entry->addr >= end || entry->addr + entry->size <= start)
 			continue;
-		return 1;
+		return true;
 	}
-	return 0;
+	return false;
 }
 
 bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 66b4a7757397..a6342c899be5 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -29,9 +29,7 @@
 #ifdef CONFIG_PARAVIRT_XXL
 #include <asm/asm-offsets.h>
 #include <asm/paravirt.h>
-#define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg
 #else
-#define GET_CR2_INTO(reg) movq %cr2, reg
 #define INTERRUPT_RETURN iretq
 #endif
 
@@ -323,7 +321,7 @@ early_idt_handler_common:
 
 	cmpq $14,%rsi		/* Page fault? */
 	jnz 10f
-	GET_CR2_INTO(%rdi)	/* Can clobber any volatile register if pv */
+	GET_CR2_INTO(%rdi)	/* can clobber %rax if pv */
 	call early_make_pgtable
 	andl %eax,%eax
 	jz 20f			/* All good */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 6661bd2f08a6..b7f34fe2171e 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -242,23 +242,23 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
 NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
 
 dotraplinkage void
-do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
+do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
 {
 	enum ctx_state prev_state;
 
 	switch (kvm_read_and_reset_pf_reason()) {
 	default:
-		do_page_fault(regs, error_code);
+		do_page_fault(regs, error_code, address);
 		break;
 	case KVM_PV_REASON_PAGE_NOT_PRESENT:
 		/* page is swapped out by the host. */
 		prev_state = exception_enter();
-		kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
+		kvm_async_pf_task_wait((u32)address, !user_mode(regs));
 		exception_exit(prev_state);
 		break;
 	case KVM_PV_REASON_PAGE_READY:
 		rcu_irq_enter();
-		kvm_async_pf_task_wake((u32)read_cr2());
+		kvm_async_pf_task_wake((u32)address);
 		rcu_irq_exit();
 		break;
 	}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 1bfe5c6e6cfe..afac7ccce72f 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -546,17 +546,15 @@ void __init default_get_smp_config(unsigned int early)
 		 * local APIC has default address
 		 */
 		mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-		return;
+		goto out;
 		}
 
 		pr_info("Default MP configuration #%d\n", mpf->feature1);
 		construct_default_ISA_mptable(mpf->feature1);
 
 	} else if (mpf->physptr) {
-		if (check_physptr(mpf, early)) {
-			early_memunmap(mpf, sizeof(*mpf));
-			return;
-		}
+		if (check_physptr(mpf, early))
+			goto out;
 	} else
 		BUG();
 
@@ -565,7 +563,7 @@ void __init default_get_smp_config(unsigned int early)
 	/*
 	 * Only use the first configuration found.
 	 */
-
+out:
 	early_memunmap(mpf, sizeof(*mpf));
 }
 
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 98039d7fb998..0aa6256eedd8 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -370,7 +370,7 @@ struct paravirt_patch_template pv_ops = {
 	.mmu.exit_mmap = paravirt_nop,
 
 #ifdef CONFIG_PARAVIRT_XXL
-	.mmu.read_cr2 = native_read_cr2,
+	.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(native_read_cr2),
 	.mmu.write_cr2 = native_write_cr2,
 	.mmu.read_cr3 = __native_read_cr3,
 	.mmu.write_cr3 = native_write_cr3,
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 250e4c4ac6d9..af64519b2695 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -143,17 +143,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
 
 void release_thread(struct task_struct *dead_task)
 {
-	if (dead_task->mm) {
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
-		if (dead_task->mm->context.ldt) {
-			pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
-				dead_task->comm,
-				dead_task->mm->context.ldt->entries,
-				dead_task->mm->context.ldt->nr_entries);
-			BUG();
-		}
-#endif
-	}
+	WARN_ON(dead_task->mm);
 }
 
 enum which_selector {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 71691a8310e7..0fdbe89d0754 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -369,12 +369,22 @@ static int putreg(struct task_struct *child,
 	case offsetof(struct user_regs_struct,fs_base):
 		if (value >= TASK_SIZE_MAX)
 			return -EIO;
-		x86_fsbase_write_task(child, value);
+		/*
+		 * When changing the FS base, use do_arch_prctl_64()
+		 * to set the index to zero and to set the base
+		 * as requested.
+		 */
+		if (child->thread.fsbase != value)
+			return do_arch_prctl_64(child, ARCH_SET_FS, value);
 		return 0;
 	case offsetof(struct user_regs_struct,gs_base):
+		/*
+		 * Exactly the same here as the %fs handling above.
+		 */
 		if (value >= TASK_SIZE_MAX)
 			return -EIO;
-		x86_gsbase_write_task(child, value);
+		if (child->thread.gsbase != value)
+			return do_arch_prctl_64(child, ARCH_SET_GS, value);
 		return 0;
 #endif
 	}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 87095a477154..4bb0f8447112 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -313,13 +313,10 @@ __visible void __noreturn handle_stack_overflow(const char *message,
 
 #ifdef CONFIG_X86_64
 /* Runs on IST stack */
-dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2)
 {
 	static const char str[] = "double fault";
 	struct task_struct *tsk = current;
-#ifdef CONFIG_VMAP_STACK
-	unsigned long cr2;
-#endif
 
 #ifdef CONFIG_X86_ESPFIX64
 	extern unsigned char native_irq_return_iret[];
@@ -415,7 +412,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 	 * stack even if the actual trigger for the double fault was
 	 * something else.
 	 */
-	cr2 = read_cr2();
 	if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE)
 		handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
 #endif
diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h
index a5a41ec58072..0c122226ca56 100644
--- a/arch/x86/math-emu/fpu_emu.h
+++ b/arch/x86/math-emu/fpu_emu.h
@@ -177,7 +177,7 @@ static inline void reg_copy(FPU_REG const *x, FPU_REG *y)
 #define setexponentpos(x,y) { (*(short *)&((x)->exp)) = \
 	((y) + EXTENDED_Ebias) & 0x7fff; }
 #define exponent16(x) (*(short *)&((x)->exp))
-#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (y); }
+#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (u16)(y); }
 #define addexponent(x,y) { (*(short *)&((x)->exp)) += (y); }
 #define stdexp(x) { (*(short *)&((x)->exp)) += EXTENDED_Ebias; }
 
diff --git a/arch/x86/math-emu/reg_constant.c b/arch/x86/math-emu/reg_constant.c
index 8dc9095bab22..742619e94bdf 100644
--- a/arch/x86/math-emu/reg_constant.c
+++ b/arch/x86/math-emu/reg_constant.c
@@ -18,7 +18,7 @@
 #include "control_w.h"
 
 #define MAKE_REG(s, e, l, h) { l, h, \
-	((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) }
+	(u16)((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) }
 
 FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000);
 #if 0
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index d1634c59ed56..6c46095cd0d9 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1490,9 +1490,8 @@ good_area:
 NOKPROBE_SYMBOL(do_user_addr_fault);
 
 /*
- * This routine handles page faults. It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
+ * Explicitly marked noinline such that the function tracer sees this as the
+ * page_fault entry point.
  */
 static noinline void
 __do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
@@ -1511,33 +1510,26 @@ __do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
 }
 NOKPROBE_SYMBOL(__do_page_fault);
 
-static nokprobe_inline void
-trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
-			 unsigned long error_code)
+static __always_inline void
+trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code,
+			 unsigned long address)
 {
+	if (!trace_pagefault_enabled())
+		return;
+
 	if (user_mode(regs))
 		trace_page_fault_user(address, regs, error_code);
 	else
 		trace_page_fault_kernel(address, regs, error_code);
 }
 
-/*
- * We must have this function blacklisted from kprobes, tagged with notrace
- * and call read_cr2() before calling anything else. To avoid calling any
- * kind of tracing machinery before we've observed the CR2 value.
- *
- * exception_{enter,exit}() contains all sorts of tracepoints.
- */
-dotraplinkage void notrace
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
+dotraplinkage void
+do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
 {
-	unsigned long address = read_cr2(); /* Get the faulting address */
 	enum ctx_state prev_state;
 
 	prev_state = exception_enter();
-	if (trace_pagefault_enabled())
-		trace_page_fault_entries(address, regs, error_code);
-
+	trace_page_fault_entries(regs, error_code, address);
 	__do_page_fault(regs, error_code, address);
 	exception_exit(prev_state);
 }
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index e0df96fdfe46..e94e0a62ba92 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -41,7 +41,7 @@ EXPORT_SYMBOL_GPL(sev_enable_key);
 bool sev_enabled __section(.data);
 
 /* Buffer used for early in-place encryption by BSP, no locking needed */
-static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE);
+static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE);
 
 /*
  * This routine does not change the underlying encryption setting of the
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index bed6bb93c965..7ceb32821093 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -998,7 +998,8 @@ void __init xen_setup_vcpu_info_placement(void)
 			__PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
 		pv_ops.irq.irq_enable =
 			__PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
-		pv_ops.mmu.read_cr2 = xen_read_cr2_direct;
+		pv_ops.mmu.read_cr2 =
+			__PV_IS_CALLEE_SAVE(xen_read_cr2_direct);
 	}
 }
 
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index f6e5eeecfc69..26e8b326966d 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1307,16 +1307,6 @@ static void xen_write_cr2(unsigned long cr2)
 	this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
 }
 
-static unsigned long xen_read_cr2(void)
-{
-	return this_cpu_read(xen_vcpu)->arch.cr2;
-}
-
-unsigned long xen_read_cr2_direct(void)
-{
-	return this_cpu_read(xen_vcpu_info.arch.cr2);
-}
-
 static noinline void xen_flush_tlb(void)
 {
 	struct mmuext_op *op;
@@ -2397,7 +2387,7 @@ static void xen_leave_lazy_mmu(void)
 }
 
 static const struct pv_mmu_ops xen_mmu_ops __initconst = {
-	.read_cr2 = xen_read_cr2,
+	.read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2),
 	.write_cr2 = xen_write_cr2,
 
 	.read_cr3 = xen_read_cr3,
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 8019edd0125c..be104eef80be 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -10,6 +10,7 @@
 #include <asm/percpu.h>
 #include <asm/processor-flags.h>
 #include <asm/frame.h>
+#include <asm/asm.h>
 
 #include <linux/linkage.h>
 
@@ -135,3 +136,18 @@ ENTRY(check_events)
 	FRAME_END
 	ret
 ENDPROC(check_events)
+
+ENTRY(xen_read_cr2)
+	FRAME_BEGIN
+	_ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
+	_ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX
+	FRAME_END
+	ret
+	ENDPROC(xen_read_cr2);
+
+ENTRY(xen_read_cr2_direct)
+	FRAME_BEGIN
+	_ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
+	FRAME_END
+	ret
+	ENDPROC(xen_read_cr2_direct);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 2f111f47ba98..45a441c33d6d 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -134,6 +134,9 @@ __visible void xen_irq_disable_direct(void);
 __visible unsigned long xen_save_fl_direct(void);
 __visible void xen_restore_fl_direct(unsigned long);
 
+__visible unsigned long xen_read_cr2(void);
+__visible unsigned long xen_read_cr2_direct(void);
+
 /* These are not functions, and cannot be called normally */
 __visible void xen_iret(void);
 __visible void xen_sysret32(void);
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 5ab4c60c100e..15a329da59fa 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -489,25 +489,11 @@ static void test_ptrace_write_gsbase(void)
 	 * selector value is changed or not by the GSBASE write in
 	 * a ptracer.
 	 */
-	if (gs != *shared_scratch) {
-		nerrs++;
-		printf("[FAIL]\tGS changed to %lx\n", gs);
-
-		/*
-		 * On older kernels, poking a nonzero value into the
-		 * base would zero the selector. On newer kernels,
-		 * this behavior has changed -- poking the base
-		 * changes only the base and, if FSGSBASE is not
-		 * available, this may have no effect.
-		 */
-		if (gs == 0)
-			printf("\tNote: this is expected behavior on older kernels.\n");
-	} else if (have_fsgsbase && (base != 0xFF)) {
-		nerrs++;
-		printf("[FAIL]\tGSBASE changed to %lx\n", base);
+	if (gs == 0 && base == 0xFF) {
+		printf("[OK]\tGS was reset as expected\n");
 	} else {
-		printf("[OK]\tGS remained 0x%hx%s", *shared_scratch, have_fsgsbase ? " and GSBASE changed to 0xFF" : "");
-		printf("\n");
+		nerrs++;
+		printf("[FAIL]\tGS=0x%lx, GSBASE=0x%lx (should be 0, 0xFF)\n", gs, base);
 	}
 }
 