Diffstat (limited to 'arch/x86')
246 files changed, 4920 insertions(+), 5071 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b7d31ca55187..d43e7e1c784b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -87,7 +87,7 @@ config X86
87 | select HAVE_ARCH_KMEMCHECK | 87 | select HAVE_ARCH_KMEMCHECK |
88 | select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP | 88 | select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP |
89 | select HAVE_USER_RETURN_NOTIFIER | 89 | select HAVE_USER_RETURN_NOTIFIER |
90 | select ARCH_BINFMT_ELF_RANDOMIZE_PIE | 90 | select ARCH_HAS_ELF_RANDOMIZE |
91 | select HAVE_ARCH_JUMP_LABEL | 91 | select HAVE_ARCH_JUMP_LABEL |
92 | select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE | 92 | select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE |
93 | select SPARSE_IRQ | 93 | select SPARSE_IRQ |
@@ -99,6 +99,7 @@ config X86
99 | select IRQ_FORCED_THREADING | 99 | select IRQ_FORCED_THREADING |
100 | select HAVE_BPF_JIT if X86_64 | 100 | select HAVE_BPF_JIT if X86_64 |
101 | select HAVE_ARCH_TRANSPARENT_HUGEPAGE | 101 | select HAVE_ARCH_TRANSPARENT_HUGEPAGE |
102 | select HAVE_ARCH_HUGE_VMAP if X86_64 || (X86_32 && X86_PAE) | ||
102 | select ARCH_HAS_SG_CHAIN | 103 | select ARCH_HAS_SG_CHAIN |
103 | select CLKEVT_I8253 | 104 | select CLKEVT_I8253 |
104 | select ARCH_HAVE_NMI_SAFE_CMPXCHG | 105 | select ARCH_HAVE_NMI_SAFE_CMPXCHG |
@@ -235,12 +236,10 @@ config ARCH_WANT_GENERAL_HUGETLB
235 | def_bool y | 236 | def_bool y |
236 | 237 | ||
237 | config ZONE_DMA32 | 238 | config ZONE_DMA32 |
238 | bool | 239 | def_bool y if X86_64 |
239 | default X86_64 | ||
240 | 240 | ||
241 | config AUDIT_ARCH | 241 | config AUDIT_ARCH |
242 | bool | 242 | def_bool y if X86_64 |
243 | default X86_64 | ||
244 | 243 | ||
245 | config ARCH_SUPPORTS_OPTIMIZED_INLINING | 244 | config ARCH_SUPPORTS_OPTIMIZED_INLINING |
246 | def_bool y | 245 | def_bool y |
@@ -279,6 +278,12 @@ config ARCH_SUPPORTS_UPROBES
279 | config FIX_EARLYCON_MEM | 278 | config FIX_EARLYCON_MEM |
280 | def_bool y | 279 | def_bool y |
281 | 280 | ||
281 | config PGTABLE_LEVELS | ||
282 | int | ||
283 | default 4 if X86_64 | ||
284 | default 3 if X86_PAE | ||
285 | default 2 | ||
286 | |||
282 | source "init/Kconfig" | 287 | source "init/Kconfig" |
283 | source "kernel/Kconfig.freezer" | 288 | source "kernel/Kconfig.freezer" |
284 | 289 | ||
@@ -716,17 +721,6 @@ endif #HYPERVISOR_GUEST
716 | config NO_BOOTMEM | 721 | config NO_BOOTMEM |
717 | def_bool y | 722 | def_bool y |
718 | 723 | ||
719 | config MEMTEST | ||
720 | bool "Memtest" | ||
721 | ---help--- | ||
722 | This option adds a kernel parameter 'memtest', which allows memtest | ||
723 | to be set. | ||
724 | memtest=0, mean disabled; -- default | ||
725 | memtest=1, mean do 1 test pattern; | ||
726 | ... | ||
727 | memtest=4, mean do 4 test patterns. | ||
728 | If you are unsure how to answer this question, answer N. | ||
729 | |||
730 | source "arch/x86/Kconfig.cpu" | 724 | source "arch/x86/Kconfig.cpu" |
731 | 725 | ||
732 | config HPET_TIMER | 726 | config HPET_TIMER |
@@ -891,7 +885,8 @@ config UP_LATE_INIT
891 | depends on !SMP && X86_LOCAL_APIC | 885 | depends on !SMP && X86_LOCAL_APIC |
892 | 886 | ||
893 | config X86_UP_APIC | 887 | config X86_UP_APIC |
894 | bool "Local APIC support on uniprocessors" | 888 | bool "Local APIC support on uniprocessors" if !PCI_MSI |
889 | default PCI_MSI | ||
895 | depends on X86_32 && !SMP && !X86_32_NON_STANDARD | 890 | depends on X86_32 && !SMP && !X86_32_NON_STANDARD |
896 | ---help--- | 891 | ---help--- |
897 | A local APIC (Advanced Programmable Interrupt Controller) is an | 892 | A local APIC (Advanced Programmable Interrupt Controller) is an |
@@ -903,10 +898,6 @@ config X86_UP_APIC
903 | performance counters), and the NMI watchdog which detects hard | 898 | performance counters), and the NMI watchdog which detects hard |
904 | lockups. | 899 | lockups. |
905 | 900 | ||
906 | config X86_UP_APIC_MSI | ||
907 | def_bool y | ||
908 | select X86_UP_APIC if X86_32 && !SMP && !X86_32_NON_STANDARD && PCI_MSI | ||
909 | |||
910 | config X86_UP_IOAPIC | 901 | config X86_UP_IOAPIC |
911 | bool "IO-APIC support on uniprocessors" | 902 | bool "IO-APIC support on uniprocessors" |
912 | depends on X86_UP_APIC | 903 | depends on X86_UP_APIC |
@@ -925,8 +916,8 @@ config X86_LOCAL_APIC
925 | select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ | 916 | select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ |
926 | 917 | ||
927 | config X86_IO_APIC | 918 | config X86_IO_APIC |
928 | def_bool X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC | 919 | def_bool y |
929 | depends on X86_LOCAL_APIC | 920 | depends on X86_LOCAL_APIC || X86_UP_IOAPIC |
930 | select IRQ_DOMAIN | 921 | select IRQ_DOMAIN |
931 | 922 | ||
932 | config X86_REROUTE_FOR_BROKEN_BOOT_IRQS | 923 | config X86_REROUTE_FOR_BROKEN_BOOT_IRQS |
@@ -1145,10 +1136,10 @@ config MICROCODE_OLD_INTERFACE
1145 | depends on MICROCODE | 1136 | depends on MICROCODE |
1146 | 1137 | ||
1147 | config MICROCODE_INTEL_EARLY | 1138 | config MICROCODE_INTEL_EARLY |
1148 | def_bool n | 1139 | bool |
1149 | 1140 | ||
1150 | config MICROCODE_AMD_EARLY | 1141 | config MICROCODE_AMD_EARLY |
1151 | def_bool n | 1142 | bool |
1152 | 1143 | ||
1153 | config MICROCODE_EARLY | 1144 | config MICROCODE_EARLY |
1154 | bool "Early load microcode" | 1145 | bool "Early load microcode" |
@@ -1300,14 +1291,14 @@ config ARCH_DMA_ADDR_T_64BIT
1300 | def_bool y | 1291 | def_bool y |
1301 | depends on X86_64 || HIGHMEM64G | 1292 | depends on X86_64 || HIGHMEM64G |
1302 | 1293 | ||
1303 | config DIRECT_GBPAGES | 1294 | config X86_DIRECT_GBPAGES |
1304 | bool "Enable 1GB pages for kernel pagetables" if EXPERT | 1295 | def_bool y |
1305 | default y | 1296 | depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK |
1306 | depends on X86_64 | ||
1307 | ---help--- | 1297 | ---help--- |
1308 | Allow the kernel linear mapping to use 1GB pages on CPUs that | 1298 | Certain kernel features effectively disable kernel |
1309 | support it. This can improve the kernel's performance a tiny bit by | 1299 | linear 1 GB mappings (even if the CPU otherwise |
1310 | reducing TLB pressure. If in doubt, say "Y". | 1300 | supports them), so don't confuse the user by printing |
1301 | that we have them enabled. | ||
1311 | 1302 | ||
1312 | # Common NUMA Features | 1303 | # Common NUMA Features |
1313 | config NUMA | 1304 | config NUMA |
@@ -1747,14 +1738,11 @@ config KEXEC_VERIFY_SIG
1747 | depends on KEXEC_FILE | 1738 | depends on KEXEC_FILE |
1748 | ---help--- | 1739 | ---help--- |
1749 | This option makes kernel signature verification mandatory for | 1740 | This option makes kernel signature verification mandatory for |
1750 | kexec_file_load() syscall. If kernel is signature can not be | 1741 | the kexec_file_load() syscall. |
1751 | verified, kexec_file_load() will fail. | 1742 | |
1752 | 1743 | In addition to that option, you need to enable signature | |
1753 | This option enforces signature verification at generic level. | 1744 | verification for the corresponding kernel image type being |
1754 | One needs to enable signature verification for type of kernel | 1745 | loaded in order for this to work. |
1755 | image being loaded to make sure it works. For example, enable | ||
1756 | bzImage signature verification option to be able to load and | ||
1757 | verify signatures of bzImage. Otherwise kernel loading will fail. | ||
1758 | 1746 | ||
1759 | config KEXEC_BZIMAGE_VERIFY_SIG | 1747 | config KEXEC_BZIMAGE_VERIFY_SIG |
1760 | bool "Enable bzImage signature verification support" | 1748 | bool "Enable bzImage signature verification support" |
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5ba2d9ce82dc..2fda005bb334 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -63,7 +63,7 @@ ifeq ($(CONFIG_X86_32),y)
63 | $(call cc-option,-fno-unit-at-a-time)) | 63 | $(call cc-option,-fno-unit-at-a-time)) |
64 | 64 | ||
65 | # CPU-specific tuning. Anything which can be shared with UML should go here. | 65 | # CPU-specific tuning. Anything which can be shared with UML should go here. |
66 | include $(srctree)/arch/x86/Makefile_32.cpu | 66 | include arch/x86/Makefile_32.cpu |
67 | KBUILD_CFLAGS += $(cflags-y) | 67 | KBUILD_CFLAGS += $(cflags-y) |
68 | 68 | ||
69 | # temporary until string.h is fixed | 69 | # temporary until string.h is fixed |
diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um
index 95eba554baf9..5b7e898ffd9a 100644
--- a/arch/x86/Makefile.um
+++ b/arch/x86/Makefile.um
@@ -18,7 +18,7 @@ LDS_EXTRA := -Ui386
18 | export LDS_EXTRA | 18 | export LDS_EXTRA |
19 | 19 | ||
20 | # First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y. | 20 | # First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y. |
21 | include $(srctree)/arch/x86/Makefile_32.cpu | 21 | include arch/x86/Makefile_32.cpu |
22 | 22 | ||
23 | # prevent gcc from keeping the stack 16 byte aligned. Taken from i386. | 23 | # prevent gcc from keeping the stack 16 byte aligned. Taken from i386. |
24 | cflags-y += $(call cc-option,-mpreferred-stack-boundary=2) | 24 | cflags-y += $(call cc-option,-mpreferred-stack-boundary=2) |
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index bb1376381985..d7b1f655b3ef 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -295,7 +295,8 @@ static unsigned long find_random_addr(unsigned long minimum,
295 | return slots_fetch_random(); | 295 | return slots_fetch_random(); |
296 | } | 296 | } |
297 | 297 | ||
298 | unsigned char *choose_kernel_location(unsigned char *input, | 298 | unsigned char *choose_kernel_location(struct boot_params *boot_params, |
299 | unsigned char *input, | ||
299 | unsigned long input_size, | 300 | unsigned long input_size, |
300 | unsigned char *output, | 301 | unsigned char *output, |
301 | unsigned long output_size) | 302 | unsigned long output_size) |
@@ -315,6 +316,8 @@ unsigned char *choose_kernel_location(unsigned char *input,
315 | } | 316 | } |
316 | #endif | 317 | #endif |
317 | 318 | ||
319 | boot_params->hdr.loadflags |= KASLR_FLAG; | ||
320 | |||
318 | /* Record the various known unsafe memory ranges. */ | 321 | /* Record the various known unsafe memory ranges. */ |
319 | mem_avoid_init((unsigned long)input, input_size, | 322 | mem_avoid_init((unsigned long)input, input_size, |
320 | (unsigned long)output, output_size); | 323 | (unsigned long)output, output_size); |
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 1d7fbbcc196d..8ef964ddc18e 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -29,6 +29,7 @@
29 | #include <asm/page_types.h> | 29 | #include <asm/page_types.h> |
30 | #include <asm/boot.h> | 30 | #include <asm/boot.h> |
31 | #include <asm/asm-offsets.h> | 31 | #include <asm/asm-offsets.h> |
32 | #include <asm/bootparam.h> | ||
32 | 33 | ||
33 | __HEAD | 34 | __HEAD |
34 | ENTRY(startup_32) | 35 | ENTRY(startup_32) |
@@ -102,7 +103,7 @@ preferred_addr:
102 | * Test KEEP_SEGMENTS flag to see if the bootloader is asking | 103 | * Test KEEP_SEGMENTS flag to see if the bootloader is asking |
103 | * us to not reload segments | 104 | * us to not reload segments |
104 | */ | 105 | */ |
105 | testb $(1<<6), BP_loadflags(%esi) | 106 | testb $KEEP_SEGMENTS, BP_loadflags(%esi) |
106 | jnz 1f | 107 | jnz 1f |
107 | 108 | ||
108 | cli | 109 | cli |
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 6b1766c6c082..b0c0d16ef58d 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -31,6 +31,7 @@
31 | #include <asm/msr.h> | 31 | #include <asm/msr.h> |
32 | #include <asm/processor-flags.h> | 32 | #include <asm/processor-flags.h> |
33 | #include <asm/asm-offsets.h> | 33 | #include <asm/asm-offsets.h> |
34 | #include <asm/bootparam.h> | ||
34 | 35 | ||
35 | __HEAD | 36 | __HEAD |
36 | .code32 | 37 | .code32 |
@@ -46,7 +47,7 @@ ENTRY(startup_32)
46 | * Test KEEP_SEGMENTS flag to see if the bootloader is asking | 47 | * Test KEEP_SEGMENTS flag to see if the bootloader is asking |
47 | * us to not reload segments | 48 | * us to not reload segments |
48 | */ | 49 | */ |
49 | testb $(1<<6), BP_loadflags(%esi) | 50 | testb $KEEP_SEGMENTS, BP_loadflags(%esi) |
50 | jnz 1f | 51 | jnz 1f |
51 | 52 | ||
52 | cli | 53 | cli |
@@ -164,7 +165,7 @@ ENTRY(startup_32)
164 | /* After gdt is loaded */ | 165 | /* After gdt is loaded */ |
165 | xorl %eax, %eax | 166 | xorl %eax, %eax |
166 | lldt %ax | 167 | lldt %ax |
167 | movl $0x20, %eax | 168 | movl $__BOOT_TSS, %eax |
168 | ltr %ax | 169 | ltr %ax |
169 | 170 | ||
170 | /* | 171 | /* |
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index a950864a64da..a107b935e22f 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -377,6 +377,9 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
377 | 377 | ||
378 | real_mode = rmode; | 378 | real_mode = rmode; |
379 | 379 | ||
380 | /* Clear it for solely in-kernel use */ | ||
381 | real_mode->hdr.loadflags &= ~KASLR_FLAG; | ||
382 | |||
380 | sanitize_boot_params(real_mode); | 383 | sanitize_boot_params(real_mode); |
381 | 384 | ||
382 | if (real_mode->screen_info.orig_video_mode == 7) { | 385 | if (real_mode->screen_info.orig_video_mode == 7) { |
@@ -401,7 +404,7 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
401 | * the entire decompressed kernel plus relocation table, or the | 404 | * the entire decompressed kernel plus relocation table, or the |
402 | * entire decompressed kernel plus .bss and .brk sections. | 405 | * entire decompressed kernel plus .bss and .brk sections. |
403 | */ | 406 | */ |
404 | output = choose_kernel_location(input_data, input_len, output, | 407 | output = choose_kernel_location(real_mode, input_data, input_len, output, |
405 | output_len > run_size ? output_len | 408 | output_len > run_size ? output_len |
406 | : run_size); | 409 | : run_size); |
407 | 410 | ||
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 04477d68403f..89dd0d78013a 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -57,7 +57,8 @@ int cmdline_find_option_bool(const char *option);
57 | 57 | ||
58 | #if CONFIG_RANDOMIZE_BASE | 58 | #if CONFIG_RANDOMIZE_BASE |
59 | /* aslr.c */ | 59 | /* aslr.c */ |
60 | unsigned char *choose_kernel_location(unsigned char *input, | 60 | unsigned char *choose_kernel_location(struct boot_params *boot_params, |
61 | unsigned char *input, | ||
61 | unsigned long input_size, | 62 | unsigned long input_size, |
62 | unsigned char *output, | 63 | unsigned char *output, |
63 | unsigned long output_size); | 64 | unsigned long output_size); |
@@ -65,7 +66,8 @@ unsigned char *choose_kernel_location(unsigned char *input,
65 | bool has_cpuflag(int flag); | 66 | bool has_cpuflag(int flag); |
66 | #else | 67 | #else |
67 | static inline | 68 | static inline |
68 | unsigned char *choose_kernel_location(unsigned char *input, | 69 | unsigned char *choose_kernel_location(struct boot_params *boot_params, |
70 | unsigned char *input, | ||
69 | unsigned long input_size, | 71 | unsigned long input_size, |
70 | unsigned char *output, | 72 | unsigned char *output, |
71 | unsigned long output_size) | 73 | unsigned long output_size) |
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 493f3fd9f139..318b8465d302 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -30,7 +30,7 @@ int strcmp(const char *str1, const char *str2)
30 | int delta = 0; | 30 | int delta = 0; |
31 | 31 | ||
32 | while (*s1 || *s2) { | 32 | while (*s1 || *s2) { |
33 | delta = *s2 - *s1; | 33 | delta = *s1 - *s2; |
34 | if (delta) | 34 | if (delta) |
35 | return delta; | 35 | return delta; |
36 | s1++; | 36 | s1++; |
diff --git a/arch/x86/boot/video-mode.c b/arch/x86/boot/video-mode.c
index 748e8d06290a..aa8a96b052e3 100644
--- a/arch/x86/boot/video-mode.c
+++ b/arch/x86/boot/video-mode.c
@@ -22,10 +22,8 @@
22 | /* | 22 | /* |
23 | * Common variables | 23 | * Common variables |
24 | */ | 24 | */ |
25 | int adapter; /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */ | 25 | int adapter; /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */ |
26 | u16 video_segment; | ||
27 | int force_x, force_y; /* Don't query the BIOS for cols/rows */ | 26 | int force_x, force_y; /* Don't query the BIOS for cols/rows */ |
28 | |||
29 | int do_restore; /* Screen contents changed during mode flip */ | 27 | int do_restore; /* Screen contents changed during mode flip */ |
30 | int graphic_mode; /* Graphic mode with linear frame buffer */ | 28 | int graphic_mode; /* Graphic mode with linear frame buffer */ |
31 | 29 | ||
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index 43eda284d27f..05111bb8d018 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -17,6 +17,8 @@
17 | #include "video.h" | 17 | #include "video.h" |
18 | #include "vesa.h" | 18 | #include "vesa.h" |
19 | 19 | ||
20 | static u16 video_segment; | ||
21 | |||
20 | static void store_cursor_position(void) | 22 | static void store_cursor_position(void) |
21 | { | 23 | { |
22 | struct biosregs ireg, oreg; | 24 | struct biosregs ireg, oreg; |
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
index 0bb25491262d..b54e0328c449 100644
--- a/arch/x86/boot/video.h
+++ b/arch/x86/boot/video.h
@@ -91,7 +91,6 @@ int mode_defined(u16 mode); /* video.c */
91 | #define ADAPTER_VGA 2 | 91 | #define ADAPTER_VGA 2 |
92 | 92 | ||
93 | extern int adapter; | 93 | extern int adapter; |
94 | extern u16 video_segment; | ||
95 | extern int force_x, force_y; /* Don't query the BIOS for cols/rows */ | 94 | extern int force_x, force_y; /* Don't query the BIOS for cols/rows */ |
96 | extern int do_restore; /* Restore screen contents */ | 95 | extern int do_restore; /* Restore screen contents */ |
97 | extern int graphic_mode; /* Graphics mode with linear frame buffer */ | 96 | extern int graphic_mode; /* Graphics mode with linear frame buffer */ |
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 419819d6dab3..aaa1118bf01e 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -248,7 +248,7 @@ CONFIG_USB=y
248 | CONFIG_USB_ANNOUNCE_NEW_DEVICES=y | 248 | CONFIG_USB_ANNOUNCE_NEW_DEVICES=y |
249 | CONFIG_USB_MON=y | 249 | CONFIG_USB_MON=y |
250 | CONFIG_USB_EHCI_HCD=y | 250 | CONFIG_USB_EHCI_HCD=y |
251 | # CONFIG_USB_EHCI_TT_NEWSCHED is not set | 251 | CONFIG_USB_EHCI_TT_NEWSCHED=y |
252 | CONFIG_USB_OHCI_HCD=y | 252 | CONFIG_USB_OHCI_HCD=y |
253 | CONFIG_USB_UHCI_HCD=y | 253 | CONFIG_USB_UHCI_HCD=y |
254 | CONFIG_USB_PRINTER=y | 254 | CONFIG_USB_PRINTER=y |
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 4c311ddd973b..315b86106572 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -243,7 +243,7 @@ CONFIG_USB=y
243 | CONFIG_USB_ANNOUNCE_NEW_DEVICES=y | 243 | CONFIG_USB_ANNOUNCE_NEW_DEVICES=y |
244 | CONFIG_USB_MON=y | 244 | CONFIG_USB_MON=y |
245 | CONFIG_USB_EHCI_HCD=y | 245 | CONFIG_USB_EHCI_HCD=y |
246 | # CONFIG_USB_EHCI_TT_NEWSCHED is not set | 246 | CONFIG_USB_EHCI_TT_NEWSCHED=y |
247 | CONFIG_USB_OHCI_HCD=y | 247 | CONFIG_USB_OHCI_HCD=y |
248 | CONFIG_USB_UHCI_HCD=y | 248 | CONFIG_USB_UHCI_HCD=y |
249 | CONFIG_USB_PRINTER=y | 249 | CONFIG_USB_PRINTER=y |
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 54f60ab41c63..112cefacf2af 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -797,7 +797,9 @@ static int rfc4106_init(struct crypto_tfm *tfm)
797 | PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); | 797 | PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); |
798 | struct crypto_aead *cryptd_child; | 798 | struct crypto_aead *cryptd_child; |
799 | struct aesni_rfc4106_gcm_ctx *child_ctx; | 799 | struct aesni_rfc4106_gcm_ctx *child_ctx; |
800 | cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0); | 800 | cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", |
801 | CRYPTO_ALG_INTERNAL, | ||
802 | CRYPTO_ALG_INTERNAL); | ||
801 | if (IS_ERR(cryptd_tfm)) | 803 | if (IS_ERR(cryptd_tfm)) |
802 | return PTR_ERR(cryptd_tfm); | 804 | return PTR_ERR(cryptd_tfm); |
803 | 805 | ||
@@ -890,15 +892,12 @@ out_free_ablkcipher:
890 | return ret; | 892 | return ret; |
891 | } | 893 | } |
892 | 894 | ||
893 | static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, | 895 | static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key, |
894 | unsigned int key_len) | 896 | unsigned int key_len) |
895 | { | 897 | { |
896 | int ret = 0; | 898 | int ret = 0; |
897 | struct crypto_tfm *tfm = crypto_aead_tfm(parent); | 899 | struct crypto_tfm *tfm = crypto_aead_tfm(aead); |
898 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); | 900 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(aead); |
899 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | ||
900 | struct aesni_rfc4106_gcm_ctx *child_ctx = | ||
901 | aesni_rfc4106_gcm_ctx_get(cryptd_child); | ||
902 | u8 *new_key_align, *new_key_mem = NULL; | 901 | u8 *new_key_align, *new_key_mem = NULL; |
903 | 902 | ||
904 | if (key_len < 4) { | 903 | if (key_len < 4) { |
@@ -943,20 +942,31 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
943 | goto exit; | 942 | goto exit; |
944 | } | 943 | } |
945 | ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); | 944 | ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); |
946 | memcpy(child_ctx, ctx, sizeof(*ctx)); | ||
947 | exit: | 945 | exit: |
948 | kfree(new_key_mem); | 946 | kfree(new_key_mem); |
949 | return ret; | 947 | return ret; |
950 | } | 948 | } |
951 | 949 | ||
952 | /* This is the Integrity Check Value (aka the authentication tag length and can | 950 | static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, |
953 | * be 8, 12 or 16 bytes long. */ | 951 | unsigned int key_len) |
954 | static int rfc4106_set_authsize(struct crypto_aead *parent, | ||
955 | unsigned int authsize) | ||
956 | { | 952 | { |
957 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); | 953 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); |
958 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | 954 | struct crypto_aead *child = cryptd_aead_child(ctx->cryptd_tfm); |
955 | struct aesni_rfc4106_gcm_ctx *c_ctx = aesni_rfc4106_gcm_ctx_get(child); | ||
956 | struct cryptd_aead *cryptd_tfm = ctx->cryptd_tfm; | ||
957 | int ret; | ||
959 | 958 | ||
959 | ret = crypto_aead_setkey(child, key, key_len); | ||
960 | if (!ret) { | ||
961 | memcpy(ctx, c_ctx, sizeof(*ctx)); | ||
962 | ctx->cryptd_tfm = cryptd_tfm; | ||
963 | } | ||
964 | return ret; | ||
965 | } | ||
966 | |||
967 | static int common_rfc4106_set_authsize(struct crypto_aead *aead, | ||
968 | unsigned int authsize) | ||
969 | { | ||
960 | switch (authsize) { | 970 | switch (authsize) { |
961 | case 8: | 971 | case 8: |
962 | case 12: | 972 | case 12: |
@@ -965,51 +975,23 @@ static int rfc4106_set_authsize(struct crypto_aead *parent,
965 | default: | 975 | default: |
966 | return -EINVAL; | 976 | return -EINVAL; |
967 | } | 977 | } |
968 | crypto_aead_crt(parent)->authsize = authsize; | 978 | crypto_aead_crt(aead)->authsize = authsize; |
969 | crypto_aead_crt(cryptd_child)->authsize = authsize; | ||
970 | return 0; | 979 | return 0; |
971 | } | 980 | } |
972 | 981 | ||
973 | static int rfc4106_encrypt(struct aead_request *req) | 982 | /* This is the Integrity Check Value (aka the authentication tag length and can |
974 | { | 983 | * be 8, 12 or 16 bytes long. */ |
975 | int ret; | 984 | static int rfc4106_set_authsize(struct crypto_aead *parent, |
976 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | 985 | unsigned int authsize) |
977 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
978 | |||
979 | if (!irq_fpu_usable()) { | ||
980 | struct aead_request *cryptd_req = | ||
981 | (struct aead_request *) aead_request_ctx(req); | ||
982 | memcpy(cryptd_req, req, sizeof(*req)); | ||
983 | aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
984 | return crypto_aead_encrypt(cryptd_req); | ||
985 | } else { | ||
986 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | ||
987 | kernel_fpu_begin(); | ||
988 | ret = cryptd_child->base.crt_aead.encrypt(req); | ||
989 | kernel_fpu_end(); | ||
990 | return ret; | ||
991 | } | ||
992 | } | ||
993 | |||
994 | static int rfc4106_decrypt(struct aead_request *req) | ||
995 | { | 986 | { |
987 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); | ||
988 | struct crypto_aead *child = cryptd_aead_child(ctx->cryptd_tfm); | ||
996 | int ret; | 989 | int ret; |
997 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
998 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
999 | 990 | ||
1000 | if (!irq_fpu_usable()) { | 991 | ret = crypto_aead_setauthsize(child, authsize); |
1001 | struct aead_request *cryptd_req = | 992 | if (!ret) |
1002 | (struct aead_request *) aead_request_ctx(req); | 993 | crypto_aead_crt(parent)->authsize = authsize; |
1003 | memcpy(cryptd_req, req, sizeof(*req)); | 994 | return ret; |
1004 | aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
1005 | return crypto_aead_decrypt(cryptd_req); | ||
1006 | } else { | ||
1007 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | ||
1008 | kernel_fpu_begin(); | ||
1009 | ret = cryptd_child->base.crt_aead.decrypt(req); | ||
1010 | kernel_fpu_end(); | ||
1011 | return ret; | ||
1012 | } | ||
1013 | } | 995 | } |
1014 | 996 | ||
1015 | static int __driver_rfc4106_encrypt(struct aead_request *req) | 997 | static int __driver_rfc4106_encrypt(struct aead_request *req) |
@@ -1185,6 +1167,78 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
1185 | } | 1167 | } |
1186 | return retval; | 1168 | return retval; |
1187 | } | 1169 | } |
1170 | |||
1171 | static int rfc4106_encrypt(struct aead_request *req) | ||
1172 | { | ||
1173 | int ret; | ||
1174 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
1175 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
1176 | |||
1177 | if (!irq_fpu_usable()) { | ||
1178 | struct aead_request *cryptd_req = | ||
1179 | (struct aead_request *) aead_request_ctx(req); | ||
1180 | |||
1181 | memcpy(cryptd_req, req, sizeof(*req)); | ||
1182 | aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
1183 | ret = crypto_aead_encrypt(cryptd_req); | ||
1184 | } else { | ||
1185 | kernel_fpu_begin(); | ||
1186 | ret = __driver_rfc4106_encrypt(req); | ||
1187 | kernel_fpu_end(); | ||
1188 | } | ||
1189 | return ret; | ||
1190 | } | ||
1191 | |||
1192 | static int rfc4106_decrypt(struct aead_request *req) | ||
1193 | { | ||
1194 | int ret; | ||
1195 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
1196 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
1197 | |||
1198 | if (!irq_fpu_usable()) { | ||
1199 | struct aead_request *cryptd_req = | ||
1200 | (struct aead_request *) aead_request_ctx(req); | ||
1201 | |||
1202 | memcpy(cryptd_req, req, sizeof(*req)); | ||
1203 | aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
1204 | ret = crypto_aead_decrypt(cryptd_req); | ||
1205 | } else { | ||
1206 | kernel_fpu_begin(); | ||
1207 | ret = __driver_rfc4106_decrypt(req); | ||
1208 | kernel_fpu_end(); | ||
1209 | } | ||
1210 | return ret; | ||
1211 | } | ||
1212 | |||
1213 | static int helper_rfc4106_encrypt(struct aead_request *req) | ||
1214 | { | ||
1215 | int ret; | ||
1216 | |||
1217 | if (unlikely(!irq_fpu_usable())) { | ||
1218 | WARN_ONCE(1, "__gcm-aes-aesni alg used in invalid context"); | ||
1219 | ret = -EINVAL; | ||
1220 | } else { | ||
1221 | kernel_fpu_begin(); | ||
1222 | ret = __driver_rfc4106_encrypt(req); | ||
1223 | kernel_fpu_end(); | ||
1224 | } | ||
1225 | return ret; | ||
1226 | } | ||
1227 | |||
1228 | static int helper_rfc4106_decrypt(struct aead_request *req) | ||
1229 | { | ||
1230 | int ret; | ||
1231 | |||
1232 | if (unlikely(!irq_fpu_usable())) { | ||
1233 | WARN_ONCE(1, "__gcm-aes-aesni alg used in invalid context"); | ||
1234 | ret = -EINVAL; | ||
1235 | } else { | ||
1236 | kernel_fpu_begin(); | ||
1237 | ret = __driver_rfc4106_decrypt(req); | ||
1238 | kernel_fpu_end(); | ||
1239 | } | ||
1240 | return ret; | ||
1241 | } | ||
1188 | #endif | 1242 | #endif |
1189 | 1243 | ||
1190 | static struct crypto_alg aesni_algs[] = { { | 1244 | static struct crypto_alg aesni_algs[] = { { |
@@ -1210,7 +1264,7 @@ static struct crypto_alg aesni_algs[] = { {
1210 | .cra_name = "__aes-aesni", | 1264 | .cra_name = "__aes-aesni", |
1211 | .cra_driver_name = "__driver-aes-aesni", | 1265 | .cra_driver_name = "__driver-aes-aesni", |
1212 | .cra_priority = 0, | 1266 | .cra_priority = 0, |
1213 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | 1267 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_INTERNAL, |
1214 | .cra_blocksize = AES_BLOCK_SIZE, | 1268 | .cra_blocksize = AES_BLOCK_SIZE, |
1215 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + | 1269 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + |
1216 | AESNI_ALIGN - 1, | 1270 | AESNI_ALIGN - 1, |
@@ -1229,7 +1283,8 @@ static struct crypto_alg aesni_algs[] = { {
1229 | .cra_name = "__ecb-aes-aesni", | 1283 | .cra_name = "__ecb-aes-aesni", |
1230 | .cra_driver_name = "__driver-ecb-aes-aesni", | 1284 | .cra_driver_name = "__driver-ecb-aes-aesni", |
1231 | .cra_priority = 0, | 1285 | .cra_priority = 0, |
1232 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 1286 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
1287 | CRYPTO_ALG_INTERNAL, | ||
1233 | .cra_blocksize = AES_BLOCK_SIZE, | 1288 | .cra_blocksize = AES_BLOCK_SIZE, |
1234 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + | 1289 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + |
1235 | AESNI_ALIGN - 1, | 1290 | AESNI_ALIGN - 1, |
@@ -1249,7 +1304,8 @@ static struct crypto_alg aesni_algs[] = { {
1249 | .cra_name = "__cbc-aes-aesni", | 1304 | .cra_name = "__cbc-aes-aesni", |
1250 | .cra_driver_name = "__driver-cbc-aes-aesni", | 1305 | .cra_driver_name = "__driver-cbc-aes-aesni", |
1251 | .cra_priority = 0, | 1306 | .cra_priority = 0, |
1252 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 1307 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
1308 | CRYPTO_ALG_INTERNAL, | ||
1253 | .cra_blocksize = AES_BLOCK_SIZE, | 1309 | .cra_blocksize = AES_BLOCK_SIZE, |
1254 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + | 1310 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + |
1255 | AESNI_ALIGN - 1, | 1311 | AESNI_ALIGN - 1, |
@@ -1313,7 +1369,8 @@ static struct crypto_alg aesni_algs[] = { {
1313 | .cra_name = "__ctr-aes-aesni", | 1369 | .cra_name = "__ctr-aes-aesni", |
1314 | .cra_driver_name = "__driver-ctr-aes-aesni", | 1370 | .cra_driver_name = "__driver-ctr-aes-aesni", |
1315 | .cra_priority = 0, | 1371 | .cra_priority = 0, |
1316 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 1372 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
1373 | CRYPTO_ALG_INTERNAL, | ||
1317 | .cra_blocksize = 1, | 1374 | .cra_blocksize = 1, |
1318 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + | 1375 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + |
1319 | AESNI_ALIGN - 1, | 1376 | AESNI_ALIGN - 1, |
@@ -1357,7 +1414,7 @@ static struct crypto_alg aesni_algs[] = { {
1357 | .cra_name = "__gcm-aes-aesni", | 1414 | .cra_name = "__gcm-aes-aesni", |
1358 | .cra_driver_name = "__driver-gcm-aes-aesni", | 1415 | .cra_driver_name = "__driver-gcm-aes-aesni", |
1359 | .cra_priority = 0, | 1416 | .cra_priority = 0, |
1360 | .cra_flags = CRYPTO_ALG_TYPE_AEAD, | 1417 | .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_INTERNAL, |
1361 | .cra_blocksize = 1, | 1418 | .cra_blocksize = 1, |
1362 | .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + | 1419 | .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + |
1363 | AESNI_ALIGN, | 1420 | AESNI_ALIGN, |
@@ -1366,8 +1423,12 @@ static struct crypto_alg aesni_algs[] = { {
1366 | .cra_module = THIS_MODULE, | 1423 | .cra_module = THIS_MODULE, |
1367 | .cra_u = { | 1424 | .cra_u = { |
1368 | .aead = { | 1425 | .aead = { |
1369 | .encrypt = __driver_rfc4106_encrypt, | 1426 | .setkey = common_rfc4106_set_key, |
1370 | .decrypt = __driver_rfc4106_decrypt, | 1427 | .setauthsize = common_rfc4106_set_authsize, |
1428 | .encrypt = helper_rfc4106_encrypt, | ||
1429 | .decrypt = helper_rfc4106_decrypt, | ||
1430 | .ivsize = 8, | ||
1431 | .maxauthsize = 16, | ||
1371 | }, | 1432 | }, |
1372 | }, | 1433 | }, |
1373 | }, { | 1434 | }, { |
@@ -1423,7 +1484,8 @@ static struct crypto_alg aesni_algs[] = { {
1423 | .cra_name = "__lrw-aes-aesni", | 1484 | .cra_name = "__lrw-aes-aesni", |
1424 | .cra_driver_name = "__driver-lrw-aes-aesni", | 1485 | .cra_driver_name = "__driver-lrw-aes-aesni", |
1425 | .cra_priority = 0, | 1486 | .cra_priority = 0, |
1426 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 1487 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
1488 | CRYPTO_ALG_INTERNAL, | ||
1427 | .cra_blocksize = AES_BLOCK_SIZE, | 1489 | .cra_blocksize = AES_BLOCK_SIZE, |
1428 | .cra_ctxsize = sizeof(struct aesni_lrw_ctx), | 1490 | .cra_ctxsize = sizeof(struct aesni_lrw_ctx), |
1429 | .cra_alignmask = 0, | 1491 | .cra_alignmask = 0, |
@@ -1444,7 +1506,8 @@ static struct crypto_alg aesni_algs[] = { {
1444 | .cra_name = "__xts-aes-aesni", | 1506 | .cra_name = "__xts-aes-aesni", |
1445 | .cra_driver_name = "__driver-xts-aes-aesni", | 1507 | .cra_driver_name = "__driver-xts-aes-aesni", |
1446 | .cra_priority = 0, | 1508 | .cra_priority = 0, |
1447 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 1509 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
1510 | CRYPTO_ALG_INTERNAL, | ||
1448 | .cra_blocksize = AES_BLOCK_SIZE, | 1511 | .cra_blocksize = AES_BLOCK_SIZE, |
1449 | .cra_ctxsize = sizeof(struct aesni_xts_ctx), | 1512 | .cra_ctxsize = sizeof(struct aesni_xts_ctx), |
1450 | .cra_alignmask = 0, | 1513 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index 9a07fafe3831..baf0ac21ace5 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -343,7 +343,8 @@ static struct crypto_alg cmll_algs[10] = { {
343 | .cra_name = "__ecb-camellia-aesni-avx2", | 343 | .cra_name = "__ecb-camellia-aesni-avx2", |
344 | .cra_driver_name = "__driver-ecb-camellia-aesni-avx2", | 344 | .cra_driver_name = "__driver-ecb-camellia-aesni-avx2", |
345 | .cra_priority = 0, | 345 | .cra_priority = 0, |
346 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 346 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
347 | CRYPTO_ALG_INTERNAL, | ||
347 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 348 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
348 | .cra_ctxsize = sizeof(struct camellia_ctx), | 349 | .cra_ctxsize = sizeof(struct camellia_ctx), |
349 | .cra_alignmask = 0, | 350 | .cra_alignmask = 0, |
@@ -362,7 +363,8 @@ static struct crypto_alg cmll_algs[10] = { {
362 | .cra_name = "__cbc-camellia-aesni-avx2", | 363 | .cra_name = "__cbc-camellia-aesni-avx2", |
363 | .cra_driver_name = "__driver-cbc-camellia-aesni-avx2", | 364 | .cra_driver_name = "__driver-cbc-camellia-aesni-avx2", |
364 | .cra_priority = 0, | 365 | .cra_priority = 0, |
365 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 366 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
367 | CRYPTO_ALG_INTERNAL, | ||
366 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 368 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
367 | .cra_ctxsize = sizeof(struct camellia_ctx), | 369 | .cra_ctxsize = sizeof(struct camellia_ctx), |
368 | .cra_alignmask = 0, | 370 | .cra_alignmask = 0, |
@@ -381,7 +383,8 @@ static struct crypto_alg cmll_algs[10] = { {
381 | .cra_name = "__ctr-camellia-aesni-avx2", | 383 | .cra_name = "__ctr-camellia-aesni-avx2", |
382 | .cra_driver_name = "__driver-ctr-camellia-aesni-avx2", | 384 | .cra_driver_name = "__driver-ctr-camellia-aesni-avx2", |
383 | .cra_priority = 0, | 385 | .cra_priority = 0, |
384 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 386 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
387 | CRYPTO_ALG_INTERNAL, | ||
385 | .cra_blocksize = 1, | 388 | .cra_blocksize = 1, |
386 | .cra_ctxsize = sizeof(struct camellia_ctx), | 389 | .cra_ctxsize = sizeof(struct camellia_ctx), |
387 | .cra_alignmask = 0, | 390 | .cra_alignmask = 0, |
@@ -401,7 +404,8 @@ static struct crypto_alg cmll_algs[10] = { {
401 | .cra_name = "__lrw-camellia-aesni-avx2", | 404 | .cra_name = "__lrw-camellia-aesni-avx2", |
402 | .cra_driver_name = "__driver-lrw-camellia-aesni-avx2", | 405 | .cra_driver_name = "__driver-lrw-camellia-aesni-avx2", |
403 | .cra_priority = 0, | 406 | .cra_priority = 0, |
404 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 407 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
408 | CRYPTO_ALG_INTERNAL, | ||
405 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 409 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
406 | .cra_ctxsize = sizeof(struct camellia_lrw_ctx), | 410 | .cra_ctxsize = sizeof(struct camellia_lrw_ctx), |
407 | .cra_alignmask = 0, | 411 | .cra_alignmask = 0, |
@@ -424,7 +428,8 @@ static struct crypto_alg cmll_algs[10] = { {
424 | .cra_name = "__xts-camellia-aesni-avx2", | 428 | .cra_name = "__xts-camellia-aesni-avx2", |
425 | .cra_driver_name = "__driver-xts-camellia-aesni-avx2", | 429 | .cra_driver_name = "__driver-xts-camellia-aesni-avx2", |
426 | .cra_priority = 0, | 430 | .cra_priority = 0, |
427 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 431 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
432 | CRYPTO_ALG_INTERNAL, | ||
428 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 433 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
429 | .cra_ctxsize = sizeof(struct camellia_xts_ctx), | 434 | .cra_ctxsize = sizeof(struct camellia_xts_ctx), |
430 | .cra_alignmask = 0, | 435 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index ed38d959add6..78818a1e73e3 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -335,7 +335,8 @@ static struct crypto_alg cmll_algs[10] = { {
335 | .cra_name = "__ecb-camellia-aesni", | 335 | .cra_name = "__ecb-camellia-aesni", |
336 | .cra_driver_name = "__driver-ecb-camellia-aesni", | 336 | .cra_driver_name = "__driver-ecb-camellia-aesni", |
337 | .cra_priority = 0, | 337 | .cra_priority = 0, |
338 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 338 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
339 | CRYPTO_ALG_INTERNAL, | ||
339 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 340 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
340 | .cra_ctxsize = sizeof(struct camellia_ctx), | 341 | .cra_ctxsize = sizeof(struct camellia_ctx), |
341 | .cra_alignmask = 0, | 342 | .cra_alignmask = 0, |
@@ -354,7 +355,8 @@ static struct crypto_alg cmll_algs[10] = { {
354 | .cra_name = "__cbc-camellia-aesni", | 355 | .cra_name = "__cbc-camellia-aesni", |
355 | .cra_driver_name = "__driver-cbc-camellia-aesni", | 356 | .cra_driver_name = "__driver-cbc-camellia-aesni", |
356 | .cra_priority = 0, | 357 | .cra_priority = 0, |
357 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 358 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
359 | CRYPTO_ALG_INTERNAL, | ||
358 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 360 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
359 | .cra_ctxsize = sizeof(struct camellia_ctx), | 361 | .cra_ctxsize = sizeof(struct camellia_ctx), |
360 | .cra_alignmask = 0, | 362 | .cra_alignmask = 0, |
@@ -373,7 +375,8 @@ static struct crypto_alg cmll_algs[10] = { {
373 | .cra_name = "__ctr-camellia-aesni", | 375 | .cra_name = "__ctr-camellia-aesni", |
374 | .cra_driver_name = "__driver-ctr-camellia-aesni", | 376 | .cra_driver_name = "__driver-ctr-camellia-aesni", |
375 | .cra_priority = 0, | 377 | .cra_priority = 0, |
376 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 378 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
379 | CRYPTO_ALG_INTERNAL, | ||
377 | .cra_blocksize = 1, | 380 | .cra_blocksize = 1, |
378 | .cra_ctxsize = sizeof(struct camellia_ctx), | 381 | .cra_ctxsize = sizeof(struct camellia_ctx), |
379 | .cra_alignmask = 0, | 382 | .cra_alignmask = 0, |
@@ -393,7 +396,8 @@ static struct crypto_alg cmll_algs[10] = { {
393 | .cra_name = "__lrw-camellia-aesni", | 396 | .cra_name = "__lrw-camellia-aesni", |
394 | .cra_driver_name = "__driver-lrw-camellia-aesni", | 397 | .cra_driver_name = "__driver-lrw-camellia-aesni", |
395 | .cra_priority = 0, | 398 | .cra_priority = 0, |
396 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 399 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
400 | CRYPTO_ALG_INTERNAL, | ||
397 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 401 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
398 | .cra_ctxsize = sizeof(struct camellia_lrw_ctx), | 402 | .cra_ctxsize = sizeof(struct camellia_lrw_ctx), |
399 | .cra_alignmask = 0, | 403 | .cra_alignmask = 0, |
@@ -416,7 +420,8 @@ static struct crypto_alg cmll_algs[10] = { {
416 | .cra_name = "__xts-camellia-aesni", | 420 | .cra_name = "__xts-camellia-aesni", |
417 | .cra_driver_name = "__driver-xts-camellia-aesni", | 421 | .cra_driver_name = "__driver-xts-camellia-aesni", |
418 | .cra_priority = 0, | 422 | .cra_priority = 0, |
419 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 423 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
424 | CRYPTO_ALG_INTERNAL, | ||
420 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 425 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
421 | .cra_ctxsize = sizeof(struct camellia_xts_ctx), | 426 | .cra_ctxsize = sizeof(struct camellia_xts_ctx), |
422 | .cra_alignmask = 0, | 427 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index 60ada677a928..236c80974457 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -341,7 +341,8 @@ static struct crypto_alg cast5_algs[6] = { {
341 | .cra_name = "__ecb-cast5-avx", | 341 | .cra_name = "__ecb-cast5-avx", |
342 | .cra_driver_name = "__driver-ecb-cast5-avx", | 342 | .cra_driver_name = "__driver-ecb-cast5-avx", |
343 | .cra_priority = 0, | 343 | .cra_priority = 0, |
344 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 344 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
345 | CRYPTO_ALG_INTERNAL, | ||
345 | .cra_blocksize = CAST5_BLOCK_SIZE, | 346 | .cra_blocksize = CAST5_BLOCK_SIZE, |
346 | .cra_ctxsize = sizeof(struct cast5_ctx), | 347 | .cra_ctxsize = sizeof(struct cast5_ctx), |
347 | .cra_alignmask = 0, | 348 | .cra_alignmask = 0, |
@@ -360,7 +361,8 @@ static struct crypto_alg cast5_algs[6] = { {
360 | .cra_name = "__cbc-cast5-avx", | 361 | .cra_name = "__cbc-cast5-avx", |
361 | .cra_driver_name = "__driver-cbc-cast5-avx", | 362 | .cra_driver_name = "__driver-cbc-cast5-avx", |
362 | .cra_priority = 0, | 363 | .cra_priority = 0, |
363 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 364 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
365 | CRYPTO_ALG_INTERNAL, | ||
364 | .cra_blocksize = CAST5_BLOCK_SIZE, | 366 | .cra_blocksize = CAST5_BLOCK_SIZE, |
365 | .cra_ctxsize = sizeof(struct cast5_ctx), | 367 | .cra_ctxsize = sizeof(struct cast5_ctx), |
366 | .cra_alignmask = 0, | 368 | .cra_alignmask = 0, |
@@ -379,7 +381,8 @@ static struct crypto_alg cast5_algs[6] = { {
379 | .cra_name = "__ctr-cast5-avx", | 381 | .cra_name = "__ctr-cast5-avx", |
380 | .cra_driver_name = "__driver-ctr-cast5-avx", | 382 | .cra_driver_name = "__driver-ctr-cast5-avx", |
381 | .cra_priority = 0, | 383 | .cra_priority = 0, |
382 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 384 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
385 | CRYPTO_ALG_INTERNAL, | ||
383 | .cra_blocksize = 1, | 386 | .cra_blocksize = 1, |
384 | .cra_ctxsize = sizeof(struct cast5_ctx), | 387 | .cra_ctxsize = sizeof(struct cast5_ctx), |
385 | .cra_alignmask = 0, | 388 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 0160f68a57ff..f448810ca4ac 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -372,7 +372,8 @@ static struct crypto_alg cast6_algs[10] = { {
372 | .cra_name = "__ecb-cast6-avx", | 372 | .cra_name = "__ecb-cast6-avx", |
373 | .cra_driver_name = "__driver-ecb-cast6-avx", | 373 | .cra_driver_name = "__driver-ecb-cast6-avx", |
374 | .cra_priority = 0, | 374 | .cra_priority = 0, |
375 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 375 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
376 | CRYPTO_ALG_INTERNAL, | ||
376 | .cra_blocksize = CAST6_BLOCK_SIZE, | 377 | .cra_blocksize = CAST6_BLOCK_SIZE, |
377 | .cra_ctxsize = sizeof(struct cast6_ctx), | 378 | .cra_ctxsize = sizeof(struct cast6_ctx), |
378 | .cra_alignmask = 0, | 379 | .cra_alignmask = 0, |
@@ -391,7 +392,8 @@ static struct crypto_alg cast6_algs[10] = { {
391 | .cra_name = "__cbc-cast6-avx", | 392 | .cra_name = "__cbc-cast6-avx", |
392 | .cra_driver_name = "__driver-cbc-cast6-avx", | 393 | .cra_driver_name = "__driver-cbc-cast6-avx", |
393 | .cra_priority = 0, | 394 | .cra_priority = 0, |
394 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 395 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
396 | CRYPTO_ALG_INTERNAL, | ||
395 | .cra_blocksize = CAST6_BLOCK_SIZE, | 397 | .cra_blocksize = CAST6_BLOCK_SIZE, |
396 | .cra_ctxsize = sizeof(struct cast6_ctx), | 398 | .cra_ctxsize = sizeof(struct cast6_ctx), |
397 | .cra_alignmask = 0, | 399 | .cra_alignmask = 0, |
@@ -410,7 +412,8 @@ static struct crypto_alg cast6_algs[10] = { {
410 | .cra_name = "__ctr-cast6-avx", | 412 | .cra_name = "__ctr-cast6-avx", |
411 | .cra_driver_name = "__driver-ctr-cast6-avx", | 413 | .cra_driver_name = "__driver-ctr-cast6-avx", |
412 | .cra_priority = 0, | 414 | .cra_priority = 0, |
413 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 415 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
416 | CRYPTO_ALG_INTERNAL, | ||
414 | .cra_blocksize = 1, | 417 | .cra_blocksize = 1, |
415 | .cra_ctxsize = sizeof(struct cast6_ctx), | 418 | .cra_ctxsize = sizeof(struct cast6_ctx), |
416 | .cra_alignmask = 0, | 419 | .cra_alignmask = 0, |
@@ -430,7 +433,8 @@ static struct crypto_alg cast6_algs[10] = { {
430 | .cra_name = "__lrw-cast6-avx", | 433 | .cra_name = "__lrw-cast6-avx", |
431 | .cra_driver_name = "__driver-lrw-cast6-avx", | 434 | .cra_driver_name = "__driver-lrw-cast6-avx", |
432 | .cra_priority = 0, | 435 | .cra_priority = 0, |
433 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 436 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
437 | CRYPTO_ALG_INTERNAL, | ||
434 | .cra_blocksize = CAST6_BLOCK_SIZE, | 438 | .cra_blocksize = CAST6_BLOCK_SIZE, |
435 | .cra_ctxsize = sizeof(struct cast6_lrw_ctx), | 439 | .cra_ctxsize = sizeof(struct cast6_lrw_ctx), |
436 | .cra_alignmask = 0, | 440 | .cra_alignmask = 0, |
@@ -453,7 +457,8 @@ static struct crypto_alg cast6_algs[10] = { {
453 | .cra_name = "__xts-cast6-avx", | 457 | .cra_name = "__xts-cast6-avx", |
454 | .cra_driver_name = "__driver-xts-cast6-avx", | 458 | .cra_driver_name = "__driver-xts-cast6-avx", |
455 | .cra_priority = 0, | 459 | .cra_priority = 0, |
456 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 460 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
461 | CRYPTO_ALG_INTERNAL, | ||
457 | .cra_blocksize = CAST6_BLOCK_SIZE, | 462 | .cra_blocksize = CAST6_BLOCK_SIZE, |
458 | .cra_ctxsize = sizeof(struct cast6_xts_ctx), | 463 | .cra_ctxsize = sizeof(struct cast6_xts_ctx), |
459 | .cra_alignmask = 0, | 464 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 26d49ebae040..225be06edc80 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -178,7 +178,7 @@ continue_block:
178 | ## 2a) PROCESS FULL BLOCKS: | 178 | ## 2a) PROCESS FULL BLOCKS: |
179 | ################################################################ | 179 | ################################################################ |
180 | full_block: | 180 | full_block: |
181 | movq $128,%rax | 181 | movl $128,%eax |
182 | lea 128*8*2(block_0), block_1 | 182 | lea 128*8*2(block_0), block_1 |
183 | lea 128*8*3(block_0), block_2 | 183 | lea 128*8*3(block_0), block_2 |
184 | add $128*8*1, block_0 | 184 | add $128*8*1, block_0 |
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 8253d85aa165..2079baf06bdd 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -154,7 +154,8 @@ static struct shash_alg ghash_alg = {
154 | .cra_name = "__ghash", | 154 | .cra_name = "__ghash", |
155 | .cra_driver_name = "__ghash-pclmulqdqni", | 155 | .cra_driver_name = "__ghash-pclmulqdqni", |
156 | .cra_priority = 0, | 156 | .cra_priority = 0, |
157 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | 157 | .cra_flags = CRYPTO_ALG_TYPE_SHASH | |
158 | CRYPTO_ALG_INTERNAL, | ||
158 | .cra_blocksize = GHASH_BLOCK_SIZE, | 159 | .cra_blocksize = GHASH_BLOCK_SIZE, |
159 | .cra_ctxsize = sizeof(struct ghash_ctx), | 160 | .cra_ctxsize = sizeof(struct ghash_ctx), |
160 | .cra_module = THIS_MODULE, | 161 | .cra_module = THIS_MODULE, |
@@ -261,7 +262,9 @@ static int ghash_async_init_tfm(struct crypto_tfm *tfm)
261 | struct cryptd_ahash *cryptd_tfm; | 262 | struct cryptd_ahash *cryptd_tfm; |
262 | struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); | 263 | struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); |
263 | 264 | ||
264 | cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0); | 265 | cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", |
266 | CRYPTO_ALG_INTERNAL, | ||
267 | CRYPTO_ALG_INTERNAL); | ||
265 | if (IS_ERR(cryptd_tfm)) | 268 | if (IS_ERR(cryptd_tfm)) |
266 | return PTR_ERR(cryptd_tfm); | 269 | return PTR_ERR(cryptd_tfm); |
267 | ctx->cryptd_tfm = cryptd_tfm; | 270 | ctx->cryptd_tfm = cryptd_tfm; |
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 432f1d76ceb8..6a85598931b5 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -232,7 +232,6 @@ static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
232 | 232 | ||
233 | le128_to_be128((be128 *)walk->iv, &ctrblk); | 233 | le128_to_be128((be128 *)walk->iv, &ctrblk); |
234 | } | 234 | } |
235 | EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit); | ||
236 | 235 | ||
237 | static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | 236 | static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, |
238 | struct blkcipher_desc *desc, | 237 | struct blkcipher_desc *desc, |
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 437e47a4d302..2f63dc89e7a9 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -309,7 +309,8 @@ static struct crypto_alg srp_algs[10] = { {
309 | .cra_name = "__ecb-serpent-avx2", | 309 | .cra_name = "__ecb-serpent-avx2", |
310 | .cra_driver_name = "__driver-ecb-serpent-avx2", | 310 | .cra_driver_name = "__driver-ecb-serpent-avx2", |
311 | .cra_priority = 0, | 311 | .cra_priority = 0, |
312 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 312 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
313 | CRYPTO_ALG_INTERNAL, | ||
313 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 314 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
314 | .cra_ctxsize = sizeof(struct serpent_ctx), | 315 | .cra_ctxsize = sizeof(struct serpent_ctx), |
315 | .cra_alignmask = 0, | 316 | .cra_alignmask = 0, |
@@ -329,7 +330,8 @@ static struct crypto_alg srp_algs[10] = { {
329 | .cra_name = "__cbc-serpent-avx2", | 330 | .cra_name = "__cbc-serpent-avx2", |
330 | .cra_driver_name = "__driver-cbc-serpent-avx2", | 331 | .cra_driver_name = "__driver-cbc-serpent-avx2", |
331 | .cra_priority = 0, | 332 | .cra_priority = 0, |
332 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 333 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
334 | CRYPTO_ALG_INTERNAL, | ||
333 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 335 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
334 | .cra_ctxsize = sizeof(struct serpent_ctx), | 336 | .cra_ctxsize = sizeof(struct serpent_ctx), |
335 | .cra_alignmask = 0, | 337 | .cra_alignmask = 0, |
@@ -349,7 +351,8 @@ static struct crypto_alg srp_algs[10] = { {
349 | .cra_name = "__ctr-serpent-avx2", | 351 | .cra_name = "__ctr-serpent-avx2", |
350 | .cra_driver_name = "__driver-ctr-serpent-avx2", | 352 | .cra_driver_name = "__driver-ctr-serpent-avx2", |
351 | .cra_priority = 0, | 353 | .cra_priority = 0, |
352 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 354 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
355 | CRYPTO_ALG_INTERNAL, | ||
353 | .cra_blocksize = 1, | 356 | .cra_blocksize = 1, |
354 | .cra_ctxsize = sizeof(struct serpent_ctx), | 357 | .cra_ctxsize = sizeof(struct serpent_ctx), |
355 | .cra_alignmask = 0, | 358 | .cra_alignmask = 0, |
@@ -370,7 +373,8 @@ static struct crypto_alg srp_algs[10] = { {
370 | .cra_name = "__lrw-serpent-avx2", | 373 | .cra_name = "__lrw-serpent-avx2", |
371 | .cra_driver_name = "__driver-lrw-serpent-avx2", | 374 | .cra_driver_name = "__driver-lrw-serpent-avx2", |
372 | .cra_priority = 0, | 375 | .cra_priority = 0, |
373 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 376 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
377 | CRYPTO_ALG_INTERNAL, | ||
374 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 378 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
375 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), | 379 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), |
376 | .cra_alignmask = 0, | 380 | .cra_alignmask = 0, |
@@ -394,7 +398,8 @@ static struct crypto_alg srp_algs[10] = { {
394 | .cra_name = "__xts-serpent-avx2", | 398 | .cra_name = "__xts-serpent-avx2", |
395 | .cra_driver_name = "__driver-xts-serpent-avx2", | 399 | .cra_driver_name = "__driver-xts-serpent-avx2", |
396 | .cra_priority = 0, | 400 | .cra_priority = 0, |
397 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 401 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
402 | CRYPTO_ALG_INTERNAL, | ||
398 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 403 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
399 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), | 404 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), |
400 | .cra_alignmask = 0, | 405 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 7e217398b4eb..c8d478af8456 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -378,7 +378,8 @@ static struct crypto_alg serpent_algs[10] = { {
378 | .cra_name = "__ecb-serpent-avx", | 378 | .cra_name = "__ecb-serpent-avx", |
379 | .cra_driver_name = "__driver-ecb-serpent-avx", | 379 | .cra_driver_name = "__driver-ecb-serpent-avx", |
380 | .cra_priority = 0, | 380 | .cra_priority = 0, |
381 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 381 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
382 | CRYPTO_ALG_INTERNAL, | ||
382 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 383 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
383 | .cra_ctxsize = sizeof(struct serpent_ctx), | 384 | .cra_ctxsize = sizeof(struct serpent_ctx), |
384 | .cra_alignmask = 0, | 385 | .cra_alignmask = 0, |
@@ -397,7 +398,8 @@ static struct crypto_alg serpent_algs[10] = { {
397 | .cra_name = "__cbc-serpent-avx", | 398 | .cra_name = "__cbc-serpent-avx", |
398 | .cra_driver_name = "__driver-cbc-serpent-avx", | 399 | .cra_driver_name = "__driver-cbc-serpent-avx", |
399 | .cra_priority = 0, | 400 | .cra_priority = 0, |
400 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 401 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
402 | CRYPTO_ALG_INTERNAL, | ||
401 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 403 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
402 | .cra_ctxsize = sizeof(struct serpent_ctx), | 404 | .cra_ctxsize = sizeof(struct serpent_ctx), |
403 | .cra_alignmask = 0, | 405 | .cra_alignmask = 0, |
@@ -416,7 +418,8 @@ static struct crypto_alg serpent_algs[10] = { {
416 | .cra_name = "__ctr-serpent-avx", | 418 | .cra_name = "__ctr-serpent-avx", |
417 | .cra_driver_name = "__driver-ctr-serpent-avx", | 419 | .cra_driver_name = "__driver-ctr-serpent-avx", |
418 | .cra_priority = 0, | 420 | .cra_priority = 0, |
419 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 421 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
422 | CRYPTO_ALG_INTERNAL, | ||
420 | .cra_blocksize = 1, | 423 | .cra_blocksize = 1, |
421 | .cra_ctxsize = sizeof(struct serpent_ctx), | 424 | .cra_ctxsize = sizeof(struct serpent_ctx), |
422 | .cra_alignmask = 0, | 425 | .cra_alignmask = 0, |
@@ -436,7 +439,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
436 | .cra_name = "__lrw-serpent-avx", | 439 | .cra_name = "__lrw-serpent-avx", |
437 | .cra_driver_name = "__driver-lrw-serpent-avx", | 440 | .cra_driver_name = "__driver-lrw-serpent-avx", |
438 | .cra_priority = 0, | 441 | .cra_priority = 0, |
439 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 442 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
443 | CRYPTO_ALG_INTERNAL, | ||
440 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 444 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
441 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), | 445 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), |
442 | .cra_alignmask = 0, | 446 | .cra_alignmask = 0, |
@@ -459,7 +463,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
459 | .cra_name = "__xts-serpent-avx", | 463 | .cra_name = "__xts-serpent-avx", |
460 | .cra_driver_name = "__driver-xts-serpent-avx", | 464 | .cra_driver_name = "__driver-xts-serpent-avx", |
461 | .cra_priority = 0, | 465 | .cra_priority = 0, |
462 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 466 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
467 | CRYPTO_ALG_INTERNAL, | ||
463 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 468 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
464 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), | 469 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), |
465 | .cra_alignmask = 0, | 470 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index bf025adaea01..3643dd508f45 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c | |||
@@ -387,7 +387,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
387 | .cra_name = "__ecb-serpent-sse2", | 387 | .cra_name = "__ecb-serpent-sse2", |
388 | .cra_driver_name = "__driver-ecb-serpent-sse2", | 388 | .cra_driver_name = "__driver-ecb-serpent-sse2", |
389 | .cra_priority = 0, | 389 | .cra_priority = 0, |
390 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 390 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
391 | CRYPTO_ALG_INTERNAL, | ||
391 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 392 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
392 | .cra_ctxsize = sizeof(struct serpent_ctx), | 393 | .cra_ctxsize = sizeof(struct serpent_ctx), |
393 | .cra_alignmask = 0, | 394 | .cra_alignmask = 0, |
@@ -406,7 +407,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
406 | .cra_name = "__cbc-serpent-sse2", | 407 | .cra_name = "__cbc-serpent-sse2", |
407 | .cra_driver_name = "__driver-cbc-serpent-sse2", | 408 | .cra_driver_name = "__driver-cbc-serpent-sse2", |
408 | .cra_priority = 0, | 409 | .cra_priority = 0, |
409 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 410 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
411 | CRYPTO_ALG_INTERNAL, | ||
410 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 412 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
411 | .cra_ctxsize = sizeof(struct serpent_ctx), | 413 | .cra_ctxsize = sizeof(struct serpent_ctx), |
412 | .cra_alignmask = 0, | 414 | .cra_alignmask = 0, |
@@ -425,7 +427,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
425 | .cra_name = "__ctr-serpent-sse2", | 427 | .cra_name = "__ctr-serpent-sse2", |
426 | .cra_driver_name = "__driver-ctr-serpent-sse2", | 428 | .cra_driver_name = "__driver-ctr-serpent-sse2", |
427 | .cra_priority = 0, | 429 | .cra_priority = 0, |
428 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 430 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
431 | CRYPTO_ALG_INTERNAL, | ||
429 | .cra_blocksize = 1, | 432 | .cra_blocksize = 1, |
430 | .cra_ctxsize = sizeof(struct serpent_ctx), | 433 | .cra_ctxsize = sizeof(struct serpent_ctx), |
431 | .cra_alignmask = 0, | 434 | .cra_alignmask = 0, |
@@ -445,7 +448,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
445 | .cra_name = "__lrw-serpent-sse2", | 448 | .cra_name = "__lrw-serpent-sse2", |
446 | .cra_driver_name = "__driver-lrw-serpent-sse2", | 449 | .cra_driver_name = "__driver-lrw-serpent-sse2", |
447 | .cra_priority = 0, | 450 | .cra_priority = 0, |
448 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 451 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
452 | CRYPTO_ALG_INTERNAL, | ||
449 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 453 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
450 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), | 454 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), |
451 | .cra_alignmask = 0, | 455 | .cra_alignmask = 0, |
@@ -468,7 +472,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
468 | .cra_name = "__xts-serpent-sse2", | 472 | .cra_name = "__xts-serpent-sse2", |
469 | .cra_driver_name = "__driver-xts-serpent-sse2", | 473 | .cra_driver_name = "__driver-xts-serpent-sse2", |
470 | .cra_priority = 0, | 474 | .cra_priority = 0, |
471 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 475 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
476 | CRYPTO_ALG_INTERNAL, | ||
472 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 477 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
473 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), | 478 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), |
474 | .cra_alignmask = 0, | 479 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index fd9f6b035b16..e510b1c5d690 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c | |||
@@ -694,7 +694,8 @@ static struct shash_alg sha1_mb_shash_alg = { | |||
694 | * use ASYNC flag as some buffers in multi-buffer | 694 | * use ASYNC flag as some buffers in multi-buffer |
695 | * algo may not have completed before hashing thread sleep | 695 | * algo may not have completed before hashing thread sleep |
696 | */ | 696 | */ |
697 | .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC, | 697 | .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC | |
698 | CRYPTO_ALG_INTERNAL, | ||
698 | .cra_blocksize = SHA1_BLOCK_SIZE, | 699 | .cra_blocksize = SHA1_BLOCK_SIZE, |
699 | .cra_module = THIS_MODULE, | 700 | .cra_module = THIS_MODULE, |
700 | .cra_list = LIST_HEAD_INIT(sha1_mb_shash_alg.base.cra_list), | 701 | .cra_list = LIST_HEAD_INIT(sha1_mb_shash_alg.base.cra_list), |
@@ -770,7 +771,9 @@ static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm) | |||
770 | struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); | 771 | struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); |
771 | struct mcryptd_hash_ctx *mctx; | 772 | struct mcryptd_hash_ctx *mctx; |
772 | 773 | ||
773 | mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", 0, 0); | 774 | mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", |
775 | CRYPTO_ALG_INTERNAL, | ||
776 | CRYPTO_ALG_INTERNAL); | ||
774 | if (IS_ERR(mcryptd_tfm)) | 777 | if (IS_ERR(mcryptd_tfm)) |
775 | return PTR_ERR(mcryptd_tfm); | 778 | return PTR_ERR(mcryptd_tfm); |
776 | mctx = crypto_ahash_ctx(&mcryptd_tfm->base); | 779 | mctx = crypto_ahash_ctx(&mcryptd_tfm->base); |
@@ -828,7 +831,7 @@ static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate) | |||
828 | while (!list_empty(&cstate->work_list)) { | 831 | while (!list_empty(&cstate->work_list)) { |
829 | rctx = list_entry(cstate->work_list.next, | 832 | rctx = list_entry(cstate->work_list.next, |
830 | struct mcryptd_hash_request_ctx, waiter); | 833 | struct mcryptd_hash_request_ctx, waiter); |
831 | if time_before(cur_time, rctx->tag.expire) | 834 | if (time_before(cur_time, rctx->tag.expire)) |
832 | break; | 835 | break; |
833 | kernel_fpu_begin(); | 836 | kernel_fpu_begin(); |
834 | sha_ctx = (struct sha1_hash_ctx *) sha1_ctx_mgr_flush(cstate->mgr); | 837 | sha_ctx = (struct sha1_hash_ctx *) sha1_ctx_mgr_flush(cstate->mgr); |
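Two things worth flagging in the sha1_mb hunk above (my annotation): the inner "__sha1-mb" shash is marked internal just like the cipher helpers earlier, and the flusher's bare "if time_before(...)" gains its missing parentheses. The old form only compiled because the macro already expands to a parenthesised expression; a simplified stand-in (not the kernel's exact definition, which also type-checks its arguments) shows why:

	/* Simplified stand-in for illustration only. */
	#define time_before(a, b)	((long)((a) - (b)) < 0)

	/* "if time_before(cur_time, rctx->tag.expire) break;" therefore
	 * preprocessed to "if ((long)((cur_time) - (rctx->tag.expire)) < 0) break;",
	 * which is valid C but fragile -- it stops compiling the moment the macro
	 * is turned into an inline function. */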
diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c index 4ca7e166a2aa..822acb5b464c 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c +++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c | |||
@@ -56,7 +56,7 @@ | |||
56 | void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state) | 56 | void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state) |
57 | { | 57 | { |
58 | unsigned int j; | 58 | unsigned int j; |
59 | state->unused_lanes = 0xF76543210; | 59 | state->unused_lanes = 0xF76543210ULL; |
60 | for (j = 0; j < 8; j++) { | 60 | for (j = 0; j < 8; j++) { |
61 | state->lens[j] = 0xFFFFFFFF; | 61 | state->lens[j] = 0xFFFFFFFF; |
62 | state->ldata[j].job_in_lane = NULL; | 62 | state->ldata[j].job_in_lane = NULL; |
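On the 0xF76543210ULL change (my gloss, not the commit message): the constant needs 36 bits, so the type it gets without a suffix depends on the platform's integer model; the ULL suffix pins it to unsigned long long before it is assigned to the 64-bit unused_lanes field. A small user-space illustration of the difference, assuming an LP64 build for the first result:

	#include <stdio.h>

	int main(void)
	{
		/* Unsuffixed: the first of int, unsigned int, long, ... that can
		 * hold the value -- 8 bytes on LP64, but platform dependent. */
		printf("%zu\n", sizeof(0xF76543210));
		/* Suffixed: always unsigned long long, at least 64 bits. */
		printf("%zu\n", sizeof(0xF76543210ULL));
		return 0;
	}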
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 6c20fe04a738..33d1b9dc14cc 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c | |||
@@ -28,7 +28,7 @@ | |||
28 | #include <linux/cryptohash.h> | 28 | #include <linux/cryptohash.h> |
29 | #include <linux/types.h> | 29 | #include <linux/types.h> |
30 | #include <crypto/sha.h> | 30 | #include <crypto/sha.h> |
31 | #include <asm/byteorder.h> | 31 | #include <crypto/sha1_base.h> |
32 | #include <asm/i387.h> | 32 | #include <asm/i387.h> |
33 | #include <asm/xcr.h> | 33 | #include <asm/xcr.h> |
34 | #include <asm/xsave.h> | 34 | #include <asm/xsave.h> |
@@ -44,132 +44,51 @@ asmlinkage void sha1_transform_avx(u32 *digest, const char *data, | |||
44 | #define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ | 44 | #define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ |
45 | 45 | ||
46 | asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, | 46 | asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, |
47 | unsigned int rounds); | 47 | unsigned int rounds); |
48 | #endif | 48 | #endif |
49 | 49 | ||
50 | static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int); | 50 | static void (*sha1_transform_asm)(u32 *, const char *, unsigned int); |
51 | |||
52 | |||
53 | static int sha1_ssse3_init(struct shash_desc *desc) | ||
54 | { | ||
55 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
56 | |||
57 | *sctx = (struct sha1_state){ | ||
58 | .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, | ||
59 | }; | ||
60 | |||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | static int __sha1_ssse3_update(struct shash_desc *desc, const u8 *data, | ||
65 | unsigned int len, unsigned int partial) | ||
66 | { | ||
67 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
68 | unsigned int done = 0; | ||
69 | |||
70 | sctx->count += len; | ||
71 | |||
72 | if (partial) { | ||
73 | done = SHA1_BLOCK_SIZE - partial; | ||
74 | memcpy(sctx->buffer + partial, data, done); | ||
75 | sha1_transform_asm(sctx->state, sctx->buffer, 1); | ||
76 | } | ||
77 | |||
78 | if (len - done >= SHA1_BLOCK_SIZE) { | ||
79 | const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; | ||
80 | |||
81 | sha1_transform_asm(sctx->state, data + done, rounds); | ||
82 | done += rounds * SHA1_BLOCK_SIZE; | ||
83 | } | ||
84 | |||
85 | memcpy(sctx->buffer, data + done, len - done); | ||
86 | |||
87 | return 0; | ||
88 | } | ||
89 | 51 | ||
90 | static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, | 52 | static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, |
91 | unsigned int len) | 53 | unsigned int len) |
92 | { | 54 | { |
93 | struct sha1_state *sctx = shash_desc_ctx(desc); | 55 | struct sha1_state *sctx = shash_desc_ctx(desc); |
94 | unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; | ||
95 | int res; | ||
96 | 56 | ||
97 | /* Handle the fast case right here */ | 57 | if (!irq_fpu_usable() || |
98 | if (partial + len < SHA1_BLOCK_SIZE) { | 58 | (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) |
99 | sctx->count += len; | 59 | return crypto_sha1_update(desc, data, len); |
100 | memcpy(sctx->buffer + partial, data, len); | ||
101 | 60 | ||
102 | return 0; | 61 | /* make sure casting to sha1_block_fn() is safe */ |
103 | } | 62 | BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0); |
104 | 63 | ||
105 | if (!irq_fpu_usable()) { | 64 | kernel_fpu_begin(); |
106 | res = crypto_sha1_update(desc, data, len); | 65 | sha1_base_do_update(desc, data, len, |
107 | } else { | 66 | (sha1_block_fn *)sha1_transform_asm); |
108 | kernel_fpu_begin(); | 67 | kernel_fpu_end(); |
109 | res = __sha1_ssse3_update(desc, data, len, partial); | ||
110 | kernel_fpu_end(); | ||
111 | } | ||
112 | |||
113 | return res; | ||
114 | } | ||
115 | |||
116 | |||
117 | /* Add padding and return the message digest. */ | ||
118 | static int sha1_ssse3_final(struct shash_desc *desc, u8 *out) | ||
119 | { | ||
120 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
121 | unsigned int i, index, padlen; | ||
122 | __be32 *dst = (__be32 *)out; | ||
123 | __be64 bits; | ||
124 | static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; | ||
125 | |||
126 | bits = cpu_to_be64(sctx->count << 3); | ||
127 | |||
128 | /* Pad out to 56 mod 64 and append length */ | ||
129 | index = sctx->count % SHA1_BLOCK_SIZE; | ||
130 | padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); | ||
131 | if (!irq_fpu_usable()) { | ||
132 | crypto_sha1_update(desc, padding, padlen); | ||
133 | crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
134 | } else { | ||
135 | kernel_fpu_begin(); | ||
136 | /* We need to fill a whole block for __sha1_ssse3_update() */ | ||
137 | if (padlen <= 56) { | ||
138 | sctx->count += padlen; | ||
139 | memcpy(sctx->buffer + index, padding, padlen); | ||
140 | } else { | ||
141 | __sha1_ssse3_update(desc, padding, padlen, index); | ||
142 | } | ||
143 | __sha1_ssse3_update(desc, (const u8 *)&bits, sizeof(bits), 56); | ||
144 | kernel_fpu_end(); | ||
145 | } | ||
146 | |||
147 | /* Store state in digest */ | ||
148 | for (i = 0; i < 5; i++) | ||
149 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
150 | |||
151 | /* Wipe context */ | ||
152 | memset(sctx, 0, sizeof(*sctx)); | ||
153 | 68 | ||
154 | return 0; | 69 | return 0; |
155 | } | 70 | } |
156 | 71 | ||
157 | static int sha1_ssse3_export(struct shash_desc *desc, void *out) | 72 | static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data, |
73 | unsigned int len, u8 *out) | ||
158 | { | 74 | { |
159 | struct sha1_state *sctx = shash_desc_ctx(desc); | 75 | if (!irq_fpu_usable()) |
76 | return crypto_sha1_finup(desc, data, len, out); | ||
160 | 77 | ||
161 | memcpy(out, sctx, sizeof(*sctx)); | 78 | kernel_fpu_begin(); |
79 | if (len) | ||
80 | sha1_base_do_update(desc, data, len, | ||
81 | (sha1_block_fn *)sha1_transform_asm); | ||
82 | sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_asm); | ||
83 | kernel_fpu_end(); | ||
162 | 84 | ||
163 | return 0; | 85 | return sha1_base_finish(desc, out); |
164 | } | 86 | } |
165 | 87 | ||
166 | static int sha1_ssse3_import(struct shash_desc *desc, const void *in) | 88 | /* Add padding and return the message digest. */ |
89 | static int sha1_ssse3_final(struct shash_desc *desc, u8 *out) | ||
167 | { | 90 | { |
168 | struct sha1_state *sctx = shash_desc_ctx(desc); | 91 | return sha1_ssse3_finup(desc, NULL, 0, out); |
169 | |||
170 | memcpy(sctx, in, sizeof(*sctx)); | ||
171 | |||
172 | return 0; | ||
173 | } | 92 | } |
174 | 93 | ||
175 | #ifdef CONFIG_AS_AVX2 | 94 | #ifdef CONFIG_AS_AVX2 |
@@ -186,13 +105,11 @@ static void sha1_apply_transform_avx2(u32 *digest, const char *data, | |||
186 | 105 | ||
187 | static struct shash_alg alg = { | 106 | static struct shash_alg alg = { |
188 | .digestsize = SHA1_DIGEST_SIZE, | 107 | .digestsize = SHA1_DIGEST_SIZE, |
189 | .init = sha1_ssse3_init, | 108 | .init = sha1_base_init, |
190 | .update = sha1_ssse3_update, | 109 | .update = sha1_ssse3_update, |
191 | .final = sha1_ssse3_final, | 110 | .final = sha1_ssse3_final, |
192 | .export = sha1_ssse3_export, | 111 | .finup = sha1_ssse3_finup, |
193 | .import = sha1_ssse3_import, | ||
194 | .descsize = sizeof(struct sha1_state), | 112 | .descsize = sizeof(struct sha1_state), |
195 | .statesize = sizeof(struct sha1_state), | ||
196 | .base = { | 113 | .base = { |
197 | .cra_name = "sha1", | 114 | .cra_name = "sha1", |
198 | .cra_driver_name= "sha1-ssse3", | 115 | .cra_driver_name= "sha1-ssse3", |
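The rewritten sha1 glue above leans on the generic sha1_base_* helpers and funnels the SSSE3/AVX/AVX2 routines through a single function-pointer cast; the BUILD_BUG_ON(offsetof(...)) is what keeps that cast safe in practice. A rough sketch of the shape being assumed (abbreviated, not the verbatim kernel headers):

	/* Sketch of the assumption, not the literal kernel definitions. */
	typedef void (sha1_block_fn)(struct sha1_state *state, const u8 *src, int blocks);

	/* The asm entry points take a bare word array instead of the state struct: */
	asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
					     unsigned int rounds);

	/* Casting one to the other only works because the five digest words are the
	 * first member of struct sha1_state (offset 0), which is exactly what
	 * BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0) asserts. */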
diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index 642f15687a0a..92b3b5d75ba9 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S | |||
@@ -96,10 +96,10 @@ SHUF_DC00 = %xmm12 # shuffle xDxC -> DC00 | |||
96 | BYTE_FLIP_MASK = %xmm13 | 96 | BYTE_FLIP_MASK = %xmm13 |
97 | 97 | ||
98 | NUM_BLKS = %rdx # 3rd arg | 98 | NUM_BLKS = %rdx # 3rd arg |
99 | CTX = %rsi # 2nd arg | 99 | INP = %rsi # 2nd arg |
100 | INP = %rdi # 1st arg | 100 | CTX = %rdi # 1st arg |
101 | 101 | ||
102 | SRND = %rdi # clobbers INP | 102 | SRND = %rsi # clobbers INP |
103 | c = %ecx | 103 | c = %ecx |
104 | d = %r8d | 104 | d = %r8d |
105 | e = %edx | 105 | e = %edx |
@@ -342,8 +342,8 @@ a = TMP_ | |||
342 | 342 | ||
343 | ######################################################################## | 343 | ######################################################################## |
344 | ## void sha256_transform_avx(void *input_data, UINT32 digest[8], UINT64 num_blks) | 344 | ## void sha256_transform_avx(void *input_data, UINT32 digest[8], UINT64 num_blks) |
345 | ## arg 1 : pointer to input data | 345 | ## arg 1 : pointer to digest |
346 | ## arg 2 : pointer to digest | 346 | ## arg 2 : pointer to input data |
347 | ## arg 3 : Num blocks | 347 | ## arg 3 : Num blocks |
348 | ######################################################################## | 348 | ######################################################################## |
349 | .text | 349 | .text |
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 9e86944c539d..570ec5ec62d7 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S | |||
@@ -91,12 +91,12 @@ BYTE_FLIP_MASK = %ymm13 | |||
91 | X_BYTE_FLIP_MASK = %xmm13 # XMM version of BYTE_FLIP_MASK | 91 | X_BYTE_FLIP_MASK = %xmm13 # XMM version of BYTE_FLIP_MASK |
92 | 92 | ||
93 | NUM_BLKS = %rdx # 3rd arg | 93 | NUM_BLKS = %rdx # 3rd arg |
94 | CTX = %rsi # 2nd arg | 94 | INP = %rsi # 2nd arg |
95 | INP = %rdi # 1st arg | 95 | CTX = %rdi # 1st arg |
96 | c = %ecx | 96 | c = %ecx |
97 | d = %r8d | 97 | d = %r8d |
98 | e = %edx # clobbers NUM_BLKS | 98 | e = %edx # clobbers NUM_BLKS |
99 | y3 = %edi # clobbers INP | 99 | y3 = %esi # clobbers INP |
100 | 100 | ||
101 | 101 | ||
102 | TBL = %rbp | 102 | TBL = %rbp |
@@ -523,8 +523,8 @@ STACK_SIZE = _RSP + _RSP_SIZE | |||
523 | 523 | ||
524 | ######################################################################## | 524 | ######################################################################## |
525 | ## void sha256_transform_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks) | 525 | ## void sha256_transform_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks) |
526 | ## arg 1 : pointer to input data | 526 | ## arg 1 : pointer to digest |
527 | ## arg 2 : pointer to digest | 527 | ## arg 2 : pointer to input data |
528 | ## arg 3 : Num blocks | 528 | ## arg 3 : Num blocks |
529 | ######################################################################## | 529 | ######################################################################## |
530 | .text | 530 | .text |
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index f833b74d902b..2cedc44e8121 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S | |||
@@ -88,10 +88,10 @@ SHUF_DC00 = %xmm11 # shuffle xDxC -> DC00 | |||
88 | BYTE_FLIP_MASK = %xmm12 | 88 | BYTE_FLIP_MASK = %xmm12 |
89 | 89 | ||
90 | NUM_BLKS = %rdx # 3rd arg | 90 | NUM_BLKS = %rdx # 3rd arg |
91 | CTX = %rsi # 2nd arg | 91 | INP = %rsi # 2nd arg |
92 | INP = %rdi # 1st arg | 92 | CTX = %rdi # 1st arg |
93 | 93 | ||
94 | SRND = %rdi # clobbers INP | 94 | SRND = %rsi # clobbers INP |
95 | c = %ecx | 95 | c = %ecx |
96 | d = %r8d | 96 | d = %r8d |
97 | e = %edx | 97 | e = %edx |
@@ -348,8 +348,8 @@ a = TMP_ | |||
348 | 348 | ||
349 | ######################################################################## | 349 | ######################################################################## |
350 | ## void sha256_transform_ssse3(void *input_data, UINT32 digest[8], UINT64 num_blks) | 350 | ## void sha256_transform_ssse3(void *input_data, UINT32 digest[8], UINT64 num_blks) |
351 | ## arg 1 : pointer to input data | 351 | ## arg 1 : pointer to digest |
352 | ## arg 2 : pointer to digest | 352 | ## arg 2 : pointer to input data |
353 | ## arg 3 : Num blocks | 353 | ## arg 3 : Num blocks |
354 | ######################################################################## | 354 | ######################################################################## |
355 | .text | 355 | .text |
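The register renames in the three sha256-*-asm.S files above are all the same change (my summary): the digest pointer moves to the first argument and the data pointer to the second, so under the x86-64 SysV convention CTX now arrives in %rdi and INP in %rsi. That lines the asm entry points up with the generic (state, data, blocks) block-function shape the sha*_base helpers expect; the glue diff just below swaps the C prototypes to agree. Sketch of the correspondence, given as an assumption for illustration:

	/* x86-64 SysV: arg1 = %rdi, arg2 = %rsi, arg3 = %rdx. */
	asmlinkage void sha256_transform_ssse3(u32 *digest,       /* %rdi - CTX      */
					       const char *data,  /* %rsi - INP      */
					       u64 rounds);       /* %rdx - NUM_BLKS */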
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 8fad72f4dfd2..ccc338881ee8 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c | |||
@@ -36,195 +36,74 @@ | |||
36 | #include <linux/cryptohash.h> | 36 | #include <linux/cryptohash.h> |
37 | #include <linux/types.h> | 37 | #include <linux/types.h> |
38 | #include <crypto/sha.h> | 38 | #include <crypto/sha.h> |
39 | #include <asm/byteorder.h> | 39 | #include <crypto/sha256_base.h> |
40 | #include <asm/i387.h> | 40 | #include <asm/i387.h> |
41 | #include <asm/xcr.h> | 41 | #include <asm/xcr.h> |
42 | #include <asm/xsave.h> | 42 | #include <asm/xsave.h> |
43 | #include <linux/string.h> | 43 | #include <linux/string.h> |
44 | 44 | ||
45 | asmlinkage void sha256_transform_ssse3(const char *data, u32 *digest, | 45 | asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, |
46 | u64 rounds); | 46 | u64 rounds); |
47 | #ifdef CONFIG_AS_AVX | 47 | #ifdef CONFIG_AS_AVX |
48 | asmlinkage void sha256_transform_avx(const char *data, u32 *digest, | 48 | asmlinkage void sha256_transform_avx(u32 *digest, const char *data, |
49 | u64 rounds); | 49 | u64 rounds); |
50 | #endif | 50 | #endif |
51 | #ifdef CONFIG_AS_AVX2 | 51 | #ifdef CONFIG_AS_AVX2 |
52 | asmlinkage void sha256_transform_rorx(const char *data, u32 *digest, | 52 | asmlinkage void sha256_transform_rorx(u32 *digest, const char *data, |
53 | u64 rounds); | 53 | u64 rounds); |
54 | #endif | 54 | #endif |
55 | 55 | ||
56 | static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64); | 56 | static void (*sha256_transform_asm)(u32 *, const char *, u64); |
57 | |||
58 | |||
59 | static int sha256_ssse3_init(struct shash_desc *desc) | ||
60 | { | ||
61 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
62 | |||
63 | sctx->state[0] = SHA256_H0; | ||
64 | sctx->state[1] = SHA256_H1; | ||
65 | sctx->state[2] = SHA256_H2; | ||
66 | sctx->state[3] = SHA256_H3; | ||
67 | sctx->state[4] = SHA256_H4; | ||
68 | sctx->state[5] = SHA256_H5; | ||
69 | sctx->state[6] = SHA256_H6; | ||
70 | sctx->state[7] = SHA256_H7; | ||
71 | sctx->count = 0; | ||
72 | |||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data, | ||
77 | unsigned int len, unsigned int partial) | ||
78 | { | ||
79 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
80 | unsigned int done = 0; | ||
81 | |||
82 | sctx->count += len; | ||
83 | |||
84 | if (partial) { | ||
85 | done = SHA256_BLOCK_SIZE - partial; | ||
86 | memcpy(sctx->buf + partial, data, done); | ||
87 | sha256_transform_asm(sctx->buf, sctx->state, 1); | ||
88 | } | ||
89 | |||
90 | if (len - done >= SHA256_BLOCK_SIZE) { | ||
91 | const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; | ||
92 | |||
93 | sha256_transform_asm(data + done, sctx->state, (u64) rounds); | ||
94 | |||
95 | done += rounds * SHA256_BLOCK_SIZE; | ||
96 | } | ||
97 | |||
98 | memcpy(sctx->buf, data + done, len - done); | ||
99 | |||
100 | return 0; | ||
101 | } | ||
102 | 57 | ||
103 | static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, | 58 | static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, |
104 | unsigned int len) | 59 | unsigned int len) |
105 | { | 60 | { |
106 | struct sha256_state *sctx = shash_desc_ctx(desc); | 61 | struct sha256_state *sctx = shash_desc_ctx(desc); |
107 | unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; | ||
108 | int res; | ||
109 | 62 | ||
110 | /* Handle the fast case right here */ | 63 | if (!irq_fpu_usable() || |
111 | if (partial + len < SHA256_BLOCK_SIZE) { | 64 | (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) |
112 | sctx->count += len; | 65 | return crypto_sha256_update(desc, data, len); |
113 | memcpy(sctx->buf + partial, data, len); | ||
114 | 66 | ||
115 | return 0; | 67 | /* make sure casting to sha256_block_fn() is safe */ |
116 | } | 68 | BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0); |
117 | |||
118 | if (!irq_fpu_usable()) { | ||
119 | res = crypto_sha256_update(desc, data, len); | ||
120 | } else { | ||
121 | kernel_fpu_begin(); | ||
122 | res = __sha256_ssse3_update(desc, data, len, partial); | ||
123 | kernel_fpu_end(); | ||
124 | } | ||
125 | |||
126 | return res; | ||
127 | } | ||
128 | 69 | ||
129 | 70 | kernel_fpu_begin(); | |
130 | /* Add padding and return the message digest. */ | 71 | sha256_base_do_update(desc, data, len, |
131 | static int sha256_ssse3_final(struct shash_desc *desc, u8 *out) | 72 | (sha256_block_fn *)sha256_transform_asm); |
132 | { | 73 | kernel_fpu_end(); |
133 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
134 | unsigned int i, index, padlen; | ||
135 | __be32 *dst = (__be32 *)out; | ||
136 | __be64 bits; | ||
137 | static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; | ||
138 | |||
139 | bits = cpu_to_be64(sctx->count << 3); | ||
140 | |||
141 | /* Pad out to 56 mod 64 and append length */ | ||
142 | index = sctx->count % SHA256_BLOCK_SIZE; | ||
143 | padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index); | ||
144 | |||
145 | if (!irq_fpu_usable()) { | ||
146 | crypto_sha256_update(desc, padding, padlen); | ||
147 | crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
148 | } else { | ||
149 | kernel_fpu_begin(); | ||
150 | /* We need to fill a whole block for __sha256_ssse3_update() */ | ||
151 | if (padlen <= 56) { | ||
152 | sctx->count += padlen; | ||
153 | memcpy(sctx->buf + index, padding, padlen); | ||
154 | } else { | ||
155 | __sha256_ssse3_update(desc, padding, padlen, index); | ||
156 | } | ||
157 | __sha256_ssse3_update(desc, (const u8 *)&bits, | ||
158 | sizeof(bits), 56); | ||
159 | kernel_fpu_end(); | ||
160 | } | ||
161 | |||
162 | /* Store state in digest */ | ||
163 | for (i = 0; i < 8; i++) | ||
164 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
165 | |||
166 | /* Wipe context */ | ||
167 | memset(sctx, 0, sizeof(*sctx)); | ||
168 | 74 | ||
169 | return 0; | 75 | return 0; |
170 | } | 76 | } |
171 | 77 | ||
172 | static int sha256_ssse3_export(struct shash_desc *desc, void *out) | 78 | static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data, |
79 | unsigned int len, u8 *out) | ||
173 | { | 80 | { |
174 | struct sha256_state *sctx = shash_desc_ctx(desc); | 81 | if (!irq_fpu_usable()) |
82 | return crypto_sha256_finup(desc, data, len, out); | ||
175 | 83 | ||
176 | memcpy(out, sctx, sizeof(*sctx)); | 84 | kernel_fpu_begin(); |
85 | if (len) | ||
86 | sha256_base_do_update(desc, data, len, | ||
87 | (sha256_block_fn *)sha256_transform_asm); | ||
88 | sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_transform_asm); | ||
89 | kernel_fpu_end(); | ||
177 | 90 | ||
178 | return 0; | 91 | return sha256_base_finish(desc, out); |
179 | } | 92 | } |
180 | 93 | ||
181 | static int sha256_ssse3_import(struct shash_desc *desc, const void *in) | 94 | /* Add padding and return the message digest. */ |
182 | { | 95 | static int sha256_ssse3_final(struct shash_desc *desc, u8 *out) |
183 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
184 | |||
185 | memcpy(sctx, in, sizeof(*sctx)); | ||
186 | |||
187 | return 0; | ||
188 | } | ||
189 | |||
190 | static int sha224_ssse3_init(struct shash_desc *desc) | ||
191 | { | ||
192 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
193 | |||
194 | sctx->state[0] = SHA224_H0; | ||
195 | sctx->state[1] = SHA224_H1; | ||
196 | sctx->state[2] = SHA224_H2; | ||
197 | sctx->state[3] = SHA224_H3; | ||
198 | sctx->state[4] = SHA224_H4; | ||
199 | sctx->state[5] = SHA224_H5; | ||
200 | sctx->state[6] = SHA224_H6; | ||
201 | sctx->state[7] = SHA224_H7; | ||
202 | sctx->count = 0; | ||
203 | |||
204 | return 0; | ||
205 | } | ||
206 | |||
207 | static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash) | ||
208 | { | 96 | { |
209 | u8 D[SHA256_DIGEST_SIZE]; | 97 | return sha256_ssse3_finup(desc, NULL, 0, out); |
210 | |||
211 | sha256_ssse3_final(desc, D); | ||
212 | |||
213 | memcpy(hash, D, SHA224_DIGEST_SIZE); | ||
214 | memzero_explicit(D, SHA256_DIGEST_SIZE); | ||
215 | |||
216 | return 0; | ||
217 | } | 98 | } |
218 | 99 | ||
219 | static struct shash_alg algs[] = { { | 100 | static struct shash_alg algs[] = { { |
220 | .digestsize = SHA256_DIGEST_SIZE, | 101 | .digestsize = SHA256_DIGEST_SIZE, |
221 | .init = sha256_ssse3_init, | 102 | .init = sha256_base_init, |
222 | .update = sha256_ssse3_update, | 103 | .update = sha256_ssse3_update, |
223 | .final = sha256_ssse3_final, | 104 | .final = sha256_ssse3_final, |
224 | .export = sha256_ssse3_export, | 105 | .finup = sha256_ssse3_finup, |
225 | .import = sha256_ssse3_import, | ||
226 | .descsize = sizeof(struct sha256_state), | 106 | .descsize = sizeof(struct sha256_state), |
227 | .statesize = sizeof(struct sha256_state), | ||
228 | .base = { | 107 | .base = { |
229 | .cra_name = "sha256", | 108 | .cra_name = "sha256", |
230 | .cra_driver_name = "sha256-ssse3", | 109 | .cra_driver_name = "sha256-ssse3", |
@@ -235,13 +114,11 @@ static struct shash_alg algs[] = { { | |||
235 | } | 114 | } |
236 | }, { | 115 | }, { |
237 | .digestsize = SHA224_DIGEST_SIZE, | 116 | .digestsize = SHA224_DIGEST_SIZE, |
238 | .init = sha224_ssse3_init, | 117 | .init = sha224_base_init, |
239 | .update = sha256_ssse3_update, | 118 | .update = sha256_ssse3_update, |
240 | .final = sha224_ssse3_final, | 119 | .final = sha256_ssse3_final, |
241 | .export = sha256_ssse3_export, | 120 | .finup = sha256_ssse3_finup, |
242 | .import = sha256_ssse3_import, | ||
243 | .descsize = sizeof(struct sha256_state), | 121 | .descsize = sizeof(struct sha256_state), |
244 | .statesize = sizeof(struct sha256_state), | ||
245 | .base = { | 122 | .base = { |
246 | .cra_name = "sha224", | 123 | .cra_name = "sha224", |
247 | .cra_driver_name = "sha224-ssse3", | 124 | .cra_driver_name = "sha224-ssse3", |
diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 974dde9bc6cd..565274d6a641 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S | |||
@@ -54,9 +54,9 @@ | |||
54 | 54 | ||
55 | # Virtual Registers | 55 | # Virtual Registers |
56 | # ARG1 | 56 | # ARG1 |
57 | msg = %rdi | 57 | digest = %rdi |
58 | # ARG2 | 58 | # ARG2 |
59 | digest = %rsi | 59 | msg = %rsi |
60 | # ARG3 | 60 | # ARG3 |
61 | msglen = %rdx | 61 | msglen = %rdx |
62 | T1 = %rcx | 62 | T1 = %rcx |
@@ -271,7 +271,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE | |||
271 | .endm | 271 | .endm |
272 | 272 | ||
273 | ######################################################################## | 273 | ######################################################################## |
274 | # void sha512_transform_avx(const void* M, void* D, u64 L) | 274 | # void sha512_transform_avx(void* D, const void* M, u64 L) |
275 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. | 275 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. |
276 | # The size of the message pointed to by M must be an integer multiple of SHA512 | 276 | # The size of the message pointed to by M must be an integer multiple of SHA512 |
277 | # message blocks. | 277 | # message blocks. |
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index 568b96105f5c..a4771dcd1fcf 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S | |||
@@ -70,9 +70,9 @@ XFER = YTMP0 | |||
70 | BYTE_FLIP_MASK = %ymm9 | 70 | BYTE_FLIP_MASK = %ymm9 |
71 | 71 | ||
72 | # 1st arg | 72 | # 1st arg |
73 | INP = %rdi | 73 | CTX = %rdi |
74 | # 2nd arg | 74 | # 2nd arg |
75 | CTX = %rsi | 75 | INP = %rsi |
76 | # 3rd arg | 76 | # 3rd arg |
77 | NUM_BLKS = %rdx | 77 | NUM_BLKS = %rdx |
78 | 78 | ||
@@ -562,7 +562,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE | |||
562 | .endm | 562 | .endm |
563 | 563 | ||
564 | ######################################################################## | 564 | ######################################################################## |
565 | # void sha512_transform_rorx(const void* M, void* D, uint64_t L)# | 565 | # void sha512_transform_rorx(void* D, const void* M, uint64_t L)# |
566 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. | 566 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. |
567 | # The size of the message pointed to by M must be an integer multiple of SHA512 | 567 | # The size of the message pointed to by M must be an integer multiple of SHA512 |
568 | # message blocks. | 568 | # message blocks. |
diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index fb56855d51f5..e610e29cbc81 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S | |||
@@ -53,9 +53,9 @@ | |||
53 | 53 | ||
54 | # Virtual Registers | 54 | # Virtual Registers |
55 | # ARG1 | 55 | # ARG1 |
56 | msg = %rdi | 56 | digest = %rdi |
57 | # ARG2 | 57 | # ARG2 |
58 | digest = %rsi | 58 | msg = %rsi |
59 | # ARG3 | 59 | # ARG3 |
60 | msglen = %rdx | 60 | msglen = %rdx |
61 | T1 = %rcx | 61 | T1 = %rcx |
@@ -269,7 +269,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE | |||
269 | .endm | 269 | .endm |
270 | 270 | ||
271 | ######################################################################## | 271 | ######################################################################## |
272 | # void sha512_transform_ssse3(const void* M, void* D, u64 L)# | 272 | # void sha512_transform_ssse3(void* D, const void* M, u64 L)# |
273 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. | 273 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. |
274 | # The size of the message pointed to by M must be an integer multiple of SHA512 | 274 | # The size of the message pointed to by M must be an integer multiple of SHA512 |
275 | # message blocks. | 275 | # message blocks. |
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 0b6af26832bf..d9fa4c1e063f 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c | |||
@@ -34,205 +34,75 @@ | |||
34 | #include <linux/cryptohash.h> | 34 | #include <linux/cryptohash.h> |
35 | #include <linux/types.h> | 35 | #include <linux/types.h> |
36 | #include <crypto/sha.h> | 36 | #include <crypto/sha.h> |
37 | #include <asm/byteorder.h> | 37 | #include <crypto/sha512_base.h> |
38 | #include <asm/i387.h> | 38 | #include <asm/i387.h> |
39 | #include <asm/xcr.h> | 39 | #include <asm/xcr.h> |
40 | #include <asm/xsave.h> | 40 | #include <asm/xsave.h> |
41 | 41 | ||
42 | #include <linux/string.h> | 42 | #include <linux/string.h> |
43 | 43 | ||
44 | asmlinkage void sha512_transform_ssse3(const char *data, u64 *digest, | 44 | asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data, |
45 | u64 rounds); | 45 | u64 rounds); |
46 | #ifdef CONFIG_AS_AVX | 46 | #ifdef CONFIG_AS_AVX |
47 | asmlinkage void sha512_transform_avx(const char *data, u64 *digest, | 47 | asmlinkage void sha512_transform_avx(u64 *digest, const char *data, |
48 | u64 rounds); | 48 | u64 rounds); |
49 | #endif | 49 | #endif |
50 | #ifdef CONFIG_AS_AVX2 | 50 | #ifdef CONFIG_AS_AVX2 |
51 | asmlinkage void sha512_transform_rorx(const char *data, u64 *digest, | 51 | asmlinkage void sha512_transform_rorx(u64 *digest, const char *data, |
52 | u64 rounds); | 52 | u64 rounds); |
53 | #endif | 53 | #endif |
54 | 54 | ||
55 | static asmlinkage void (*sha512_transform_asm)(const char *, u64 *, u64); | 55 | static void (*sha512_transform_asm)(u64 *, const char *, u64); |
56 | |||
57 | |||
58 | static int sha512_ssse3_init(struct shash_desc *desc) | ||
59 | { | ||
60 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
61 | |||
62 | sctx->state[0] = SHA512_H0; | ||
63 | sctx->state[1] = SHA512_H1; | ||
64 | sctx->state[2] = SHA512_H2; | ||
65 | sctx->state[3] = SHA512_H3; | ||
66 | sctx->state[4] = SHA512_H4; | ||
67 | sctx->state[5] = SHA512_H5; | ||
68 | sctx->state[6] = SHA512_H6; | ||
69 | sctx->state[7] = SHA512_H7; | ||
70 | sctx->count[0] = sctx->count[1] = 0; | ||
71 | |||
72 | return 0; | ||
73 | } | ||
74 | 56 | ||
75 | static int __sha512_ssse3_update(struct shash_desc *desc, const u8 *data, | 57 | static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, |
76 | unsigned int len, unsigned int partial) | 58 | unsigned int len) |
77 | { | 59 | { |
78 | struct sha512_state *sctx = shash_desc_ctx(desc); | 60 | struct sha512_state *sctx = shash_desc_ctx(desc); |
79 | unsigned int done = 0; | ||
80 | |||
81 | sctx->count[0] += len; | ||
82 | if (sctx->count[0] < len) | ||
83 | sctx->count[1]++; | ||
84 | 61 | ||
85 | if (partial) { | 62 | if (!irq_fpu_usable() || |
86 | done = SHA512_BLOCK_SIZE - partial; | 63 | (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE) |
87 | memcpy(sctx->buf + partial, data, done); | 64 | return crypto_sha512_update(desc, data, len); |
88 | sha512_transform_asm(sctx->buf, sctx->state, 1); | ||
89 | } | ||
90 | |||
91 | if (len - done >= SHA512_BLOCK_SIZE) { | ||
92 | const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE; | ||
93 | 65 | ||
94 | sha512_transform_asm(data + done, sctx->state, (u64) rounds); | 66 | /* make sure casting to sha512_block_fn() is safe */ |
95 | 67 | BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0); | |
96 | done += rounds * SHA512_BLOCK_SIZE; | ||
97 | } | ||
98 | 68 | ||
99 | memcpy(sctx->buf, data + done, len - done); | 69 | kernel_fpu_begin(); |
70 | sha512_base_do_update(desc, data, len, | ||
71 | (sha512_block_fn *)sha512_transform_asm); | ||
72 | kernel_fpu_end(); | ||
100 | 73 | ||
101 | return 0; | 74 | return 0; |
102 | } | 75 | } |
103 | 76 | ||
104 | static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, | 77 | static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data, |
105 | unsigned int len) | 78 | unsigned int len, u8 *out) |
106 | { | 79 | { |
107 | struct sha512_state *sctx = shash_desc_ctx(desc); | 80 | if (!irq_fpu_usable()) |
108 | unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; | 81 | return crypto_sha512_finup(desc, data, len, out); |
109 | int res; | ||
110 | |||
111 | /* Handle the fast case right here */ | ||
112 | if (partial + len < SHA512_BLOCK_SIZE) { | ||
113 | sctx->count[0] += len; | ||
114 | if (sctx->count[0] < len) | ||
115 | sctx->count[1]++; | ||
116 | memcpy(sctx->buf + partial, data, len); | ||
117 | |||
118 | return 0; | ||
119 | } | ||
120 | 82 | ||
121 | if (!irq_fpu_usable()) { | 83 | kernel_fpu_begin(); |
122 | res = crypto_sha512_update(desc, data, len); | 84 | if (len) |
123 | } else { | 85 | sha512_base_do_update(desc, data, len, |
124 | kernel_fpu_begin(); | 86 | (sha512_block_fn *)sha512_transform_asm); |
125 | res = __sha512_ssse3_update(desc, data, len, partial); | 87 | sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_transform_asm); |
126 | kernel_fpu_end(); | 88 | kernel_fpu_end(); |
127 | } | ||
128 | 89 | ||
129 | return res; | 90 | return sha512_base_finish(desc, out); |
130 | } | 91 | } |
131 | 92 | ||
132 | |||
133 | /* Add padding and return the message digest. */ | 93 | /* Add padding and return the message digest. */ |
134 | static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) | 94 | static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) |
135 | { | 95 | { |
136 | struct sha512_state *sctx = shash_desc_ctx(desc); | 96 | return sha512_ssse3_finup(desc, NULL, 0, out); |
137 | unsigned int i, index, padlen; | ||
138 | __be64 *dst = (__be64 *)out; | ||
139 | __be64 bits[2]; | ||
140 | static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; | ||
141 | |||
142 | /* save number of bits */ | ||
143 | bits[1] = cpu_to_be64(sctx->count[0] << 3); | ||
144 | bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); | ||
145 | |||
146 | /* Pad out to 112 mod 128 and append length */ | ||
147 | index = sctx->count[0] & 0x7f; | ||
148 | padlen = (index < 112) ? (112 - index) : ((128+112) - index); | ||
149 | |||
150 | if (!irq_fpu_usable()) { | ||
151 | crypto_sha512_update(desc, padding, padlen); | ||
152 | crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
153 | } else { | ||
154 | kernel_fpu_begin(); | ||
155 | /* We need to fill a whole block for __sha512_ssse3_update() */ | ||
156 | if (padlen <= 112) { | ||
157 | sctx->count[0] += padlen; | ||
158 | if (sctx->count[0] < padlen) | ||
159 | sctx->count[1]++; | ||
160 | memcpy(sctx->buf + index, padding, padlen); | ||
161 | } else { | ||
162 | __sha512_ssse3_update(desc, padding, padlen, index); | ||
163 | } | ||
164 | __sha512_ssse3_update(desc, (const u8 *)&bits, | ||
165 | sizeof(bits), 112); | ||
166 | kernel_fpu_end(); | ||
167 | } | ||
168 | |||
169 | /* Store state in digest */ | ||
170 | for (i = 0; i < 8; i++) | ||
171 | dst[i] = cpu_to_be64(sctx->state[i]); | ||
172 | |||
173 | /* Wipe context */ | ||
174 | memset(sctx, 0, sizeof(*sctx)); | ||
175 | |||
176 | return 0; | ||
177 | } | ||
178 | |||
179 | static int sha512_ssse3_export(struct shash_desc *desc, void *out) | ||
180 | { | ||
181 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
182 | |||
183 | memcpy(out, sctx, sizeof(*sctx)); | ||
184 | |||
185 | return 0; | ||
186 | } | ||
187 | |||
188 | static int sha512_ssse3_import(struct shash_desc *desc, const void *in) | ||
189 | { | ||
190 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
191 | |||
192 | memcpy(sctx, in, sizeof(*sctx)); | ||
193 | |||
194 | return 0; | ||
195 | } | ||
196 | |||
197 | static int sha384_ssse3_init(struct shash_desc *desc) | ||
198 | { | ||
199 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
200 | |||
201 | sctx->state[0] = SHA384_H0; | ||
202 | sctx->state[1] = SHA384_H1; | ||
203 | sctx->state[2] = SHA384_H2; | ||
204 | sctx->state[3] = SHA384_H3; | ||
205 | sctx->state[4] = SHA384_H4; | ||
206 | sctx->state[5] = SHA384_H5; | ||
207 | sctx->state[6] = SHA384_H6; | ||
208 | sctx->state[7] = SHA384_H7; | ||
209 | |||
210 | sctx->count[0] = sctx->count[1] = 0; | ||
211 | |||
212 | return 0; | ||
213 | } | ||
214 | |||
215 | static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash) | ||
216 | { | ||
217 | u8 D[SHA512_DIGEST_SIZE]; | ||
218 | |||
219 | sha512_ssse3_final(desc, D); | ||
220 | |||
221 | memcpy(hash, D, SHA384_DIGEST_SIZE); | ||
222 | memzero_explicit(D, SHA512_DIGEST_SIZE); | ||
223 | |||
224 | return 0; | ||
225 | } | 97 | } |
226 | 98 | ||
227 | static struct shash_alg algs[] = { { | 99 | static struct shash_alg algs[] = { { |
228 | .digestsize = SHA512_DIGEST_SIZE, | 100 | .digestsize = SHA512_DIGEST_SIZE, |
229 | .init = sha512_ssse3_init, | 101 | .init = sha512_base_init, |
230 | .update = sha512_ssse3_update, | 102 | .update = sha512_ssse3_update, |
231 | .final = sha512_ssse3_final, | 103 | .final = sha512_ssse3_final, |
232 | .export = sha512_ssse3_export, | 104 | .finup = sha512_ssse3_finup, |
233 | .import = sha512_ssse3_import, | ||
234 | .descsize = sizeof(struct sha512_state), | 105 | .descsize = sizeof(struct sha512_state), |
235 | .statesize = sizeof(struct sha512_state), | ||
236 | .base = { | 106 | .base = { |
237 | .cra_name = "sha512", | 107 | .cra_name = "sha512", |
238 | .cra_driver_name = "sha512-ssse3", | 108 | .cra_driver_name = "sha512-ssse3", |
@@ -243,13 +113,11 @@ static struct shash_alg algs[] = { { | |||
243 | } | 113 | } |
244 | }, { | 114 | }, { |
245 | .digestsize = SHA384_DIGEST_SIZE, | 115 | .digestsize = SHA384_DIGEST_SIZE, |
246 | .init = sha384_ssse3_init, | 116 | .init = sha384_base_init, |
247 | .update = sha512_ssse3_update, | 117 | .update = sha512_ssse3_update, |
248 | .final = sha384_ssse3_final, | 118 | .final = sha512_ssse3_final, |
249 | .export = sha512_ssse3_export, | 119 | .finup = sha512_ssse3_finup, |
250 | .import = sha512_ssse3_import, | ||
251 | .descsize = sizeof(struct sha512_state), | 120 | .descsize = sizeof(struct sha512_state), |
252 | .statesize = sizeof(struct sha512_state), | ||
253 | .base = { | 121 | .base = { |
254 | .cra_name = "sha384", | 122 | .cra_name = "sha384", |
255 | .cra_driver_name = "sha384-ssse3", | 123 | .cra_driver_name = "sha384-ssse3", |
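With the conversion above, the dedicated sha384 (and, earlier, sha224) final routines disappear: the shared finup path ends in sha512_base_finish()/sha256_base_finish(), which emit only crypto_shash_digestsize() bytes of the state, so truncation for the shorter variants falls out for free. A simplified sketch of that finishing step, under the assumption that it mirrors the real helper rather than copying it:

	/* Assumed shape of the finishing helper, not the verbatim kernel code. */
	static int sha512_finish_sketch(struct shash_desc *desc, u8 *out)
	{
		unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
		struct sha512_state *sctx = shash_desc_ctx(desc);
		__be64 *digest = (__be64 *)out;
		int i;

		/* 64 bytes for sha512, 48 for sha384 -- both whole 8-byte words. */
		for (i = 0; digest_size > 0; i++, digest_size -= sizeof(__be64))
			put_unaligned_be64(sctx->state[i], digest++);

		memzero_explicit(sctx, sizeof(*sctx));
		return 0;
	}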
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S index a039d21986a2..a350c990dc86 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64.S | |||
@@ -264,7 +264,7 @@ ENTRY(twofish_enc_blk) | |||
264 | movq R1, 8(%rsi) | 264 | movq R1, 8(%rsi) |
265 | 265 | ||
266 | popq R1 | 266 | popq R1 |
267 | movq $1,%rax | 267 | movl $1,%eax |
268 | ret | 268 | ret |
269 | ENDPROC(twofish_enc_blk) | 269 | ENDPROC(twofish_enc_blk) |
270 | 270 | ||
@@ -316,6 +316,6 @@ ENTRY(twofish_dec_blk) | |||
316 | movq R1, 8(%rsi) | 316 | movq R1, 8(%rsi) |
317 | 317 | ||
318 | popq R1 | 318 | popq R1 |
319 | movq $1,%rax | 319 | movl $1,%eax |
320 | ret | 320 | ret |
321 | ENDPROC(twofish_dec_blk) | 321 | ENDPROC(twofish_dec_blk) |
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 1ac531ea9bcc..b5e2d5651851 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c | |||
@@ -340,7 +340,8 @@ static struct crypto_alg twofish_algs[10] = { { | |||
340 | .cra_name = "__ecb-twofish-avx", | 340 | .cra_name = "__ecb-twofish-avx", |
341 | .cra_driver_name = "__driver-ecb-twofish-avx", | 341 | .cra_driver_name = "__driver-ecb-twofish-avx", |
342 | .cra_priority = 0, | 342 | .cra_priority = 0, |
343 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 343 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
344 | CRYPTO_ALG_INTERNAL, | ||
344 | .cra_blocksize = TF_BLOCK_SIZE, | 345 | .cra_blocksize = TF_BLOCK_SIZE, |
345 | .cra_ctxsize = sizeof(struct twofish_ctx), | 346 | .cra_ctxsize = sizeof(struct twofish_ctx), |
346 | .cra_alignmask = 0, | 347 | .cra_alignmask = 0, |
@@ -359,7 +360,8 @@ static struct crypto_alg twofish_algs[10] = { { | |||
359 | .cra_name = "__cbc-twofish-avx", | 360 | .cra_name = "__cbc-twofish-avx", |
360 | .cra_driver_name = "__driver-cbc-twofish-avx", | 361 | .cra_driver_name = "__driver-cbc-twofish-avx", |
361 | .cra_priority = 0, | 362 | .cra_priority = 0, |
362 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 363 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
364 | CRYPTO_ALG_INTERNAL, | ||
363 | .cra_blocksize = TF_BLOCK_SIZE, | 365 | .cra_blocksize = TF_BLOCK_SIZE, |
364 | .cra_ctxsize = sizeof(struct twofish_ctx), | 366 | .cra_ctxsize = sizeof(struct twofish_ctx), |
365 | .cra_alignmask = 0, | 367 | .cra_alignmask = 0, |
@@ -378,7 +380,8 @@ static struct crypto_alg twofish_algs[10] = { { | |||
378 | .cra_name = "__ctr-twofish-avx", | 380 | .cra_name = "__ctr-twofish-avx", |
379 | .cra_driver_name = "__driver-ctr-twofish-avx", | 381 | .cra_driver_name = "__driver-ctr-twofish-avx", |
380 | .cra_priority = 0, | 382 | .cra_priority = 0, |
381 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 383 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
384 | CRYPTO_ALG_INTERNAL, | ||
382 | .cra_blocksize = 1, | 385 | .cra_blocksize = 1, |
383 | .cra_ctxsize = sizeof(struct twofish_ctx), | 386 | .cra_ctxsize = sizeof(struct twofish_ctx), |
384 | .cra_alignmask = 0, | 387 | .cra_alignmask = 0, |
@@ -398,7 +401,8 @@ static struct crypto_alg twofish_algs[10] = { { | |||
398 | .cra_name = "__lrw-twofish-avx", | 401 | .cra_name = "__lrw-twofish-avx", |
399 | .cra_driver_name = "__driver-lrw-twofish-avx", | 402 | .cra_driver_name = "__driver-lrw-twofish-avx", |
400 | .cra_priority = 0, | 403 | .cra_priority = 0, |
401 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 404 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
405 | CRYPTO_ALG_INTERNAL, | ||
402 | .cra_blocksize = TF_BLOCK_SIZE, | 406 | .cra_blocksize = TF_BLOCK_SIZE, |
403 | .cra_ctxsize = sizeof(struct twofish_lrw_ctx), | 407 | .cra_ctxsize = sizeof(struct twofish_lrw_ctx), |
404 | .cra_alignmask = 0, | 408 | .cra_alignmask = 0, |
@@ -421,7 +425,8 @@ static struct crypto_alg twofish_algs[10] = { { | |||
421 | .cra_name = "__xts-twofish-avx", | 425 | .cra_name = "__xts-twofish-avx", |
422 | .cra_driver_name = "__driver-xts-twofish-avx", | 426 | .cra_driver_name = "__driver-xts-twofish-avx", |
423 | .cra_priority = 0, | 427 | .cra_priority = 0, |
424 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 428 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
429 | CRYPTO_ALG_INTERNAL, | ||
425 | .cra_blocksize = TF_BLOCK_SIZE, | 430 | .cra_blocksize = TF_BLOCK_SIZE, |
426 | .cra_ctxsize = sizeof(struct twofish_xts_ctx), | 431 | .cra_ctxsize = sizeof(struct twofish_xts_ctx), |
427 | .cra_alignmask = 0, | 432 | .cra_alignmask = 0, |
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile index e785b422b766..bb635c641869 100644 --- a/arch/x86/ia32/Makefile +++ b/arch/x86/ia32/Makefile | |||
@@ -3,7 +3,6 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o | 5 | obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o |
6 | obj-$(CONFIG_IA32_EMULATION) += nosyscall.o syscall_ia32.o | ||
7 | 6 | ||
8 | obj-$(CONFIG_IA32_AOUT) += ia32_aout.o | 7 | obj-$(CONFIG_IA32_AOUT) += ia32_aout.o |
9 | 8 | ||
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index d0165c9a2932..c81d35e6c7f1 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c | |||
@@ -161,8 +161,7 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) | |||
161 | } | 161 | } |
162 | 162 | ||
163 | static int ia32_restore_sigcontext(struct pt_regs *regs, | 163 | static int ia32_restore_sigcontext(struct pt_regs *regs, |
164 | struct sigcontext_ia32 __user *sc, | 164 | struct sigcontext_ia32 __user *sc) |
165 | unsigned int *pax) | ||
166 | { | 165 | { |
167 | unsigned int tmpflags, err = 0; | 166 | unsigned int tmpflags, err = 0; |
168 | void __user *buf; | 167 | void __user *buf; |
@@ -184,7 +183,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, | |||
184 | RELOAD_SEG(es); | 183 | RELOAD_SEG(es); |
185 | 184 | ||
186 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); | 185 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); |
187 | COPY(dx); COPY(cx); COPY(ip); | 186 | COPY(dx); COPY(cx); COPY(ip); COPY(ax); |
188 | /* Don't touch extended registers */ | 187 | /* Don't touch extended registers */ |
189 | 188 | ||
190 | COPY_SEG_CPL3(cs); | 189 | COPY_SEG_CPL3(cs); |
@@ -197,12 +196,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, | |||
197 | 196 | ||
198 | get_user_ex(tmp, &sc->fpstate); | 197 | get_user_ex(tmp, &sc->fpstate); |
199 | buf = compat_ptr(tmp); | 198 | buf = compat_ptr(tmp); |
200 | |||
201 | get_user_ex(*pax, &sc->ax); | ||
202 | } get_user_catch(err); | 199 | } get_user_catch(err); |
203 | 200 | ||
204 | err |= restore_xstate_sig(buf, 1); | 201 | err |= restore_xstate_sig(buf, 1); |
205 | 202 | ||
203 | force_iret(); | ||
204 | |||
206 | return err; | 205 | return err; |
207 | } | 206 | } |
208 | 207 | ||
@@ -211,7 +210,6 @@ asmlinkage long sys32_sigreturn(void) | |||
211 | struct pt_regs *regs = current_pt_regs(); | 210 | struct pt_regs *regs = current_pt_regs(); |
212 | struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); | 211 | struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); |
213 | sigset_t set; | 212 | sigset_t set; |
214 | unsigned int ax; | ||
215 | 213 | ||
216 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | 214 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) |
217 | goto badframe; | 215 | goto badframe; |
@@ -224,9 +222,9 @@ asmlinkage long sys32_sigreturn(void) | |||
224 | 222 | ||
225 | set_current_blocked(&set); | 223 | set_current_blocked(&set); |
226 | 224 | ||
227 | if (ia32_restore_sigcontext(regs, &frame->sc, &ax)) | 225 | if (ia32_restore_sigcontext(regs, &frame->sc)) |
228 | goto badframe; | 226 | goto badframe; |
229 | return ax; | 227 | return regs->ax; |
230 | 228 | ||
231 | badframe: | 229 | badframe: |
232 | signal_fault(regs, frame, "32bit sigreturn"); | 230 | signal_fault(regs, frame, "32bit sigreturn"); |
@@ -238,7 +236,6 @@ asmlinkage long sys32_rt_sigreturn(void) | |||
238 | struct pt_regs *regs = current_pt_regs(); | 236 | struct pt_regs *regs = current_pt_regs(); |
239 | struct rt_sigframe_ia32 __user *frame; | 237 | struct rt_sigframe_ia32 __user *frame; |
240 | sigset_t set; | 238 | sigset_t set; |
241 | unsigned int ax; | ||
242 | 239 | ||
243 | frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4); | 240 | frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4); |
244 | 241 | ||
@@ -249,13 +246,13 @@ asmlinkage long sys32_rt_sigreturn(void) | |||
249 | 246 | ||
250 | set_current_blocked(&set); | 247 | set_current_blocked(&set); |
251 | 248 | ||
252 | if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | 249 | if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext)) |
253 | goto badframe; | 250 | goto badframe; |
254 | 251 | ||
255 | if (compat_restore_altstack(&frame->uc.uc_stack)) | 252 | if (compat_restore_altstack(&frame->uc.uc_stack)) |
256 | goto badframe; | 253 | goto badframe; |
257 | 254 | ||
258 | return ax; | 255 | return regs->ax; |
259 | 256 | ||
260 | badframe: | 257 | badframe: |
261 | signal_fault(regs, frame, "32bit rt sigreturn"); | 258 | signal_fault(regs, frame, "32bit rt sigreturn"); |
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 156ebcab4ada..a821b1cd4fa7 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S | |||
@@ -30,24 +30,13 @@ | |||
30 | 30 | ||
31 | .section .entry.text, "ax" | 31 | .section .entry.text, "ax" |
32 | 32 | ||
33 | .macro IA32_ARG_FIXUP noebp=0 | 33 | /* clobbers %rax */ |
34 | movl %edi,%r8d | 34 | .macro CLEAR_RREGS _r9=rax |
35 | .if \noebp | ||
36 | .else | ||
37 | movl %ebp,%r9d | ||
38 | .endif | ||
39 | xchg %ecx,%esi | ||
40 | movl %ebx,%edi | ||
41 | movl %edx,%edx /* zero extension */ | ||
42 | .endm | ||
43 | |||
44 | /* clobbers %eax */ | ||
45 | .macro CLEAR_RREGS offset=0, _r9=rax | ||
46 | xorl %eax,%eax | 35 | xorl %eax,%eax |
47 | movq %rax,\offset+R11(%rsp) | 36 | movq %rax,R11(%rsp) |
48 | movq %rax,\offset+R10(%rsp) | 37 | movq %rax,R10(%rsp) |
49 | movq %\_r9,\offset+R9(%rsp) | 38 | movq %\_r9,R9(%rsp) |
50 | movq %rax,\offset+R8(%rsp) | 39 | movq %rax,R8(%rsp) |
51 | .endm | 40 | .endm |
52 | 41 | ||
53 | /* | 42 | /* |
@@ -60,14 +49,14 @@ | |||
60 | * If it's -1 to make us punt the syscall, then (u32)-1 is still | 49 | * If it's -1 to make us punt the syscall, then (u32)-1 is still |
61 | * an appropriately invalid value. | 50 | * an appropriately invalid value. |
62 | */ | 51 | */ |
63 | .macro LOAD_ARGS32 offset, _r9=0 | 52 | .macro LOAD_ARGS32 _r9=0 |
64 | .if \_r9 | 53 | .if \_r9 |
65 | movl \offset+16(%rsp),%r9d | 54 | movl R9(%rsp),%r9d |
66 | .endif | 55 | .endif |
67 | movl \offset+40(%rsp),%ecx | 56 | movl RCX(%rsp),%ecx |
68 | movl \offset+48(%rsp),%edx | 57 | movl RDX(%rsp),%edx |
69 | movl \offset+56(%rsp),%esi | 58 | movl RSI(%rsp),%esi |
70 | movl \offset+64(%rsp),%edi | 59 | movl RDI(%rsp),%edi |
71 | movl %eax,%eax /* zero extension */ | 60 | movl %eax,%eax /* zero extension */ |
72 | .endm | 61 | .endm |
73 | 62 | ||
@@ -99,54 +88,69 @@ ENDPROC(native_irq_enable_sysexit) | |||
99 | /* | 88 | /* |
100 | * 32bit SYSENTER instruction entry. | 89 | * 32bit SYSENTER instruction entry. |
101 | * | 90 | * |
91 | * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs. | ||
92 | * IF and VM in rflags are cleared (IOW: interrupts are off). | ||
93 | * SYSENTER does not save anything on the stack, | ||
94 | * and does not save old rip (!!!) and rflags. | ||
95 | * | ||
102 | * Arguments: | 96 | * Arguments: |
103 | * %eax System call number. | 97 | * eax system call number |
104 | * %ebx Arg1 | 98 | * ebx arg1 |
105 | * %ecx Arg2 | 99 | * ecx arg2 |
106 | * %edx Arg3 | 100 | * edx arg3 |
107 | * %esi Arg4 | 101 | * esi arg4 |
108 | * %edi Arg5 | 102 | * edi arg5 |
109 | * %ebp user stack | 103 | * ebp user stack |
110 | * 0(%ebp) Arg6 | 104 | * 0(%ebp) arg6 |
111 | * | 105 | * |
112 | * Interrupts off. | ||
113 | * | ||
114 | * This is purely a fast path. For anything complicated we use the int 0x80 | 106 | * This is purely a fast path. For anything complicated we use the int 0x80 |
115 | * path below. Set up a complete hardware stack frame to share code | 107 | * path below. We set up a complete hardware stack frame to share code |
116 | * with the int 0x80 path. | 108 | * with the int 0x80 path. |
117 | */ | 109 | */ |
118 | ENTRY(ia32_sysenter_target) | 110 | ENTRY(ia32_sysenter_target) |
119 | CFI_STARTPROC32 simple | 111 | CFI_STARTPROC32 simple |
120 | CFI_SIGNAL_FRAME | 112 | CFI_SIGNAL_FRAME |
121 | CFI_DEF_CFA rsp,0 | 113 | CFI_DEF_CFA rsp,0 |
122 | CFI_REGISTER rsp,rbp | 114 | CFI_REGISTER rsp,rbp |
123 | SWAPGS_UNSAFE_STACK | 115 | |
124 | movq PER_CPU_VAR(kernel_stack), %rsp | ||
125 | addq $(KERNEL_STACK_OFFSET),%rsp | ||
126 | /* | 116 | /* |
127 | * No need to follow this irqs on/off section: the syscall | 117 | * Interrupts are off on entry. |
128 | * disabled irqs, here we enable it straight after entry: | 118 | * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, |
119 | * it is too small to ever cause noticeable irq latency. | ||
129 | */ | 120 | */ |
121 | SWAPGS_UNSAFE_STACK | ||
122 | movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp | ||
130 | ENABLE_INTERRUPTS(CLBR_NONE) | 123 | ENABLE_INTERRUPTS(CLBR_NONE) |
131 | movl %ebp,%ebp /* zero extension */ | 124 | |
132 | pushq_cfi $__USER32_DS | 125 | /* Zero-extending 32-bit regs, do not remove */ |
133 | /*CFI_REL_OFFSET ss,0*/ | 126 | movl %ebp, %ebp |
134 | pushq_cfi %rbp | ||
135 | CFI_REL_OFFSET rsp,0 | ||
136 | pushfq_cfi | ||
137 | /*CFI_REL_OFFSET rflags,0*/ | ||
138 | movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d | ||
139 | CFI_REGISTER rip,r10 | ||
140 | pushq_cfi $__USER32_CS | ||
141 | /*CFI_REL_OFFSET cs,0*/ | ||
142 | movl %eax, %eax | 127 | movl %eax, %eax |
143 | pushq_cfi %r10 | 128 | |
144 | CFI_REL_OFFSET rip,0 | 129 | movl ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d |
145 | pushq_cfi %rax | 130 | CFI_REGISTER rip,r10 |
131 | |||
132 | /* Construct struct pt_regs on stack */ | ||
133 | pushq_cfi $__USER32_DS /* pt_regs->ss */ | ||
134 | pushq_cfi %rbp /* pt_regs->sp */ | ||
135 | CFI_REL_OFFSET rsp,0 | ||
136 | pushfq_cfi /* pt_regs->flags */ | ||
137 | pushq_cfi $__USER32_CS /* pt_regs->cs */ | ||
138 | pushq_cfi %r10 /* pt_regs->ip = thread_info->sysenter_return */ | ||
139 | CFI_REL_OFFSET rip,0 | ||
140 | pushq_cfi_reg rax /* pt_regs->orig_ax */ | ||
141 | pushq_cfi_reg rdi /* pt_regs->di */ | ||
142 | pushq_cfi_reg rsi /* pt_regs->si */ | ||
143 | pushq_cfi_reg rdx /* pt_regs->dx */ | ||
144 | pushq_cfi_reg rcx /* pt_regs->cx */ | ||
145 | pushq_cfi_reg rax /* pt_regs->ax */ | ||
146 | cld | 146 | cld |
147 | SAVE_ARGS 0,1,0 | 147 | sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ |
148 | /* no need to do an access_ok check here because rbp has been | 148 | CFI_ADJUST_CFA_OFFSET 10*8 |
149 | 32bit zero extended */ | 149 | |
150 | /* | ||
151 | * no need to do an access_ok check here because rbp has been | ||
152 | * 32bit zero extended | ||
153 | */ | ||
150 | ASM_STAC | 154 | ASM_STAC |
151 | 1: movl (%rbp),%ebp | 155 | 1: movl (%rbp),%ebp |
152 | _ASM_EXTABLE(1b,ia32_badarg) | 156 | _ASM_EXTABLE(1b,ia32_badarg) |
@@ -157,42 +161,80 @@ ENTRY(ia32_sysenter_target) | |||
157 | * ourselves. To save a few cycles, we can check whether | 161 | * ourselves. To save a few cycles, we can check whether |
158 | * NT was set instead of doing an unconditional popfq. | 162 | * NT was set instead of doing an unconditional popfq. |
159 | */ | 163 | */ |
160 | testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp) | 164 | testl $X86_EFLAGS_NT,EFLAGS(%rsp) |
161 | jnz sysenter_fix_flags | 165 | jnz sysenter_fix_flags |
162 | sysenter_flags_fixed: | 166 | sysenter_flags_fixed: |
163 | 167 | ||
164 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 168 | orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) |
165 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 169 | testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
166 | CFI_REMEMBER_STATE | 170 | CFI_REMEMBER_STATE |
167 | jnz sysenter_tracesys | 171 | jnz sysenter_tracesys |
168 | cmpq $(IA32_NR_syscalls-1),%rax | 172 | cmpq $(IA32_NR_syscalls-1),%rax |
169 | ja ia32_badsys | 173 | ja ia32_badsys |
170 | sysenter_do_call: | 174 | sysenter_do_call: |
171 | IA32_ARG_FIXUP | 175 | /* 32bit syscall -> 64bit C ABI argument conversion */ |
176 | movl %edi,%r8d /* arg5 */ | ||
177 | movl %ebp,%r9d /* arg6 */ | ||
178 | xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */ | ||
179 | movl %ebx,%edi /* arg1 */ | ||
180 | movl %edx,%edx /* arg3 (zero extension) */ | ||
172 | sysenter_dispatch: | 181 | sysenter_dispatch: |
173 | call *ia32_sys_call_table(,%rax,8) | 182 | call *ia32_sys_call_table(,%rax,8) |
174 | movq %rax,RAX-ARGOFFSET(%rsp) | 183 | movq %rax,RAX(%rsp) |
175 | DISABLE_INTERRUPTS(CLBR_NONE) | 184 | DISABLE_INTERRUPTS(CLBR_NONE) |
176 | TRACE_IRQS_OFF | 185 | TRACE_IRQS_OFF |
177 | testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 186 | testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
178 | jnz sysexit_audit | 187 | jnz sysexit_audit |
179 | sysexit_from_sys_call: | 188 | sysexit_from_sys_call: |
180 | andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 189 | /* |
181 | /* clear IF, that popfq doesn't enable interrupts early */ | 190 | * NB: SYSEXIT is not obviously safe for 64-bit kernels -- an |
182 | andl $~0x200,EFLAGS-ARGOFFSET(%rsp) | 191 | * NMI between STI and SYSEXIT has poorly specified behavior, |
183 | movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */ | 192 | * and and NMI followed by an IRQ with usergs is fatal. So |
184 | CFI_REGISTER rip,rdx | 193 | * we just pretend we're using SYSEXIT but we really use |
185 | RESTORE_ARGS 0,24,0,0,0,0 | 194 | * SYSRETL instead. |
195 | * | ||
196 | * This code path is still called 'sysexit' because it pairs | ||
197 | * with 'sysenter' and it uses the SYSENTER calling convention. | ||
198 | */ | ||
199 | andl $~TS_COMPAT,ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) | ||
200 | movl RIP(%rsp),%ecx /* User %eip */ | ||
201 | CFI_REGISTER rip,rcx | ||
202 | RESTORE_RSI_RDI | ||
203 | xorl %edx,%edx /* avoid info leaks */ | ||
186 | xorq %r8,%r8 | 204 | xorq %r8,%r8 |
187 | xorq %r9,%r9 | 205 | xorq %r9,%r9 |
188 | xorq %r10,%r10 | 206 | xorq %r10,%r10 |
189 | xorq %r11,%r11 | 207 | movl EFLAGS(%rsp),%r11d /* User eflags */ |
190 | popfq_cfi | ||
191 | /*CFI_RESTORE rflags*/ | 208 | /*CFI_RESTORE rflags*/ |
192 | popq_cfi %rcx /* User %esp */ | ||
193 | CFI_REGISTER rsp,rcx | ||
194 | TRACE_IRQS_ON | 209 | TRACE_IRQS_ON |
195 | ENABLE_INTERRUPTS_SYSEXIT32 | 210 | |
211 | /* | ||
212 | * SYSRETL works even on Intel CPUs. Use it in preference to SYSEXIT, | ||
213 | * since it avoids a dicey window with interrupts enabled. | ||
214 | */ | ||
215 | movl RSP(%rsp),%esp | ||
216 | |||
217 | /* | ||
218 | * USERGS_SYSRET32 does: | ||
219 | * gsbase = user's gs base | ||
220 | * eip = ecx | ||
221 | * rflags = r11 | ||
222 | * cs = __USER32_CS | ||
223 | * ss = __USER_DS | ||
224 | * | ||
225 | * The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does: | ||
226 | * | ||
227 | * pop %ebp | ||
228 | * pop %edx | ||
229 | * pop %ecx | ||
230 | * | ||
231 | * Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to | ||
232 | * avoid info leaks. R11 ends up with VDSO32_SYSENTER_RETURN's | ||
233 | * address (already known to user code), and R12-R15 are | ||
234 | * callee-saved and therefore don't contain any interesting | ||
235 | * kernel data. | ||
236 | */ | ||
237 | USERGS_SYSRET32 | ||
196 | 238 | ||
197 | CFI_RESTORE_STATE | 239 | CFI_RESTORE_STATE |
198 | 240 | ||
@@ -205,18 +247,18 @@ sysexit_from_sys_call: | |||
205 | movl %ebx,%esi /* 2nd arg: 1st syscall arg */ | 247 | movl %ebx,%esi /* 2nd arg: 1st syscall arg */ |
206 | movl %eax,%edi /* 1st arg: syscall number */ | 248 | movl %eax,%edi /* 1st arg: syscall number */ |
207 | call __audit_syscall_entry | 249 | call __audit_syscall_entry |
208 | movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ | 250 | movl RAX(%rsp),%eax /* reload syscall number */ |
209 | cmpq $(IA32_NR_syscalls-1),%rax | 251 | cmpq $(IA32_NR_syscalls-1),%rax |
210 | ja ia32_badsys | 252 | ja ia32_badsys |
211 | movl %ebx,%edi /* reload 1st syscall arg */ | 253 | movl %ebx,%edi /* reload 1st syscall arg */ |
212 | movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */ | 254 | movl RCX(%rsp),%esi /* reload 2nd syscall arg */ |
213 | movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */ | 255 | movl RDX(%rsp),%edx /* reload 3rd syscall arg */ |
214 | movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */ | 256 | movl RSI(%rsp),%ecx /* reload 4th syscall arg */ |
215 | movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */ | 257 | movl RDI(%rsp),%r8d /* reload 5th syscall arg */ |
216 | .endm | 258 | .endm |
217 | 259 | ||
218 | .macro auditsys_exit exit | 260 | .macro auditsys_exit exit |
219 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 261 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
220 | jnz ia32_ret_from_sys_call | 262 | jnz ia32_ret_from_sys_call |
221 | TRACE_IRQS_ON | 263 | TRACE_IRQS_ON |
222 | ENABLE_INTERRUPTS(CLBR_NONE) | 264 | ENABLE_INTERRUPTS(CLBR_NONE) |
@@ -227,13 +269,13 @@ sysexit_from_sys_call: | |||
227 | 1: setbe %al /* 1 if error, 0 if not */ | 269 | 1: setbe %al /* 1 if error, 0 if not */ |
228 | movzbl %al,%edi /* zero-extend that into %edi */ | 270 | movzbl %al,%edi /* zero-extend that into %edi */ |
229 | call __audit_syscall_exit | 271 | call __audit_syscall_exit |
230 | movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */ | 272 | movq RAX(%rsp),%rax /* reload syscall return value */ |
231 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | 273 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi |
232 | DISABLE_INTERRUPTS(CLBR_NONE) | 274 | DISABLE_INTERRUPTS(CLBR_NONE) |
233 | TRACE_IRQS_OFF | 275 | TRACE_IRQS_OFF |
234 | testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 276 | testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
235 | jz \exit | 277 | jz \exit |
236 | CLEAR_RREGS -ARGOFFSET | 278 | CLEAR_RREGS |
237 | jmp int_with_check | 279 | jmp int_with_check |
238 | .endm | 280 | .endm |
239 | 281 | ||
@@ -253,16 +295,16 @@ sysenter_fix_flags: | |||
253 | 295 | ||
254 | sysenter_tracesys: | 296 | sysenter_tracesys: |
255 | #ifdef CONFIG_AUDITSYSCALL | 297 | #ifdef CONFIG_AUDITSYSCALL |
256 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 298 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
257 | jz sysenter_auditsys | 299 | jz sysenter_auditsys |
258 | #endif | 300 | #endif |
259 | SAVE_REST | 301 | SAVE_EXTRA_REGS |
260 | CLEAR_RREGS | 302 | CLEAR_RREGS |
261 | movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ | 303 | movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ |
262 | movq %rsp,%rdi /* &pt_regs -> arg1 */ | 304 | movq %rsp,%rdi /* &pt_regs -> arg1 */ |
263 | call syscall_trace_enter | 305 | call syscall_trace_enter |
264 | LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ | 306 | LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ |
265 | RESTORE_REST | 307 | RESTORE_EXTRA_REGS |
266 | cmpq $(IA32_NR_syscalls-1),%rax | 308 | cmpq $(IA32_NR_syscalls-1),%rax |
267 | ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ | 309 | ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ |
268 | jmp sysenter_do_call | 310 | jmp sysenter_do_call |
@@ -272,94 +314,128 @@ ENDPROC(ia32_sysenter_target) | |||
272 | /* | 314 | /* |
273 | * 32bit SYSCALL instruction entry. | 315 | * 32bit SYSCALL instruction entry. |
274 | * | 316 | * |
317 | * 32bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, | ||
318 | * then loads new ss, cs, and rip from previously programmed MSRs. | ||
319 | * rflags gets masked by a value from another MSR (so CLD and CLAC | ||
320 | * are not needed). SYSCALL does not save anything on the stack | ||
321 | * and does not change rsp. | ||
322 | * | ||
323 | * Note: rflags saving+masking-with-MSR happens only in Long mode | ||
324 | * (in legacy 32bit mode, IF, RF and VM bits are cleared and that's it). | ||
325 | * Don't get confused: rflags saving+masking depends on Long Mode Active bit | ||
326 | * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes | ||
327 | * or target CS descriptor's L bit (SYSCALL does not read segment descriptors). | ||
328 | * | ||
275 | * Arguments: | 329 | * Arguments: |
276 | * %eax System call number. | 330 | * eax system call number |
277 | * %ebx Arg1 | 331 | * ecx return address |
278 | * %ecx return EIP | 332 | * ebx arg1 |
279 | * %edx Arg3 | 333 | * ebp arg2 (note: not saved in the stack frame, should not be touched) |
280 | * %esi Arg4 | 334 | * edx arg3 |
281 | * %edi Arg5 | 335 | * esi arg4 |
282 | * %ebp Arg2 [note: not saved in the stack frame, should not be touched] | 336 | * edi arg5 |
283 | * %esp user stack | 337 | * esp user stack |
284 | * 0(%esp) Arg6 | 338 | * 0(%esp) arg6 |
285 | * | 339 | * |
286 | * Interrupts off. | ||
287 | * | ||
288 | * This is purely a fast path. For anything complicated we use the int 0x80 | 340 | * This is purely a fast path. For anything complicated we use the int 0x80 |
289 | * path below. Set up a complete hardware stack frame to share code | 341 | * path below. We set up a complete hardware stack frame to share code |
290 | * with the int 0x80 path. | 342 | * with the int 0x80 path. |
291 | */ | 343 | */ |
292 | ENTRY(ia32_cstar_target) | 344 | ENTRY(ia32_cstar_target) |
293 | CFI_STARTPROC32 simple | 345 | CFI_STARTPROC32 simple |
294 | CFI_SIGNAL_FRAME | 346 | CFI_SIGNAL_FRAME |
295 | CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET | 347 | CFI_DEF_CFA rsp,0 |
296 | CFI_REGISTER rip,rcx | 348 | CFI_REGISTER rip,rcx |
297 | /*CFI_REGISTER rflags,r11*/ | 349 | /*CFI_REGISTER rflags,r11*/ |
350 | |||
351 | /* | ||
352 | * Interrupts are off on entry. | ||
353 | * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, | ||
354 | * it is too small to ever cause noticeable irq latency. | ||
355 | */ | ||
298 | SWAPGS_UNSAFE_STACK | 356 | SWAPGS_UNSAFE_STACK |
299 | movl %esp,%r8d | 357 | movl %esp,%r8d |
300 | CFI_REGISTER rsp,r8 | 358 | CFI_REGISTER rsp,r8 |
301 | movq PER_CPU_VAR(kernel_stack),%rsp | 359 | movq PER_CPU_VAR(kernel_stack),%rsp |
302 | /* | ||
303 | * No need to follow this irqs on/off section: the syscall | ||
304 | * disabled irqs and here we enable it straight after entry: | ||
305 | */ | ||
306 | ENABLE_INTERRUPTS(CLBR_NONE) | 360 | ENABLE_INTERRUPTS(CLBR_NONE) |
307 | SAVE_ARGS 8,0,0 | 361 | |
308 | movl %eax,%eax /* zero extension */ | 362 | /* Zero-extending 32-bit regs, do not remove */ |
309 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 363 | movl %eax,%eax |
310 | movq %rcx,RIP-ARGOFFSET(%rsp) | 364 | |
311 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 365 | /* Construct struct pt_regs on stack */ |
312 | movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */ | 366 | pushq_cfi $__USER32_DS /* pt_regs->ss */ |
367 | pushq_cfi %r8 /* pt_regs->sp */ | ||
368 | CFI_REL_OFFSET rsp,0 | ||
369 | pushq_cfi %r11 /* pt_regs->flags */ | ||
370 | pushq_cfi $__USER32_CS /* pt_regs->cs */ | ||
371 | pushq_cfi %rcx /* pt_regs->ip */ | ||
372 | CFI_REL_OFFSET rip,0 | ||
373 | pushq_cfi_reg rax /* pt_regs->orig_ax */ | ||
374 | pushq_cfi_reg rdi /* pt_regs->di */ | ||
375 | pushq_cfi_reg rsi /* pt_regs->si */ | ||
376 | pushq_cfi_reg rdx /* pt_regs->dx */ | ||
377 | pushq_cfi_reg rbp /* pt_regs->cx */ | ||
313 | movl %ebp,%ecx | 378 | movl %ebp,%ecx |
314 | movq $__USER32_CS,CS-ARGOFFSET(%rsp) | 379 | pushq_cfi_reg rax /* pt_regs->ax */ |
315 | movq $__USER32_DS,SS-ARGOFFSET(%rsp) | 380 | sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ |
316 | movq %r11,EFLAGS-ARGOFFSET(%rsp) | 381 | CFI_ADJUST_CFA_OFFSET 10*8 |
317 | /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ | 382 | |
318 | movq %r8,RSP-ARGOFFSET(%rsp) | 383 | /* |
319 | CFI_REL_OFFSET rsp,RSP-ARGOFFSET | 384 | * no need to do an access_ok check here because r8 has been |
320 | /* no need to do an access_ok check here because r8 has been | 385 | * 32bit zero extended |
321 | 32bit zero extended */ | 386 | */ |
322 | /* hardware stack frame is complete now */ | ||
323 | ASM_STAC | 387 | ASM_STAC |
324 | 1: movl (%r8),%r9d | 388 | 1: movl (%r8),%r9d |
325 | _ASM_EXTABLE(1b,ia32_badarg) | 389 | _ASM_EXTABLE(1b,ia32_badarg) |
326 | ASM_CLAC | 390 | ASM_CLAC |
327 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 391 | orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) |
328 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 392 | testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
329 | CFI_REMEMBER_STATE | 393 | CFI_REMEMBER_STATE |
330 | jnz cstar_tracesys | 394 | jnz cstar_tracesys |
331 | cmpq $IA32_NR_syscalls-1,%rax | 395 | cmpq $IA32_NR_syscalls-1,%rax |
332 | ja ia32_badsys | 396 | ja ia32_badsys |
333 | cstar_do_call: | 397 | cstar_do_call: |
334 | IA32_ARG_FIXUP 1 | 398 | /* 32bit syscall -> 64bit C ABI argument conversion */ |
399 | movl %edi,%r8d /* arg5 */ | ||
400 | /* r9 already loaded */ /* arg6 */ | ||
401 | xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */ | ||
402 | movl %ebx,%edi /* arg1 */ | ||
403 | movl %edx,%edx /* arg3 (zero extension) */ | ||
335 | cstar_dispatch: | 404 | cstar_dispatch: |
336 | call *ia32_sys_call_table(,%rax,8) | 405 | call *ia32_sys_call_table(,%rax,8) |
337 | movq %rax,RAX-ARGOFFSET(%rsp) | 406 | movq %rax,RAX(%rsp) |
338 | DISABLE_INTERRUPTS(CLBR_NONE) | 407 | DISABLE_INTERRUPTS(CLBR_NONE) |
339 | TRACE_IRQS_OFF | 408 | TRACE_IRQS_OFF |
340 | testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 409 | testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
341 | jnz sysretl_audit | 410 | jnz sysretl_audit |
342 | sysretl_from_sys_call: | 411 | sysretl_from_sys_call: |
343 | andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 412 | andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) |
344 | RESTORE_ARGS 0,-ARG_SKIP,0,0,0 | 413 | RESTORE_RSI_RDI_RDX |
345 | movl RIP-ARGOFFSET(%rsp),%ecx | 414 | movl RIP(%rsp),%ecx |
346 | CFI_REGISTER rip,rcx | 415 | CFI_REGISTER rip,rcx |
347 | movl EFLAGS-ARGOFFSET(%rsp),%r11d | 416 | movl EFLAGS(%rsp),%r11d |
348 | /*CFI_REGISTER rflags,r11*/ | 417 | /*CFI_REGISTER rflags,r11*/ |
349 | xorq %r10,%r10 | 418 | xorq %r10,%r10 |
350 | xorq %r9,%r9 | 419 | xorq %r9,%r9 |
351 | xorq %r8,%r8 | 420 | xorq %r8,%r8 |
352 | TRACE_IRQS_ON | 421 | TRACE_IRQS_ON |
353 | movl RSP-ARGOFFSET(%rsp),%esp | 422 | movl RSP(%rsp),%esp |
354 | CFI_RESTORE rsp | 423 | CFI_RESTORE rsp |
424 | /* | ||
425 | * 64bit->32bit SYSRET restores eip from ecx, | ||
426 | * eflags from r11 (but RF and VM bits are forced to 0), | ||
427 | * cs and ss are loaded from MSRs. | ||
428 | * (Note: 32bit->32bit SYSRET is different: since r11 | ||
429 | * does not exist, it merely sets eflags.IF=1). | ||
430 | */ | ||
355 | USERGS_SYSRET32 | 431 | USERGS_SYSRET32 |
356 | 432 | ||
357 | #ifdef CONFIG_AUDITSYSCALL | 433 | #ifdef CONFIG_AUDITSYSCALL |
358 | cstar_auditsys: | 434 | cstar_auditsys: |
359 | CFI_RESTORE_STATE | 435 | CFI_RESTORE_STATE |
360 | movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */ | 436 | movl %r9d,R9(%rsp) /* register to be clobbered by call */ |
361 | auditsys_entry_common | 437 | auditsys_entry_common |
362 | movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */ | 438 | movl R9(%rsp),%r9d /* reload 6th syscall arg */ |
363 | jmp cstar_dispatch | 439 | jmp cstar_dispatch |
364 | 440 | ||
365 | sysretl_audit: | 441 | sysretl_audit: |
@@ -368,17 +444,17 @@ sysretl_audit: | |||
368 | 444 | ||
369 | cstar_tracesys: | 445 | cstar_tracesys: |
370 | #ifdef CONFIG_AUDITSYSCALL | 446 | #ifdef CONFIG_AUDITSYSCALL |
371 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 447 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
372 | jz cstar_auditsys | 448 | jz cstar_auditsys |
373 | #endif | 449 | #endif |
374 | xchgl %r9d,%ebp | 450 | xchgl %r9d,%ebp |
375 | SAVE_REST | 451 | SAVE_EXTRA_REGS |
376 | CLEAR_RREGS 0, r9 | 452 | CLEAR_RREGS r9 |
377 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ | 453 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ |
378 | movq %rsp,%rdi /* &pt_regs -> arg1 */ | 454 | movq %rsp,%rdi /* &pt_regs -> arg1 */ |
379 | call syscall_trace_enter | 455 | call syscall_trace_enter |
380 | LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ | 456 | LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */ |
381 | RESTORE_REST | 457 | RESTORE_EXTRA_REGS |
382 | xchgl %ebp,%r9d | 458 | xchgl %ebp,%r9d |
383 | cmpq $(IA32_NR_syscalls-1),%rax | 459 | cmpq $(IA32_NR_syscalls-1),%rax |
384 | ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ | 460 | ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ |
@@ -391,78 +467,94 @@ ia32_badarg: | |||
391 | jmp ia32_sysret | 467 | jmp ia32_sysret |
392 | CFI_ENDPROC | 468 | CFI_ENDPROC |
393 | 469 | ||
394 | /* | 470 | /* |
395 | * Emulated IA32 system calls via int 0x80. | 471 | * Emulated IA32 system calls via int 0x80. |
396 | * | 472 | * |
397 | * Arguments: | 473 | * Arguments: |
398 | * %eax System call number. | 474 | * eax system call number |
399 | * %ebx Arg1 | 475 | * ebx arg1 |
400 | * %ecx Arg2 | 476 | * ecx arg2 |
401 | * %edx Arg3 | 477 | * edx arg3 |
402 | * %esi Arg4 | 478 | * esi arg4 |
403 | * %edi Arg5 | 479 | * edi arg5 |
404 | * %ebp Arg6 [note: not saved in the stack frame, should not be touched] | 480 | * ebp arg6 (note: not saved in the stack frame, should not be touched) |
405 | * | 481 | * |
406 | * Notes: | 482 | * Notes: |
407 | * Uses the same stack frame as the x86-64 version. | 483 | * Uses the same stack frame as the x86-64 version. |
408 | * All registers except %eax must be saved (but ptrace may violate that) | 484 | * All registers except eax must be saved (but ptrace may violate that). |
409 | * Arguments are zero extended. For system calls that want sign extension and | 485 | * Arguments are zero extended. For system calls that want sign extension and |
410 | * take long arguments a wrapper is needed. Most calls can just be called | 486 | * take long arguments a wrapper is needed. Most calls can just be called |
411 | * directly. | 487 | * directly. |
412 | * Assumes it is only called from user space and entered with interrupts off. | 488 | * Assumes it is only called from user space and entered with interrupts off. |
413 | */ | 489 | */ |
414 | 490 | ||
415 | ENTRY(ia32_syscall) | 491 | ENTRY(ia32_syscall) |
416 | CFI_STARTPROC32 simple | 492 | CFI_STARTPROC32 simple |
417 | CFI_SIGNAL_FRAME | 493 | CFI_SIGNAL_FRAME |
418 | CFI_DEF_CFA rsp,SS+8-RIP | 494 | CFI_DEF_CFA rsp,5*8 |
419 | /*CFI_REL_OFFSET ss,SS-RIP*/ | 495 | /*CFI_REL_OFFSET ss,4*8 */ |
420 | CFI_REL_OFFSET rsp,RSP-RIP | 496 | CFI_REL_OFFSET rsp,3*8 |
421 | /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ | 497 | /*CFI_REL_OFFSET rflags,2*8 */ |
422 | /*CFI_REL_OFFSET cs,CS-RIP*/ | 498 | /*CFI_REL_OFFSET cs,1*8 */ |
423 | CFI_REL_OFFSET rip,RIP-RIP | 499 | CFI_REL_OFFSET rip,0*8 |
424 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 500 | |
425 | SWAPGS | ||
426 | /* | 501 | /* |
427 | * No need to follow this irqs on/off section: the syscall | 502 | * Interrupts are off on entry. |
428 | * disabled irqs and here we enable it straight after entry: | 503 | * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, |
504 | * it is too small to ever cause noticeable irq latency. | ||
429 | */ | 505 | */ |
506 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
507 | SWAPGS | ||
430 | ENABLE_INTERRUPTS(CLBR_NONE) | 508 | ENABLE_INTERRUPTS(CLBR_NONE) |
431 | movl %eax,%eax | 509 | |
432 | pushq_cfi %rax | 510 | /* Zero-extending 32-bit regs, do not remove */ |
511 | movl %eax,%eax | ||
512 | |||
513 | /* Construct struct pt_regs on stack (iret frame is already on stack) */ | ||
514 | pushq_cfi_reg rax /* pt_regs->orig_ax */ | ||
515 | pushq_cfi_reg rdi /* pt_regs->di */ | ||
516 | pushq_cfi_reg rsi /* pt_regs->si */ | ||
517 | pushq_cfi_reg rdx /* pt_regs->dx */ | ||
518 | pushq_cfi_reg rcx /* pt_regs->cx */ | ||
519 | pushq_cfi_reg rax /* pt_regs->ax */ | ||
433 | cld | 520 | cld |
434 | /* note the registers are not zero extended to the sf. | 521 | sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ |
435 | this could be a problem. */ | 522 | CFI_ADJUST_CFA_OFFSET 10*8 |
436 | SAVE_ARGS 0,1,0 | 523 | |
437 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 524 | orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) |
438 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 525 | testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
439 | jnz ia32_tracesys | 526 | jnz ia32_tracesys |
440 | cmpq $(IA32_NR_syscalls-1),%rax | 527 | cmpq $(IA32_NR_syscalls-1),%rax |
441 | ja ia32_badsys | 528 | ja ia32_badsys |
442 | ia32_do_call: | 529 | ia32_do_call: |
443 | IA32_ARG_FIXUP | 530 | /* 32bit syscall -> 64bit C ABI argument conversion */ |
531 | movl %edi,%r8d /* arg5 */ | ||
532 | movl %ebp,%r9d /* arg6 */ | ||
533 | xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */ | ||
534 | movl %ebx,%edi /* arg1 */ | ||
535 | movl %edx,%edx /* arg3 (zero extension) */ | ||
444 | call *ia32_sys_call_table(,%rax,8) # xxx: rip relative | 536 | call *ia32_sys_call_table(,%rax,8) # xxx: rip relative |
445 | ia32_sysret: | 537 | ia32_sysret: |
446 | movq %rax,RAX-ARGOFFSET(%rsp) | 538 | movq %rax,RAX(%rsp) |
447 | ia32_ret_from_sys_call: | 539 | ia32_ret_from_sys_call: |
448 | CLEAR_RREGS -ARGOFFSET | 540 | CLEAR_RREGS |
449 | jmp int_ret_from_sys_call | 541 | jmp int_ret_from_sys_call |
450 | 542 | ||
451 | ia32_tracesys: | 543 | ia32_tracesys: |
452 | SAVE_REST | 544 | SAVE_EXTRA_REGS |
453 | CLEAR_RREGS | 545 | CLEAR_RREGS |
454 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ | 546 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ |
455 | movq %rsp,%rdi /* &pt_regs -> arg1 */ | 547 | movq %rsp,%rdi /* &pt_regs -> arg1 */ |
456 | call syscall_trace_enter | 548 | call syscall_trace_enter |
457 | LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ | 549 | LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ |
458 | RESTORE_REST | 550 | RESTORE_EXTRA_REGS |
459 | cmpq $(IA32_NR_syscalls-1),%rax | 551 | cmpq $(IA32_NR_syscalls-1),%rax |
460 | ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ | 552 | ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ |
461 | jmp ia32_do_call | 553 | jmp ia32_do_call |
462 | END(ia32_syscall) | 554 | END(ia32_syscall) |
463 | 555 | ||
464 | ia32_badsys: | 556 | ia32_badsys: |
465 | movq $0,ORIG_RAX-ARGOFFSET(%rsp) | 557 | movq $0,ORIG_RAX(%rsp) |
466 | movq $-ENOSYS,%rax | 558 | movq $-ENOSYS,%rax |
467 | jmp ia32_sysret | 559 | jmp ia32_sysret |
468 | 560 | ||
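The Notes block above says compat arguments arrive zero-extended and that syscalls wanting sign extension of a long argument need a wrapper. A hedged userspace illustration of why (the names below are made up, not kernel APIs):

#include <stdio.h>

static long takes_signed_offset(long offset)    /* wants a real signed long */
{
        return offset;
}

/* what a compat wrapper has to do: the low 32 bits hold the value,
 * the upper 32 bits are zero because the entry code zero-extended it */
static long compat_wrapper(unsigned long zero_extended)
{
        return takes_signed_offset((long)(int)zero_extended);
}

int main(void)
{
        unsigned long arg = 0xffffffffUL;       /* 32-bit -1 after zero extension */

        printf("without wrapper: %ld\n", (long)arg);            /* 4294967295 */
        printf("with wrapper:    %ld\n", compat_wrapper(arg));  /* -1 */
        return 0;
}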
@@ -479,8 +571,6 @@ GLOBAL(\label) | |||
479 | 571 | ||
480 | PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn | 572 | PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn |
481 | PTREGSCALL stub32_sigreturn, sys32_sigreturn | 573 | PTREGSCALL stub32_sigreturn, sys32_sigreturn |
482 | PTREGSCALL stub32_execve, compat_sys_execve | ||
483 | PTREGSCALL stub32_execveat, compat_sys_execveat | ||
484 | PTREGSCALL stub32_fork, sys_fork | 574 | PTREGSCALL stub32_fork, sys_fork |
485 | PTREGSCALL stub32_vfork, sys_vfork | 575 | PTREGSCALL stub32_vfork, sys_vfork |
486 | 576 | ||
@@ -492,24 +582,23 @@ GLOBAL(stub32_clone) | |||
492 | 582 | ||
493 | ALIGN | 583 | ALIGN |
494 | ia32_ptregs_common: | 584 | ia32_ptregs_common: |
495 | popq %r11 | ||
496 | CFI_ENDPROC | 585 | CFI_ENDPROC |
497 | CFI_STARTPROC32 simple | 586 | CFI_STARTPROC32 simple |
498 | CFI_SIGNAL_FRAME | 587 | CFI_SIGNAL_FRAME |
499 | CFI_DEF_CFA rsp,SS+8-ARGOFFSET | 588 | CFI_DEF_CFA rsp,SIZEOF_PTREGS |
500 | CFI_REL_OFFSET rax,RAX-ARGOFFSET | 589 | CFI_REL_OFFSET rax,RAX |
501 | CFI_REL_OFFSET rcx,RCX-ARGOFFSET | 590 | CFI_REL_OFFSET rcx,RCX |
502 | CFI_REL_OFFSET rdx,RDX-ARGOFFSET | 591 | CFI_REL_OFFSET rdx,RDX |
503 | CFI_REL_OFFSET rsi,RSI-ARGOFFSET | 592 | CFI_REL_OFFSET rsi,RSI |
504 | CFI_REL_OFFSET rdi,RDI-ARGOFFSET | 593 | CFI_REL_OFFSET rdi,RDI |
505 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 594 | CFI_REL_OFFSET rip,RIP |
506 | /* CFI_REL_OFFSET cs,CS-ARGOFFSET*/ | 595 | /* CFI_REL_OFFSET cs,CS*/ |
507 | /* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ | 596 | /* CFI_REL_OFFSET rflags,EFLAGS*/ |
508 | CFI_REL_OFFSET rsp,RSP-ARGOFFSET | 597 | CFI_REL_OFFSET rsp,RSP |
509 | /* CFI_REL_OFFSET ss,SS-ARGOFFSET*/ | 598 | /* CFI_REL_OFFSET ss,SS*/ |
510 | SAVE_REST | 599 | SAVE_EXTRA_REGS 8 |
511 | call *%rax | 600 | call *%rax |
512 | RESTORE_REST | 601 | RESTORE_EXTRA_REGS 8 |
513 | jmp ia32_sysret /* misbalances the return cache */ | 602 | ret |
514 | CFI_ENDPROC | 603 | CFI_ENDPROC |
515 | END(ia32_ptregs_common) | 604 | END(ia32_ptregs_common) |
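The "32bit syscall -> 64bit C ABI argument conversion" blocks in this file map the ia32 argument registers (arg1..arg6) onto the x86-64 C ABI registers rdi, rsi, rdx, rcx, r8, r9, with 32-bit register writes such as "movl %edx,%edx" providing zero extension. A small C rendition of that marshaling, purely as an illustration (the struct and function names are invented):

#include <stdint.h>
#include <stdio.h>

/* the 64-bit C-ABI target, as a dispatched handler would see it */
static long handler(uint64_t a1, uint64_t a2, uint64_t a3,
                    uint64_t a4, uint64_t a5, uint64_t a6)
{
        return (long)(a1 + a2 + a3 + a4 + a5 + a6);
}

/* i386 argument registers as they arrive from compat userspace */
struct i386_args { uint32_t ebx, ecx, edx, esi, edi, ebp; };

static long marshal_and_call(struct i386_args r)
{
        /* each (uint64_t) cast is the C analogue of a 32-bit register
         * write zero-extending into the full 64-bit register */
        return handler((uint64_t)r.ebx,         /* arg1 -> rdi */
                       (uint64_t)r.ecx,         /* arg2 -> rsi */
                       (uint64_t)r.edx,         /* arg3 -> rdx */
                       (uint64_t)r.esi,         /* arg4 -> rcx */
                       (uint64_t)r.edi,         /* arg5 -> r8  */
                       (uint64_t)r.ebp);        /* arg6 -> r9  */
}

int main(void)
{
        struct i386_args r = { 1, 2, 3, 4, 5, 6 };
        printf("%ld\n", marshal_and_call(r));
        return 0;
}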
diff --git a/arch/x86/ia32/nosyscall.c b/arch/x86/ia32/nosyscall.c deleted file mode 100644 index 51ecd5b4e787..000000000000 --- a/arch/x86/ia32/nosyscall.c +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/errno.h> | ||
3 | |||
4 | long compat_ni_syscall(void) | ||
5 | { | ||
6 | return -ENOSYS; | ||
7 | } | ||
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 8e0ceecdc957..719cd702b0a4 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c | |||
@@ -201,20 +201,6 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, | |||
201 | advice); | 201 | advice); |
202 | } | 202 | } |
203 | 203 | ||
204 | long sys32_vm86_warning(void) | ||
205 | { | ||
206 | struct task_struct *me = current; | ||
207 | static char lastcomm[sizeof(me->comm)]; | ||
208 | |||
209 | if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) { | ||
210 | compat_printk(KERN_INFO | ||
211 | "%s: vm86 mode not supported on 64 bit kernel\n", | ||
212 | me->comm); | ||
213 | strncpy(lastcomm, me->comm, sizeof(lastcomm)); | ||
214 | } | ||
215 | return -ENOSYS; | ||
216 | } | ||
217 | |||
218 | asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, | 204 | asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, |
219 | size_t count) | 205 | size_t count) |
220 | { | 206 | { |
diff --git a/arch/x86/ia32/syscall_ia32.c b/arch/x86/ia32/syscall_ia32.c deleted file mode 100644 index 4754ba0f5d9f..000000000000 --- a/arch/x86/ia32/syscall_ia32.c +++ /dev/null | |||
@@ -1,25 +0,0 @@ | |||
1 | /* System call table for ia32 emulation. */ | ||
2 | |||
3 | #include <linux/linkage.h> | ||
4 | #include <linux/sys.h> | ||
5 | #include <linux/cache.h> | ||
6 | #include <asm/asm-offsets.h> | ||
7 | |||
8 | #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void compat(void) ; | ||
9 | #include <asm/syscalls_32.h> | ||
10 | #undef __SYSCALL_I386 | ||
11 | |||
12 | #define __SYSCALL_I386(nr, sym, compat) [nr] = compat, | ||
13 | |||
14 | typedef void (*sys_call_ptr_t)(void); | ||
15 | |||
16 | extern void compat_ni_syscall(void); | ||
17 | |||
18 | const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = { | ||
19 | /* | ||
20 | * Smells like a compiler bug -- it doesn't work | ||
21 | * when the & below is removed. | ||
22 | */ | ||
23 | [0 ... __NR_ia32_syscall_max] = &compat_ni_syscall, | ||
24 | #include <asm/syscalls_32.h> | ||
25 | }; | ||
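The deleted table builds ia32_sys_call_table by expanding asm/syscalls_32.h twice through the __SYSCALL_I386 macro: once to declare every handler, once as designated initializers, on top of a GCC range initializer that defaults every slot to compat_ni_syscall. A self-contained sketch of the same pattern, using a toy syscall list rather than the kernel's header:

#include <stdio.h>

/* stand-in for asm/syscalls_32.h: one X-macro invocation per syscall */
#define SYSCALL_LIST(X) \
        X(0, compat_restart)    \
        X(3, compat_read)

#define DECLARE(nr, sym)        static long sym(void) { return nr; }
SYSCALL_LIST(DECLARE)
#undef DECLARE

static long compat_ni_syscall(void) { return -38; }    /* -ENOSYS */

typedef long (*sys_call_ptr_t)(void);

#define MAX_NR 3
static const sys_call_ptr_t table[MAX_NR + 1] = {
        [0 ... MAX_NR] = compat_ni_syscall,    /* GCC range initializer */
#define ENTRY(nr, sym)  [nr] = sym,            /* later entries override the default */
        SYSCALL_LIST(ENTRY)
#undef ENTRY
};

int main(void)
{
        printf("%ld %ld %ld\n", table[0](), table[1](), table[3]());
        return 0;
}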
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index 372231c22a47..bdf02eeee765 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h | |||
@@ -18,12 +18,63 @@ | |||
18 | .endm | 18 | .endm |
19 | #endif | 19 | #endif |
20 | 20 | ||
21 | .macro altinstruction_entry orig alt feature orig_len alt_len | 21 | .macro altinstruction_entry orig alt feature orig_len alt_len pad_len |
22 | .long \orig - . | 22 | .long \orig - . |
23 | .long \alt - . | 23 | .long \alt - . |
24 | .word \feature | 24 | .word \feature |
25 | .byte \orig_len | 25 | .byte \orig_len |
26 | .byte \alt_len | 26 | .byte \alt_len |
27 | .byte \pad_len | ||
28 | .endm | ||
29 | |||
30 | .macro ALTERNATIVE oldinstr, newinstr, feature | ||
31 | 140: | ||
32 | \oldinstr | ||
33 | 141: | ||
34 | .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 | ||
35 | 142: | ||
36 | |||
37 | .pushsection .altinstructions,"a" | ||
38 | altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b | ||
39 | .popsection | ||
40 | |||
41 | .pushsection .altinstr_replacement,"ax" | ||
42 | 143: | ||
43 | \newinstr | ||
44 | 144: | ||
45 | .popsection | ||
46 | .endm | ||
47 | |||
48 | #define old_len 141b-140b | ||
49 | #define new_len1 144f-143f | ||
50 | #define new_len2 145f-144f | ||
51 | |||
52 | /* | ||
53 | * max without conditionals. Idea adapted from: | ||
54 | * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax | ||
55 | */ | ||
56 | #define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) | ||
57 | |||
58 | .macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 | ||
59 | 140: | ||
60 | \oldinstr | ||
61 | 141: | ||
62 | .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ | ||
63 | (alt_max_short(new_len1, new_len2) - (old_len)),0x90 | ||
64 | 142: | ||
65 | |||
66 | .pushsection .altinstructions,"a" | ||
67 | altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b | ||
68 | altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b | ||
69 | .popsection | ||
70 | |||
71 | .pushsection .altinstr_replacement,"ax" | ||
72 | 143: | ||
73 | \newinstr1 | ||
74 | 144: | ||
75 | \newinstr2 | ||
76 | 145: | ||
77 | .popsection | ||
27 | .endm | 78 | .endm |
28 | 79 | ||
29 | #endif /* __ASSEMBLY__ */ | 80 | #endif /* __ASSEMBLY__ */ |
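Two pieces of integer arithmetic carry this file: the ".skip -((rlen - slen) > 0) * (rlen - slen), 0x90" idiom pads the original instruction with NOPs only when the replacement is longer, and alt_max_short() is the branchless max from the bithacks page cited above. gas evaluates a true comparison as -1, which is why the assembler forms carry extra negations; C comparisons yield 1, so the userspace check below builds the mask with a single negation. Illustration only:

#include <assert.h>

/* branchless max, C flavour: -(a < b) is an all-ones mask when a < b */
#define alt_max_short(a, b)     ((a) ^ (((a) ^ (b)) & -((a) < (b))))

/* NOP padding: max(rlen - slen, 0) bytes of 0x90 */
#define pad_len(rlen, slen)     ((((rlen) - (slen)) > 0) * ((rlen) - (slen)))

int main(void)
{
        assert(alt_max_short(3, 7) == 7);
        assert(alt_max_short(7, 3) == 7);

        assert(pad_len(5, 2) == 3);     /* replacement longer: pad 3 NOPs */
        assert(pad_len(2, 5) == 0);     /* replacement shorter: no padding */
        return 0;
}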
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 473bdbee378a..ba32af062f61 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h | |||
@@ -48,8 +48,9 @@ struct alt_instr { | |||
48 | s32 repl_offset; /* offset to replacement instruction */ | 48 | s32 repl_offset; /* offset to replacement instruction */ |
49 | u16 cpuid; /* cpuid bit set for replacement */ | 49 | u16 cpuid; /* cpuid bit set for replacement */ |
50 | u8 instrlen; /* length of original instruction */ | 50 | u8 instrlen; /* length of original instruction */ |
51 | u8 replacementlen; /* length of new instruction, <= instrlen */ | 51 | u8 replacementlen; /* length of new instruction */ |
52 | }; | 52 | u8 padlen; /* length of build-time padding */ |
53 | } __packed; | ||
53 | 54 | ||
54 | extern void alternative_instructions(void); | 55 | extern void alternative_instructions(void); |
55 | extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); | 56 | extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); |
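With the new padlen byte and __packed, each .altinstructions record is 4+4+2+1+1+1 = 13 bytes, matching what the altinstruction_entry asm macro emits (.long, .long, .word, three .byte fields). A replica struct with a compile-time size check, for illustration only; the real definition is the struct alt_instr shown above:

#include <assert.h>
#include <stdint.h>

struct alt_instr_like {
        int32_t  instr_offset;          /* original instruction, relative */
        int32_t  repl_offset;           /* replacement instruction, relative */
        uint16_t cpuid;                 /* feature bit */
        uint8_t  instrlen;              /* original length, incl. padding */
        uint8_t  replacementlen;        /* replacement length */
        uint8_t  padlen;                /* build-time padding length */
} __attribute__((packed));

static_assert(sizeof(struct alt_instr_like) == 13,
              "one record per altinstruction_entry: 4+4+2+1+1+1 bytes");

int main(void) { return 0; }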
@@ -76,50 +77,69 @@ static inline int alternatives_text_reserved(void *start, void *end) | |||
76 | } | 77 | } |
77 | #endif /* CONFIG_SMP */ | 78 | #endif /* CONFIG_SMP */ |
78 | 79 | ||
79 | #define OLDINSTR(oldinstr) "661:\n\t" oldinstr "\n662:\n" | 80 | #define b_replacement(num) "664"#num |
81 | #define e_replacement(num) "665"#num | ||
80 | 82 | ||
81 | #define b_replacement(number) "663"#number | 83 | #define alt_end_marker "663" |
82 | #define e_replacement(number) "664"#number | 84 | #define alt_slen "662b-661b" |
85 | #define alt_pad_len alt_end_marker"b-662b" | ||
86 | #define alt_total_slen alt_end_marker"b-661b" | ||
87 | #define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" | ||
83 | 88 | ||
84 | #define alt_slen "662b-661b" | 89 | #define __OLDINSTR(oldinstr, num) \ |
85 | #define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f" | 90 | "661:\n\t" oldinstr "\n662:\n" \ |
91 | ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \ | ||
92 | "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" | ||
86 | 93 | ||
87 | #define ALTINSTR_ENTRY(feature, number) \ | 94 | #define OLDINSTR(oldinstr, num) \ |
95 | __OLDINSTR(oldinstr, num) \ | ||
96 | alt_end_marker ":\n" | ||
97 | |||
98 | /* | ||
99 | * max without conditionals. Idea adapted from: | ||
100 | * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax | ||
101 | * | ||
102 | * The additional "-" is needed because gas works with s32s. | ||
103 | */ | ||
104 | #define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))" | ||
105 | |||
106 | /* | ||
107 | * Pad the original instruction with additional NOPs if the longer of | ||
108 | * the two replacement alternatives exceeds its length. | ||
109 | */ | ||
110 | #define OLDINSTR_2(oldinstr, num1, num2) \ | ||
111 | "661:\n\t" oldinstr "\n662:\n" \ | ||
112 | ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \ | ||
113 | "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \ | ||
114 | alt_end_marker ":\n" | ||
115 | |||
116 | #define ALTINSTR_ENTRY(feature, num) \ | ||
88 | " .long 661b - .\n" /* label */ \ | 117 | " .long 661b - .\n" /* label */ \ |
89 | " .long " b_replacement(number)"f - .\n" /* new instruction */ \ | 118 | " .long " b_replacement(num)"f - .\n" /* new instruction */ \ |
90 | " .word " __stringify(feature) "\n" /* feature bit */ \ | 119 | " .word " __stringify(feature) "\n" /* feature bit */ \ |
91 | " .byte " alt_slen "\n" /* source len */ \ | 120 | " .byte " alt_total_slen "\n" /* source len */ \ |
92 | " .byte " alt_rlen(number) "\n" /* replacement len */ | 121 | " .byte " alt_rlen(num) "\n" /* replacement len */ \ |
93 | 122 | " .byte " alt_pad_len "\n" /* pad len */ | |
94 | #define DISCARD_ENTRY(number) /* rlen <= slen */ \ | ||
95 | " .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n" | ||
96 | 123 | ||
97 | #define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */ \ | 124 | #define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \ |
98 | b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t" | 125 | b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t" |
99 | 126 | ||
100 | /* alternative assembly primitive: */ | 127 | /* alternative assembly primitive: */ |
101 | #define ALTERNATIVE(oldinstr, newinstr, feature) \ | 128 | #define ALTERNATIVE(oldinstr, newinstr, feature) \ |
102 | OLDINSTR(oldinstr) \ | 129 | OLDINSTR(oldinstr, 1) \ |
103 | ".pushsection .altinstructions,\"a\"\n" \ | 130 | ".pushsection .altinstructions,\"a\"\n" \ |
104 | ALTINSTR_ENTRY(feature, 1) \ | 131 | ALTINSTR_ENTRY(feature, 1) \ |
105 | ".popsection\n" \ | 132 | ".popsection\n" \ |
106 | ".pushsection .discard,\"aw\",@progbits\n" \ | ||
107 | DISCARD_ENTRY(1) \ | ||
108 | ".popsection\n" \ | ||
109 | ".pushsection .altinstr_replacement, \"ax\"\n" \ | 133 | ".pushsection .altinstr_replacement, \"ax\"\n" \ |
110 | ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ | 134 | ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ |
111 | ".popsection" | 135 | ".popsection" |
112 | 136 | ||
113 | #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ | 137 | #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ |
114 | OLDINSTR(oldinstr) \ | 138 | OLDINSTR_2(oldinstr, 1, 2) \ |
115 | ".pushsection .altinstructions,\"a\"\n" \ | 139 | ".pushsection .altinstructions,\"a\"\n" \ |
116 | ALTINSTR_ENTRY(feature1, 1) \ | 140 | ALTINSTR_ENTRY(feature1, 1) \ |
117 | ALTINSTR_ENTRY(feature2, 2) \ | 141 | ALTINSTR_ENTRY(feature2, 2) \ |
118 | ".popsection\n" \ | 142 | ".popsection\n" \ |
119 | ".pushsection .discard,\"aw\",@progbits\n" \ | ||
120 | DISCARD_ENTRY(1) \ | ||
121 | DISCARD_ENTRY(2) \ | ||
122 | ".popsection\n" \ | ||
123 | ".pushsection .altinstr_replacement, \"ax\"\n" \ | 143 | ".pushsection .altinstr_replacement, \"ax\"\n" \ |
124 | ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ | 144 | ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ |
125 | ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ | 145 | ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ |
@@ -146,6 +166,9 @@ static inline int alternatives_text_reserved(void *start, void *end) | |||
146 | #define alternative(oldinstr, newinstr, feature) \ | 166 | #define alternative(oldinstr, newinstr, feature) \ |
147 | asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") | 167 | asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") |
148 | 168 | ||
169 | #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ | ||
170 | asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory") | ||
171 | |||
149 | /* | 172 | /* |
150 | * Alternative inline assembly with input. | 173 | * Alternative inline assembly with input. |
151 | * | 174 | * |
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index efc3b22d896e..976b86a325e5 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h | |||
@@ -91,7 +91,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v) | |||
91 | { | 91 | { |
92 | volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); | 92 | volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); |
93 | 93 | ||
94 | alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP, | 94 | alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP, |
95 | ASM_OUTPUT2("=r" (v), "=m" (*addr)), | 95 | ASM_OUTPUT2("=r" (v), "=m" (*addr)), |
96 | ASM_OUTPUT2("0" (v), "m" (*addr))); | 96 | ASM_OUTPUT2("0" (v), "m" (*addr))); |
97 | } | 97 | } |
@@ -204,7 +204,6 @@ extern void clear_local_APIC(void); | |||
204 | extern void disconnect_bsp_APIC(int virt_wire_setup); | 204 | extern void disconnect_bsp_APIC(int virt_wire_setup); |
205 | extern void disable_local_APIC(void); | 205 | extern void disable_local_APIC(void); |
206 | extern void lapic_shutdown(void); | 206 | extern void lapic_shutdown(void); |
207 | extern int verify_local_APIC(void); | ||
208 | extern void sync_Arb_IDs(void); | 207 | extern void sync_Arb_IDs(void); |
209 | extern void init_bsp_APIC(void); | 208 | extern void init_bsp_APIC(void); |
210 | extern void setup_local_APIC(void); | 209 | extern void setup_local_APIC(void); |
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 2ab1eb33106e..959e45b81fe2 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h | |||
@@ -95,13 +95,11 @@ do { \ | |||
95 | * Stop RDTSC speculation. This is needed when you need to use RDTSC | 95 | * Stop RDTSC speculation. This is needed when you need to use RDTSC |
96 | * (or get_cycles or vread that possibly accesses the TSC) in a defined | 96 | * (or get_cycles or vread that possibly accesses the TSC) in a defined |
97 | * code region. | 97 | * code region. |
98 | * | ||
99 | * (Could use an alternative three way for this if there was one.) | ||
100 | */ | 98 | */ |
101 | static __always_inline void rdtsc_barrier(void) | 99 | static __always_inline void rdtsc_barrier(void) |
102 | { | 100 | { |
103 | alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); | 101 | alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, |
104 | alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); | 102 | "lfence", X86_FEATURE_LFENCE_RDTSC); |
105 | } | 103 | } |
106 | 104 | ||
107 | #endif /* _ASM_X86_BARRIER_H */ | 105 | #endif /* _ASM_X86_BARRIER_H */ |
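alternative_2() lets rdtsc_barrier() start out as nothing and get patched into a single MFENCE or LFENCE at boot, instead of two separately patched NOP slots. Below is a rough userspace analogue of the idea: pick one of three implementations once, based on detected features, rather than branching on every call. This is only an analogy (the feature flags are made up); the kernel rewrites the instruction bytes in place.

#include <stdio.h>

static void barrier_none(void)   { }
static void barrier_mfence(void) { __asm__ volatile("mfence" ::: "memory"); }
static void barrier_lfence(void) { __asm__ volatile("lfence" ::: "memory"); }

static void (*tsc_barrier)(void) = barrier_none;

/* decide once, based on detected CPU features */
static void pick_tsc_barrier(int has_mfence_rdtsc, int has_lfence_rdtsc)
{
        if (has_lfence_rdtsc)
                tsc_barrier = barrier_lfence;
        else if (has_mfence_rdtsc)
                tsc_barrier = barrier_mfence;
}

int main(void)
{
        pick_tsc_barrier(0, 1);
        tsc_barrier();          /* every later call is just an indirect call */
        puts("tsc barrier selected");
        return 0;
}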
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index 1f1297b46f83..1c8b50edb2db 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h | |||
@@ -55,143 +55,157 @@ For 32-bit we have the following conventions - kernel is built with | |||
55 | * for assembly code: | 55 | * for assembly code: |
56 | */ | 56 | */ |
57 | 57 | ||
58 | #define R15 0 | 58 | /* The layout forms the "struct pt_regs" on the stack: */ |
59 | #define R14 8 | 59 | /* |
60 | #define R13 16 | 60 | * C ABI says these regs are callee-preserved. They aren't saved on kernel entry |
61 | #define R12 24 | 61 | * unless syscall needs a complete, fully filled "struct pt_regs". |
62 | #define RBP 32 | 62 | */ |
63 | #define RBX 40 | 63 | #define R15 0*8 |
64 | 64 | #define R14 1*8 | |
65 | /* arguments: interrupts/non tracing syscalls only save up to here: */ | 65 | #define R13 2*8 |
66 | #define R11 48 | 66 | #define R12 3*8 |
67 | #define R10 56 | 67 | #define RBP 4*8 |
68 | #define R9 64 | 68 | #define RBX 5*8 |
69 | #define R8 72 | 69 | /* These regs are callee-clobbered. Always saved on kernel entry. */ |
70 | #define RAX 80 | 70 | #define R11 6*8 |
71 | #define RCX 88 | 71 | #define R10 7*8 |
72 | #define RDX 96 | 72 | #define R9 8*8 |
73 | #define RSI 104 | 73 | #define R8 9*8 |
74 | #define RDI 112 | 74 | #define RAX 10*8 |
75 | #define ORIG_RAX 120 /* + error_code */ | 75 | #define RCX 11*8 |
76 | /* end of arguments */ | 76 | #define RDX 12*8 |
77 | 77 | #define RSI 13*8 | |
78 | /* cpu exception frame or undefined in case of fast syscall: */ | 78 | #define RDI 14*8 |
79 | #define RIP 128 | 79 | /* |
80 | #define CS 136 | 80 | * On syscall entry, this is syscall#. On CPU exception, this is error code. |
81 | #define EFLAGS 144 | 81 | * On hw interrupt, it's IRQ number: |
82 | #define RSP 152 | 82 | */ |
83 | #define SS 160 | 83 | #define ORIG_RAX 15*8 |
84 | 84 | /* Return frame for iretq */ | |
85 | #define ARGOFFSET R11 | 85 | #define RIP 16*8 |
86 | 86 | #define CS 17*8 | |
87 | .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0 | 87 | #define EFLAGS 18*8 |
88 | subq $9*8+\addskip, %rsp | 88 | #define RSP 19*8 |
89 | CFI_ADJUST_CFA_OFFSET 9*8+\addskip | 89 | #define SS 20*8 |
90 | movq_cfi rdi, 8*8 | 90 | |
91 | movq_cfi rsi, 7*8 | 91 | #define SIZEOF_PTREGS 21*8 |
92 | movq_cfi rdx, 6*8 | 92 | |
93 | 93 | .macro ALLOC_PT_GPREGS_ON_STACK addskip=0 | |
94 | .if \save_rcx | 94 | subq $15*8+\addskip, %rsp |
95 | movq_cfi rcx, 5*8 | 95 | CFI_ADJUST_CFA_OFFSET 15*8+\addskip |
96 | .endif | 96 | .endm |
97 | 97 | ||
98 | .if \rax_enosys | 98 | .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1 |
99 | movq $-ENOSYS, 4*8(%rsp) | 99 | .if \r11 |
100 | .else | 100 | movq_cfi r11, 6*8+\offset |
101 | movq_cfi rax, 4*8 | ||
102 | .endif | 101 | .endif |
103 | 102 | .if \r8910 | |
104 | .if \save_r891011 | 103 | movq_cfi r10, 7*8+\offset |
105 | movq_cfi r8, 3*8 | 104 | movq_cfi r9, 8*8+\offset |
106 | movq_cfi r9, 2*8 | 105 | movq_cfi r8, 9*8+\offset |
107 | movq_cfi r10, 1*8 | 106 | .endif |
108 | movq_cfi r11, 0*8 | 107 | .if \rax |
108 | movq_cfi rax, 10*8+\offset | ||
109 | .endif | ||
110 | .if \rcx | ||
111 | movq_cfi rcx, 11*8+\offset | ||
109 | .endif | 112 | .endif |
113 | movq_cfi rdx, 12*8+\offset | ||
114 | movq_cfi rsi, 13*8+\offset | ||
115 | movq_cfi rdi, 14*8+\offset | ||
116 | .endm | ||
117 | .macro SAVE_C_REGS offset=0 | ||
118 | SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1 | ||
119 | .endm | ||
120 | .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0 | ||
121 | SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1 | ||
122 | .endm | ||
123 | .macro SAVE_C_REGS_EXCEPT_R891011 | ||
124 | SAVE_C_REGS_HELPER 0, 1, 1, 0, 0 | ||
125 | .endm | ||
126 | .macro SAVE_C_REGS_EXCEPT_RCX_R891011 | ||
127 | SAVE_C_REGS_HELPER 0, 1, 0, 0, 0 | ||
128 | .endm | ||
129 | .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11 | ||
130 | SAVE_C_REGS_HELPER 0, 0, 0, 1, 0 | ||
131 | .endm | ||
132 | |||
133 | .macro SAVE_EXTRA_REGS offset=0 | ||
134 | movq_cfi r15, 0*8+\offset | ||
135 | movq_cfi r14, 1*8+\offset | ||
136 | movq_cfi r13, 2*8+\offset | ||
137 | movq_cfi r12, 3*8+\offset | ||
138 | movq_cfi rbp, 4*8+\offset | ||
139 | movq_cfi rbx, 5*8+\offset | ||
140 | .endm | ||
141 | .macro SAVE_EXTRA_REGS_RBP offset=0 | ||
142 | movq_cfi rbp, 4*8+\offset | ||
143 | .endm | ||
110 | 144 | ||
145 | .macro RESTORE_EXTRA_REGS offset=0 | ||
146 | movq_cfi_restore 0*8+\offset, r15 | ||
147 | movq_cfi_restore 1*8+\offset, r14 | ||
148 | movq_cfi_restore 2*8+\offset, r13 | ||
149 | movq_cfi_restore 3*8+\offset, r12 | ||
150 | movq_cfi_restore 4*8+\offset, rbp | ||
151 | movq_cfi_restore 5*8+\offset, rbx | ||
111 | .endm | 152 | .endm |
112 | 153 | ||
113 | #define ARG_SKIP (9*8) | 154 | .macro ZERO_EXTRA_REGS |
155 | xorl %r15d, %r15d | ||
156 | xorl %r14d, %r14d | ||
157 | xorl %r13d, %r13d | ||
158 | xorl %r12d, %r12d | ||
159 | xorl %ebp, %ebp | ||
160 | xorl %ebx, %ebx | ||
161 | .endm | ||
114 | 162 | ||
115 | .macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \ | 163 | .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 |
116 | rstor_r8910=1, rstor_rdx=1 | ||
117 | .if \rstor_r11 | 164 | .if \rstor_r11 |
118 | movq_cfi_restore 0*8, r11 | 165 | movq_cfi_restore 6*8, r11 |
119 | .endif | 166 | .endif |
120 | |||
121 | .if \rstor_r8910 | 167 | .if \rstor_r8910 |
122 | movq_cfi_restore 1*8, r10 | 168 | movq_cfi_restore 7*8, r10 |
123 | movq_cfi_restore 2*8, r9 | 169 | movq_cfi_restore 8*8, r9 |
124 | movq_cfi_restore 3*8, r8 | 170 | movq_cfi_restore 9*8, r8 |
125 | .endif | 171 | .endif |
126 | |||
127 | .if \rstor_rax | 172 | .if \rstor_rax |
128 | movq_cfi_restore 4*8, rax | 173 | movq_cfi_restore 10*8, rax |
129 | .endif | 174 | .endif |
130 | |||
131 | .if \rstor_rcx | 175 | .if \rstor_rcx |
132 | movq_cfi_restore 5*8, rcx | 176 | movq_cfi_restore 11*8, rcx |
133 | .endif | 177 | .endif |
134 | |||
135 | .if \rstor_rdx | 178 | .if \rstor_rdx |
136 | movq_cfi_restore 6*8, rdx | 179 | movq_cfi_restore 12*8, rdx |
137 | .endif | ||
138 | |||
139 | movq_cfi_restore 7*8, rsi | ||
140 | movq_cfi_restore 8*8, rdi | ||
141 | |||
142 | .if ARG_SKIP+\addskip > 0 | ||
143 | addq $ARG_SKIP+\addskip, %rsp | ||
144 | CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip) | ||
145 | .endif | 180 | .endif |
181 | movq_cfi_restore 13*8, rsi | ||
182 | movq_cfi_restore 14*8, rdi | ||
146 | .endm | 183 | .endm |
147 | 184 | .macro RESTORE_C_REGS | |
148 | .macro LOAD_ARGS offset, skiprax=0 | 185 | RESTORE_C_REGS_HELPER 1,1,1,1,1 |
149 | movq \offset(%rsp), %r11 | ||
150 | movq \offset+8(%rsp), %r10 | ||
151 | movq \offset+16(%rsp), %r9 | ||
152 | movq \offset+24(%rsp), %r8 | ||
153 | movq \offset+40(%rsp), %rcx | ||
154 | movq \offset+48(%rsp), %rdx | ||
155 | movq \offset+56(%rsp), %rsi | ||
156 | movq \offset+64(%rsp), %rdi | ||
157 | .if \skiprax | ||
158 | .else | ||
159 | movq \offset+72(%rsp), %rax | ||
160 | .endif | ||
161 | .endm | 186 | .endm |
162 | 187 | .macro RESTORE_C_REGS_EXCEPT_RAX | |
163 | #define REST_SKIP (6*8) | 188 | RESTORE_C_REGS_HELPER 0,1,1,1,1 |
164 | |||
165 | .macro SAVE_REST | ||
166 | subq $REST_SKIP, %rsp | ||
167 | CFI_ADJUST_CFA_OFFSET REST_SKIP | ||
168 | movq_cfi rbx, 5*8 | ||
169 | movq_cfi rbp, 4*8 | ||
170 | movq_cfi r12, 3*8 | ||
171 | movq_cfi r13, 2*8 | ||
172 | movq_cfi r14, 1*8 | ||
173 | movq_cfi r15, 0*8 | ||
174 | .endm | 189 | .endm |
175 | 190 | .macro RESTORE_C_REGS_EXCEPT_RCX | |
176 | .macro RESTORE_REST | 191 | RESTORE_C_REGS_HELPER 1,0,1,1,1 |
177 | movq_cfi_restore 0*8, r15 | ||
178 | movq_cfi_restore 1*8, r14 | ||
179 | movq_cfi_restore 2*8, r13 | ||
180 | movq_cfi_restore 3*8, r12 | ||
181 | movq_cfi_restore 4*8, rbp | ||
182 | movq_cfi_restore 5*8, rbx | ||
183 | addq $REST_SKIP, %rsp | ||
184 | CFI_ADJUST_CFA_OFFSET -(REST_SKIP) | ||
185 | .endm | 192 | .endm |
186 | 193 | .macro RESTORE_C_REGS_EXCEPT_R11 | |
187 | .macro SAVE_ALL | 194 | RESTORE_C_REGS_HELPER 1,1,0,1,1 |
188 | SAVE_ARGS | 195 | .endm |
189 | SAVE_REST | 196 | .macro RESTORE_C_REGS_EXCEPT_RCX_R11 |
197 | RESTORE_C_REGS_HELPER 1,0,0,1,1 | ||
198 | .endm | ||
199 | .macro RESTORE_RSI_RDI | ||
200 | RESTORE_C_REGS_HELPER 0,0,0,0,0 | ||
201 | .endm | ||
202 | .macro RESTORE_RSI_RDI_RDX | ||
203 | RESTORE_C_REGS_HELPER 0,0,0,0,1 | ||
190 | .endm | 204 | .endm |
191 | 205 | ||
192 | .macro RESTORE_ALL addskip=0 | 206 | .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 |
193 | RESTORE_REST | 207 | addq $15*8+\addskip, %rsp |
194 | RESTORE_ARGS 1, \addskip | 208 | CFI_ADJUST_CFA_OFFSET -(15*8+\addskip) |
195 | .endm | 209 | .endm |
196 | 210 | ||
197 | .macro icebp | 211 | .macro icebp |
@@ -210,37 +224,23 @@ For 32-bit we have the following conventions - kernel is built with | |||
210 | */ | 224 | */ |
211 | 225 | ||
212 | .macro SAVE_ALL | 226 | .macro SAVE_ALL |
213 | pushl_cfi %eax | 227 | pushl_cfi_reg eax |
214 | CFI_REL_OFFSET eax, 0 | 228 | pushl_cfi_reg ebp |
215 | pushl_cfi %ebp | 229 | pushl_cfi_reg edi |
216 | CFI_REL_OFFSET ebp, 0 | 230 | pushl_cfi_reg esi |
217 | pushl_cfi %edi | 231 | pushl_cfi_reg edx |
218 | CFI_REL_OFFSET edi, 0 | 232 | pushl_cfi_reg ecx |
219 | pushl_cfi %esi | 233 | pushl_cfi_reg ebx |
220 | CFI_REL_OFFSET esi, 0 | ||
221 | pushl_cfi %edx | ||
222 | CFI_REL_OFFSET edx, 0 | ||
223 | pushl_cfi %ecx | ||
224 | CFI_REL_OFFSET ecx, 0 | ||
225 | pushl_cfi %ebx | ||
226 | CFI_REL_OFFSET ebx, 0 | ||
227 | .endm | 234 | .endm |
228 | 235 | ||
229 | .macro RESTORE_ALL | 236 | .macro RESTORE_ALL |
230 | popl_cfi %ebx | 237 | popl_cfi_reg ebx |
231 | CFI_RESTORE ebx | 238 | popl_cfi_reg ecx |
232 | popl_cfi %ecx | 239 | popl_cfi_reg edx |
233 | CFI_RESTORE ecx | 240 | popl_cfi_reg esi |
234 | popl_cfi %edx | 241 | popl_cfi_reg edi |
235 | CFI_RESTORE edx | 242 | popl_cfi_reg ebp |
236 | popl_cfi %esi | 243 | popl_cfi_reg eax |
237 | CFI_RESTORE esi | ||
238 | popl_cfi %edi | ||
239 | CFI_RESTORE edi | ||
240 | popl_cfi %ebp | ||
241 | CFI_RESTORE ebp | ||
242 | popl_cfi %eax | ||
243 | CFI_RESTORE eax | ||
244 | .endm | 244 | .endm |
245 | 245 | ||
246 | #endif /* CONFIG_X86_64 */ | 246 | #endif /* CONFIG_X86_64 */ |
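The R15..SS offsets above are 8-byte slot indices into the pt_regs image the entry code builds on the stack: 15 general-purpose slots below orig_ax, then the 5-slot hardware iret frame, 21*8 = 168 bytes in total (SIZEOF_PTREGS). A replica with offsetof checks, as an illustration only; the kernel's real layout is struct pt_regs in asm/ptrace.h:

#include <assert.h>
#include <stddef.h>

/* assumes LP64: unsigned long is 8 bytes, as on x86-64 */
struct pt_regs_like {
        /* callee-preserved; only saved when a full frame is needed */
        unsigned long r15, r14, r13, r12, bp, bx;
        /* callee-clobbered; always saved on kernel entry */
        unsigned long r11, r10, r9, r8, ax, cx, dx, si, di;
        /* syscall number / error code / IRQ number */
        unsigned long orig_ax;
        /* hardware iret frame */
        unsigned long ip, cs, flags, sp, ss;
};

static_assert(offsetof(struct pt_regs_like, r11)     ==  6 * 8, "R11");
static_assert(offsetof(struct pt_regs_like, ax)      == 10 * 8, "RAX");
static_assert(offsetof(struct pt_regs_like, orig_ax) == 15 * 8, "ORIG_RAX");
static_assert(offsetof(struct pt_regs_like, ip)      == 16 * 8, "RIP");
static_assert(sizeof(struct pt_regs_like)            == 21 * 8, "SIZEOF_PTREGS");

int main(void) { return 0; }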
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 59c6c401f79f..acdee09228b3 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h | |||
@@ -301,7 +301,7 @@ static inline void __user *arch_compat_alloc_user_space(long len) | |||
301 | sp = task_pt_regs(current)->sp; | 301 | sp = task_pt_regs(current)->sp; |
302 | } else { | 302 | } else { |
303 | /* -128 for the x32 ABI redzone */ | 303 | /* -128 for the x32 ABI redzone */ |
304 | sp = this_cpu_read(old_rsp) - 128; | 304 | sp = task_pt_regs(current)->sp - 128; |
305 | } | 305 | } |
306 | 306 | ||
307 | return (void __user *)round_down(sp - len, 16); | 307 | return (void __user *)round_down(sp - len, 16); |
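The arithmetic in arch_compat_alloc_user_space(): take the user stack pointer from the saved pt_regs, step past the 128-byte x32 redzone, then carve len bytes rounded down to 16-byte alignment. A small standalone check of that computation; round_down here mimics the kernel helper for power-of-two alignment:

#include <stdio.h>

#define round_down(x, a)        ((unsigned long)(x) & ~((unsigned long)(a) - 1))

static unsigned long alloc_user_space_like(unsigned long sp, unsigned long len,
                                           int x32_abi)
{
        if (x32_abi)
                sp -= 128;              /* skip the x32 ABI redzone */
        return round_down(sp - len, 16);
}

int main(void)
{
        unsigned long buf = alloc_user_space_like(0x7fffffffe123UL, 40, 1);

        printf("buf=%#lx aligned=%d\n", buf, (buf & 15) == 0);
        return 0;
}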
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index d2b12988d2ed..bf2caa1dedc5 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h | |||
@@ -34,8 +34,6 @@ extern int _debug_hotplug_cpu(int cpu, int action); | |||
34 | #endif | 34 | #endif |
35 | #endif | 35 | #endif |
36 | 36 | ||
37 | DECLARE_PER_CPU(int, cpu_state); | ||
38 | |||
39 | int mwait_usable(const struct cpuinfo_x86 *); | 37 | int mwait_usable(const struct cpuinfo_x86 *); |
40 | 38 | ||
41 | #endif /* _ASM_X86_CPU_H */ | 39 | #endif /* _ASM_X86_CPU_H */ |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index c1553b70fed4..7ee9b94d9921 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -233,7 +233,9 @@ | |||
233 | #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ | 233 | #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ |
234 | #define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ | 234 | #define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ |
235 | #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ | 235 | #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ |
236 | #define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */ | ||
236 | #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ | 237 | #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ |
238 | #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ | ||
237 | #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ | 239 | #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ |
238 | #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ | 240 | #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ |
239 | #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ | 241 | #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ |
@@ -426,6 +428,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) | |||
426 | " .word %P0\n" /* 1: do replace */ | 428 | " .word %P0\n" /* 1: do replace */ |
427 | " .byte 2b - 1b\n" /* source len */ | 429 | " .byte 2b - 1b\n" /* source len */ |
428 | " .byte 0\n" /* replacement len */ | 430 | " .byte 0\n" /* replacement len */ |
431 | " .byte 0\n" /* pad len */ | ||
429 | ".previous\n" | 432 | ".previous\n" |
430 | /* skipping size check since replacement size = 0 */ | 433 | /* skipping size check since replacement size = 0 */ |
431 | : : "i" (X86_FEATURE_ALWAYS) : : t_warn); | 434 | : : "i" (X86_FEATURE_ALWAYS) : : t_warn); |
@@ -440,6 +443,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) | |||
440 | " .word %P0\n" /* feature bit */ | 443 | " .word %P0\n" /* feature bit */ |
441 | " .byte 2b - 1b\n" /* source len */ | 444 | " .byte 2b - 1b\n" /* source len */ |
442 | " .byte 0\n" /* replacement len */ | 445 | " .byte 0\n" /* replacement len */ |
446 | " .byte 0\n" /* pad len */ | ||
443 | ".previous\n" | 447 | ".previous\n" |
444 | /* skipping size check since replacement size = 0 */ | 448 | /* skipping size check since replacement size = 0 */ |
445 | : : "i" (bit) : : t_no); | 449 | : : "i" (bit) : : t_no); |
@@ -465,6 +469,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) | |||
465 | " .word %P1\n" /* feature bit */ | 469 | " .word %P1\n" /* feature bit */ |
466 | " .byte 2b - 1b\n" /* source len */ | 470 | " .byte 2b - 1b\n" /* source len */ |
467 | " .byte 4f - 3f\n" /* replacement len */ | 471 | " .byte 4f - 3f\n" /* replacement len */ |
472 | " .byte 0\n" /* pad len */ | ||
468 | ".previous\n" | 473 | ".previous\n" |
469 | ".section .discard,\"aw\",@progbits\n" | 474 | ".section .discard,\"aw\",@progbits\n" |
470 | " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ | 475 | " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ |
@@ -491,31 +496,30 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) | |||
491 | static __always_inline __pure bool _static_cpu_has_safe(u16 bit) | 496 | static __always_inline __pure bool _static_cpu_has_safe(u16 bit) |
492 | { | 497 | { |
493 | #ifdef CC_HAVE_ASM_GOTO | 498 | #ifdef CC_HAVE_ASM_GOTO |
494 | /* | 499 | asm_volatile_goto("1: jmp %l[t_dynamic]\n" |
495 | * We need to spell the jumps to the compiler because, depending on the offset, | ||
496 | * the replacement jump can be bigger than the original jump, and this we cannot | ||
497 | * have. Thus, we force the jump to the widest, 4-byte, signed relative | ||
498 | * offset even though the last would often fit in less bytes. | ||
499 | */ | ||
500 | asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n" | ||
501 | "2:\n" | 500 | "2:\n" |
501 | ".skip -(((5f-4f) - (2b-1b)) > 0) * " | ||
502 | "((5f-4f) - (2b-1b)),0x90\n" | ||
503 | "3:\n" | ||
502 | ".section .altinstructions,\"a\"\n" | 504 | ".section .altinstructions,\"a\"\n" |
503 | " .long 1b - .\n" /* src offset */ | 505 | " .long 1b - .\n" /* src offset */ |
504 | " .long 3f - .\n" /* repl offset */ | 506 | " .long 4f - .\n" /* repl offset */ |
505 | " .word %P1\n" /* always replace */ | 507 | " .word %P1\n" /* always replace */ |
506 | " .byte 2b - 1b\n" /* src len */ | 508 | " .byte 3b - 1b\n" /* src len */ |
507 | " .byte 4f - 3f\n" /* repl len */ | 509 | " .byte 5f - 4f\n" /* repl len */ |
510 | " .byte 3b - 2b\n" /* pad len */ | ||
508 | ".previous\n" | 511 | ".previous\n" |
509 | ".section .altinstr_replacement,\"ax\"\n" | 512 | ".section .altinstr_replacement,\"ax\"\n" |
510 | "3: .byte 0xe9\n .long %l[t_no] - 2b\n" | 513 | "4: jmp %l[t_no]\n" |
511 | "4:\n" | 514 | "5:\n" |
512 | ".previous\n" | 515 | ".previous\n" |
513 | ".section .altinstructions,\"a\"\n" | 516 | ".section .altinstructions,\"a\"\n" |
514 | " .long 1b - .\n" /* src offset */ | 517 | " .long 1b - .\n" /* src offset */ |
515 | " .long 0\n" /* no replacement */ | 518 | " .long 0\n" /* no replacement */ |
516 | " .word %P0\n" /* feature bit */ | 519 | " .word %P0\n" /* feature bit */ |
517 | " .byte 2b - 1b\n" /* src len */ | 520 | " .byte 3b - 1b\n" /* src len */ |
518 | " .byte 0\n" /* repl len */ | 521 | " .byte 0\n" /* repl len */ |
522 | " .byte 0\n" /* pad len */ | ||
519 | ".previous\n" | 523 | ".previous\n" |
520 | : : "i" (bit), "i" (X86_FEATURE_ALWAYS) | 524 | : : "i" (bit), "i" (X86_FEATURE_ALWAYS) |
521 | : : t_dynamic, t_no); | 525 | : : t_dynamic, t_no); |
@@ -535,6 +539,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) | |||
535 | " .word %P2\n" /* always replace */ | 539 | " .word %P2\n" /* always replace */ |
536 | " .byte 2b - 1b\n" /* source len */ | 540 | " .byte 2b - 1b\n" /* source len */ |
537 | " .byte 4f - 3f\n" /* replacement len */ | 541 | " .byte 4f - 3f\n" /* replacement len */ |
542 | " .byte 0\n" /* pad len */ | ||
538 | ".previous\n" | 543 | ".previous\n" |
539 | ".section .discard,\"aw\",@progbits\n" | 544 | ".section .discard,\"aw\",@progbits\n" |
540 | " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ | 545 | " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ |
@@ -549,6 +554,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) | |||
549 | " .word %P1\n" /* feature bit */ | 554 | " .word %P1\n" /* feature bit */ |
550 | " .byte 4b - 3b\n" /* src len */ | 555 | " .byte 4b - 3b\n" /* src len */ |
551 | " .byte 6f - 5f\n" /* repl len */ | 556 | " .byte 6f - 5f\n" /* repl len */ |
557 | " .byte 0\n" /* pad len */ | ||
552 | ".previous\n" | 558 | ".previous\n" |
553 | ".section .discard,\"aw\",@progbits\n" | 559 | ".section .discard,\"aw\",@progbits\n" |
554 | " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */ | 560 | " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */ |
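For context, a minimal sketch of what a call site of these boot-patched feature checks looks like; the helper names below are hypothetical, and the point is only the static_cpu_has_safe() usage, which compiles down to a straight-line jump once alternatives have been applied rather than a runtime bit test:

	/* Illustrative call site only (helpers are hypothetical). */
	if (static_cpu_has_safe(X86_FEATURE_CLFLUSHOPT))
		flush_with_clflushopt(addr);	/* hypothetical fast path */
	else
		flush_with_clflush(addr);	/* hypothetical fallback */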
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index a94b82e8f156..a0bf89fd2647 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h | |||
@@ -376,11 +376,16 @@ static inline void _set_gate(int gate, unsigned type, void *addr, | |||
376 | * Pentium F0 0F bugfix can have resulted in the mapped | 376 | * Pentium F0 0F bugfix can have resulted in the mapped |
377 | * IDT being write-protected. | 377 | * IDT being write-protected. |
378 | */ | 378 | */ |
379 | #define set_intr_gate(n, addr) \ | 379 | #define set_intr_gate_notrace(n, addr) \ |
380 | do { \ | 380 | do { \ |
381 | BUG_ON((unsigned)n > 0xFF); \ | 381 | BUG_ON((unsigned)n > 0xFF); \ |
382 | _set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \ | 382 | _set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \ |
383 | __KERNEL_CS); \ | 383 | __KERNEL_CS); \ |
384 | } while (0) | ||
385 | |||
386 | #define set_intr_gate(n, addr) \ | ||
387 | do { \ | ||
388 | set_intr_gate_notrace(n, addr); \ | ||
384 | _trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\ | 389 | _trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\ |
385 | 0, 0, __KERNEL_CS); \ | 390 | 0, 0, __KERNEL_CS); \ |
386 | } while (0) | 391 | } while (0) |
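A hedged sketch of how the two variants differ at a call site (the vector and handler names are illustrative, not taken from this patch): set_intr_gate_notrace() installs only the raw handler, while set_intr_gate() additionally wires up the trace_* twin used by the interrupt tracepoints, so the latter requires a matching trace_ entry point to exist.

	/* Illustrative: assumes a handler 'my_vector_handler' and, for the
	 * second form, a matching 'trace_my_vector_handler'. */
	set_intr_gate_notrace(0x7f, my_vector_handler);	/* raw gate only */
	set_intr_gate(0x7f, my_vector_handler);		/* raw + traced gate */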
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index f6f15986df6c..de1cdaf4d743 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h | |||
@@ -86,11 +86,23 @@ | |||
86 | CFI_ADJUST_CFA_OFFSET 8 | 86 | CFI_ADJUST_CFA_OFFSET 8 |
87 | .endm | 87 | .endm |
88 | 88 | ||
89 | .macro pushq_cfi_reg reg | ||
90 | pushq %\reg | ||
91 | CFI_ADJUST_CFA_OFFSET 8 | ||
92 | CFI_REL_OFFSET \reg, 0 | ||
93 | .endm | ||
94 | |||
89 | .macro popq_cfi reg | 95 | .macro popq_cfi reg |
90 | popq \reg | 96 | popq \reg |
91 | CFI_ADJUST_CFA_OFFSET -8 | 97 | CFI_ADJUST_CFA_OFFSET -8 |
92 | .endm | 98 | .endm |
93 | 99 | ||
100 | .macro popq_cfi_reg reg | ||
101 | popq %\reg | ||
102 | CFI_ADJUST_CFA_OFFSET -8 | ||
103 | CFI_RESTORE \reg | ||
104 | .endm | ||
105 | |||
94 | .macro pushfq_cfi | 106 | .macro pushfq_cfi |
95 | pushfq | 107 | pushfq |
96 | CFI_ADJUST_CFA_OFFSET 8 | 108 | CFI_ADJUST_CFA_OFFSET 8 |
@@ -116,11 +128,23 @@ | |||
116 | CFI_ADJUST_CFA_OFFSET 4 | 128 | CFI_ADJUST_CFA_OFFSET 4 |
117 | .endm | 129 | .endm |
118 | 130 | ||
131 | .macro pushl_cfi_reg reg | ||
132 | pushl %\reg | ||
133 | CFI_ADJUST_CFA_OFFSET 4 | ||
134 | CFI_REL_OFFSET \reg, 0 | ||
135 | .endm | ||
136 | |||
119 | .macro popl_cfi reg | 137 | .macro popl_cfi reg |
120 | popl \reg | 138 | popl \reg |
121 | CFI_ADJUST_CFA_OFFSET -4 | 139 | CFI_ADJUST_CFA_OFFSET -4 |
122 | .endm | 140 | .endm |
123 | 141 | ||
142 | .macro popl_cfi_reg reg | ||
143 | popl %\reg | ||
144 | CFI_ADJUST_CFA_OFFSET -4 | ||
145 | CFI_RESTORE \reg | ||
146 | .endm | ||
147 | |||
124 | .macro pushfl_cfi | 148 | .macro pushfl_cfi |
125 | pushfl | 149 | pushfl |
126 | CFI_ADJUST_CFA_OFFSET 4 | 150 | CFI_ADJUST_CFA_OFFSET 4 |
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 779c2efe2e97..3ab0537872fb 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h | |||
@@ -40,14 +40,6 @@ static inline void e820_mark_nosave_regions(unsigned long limit_pfn) | |||
40 | } | 40 | } |
41 | #endif | 41 | #endif |
42 | 42 | ||
43 | #ifdef CONFIG_MEMTEST | ||
44 | extern void early_memtest(unsigned long start, unsigned long end); | ||
45 | #else | ||
46 | static inline void early_memtest(unsigned long start, unsigned long end) | ||
47 | { | ||
48 | } | ||
49 | #endif | ||
50 | |||
51 | extern unsigned long e820_end_of_ram_pfn(void); | 43 | extern unsigned long e820_end_of_ram_pfn(void); |
52 | extern unsigned long e820_end_of_low_ram_pfn(void); | 44 | extern unsigned long e820_end_of_low_ram_pfn(void); |
53 | extern u64 early_reserve_e820(u64 sizet, u64 align); | 45 | extern u64 early_reserve_e820(u64 sizet, u64 align); |
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 25bce45c6fc4..3738b138b843 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h | |||
@@ -2,6 +2,8 @@ | |||
2 | #define _ASM_X86_EFI_H | 2 | #define _ASM_X86_EFI_H |
3 | 3 | ||
4 | #include <asm/i387.h> | 4 | #include <asm/i387.h> |
5 | #include <asm/pgtable.h> | ||
6 | |||
5 | /* | 7 | /* |
6 | * We map the EFI regions needed for runtime services non-contiguously, | 8 | * We map the EFI regions needed for runtime services non-contiguously, |
7 | * with preserved alignment on virtual addresses starting from -4G down | 9 | * with preserved alignment on virtual addresses starting from -4G down |
@@ -89,8 +91,8 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, | |||
89 | extern struct efi_scratch efi_scratch; | 91 | extern struct efi_scratch efi_scratch; |
90 | extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable); | 92 | extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable); |
91 | extern int __init efi_memblock_x86_reserve_range(void); | 93 | extern int __init efi_memblock_x86_reserve_range(void); |
92 | extern void __init efi_call_phys_prolog(void); | 94 | extern pgd_t * __init efi_call_phys_prolog(void); |
93 | extern void __init efi_call_phys_epilog(void); | 95 | extern void __init efi_call_phys_epilog(pgd_t *save_pgd); |
94 | extern void __init efi_unmap_memmap(void); | 96 | extern void __init efi_unmap_memmap(void); |
95 | extern void __init efi_memory_uc(u64 addr, unsigned long size); | 97 | extern void __init efi_memory_uc(u64 addr, unsigned long size); |
96 | extern void __init efi_map_region(efi_memory_desc_t *md); | 98 | extern void __init efi_map_region(efi_memory_desc_t *md); |
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index ca3347a9dab5..f161c189c27b 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h | |||
@@ -171,10 +171,11 @@ do { \ | |||
171 | static inline void elf_common_init(struct thread_struct *t, | 171 | static inline void elf_common_init(struct thread_struct *t, |
172 | struct pt_regs *regs, const u16 ds) | 172 | struct pt_regs *regs, const u16 ds) |
173 | { | 173 | { |
174 | regs->ax = regs->bx = regs->cx = regs->dx = 0; | 174 | /* Commented-out registers are cleared in stub_execve */ |
175 | regs->si = regs->di = regs->bp = 0; | 175 | /*regs->ax = regs->bx =*/ regs->cx = regs->dx = 0; |
176 | regs->si = regs->di /*= regs->bp*/ = 0; | ||
176 | regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; | 177 | regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; |
177 | regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; | 178 | /*regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;*/ |
178 | t->fs = t->gs = 0; | 179 | t->fs = t->gs = 0; |
179 | t->fsindex = t->gsindex = 0; | 180 | t->fsindex = t->gsindex = 0; |
180 | t->ds = t->es = ds; | 181 | t->ds = t->es = ds; |
@@ -338,9 +339,6 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm, | |||
338 | int uses_interp); | 339 | int uses_interp); |
339 | #define compat_arch_setup_additional_pages compat_arch_setup_additional_pages | 340 | #define compat_arch_setup_additional_pages compat_arch_setup_additional_pages |
340 | 341 | ||
341 | extern unsigned long arch_randomize_brk(struct mm_struct *mm); | ||
342 | #define arch_randomize_brk arch_randomize_brk | ||
343 | |||
344 | /* | 342 | /* |
345 | * True on X86_32 or when emulating IA32 on X86_64 | 343 | * True on X86_32 or when emulating IA32 on X86_64 |
346 | */ | 344 | */ |
@@ -365,6 +363,7 @@ enum align_flags { | |||
365 | struct va_alignment { | 363 | struct va_alignment { |
366 | int flags; | 364 | int flags; |
367 | unsigned long mask; | 365 | unsigned long mask; |
366 | unsigned long bits; | ||
368 | } ____cacheline_aligned; | 367 | } ____cacheline_aligned; |
369 | 368 | ||
370 | extern struct va_alignment va_align; | 369 | extern struct va_alignment va_align; |
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 72ba21a8b5fc..da5e96756570 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h | |||
@@ -67,6 +67,34 @@ extern void finit_soft_fpu(struct i387_soft_struct *soft); | |||
67 | static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} | 67 | static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} |
68 | #endif | 68 | #endif |
69 | 69 | ||
70 | /* | ||
71 | * Must be run with preemption disabled: this clears the fpu_owner_task | ||
72 | * on this CPU. | ||
73 | * | ||
74 | * This will disable any lazy FPU state restore of the current FPU state, | ||
75 | * but if the current thread owns the FPU, its state will still be saved. | ||
76 | */ | ||
77 | static inline void __cpu_disable_lazy_restore(unsigned int cpu) | ||
78 | { | ||
79 | per_cpu(fpu_owner_task, cpu) = NULL; | ||
80 | } | ||
81 | |||
82 | /* | ||
83 | * Used to indicate that the FPU state in memory is newer than the FPU | ||
84 | * state in registers, and the FPU state should be reloaded next time the | ||
85 | * task is run. Only safe on the current task, or non-running tasks. | ||
86 | */ | ||
87 | static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk) | ||
88 | { | ||
89 | tsk->thread.fpu.last_cpu = ~0; | ||
90 | } | ||
91 | |||
92 | static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu) | ||
93 | { | ||
94 | return new == this_cpu_read_stable(fpu_owner_task) && | ||
95 | cpu == new->thread.fpu.last_cpu; | ||
96 | } | ||
97 | |||
70 | static inline int is_ia32_compat_frame(void) | 98 | static inline int is_ia32_compat_frame(void) |
71 | { | 99 | { |
72 | return config_enabled(CONFIG_IA32_EMULATION) && | 100 | return config_enabled(CONFIG_IA32_EMULATION) && |
@@ -107,7 +135,6 @@ static __always_inline __pure bool use_fxsr(void) | |||
107 | 135 | ||
108 | static inline void fx_finit(struct i387_fxsave_struct *fx) | 136 | static inline void fx_finit(struct i387_fxsave_struct *fx) |
109 | { | 137 | { |
110 | memset(fx, 0, xstate_size); | ||
111 | fx->cwd = 0x37f; | 138 | fx->cwd = 0x37f; |
112 | fx->mxcsr = MXCSR_DEFAULT; | 139 | fx->mxcsr = MXCSR_DEFAULT; |
113 | } | 140 | } |
@@ -351,8 +378,14 @@ static inline void __thread_fpu_begin(struct task_struct *tsk) | |||
351 | __thread_set_has_fpu(tsk); | 378 | __thread_set_has_fpu(tsk); |
352 | } | 379 | } |
353 | 380 | ||
354 | static inline void __drop_fpu(struct task_struct *tsk) | 381 | static inline void drop_fpu(struct task_struct *tsk) |
355 | { | 382 | { |
383 | /* | ||
384 | * Forget coprocessor state.. | ||
385 | */ | ||
386 | preempt_disable(); | ||
387 | tsk->thread.fpu_counter = 0; | ||
388 | |||
356 | if (__thread_has_fpu(tsk)) { | 389 | if (__thread_has_fpu(tsk)) { |
357 | /* Ignore delayed exceptions from user space */ | 390 | /* Ignore delayed exceptions from user space */ |
358 | asm volatile("1: fwait\n" | 391 | asm volatile("1: fwait\n" |
@@ -360,30 +393,29 @@ static inline void __drop_fpu(struct task_struct *tsk) | |||
360 | _ASM_EXTABLE(1b, 2b)); | 393 | _ASM_EXTABLE(1b, 2b)); |
361 | __thread_fpu_end(tsk); | 394 | __thread_fpu_end(tsk); |
362 | } | 395 | } |
363 | } | ||
364 | 396 | ||
365 | static inline void drop_fpu(struct task_struct *tsk) | ||
366 | { | ||
367 | /* | ||
368 | * Forget coprocessor state.. | ||
369 | */ | ||
370 | preempt_disable(); | ||
371 | tsk->thread.fpu_counter = 0; | ||
372 | __drop_fpu(tsk); | ||
373 | clear_stopped_child_used_math(tsk); | 397 | clear_stopped_child_used_math(tsk); |
374 | preempt_enable(); | 398 | preempt_enable(); |
375 | } | 399 | } |
376 | 400 | ||
377 | static inline void drop_init_fpu(struct task_struct *tsk) | 401 | static inline void restore_init_xstate(void) |
402 | { | ||
403 | if (use_xsave()) | ||
404 | xrstor_state(init_xstate_buf, -1); | ||
405 | else | ||
406 | fxrstor_checking(&init_xstate_buf->i387); | ||
407 | } | ||
408 | |||
409 | /* | ||
410 | * Reset the FPU state in the eager case and drop it in the lazy case (later use | ||
411 | * will reinit it). | ||
412 | */ | ||
413 | static inline void fpu_reset_state(struct task_struct *tsk) | ||
378 | { | 414 | { |
379 | if (!use_eager_fpu()) | 415 | if (!use_eager_fpu()) |
380 | drop_fpu(tsk); | 416 | drop_fpu(tsk); |
381 | else { | 417 | else |
382 | if (use_xsave()) | 418 | restore_init_xstate(); |
383 | xrstor_state(init_xstate_buf, -1); | ||
384 | else | ||
385 | fxrstor_checking(&init_xstate_buf->i387); | ||
386 | } | ||
387 | } | 419 | } |
388 | 420 | ||
389 | /* | 421 | /* |
@@ -400,24 +432,6 @@ static inline void drop_init_fpu(struct task_struct *tsk) | |||
400 | */ | 432 | */ |
401 | typedef struct { int preload; } fpu_switch_t; | 433 | typedef struct { int preload; } fpu_switch_t; |
402 | 434 | ||
403 | /* | ||
404 | * Must be run with preemption disabled: this clears the fpu_owner_task, | ||
405 | * on this CPU. | ||
406 | * | ||
407 | * This will disable any lazy FPU state restore of the current FPU state, | ||
408 | * but if the current thread owns the FPU, it will still be saved by. | ||
409 | */ | ||
410 | static inline void __cpu_disable_lazy_restore(unsigned int cpu) | ||
411 | { | ||
412 | per_cpu(fpu_owner_task, cpu) = NULL; | ||
413 | } | ||
414 | |||
415 | static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu) | ||
416 | { | ||
417 | return new == this_cpu_read_stable(fpu_owner_task) && | ||
418 | cpu == new->thread.fpu.last_cpu; | ||
419 | } | ||
420 | |||
421 | static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu) | 435 | static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu) |
422 | { | 436 | { |
423 | fpu_switch_t fpu; | 437 | fpu_switch_t fpu; |
@@ -426,13 +440,17 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta | |||
426 | * If the task has used the math, pre-load the FPU on xsave processors | 440 | * If the task has used the math, pre-load the FPU on xsave processors |
427 | * or if the past 5 consecutive context-switches used math. | 441 | * or if the past 5 consecutive context-switches used math. |
428 | */ | 442 | */ |
429 | fpu.preload = tsk_used_math(new) && (use_eager_fpu() || | 443 | fpu.preload = tsk_used_math(new) && |
430 | new->thread.fpu_counter > 5); | 444 | (use_eager_fpu() || new->thread.fpu_counter > 5); |
445 | |||
431 | if (__thread_has_fpu(old)) { | 446 | if (__thread_has_fpu(old)) { |
432 | if (!__save_init_fpu(old)) | 447 | if (!__save_init_fpu(old)) |
433 | cpu = ~0; | 448 | task_disable_lazy_fpu_restore(old); |
434 | old->thread.fpu.last_cpu = cpu; | 449 | else |
435 | old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */ | 450 | old->thread.fpu.last_cpu = cpu; |
451 | |||
452 | /* But leave fpu_owner_task! */ | ||
453 | old->thread.fpu.has_fpu = 0; | ||
436 | 454 | ||
437 | /* Don't change CR0.TS if we just switch! */ | 455 | /* Don't change CR0.TS if we just switch! */ |
438 | if (fpu.preload) { | 456 | if (fpu.preload) { |
@@ -443,10 +461,10 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta | |||
443 | stts(); | 461 | stts(); |
444 | } else { | 462 | } else { |
445 | old->thread.fpu_counter = 0; | 463 | old->thread.fpu_counter = 0; |
446 | old->thread.fpu.last_cpu = ~0; | 464 | task_disable_lazy_fpu_restore(old); |
447 | if (fpu.preload) { | 465 | if (fpu.preload) { |
448 | new->thread.fpu_counter++; | 466 | new->thread.fpu_counter++; |
449 | if (!use_eager_fpu() && fpu_lazy_restore(new, cpu)) | 467 | if (fpu_lazy_restore(new, cpu)) |
450 | fpu.preload = 0; | 468 | fpu.preload = 0; |
451 | else | 469 | else |
452 | prefetch(new->thread.fpu.state); | 470 | prefetch(new->thread.fpu.state); |
@@ -466,7 +484,7 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) | |||
466 | { | 484 | { |
467 | if (fpu.preload) { | 485 | if (fpu.preload) { |
468 | if (unlikely(restore_fpu_checking(new))) | 486 | if (unlikely(restore_fpu_checking(new))) |
469 | drop_init_fpu(new); | 487 | fpu_reset_state(new); |
470 | } | 488 | } |
471 | } | 489 | } |
472 | 490 | ||
@@ -495,10 +513,12 @@ static inline int restore_xstate_sig(void __user *buf, int ia32_frame) | |||
495 | } | 513 | } |
496 | 514 | ||
497 | /* | 515 | /* |
498 | * Need to be preemption-safe. | 516 | * Needs to be preemption-safe. |
499 | * | 517 | * |
500 | * NOTE! user_fpu_begin() must be used only immediately before restoring | 518 | * NOTE! user_fpu_begin() must be used only immediately before restoring |
501 | * it. This function does not do any save/restore on their own. | 519 | * the save state. It does not do any saving/restoring on its own. In |
520 | * lazy FPU mode, it is just an optimization to avoid a #NM exception; | ||
521 | * the task can lose the FPU right after preempt_enable(). | ||
502 | */ | 522 | */ |
503 | static inline void user_fpu_begin(void) | 523 | static inline void user_fpu_begin(void) |
504 | { | 524 | { |
@@ -520,24 +540,6 @@ static inline void __save_fpu(struct task_struct *tsk) | |||
520 | } | 540 | } |
521 | 541 | ||
522 | /* | 542 | /* |
523 | * These disable preemption on their own and are safe | ||
524 | */ | ||
525 | static inline void save_init_fpu(struct task_struct *tsk) | ||
526 | { | ||
527 | WARN_ON_ONCE(!__thread_has_fpu(tsk)); | ||
528 | |||
529 | if (use_eager_fpu()) { | ||
530 | __save_fpu(tsk); | ||
531 | return; | ||
532 | } | ||
533 | |||
534 | preempt_disable(); | ||
535 | __save_init_fpu(tsk); | ||
536 | __thread_fpu_end(tsk); | ||
537 | preempt_enable(); | ||
538 | } | ||
539 | |||
540 | /* | ||
541 | * i387 state interaction | 543 | * i387 state interaction |
542 | */ | 544 | */ |
543 | static inline unsigned short get_fpu_cwd(struct task_struct *tsk) | 545 | static inline unsigned short get_fpu_cwd(struct task_struct *tsk) |
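The prepare/finish pair above is intended to bracket the actual task switch. A simplified sketch of the calling pattern (this is not the real __switch_to(); everything except the FPU hand-off is elided):

	struct task_struct *prev, *next;	/* outgoing and incoming tasks */
	fpu_switch_t fpu;
	int cpu = smp_processor_id();

	fpu = switch_fpu_prepare(prev, next, cpu);	/* save old state, decide on preload */
	/* ... switch stacks, segment registers, TLS, etc. ... */
	switch_fpu_finish(next, fpu);			/* restore, or reset state on failure */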
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 9662290e0b20..e9571ddabc4f 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -181,10 +181,9 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *); | |||
181 | extern __visible void smp_invalidate_interrupt(struct pt_regs *); | 181 | extern __visible void smp_invalidate_interrupt(struct pt_regs *); |
182 | #endif | 182 | #endif |
183 | 183 | ||
184 | extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR | 184 | extern char irq_entries_start[]; |
185 | - FIRST_EXTERNAL_VECTOR])(void); | ||
186 | #ifdef CONFIG_TRACING | 185 | #ifdef CONFIG_TRACING |
187 | #define trace_interrupt interrupt | 186 | #define trace_irq_entries_start irq_entries_start |
188 | #endif | 187 | #endif |
189 | 188 | ||
190 | #define VECTOR_UNDEFINED (-1) | 189 | #define VECTOR_UNDEFINED (-1) |
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 47f29b1d1846..e7814b74caf8 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h | |||
@@ -69,7 +69,7 @@ struct insn { | |||
69 | const insn_byte_t *next_byte; | 69 | const insn_byte_t *next_byte; |
70 | }; | 70 | }; |
71 | 71 | ||
72 | #define MAX_INSN_SIZE 16 | 72 | #define MAX_INSN_SIZE 15 |
73 | 73 | ||
74 | #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) | 74 | #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) |
75 | #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) | 75 | #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) |
diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h index f42a04735a0a..e37d6b3ad983 100644 --- a/arch/x86/include/asm/iommu_table.h +++ b/arch/x86/include/asm/iommu_table.h | |||
@@ -79,11 +79,12 @@ struct iommu_table_entry { | |||
79 | * d). Similar to the 'init', except that this gets called from pci_iommu_init | 79 | * d). Similar to the 'init', except that this gets called from pci_iommu_init |
80 | * where we do have a memory allocator. | 80 | * where we do have a memory allocator. |
81 | * | 81 | * |
82 | * The standard vs the _FINISH differs in that the _FINISH variant will | 82 | * The standard IOMMU_INIT differs from the IOMMU_INIT_FINISH variant |
83 | * continue detecting other IOMMUs in the call list after the | 83 | * in that the former will continue detecting other IOMMUs in the call |
84 | * the detection routine returns a positive number. The _FINISH will | 84 | * list after the detection routine returns a positive number, while the |
85 | * stop the execution chain. Both will still call the 'init' and | 85 | * latter will stop the execution chain upon first successful detection. |
86 | * 'late_init' functions if they are set. | 86 | * Both variants will still call the 'init' and 'late_init' functions if |
87 | * they are set. | ||
87 | */ | 88 | */ |
88 | #define IOMMU_INIT_FINISH(_detect, _depend, _init, _late_init) \ | 89 | #define IOMMU_INIT_FINISH(_detect, _depend, _init, _late_init) \ |
89 | __IOMMU_INIT(_detect, _depend, _init, _late_init, 1) | 90 | __IOMMU_INIT(_detect, _depend, _init, _late_init, 1) |
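A sketch of the registration pattern the comment above describes; the hook names are hypothetical, and only the choice between IOMMU_INIT and IOMMU_INIT_FINISH matters here.

	/* Hypothetical hooks: a positive return from the detect routine means
	 * "found", and with the _FINISH variant it also stops the walk over
	 * the remaining entries in the detection list. */
	IOMMU_INIT_FINISH(my_iommu_detect,	/* detection routine */
			  NULL,			/* no dependency on another detect */
			  my_iommu_early_init,	/* before the memory allocator is up */
			  my_iommu_late_init);	/* from pci_iommu_init() */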
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 0a8b519226b8..b77f5edb03b0 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h | |||
@@ -136,10 +136,6 @@ static inline notrace unsigned long arch_local_irq_save(void) | |||
136 | #define USERGS_SYSRET32 \ | 136 | #define USERGS_SYSRET32 \ |
137 | swapgs; \ | 137 | swapgs; \ |
138 | sysretl | 138 | sysretl |
139 | #define ENABLE_INTERRUPTS_SYSEXIT32 \ | ||
140 | swapgs; \ | ||
141 | sti; \ | ||
142 | sysexit | ||
143 | 139 | ||
144 | #else | 140 | #else |
145 | #define INTERRUPT_RETURN iret | 141 | #define INTERRUPT_RETURN iret |
@@ -163,22 +159,27 @@ static inline int arch_irqs_disabled(void) | |||
163 | 159 | ||
164 | return arch_irqs_disabled_flags(flags); | 160 | return arch_irqs_disabled_flags(flags); |
165 | } | 161 | } |
162 | #endif /* !__ASSEMBLY__ */ | ||
166 | 163 | ||
164 | #ifdef __ASSEMBLY__ | ||
165 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
166 | # define TRACE_IRQS_ON call trace_hardirqs_on_thunk; | ||
167 | # define TRACE_IRQS_OFF call trace_hardirqs_off_thunk; | ||
167 | #else | 168 | #else |
168 | 169 | # define TRACE_IRQS_ON | |
169 | #ifdef CONFIG_X86_64 | 170 | # define TRACE_IRQS_OFF |
170 | #define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk | 171 | #endif |
171 | #define ARCH_LOCKDEP_SYS_EXIT_IRQ \ | 172 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
173 | # ifdef CONFIG_X86_64 | ||
174 | # define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk | ||
175 | # define LOCKDEP_SYS_EXIT_IRQ \ | ||
172 | TRACE_IRQS_ON; \ | 176 | TRACE_IRQS_ON; \ |
173 | sti; \ | 177 | sti; \ |
174 | SAVE_REST; \ | 178 | call lockdep_sys_exit_thunk; \ |
175 | LOCKDEP_SYS_EXIT; \ | ||
176 | RESTORE_REST; \ | ||
177 | cli; \ | 179 | cli; \ |
178 | TRACE_IRQS_OFF; | 180 | TRACE_IRQS_OFF; |
179 | 181 | # else | |
180 | #else | 182 | # define LOCKDEP_SYS_EXIT \ |
181 | #define ARCH_LOCKDEP_SYS_EXIT \ | ||
182 | pushl %eax; \ | 183 | pushl %eax; \ |
183 | pushl %ecx; \ | 184 | pushl %ecx; \ |
184 | pushl %edx; \ | 185 | pushl %edx; \ |
@@ -186,24 +187,12 @@ static inline int arch_irqs_disabled(void) | |||
186 | popl %edx; \ | 187 | popl %edx; \ |
187 | popl %ecx; \ | 188 | popl %ecx; \ |
188 | popl %eax; | 189 | popl %eax; |
189 | 190 | # define LOCKDEP_SYS_EXIT_IRQ | |
190 | #define ARCH_LOCKDEP_SYS_EXIT_IRQ | 191 | # endif |
191 | #endif | ||
192 | |||
193 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
194 | # define TRACE_IRQS_ON call trace_hardirqs_on_thunk; | ||
195 | # define TRACE_IRQS_OFF call trace_hardirqs_off_thunk; | ||
196 | #else | 192 | #else |
197 | # define TRACE_IRQS_ON | ||
198 | # define TRACE_IRQS_OFF | ||
199 | #endif | ||
200 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
201 | # define LOCKDEP_SYS_EXIT ARCH_LOCKDEP_SYS_EXIT | ||
202 | # define LOCKDEP_SYS_EXIT_IRQ ARCH_LOCKDEP_SYS_EXIT_IRQ | ||
203 | # else | ||
204 | # define LOCKDEP_SYS_EXIT | 193 | # define LOCKDEP_SYS_EXIT |
205 | # define LOCKDEP_SYS_EXIT_IRQ | 194 | # define LOCKDEP_SYS_EXIT_IRQ |
206 | # endif | 195 | #endif |
207 | |||
208 | #endif /* __ASSEMBLY__ */ | 196 | #endif /* __ASSEMBLY__ */ |
197 | |||
209 | #endif | 198 | #endif |
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 6a2cefb4395a..a4c1cf7e93f8 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h | |||
@@ -1,7 +1,7 @@ | |||
1 | #ifndef _ASM_X86_JUMP_LABEL_H | 1 | #ifndef _ASM_X86_JUMP_LABEL_H |
2 | #define _ASM_X86_JUMP_LABEL_H | 2 | #define _ASM_X86_JUMP_LABEL_H |
3 | 3 | ||
4 | #ifdef __KERNEL__ | 4 | #ifndef __ASSEMBLY__ |
5 | 5 | ||
6 | #include <linux/stringify.h> | 6 | #include <linux/stringify.h> |
7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
@@ -30,8 +30,6 @@ l_yes: | |||
30 | return true; | 30 | return true; |
31 | } | 31 | } |
32 | 32 | ||
33 | #endif /* __KERNEL__ */ | ||
34 | |||
35 | #ifdef CONFIG_X86_64 | 33 | #ifdef CONFIG_X86_64 |
36 | typedef u64 jump_label_t; | 34 | typedef u64 jump_label_t; |
37 | #else | 35 | #else |
@@ -44,4 +42,5 @@ struct jump_entry { | |||
44 | jump_label_t key; | 42 | jump_label_t key; |
45 | }; | 43 | }; |
46 | 44 | ||
45 | #endif /* __ASSEMBLY__ */ | ||
47 | #endif | 46 | #endif |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a236e39cc385..dea2e7e962e3 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -81,11 +81,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) | |||
81 | (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); | 81 | (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); |
82 | } | 82 | } |
83 | 83 | ||
84 | #define SELECTOR_TI_MASK (1 << 2) | ||
85 | #define SELECTOR_RPL_MASK 0x03 | ||
86 | |||
87 | #define IOPL_SHIFT 12 | ||
88 | |||
89 | #define KVM_PERMILLE_MMU_PAGES 20 | 84 | #define KVM_PERMILLE_MMU_PAGES 20 |
90 | #define KVM_MIN_ALLOC_MMU_PAGES 64 | 85 | #define KVM_MIN_ALLOC_MMU_PAGES 64 |
91 | #define KVM_MMU_HASH_SHIFT 10 | 86 | #define KVM_MMU_HASH_SHIFT 10 |
@@ -345,6 +340,7 @@ struct kvm_pmu { | |||
345 | enum { | 340 | enum { |
346 | KVM_DEBUGREG_BP_ENABLED = 1, | 341 | KVM_DEBUGREG_BP_ENABLED = 1, |
347 | KVM_DEBUGREG_WONT_EXIT = 2, | 342 | KVM_DEBUGREG_WONT_EXIT = 2, |
343 | KVM_DEBUGREG_RELOAD = 4, | ||
348 | }; | 344 | }; |
349 | 345 | ||
350 | struct kvm_vcpu_arch { | 346 | struct kvm_vcpu_arch { |
@@ -431,6 +427,9 @@ struct kvm_vcpu_arch { | |||
431 | 427 | ||
432 | int cpuid_nent; | 428 | int cpuid_nent; |
433 | struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES]; | 429 | struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES]; |
430 | |||
431 | int maxphyaddr; | ||
432 | |||
434 | /* emulate context */ | 433 | /* emulate context */ |
435 | 434 | ||
436 | struct x86_emulate_ctxt emulate_ctxt; | 435 | struct x86_emulate_ctxt emulate_ctxt; |
@@ -550,11 +549,20 @@ struct kvm_arch_memory_slot { | |||
550 | struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; | 549 | struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; |
551 | }; | 550 | }; |
552 | 551 | ||
552 | /* | ||
553 | * As the mode, we use the number of bits allocated in the LDR for the | ||
554 | * logical processor ID. It happens that these are all powers of two. | ||
555 | * This makes it very easy to detect cases where the APICs are | ||
556 | * configured for multiple modes; in that case, we cannot use the map and | ||
557 | * hence cannot use kvm_irq_delivery_to_apic_fast either. | ||
558 | */ | ||
559 | #define KVM_APIC_MODE_XAPIC_CLUSTER 4 | ||
560 | #define KVM_APIC_MODE_XAPIC_FLAT 8 | ||
561 | #define KVM_APIC_MODE_X2APIC 16 | ||
562 | |||
553 | struct kvm_apic_map { | 563 | struct kvm_apic_map { |
554 | struct rcu_head rcu; | 564 | struct rcu_head rcu; |
555 | u8 ldr_bits; | 565 | u8 mode; |
556 | /* fields bellow are used to decode ldr values in different modes */ | ||
557 | u32 cid_shift, cid_mask, lid_mask, broadcast; | ||
558 | struct kvm_lapic *phys_map[256]; | 566 | struct kvm_lapic *phys_map[256]; |
559 | /* first index is cluster id second is cpu id in a cluster */ | 567 | /* first index is cluster id second is cpu id in a cluster */ |
560 | struct kvm_lapic *logical_map[16][16]; | 568 | struct kvm_lapic *logical_map[16][16]; |
@@ -859,6 +867,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | |||
859 | void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); | 867 | void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); |
860 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, | 868 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, |
861 | struct kvm_memory_slot *memslot); | 869 | struct kvm_memory_slot *memslot); |
870 | void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, | ||
871 | struct kvm_memory_slot *memslot); | ||
862 | void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, | 872 | void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, |
863 | struct kvm_memory_slot *memslot); | 873 | struct kvm_memory_slot *memslot); |
864 | void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm, | 874 | void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm, |
@@ -933,6 +943,7 @@ struct x86_emulate_ctxt; | |||
933 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); | 943 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); |
934 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); | 944 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); |
935 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); | 945 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); |
946 | int kvm_vcpu_halt(struct kvm_vcpu *vcpu); | ||
936 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); | 947 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); |
937 | 948 | ||
938 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); | 949 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
@@ -1128,7 +1139,6 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) | |||
1128 | int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); | 1139 | int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); |
1129 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); | 1140 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); |
1130 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); | 1141 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); |
1131 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); | ||
1132 | int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); | 1142 | int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); |
1133 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); | 1143 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); |
1134 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); | 1144 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); |
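A rough illustration of the mode encoding described in the new comment, not KVM's actual decode code: assuming the xAPIC LDR has already been reduced to its logical-ID byte, the mode value is simply how many low bits form the in-cluster ID mask, and the remaining bits select the cluster.

	/* Illustration only (hypothetical helper); mode is one of the
	 * KVM_APIC_MODE_* values, i.e. the number of logical-ID bits. */
	static void ldr_split(u32 ldr, u8 mode, u16 *cluster, u16 *id_mask)
	{
		*cluster = ldr >> mode;			/* remaining bits pick the cluster */
		*id_mask = ldr & ((1U << mode) - 1);	/* one bit per CPU in the cluster */
	}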
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index e62cf897f781..c1adf33fdd0d 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
@@ -115,7 +115,7 @@ static inline void kvm_spinlock_init(void) | |||
115 | 115 | ||
116 | static inline bool kvm_para_available(void) | 116 | static inline bool kvm_para_available(void) |
117 | { | 117 | { |
118 | return 0; | 118 | return false; |
119 | } | 119 | } |
120 | 120 | ||
121 | static inline unsigned int kvm_arch_para_features(void) | 121 | static inline unsigned int kvm_arch_para_features(void) |
diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h index a455a53d789a..2d29197bd2fb 100644 --- a/arch/x86/include/asm/livepatch.h +++ b/arch/x86/include/asm/livepatch.h | |||
@@ -32,8 +32,8 @@ static inline int klp_check_compiler_support(void) | |||
32 | #endif | 32 | #endif |
33 | return 0; | 33 | return 0; |
34 | } | 34 | } |
35 | extern int klp_write_module_reloc(struct module *mod, unsigned long type, | 35 | int klp_write_module_reloc(struct module *mod, unsigned long type, |
36 | unsigned long loc, unsigned long value); | 36 | unsigned long loc, unsigned long value); |
37 | 37 | ||
38 | static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) | 38 | static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) |
39 | { | 39 | { |
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 9b3de99dc004..1f5a86d518db 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -116,6 +116,12 @@ struct mca_config { | |||
116 | u32 rip_msr; | 116 | u32 rip_msr; |
117 | }; | 117 | }; |
118 | 118 | ||
119 | struct mce_vendor_flags { | ||
120 | __u64 overflow_recov : 1, /* cpuid_ebx(80000007) */ | ||
121 | __reserved_0 : 63; | ||
122 | }; | ||
123 | extern struct mce_vendor_flags mce_flags; | ||
124 | |||
119 | extern struct mca_config mca_cfg; | 125 | extern struct mca_config mca_cfg; |
120 | extern void mce_register_decode_chain(struct notifier_block *nb); | 126 | extern void mce_register_decode_chain(struct notifier_block *nb); |
121 | extern void mce_unregister_decode_chain(struct notifier_block *nb); | 127 | extern void mce_unregister_decode_chain(struct notifier_block *nb); |
@@ -128,9 +134,11 @@ extern int mce_p5_enabled; | |||
128 | #ifdef CONFIG_X86_MCE | 134 | #ifdef CONFIG_X86_MCE |
129 | int mcheck_init(void); | 135 | int mcheck_init(void); |
130 | void mcheck_cpu_init(struct cpuinfo_x86 *c); | 136 | void mcheck_cpu_init(struct cpuinfo_x86 *c); |
137 | void mcheck_vendor_init_severity(void); | ||
131 | #else | 138 | #else |
132 | static inline int mcheck_init(void) { return 0; } | 139 | static inline int mcheck_init(void) { return 0; } |
133 | static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} | 140 | static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} |
141 | static inline void mcheck_vendor_init_severity(void) {} | ||
134 | #endif | 142 | #endif |
135 | 143 | ||
136 | #ifdef CONFIG_X86_ANCIENT_MCE | 144 | #ifdef CONFIG_X86_ANCIENT_MCE |
@@ -183,11 +191,11 @@ typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); | |||
183 | DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); | 191 | DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); |
184 | 192 | ||
185 | enum mcp_flags { | 193 | enum mcp_flags { |
186 | MCP_TIMESTAMP = (1 << 0), /* log time stamp */ | 194 | MCP_TIMESTAMP = BIT(0), /* log time stamp */ |
187 | MCP_UC = (1 << 1), /* log uncorrected errors */ | 195 | MCP_UC = BIT(1), /* log uncorrected errors */ |
188 | MCP_DONTLOG = (1 << 2), /* only clear, don't log */ | 196 | MCP_DONTLOG = BIT(2), /* only clear, don't log */ |
189 | }; | 197 | }; |
190 | void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); | 198 | bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b); |
191 | 199 | ||
192 | int mce_notify_irq(void); | 200 | int mce_notify_irq(void); |
193 | 201 | ||
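As the comment on the new bitfield hints, the flag comes straight out of CPUID. A hedged sketch of how vendor init code might populate it (leaf and bit per the comment above; the surrounding vendor checks are elided):

	/* Sketch: CPUID leaf 0x80000007, EBX bit 0 advertises MCA overflow
	 * recovery on CPUs that implement the leaf. */
	if (boot_cpu_data.extended_cpuid_level >= 0x80000007)
		mce_flags.overflow_recov = !!(cpuid_ebx(0x80000007) & BIT(0));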
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 201b520521ed..2fb20d6f7e23 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h | |||
@@ -75,6 +75,79 @@ static inline void __exit exit_amd_microcode(void) {} | |||
75 | 75 | ||
76 | #ifdef CONFIG_MICROCODE_EARLY | 76 | #ifdef CONFIG_MICROCODE_EARLY |
77 | #define MAX_UCODE_COUNT 128 | 77 | #define MAX_UCODE_COUNT 128 |
78 | |||
79 | #define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24)) | ||
80 | #define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u') | ||
81 | #define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I') | ||
82 | #define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l') | ||
83 | #define CPUID_AMD1 QCHAR('A', 'u', 't', 'h') | ||
84 | #define CPUID_AMD2 QCHAR('e', 'n', 't', 'i') | ||
85 | #define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D') | ||
86 | |||
87 | #define CPUID_IS(a, b, c, ebx, ecx, edx) \ | ||
88 | (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c)))) | ||
89 | |||
90 | /* | ||
91 | * During the early microcode loading phase on the BSP, boot_cpu_data is not | ||
92 | * set up yet, so x86_vendor() is used to get the vendor id for the BSP. | ||
93 | * | ||
94 | * In the 32-bit AP case, accessing boot_cpu_data requires a linear address. To | ||
95 | * keep the code simple, we use x86_vendor() for the APs as well. | ||
96 | * | ||
97 | * x86_vendor() gets vendor information directly from CPUID. | ||
98 | */ | ||
99 | static inline int x86_vendor(void) | ||
100 | { | ||
101 | u32 eax = 0x00000000; | ||
102 | u32 ebx, ecx = 0, edx; | ||
103 | |||
104 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
105 | |||
106 | if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx)) | ||
107 | return X86_VENDOR_INTEL; | ||
108 | |||
109 | if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx)) | ||
110 | return X86_VENDOR_AMD; | ||
111 | |||
112 | return X86_VENDOR_UNKNOWN; | ||
113 | } | ||
114 | |||
115 | static inline unsigned int __x86_family(unsigned int sig) | ||
116 | { | ||
117 | unsigned int x86; | ||
118 | |||
119 | x86 = (sig >> 8) & 0xf; | ||
120 | |||
121 | if (x86 == 0xf) | ||
122 | x86 += (sig >> 20) & 0xff; | ||
123 | |||
124 | return x86; | ||
125 | } | ||
126 | |||
127 | static inline unsigned int x86_family(void) | ||
128 | { | ||
129 | u32 eax = 0x00000001; | ||
130 | u32 ebx, ecx = 0, edx; | ||
131 | |||
132 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
133 | |||
134 | return __x86_family(eax); | ||
135 | } | ||
136 | |||
137 | static inline unsigned int x86_model(unsigned int sig) | ||
138 | { | ||
139 | unsigned int x86, model; | ||
140 | |||
141 | x86 = __x86_family(sig); | ||
142 | |||
143 | model = (sig >> 4) & 0xf; | ||
144 | |||
145 | if (x86 == 0x6 || x86 == 0xf) | ||
146 | model += ((sig >> 16) & 0xf) << 4; | ||
147 | |||
148 | return model; | ||
149 | } | ||
150 | |||
78 | extern void __init load_ucode_bsp(void); | 151 | extern void __init load_ucode_bsp(void); |
79 | extern void load_ucode_ap(void); | 152 | extern void load_ucode_ap(void); |
80 | extern int __init save_microcode_in_initrd(void); | 153 | extern int __init save_microcode_in_initrd(void); |
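A worked example of the signature decoding above (the signature value is illustrative):

	/* sig = 0x000306c3:
	 *   __x86_family(sig) = (sig >> 8) & 0xf           = 0x6   (not 0xf, so no
	 *                                                            extended-family add)
	 *   x86_model(sig)    = ((sig >> 4) & 0xf)          = 0xc
	 *                     + (((sig >> 16) & 0xf) << 4)  = 0x30  => model 0x3c
	 */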
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h index dd4c20043ce7..2b9209c46ca9 100644 --- a/arch/x86/include/asm/microcode_intel.h +++ b/arch/x86/include/asm/microcode_intel.h | |||
@@ -56,12 +56,15 @@ struct extended_sigtable { | |||
56 | 56 | ||
57 | #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) | 57 | #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) |
58 | 58 | ||
59 | extern int | 59 | extern int get_matching_microcode(unsigned int csig, int cpf, int rev, void *mc); |
60 | get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev); | ||
61 | extern int microcode_sanity_check(void *mc, int print_err); | 60 | extern int microcode_sanity_check(void *mc, int print_err); |
62 | extern int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev); | 61 | extern int get_matching_sig(unsigned int csig, int cpf, int rev, void *mc); |
63 | extern int | 62 | |
64 | update_match_revision(struct microcode_header_intel *mc_header, int rev); | 63 | static inline int |
64 | revision_is_newer(struct microcode_header_intel *mc_header, int rev) | ||
65 | { | ||
66 | return (mc_header->rev <= rev) ? 0 : 1; | ||
67 | } | ||
65 | 68 | ||
66 | #ifdef CONFIG_MICROCODE_INTEL_EARLY | 69 | #ifdef CONFIG_MICROCODE_INTEL_EARLY |
67 | extern void __init load_ucode_intel_bsp(void); | 70 | extern void __init load_ucode_intel_bsp(void); |
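A minimal sketch of the new helper at a call site (the surrounding update logic is elided and the variable names are illustrative):

	/* Only stage the blob if it is strictly newer than what is running. */
	if (revision_is_newer(mc_header, current_rev))
		/* ...save/apply the candidate microcode... */;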
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index a1410db38a1a..653dfa7662e1 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h | |||
@@ -30,6 +30,14 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) | |||
30 | :: "a" (eax), "c" (ecx)); | 30 | :: "a" (eax), "c" (ecx)); |
31 | } | 31 | } |
32 | 32 | ||
33 | static inline void __sti_mwait(unsigned long eax, unsigned long ecx) | ||
34 | { | ||
35 | trace_hardirqs_on(); | ||
36 | /* "mwait %eax, %ecx;" */ | ||
37 | asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" | ||
38 | :: "a" (eax), "c" (ecx)); | ||
39 | } | ||
40 | |||
33 | /* | 41 | /* |
34 | * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, | 42 | * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, |
35 | * which can obviate IPI to trigger checking of need_resched. | 43 | * which can obviate IPI to trigger checking of need_resched. |
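A simplified sketch of the idle pattern this comment describes (not the kernel's actual mwait_idle(); polling setup and fallbacks are omitted). The monitor is armed on the thread flags word, need_resched() is re-checked, and __sti_mwait() re-enables interrupts in the same breath as the MWAIT so a wakeup is not missed:

	__monitor((void *)&current_thread_info()->flags, 0, 0);
	if (!need_resched())
		__sti_mwait(0, 0);	/* eax=0, ecx=0: default C-state, wake on IRQ or write */
	else
		local_irq_enable();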
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index f97fbe3abb67..c7c712f2648b 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h | |||
@@ -40,8 +40,10 @@ | |||
40 | 40 | ||
41 | #ifdef CONFIG_X86_64 | 41 | #ifdef CONFIG_X86_64 |
42 | #include <asm/page_64_types.h> | 42 | #include <asm/page_64_types.h> |
43 | #define IOREMAP_MAX_ORDER (PUD_SHIFT) | ||
43 | #else | 44 | #else |
44 | #include <asm/page_32_types.h> | 45 | #include <asm/page_32_types.h> |
46 | #define IOREMAP_MAX_ORDER (PMD_SHIFT) | ||
45 | #endif /* CONFIG_X86_64 */ | 47 | #endif /* CONFIG_X86_64 */ |
46 | 48 | ||
47 | #ifndef __ASSEMBLY__ | 49 | #ifndef __ASSEMBLY__ |
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 965c47d254aa..8957810ad7d1 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -545,7 +545,7 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) | |||
545 | PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val); | 545 | PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val); |
546 | } | 546 | } |
547 | 547 | ||
548 | #if PAGETABLE_LEVELS >= 3 | 548 | #if CONFIG_PGTABLE_LEVELS >= 3 |
549 | static inline pmd_t __pmd(pmdval_t val) | 549 | static inline pmd_t __pmd(pmdval_t val) |
550 | { | 550 | { |
551 | pmdval_t ret; | 551 | pmdval_t ret; |
@@ -585,7 +585,7 @@ static inline void set_pud(pud_t *pudp, pud_t pud) | |||
585 | PVOP_VCALL2(pv_mmu_ops.set_pud, pudp, | 585 | PVOP_VCALL2(pv_mmu_ops.set_pud, pudp, |
586 | val); | 586 | val); |
587 | } | 587 | } |
588 | #if PAGETABLE_LEVELS == 4 | 588 | #if CONFIG_PGTABLE_LEVELS == 4 |
589 | static inline pud_t __pud(pudval_t val) | 589 | static inline pud_t __pud(pudval_t val) |
590 | { | 590 | { |
591 | pudval_t ret; | 591 | pudval_t ret; |
@@ -636,9 +636,9 @@ static inline void pud_clear(pud_t *pudp) | |||
636 | set_pud(pudp, __pud(0)); | 636 | set_pud(pudp, __pud(0)); |
637 | } | 637 | } |
638 | 638 | ||
639 | #endif /* PAGETABLE_LEVELS == 4 */ | 639 | #endif /* CONFIG_PGTABLE_LEVELS == 4 */ |
640 | 640 | ||
641 | #endif /* PAGETABLE_LEVELS >= 3 */ | 641 | #endif /* CONFIG_PGTABLE_LEVELS >= 3 */ |
642 | 642 | ||
643 | #ifdef CONFIG_X86_PAE | 643 | #ifdef CONFIG_X86_PAE |
644 | /* Special-case pte-setting operations for PAE, which can't update a | 644 | /* Special-case pte-setting operations for PAE, which can't update a |
@@ -976,11 +976,6 @@ extern void default_banner(void); | |||
976 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ | 976 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ |
977 | CLBR_NONE, \ | 977 | CLBR_NONE, \ |
978 | jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) | 978 | jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) |
979 | |||
980 | #define ENABLE_INTERRUPTS_SYSEXIT32 \ | ||
981 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \ | ||
982 | CLBR_NONE, \ | ||
983 | jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit)) | ||
984 | #endif /* CONFIG_X86_32 */ | 979 | #endif /* CONFIG_X86_32 */ |
985 | 980 | ||
986 | #endif /* __ASSEMBLY__ */ | 981 | #endif /* __ASSEMBLY__ */ |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 7549b8b369e4..f7b0b5c112f2 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -294,7 +294,7 @@ struct pv_mmu_ops { | |||
294 | struct paravirt_callee_save pgd_val; | 294 | struct paravirt_callee_save pgd_val; |
295 | struct paravirt_callee_save make_pgd; | 295 | struct paravirt_callee_save make_pgd; |
296 | 296 | ||
297 | #if PAGETABLE_LEVELS >= 3 | 297 | #if CONFIG_PGTABLE_LEVELS >= 3 |
298 | #ifdef CONFIG_X86_PAE | 298 | #ifdef CONFIG_X86_PAE |
299 | void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); | 299 | void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); |
300 | void (*pte_clear)(struct mm_struct *mm, unsigned long addr, | 300 | void (*pte_clear)(struct mm_struct *mm, unsigned long addr, |
@@ -308,13 +308,13 @@ struct pv_mmu_ops { | |||
308 | struct paravirt_callee_save pmd_val; | 308 | struct paravirt_callee_save pmd_val; |
309 | struct paravirt_callee_save make_pmd; | 309 | struct paravirt_callee_save make_pmd; |
310 | 310 | ||
311 | #if PAGETABLE_LEVELS == 4 | 311 | #if CONFIG_PGTABLE_LEVELS == 4 |
312 | struct paravirt_callee_save pud_val; | 312 | struct paravirt_callee_save pud_val; |
313 | struct paravirt_callee_save make_pud; | 313 | struct paravirt_callee_save make_pud; |
314 | 314 | ||
315 | void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); | 315 | void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); |
316 | #endif /* PAGETABLE_LEVELS == 4 */ | 316 | #endif /* CONFIG_PGTABLE_LEVELS == 4 */ |
317 | #endif /* PAGETABLE_LEVELS >= 3 */ | 317 | #endif /* CONFIG_PGTABLE_LEVELS >= 3 */ |
318 | 318 | ||
319 | struct pv_lazy_ops lazy_mode; | 319 | struct pv_lazy_ops lazy_mode; |
320 | 320 | ||
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index c4412e972bbd..bf7f8b55b0f9 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h | |||
@@ -77,7 +77,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, | |||
77 | 77 | ||
78 | #define pmd_pgtable(pmd) pmd_page(pmd) | 78 | #define pmd_pgtable(pmd) pmd_page(pmd) |
79 | 79 | ||
80 | #if PAGETABLE_LEVELS > 2 | 80 | #if CONFIG_PGTABLE_LEVELS > 2 |
81 | static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) | 81 | static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) |
82 | { | 82 | { |
83 | struct page *page; | 83 | struct page *page; |
@@ -116,7 +116,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) | |||
116 | } | 116 | } |
117 | #endif /* CONFIG_X86_PAE */ | 117 | #endif /* CONFIG_X86_PAE */ |
118 | 118 | ||
119 | #if PAGETABLE_LEVELS > 3 | 119 | #if CONFIG_PGTABLE_LEVELS > 3 |
120 | static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) | 120 | static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) |
121 | { | 121 | { |
122 | paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); | 122 | paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); |
@@ -142,7 +142,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, | |||
142 | ___pud_free_tlb(tlb, pud); | 142 | ___pud_free_tlb(tlb, pud); |
143 | } | 143 | } |
144 | 144 | ||
145 | #endif /* PAGETABLE_LEVELS > 3 */ | 145 | #endif /* CONFIG_PGTABLE_LEVELS > 3 */ |
146 | #endif /* PAGETABLE_LEVELS > 2 */ | 146 | #endif /* CONFIG_PGTABLE_LEVELS > 2 */ |
147 | 147 | ||
148 | #endif /* _ASM_X86_PGALLOC_H */ | 148 | #endif /* _ASM_X86_PGALLOC_H */ |
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index daacc23e3fb9..392576433e77 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h | |||
@@ -17,7 +17,6 @@ typedef union { | |||
17 | #endif /* !__ASSEMBLY__ */ | 17 | #endif /* !__ASSEMBLY__ */ |
18 | 18 | ||
19 | #define SHARED_KERNEL_PMD 0 | 19 | #define SHARED_KERNEL_PMD 0 |
20 | #define PAGETABLE_LEVELS 2 | ||
21 | 20 | ||
22 | /* | 21 | /* |
23 | * traditional i386 two-level paging structure: | 22 | * traditional i386 two-level paging structure: |
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index 1bd5876c8649..bcc89625ebe5 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h | |||
@@ -24,8 +24,6 @@ typedef union { | |||
24 | #define SHARED_KERNEL_PMD 1 | 24 | #define SHARED_KERNEL_PMD 1 |
25 | #endif | 25 | #endif |
26 | 26 | ||
27 | #define PAGETABLE_LEVELS 3 | ||
28 | |||
29 | /* | 27 | /* |
30 | * PGDIR_SHIFT determines what a top-level page table entry can map | 28 | * PGDIR_SHIFT determines what a top-level page table entry can map |
31 | */ | 29 | */ |
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index a0c35bf6cb92..fe57e7a98839 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -551,7 +551,7 @@ static inline unsigned long pages_to_mb(unsigned long npg) | |||
551 | return npg >> (20 - PAGE_SHIFT); | 551 | return npg >> (20 - PAGE_SHIFT); |
552 | } | 552 | } |
553 | 553 | ||
554 | #if PAGETABLE_LEVELS > 2 | 554 | #if CONFIG_PGTABLE_LEVELS > 2 |
555 | static inline int pud_none(pud_t pud) | 555 | static inline int pud_none(pud_t pud) |
556 | { | 556 | { |
557 | return native_pud_val(pud) == 0; | 557 | return native_pud_val(pud) == 0; |
@@ -594,9 +594,9 @@ static inline int pud_large(pud_t pud) | |||
594 | { | 594 | { |
595 | return 0; | 595 | return 0; |
596 | } | 596 | } |
597 | #endif /* PAGETABLE_LEVELS > 2 */ | 597 | #endif /* CONFIG_PGTABLE_LEVELS > 2 */ |
598 | 598 | ||
599 | #if PAGETABLE_LEVELS > 3 | 599 | #if CONFIG_PGTABLE_LEVELS > 3 |
600 | static inline int pgd_present(pgd_t pgd) | 600 | static inline int pgd_present(pgd_t pgd) |
601 | { | 601 | { |
602 | return pgd_flags(pgd) & _PAGE_PRESENT; | 602 | return pgd_flags(pgd) & _PAGE_PRESENT; |
@@ -633,7 +633,7 @@ static inline int pgd_none(pgd_t pgd) | |||
633 | { | 633 | { |
634 | return !native_pgd_val(pgd); | 634 | return !native_pgd_val(pgd); |
635 | } | 635 | } |
636 | #endif /* PAGETABLE_LEVELS > 3 */ | 636 | #endif /* CONFIG_PGTABLE_LEVELS > 3 */ |
637 | 637 | ||
638 | #endif /* __ASSEMBLY__ */ | 638 | #endif /* __ASSEMBLY__ */ |
639 | 639 | ||
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 602b6028c5b6..e6844dfb4471 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h | |||
@@ -20,7 +20,6 @@ typedef struct { pteval_t pte; } pte_t; | |||
20 | #endif /* !__ASSEMBLY__ */ | 20 | #endif /* !__ASSEMBLY__ */ |
21 | 21 | ||
22 | #define SHARED_KERNEL_PMD 0 | 22 | #define SHARED_KERNEL_PMD 0 |
23 | #define PAGETABLE_LEVELS 4 | ||
24 | 23 | ||
25 | /* | 24 | /* |
26 | * PGDIR_SHIFT determines what a top-level page table entry can map | 25 | * PGDIR_SHIFT determines what a top-level page table entry can map |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 8c7c10802e9c..78f0c8cbe316 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -234,7 +234,7 @@ static inline pgdval_t pgd_flags(pgd_t pgd) | |||
234 | return native_pgd_val(pgd) & PTE_FLAGS_MASK; | 234 | return native_pgd_val(pgd) & PTE_FLAGS_MASK; |
235 | } | 235 | } |
236 | 236 | ||
237 | #if PAGETABLE_LEVELS > 3 | 237 | #if CONFIG_PGTABLE_LEVELS > 3 |
238 | typedef struct { pudval_t pud; } pud_t; | 238 | typedef struct { pudval_t pud; } pud_t; |
239 | 239 | ||
240 | static inline pud_t native_make_pud(pmdval_t val) | 240 | static inline pud_t native_make_pud(pmdval_t val) |
@@ -255,7 +255,7 @@ static inline pudval_t native_pud_val(pud_t pud) | |||
255 | } | 255 | } |
256 | #endif | 256 | #endif |
257 | 257 | ||
258 | #if PAGETABLE_LEVELS > 2 | 258 | #if CONFIG_PGTABLE_LEVELS > 2 |
259 | typedef struct { pmdval_t pmd; } pmd_t; | 259 | typedef struct { pmdval_t pmd; } pmd_t; |
260 | 260 | ||
261 | static inline pmd_t native_make_pmd(pmdval_t val) | 261 | static inline pmd_t native_make_pmd(pmdval_t val) |
diff --git a/arch/x86/include/asm/resume-trace.h b/arch/x86/include/asm/pm-trace.h index 3ff1c2cb1da5..7b7ac42c3661 100644 --- a/arch/x86/include/asm/resume-trace.h +++ b/arch/x86/include/asm/pm-trace.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef _ASM_X86_RESUME_TRACE_H | 1 | #ifndef _ASM_X86_PM_TRACE_H |
2 | #define _ASM_X86_RESUME_TRACE_H | 2 | #define _ASM_X86_PM_TRACE_H |
3 | 3 | ||
4 | #include <asm/asm.h> | 4 | #include <asm/asm.h> |
5 | 5 | ||
@@ -14,8 +14,10 @@ do { \ | |||
14 | ".previous" \ | 14 | ".previous" \ |
15 | :"=r" (tracedata) \ | 15 | :"=r" (tracedata) \ |
16 | : "i" (__LINE__), "i" (__FILE__)); \ | 16 | : "i" (__LINE__), "i" (__FILE__)); \ |
17 | generate_resume_trace(tracedata, user); \ | 17 | generate_pm_trace(tracedata, user); \ |
18 | } \ | 18 | } \ |
19 | } while (0) | 19 | } while (0) |
20 | 20 | ||
21 | #endif /* _ASM_X86_RESUME_TRACE_H */ | 21 | #define TRACE_SUSPEND(user) TRACE_RESUME(user) |
22 | |||
23 | #endif /* _ASM_X86_PM_TRACE_H */ | ||
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a12d50e04d7a..23ba6765b718 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -213,8 +213,23 @@ struct x86_hw_tss { | |||
213 | unsigned long sp0; | 213 | unsigned long sp0; |
214 | unsigned short ss0, __ss0h; | 214 | unsigned short ss0, __ss0h; |
215 | unsigned long sp1; | 215 | unsigned long sp1; |
216 | /* ss1 caches MSR_IA32_SYSENTER_CS: */ | 216 | |
217 | unsigned short ss1, __ss1h; | 217 | /* |
218 | * We don't use ring 1, so ss1 is a convenient scratch space in | ||
219 | * the same cacheline as sp0. We use ss1 to cache the value in | ||
220 | * MSR_IA32_SYSENTER_CS. When we context switch | ||
221 | * MSR_IA32_SYSENTER_CS, we first check if the new value being | ||
222 | * written matches ss1, and, if it's not, then we wrmsr the new | ||
223 | * value and update ss1. | ||
224 | * | ||
225 | * The only reason we context switch MSR_IA32_SYSENTER_CS is | ||
226 | * that we set it to zero in vm86 tasks to avoid corrupting the | ||
227 | * stack if we were to go through the sysenter path from vm86 | ||
228 | * mode. | ||
229 | */ | ||
230 | unsigned short ss1; /* MSR_IA32_SYSENTER_CS */ | ||
231 | |||
232 | unsigned short __ss1h; | ||
218 | unsigned long sp2; | 233 | unsigned long sp2; |
219 | unsigned short ss2, __ss2h; | 234 | unsigned short ss2, __ss2h; |
220 | unsigned long __cr3; | 235 | unsigned long __cr3; |
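The new ss1 comment above describes a cache-and-compare pattern for MSR_IA32_SYSENTER_CS: keep the last value written in ss1 and only touch the MSR when the desired value differs. A minimal sketch of that pattern, assuming a tss_struct pointer is at hand; the helper name and its call site are illustrative, not the kernel's actual code:

    /* Sketch only: write the MSR just once per value change. */
    static inline void set_sysenter_cs_cached(struct tss_struct *tss,
                                              unsigned short new_cs)
    {
        if (tss->x86_tss.ss1 == new_cs)
            return;                     /* MSR already holds this value */

        tss->x86_tss.ss1 = new_cs;
        wrmsr(MSR_IA32_SYSENTER_CS, new_cs, 0);
    }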
@@ -279,13 +294,17 @@ struct tss_struct { | |||
279 | unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; | 294 | unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; |
280 | 295 | ||
281 | /* | 296 | /* |
282 | * .. and then another 0x100 bytes for the emergency kernel stack: | 297 | * Space for the temporary SYSENTER stack: |
283 | */ | 298 | */ |
284 | unsigned long stack[64]; | 299 | unsigned long SYSENTER_stack[64]; |
285 | 300 | ||
286 | } ____cacheline_aligned; | 301 | } ____cacheline_aligned; |
287 | 302 | ||
288 | DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss); | 303 | DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); |
304 | |||
305 | #ifdef CONFIG_X86_32 | ||
306 | DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); | ||
307 | #endif | ||
289 | 308 | ||
290 | /* | 309 | /* |
291 | * Save the original ist values for checking stack pointers during debugging | 310 | * Save the original ist values for checking stack pointers during debugging |
@@ -477,7 +496,6 @@ struct thread_struct { | |||
477 | #ifdef CONFIG_X86_32 | 496 | #ifdef CONFIG_X86_32 |
478 | unsigned long sysenter_cs; | 497 | unsigned long sysenter_cs; |
479 | #else | 498 | #else |
480 | unsigned long usersp; /* Copy from PDA */ | ||
481 | unsigned short es; | 499 | unsigned short es; |
482 | unsigned short ds; | 500 | unsigned short ds; |
483 | unsigned short fsindex; | 501 | unsigned short fsindex; |
@@ -567,6 +585,16 @@ static inline void native_swapgs(void) | |||
567 | #endif | 585 | #endif |
568 | } | 586 | } |
569 | 587 | ||
588 | static inline unsigned long current_top_of_stack(void) | ||
589 | { | ||
590 | #ifdef CONFIG_X86_64 | ||
591 | return this_cpu_read_stable(cpu_tss.x86_tss.sp0); | ||
592 | #else | ||
593 | /* sp0 on x86_32 is special in and around vm86 mode. */ | ||
594 | return this_cpu_read_stable(cpu_current_top_of_stack); | ||
595 | #endif | ||
596 | } | ||
597 | |||
570 | #ifdef CONFIG_PARAVIRT | 598 | #ifdef CONFIG_PARAVIRT |
571 | #include <asm/paravirt.h> | 599 | #include <asm/paravirt.h> |
572 | #else | 600 | #else |
@@ -764,10 +792,10 @@ extern char ignore_fpu_irq; | |||
764 | #define ARCH_HAS_SPINLOCK_PREFETCH | 792 | #define ARCH_HAS_SPINLOCK_PREFETCH |
765 | 793 | ||
766 | #ifdef CONFIG_X86_32 | 794 | #ifdef CONFIG_X86_32 |
767 | # define BASE_PREFETCH ASM_NOP4 | 795 | # define BASE_PREFETCH "" |
768 | # define ARCH_HAS_PREFETCH | 796 | # define ARCH_HAS_PREFETCH |
769 | #else | 797 | #else |
770 | # define BASE_PREFETCH "prefetcht0 (%1)" | 798 | # define BASE_PREFETCH "prefetcht0 %P1" |
771 | #endif | 799 | #endif |
772 | 800 | ||
773 | /* | 801 | /* |
@@ -778,10 +806,9 @@ extern char ignore_fpu_irq; | |||
778 | */ | 806 | */ |
779 | static inline void prefetch(const void *x) | 807 | static inline void prefetch(const void *x) |
780 | { | 808 | { |
781 | alternative_input(BASE_PREFETCH, | 809 | alternative_input(BASE_PREFETCH, "prefetchnta %P1", |
782 | "prefetchnta (%1)", | ||
783 | X86_FEATURE_XMM, | 810 | X86_FEATURE_XMM, |
784 | "r" (x)); | 811 | "m" (*(const char *)x)); |
785 | } | 812 | } |
786 | 813 | ||
787 | /* | 814 | /* |
@@ -791,10 +818,9 @@ static inline void prefetch(const void *x) | |||
791 | */ | 818 | */ |
792 | static inline void prefetchw(const void *x) | 819 | static inline void prefetchw(const void *x) |
793 | { | 820 | { |
794 | alternative_input(BASE_PREFETCH, | 821 | alternative_input(BASE_PREFETCH, "prefetchw %P1", |
795 | "prefetchw (%1)", | 822 | X86_FEATURE_3DNOWPREFETCH, |
796 | X86_FEATURE_3DNOW, | 823 | "m" (*(const char *)x)); |
797 | "r" (x)); | ||
798 | } | 824 | } |
799 | 825 | ||
800 | static inline void spin_lock_prefetch(const void *x) | 826 | static inline void spin_lock_prefetch(const void *x) |
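The prefetch hunks above switch from passing the address in a register ("r" (x) used as "(%1)") to a genuine memory operand ("m" (*(const char *)x) used as "%P1"), so the compiler can fold whatever addressing mode it already has instead of materialising the address in a spare register; prefetchw is also keyed off X86_FEATURE_3DNOWPREFETCH, the CPUID bit that actually advertises the instruction. A rough user-space analogue of the new operand form, for illustration only:

    static inline void prefetch_demo(const void *x)
    {
        /* "m" + %P lets GCC emit e.g. "prefetchnta 64(%rsi)" directly. */
        asm volatile("prefetchnta %P0" : : "m" (*(const char *)x));
    }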
@@ -802,6 +828,9 @@ static inline void spin_lock_prefetch(const void *x) | |||
802 | prefetchw(x); | 828 | prefetchw(x); |
803 | } | 829 | } |
804 | 830 | ||
831 | #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ | ||
832 | TOP_OF_KERNEL_STACK_PADDING) | ||
833 | |||
805 | #ifdef CONFIG_X86_32 | 834 | #ifdef CONFIG_X86_32 |
806 | /* | 835 | /* |
807 | * User space process size: 3GB (default). | 836 | * User space process size: 3GB (default). |
@@ -812,39 +841,16 @@ static inline void spin_lock_prefetch(const void *x) | |||
812 | #define STACK_TOP_MAX STACK_TOP | 841 | #define STACK_TOP_MAX STACK_TOP |
813 | 842 | ||
814 | #define INIT_THREAD { \ | 843 | #define INIT_THREAD { \ |
815 | .sp0 = sizeof(init_stack) + (long)&init_stack, \ | 844 | .sp0 = TOP_OF_INIT_STACK, \ |
816 | .vm86_info = NULL, \ | 845 | .vm86_info = NULL, \ |
817 | .sysenter_cs = __KERNEL_CS, \ | 846 | .sysenter_cs = __KERNEL_CS, \ |
818 | .io_bitmap_ptr = NULL, \ | 847 | .io_bitmap_ptr = NULL, \ |
819 | } | 848 | } |
820 | 849 | ||
821 | /* | ||
822 | * Note that the .io_bitmap member must be extra-big. This is because | ||
823 | * the CPU will access an additional byte beyond the end of the IO | ||
824 | * permission bitmap. The extra byte must be all 1 bits, and must | ||
825 | * be within the limit. | ||
826 | */ | ||
827 | #define INIT_TSS { \ | ||
828 | .x86_tss = { \ | ||
829 | .sp0 = sizeof(init_stack) + (long)&init_stack, \ | ||
830 | .ss0 = __KERNEL_DS, \ | ||
831 | .ss1 = __KERNEL_CS, \ | ||
832 | .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ | ||
833 | }, \ | ||
834 | .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \ | ||
835 | } | ||
836 | |||
837 | extern unsigned long thread_saved_pc(struct task_struct *tsk); | 850 | extern unsigned long thread_saved_pc(struct task_struct *tsk); |
838 | 851 | ||
839 | #define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long)) | ||
840 | #define KSTK_TOP(info) \ | ||
841 | ({ \ | ||
842 | unsigned long *__ptr = (unsigned long *)(info); \ | ||
843 | (unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \ | ||
844 | }) | ||
845 | |||
846 | /* | 852 | /* |
847 | * The below -8 is to reserve 8 bytes on top of the ring0 stack. | 853 | * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack. |
848 | * This is necessary to guarantee that the entire "struct pt_regs" | 854 | * This is necessary to guarantee that the entire "struct pt_regs" |
849 | * is accessible even if the CPU hasn't stored the SS/ESP registers | 855 | * is accessible even if the CPU hasn't stored the SS/ESP registers |
850 | * on the stack (interrupt gate does not save these registers | 856 | * on the stack (interrupt gate does not save these registers |
@@ -853,11 +859,11 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); | |||
853 | * "struct pt_regs" is possible, but they may contain the | 859 | * "struct pt_regs" is possible, but they may contain the |
854 | * completely wrong values. | 860 | * completely wrong values. |
855 | */ | 861 | */ |
856 | #define task_pt_regs(task) \ | 862 | #define task_pt_regs(task) \ |
857 | ({ \ | 863 | ({ \ |
858 | struct pt_regs *__regs__; \ | 864 | unsigned long __ptr = (unsigned long)task_stack_page(task); \ |
859 | __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \ | 865 | __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \ |
860 | __regs__ - 1; \ | 866 | ((struct pt_regs *)__ptr) - 1; \ |
861 | }) | 867 | }) |
862 | 868 | ||
863 | #define KSTK_ESP(task) (task_pt_regs(task)->sp) | 869 | #define KSTK_ESP(task) (task_pt_regs(task)->sp) |
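The rewritten task_pt_regs() above is plain pointer arithmetic: start at the base of the task's stack pages, go up THREAD_SIZE, step back down by TOP_OF_KERNEL_STACK_PADDING, and the pt_regs block sits immediately below that point. A standalone sketch of the same arithmetic, assuming the common 8 KiB 32-bit kernel stack, the 8-byte padding defined later in this series, and a placeholder stack base and pt_regs size:

    #include <stdio.h>

    #define THREAD_SIZE                  8192  /* assumed: 2 pages on 32-bit   */
    #define TOP_OF_KERNEL_STACK_PADDING  8
    #define SIZEOF_PT_REGS               68    /* assumed 32-bit pt_regs size  */

    int main(void)
    {
        unsigned long stack_page = 0xc1000000UL;  /* hypothetical stack base   */
        unsigned long ptr  = stack_page + THREAD_SIZE
                             - TOP_OF_KERNEL_STACK_PADDING;
        unsigned long regs = ptr - SIZEOF_PT_REGS; /* ((struct pt_regs *)ptr) - 1 */

        printf("top of stack   : %#lx\n", stack_page + THREAD_SIZE);
        printf("padded sp0     : %#lx\n", ptr);
        printf("task_pt_regs() : %#lx\n", regs);
        return 0;
    }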
@@ -889,11 +895,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); | |||
889 | #define STACK_TOP_MAX TASK_SIZE_MAX | 895 | #define STACK_TOP_MAX TASK_SIZE_MAX |
890 | 896 | ||
891 | #define INIT_THREAD { \ | 897 | #define INIT_THREAD { \ |
892 | .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ | 898 | .sp0 = TOP_OF_INIT_STACK \ |
893 | } | ||
894 | |||
895 | #define INIT_TSS { \ | ||
896 | .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ | ||
897 | } | 899 | } |
898 | 900 | ||
899 | /* | 901 | /* |
@@ -905,11 +907,6 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); | |||
905 | #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) | 907 | #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) |
906 | extern unsigned long KSTK_ESP(struct task_struct *task); | 908 | extern unsigned long KSTK_ESP(struct task_struct *task); |
907 | 909 | ||
908 | /* | ||
909 | * User space RSP while inside the SYSCALL fast path | ||
910 | */ | ||
911 | DECLARE_PER_CPU(unsigned long, old_rsp); | ||
912 | |||
913 | #endif /* CONFIG_X86_64 */ | 910 | #endif /* CONFIG_X86_64 */ |
914 | 911 | ||
915 | extern void start_thread(struct pt_regs *regs, unsigned long new_ip, | 912 | extern void start_thread(struct pt_regs *regs, unsigned long new_ip, |
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 86fc2bb82287..19507ffa5d28 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -31,13 +31,17 @@ struct pt_regs { | |||
31 | #else /* __i386__ */ | 31 | #else /* __i386__ */ |
32 | 32 | ||
33 | struct pt_regs { | 33 | struct pt_regs { |
34 | /* | ||
35 | * C ABI says these regs are callee-preserved. They aren't saved on kernel entry | ||
36 | * unless syscall needs a complete, fully filled "struct pt_regs". | ||
37 | */ | ||
34 | unsigned long r15; | 38 | unsigned long r15; |
35 | unsigned long r14; | 39 | unsigned long r14; |
36 | unsigned long r13; | 40 | unsigned long r13; |
37 | unsigned long r12; | 41 | unsigned long r12; |
38 | unsigned long bp; | 42 | unsigned long bp; |
39 | unsigned long bx; | 43 | unsigned long bx; |
40 | /* arguments: non interrupts/non tracing syscalls only save up to here*/ | 44 | /* These regs are callee-clobbered. Always saved on kernel entry. */ |
41 | unsigned long r11; | 45 | unsigned long r11; |
42 | unsigned long r10; | 46 | unsigned long r10; |
43 | unsigned long r9; | 47 | unsigned long r9; |
@@ -47,9 +51,12 @@ struct pt_regs { | |||
47 | unsigned long dx; | 51 | unsigned long dx; |
48 | unsigned long si; | 52 | unsigned long si; |
49 | unsigned long di; | 53 | unsigned long di; |
54 | /* | ||
55 | * On syscall entry, this is syscall#. On CPU exception, this is error code. | ||
56 | * On hw interrupt, it's IRQ number: | ||
57 | */ | ||
50 | unsigned long orig_ax; | 58 | unsigned long orig_ax; |
51 | /* end of arguments */ | 59 | /* Return frame for iretq */ |
52 | /* cpu exception frame or undefined */ | ||
53 | unsigned long ip; | 60 | unsigned long ip; |
54 | unsigned long cs; | 61 | unsigned long cs; |
55 | unsigned long flags; | 62 | unsigned long flags; |
@@ -89,11 +96,13 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) | |||
89 | } | 96 | } |
90 | 97 | ||
91 | /* | 98 | /* |
92 | * user_mode_vm(regs) determines whether a register set came from user mode. | 99 | * user_mode(regs) determines whether a register set came from user |
93 | * This is true if V8086 mode was enabled OR if the register set was from | 100 | * mode. On x86_32, this is true if V8086 mode was enabled OR if the |
94 | * protected mode with RPL-3 CS value. This tricky test checks that with | 101 | * register set was from protected mode with RPL-3 CS value. This |
95 | * one comparison. Many places in the kernel can bypass this full check | 102 | * tricky test checks that with one comparison. |
96 | * if they have already ruled out V8086 mode, so user_mode(regs) can be used. | 103 | * |
104 | * On x86_64, vm86 mode is mercifully nonexistent, and we don't need | ||
105 | * the extra check. | ||
97 | */ | 106 | */ |
98 | static inline int user_mode(struct pt_regs *regs) | 107 | static inline int user_mode(struct pt_regs *regs) |
99 | { | 108 | { |
@@ -104,16 +113,6 @@ static inline int user_mode(struct pt_regs *regs) | |||
104 | #endif | 113 | #endif |
105 | } | 114 | } |
106 | 115 | ||
107 | static inline int user_mode_vm(struct pt_regs *regs) | ||
108 | { | ||
109 | #ifdef CONFIG_X86_32 | ||
110 | return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= | ||
111 | USER_RPL; | ||
112 | #else | ||
113 | return user_mode(regs); | ||
114 | #endif | ||
115 | } | ||
116 | |||
117 | static inline int v8086_mode(struct pt_regs *regs) | 116 | static inline int v8086_mode(struct pt_regs *regs) |
118 | { | 117 | { |
119 | #ifdef CONFIG_X86_32 | 118 | #ifdef CONFIG_X86_32 |
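The folded user_mode() comment above hinges on one comparison doing double duty on 32-bit: either CS.RPL being 3 or EFLAGS.VM being set pushes the OR-ed value to at least USER_RPL. A standalone restatement of that test, illustration only, using the mask values from segment.h further down and EFLAGS.VM as bit 17:

    #include <stdbool.h>

    #define SEGMENT_RPL_MASK 0x3
    #define USER_RPL         0x3
    #define X86_VM_MASK      0x00020000    /* EFLAGS.VM, bit 17 */

    static bool user_mode_32(unsigned long cs, unsigned long flags)
    {
        /* RPL 3 alone, or any VM86 frame, makes the value >= USER_RPL. */
        return ((cs & SEGMENT_RPL_MASK) | (flags & X86_VM_MASK)) >= USER_RPL;
    }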
@@ -138,12 +137,8 @@ static inline bool user_64bit_mode(struct pt_regs *regs) | |||
138 | #endif | 137 | #endif |
139 | } | 138 | } |
140 | 139 | ||
141 | #define current_user_stack_pointer() this_cpu_read(old_rsp) | 140 | #define current_user_stack_pointer() current_pt_regs()->sp |
142 | /* ia32 vs. x32 difference */ | 141 | #define compat_user_stack_pointer() current_pt_regs()->sp |
143 | #define compat_user_stack_pointer() \ | ||
144 | (test_thread_flag(TIF_IA32) \ | ||
145 | ? current_pt_regs()->sp \ | ||
146 | : this_cpu_read(old_rsp)) | ||
147 | #endif | 142 | #endif |
148 | 143 | ||
149 | #ifdef CONFIG_X86_32 | 144 | #ifdef CONFIG_X86_32 |
@@ -248,7 +243,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, | |||
248 | */ | 243 | */ |
249 | #define arch_ptrace_stop_needed(code, info) \ | 244 | #define arch_ptrace_stop_needed(code, info) \ |
250 | ({ \ | 245 | ({ \ |
251 | set_thread_flag(TIF_NOTIFY_RESUME); \ | 246 | force_iret(); \ |
252 | false; \ | 247 | false; \ |
253 | }) | 248 | }) |
254 | 249 | ||
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index d6b078e9fa28..25b1cc07d496 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h | |||
@@ -95,6 +95,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, | |||
95 | 95 | ||
96 | struct pvclock_vsyscall_time_info { | 96 | struct pvclock_vsyscall_time_info { |
97 | struct pvclock_vcpu_time_info pvti; | 97 | struct pvclock_vcpu_time_info pvti; |
98 | u32 migrate_count; | ||
98 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); | 99 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); |
99 | 100 | ||
100 | #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) | 101 | #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) |
diff --git a/arch/x86/include/asm/seccomp.h b/arch/x86/include/asm/seccomp.h index 0f3d7f099224..0c8c7c8861b4 100644 --- a/arch/x86/include/asm/seccomp.h +++ b/arch/x86/include/asm/seccomp.h | |||
@@ -1,5 +1,20 @@ | |||
1 | #ifndef _ASM_X86_SECCOMP_H | ||
2 | #define _ASM_X86_SECCOMP_H | ||
3 | |||
4 | #include <asm/unistd.h> | ||
5 | |||
1 | #ifdef CONFIG_X86_32 | 6 | #ifdef CONFIG_X86_32 |
2 | # include <asm/seccomp_32.h> | 7 | #define __NR_seccomp_sigreturn __NR_sigreturn |
3 | #else | ||
4 | # include <asm/seccomp_64.h> | ||
5 | #endif | 8 | #endif |
9 | |||
10 | #ifdef CONFIG_COMPAT | ||
11 | #include <asm/ia32_unistd.h> | ||
12 | #define __NR_seccomp_read_32 __NR_ia32_read | ||
13 | #define __NR_seccomp_write_32 __NR_ia32_write | ||
14 | #define __NR_seccomp_exit_32 __NR_ia32_exit | ||
15 | #define __NR_seccomp_sigreturn_32 __NR_ia32_sigreturn | ||
16 | #endif | ||
17 | |||
18 | #include <asm-generic/seccomp.h> | ||
19 | |||
20 | #endif /* _ASM_X86_SECCOMP_H */ | ||
diff --git a/arch/x86/include/asm/seccomp_32.h b/arch/x86/include/asm/seccomp_32.h deleted file mode 100644 index b811d6f5780c..000000000000 --- a/arch/x86/include/asm/seccomp_32.h +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | #ifndef _ASM_X86_SECCOMP_32_H | ||
2 | #define _ASM_X86_SECCOMP_32_H | ||
3 | |||
4 | #include <linux/unistd.h> | ||
5 | |||
6 | #define __NR_seccomp_read __NR_read | ||
7 | #define __NR_seccomp_write __NR_write | ||
8 | #define __NR_seccomp_exit __NR_exit | ||
9 | #define __NR_seccomp_sigreturn __NR_sigreturn | ||
10 | |||
11 | #endif /* _ASM_X86_SECCOMP_32_H */ | ||
diff --git a/arch/x86/include/asm/seccomp_64.h b/arch/x86/include/asm/seccomp_64.h deleted file mode 100644 index 84ec1bd161a5..000000000000 --- a/arch/x86/include/asm/seccomp_64.h +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | #ifndef _ASM_X86_SECCOMP_64_H | ||
2 | #define _ASM_X86_SECCOMP_64_H | ||
3 | |||
4 | #include <linux/unistd.h> | ||
5 | #include <asm/ia32_unistd.h> | ||
6 | |||
7 | #define __NR_seccomp_read __NR_read | ||
8 | #define __NR_seccomp_write __NR_write | ||
9 | #define __NR_seccomp_exit __NR_exit | ||
10 | #define __NR_seccomp_sigreturn __NR_rt_sigreturn | ||
11 | |||
12 | #define __NR_seccomp_read_32 __NR_ia32_read | ||
13 | #define __NR_seccomp_write_32 __NR_ia32_write | ||
14 | #define __NR_seccomp_exit_32 __NR_ia32_exit | ||
15 | #define __NR_seccomp_sigreturn_32 __NR_ia32_sigreturn | ||
16 | |||
17 | #endif /* _ASM_X86_SECCOMP_64_H */ | ||
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index db257a58571f..5a9856eb12ba 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h | |||
@@ -3,8 +3,10 @@ | |||
3 | 3 | ||
4 | #include <linux/const.h> | 4 | #include <linux/const.h> |
5 | 5 | ||
6 | /* Constructor for a conventional segment GDT (or LDT) entry */ | 6 | /* |
7 | /* This is a macro so it can be used in initializers */ | 7 | * Constructor for a conventional segment GDT (or LDT) entry. |
8 | * This is a macro so it can be used in initializers. | ||
9 | */ | ||
8 | #define GDT_ENTRY(flags, base, limit) \ | 10 | #define GDT_ENTRY(flags, base, limit) \ |
9 | ((((base) & _AC(0xff000000,ULL)) << (56-24)) | \ | 11 | ((((base) & _AC(0xff000000,ULL)) << (56-24)) | \ |
10 | (((flags) & _AC(0x0000f0ff,ULL)) << 40) | \ | 12 | (((flags) & _AC(0x0000f0ff,ULL)) << 40) | \ |
@@ -12,198 +14,228 @@ | |||
12 | (((base) & _AC(0x00ffffff,ULL)) << 16) | \ | 14 | (((base) & _AC(0x00ffffff,ULL)) << 16) | \ |
13 | (((limit) & _AC(0x0000ffff,ULL)))) | 15 | (((limit) & _AC(0x0000ffff,ULL)))) |
14 | 16 | ||
15 | /* Simple and small GDT entries for booting only */ | 17 | /* Simple and small GDT entries for booting only: */ |
16 | 18 | ||
17 | #define GDT_ENTRY_BOOT_CS 2 | 19 | #define GDT_ENTRY_BOOT_CS 2 |
18 | #define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8) | 20 | #define GDT_ENTRY_BOOT_DS 3 |
21 | #define GDT_ENTRY_BOOT_TSS 4 | ||
22 | #define __BOOT_CS (GDT_ENTRY_BOOT_CS*8) | ||
23 | #define __BOOT_DS (GDT_ENTRY_BOOT_DS*8) | ||
24 | #define __BOOT_TSS (GDT_ENTRY_BOOT_TSS*8) | ||
25 | |||
26 | /* | ||
27 | * Bottom two bits of selector give the ring | ||
28 | * privilege level | ||
29 | */ | ||
30 | #define SEGMENT_RPL_MASK 0x3 | ||
19 | 31 | ||
20 | #define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1) | 32 | /* User mode is privilege level 3: */ |
21 | #define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8) | 33 | #define USER_RPL 0x3 |
22 | 34 | ||
23 | #define GDT_ENTRY_BOOT_TSS (GDT_ENTRY_BOOT_CS + 2) | 35 | /* Bit 2 is Table Indicator (TI): selects between LDT or GDT */ |
24 | #define __BOOT_TSS (GDT_ENTRY_BOOT_TSS * 8) | 36 | #define SEGMENT_TI_MASK 0x4 |
37 | /* LDT segment has TI set ... */ | ||
38 | #define SEGMENT_LDT 0x4 | ||
39 | /* ... GDT has it cleared */ | ||
40 | #define SEGMENT_GDT 0x0 | ||
25 | 41 | ||
26 | #define SEGMENT_RPL_MASK 0x3 /* | 42 | #define GDT_ENTRY_INVALID_SEG 0 |
27 | * Bottom two bits of selector give the ring | ||
28 | * privilege level | ||
29 | */ | ||
30 | #define SEGMENT_TI_MASK 0x4 /* Bit 2 is table indicator (LDT/GDT) */ | ||
31 | #define USER_RPL 0x3 /* User mode is privilege level 3 */ | ||
32 | #define SEGMENT_LDT 0x4 /* LDT segment has TI set... */ | ||
33 | #define SEGMENT_GDT 0x0 /* ... GDT has it cleared */ | ||
34 | 43 | ||
35 | #ifdef CONFIG_X86_32 | 44 | #ifdef CONFIG_X86_32 |
36 | /* | 45 | /* |
37 | * The layout of the per-CPU GDT under Linux: | 46 | * The layout of the per-CPU GDT under Linux: |
38 | * | 47 | * |
39 | * 0 - null | 48 | * 0 - null <=== cacheline #1 |
40 | * 1 - reserved | 49 | * 1 - reserved |
41 | * 2 - reserved | 50 | * 2 - reserved |
42 | * 3 - reserved | 51 | * 3 - reserved |
43 | * | 52 | * |
44 | * 4 - unused <==== new cacheline | 53 | * 4 - unused <=== cacheline #2 |
45 | * 5 - unused | 54 | * 5 - unused |
46 | * | 55 | * |
47 | * ------- start of TLS (Thread-Local Storage) segments: | 56 | * ------- start of TLS (Thread-Local Storage) segments: |
48 | * | 57 | * |
49 | * 6 - TLS segment #1 [ glibc's TLS segment ] | 58 | * 6 - TLS segment #1 [ glibc's TLS segment ] |
50 | * 7 - TLS segment #2 [ Wine's %fs Win32 segment ] | 59 | * 7 - TLS segment #2 [ Wine's %fs Win32 segment ] |
51 | * 8 - TLS segment #3 | 60 | * 8 - TLS segment #3 <=== cacheline #3 |
52 | * 9 - reserved | 61 | * 9 - reserved |
53 | * 10 - reserved | 62 | * 10 - reserved |
54 | * 11 - reserved | 63 | * 11 - reserved |
55 | * | 64 | * |
56 | * ------- start of kernel segments: | 65 | * ------- start of kernel segments: |
57 | * | 66 | * |
58 | * 12 - kernel code segment <==== new cacheline | 67 | * 12 - kernel code segment <=== cacheline #4 |
59 | * 13 - kernel data segment | 68 | * 13 - kernel data segment |
60 | * 14 - default user CS | 69 | * 14 - default user CS |
61 | * 15 - default user DS | 70 | * 15 - default user DS |
62 | * 16 - TSS | 71 | * 16 - TSS <=== cacheline #5 |
63 | * 17 - LDT | 72 | * 17 - LDT |
64 | * 18 - PNPBIOS support (16->32 gate) | 73 | * 18 - PNPBIOS support (16->32 gate) |
65 | * 19 - PNPBIOS support | 74 | * 19 - PNPBIOS support |
66 | * 20 - PNPBIOS support | 75 | * 20 - PNPBIOS support <=== cacheline #6 |
67 | * 21 - PNPBIOS support | 76 | * 21 - PNPBIOS support |
68 | * 22 - PNPBIOS support | 77 | * 22 - PNPBIOS support |
69 | * 23 - APM BIOS support | 78 | * 23 - APM BIOS support |
70 | * 24 - APM BIOS support | 79 | * 24 - APM BIOS support <=== cacheline #7 |
71 | * 25 - APM BIOS support | 80 | * 25 - APM BIOS support |
72 | * | 81 | * |
73 | * 26 - ESPFIX small SS | 82 | * 26 - ESPFIX small SS |
74 | * 27 - per-cpu [ offset to per-cpu data area ] | 83 | * 27 - per-cpu [ offset to per-cpu data area ] |
75 | * 28 - stack_canary-20 [ for stack protector ] | 84 | * 28 - stack_canary-20 [ for stack protector ] <=== cacheline #8 |
76 | * 29 - unused | 85 | * 29 - unused |
77 | * 30 - unused | 86 | * 30 - unused |
78 | * 31 - TSS for double fault handler | 87 | * 31 - TSS for double fault handler |
79 | */ | 88 | */ |
80 | #define GDT_ENTRY_TLS_MIN 6 | 89 | #define GDT_ENTRY_TLS_MIN 6 |
81 | #define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) | 90 | #define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) |
82 | 91 | ||
92 | #define GDT_ENTRY_KERNEL_CS 12 | ||
93 | #define GDT_ENTRY_KERNEL_DS 13 | ||
83 | #define GDT_ENTRY_DEFAULT_USER_CS 14 | 94 | #define GDT_ENTRY_DEFAULT_USER_CS 14 |
84 | |||
85 | #define GDT_ENTRY_DEFAULT_USER_DS 15 | 95 | #define GDT_ENTRY_DEFAULT_USER_DS 15 |
96 | #define GDT_ENTRY_TSS 16 | ||
97 | #define GDT_ENTRY_LDT 17 | ||
98 | #define GDT_ENTRY_PNPBIOS_CS32 18 | ||
99 | #define GDT_ENTRY_PNPBIOS_CS16 19 | ||
100 | #define GDT_ENTRY_PNPBIOS_DS 20 | ||
101 | #define GDT_ENTRY_PNPBIOS_TS1 21 | ||
102 | #define GDT_ENTRY_PNPBIOS_TS2 22 | ||
103 | #define GDT_ENTRY_APMBIOS_BASE 23 | ||
104 | |||
105 | #define GDT_ENTRY_ESPFIX_SS 26 | ||
106 | #define GDT_ENTRY_PERCPU 27 | ||
107 | #define GDT_ENTRY_STACK_CANARY 28 | ||
108 | |||
109 | #define GDT_ENTRY_DOUBLEFAULT_TSS 31 | ||
86 | 110 | ||
87 | #define GDT_ENTRY_KERNEL_BASE (12) | 111 | /* |
112 | * Number of entries in the GDT table: | ||
113 | */ | ||
114 | #define GDT_ENTRIES 32 | ||
88 | 115 | ||
89 | #define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE+0) | 116 | /* |
117 | * Segment selector values corresponding to the above entries: | ||
118 | */ | ||
90 | 119 | ||
91 | #define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE+1) | 120 | #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8) |
121 | #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8) | ||
122 | #define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3) | ||
123 | #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3) | ||
124 | #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8) | ||
92 | 125 | ||
93 | #define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE+4) | 126 | /* segment for calling fn: */ |
94 | #define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE+5) | 127 | #define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32*8) |
128 | /* code segment for BIOS: */ | ||
129 | #define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16*8) | ||
95 | 130 | ||
96 | #define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE+6) | 131 | /* "Is this PNP code selector (PNP_CS32 or PNP_CS16)?" */ |
97 | #define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE+11) | 132 | #define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == PNP_CS32) |
98 | 133 | ||
99 | #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE+14) | 134 | /* data segment for BIOS: */ |
100 | #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8) | 135 | #define PNP_DS (GDT_ENTRY_PNPBIOS_DS*8) |
136 | /* transfer data segment: */ | ||
137 | #define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1*8) | ||
138 | /* another data segment: */ | ||
139 | #define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2*8) | ||
101 | 140 | ||
102 | #define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE+15) | ||
103 | #ifdef CONFIG_SMP | 141 | #ifdef CONFIG_SMP |
104 | #define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) | 142 | # define __KERNEL_PERCPU (GDT_ENTRY_PERCPU*8) |
105 | #else | 143 | #else |
106 | #define __KERNEL_PERCPU 0 | 144 | # define __KERNEL_PERCPU 0 |
107 | #endif | 145 | #endif |
108 | 146 | ||
109 | #define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE+16) | ||
110 | #ifdef CONFIG_CC_STACKPROTECTOR | 147 | #ifdef CONFIG_CC_STACKPROTECTOR |
111 | #define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8) | 148 | # define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8) |
112 | #else | 149 | #else |
113 | #define __KERNEL_STACK_CANARY 0 | 150 | # define __KERNEL_STACK_CANARY 0 |
114 | #endif | 151 | #endif |
115 | 152 | ||
116 | #define GDT_ENTRY_DOUBLEFAULT_TSS 31 | 153 | #else /* 64-bit: */ |
117 | |||
118 | /* | ||
119 | * The GDT has 32 entries | ||
120 | */ | ||
121 | #define GDT_ENTRIES 32 | ||
122 | 154 | ||
123 | /* The PnP BIOS entries in the GDT */ | 155 | #include <asm/cache.h> |
124 | #define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0) | ||
125 | #define GDT_ENTRY_PNPBIOS_CS16 (GDT_ENTRY_PNPBIOS_BASE + 1) | ||
126 | #define GDT_ENTRY_PNPBIOS_DS (GDT_ENTRY_PNPBIOS_BASE + 2) | ||
127 | #define GDT_ENTRY_PNPBIOS_TS1 (GDT_ENTRY_PNPBIOS_BASE + 3) | ||
128 | #define GDT_ENTRY_PNPBIOS_TS2 (GDT_ENTRY_PNPBIOS_BASE + 4) | ||
129 | |||
130 | /* The PnP BIOS selectors */ | ||
131 | #define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32 * 8) /* segment for calling fn */ | ||
132 | #define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16 * 8) /* code segment for BIOS */ | ||
133 | #define PNP_DS (GDT_ENTRY_PNPBIOS_DS * 8) /* data segment for BIOS */ | ||
134 | #define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */ | ||
135 | #define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */ | ||
136 | 156 | ||
157 | #define GDT_ENTRY_KERNEL32_CS 1 | ||
158 | #define GDT_ENTRY_KERNEL_CS 2 | ||
159 | #define GDT_ENTRY_KERNEL_DS 3 | ||
137 | 160 | ||
138 | /* | 161 | /* |
139 | * Matching rules for certain types of segments. | 162 | * We cannot use the same code segment descriptor for user and kernel mode, |
163 | * not even in long flat mode, because of different DPL. | ||
164 | * | ||
165 | * GDT layout to get 64-bit SYSCALL/SYSRET support right. SYSRET hardcodes | ||
166 | * selectors: | ||
167 | * | ||
168 | * if returning to 32-bit userspace: cs = STAR.SYSRET_CS, | ||
169 | * if returning to 64-bit userspace: cs = STAR.SYSRET_CS+16, | ||
170 | * | ||
171 | * ss = STAR.SYSRET_CS+8 (in either case) | ||
172 | * | ||
173 | * thus USER_DS should be between 32-bit and 64-bit code selectors: | ||
140 | */ | 174 | */ |
175 | #define GDT_ENTRY_DEFAULT_USER32_CS 4 | ||
176 | #define GDT_ENTRY_DEFAULT_USER_DS 5 | ||
177 | #define GDT_ENTRY_DEFAULT_USER_CS 6 | ||
141 | 178 | ||
142 | /* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ | 179 | /* Needs two entries */ |
143 | #define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) | 180 | #define GDT_ENTRY_TSS 8 |
144 | 181 | /* Needs two entries */ | |
182 | #define GDT_ENTRY_LDT 10 | ||
145 | 183 | ||
146 | #else | 184 | #define GDT_ENTRY_TLS_MIN 12 |
147 | #include <asm/cache.h> | 185 | #define GDT_ENTRY_TLS_MAX 14 |
148 | |||
149 | #define GDT_ENTRY_KERNEL32_CS 1 | ||
150 | #define GDT_ENTRY_KERNEL_CS 2 | ||
151 | #define GDT_ENTRY_KERNEL_DS 3 | ||
152 | 186 | ||
153 | #define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS * 8) | 187 | /* Abused to load per CPU data from limit */ |
188 | #define GDT_ENTRY_PER_CPU 15 | ||
154 | 189 | ||
155 | /* | 190 | /* |
156 | * we cannot use the same code segment descriptor for user and kernel | 191 | * Number of entries in the GDT table: |
157 | * -- not even in the long flat mode, because of different DPL /kkeil | ||
158 | * The segment offset needs to contain a RPL. Grr. -AK | ||
159 | * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) | ||
160 | */ | 192 | */ |
161 | #define GDT_ENTRY_DEFAULT_USER32_CS 4 | 193 | #define GDT_ENTRIES 16 |
162 | #define GDT_ENTRY_DEFAULT_USER_DS 5 | ||
163 | #define GDT_ENTRY_DEFAULT_USER_CS 6 | ||
164 | #define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8+3) | ||
165 | #define __USER32_DS __USER_DS | ||
166 | |||
167 | #define GDT_ENTRY_TSS 8 /* needs two entries */ | ||
168 | #define GDT_ENTRY_LDT 10 /* needs two entries */ | ||
169 | #define GDT_ENTRY_TLS_MIN 12 | ||
170 | #define GDT_ENTRY_TLS_MAX 14 | ||
171 | |||
172 | #define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */ | ||
173 | #define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3) | ||
174 | 194 | ||
175 | /* TLS indexes for 64bit - hardcoded in arch_prctl */ | 195 | /* |
176 | #define FS_TLS 0 | 196 | * Segment selector values corresponding to the above entries: |
177 | #define GS_TLS 1 | 197 | * |
178 | 198 | * Note, selectors also need to have a correct RPL, | |
179 | #define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3) | 199 | * expressed with the +3 value for user-space selectors: |
180 | #define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3) | 200 | */ |
181 | 201 | #define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS*8) | |
182 | #define GDT_ENTRIES 16 | 202 | #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8) |
203 | #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8) | ||
204 | #define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8 + 3) | ||
205 | #define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3) | ||
206 | #define __USER32_DS __USER_DS | ||
207 | #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3) | ||
208 | #define __PER_CPU_SEG (GDT_ENTRY_PER_CPU*8 + 3) | ||
209 | |||
210 | /* TLS indexes for 64-bit - hardcoded in arch_prctl(): */ | ||
211 | #define FS_TLS 0 | ||
212 | #define GS_TLS 1 | ||
213 | |||
214 | #define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3) | ||
215 | #define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3) | ||
183 | 216 | ||
184 | #endif | 217 | #endif |
185 | 218 | ||
186 | #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8) | ||
187 | #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8) | ||
188 | #define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8+3) | ||
189 | #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8+3) | ||
190 | #ifndef CONFIG_PARAVIRT | 219 | #ifndef CONFIG_PARAVIRT |
191 | #define get_kernel_rpl() 0 | 220 | # define get_kernel_rpl() 0 |
192 | #endif | 221 | #endif |
193 | 222 | ||
194 | #define IDT_ENTRIES 256 | 223 | #define IDT_ENTRIES 256 |
195 | #define NUM_EXCEPTION_VECTORS 32 | 224 | #define NUM_EXCEPTION_VECTORS 32 |
196 | /* Bitmask of exception vectors which push an error code on the stack */ | 225 | |
197 | #define EXCEPTION_ERRCODE_MASK 0x00027d00 | 226 | /* Bitmask of exception vectors which push an error code on the stack: */ |
198 | #define GDT_SIZE (GDT_ENTRIES * 8) | 227 | #define EXCEPTION_ERRCODE_MASK 0x00027d00 |
199 | #define GDT_ENTRY_TLS_ENTRIES 3 | 228 | |
200 | #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) | 229 | #define GDT_SIZE (GDT_ENTRIES*8) |
230 | #define GDT_ENTRY_TLS_ENTRIES 3 | ||
231 | #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8) | ||
201 | 232 | ||
202 | #ifdef __KERNEL__ | 233 | #ifdef __KERNEL__ |
203 | #ifndef __ASSEMBLY__ | 234 | #ifndef __ASSEMBLY__ |
235 | |||
204 | extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5]; | 236 | extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5]; |
205 | #ifdef CONFIG_TRACING | 237 | #ifdef CONFIG_TRACING |
206 | #define trace_early_idt_handlers early_idt_handlers | 238 | # define trace_early_idt_handlers early_idt_handlers |
207 | #endif | 239 | #endif |
208 | 240 | ||
209 | /* | 241 | /* |
@@ -228,37 +260,30 @@ do { \ | |||
228 | } while (0) | 260 | } while (0) |
229 | 261 | ||
230 | /* | 262 | /* |
231 | * Save a segment register away | 263 | * Save a segment register away: |
232 | */ | 264 | */ |
233 | #define savesegment(seg, value) \ | 265 | #define savesegment(seg, value) \ |
234 | asm("mov %%" #seg ",%0":"=r" (value) : : "memory") | 266 | asm("mov %%" #seg ",%0":"=r" (value) : : "memory") |
235 | 267 | ||
236 | /* | 268 | /* |
237 | * x86_32 user gs accessors. | 269 | * x86-32 user GS accessors: |
238 | */ | 270 | */ |
239 | #ifdef CONFIG_X86_32 | 271 | #ifdef CONFIG_X86_32 |
240 | #ifdef CONFIG_X86_32_LAZY_GS | 272 | # ifdef CONFIG_X86_32_LAZY_GS |
241 | #define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) | 273 | # define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; }) |
242 | #define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) | 274 | # define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) |
243 | #define task_user_gs(tsk) ((tsk)->thread.gs) | 275 | # define task_user_gs(tsk) ((tsk)->thread.gs) |
244 | #define lazy_save_gs(v) savesegment(gs, (v)) | 276 | # define lazy_save_gs(v) savesegment(gs, (v)) |
245 | #define lazy_load_gs(v) loadsegment(gs, (v)) | 277 | # define lazy_load_gs(v) loadsegment(gs, (v)) |
246 | #else /* X86_32_LAZY_GS */ | 278 | # else /* X86_32_LAZY_GS */ |
247 | #define get_user_gs(regs) (u16)((regs)->gs) | 279 | # define get_user_gs(regs) (u16)((regs)->gs) |
248 | #define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) | 280 | # define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) |
249 | #define task_user_gs(tsk) (task_pt_regs(tsk)->gs) | 281 | # define task_user_gs(tsk) (task_pt_regs(tsk)->gs) |
250 | #define lazy_save_gs(v) do { } while (0) | 282 | # define lazy_save_gs(v) do { } while (0) |
251 | #define lazy_load_gs(v) do { } while (0) | 283 | # define lazy_load_gs(v) do { } while (0) |
252 | #endif /* X86_32_LAZY_GS */ | 284 | # endif /* X86_32_LAZY_GS */ |
253 | #endif /* X86_32 */ | 285 | #endif /* X86_32 */ |
254 | 286 | ||
255 | static inline unsigned long get_limit(unsigned long segment) | ||
256 | { | ||
257 | unsigned long __limit; | ||
258 | asm("lsll %1,%0" : "=r" (__limit) : "r" (segment)); | ||
259 | return __limit + 1; | ||
260 | } | ||
261 | |||
262 | #endif /* !__ASSEMBLY__ */ | 287 | #endif /* !__ASSEMBLY__ */ |
263 | #endif /* __KERNEL__ */ | 288 | #endif /* __KERNEL__ */ |
264 | 289 | ||
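The 64-bit GDT comment in segment.h above pins down why __USER_DS must sit between the 32-bit and 64-bit user code selectors: SYSRET derives CS and SS from a single MSR_STAR field by fixed offsets. A small user-space check of the new entry numbers against those offsets; the STAR offset rules are from the architecture manuals, everything else is the defines above:

    #include <assert.h>

    #define GDT_ENTRY_KERNEL_CS          2
    #define GDT_ENTRY_KERNEL_DS          3
    #define GDT_ENTRY_DEFAULT_USER32_CS  4
    #define GDT_ENTRY_DEFAULT_USER_DS    5
    #define GDT_ENTRY_DEFAULT_USER_CS    6

    #define __KERNEL_CS  (GDT_ENTRY_KERNEL_CS*8)
    #define __KERNEL_DS  (GDT_ENTRY_KERNEL_DS*8)
    #define __USER32_CS  (GDT_ENTRY_DEFAULT_USER32_CS*8 + 3)
    #define __USER_DS    (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
    #define __USER_CS    (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)

    int main(void)
    {
        /* SYSCALL: cs = STAR.KERNEL_CS, ss = STAR.KERNEL_CS + 8 */
        assert(__KERNEL_DS == __KERNEL_CS + 8);

        /* SYSRET to 32-bit user space: cs = STAR.SYSRET_CS      */
        /* SYSRET to 64-bit user space: cs = STAR.SYSRET_CS + 16 */
        /* ss in both cases:            ss = STAR.SYSRET_CS + 8  */
        assert(__USER_DS == __USER32_CS + 8);
        assert(__USER_CS == __USER32_CS + 16);
        return 0;
    }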
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ff4e7b236e21..f69e06b283fb 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h | |||
@@ -66,6 +66,11 @@ static inline void x86_ce4100_early_setup(void) { } | |||
66 | */ | 66 | */ |
67 | extern struct boot_params boot_params; | 67 | extern struct boot_params boot_params; |
68 | 68 | ||
69 | static inline bool kaslr_enabled(void) | ||
70 | { | ||
71 | return !!(boot_params.hdr.loadflags & KASLR_FLAG); | ||
72 | } | ||
73 | |||
69 | /* | 74 | /* |
70 | * Do NOT EVER look at the BIOS memory size location. | 75 | * Do NOT EVER look at the BIOS memory size location. |
71 | * It does not work on many machines. | 76 | * It does not work on many machines. |
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 9dfce4e0417d..6fe6b182c998 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h | |||
@@ -57,9 +57,9 @@ struct sigcontext { | |||
57 | unsigned long ip; | 57 | unsigned long ip; |
58 | unsigned long flags; | 58 | unsigned long flags; |
59 | unsigned short cs; | 59 | unsigned short cs; |
60 | unsigned short gs; | 60 | unsigned short __pad2; /* Was called gs, but was always zero. */ |
61 | unsigned short fs; | 61 | unsigned short __pad1; /* Was called fs, but was always zero. */ |
62 | unsigned short __pad0; | 62 | unsigned short ss; |
63 | unsigned long err; | 63 | unsigned long err; |
64 | unsigned long trapno; | 64 | unsigned long trapno; |
65 | unsigned long oldmask; | 65 | unsigned long oldmask; |
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index 7a958164088c..89db46752a8f 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h | |||
@@ -13,9 +13,7 @@ | |||
13 | X86_EFLAGS_CF | X86_EFLAGS_RF) | 13 | X86_EFLAGS_CF | X86_EFLAGS_RF) |
14 | 14 | ||
15 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where); | 15 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where); |
16 | 16 | int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc); | |
17 | int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | ||
18 | unsigned long *pax); | ||
19 | int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, | 17 | int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, |
20 | struct pt_regs *regs, unsigned long mask); | 18 | struct pt_regs *regs, unsigned long mask); |
21 | 19 | ||
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 8d3120f4e270..ba665ebd17bb 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h | |||
@@ -27,23 +27,11 @@ | |||
27 | 27 | ||
28 | #ifdef CONFIG_X86_SMAP | 28 | #ifdef CONFIG_X86_SMAP |
29 | 29 | ||
30 | #define ASM_CLAC \ | 30 | #define ASM_CLAC \ |
31 | 661: ASM_NOP3 ; \ | 31 | ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP |
32 | .pushsection .altinstr_replacement, "ax" ; \ | 32 | |
33 | 662: __ASM_CLAC ; \ | 33 | #define ASM_STAC \ |
34 | .popsection ; \ | 34 | ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP |
35 | .pushsection .altinstructions, "a" ; \ | ||
36 | altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \ | ||
37 | .popsection | ||
38 | |||
39 | #define ASM_STAC \ | ||
40 | 661: ASM_NOP3 ; \ | ||
41 | .pushsection .altinstr_replacement, "ax" ; \ | ||
42 | 662: __ASM_STAC ; \ | ||
43 | .popsection ; \ | ||
44 | .pushsection .altinstructions, "a" ; \ | ||
45 | altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \ | ||
46 | .popsection | ||
47 | 35 | ||
48 | #else /* CONFIG_X86_SMAP */ | 36 | #else /* CONFIG_X86_SMAP */ |
49 | 37 | ||
@@ -61,20 +49,20 @@ | |||
61 | static __always_inline void clac(void) | 49 | static __always_inline void clac(void) |
62 | { | 50 | { |
63 | /* Note: a barrier is implicit in alternative() */ | 51 | /* Note: a barrier is implicit in alternative() */ |
64 | alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP); | 52 | alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP); |
65 | } | 53 | } |
66 | 54 | ||
67 | static __always_inline void stac(void) | 55 | static __always_inline void stac(void) |
68 | { | 56 | { |
69 | /* Note: a barrier is implicit in alternative() */ | 57 | /* Note: a barrier is implicit in alternative() */ |
70 | alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP); | 58 | alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP); |
71 | } | 59 | } |
72 | 60 | ||
73 | /* These macros can be used in asm() statements */ | 61 | /* These macros can be used in asm() statements */ |
74 | #define ASM_CLAC \ | 62 | #define ASM_CLAC \ |
75 | ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP) | 63 | ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP) |
76 | #define ASM_STAC \ | 64 | #define ASM_STAC \ |
77 | ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP) | 65 | ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP) |
78 | 66 | ||
79 | #else /* CONFIG_X86_SMAP */ | 67 | #else /* CONFIG_X86_SMAP */ |
80 | 68 | ||
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 8cd1cc3bc835..17a8dced12da 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h | |||
@@ -150,12 +150,13 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) | |||
150 | } | 150 | } |
151 | 151 | ||
152 | void cpu_disable_common(void); | 152 | void cpu_disable_common(void); |
153 | void cpu_die_common(unsigned int cpu); | ||
154 | void native_smp_prepare_boot_cpu(void); | 153 | void native_smp_prepare_boot_cpu(void); |
155 | void native_smp_prepare_cpus(unsigned int max_cpus); | 154 | void native_smp_prepare_cpus(unsigned int max_cpus); |
156 | void native_smp_cpus_done(unsigned int max_cpus); | 155 | void native_smp_cpus_done(unsigned int max_cpus); |
156 | void common_cpu_up(unsigned int cpunum, struct task_struct *tidle); | ||
157 | int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); | 157 | int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); |
158 | int native_cpu_disable(void); | 158 | int native_cpu_disable(void); |
159 | int common_cpu_die(unsigned int cpu); | ||
159 | void native_cpu_die(unsigned int cpu); | 160 | void native_cpu_die(unsigned int cpu); |
160 | void native_play_dead(void); | 161 | void native_play_dead(void); |
161 | void play_dead_common(void); | 162 | void play_dead_common(void); |
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 6a4b00fafb00..aeb4666e0c0a 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h | |||
@@ -4,6 +4,8 @@ | |||
4 | 4 | ||
5 | #ifdef __KERNEL__ | 5 | #ifdef __KERNEL__ |
6 | 6 | ||
7 | #include <asm/nops.h> | ||
8 | |||
7 | static inline void native_clts(void) | 9 | static inline void native_clts(void) |
8 | { | 10 | { |
9 | asm volatile("clts"); | 11 | asm volatile("clts"); |
@@ -199,6 +201,28 @@ static inline void clflushopt(volatile void *__p) | |||
199 | "+m" (*(volatile char __force *)__p)); | 201 | "+m" (*(volatile char __force *)__p)); |
200 | } | 202 | } |
201 | 203 | ||
204 | static inline void clwb(volatile void *__p) | ||
205 | { | ||
206 | volatile struct { char x[64]; } *p = __p; | ||
207 | |||
208 | asm volatile(ALTERNATIVE_2( | ||
209 | ".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])", | ||
210 | ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */ | ||
211 | X86_FEATURE_CLFLUSHOPT, | ||
212 | ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */ | ||
213 | X86_FEATURE_CLWB) | ||
214 | : [p] "+m" (*p) | ||
215 | : [pax] "a" (p)); | ||
216 | } | ||
217 | |||
218 | static inline void pcommit_sfence(void) | ||
219 | { | ||
220 | alternative(ASM_NOP7, | ||
221 | ".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */ | ||
222 | "sfence", | ||
223 | X86_FEATURE_PCOMMIT); | ||
224 | } | ||
225 | |||
202 | #define nop() asm volatile ("nop") | 226 | #define nop() asm volatile ("nop") |
203 | 227 | ||
204 | 228 | ||
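clwb() and pcommit_sfence() above are the building blocks for making stores to persistent memory durable: write back each dirty cache line, then order and commit the flushes. A sketch of how a caller might drive them over a buffer, assuming 64-byte cache lines (matching the 64-byte window clwb() declares); the helper is illustrative, not an existing kernel API:

    /* Sketch only: push [addr, addr + len) towards persistence. */
    static void flush_pmem_range(void *addr, size_t len)
    {
        unsigned long p   = (unsigned long)addr & ~63UL;
        unsigned long end = (unsigned long)addr + len;

        for (; p < end; p += 64)
            clwb((void *)p);

        pcommit_sfence();    /* fence the flushes, then commit them */
    }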
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 1d4e4f279a32..b4bdec3e9523 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -13,19 +13,44 @@ | |||
13 | #include <asm/types.h> | 13 | #include <asm/types.h> |
14 | 14 | ||
15 | /* | 15 | /* |
16 | * TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we | ||
17 | * reserve at the top of the kernel stack. We do it because of a nasty | ||
18 | * 32-bit corner case. On x86_32, the hardware stack frame is | ||
19 | * variable-length. Except for vm86 mode, struct pt_regs assumes a | ||
20 | * maximum-length frame. If we enter from CPL 0, the top 8 bytes of | ||
21 | * pt_regs don't actually exist. Ordinarily this doesn't matter, but it | ||
22 | * does in at least one case: | ||
23 | * | ||
24 | * If we take an NMI early enough in SYSENTER, then we can end up with | ||
25 | * pt_regs that extends above sp0. On the way out, in the espfix code, | ||
26 | * we can read the saved SS value, but that value will be above sp0. | ||
27 | * Without this offset, that can result in a page fault. (We are | ||
28 | * careful that, in this case, the value we read doesn't matter.) | ||
29 | * | ||
30 | * In vm86 mode, the hardware frame is much longer still, but we neither | ||
31 | * access the extra members from NMI context, nor do we write such a | ||
32 | * frame at sp0 at all. | ||
33 | * | ||
34 | * x86_64 has a fixed-length stack frame. | ||
35 | */ | ||
36 | #ifdef CONFIG_X86_32 | ||
37 | # define TOP_OF_KERNEL_STACK_PADDING 8 | ||
38 | #else | ||
39 | # define TOP_OF_KERNEL_STACK_PADDING 0 | ||
40 | #endif | ||
41 | |||
42 | /* | ||
16 | * low level task data that entry.S needs immediate access to | 43 | * low level task data that entry.S needs immediate access to |
17 | * - this struct should fit entirely inside of one cache line | 44 | * - this struct should fit entirely inside of one cache line |
18 | * - this struct shares the supervisor stack pages | 45 | * - this struct shares the supervisor stack pages |
19 | */ | 46 | */ |
20 | #ifndef __ASSEMBLY__ | 47 | #ifndef __ASSEMBLY__ |
21 | struct task_struct; | 48 | struct task_struct; |
22 | struct exec_domain; | ||
23 | #include <asm/processor.h> | 49 | #include <asm/processor.h> |
24 | #include <linux/atomic.h> | 50 | #include <linux/atomic.h> |
25 | 51 | ||
26 | struct thread_info { | 52 | struct thread_info { |
27 | struct task_struct *task; /* main task structure */ | 53 | struct task_struct *task; /* main task structure */ |
28 | struct exec_domain *exec_domain; /* execution domain */ | ||
29 | __u32 flags; /* low level flags */ | 54 | __u32 flags; /* low level flags */ |
30 | __u32 status; /* thread synchronous flags */ | 55 | __u32 status; /* thread synchronous flags */ |
31 | __u32 cpu; /* current CPU */ | 56 | __u32 cpu; /* current CPU */ |
@@ -39,7 +64,6 @@ struct thread_info { | |||
39 | #define INIT_THREAD_INFO(tsk) \ | 64 | #define INIT_THREAD_INFO(tsk) \ |
40 | { \ | 65 | { \ |
41 | .task = &tsk, \ | 66 | .task = &tsk, \ |
42 | .exec_domain = &default_exec_domain, \ | ||
43 | .flags = 0, \ | 67 | .flags = 0, \ |
44 | .cpu = 0, \ | 68 | .cpu = 0, \ |
45 | .saved_preempt_count = INIT_PREEMPT_COUNT, \ | 69 | .saved_preempt_count = INIT_PREEMPT_COUNT, \ |
@@ -145,7 +169,6 @@ struct thread_info { | |||
145 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) | 169 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) |
146 | 170 | ||
147 | #define STACK_WARN (THREAD_SIZE/8) | 171 | #define STACK_WARN (THREAD_SIZE/8) |
148 | #define KERNEL_STACK_OFFSET (5*(BITS_PER_LONG/8)) | ||
149 | 172 | ||
150 | /* | 173 | /* |
151 | * macros/functions for gaining access to the thread information structure | 174 | * macros/functions for gaining access to the thread information structure |
@@ -158,10 +181,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack); | |||
158 | 181 | ||
159 | static inline struct thread_info *current_thread_info(void) | 182 | static inline struct thread_info *current_thread_info(void) |
160 | { | 183 | { |
161 | struct thread_info *ti; | 184 | return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE); |
162 | ti = (void *)(this_cpu_read_stable(kernel_stack) + | ||
163 | KERNEL_STACK_OFFSET - THREAD_SIZE); | ||
164 | return ti; | ||
165 | } | 185 | } |
166 | 186 | ||
167 | static inline unsigned long current_stack_pointer(void) | 187 | static inline unsigned long current_stack_pointer(void) |
@@ -177,16 +197,37 @@ static inline unsigned long current_stack_pointer(void) | |||
177 | 197 | ||
178 | #else /* !__ASSEMBLY__ */ | 198 | #else /* !__ASSEMBLY__ */ |
179 | 199 | ||
180 | /* how to get the thread information struct from ASM */ | 200 | /* Load thread_info address into "reg" */ |
181 | #define GET_THREAD_INFO(reg) \ | 201 | #define GET_THREAD_INFO(reg) \ |
182 | _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \ | 202 | _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \ |
183 | _ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ; | 203 | _ASM_SUB $(THREAD_SIZE),reg ; |
184 | 204 | ||
185 | /* | 205 | /* |
186 | * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in | 206 | * ASM operand which evaluates to a 'thread_info' address of |
187 | * a certain register (to be used in assembler memory operands). | 207 | * the current task, if it is known that "reg" is exactly "off" |
208 | * bytes below the top of the stack currently. | ||
209 | * | ||
210 | * ( The kernel stack's size is known at build time, it is usually | ||
211 | * 2 or 4 pages, and the bottom of the kernel stack contains | ||
212 | * the thread_info structure. So to access the thread_info very | ||
213 | * quickly from assembly code we can calculate down from the | ||
214 | * top of the kernel stack to the bottom, using constant, | ||
215 | * build-time calculations only. ) | ||
216 | * | ||
217 | * For example, to fetch the current thread_info->flags value into %eax | ||
218 | * on x86-64 defconfig kernels, in syscall entry code where RSP is | ||
219 | * currently at exactly SIZEOF_PTREGS bytes away from the top of the | ||
220 | * stack: | ||
221 | * | ||
222 | * mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax | ||
223 | * | ||
224 | * will translate to: | ||
225 | * | ||
226 | * 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax | ||
227 | * | ||
228 | * which is below the current RSP by almost 16K. | ||
188 | */ | 229 | */ |
189 | #define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg) | 230 | #define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg) |
190 | 231 | ||
191 | #endif | 232 | #endif |
192 | 233 | ||
@@ -236,6 +277,16 @@ static inline bool is_ia32_task(void) | |||
236 | #endif | 277 | #endif |
237 | return false; | 278 | return false; |
238 | } | 279 | } |
280 | |||
281 | /* | ||
282 | * Force syscall return via IRET by making it look as if there was | ||
283 | * some work pending. IRET is our most capable (but slowest) syscall | ||
284 | * return path, which is able to restore modified SS, CS and certain | ||
285 | * EFLAGS values that other (fast) syscall return instructions | ||
286 | * are not able to restore properly. | ||
287 | */ | ||
288 | #define force_iret() set_thread_flag(TIF_NOTIFY_RESUME) | ||
289 | |||
239 | #endif /* !__ASSEMBLY__ */ | 290 | #endif /* !__ASSEMBLY__ */ |
240 | 291 | ||
241 | #ifndef __ASSEMBLY__ | 292 | #ifndef __ASSEMBLY__ |
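force_iret() above only sets TIF_NOTIFY_RESUME; the point is that the pending-work exit path leaves the kernel through IRET, the one return instruction that reloads SS, CS and the awkward EFLAGS bits. A hypothetical caller to show when that matters; the function and its context are illustrative, not existing kernel code:

    /* Sketch only: after rewriting saved user segments, insist on IRET. */
    static void demo_set_user_segments(struct pt_regs *regs,
                                       unsigned short cs, unsigned short ss)
    {
        regs->cs = cs;
        regs->ss = ss;
        force_iret();    /* the fast SYSRET/SYSEXIT paths cannot restore these */
    }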
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 12a26b979bf1..f2f9b39b274a 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h | |||
@@ -231,6 +231,6 @@ __copy_from_user_inatomic_nocache(void *dst, const void __user *src, | |||
231 | } | 231 | } |
232 | 232 | ||
233 | unsigned long | 233 | unsigned long |
234 | copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest); | 234 | copy_user_handle_tail(char *to, char *from, unsigned len); |
235 | 235 | ||
236 | #endif /* _ASM_X86_UACCESS_64_H */ | 236 | #endif /* _ASM_X86_UACCESS_64_H */ |
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 225b0988043a..ab456dc233b5 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h | |||
@@ -15,6 +15,7 @@ | |||
15 | 15 | ||
16 | /* loadflags */ | 16 | /* loadflags */ |
17 | #define LOADED_HIGH (1<<0) | 17 | #define LOADED_HIGH (1<<0) |
18 | #define KASLR_FLAG (1<<1) | ||
18 | #define QUIET_FLAG (1<<5) | 19 | #define QUIET_FLAG (1<<5) |
19 | #define KEEP_SEGMENTS (1<<6) | 20 | #define KEEP_SEGMENTS (1<<6) |
20 | #define CAN_USE_HEAP (1<<7) | 21 | #define CAN_USE_HEAP (1<<7) |
diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h index 7b0a55a88851..580aee3072e0 100644 --- a/arch/x86/include/uapi/asm/ptrace-abi.h +++ b/arch/x86/include/uapi/asm/ptrace-abi.h | |||
@@ -25,13 +25,17 @@ | |||
25 | #else /* __i386__ */ | 25 | #else /* __i386__ */ |
26 | 26 | ||
27 | #if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) | 27 | #if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) |
28 | /* | ||
29 | * C ABI says these regs are callee-preserved. They aren't saved on kernel entry | ||
30 | * unless syscall needs a complete, fully filled "struct pt_regs". | ||
31 | */ | ||
28 | #define R15 0 | 32 | #define R15 0 |
29 | #define R14 8 | 33 | #define R14 8 |
30 | #define R13 16 | 34 | #define R13 16 |
31 | #define R12 24 | 35 | #define R12 24 |
32 | #define RBP 32 | 36 | #define RBP 32 |
33 | #define RBX 40 | 37 | #define RBX 40 |
34 | /* arguments: interrupts/non tracing syscalls only save up to here*/ | 38 | /* These regs are callee-clobbered. Always saved on kernel entry. */ |
35 | #define R11 48 | 39 | #define R11 48 |
36 | #define R10 56 | 40 | #define R10 56 |
37 | #define R9 64 | 41 | #define R9 64 |
@@ -41,15 +45,17 @@ | |||
41 | #define RDX 96 | 45 | #define RDX 96 |
42 | #define RSI 104 | 46 | #define RSI 104 |
43 | #define RDI 112 | 47 | #define RDI 112 |
44 | #define ORIG_RAX 120 /* = ERROR */ | 48 | /* |
45 | /* end of arguments */ | 49 | * On syscall entry, this is syscall#. On CPU exception, this is error code. |
46 | /* cpu exception frame or undefined in case of fast syscall. */ | 50 | * On hw interrupt, it's IRQ number: |
51 | */ | ||
52 | #define ORIG_RAX 120 | ||
53 | /* Return frame for iretq */ | ||
47 | #define RIP 128 | 54 | #define RIP 128 |
48 | #define CS 136 | 55 | #define CS 136 |
49 | #define EFLAGS 144 | 56 | #define EFLAGS 144 |
50 | #define RSP 152 | 57 | #define RSP 152 |
51 | #define SS 160 | 58 | #define SS 160 |
52 | #define ARGOFFSET R11 | ||
53 | #endif /* __ASSEMBLY__ */ | 59 | #endif /* __ASSEMBLY__ */ |
54 | 60 | ||
55 | /* top of stack page */ | 61 | /* top of stack page */ |
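These values are byte offsets into the saved register frame, which is also how ptrace exposes registers through the USER area, so a debugger can address individual slots with them. An illustrative userspace use, assuming a tracee already stopped under ptrace:

#include <asm/ptrace-abi.h>     /* the byte offsets defined above */
#include <sys/ptrace.h>
#include <sys/types.h>

/* Fetch the syscall number of a stopped tracee via its ORIG_RAX slot. */
static long tracee_syscall_nr(pid_t pid)
{
        return ptrace(PTRACE_PEEKUSER, pid, (void *)ORIG_RAX, 0);
}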
diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h index ac4b9aa4d999..bc16115af39b 100644 --- a/arch/x86/include/uapi/asm/ptrace.h +++ b/arch/x86/include/uapi/asm/ptrace.h | |||
@@ -41,13 +41,17 @@ struct pt_regs { | |||
41 | #ifndef __KERNEL__ | 41 | #ifndef __KERNEL__ |
42 | 42 | ||
43 | struct pt_regs { | 43 | struct pt_regs { |
44 | /* | ||
45 | * C ABI says these regs are callee-preserved. They aren't saved on kernel entry | ||
46 | * unless syscall needs a complete, fully filled "struct pt_regs". | ||
47 | */ | ||
44 | unsigned long r15; | 48 | unsigned long r15; |
45 | unsigned long r14; | 49 | unsigned long r14; |
46 | unsigned long r13; | 50 | unsigned long r13; |
47 | unsigned long r12; | 51 | unsigned long r12; |
48 | unsigned long rbp; | 52 | unsigned long rbp; |
49 | unsigned long rbx; | 53 | unsigned long rbx; |
50 | /* arguments: non interrupts/non tracing syscalls only save up to here*/ | 54 | /* These regs are callee-clobbered. Always saved on kernel entry. */ |
51 | unsigned long r11; | 55 | unsigned long r11; |
52 | unsigned long r10; | 56 | unsigned long r10; |
53 | unsigned long r9; | 57 | unsigned long r9; |
@@ -57,9 +61,12 @@ struct pt_regs { | |||
57 | unsigned long rdx; | 61 | unsigned long rdx; |
58 | unsigned long rsi; | 62 | unsigned long rsi; |
59 | unsigned long rdi; | 63 | unsigned long rdi; |
64 | /* | ||
65 | * On syscall entry, this is syscall#. On CPU exception, this is error code. | ||
66 | * On hw interrupt, it's IRQ number: | ||
67 | */ | ||
60 | unsigned long orig_rax; | 68 | unsigned long orig_rax; |
61 | /* end of arguments */ | 69 | /* Return frame for iretq */ |
62 | /* cpu exception frame or undefined */ | ||
63 | unsigned long rip; | 70 | unsigned long rip; |
64 | unsigned long cs; | 71 | unsigned long cs; |
65 | unsigned long eflags; | 72 | unsigned long eflags; |
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index d8b9f9081e86..16dc4e8a2cd3 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h | |||
@@ -177,9 +177,24 @@ struct sigcontext { | |||
177 | __u64 rip; | 177 | __u64 rip; |
178 | __u64 eflags; /* RFLAGS */ | 178 | __u64 eflags; /* RFLAGS */ |
179 | __u16 cs; | 179 | __u16 cs; |
180 | __u16 gs; | 180 | |
181 | __u16 fs; | 181 | /* |
182 | __u16 __pad0; | 182 | * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"), |
183 | * Linux saved and restored fs and gs in these slots. This | ||
184 | * was counterproductive, as fsbase and gsbase were never | ||
185 | * saved, so arch_prctl was presumably unreliable. | ||
186 | * | ||
187 | * If these slots are ever needed for any other purpose, there | ||
188 | * is some risk that very old 64-bit binaries could get | ||
189 | * confused. I doubt that many such binaries still work, | ||
190 | * though, since the same patch in 2.5.64 also removed the | ||
191 | * 64-bit set_thread_area syscall, so it appears that there is | ||
192 | * no TLS API that works in both pre- and post-2.5.64 kernels. | ||
193 | */ | ||
194 | __u16 __pad2; /* Was gs. */ | ||
195 | __u16 __pad1; /* Was fs. */ | ||
196 | |||
197 | __u16 ss; | ||
183 | __u64 err; | 198 | __u64 err; |
184 | __u64 trapno; | 199 | __u64 trapno; |
185 | __u64 oldmask; | 200 | __u64 oldmask; |
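Userspace rarely touched the old fs/gs slots directly; signal handlers see this frame through glibc's ucontext wrapper, so the renamed padding is invisible to well-behaved code. For illustration, the usual way a handler inspects the saved state:

#define _GNU_SOURCE
#include <signal.h>
#include <ucontext.h>

static volatile unsigned long long saved_rip;

/* Illustrative SA_SIGINFO handler: read the saved RIP out of the delivered
 * context (glibc exposes the kernel's sigcontext through gregs). */
static void handler(int sig, siginfo_t *si, void *ctx)
{
        ucontext_t *uc = ctx;

        saved_rip = uc->uc_mcontext.gregs[REG_RIP];
}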
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index c5f1a1deb91a..1fe92181ee9e 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h | |||
@@ -67,6 +67,7 @@ | |||
67 | #define EXIT_REASON_EPT_VIOLATION 48 | 67 | #define EXIT_REASON_EPT_VIOLATION 48 |
68 | #define EXIT_REASON_EPT_MISCONFIG 49 | 68 | #define EXIT_REASON_EPT_MISCONFIG 49 |
69 | #define EXIT_REASON_INVEPT 50 | 69 | #define EXIT_REASON_INVEPT 50 |
70 | #define EXIT_REASON_RDTSCP 51 | ||
70 | #define EXIT_REASON_PREEMPTION_TIMER 52 | 71 | #define EXIT_REASON_PREEMPTION_TIMER 52 |
71 | #define EXIT_REASON_INVVPID 53 | 72 | #define EXIT_REASON_INVVPID 53 |
72 | #define EXIT_REASON_WBINVD 54 | 73 | #define EXIT_REASON_WBINVD 54 |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index cdb1b70ddad0..c887cd944f0c 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -32,6 +32,7 @@ obj-$(CONFIG_X86_32) += i386_ksyms_32.o | |||
32 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | 32 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o |
33 | obj-$(CONFIG_X86_64) += mcount_64.o | 33 | obj-$(CONFIG_X86_64) += mcount_64.o |
34 | obj-y += syscall_$(BITS).o vsyscall_gtod.o | 34 | obj-y += syscall_$(BITS).o vsyscall_gtod.o |
35 | obj-$(CONFIG_IA32_EMULATION) += syscall_32.o | ||
35 | obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o | 36 | obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o |
36 | obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o | 37 | obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o |
37 | obj-$(CONFIG_SYSFS) += ksysfs.o | 38 | obj-$(CONFIG_SYSFS) += ksysfs.o |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 703130f469ec..aef653193160 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -52,10 +52,25 @@ static int __init setup_noreplace_paravirt(char *str) | |||
52 | __setup("noreplace-paravirt", setup_noreplace_paravirt); | 52 | __setup("noreplace-paravirt", setup_noreplace_paravirt); |
53 | #endif | 53 | #endif |
54 | 54 | ||
55 | #define DPRINTK(fmt, ...) \ | 55 | #define DPRINTK(fmt, args...) \ |
56 | do { \ | 56 | do { \ |
57 | if (debug_alternative) \ | 57 | if (debug_alternative) \ |
58 | printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ | 58 | printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args); \ |
59 | } while (0) | ||
60 | |||
61 | #define DUMP_BYTES(buf, len, fmt, args...) \ | ||
62 | do { \ | ||
63 | if (unlikely(debug_alternative)) { \ | ||
64 | int j; \ | ||
65 | \ | ||
66 | if (!(len)) \ | ||
67 | break; \ | ||
68 | \ | ||
69 | printk(KERN_DEBUG fmt, ##args); \ | ||
70 | for (j = 0; j < (len) - 1; j++) \ | ||
71 | printk(KERN_CONT "%02hhx ", buf[j]); \ | ||
72 | printk(KERN_CONT "%02hhx\n", buf[j]); \ | ||
73 | } \ | ||
59 | } while (0) | 74 | } while (0) |
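DUMP_BYTES() hex-dumps a patch buffer when the kernel is booted with debug-alternative. A call of the form used later in this patch, and the kind of line it produces (address and bytes are made up for illustration):

DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr);

/* With debug-alternative on the command line this prints, e.g.:
 *   ffffffff81029af0: final_insn: 0f 1f 44 00 00
 */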
60 | 75 | ||
61 | /* | 76 | /* |
@@ -243,12 +258,89 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | |||
243 | extern s32 __smp_locks[], __smp_locks_end[]; | 258 | extern s32 __smp_locks[], __smp_locks_end[]; |
244 | void *text_poke_early(void *addr, const void *opcode, size_t len); | 259 | void *text_poke_early(void *addr, const void *opcode, size_t len); |
245 | 260 | ||
246 | /* Replace instructions with better alternatives for this CPU type. | 261 | /* |
247 | This runs before SMP is initialized to avoid SMP problems with | 262 | * Are we looking at a near JMP with a 1 or 4-byte displacement. |
248 | self modifying code. This implies that asymmetric systems where | 263 | */ |
249 | APs have less capabilities than the boot processor are not handled. | 264 | static inline bool is_jmp(const u8 opcode) |
250 | Tough. Make sure you disable such features by hand. */ | 265 | { |
266 | return opcode == 0xeb || opcode == 0xe9; | ||
267 | } | ||
268 | |||
269 | static void __init_or_module | ||
270 | recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) | ||
271 | { | ||
272 | u8 *next_rip, *tgt_rip; | ||
273 | s32 n_dspl, o_dspl; | ||
274 | int repl_len; | ||
275 | |||
276 | if (a->replacementlen != 5) | ||
277 | return; | ||
278 | |||
279 | o_dspl = *(s32 *)(insnbuf + 1); | ||
280 | |||
281 | /* next_rip of the replacement JMP */ | ||
282 | next_rip = repl_insn + a->replacementlen; | ||
283 | /* target rip of the replacement JMP */ | ||
284 | tgt_rip = next_rip + o_dspl; | ||
285 | n_dspl = tgt_rip - orig_insn; | ||
286 | |||
287 | DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl); | ||
288 | |||
289 | if (tgt_rip - orig_insn >= 0) { | ||
290 | if (n_dspl - 2 <= 127) | ||
291 | goto two_byte_jmp; | ||
292 | else | ||
293 | goto five_byte_jmp; | ||
294 | /* negative offset */ | ||
295 | } else { | ||
296 | if (((n_dspl - 2) & 0xff) == (n_dspl - 2)) | ||
297 | goto two_byte_jmp; | ||
298 | else | ||
299 | goto five_byte_jmp; | ||
300 | } | ||
301 | |||
302 | two_byte_jmp: | ||
303 | n_dspl -= 2; | ||
304 | |||
305 | insnbuf[0] = 0xeb; | ||
306 | insnbuf[1] = (s8)n_dspl; | ||
307 | add_nops(insnbuf + 2, 3); | ||
308 | |||
309 | repl_len = 2; | ||
310 | goto done; | ||
311 | |||
312 | five_byte_jmp: | ||
313 | n_dspl -= 5; | ||
314 | |||
315 | insnbuf[0] = 0xe9; | ||
316 | *(s32 *)&insnbuf[1] = n_dspl; | ||
251 | 317 | ||
318 | repl_len = 5; | ||
319 | |||
320 | done: | ||
321 | |||
322 | DPRINTK("final displ: 0x%08x, JMP 0x%lx", | ||
323 | n_dspl, (unsigned long)orig_insn + n_dspl + repl_len); | ||
324 | } | ||
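recompute_jump() rebases a relative JMP copied from the replacement so it still reaches its target from the original site, and shrinks it to the 2-byte form when the new displacement fits. A simplified sketch of the size decision (illustrative, ignoring the sign-specific checks above):

/* Which encoding does a displacement need once rebased to the patch site? */
static int jmp_len_sketch(s32 n_dspl)
{
        /* 2-byte JMP: 0xeb + rel8, displacement counted past the insn */
        if (n_dspl - 2 >= -128 && n_dspl - 2 <= 127)
                return 2;       /* remaining bytes become NOP padding */
        return 5;               /* 5-byte JMP: 0xe9 + rel32 */
}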
325 | |||
326 | static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) | ||
327 | { | ||
328 | if (instr[0] != 0x90) | ||
329 | return; | ||
330 | |||
331 | add_nops(instr + (a->instrlen - a->padlen), a->padlen); | ||
332 | |||
333 | DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", | ||
334 | instr, a->instrlen - a->padlen, a->padlen); | ||
335 | } | ||
336 | |||
337 | /* | ||
338 | * Replace instructions with better alternatives for this CPU type. This runs | ||
339 | * before SMP is initialized to avoid SMP problems with self modifying code. | ||
340 | * This implies that asymmetric systems where APs have less capabilities than | ||
341 | * the boot processor are not handled. Tough. Make sure you disable such | ||
342 | * features by hand. | ||
343 | */ | ||
252 | void __init_or_module apply_alternatives(struct alt_instr *start, | 344 | void __init_or_module apply_alternatives(struct alt_instr *start, |
253 | struct alt_instr *end) | 345 | struct alt_instr *end) |
254 | { | 346 | { |
@@ -256,10 +348,10 @@ void __init_or_module apply_alternatives(struct alt_instr *start, | |||
256 | u8 *instr, *replacement; | 348 | u8 *instr, *replacement; |
257 | u8 insnbuf[MAX_PATCH_LEN]; | 349 | u8 insnbuf[MAX_PATCH_LEN]; |
258 | 350 | ||
259 | DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); | 351 | DPRINTK("alt table %p -> %p", start, end); |
260 | /* | 352 | /* |
261 | * The scan order should be from start to end. A later scanned | 353 | * The scan order should be from start to end. A later scanned |
262 | * alternative code can overwrite a previous scanned alternative code. | 354 | * alternative code can overwrite previously scanned alternative code. |
263 | * Some kernel functions (e.g. memcpy, memset, etc) use this order to | 355 | * Some kernel functions (e.g. memcpy, memset, etc) use this order to |
264 | * patch code. | 356 | * patch code. |
265 | * | 357 | * |
@@ -267,29 +359,54 @@ void __init_or_module apply_alternatives(struct alt_instr *start, | |||
267 | * order. | 359 | * order. |
268 | */ | 360 | */ |
269 | for (a = start; a < end; a++) { | 361 | for (a = start; a < end; a++) { |
362 | int insnbuf_sz = 0; | ||
363 | |||
270 | instr = (u8 *)&a->instr_offset + a->instr_offset; | 364 | instr = (u8 *)&a->instr_offset + a->instr_offset; |
271 | replacement = (u8 *)&a->repl_offset + a->repl_offset; | 365 | replacement = (u8 *)&a->repl_offset + a->repl_offset; |
272 | BUG_ON(a->replacementlen > a->instrlen); | ||
273 | BUG_ON(a->instrlen > sizeof(insnbuf)); | 366 | BUG_ON(a->instrlen > sizeof(insnbuf)); |
274 | BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32); | 367 | BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32); |
275 | if (!boot_cpu_has(a->cpuid)) | 368 | if (!boot_cpu_has(a->cpuid)) { |
369 | if (a->padlen > 1) | ||
370 | optimize_nops(a, instr); | ||
371 | |||
276 | continue; | 372 | continue; |
373 | } | ||
374 | |||
375 | DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d", | ||
376 | a->cpuid >> 5, | ||
377 | a->cpuid & 0x1f, | ||
378 | instr, a->instrlen, | ||
379 | replacement, a->replacementlen, a->padlen); | ||
380 | |||
381 | DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr); | ||
382 | DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement); | ||
277 | 383 | ||
278 | memcpy(insnbuf, replacement, a->replacementlen); | 384 | memcpy(insnbuf, replacement, a->replacementlen); |
385 | insnbuf_sz = a->replacementlen; | ||
279 | 386 | ||
280 | /* 0xe8 is a relative jump; fix the offset. */ | 387 | /* 0xe8 is a relative jump; fix the offset. */ |
281 | if (*insnbuf == 0xe8 && a->replacementlen == 5) | 388 | if (*insnbuf == 0xe8 && a->replacementlen == 5) { |
282 | *(s32 *)(insnbuf + 1) += replacement - instr; | 389 | *(s32 *)(insnbuf + 1) += replacement - instr; |
390 | DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx", | ||
391 | *(s32 *)(insnbuf + 1), | ||
392 | (unsigned long)instr + *(s32 *)(insnbuf + 1) + 5); | ||
393 | } | ||
394 | |||
395 | if (a->replacementlen && is_jmp(replacement[0])) | ||
396 | recompute_jump(a, instr, replacement, insnbuf); | ||
283 | 397 | ||
284 | add_nops(insnbuf + a->replacementlen, | 398 | if (a->instrlen > a->replacementlen) { |
285 | a->instrlen - a->replacementlen); | 399 | add_nops(insnbuf + a->replacementlen, |
400 | a->instrlen - a->replacementlen); | ||
401 | insnbuf_sz += a->instrlen - a->replacementlen; | ||
402 | } | ||
403 | DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr); | ||
286 | 404 | ||
287 | text_poke_early(instr, insnbuf, a->instrlen); | 405 | text_poke_early(instr, insnbuf, insnbuf_sz); |
288 | } | 406 | } |
289 | } | 407 | } |
290 | 408 | ||
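apply_alternatives() walks an array of per-site records. The fields dereferenced above imply roughly this layout (a sketch only; the authoritative definition is struct alt_instr in asm/alternative.h):

struct alt_instr_sketch {
        s32 instr_offset;       /* original instruction, relative to this field */
        s32 repl_offset;        /* replacement instruction, relative to this field */
        u16 cpuid;              /* feature bit that selects the replacement */
        u8  instrlen;           /* length of the original insn (incl. padding) */
        u8  replacementlen;     /* length of the replacement */
        u8  padlen;             /* compile-time NOP padding after the original */
};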
291 | #ifdef CONFIG_SMP | 409 | #ifdef CONFIG_SMP |
292 | |||
293 | static void alternatives_smp_lock(const s32 *start, const s32 *end, | 410 | static void alternatives_smp_lock(const s32 *start, const s32 *end, |
294 | u8 *text, u8 *text_end) | 411 | u8 *text, u8 *text_end) |
295 | { | 412 | { |
@@ -371,8 +488,8 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, | |||
371 | smp->locks_end = locks_end; | 488 | smp->locks_end = locks_end; |
372 | smp->text = text; | 489 | smp->text = text; |
373 | smp->text_end = text_end; | 490 | smp->text_end = text_end; |
374 | DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n", | 491 | DPRINTK("locks %p -> %p, text %p -> %p, name %s\n", |
375 | __func__, smp->locks, smp->locks_end, | 492 | smp->locks, smp->locks_end, |
376 | smp->text, smp->text_end, smp->name); | 493 | smp->text, smp->text_end, smp->name); |
377 | 494 | ||
378 | list_add_tail(&smp->next, &smp_alt_modules); | 495 | list_add_tail(&smp->next, &smp_alt_modules); |
@@ -440,7 +557,7 @@ int alternatives_text_reserved(void *start, void *end) | |||
440 | 557 | ||
441 | return 0; | 558 | return 0; |
442 | } | 559 | } |
443 | #endif | 560 | #endif /* CONFIG_SMP */ |
444 | 561 | ||
445 | #ifdef CONFIG_PARAVIRT | 562 | #ifdef CONFIG_PARAVIRT |
446 | void __init_or_module apply_paravirt(struct paravirt_patch_site *start, | 563 | void __init_or_module apply_paravirt(struct paravirt_patch_site *start, |
@@ -601,7 +718,7 @@ int poke_int3_handler(struct pt_regs *regs) | |||
601 | if (likely(!bp_patching_in_progress)) | 718 | if (likely(!bp_patching_in_progress)) |
602 | return 0; | 719 | return 0; |
603 | 720 | ||
604 | if (user_mode_vm(regs) || regs->ip != (unsigned long)bp_int3_addr) | 721 | if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr) |
605 | return 0; | 722 | return 0; |
606 | 723 | ||
607 | /* set up the specified breakpoint handler */ | 724 | /* set up the specified breakpoint handler */ |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ad3639ae1b9b..dcb52850a28f 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -1084,67 +1084,6 @@ void lapic_shutdown(void) | |||
1084 | local_irq_restore(flags); | 1084 | local_irq_restore(flags); |
1085 | } | 1085 | } |
1086 | 1086 | ||
1087 | /* | ||
1088 | * This is to verify that we're looking at a real local APIC. | ||
1089 | * Check these against your board if the CPUs aren't getting | ||
1090 | * started for no apparent reason. | ||
1091 | */ | ||
1092 | int __init verify_local_APIC(void) | ||
1093 | { | ||
1094 | unsigned int reg0, reg1; | ||
1095 | |||
1096 | /* | ||
1097 | * The version register is read-only in a real APIC. | ||
1098 | */ | ||
1099 | reg0 = apic_read(APIC_LVR); | ||
1100 | apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); | ||
1101 | apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); | ||
1102 | reg1 = apic_read(APIC_LVR); | ||
1103 | apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); | ||
1104 | |||
1105 | /* | ||
1106 | * The two version reads above should print the same | ||
1107 | * numbers. If the second one is different, then we | ||
1108 | * poke at a non-APIC. | ||
1109 | */ | ||
1110 | if (reg1 != reg0) | ||
1111 | return 0; | ||
1112 | |||
1113 | /* | ||
1114 | * Check if the version looks reasonably. | ||
1115 | */ | ||
1116 | reg1 = GET_APIC_VERSION(reg0); | ||
1117 | if (reg1 == 0x00 || reg1 == 0xff) | ||
1118 | return 0; | ||
1119 | reg1 = lapic_get_maxlvt(); | ||
1120 | if (reg1 < 0x02 || reg1 == 0xff) | ||
1121 | return 0; | ||
1122 | |||
1123 | /* | ||
1124 | * The ID register is read/write in a real APIC. | ||
1125 | */ | ||
1126 | reg0 = apic_read(APIC_ID); | ||
1127 | apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); | ||
1128 | apic_write(APIC_ID, reg0 ^ apic->apic_id_mask); | ||
1129 | reg1 = apic_read(APIC_ID); | ||
1130 | apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); | ||
1131 | apic_write(APIC_ID, reg0); | ||
1132 | if (reg1 != (reg0 ^ apic->apic_id_mask)) | ||
1133 | return 0; | ||
1134 | |||
1135 | /* | ||
1136 | * The next two are just to see if we have sane values. | ||
1137 | * They're only really relevant if we're in Virtual Wire | ||
1138 | * compatibility mode, but most boxes are anymore. | ||
1139 | */ | ||
1140 | reg0 = apic_read(APIC_LVT0); | ||
1141 | apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); | ||
1142 | reg1 = apic_read(APIC_LVT1); | ||
1143 | apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); | ||
1144 | |||
1145 | return 1; | ||
1146 | } | ||
1147 | |||
1148 | /** | 1087 | /** |
1149 | * sync_Arb_IDs - synchronize APIC bus arbitration IDs | 1088 | * sync_Arb_IDs - synchronize APIC bus arbitration IDs |
1150 | */ | 1089 | */ |
@@ -2283,7 +2222,6 @@ int __init APIC_init_uniprocessor(void) | |||
2283 | disable_ioapic_support(); | 2222 | disable_ioapic_support(); |
2284 | 2223 | ||
2285 | default_setup_apic_routing(); | 2224 | default_setup_apic_routing(); |
2286 | verify_local_APIC(); | ||
2287 | apic_bsp_setup(true); | 2225 | apic_bsp_setup(true); |
2288 | return 0; | 2226 | return 0; |
2289 | } | 2227 | } |
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index e658f21681c8..d9d0bd2faaf4 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c | |||
@@ -135,12 +135,12 @@ static void init_x2apic_ldr(void) | |||
135 | 135 | ||
136 | per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR); | 136 | per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR); |
137 | 137 | ||
138 | __cpu_set(this_cpu, per_cpu(cpus_in_cluster, this_cpu)); | 138 | cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, this_cpu)); |
139 | for_each_online_cpu(cpu) { | 139 | for_each_online_cpu(cpu) { |
140 | if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) | 140 | if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) |
141 | continue; | 141 | continue; |
142 | __cpu_set(this_cpu, per_cpu(cpus_in_cluster, cpu)); | 142 | cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu)); |
143 | __cpu_set(cpu, per_cpu(cpus_in_cluster, this_cpu)); | 143 | cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu)); |
144 | } | 144 | } |
145 | } | 145 | } |
146 | 146 | ||
@@ -195,7 +195,7 @@ static int x2apic_init_cpu_notifier(void) | |||
195 | 195 | ||
196 | BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu)); | 196 | BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu)); |
197 | 197 | ||
198 | __cpu_set(cpu, per_cpu(cpus_in_cluster, cpu)); | 198 | cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu)); |
199 | register_hotcpu_notifier(&x2apic_cpu_notifier); | 199 | register_hotcpu_notifier(&x2apic_cpu_notifier); |
200 | return 1; | 200 | return 1; |
201 | } | 201 | } |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 8e9dcfd630e4..c8d92950bc04 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -144,33 +144,60 @@ static void __init uv_set_apicid_hibit(void) | |||
144 | 144 | ||
145 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 145 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
146 | { | 146 | { |
147 | int pnodeid, is_uv1, is_uv2, is_uv3; | 147 | int pnodeid; |
148 | 148 | int uv_apic; | |
149 | is_uv1 = !strcmp(oem_id, "SGI"); | 149 | |
150 | is_uv2 = !strcmp(oem_id, "SGI2"); | 150 | if (strncmp(oem_id, "SGI", 3) != 0) |
151 | is_uv3 = !strncmp(oem_id, "SGI3", 4); /* there are varieties of UV3 */ | 151 | return 0; |
152 | if (is_uv1 || is_uv2 || is_uv3) { | 152 | |
153 | uv_hub_info->hub_revision = | 153 | /* |
154 | (is_uv1 ? UV1_HUB_REVISION_BASE : | 154 | * Determine UV arch type. |
155 | (is_uv2 ? UV2_HUB_REVISION_BASE : | 155 | * SGI: UV100/1000 |
156 | UV3_HUB_REVISION_BASE)); | 156 | * SGI2: UV2000/3000 |
157 | pnodeid = early_get_pnodeid(); | 157 | * SGI3: UV300 (truncated to 4 chars because of different varieties) |
158 | early_get_apic_pnode_shift(); | 158 | */ |
159 | x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; | 159 | uv_hub_info->hub_revision = |
160 | x86_platform.nmi_init = uv_nmi_init; | 160 | !strncmp(oem_id, "SGI3", 4) ? UV3_HUB_REVISION_BASE : |
161 | if (!strcmp(oem_table_id, "UVL")) | 161 | !strcmp(oem_id, "SGI2") ? UV2_HUB_REVISION_BASE : |
162 | uv_system_type = UV_LEGACY_APIC; | 162 | !strcmp(oem_id, "SGI") ? UV1_HUB_REVISION_BASE : 0; |
163 | else if (!strcmp(oem_table_id, "UVX")) | 163 | |
164 | uv_system_type = UV_X2APIC; | 164 | if (uv_hub_info->hub_revision == 0) |
165 | else if (!strcmp(oem_table_id, "UVH")) { | 165 | goto badbios; |
166 | __this_cpu_write(x2apic_extra_bits, | 166 | |
167 | pnodeid << uvh_apicid.s.pnode_shift); | 167 | pnodeid = early_get_pnodeid(); |
168 | uv_system_type = UV_NON_UNIQUE_APIC; | 168 | early_get_apic_pnode_shift(); |
169 | uv_set_apicid_hibit(); | 169 | x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; |
170 | return 1; | 170 | x86_platform.nmi_init = uv_nmi_init; |
171 | } | 171 | |
172 | if (!strcmp(oem_table_id, "UVX")) { /* most common */ | ||
173 | uv_system_type = UV_X2APIC; | ||
174 | uv_apic = 0; | ||
175 | |||
176 | } else if (!strcmp(oem_table_id, "UVH")) { /* only UV1 systems */ | ||
177 | uv_system_type = UV_NON_UNIQUE_APIC; | ||
178 | __this_cpu_write(x2apic_extra_bits, | ||
179 | pnodeid << uvh_apicid.s.pnode_shift); | ||
180 | uv_set_apicid_hibit(); | ||
181 | uv_apic = 1; | ||
182 | |||
183 | } else if (!strcmp(oem_table_id, "UVL")) { /* only used for */ | ||
184 | uv_system_type = UV_LEGACY_APIC; /* very small systems */ | ||
185 | uv_apic = 0; | ||
186 | |||
187 | } else { | ||
188 | goto badbios; | ||
172 | } | 189 | } |
173 | return 0; | 190 | |
191 | pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n", | ||
192 | oem_id, oem_table_id, uv_system_type, | ||
193 | uv_min_hub_revision_id, uv_apic); | ||
194 | |||
195 | return uv_apic; | ||
196 | |||
197 | badbios: | ||
198 | pr_err("UV: OEM_ID:%s OEM_TABLE_ID:%s\n", oem_id, oem_table_id); | ||
199 | pr_err("Current BIOS not supported, update kernel and/or BIOS\n"); | ||
200 | BUG(); | ||
174 | } | 201 | } |
175 | 202 | ||
176 | enum uv_system_type get_uv_system_type(void) | 203 | enum uv_system_type get_uv_system_type(void) |
@@ -854,10 +881,14 @@ void __init uv_system_init(void) | |||
854 | unsigned long mmr_base, present, paddr; | 881 | unsigned long mmr_base, present, paddr; |
855 | unsigned short pnode_mask; | 882 | unsigned short pnode_mask; |
856 | unsigned char n_lshift; | 883 | unsigned char n_lshift; |
857 | char *hub = (is_uv1_hub() ? "UV1" : | 884 | char *hub = (is_uv1_hub() ? "UV100/1000" : |
858 | (is_uv2_hub() ? "UV2" : | 885 | (is_uv2_hub() ? "UV2000/3000" : |
859 | "UV3")); | 886 | (is_uv3_hub() ? "UV300" : NULL))); |
860 | 887 | ||
888 | if (!hub) { | ||
889 | pr_err("UV: Unknown/unsupported UV hub\n"); | ||
890 | return; | ||
891 | } | ||
861 | pr_info("UV: Found %s hub\n", hub); | 892 | pr_info("UV: Found %s hub\n", hub); |
862 | map_low_mmrs(); | 893 | map_low_mmrs(); |
863 | 894 | ||
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 3b3b9d33ac1d..47703aed74cf 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
@@ -68,7 +68,7 @@ void foo(void) | |||
68 | 68 | ||
69 | /* Offset from the sysenter stack to tss.sp0 */ | 69 | /* Offset from the sysenter stack to tss.sp0 */ |
70 | DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - | 70 | DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - |
71 | sizeof(struct tss_struct)); | 71 | offsetofend(struct tss_struct, SYSENTER_stack)); |
72 | 72 | ||
73 | #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) | 73 | #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) |
74 | BLANK(); | 74 | BLANK(); |
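TSS_sysenter_sp0 is now computed relative to the end of the SYSENTER_stack member rather than the end of the whole tss_struct. For reference, offsetofend() expands to approximately the following (its definition lives in the generic headers, not in this patch):

/* Byte offset of the first byte past MEMBER inside TYPE. */
#define offsetofend(TYPE, MEMBER) \
        (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))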
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index fdcbb4d27c9f..5ce6f2da8763 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
@@ -81,6 +81,7 @@ int main(void) | |||
81 | #undef ENTRY | 81 | #undef ENTRY |
82 | 82 | ||
83 | OFFSET(TSS_ist, tss_struct, x86_tss.ist); | 83 | OFFSET(TSS_ist, tss_struct, x86_tss.ist); |
84 | OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); | ||
84 | BLANK(); | 85 | BLANK(); |
85 | 86 | ||
86 | DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); | 87 | DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a220239cea65..fd470ebf924e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -5,6 +5,7 @@ | |||
5 | 5 | ||
6 | #include <linux/io.h> | 6 | #include <linux/io.h> |
7 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
8 | #include <linux/random.h> | ||
8 | #include <asm/processor.h> | 9 | #include <asm/processor.h> |
9 | #include <asm/apic.h> | 10 | #include <asm/apic.h> |
10 | #include <asm/cpu.h> | 11 | #include <asm/cpu.h> |
@@ -488,6 +489,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) | |||
488 | 489 | ||
489 | va_align.mask = (upperbit - 1) & PAGE_MASK; | 490 | va_align.mask = (upperbit - 1) & PAGE_MASK; |
490 | va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; | 491 | va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; |
492 | |||
493 | /* A random value per boot for bit slice [12:upper_bit) */ | ||
494 | va_align.bits = get_random_int() & va_align.mask; | ||
491 | } | 495 | } |
492 | } | 496 | } |
493 | 497 | ||
@@ -711,6 +715,11 @@ static void init_amd(struct cpuinfo_x86 *c) | |||
711 | set_cpu_bug(c, X86_BUG_AMD_APIC_C1E); | 715 | set_cpu_bug(c, X86_BUG_AMD_APIC_C1E); |
712 | 716 | ||
713 | rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); | 717 | rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); |
718 | |||
719 | /* 3DNow or LM implies PREFETCHW */ | ||
720 | if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH)) | ||
721 | if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM)) | ||
722 | set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH); | ||
714 | } | 723 | } |
715 | 724 | ||
716 | #ifdef CONFIG_X86_32 | 725 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1cd4a1a44b95..a62cf04dac8a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -998,38 +998,37 @@ static void identify_cpu(struct cpuinfo_x86 *c) | |||
998 | #endif | 998 | #endif |
999 | } | 999 | } |
1000 | 1000 | ||
1001 | #ifdef CONFIG_X86_64 | 1001 | /* |
1002 | #ifdef CONFIG_IA32_EMULATION | 1002 | * Set up the CPU state needed to execute SYSENTER/SYSEXIT instructions |
1003 | /* May not be __init: called during resume */ | 1003 | * on 32-bit kernels: |
1004 | static void syscall32_cpu_init(void) | 1004 | */ |
1005 | { | ||
1006 | /* Load these always in case some future AMD CPU supports | ||
1007 | SYSENTER from compat mode too. */ | ||
1008 | wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); | ||
1009 | wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); | ||
1010 | wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); | ||
1011 | |||
1012 | wrmsrl(MSR_CSTAR, ia32_cstar_target); | ||
1013 | } | ||
1014 | #endif /* CONFIG_IA32_EMULATION */ | ||
1015 | #endif /* CONFIG_X86_64 */ | ||
1016 | |||
1017 | #ifdef CONFIG_X86_32 | 1005 | #ifdef CONFIG_X86_32 |
1018 | void enable_sep_cpu(void) | 1006 | void enable_sep_cpu(void) |
1019 | { | 1007 | { |
1020 | int cpu = get_cpu(); | 1008 | struct tss_struct *tss; |
1021 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | 1009 | int cpu; |
1022 | 1010 | ||
1023 | if (!boot_cpu_has(X86_FEATURE_SEP)) { | 1011 | cpu = get_cpu(); |
1024 | put_cpu(); | 1012 | tss = &per_cpu(cpu_tss, cpu); |
1025 | return; | 1013 | |
1026 | } | 1014 | if (!boot_cpu_has(X86_FEATURE_SEP)) |
1015 | goto out; | ||
1016 | |||
1017 | /* | ||
1018 | * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- | ||
1019 | * see the big comment in struct x86_hw_tss's definition. | ||
1020 | */ | ||
1027 | 1021 | ||
1028 | tss->x86_tss.ss1 = __KERNEL_CS; | 1022 | tss->x86_tss.ss1 = __KERNEL_CS; |
1029 | tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss; | 1023 | wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); |
1030 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); | 1024 | |
1031 | wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0); | 1025 | wrmsr(MSR_IA32_SYSENTER_ESP, |
1032 | wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0); | 1026 | (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack), |
1027 | 0); | ||
1028 | |||
1029 | wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)ia32_sysenter_target, 0); | ||
1030 | |||
1031 | out: | ||
1033 | put_cpu(); | 1032 | put_cpu(); |
1034 | } | 1033 | } |
1035 | #endif | 1034 | #endif |
@@ -1157,7 +1156,7 @@ static __init int setup_disablecpuid(char *arg) | |||
1157 | __setup("clearcpuid=", setup_disablecpuid); | 1156 | __setup("clearcpuid=", setup_disablecpuid); |
1158 | 1157 | ||
1159 | DEFINE_PER_CPU(unsigned long, kernel_stack) = | 1158 | DEFINE_PER_CPU(unsigned long, kernel_stack) = |
1160 | (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; | 1159 | (unsigned long)&init_thread_union + THREAD_SIZE; |
1161 | EXPORT_PER_CPU_SYMBOL(kernel_stack); | 1160 | EXPORT_PER_CPU_SYMBOL(kernel_stack); |
1162 | 1161 | ||
1163 | #ifdef CONFIG_X86_64 | 1162 | #ifdef CONFIG_X86_64 |
@@ -1169,8 +1168,8 @@ DEFINE_PER_CPU_FIRST(union irq_stack_union, | |||
1169 | irq_stack_union) __aligned(PAGE_SIZE) __visible; | 1168 | irq_stack_union) __aligned(PAGE_SIZE) __visible; |
1170 | 1169 | ||
1171 | /* | 1170 | /* |
1172 | * The following four percpu variables are hot. Align current_task to | 1171 | * The following percpu variables are hot. Align current_task to |
1173 | * cacheline size such that all four fall in the same cacheline. | 1172 | * cacheline size such that they fall in the same cacheline. |
1174 | */ | 1173 | */ |
1175 | DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = | 1174 | DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = |
1176 | &init_task; | 1175 | &init_task; |
@@ -1210,10 +1209,23 @@ void syscall_init(void) | |||
1210 | */ | 1209 | */ |
1211 | wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); | 1210 | wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); |
1212 | wrmsrl(MSR_LSTAR, system_call); | 1211 | wrmsrl(MSR_LSTAR, system_call); |
1213 | wrmsrl(MSR_CSTAR, ignore_sysret); | ||
1214 | 1212 | ||
1215 | #ifdef CONFIG_IA32_EMULATION | 1213 | #ifdef CONFIG_IA32_EMULATION |
1216 | syscall32_cpu_init(); | 1214 | wrmsrl(MSR_CSTAR, ia32_cstar_target); |
1215 | /* | ||
1216 | * This only works on Intel CPUs. | ||
1217 | * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP. | ||
1218 | * This does not cause SYSENTER to jump to the wrong location, because | ||
1219 | * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). | ||
1220 | */ | ||
1221 | wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); | ||
1222 | wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); | ||
1223 | wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); | ||
1224 | #else | ||
1225 | wrmsrl(MSR_CSTAR, ignore_sysret); | ||
1226 | wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG); | ||
1227 | wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); | ||
1228 | wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL); | ||
1217 | #endif | 1229 | #endif |
1218 | 1230 | ||
1219 | /* Flags to clear on syscall */ | 1231 | /* Flags to clear on syscall */ |
@@ -1265,6 +1277,15 @@ DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; | |||
1265 | EXPORT_PER_CPU_SYMBOL(__preempt_count); | 1277 | EXPORT_PER_CPU_SYMBOL(__preempt_count); |
1266 | DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); | 1278 | DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); |
1267 | 1279 | ||
1280 | /* | ||
1281 | * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find | ||
1282 | * the top of the kernel stack. Use an extra percpu variable to track the | ||
1283 | * top of the kernel stack directly. | ||
1284 | */ | ||
1285 | DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = | ||
1286 | (unsigned long)&init_thread_union + THREAD_SIZE; | ||
1287 | EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack); | ||
1288 | |||
1268 | #ifdef CONFIG_CC_STACKPROTECTOR | 1289 | #ifdef CONFIG_CC_STACKPROTECTOR |
1269 | DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); | 1290 | DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); |
1270 | #endif | 1291 | #endif |
@@ -1346,7 +1367,7 @@ void cpu_init(void) | |||
1346 | */ | 1367 | */ |
1347 | load_ucode_ap(); | 1368 | load_ucode_ap(); |
1348 | 1369 | ||
1349 | t = &per_cpu(init_tss, cpu); | 1370 | t = &per_cpu(cpu_tss, cpu); |
1350 | oist = &per_cpu(orig_ist, cpu); | 1371 | oist = &per_cpu(orig_ist, cpu); |
1351 | 1372 | ||
1352 | #ifdef CONFIG_NUMA | 1373 | #ifdef CONFIG_NUMA |
@@ -1430,7 +1451,7 @@ void cpu_init(void) | |||
1430 | { | 1451 | { |
1431 | int cpu = smp_processor_id(); | 1452 | int cpu = smp_processor_id(); |
1432 | struct task_struct *curr = current; | 1453 | struct task_struct *curr = current; |
1433 | struct tss_struct *t = &per_cpu(init_tss, cpu); | 1454 | struct tss_struct *t = &per_cpu(cpu_tss, cpu); |
1434 | struct thread_struct *thread = &curr->thread; | 1455 | struct thread_struct *thread = &curr->thread; |
1435 | 1456 | ||
1436 | wait_for_master_cpu(cpu); | 1457 | wait_for_master_cpu(cpu); |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 659643376dbf..edcb0e28c336 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -7,16 +7,14 @@ | |||
7 | * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. | 7 | * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/slab.h> | 10 | #include <linux/slab.h> |
12 | #include <linux/device.h> | 11 | #include <linux/cacheinfo.h> |
13 | #include <linux/compiler.h> | ||
14 | #include <linux/cpu.h> | 12 | #include <linux/cpu.h> |
15 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
14 | #include <linux/sysfs.h> | ||
16 | #include <linux/pci.h> | 15 | #include <linux/pci.h> |
17 | 16 | ||
18 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
19 | #include <linux/smp.h> | ||
20 | #include <asm/amd_nb.h> | 18 | #include <asm/amd_nb.h> |
21 | #include <asm/smp.h> | 19 | #include <asm/smp.h> |
22 | 20 | ||
@@ -116,10 +114,10 @@ static const struct _cache_table cache_table[] = | |||
116 | 114 | ||
117 | 115 | ||
118 | enum _cache_type { | 116 | enum _cache_type { |
119 | CACHE_TYPE_NULL = 0, | 117 | CTYPE_NULL = 0, |
120 | CACHE_TYPE_DATA = 1, | 118 | CTYPE_DATA = 1, |
121 | CACHE_TYPE_INST = 2, | 119 | CTYPE_INST = 2, |
122 | CACHE_TYPE_UNIFIED = 3 | 120 | CTYPE_UNIFIED = 3 |
123 | }; | 121 | }; |
124 | 122 | ||
125 | union _cpuid4_leaf_eax { | 123 | union _cpuid4_leaf_eax { |
@@ -159,11 +157,6 @@ struct _cpuid4_info_regs { | |||
159 | struct amd_northbridge *nb; | 157 | struct amd_northbridge *nb; |
160 | }; | 158 | }; |
161 | 159 | ||
162 | struct _cpuid4_info { | ||
163 | struct _cpuid4_info_regs base; | ||
164 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); | ||
165 | }; | ||
166 | |||
167 | unsigned short num_cache_leaves; | 160 | unsigned short num_cache_leaves; |
168 | 161 | ||
169 | /* AMD doesn't have CPUID4. Emulate it here to report the same | 162 | /* AMD doesn't have CPUID4. Emulate it here to report the same |
@@ -220,6 +213,13 @@ static const unsigned short assocs[] = { | |||
220 | static const unsigned char levels[] = { 1, 1, 2, 3 }; | 213 | static const unsigned char levels[] = { 1, 1, 2, 3 }; |
221 | static const unsigned char types[] = { 1, 2, 3, 3 }; | 214 | static const unsigned char types[] = { 1, 2, 3, 3 }; |
222 | 215 | ||
216 | static const enum cache_type cache_type_map[] = { | ||
217 | [CTYPE_NULL] = CACHE_TYPE_NOCACHE, | ||
218 | [CTYPE_DATA] = CACHE_TYPE_DATA, | ||
219 | [CTYPE_INST] = CACHE_TYPE_INST, | ||
220 | [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED, | ||
221 | }; | ||
222 | |||
223 | static void | 223 | static void |
224 | amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | 224 | amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, |
225 | union _cpuid4_leaf_ebx *ebx, | 225 | union _cpuid4_leaf_ebx *ebx, |
@@ -291,14 +291,8 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
291 | (ebx->split.ways_of_associativity + 1) - 1; | 291 | (ebx->split.ways_of_associativity + 1) - 1; |
292 | } | 292 | } |
293 | 293 | ||
294 | struct _cache_attr { | ||
295 | struct attribute attr; | ||
296 | ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int); | ||
297 | ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count, | ||
298 | unsigned int); | ||
299 | }; | ||
300 | |||
301 | #if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS) | 294 | #if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS) |
295 | |||
302 | /* | 296 | /* |
303 | * L3 cache descriptors | 297 | * L3 cache descriptors |
304 | */ | 298 | */ |
@@ -325,20 +319,6 @@ static void amd_calc_l3_indices(struct amd_northbridge *nb) | |||
325 | l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; | 319 | l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; |
326 | } | 320 | } |
327 | 321 | ||
328 | static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index) | ||
329 | { | ||
330 | int node; | ||
331 | |||
332 | /* only for L3, and not in virtualized environments */ | ||
333 | if (index < 3) | ||
334 | return; | ||
335 | |||
336 | node = amd_get_nb_id(smp_processor_id()); | ||
337 | this_leaf->nb = node_to_amd_nb(node); | ||
338 | if (this_leaf->nb && !this_leaf->nb->l3_cache.indices) | ||
339 | amd_calc_l3_indices(this_leaf->nb); | ||
340 | } | ||
341 | |||
342 | /* | 322 | /* |
343 | * check whether a slot used for disabling an L3 index is occupied. | 323 | * check whether a slot used for disabling an L3 index is occupied. |
344 | * @l3: L3 cache descriptor | 324 | * @l3: L3 cache descriptor |
@@ -359,15 +339,13 @@ int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot) | |||
359 | return -1; | 339 | return -1; |
360 | } | 340 | } |
361 | 341 | ||
362 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, | 342 | static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf, |
363 | unsigned int slot) | 343 | unsigned int slot) |
364 | { | 344 | { |
365 | int index; | 345 | int index; |
346 | struct amd_northbridge *nb = this_leaf->priv; | ||
366 | 347 | ||
367 | if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) | 348 | index = amd_get_l3_disable_slot(nb, slot); |
368 | return -EINVAL; | ||
369 | |||
370 | index = amd_get_l3_disable_slot(this_leaf->base.nb, slot); | ||
371 | if (index >= 0) | 349 | if (index >= 0) |
372 | return sprintf(buf, "%d\n", index); | 350 | return sprintf(buf, "%d\n", index); |
373 | 351 | ||
@@ -376,9 +354,10 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, | |||
376 | 354 | ||
377 | #define SHOW_CACHE_DISABLE(slot) \ | 355 | #define SHOW_CACHE_DISABLE(slot) \ |
378 | static ssize_t \ | 356 | static ssize_t \ |
379 | show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf, \ | 357 | cache_disable_##slot##_show(struct device *dev, \ |
380 | unsigned int cpu) \ | 358 | struct device_attribute *attr, char *buf) \ |
381 | { \ | 359 | { \ |
360 | struct cacheinfo *this_leaf = dev_get_drvdata(dev); \ | ||
382 | return show_cache_disable(this_leaf, buf, slot); \ | 361 | return show_cache_disable(this_leaf, buf, slot); \ |
383 | } | 362 | } |
384 | SHOW_CACHE_DISABLE(0) | 363 | SHOW_CACHE_DISABLE(0) |
@@ -446,25 +425,23 @@ int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot, | |||
446 | return 0; | 425 | return 0; |
447 | } | 426 | } |
448 | 427 | ||
449 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | 428 | static ssize_t store_cache_disable(struct cacheinfo *this_leaf, |
450 | const char *buf, size_t count, | 429 | const char *buf, size_t count, |
451 | unsigned int slot) | 430 | unsigned int slot) |
452 | { | 431 | { |
453 | unsigned long val = 0; | 432 | unsigned long val = 0; |
454 | int cpu, err = 0; | 433 | int cpu, err = 0; |
434 | struct amd_northbridge *nb = this_leaf->priv; | ||
455 | 435 | ||
456 | if (!capable(CAP_SYS_ADMIN)) | 436 | if (!capable(CAP_SYS_ADMIN)) |
457 | return -EPERM; | 437 | return -EPERM; |
458 | 438 | ||
459 | if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) | 439 | cpu = cpumask_first(&this_leaf->shared_cpu_map); |
460 | return -EINVAL; | ||
461 | |||
462 | cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | ||
463 | 440 | ||
464 | if (kstrtoul(buf, 10, &val) < 0) | 441 | if (kstrtoul(buf, 10, &val) < 0) |
465 | return -EINVAL; | 442 | return -EINVAL; |
466 | 443 | ||
467 | err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val); | 444 | err = amd_set_l3_disable_slot(nb, cpu, slot, val); |
468 | if (err) { | 445 | if (err) { |
469 | if (err == -EEXIST) | 446 | if (err == -EEXIST) |
470 | pr_warning("L3 slot %d in use/index already disabled!\n", | 447 | pr_warning("L3 slot %d in use/index already disabled!\n", |
@@ -476,41 +453,36 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | |||
476 | 453 | ||
477 | #define STORE_CACHE_DISABLE(slot) \ | 454 | #define STORE_CACHE_DISABLE(slot) \ |
478 | static ssize_t \ | 455 | static ssize_t \ |
479 | store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ | 456 | cache_disable_##slot##_store(struct device *dev, \ |
480 | const char *buf, size_t count, \ | 457 | struct device_attribute *attr, \ |
481 | unsigned int cpu) \ | 458 | const char *buf, size_t count) \ |
482 | { \ | 459 | { \ |
460 | struct cacheinfo *this_leaf = dev_get_drvdata(dev); \ | ||
483 | return store_cache_disable(this_leaf, buf, count, slot); \ | 461 | return store_cache_disable(this_leaf, buf, count, slot); \ |
484 | } | 462 | } |
485 | STORE_CACHE_DISABLE(0) | 463 | STORE_CACHE_DISABLE(0) |
486 | STORE_CACHE_DISABLE(1) | 464 | STORE_CACHE_DISABLE(1) |
487 | 465 | ||
488 | static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, | 466 | static ssize_t subcaches_show(struct device *dev, |
489 | show_cache_disable_0, store_cache_disable_0); | 467 | struct device_attribute *attr, char *buf) |
490 | static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, | ||
491 | show_cache_disable_1, store_cache_disable_1); | ||
492 | |||
493 | static ssize_t | ||
494 | show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu) | ||
495 | { | 468 | { |
496 | if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) | 469 | struct cacheinfo *this_leaf = dev_get_drvdata(dev); |
497 | return -EINVAL; | 470 | int cpu = cpumask_first(&this_leaf->shared_cpu_map); |
498 | 471 | ||
499 | return sprintf(buf, "%x\n", amd_get_subcaches(cpu)); | 472 | return sprintf(buf, "%x\n", amd_get_subcaches(cpu)); |
500 | } | 473 | } |
501 | 474 | ||
502 | static ssize_t | 475 | static ssize_t subcaches_store(struct device *dev, |
503 | store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count, | 476 | struct device_attribute *attr, |
504 | unsigned int cpu) | 477 | const char *buf, size_t count) |
505 | { | 478 | { |
479 | struct cacheinfo *this_leaf = dev_get_drvdata(dev); | ||
480 | int cpu = cpumask_first(&this_leaf->shared_cpu_map); | ||
506 | unsigned long val; | 481 | unsigned long val; |
507 | 482 | ||
508 | if (!capable(CAP_SYS_ADMIN)) | 483 | if (!capable(CAP_SYS_ADMIN)) |
509 | return -EPERM; | 484 | return -EPERM; |
510 | 485 | ||
511 | if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) | ||
512 | return -EINVAL; | ||
513 | |||
514 | if (kstrtoul(buf, 16, &val) < 0) | 486 | if (kstrtoul(buf, 16, &val) < 0) |
515 | return -EINVAL; | 487 | return -EINVAL; |
516 | 488 | ||
@@ -520,9 +492,92 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count, | |||
520 | return count; | 492 | return count; |
521 | } | 493 | } |
522 | 494 | ||
523 | static struct _cache_attr subcaches = | 495 | static DEVICE_ATTR_RW(cache_disable_0); |
524 | __ATTR(subcaches, 0644, show_subcaches, store_subcaches); | 496 | static DEVICE_ATTR_RW(cache_disable_1); |
497 | static DEVICE_ATTR_RW(subcaches); | ||
498 | |||
499 | static umode_t | ||
500 | cache_private_attrs_is_visible(struct kobject *kobj, | ||
501 | struct attribute *attr, int unused) | ||
502 | { | ||
503 | struct device *dev = kobj_to_dev(kobj); | ||
504 | struct cacheinfo *this_leaf = dev_get_drvdata(dev); | ||
505 | umode_t mode = attr->mode; | ||
506 | |||
507 | if (!this_leaf->priv) | ||
508 | return 0; | ||
509 | |||
510 | if ((attr == &dev_attr_subcaches.attr) && | ||
511 | amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) | ||
512 | return mode; | ||
513 | |||
514 | if ((attr == &dev_attr_cache_disable_0.attr || | ||
515 | attr == &dev_attr_cache_disable_1.attr) && | ||
516 | amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) | ||
517 | return mode; | ||
518 | |||
519 | return 0; | ||
520 | } | ||
521 | |||
522 | static struct attribute_group cache_private_group = { | ||
523 | .is_visible = cache_private_attrs_is_visible, | ||
524 | }; | ||
525 | |||
526 | static void init_amd_l3_attrs(void) | ||
527 | { | ||
528 | int n = 1; | ||
529 | static struct attribute **amd_l3_attrs; | ||
530 | |||
531 | if (amd_l3_attrs) /* already initialized */ | ||
532 | return; | ||
533 | |||
534 | if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) | ||
535 | n += 2; | ||
536 | if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) | ||
537 | n += 1; | ||
538 | |||
539 | amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL); | ||
540 | if (!amd_l3_attrs) | ||
541 | return; | ||
542 | |||
543 | n = 0; | ||
544 | if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) { | ||
545 | amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr; | ||
546 | amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr; | ||
547 | } | ||
548 | if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) | ||
549 | amd_l3_attrs[n++] = &dev_attr_subcaches.attr; | ||
525 | 550 | ||
551 | cache_private_group.attrs = amd_l3_attrs; | ||
552 | } | ||
553 | |||
554 | const struct attribute_group * | ||
555 | cache_get_priv_group(struct cacheinfo *this_leaf) | ||
556 | { | ||
557 | struct amd_northbridge *nb = this_leaf->priv; | ||
558 | |||
559 | if (this_leaf->level < 3 || !nb) | ||
560 | return NULL; | ||
561 | |||
562 | if (nb && nb->l3_cache.indices) | ||
563 | init_amd_l3_attrs(); | ||
564 | |||
565 | return &cache_private_group; | ||
566 | } | ||
567 | |||
568 | static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index) | ||
569 | { | ||
570 | int node; | ||
571 | |||
572 | /* only for L3, and not in virtualized environments */ | ||
573 | if (index < 3) | ||
574 | return; | ||
575 | |||
576 | node = amd_get_nb_id(smp_processor_id()); | ||
577 | this_leaf->nb = node_to_amd_nb(node); | ||
578 | if (this_leaf->nb && !this_leaf->nb->l3_cache.indices) | ||
579 | amd_calc_l3_indices(this_leaf->nb); | ||
580 | } | ||
526 | #else | 581 | #else |
527 | #define amd_init_l3_cache(x, y) | 582 | #define amd_init_l3_cache(x, y) |
528 | #endif /* CONFIG_AMD_NB && CONFIG_SYSFS */ | 583 | #endif /* CONFIG_AMD_NB && CONFIG_SYSFS */ |
@@ -546,7 +601,7 @@ cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf) | |||
546 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); | 601 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); |
547 | } | 602 | } |
548 | 603 | ||
549 | if (eax.split.type == CACHE_TYPE_NULL) | 604 | if (eax.split.type == CTYPE_NULL) |
550 | return -EIO; /* better error ? */ | 605 | return -EIO; /* better error ? */ |
551 | 606 | ||
552 | this_leaf->eax = eax; | 607 | this_leaf->eax = eax; |
@@ -575,7 +630,7 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c) | |||
575 | /* Do cpuid(op) loop to find out num_cache_leaves */ | 630 | /* Do cpuid(op) loop to find out num_cache_leaves */ |
576 | cpuid_count(op, i, &eax, &ebx, &ecx, &edx); | 631 | cpuid_count(op, i, &eax, &ebx, &ecx, &edx); |
577 | cache_eax.full = eax; | 632 | cache_eax.full = eax; |
578 | } while (cache_eax.split.type != CACHE_TYPE_NULL); | 633 | } while (cache_eax.split.type != CTYPE_NULL); |
579 | return i; | 634 | return i; |
580 | } | 635 | } |
581 | 636 | ||
@@ -626,9 +681,9 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
626 | 681 | ||
627 | switch (this_leaf.eax.split.level) { | 682 | switch (this_leaf.eax.split.level) { |
628 | case 1: | 683 | case 1: |
629 | if (this_leaf.eax.split.type == CACHE_TYPE_DATA) | 684 | if (this_leaf.eax.split.type == CTYPE_DATA) |
630 | new_l1d = this_leaf.size/1024; | 685 | new_l1d = this_leaf.size/1024; |
631 | else if (this_leaf.eax.split.type == CACHE_TYPE_INST) | 686 | else if (this_leaf.eax.split.type == CTYPE_INST) |
632 | new_l1i = this_leaf.size/1024; | 687 | new_l1i = this_leaf.size/1024; |
633 | break; | 688 | break; |
634 | case 2: | 689 | case 2: |
@@ -747,55 +802,52 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
747 | return l2; | 802 | return l2; |
748 | } | 803 | } |
749 | 804 | ||
750 | #ifdef CONFIG_SYSFS | 805 | static int __cache_amd_cpumap_setup(unsigned int cpu, int index, |
751 | 806 | struct _cpuid4_info_regs *base) | |
752 | /* pointer to _cpuid4_info array (for each cache leaf) */ | ||
753 | static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info); | ||
754 | #define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y])) | ||
755 | |||
756 | #ifdef CONFIG_SMP | ||
757 | |||
758 | static int cache_shared_amd_cpu_map_setup(unsigned int cpu, int index) | ||
759 | { | 807 | { |
760 | struct _cpuid4_info *this_leaf; | 808 | struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); |
809 | struct cacheinfo *this_leaf; | ||
761 | int i, sibling; | 810 | int i, sibling; |
762 | 811 | ||
763 | if (cpu_has_topoext) { | 812 | if (cpu_has_topoext) { |
764 | unsigned int apicid, nshared, first, last; | 813 | unsigned int apicid, nshared, first, last; |
765 | 814 | ||
766 | if (!per_cpu(ici_cpuid4_info, cpu)) | 815 | this_leaf = this_cpu_ci->info_list + index; |
767 | return 0; | 816 | nshared = base->eax.split.num_threads_sharing + 1; |
768 | |||
769 | this_leaf = CPUID4_INFO_IDX(cpu, index); | ||
770 | nshared = this_leaf->base.eax.split.num_threads_sharing + 1; | ||
771 | apicid = cpu_data(cpu).apicid; | 817 | apicid = cpu_data(cpu).apicid; |
772 | first = apicid - (apicid % nshared); | 818 | first = apicid - (apicid % nshared); |
773 | last = first + nshared - 1; | 819 | last = first + nshared - 1; |
774 | 820 | ||
775 | for_each_online_cpu(i) { | 821 | for_each_online_cpu(i) { |
822 | this_cpu_ci = get_cpu_cacheinfo(i); | ||
823 | if (!this_cpu_ci->info_list) | ||
824 | continue; | ||
825 | |||
776 | apicid = cpu_data(i).apicid; | 826 | apicid = cpu_data(i).apicid; |
777 | if ((apicid < first) || (apicid > last)) | 827 | if ((apicid < first) || (apicid > last)) |
778 | continue; | 828 | continue; |
779 | if (!per_cpu(ici_cpuid4_info, i)) | 829 | |
780 | continue; | 830 | this_leaf = this_cpu_ci->info_list + index; |
781 | this_leaf = CPUID4_INFO_IDX(i, index); | ||
782 | 831 | ||
783 | for_each_online_cpu(sibling) { | 832 | for_each_online_cpu(sibling) { |
784 | apicid = cpu_data(sibling).apicid; | 833 | apicid = cpu_data(sibling).apicid; |
785 | if ((apicid < first) || (apicid > last)) | 834 | if ((apicid < first) || (apicid > last)) |
786 | continue; | 835 | continue; |
787 | set_bit(sibling, this_leaf->shared_cpu_map); | 836 | cpumask_set_cpu(sibling, |
837 | &this_leaf->shared_cpu_map); | ||
788 | } | 838 | } |
789 | } | 839 | } |
790 | } else if (index == 3) { | 840 | } else if (index == 3) { |
791 | for_each_cpu(i, cpu_llc_shared_mask(cpu)) { | 841 | for_each_cpu(i, cpu_llc_shared_mask(cpu)) { |
792 | if (!per_cpu(ici_cpuid4_info, i)) | 842 | this_cpu_ci = get_cpu_cacheinfo(i); |
843 | if (!this_cpu_ci->info_list) | ||
793 | continue; | 844 | continue; |
794 | this_leaf = CPUID4_INFO_IDX(i, index); | 845 | this_leaf = this_cpu_ci->info_list + index; |
795 | for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) { | 846 | for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) { |
796 | if (!cpu_online(sibling)) | 847 | if (!cpu_online(sibling)) |
797 | continue; | 848 | continue; |
798 | set_bit(sibling, this_leaf->shared_cpu_map); | 849 | cpumask_set_cpu(sibling, |
850 | &this_leaf->shared_cpu_map); | ||
799 | } | 851 | } |
800 | } | 852 | } |
801 | } else | 853 | } else |
@@ -804,457 +856,86 @@ static int cache_shared_amd_cpu_map_setup(unsigned int cpu, int index) | |||
804 | return 1; | 856 | return 1; |
805 | } | 857 | } |
806 | 858 | ||
807 | static void cache_shared_cpu_map_setup(unsigned int cpu, int index) | 859 | static void __cache_cpumap_setup(unsigned int cpu, int index, |
860 | struct _cpuid4_info_regs *base) | ||
808 | { | 861 | { |
809 | struct _cpuid4_info *this_leaf, *sibling_leaf; | 862 | struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); |
863 | struct cacheinfo *this_leaf, *sibling_leaf; | ||
810 | unsigned long num_threads_sharing; | 864 | unsigned long num_threads_sharing; |
811 | int index_msb, i; | 865 | int index_msb, i; |
812 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 866 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
813 | 867 | ||
814 | if (c->x86_vendor == X86_VENDOR_AMD) { | 868 | if (c->x86_vendor == X86_VENDOR_AMD) { |
815 | if (cache_shared_amd_cpu_map_setup(cpu, index)) | 869 | if (__cache_amd_cpumap_setup(cpu, index, base)) |
816 | return; | 870 | return; |
817 | } | 871 | } |
818 | 872 | ||
819 | this_leaf = CPUID4_INFO_IDX(cpu, index); | 873 | this_leaf = this_cpu_ci->info_list + index; |
820 | num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing; | 874 | num_threads_sharing = 1 + base->eax.split.num_threads_sharing; |
821 | 875 | ||
876 | cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); | ||
822 | if (num_threads_sharing == 1) | 877 | if (num_threads_sharing == 1) |
823 | cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map)); | 878 | return; |
824 | else { | ||
825 | index_msb = get_count_order(num_threads_sharing); | ||
826 | |||
827 | for_each_online_cpu(i) { | ||
828 | if (cpu_data(i).apicid >> index_msb == | ||
829 | c->apicid >> index_msb) { | ||
830 | cpumask_set_cpu(i, | ||
831 | to_cpumask(this_leaf->shared_cpu_map)); | ||
832 | if (i != cpu && per_cpu(ici_cpuid4_info, i)) { | ||
833 | sibling_leaf = | ||
834 | CPUID4_INFO_IDX(i, index); | ||
835 | cpumask_set_cpu(cpu, to_cpumask( | ||
836 | sibling_leaf->shared_cpu_map)); | ||
837 | } | ||
838 | } | ||
839 | } | ||
840 | } | ||
841 | } | ||
842 | static void cache_remove_shared_cpu_map(unsigned int cpu, int index) | ||
843 | { | ||
844 | struct _cpuid4_info *this_leaf, *sibling_leaf; | ||
845 | int sibling; | ||
846 | |||
847 | this_leaf = CPUID4_INFO_IDX(cpu, index); | ||
848 | for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) { | ||
849 | sibling_leaf = CPUID4_INFO_IDX(sibling, index); | ||
850 | cpumask_clear_cpu(cpu, | ||
851 | to_cpumask(sibling_leaf->shared_cpu_map)); | ||
852 | } | ||
853 | } | ||
854 | #else | ||
855 | static void cache_shared_cpu_map_setup(unsigned int cpu, int index) | ||
856 | { | ||
857 | } | ||
858 | |||
859 | static void cache_remove_shared_cpu_map(unsigned int cpu, int index) | ||
860 | { | ||
861 | } | ||
862 | #endif | ||
863 | |||
864 | static void free_cache_attributes(unsigned int cpu) | ||
865 | { | ||
866 | int i; | ||
867 | |||
868 | for (i = 0; i < num_cache_leaves; i++) | ||
869 | cache_remove_shared_cpu_map(cpu, i); | ||
870 | |||
871 | kfree(per_cpu(ici_cpuid4_info, cpu)); | ||
872 | per_cpu(ici_cpuid4_info, cpu) = NULL; | ||
873 | } | ||
874 | |||
875 | static void get_cpu_leaves(void *_retval) | ||
876 | { | ||
877 | int j, *retval = _retval, cpu = smp_processor_id(); | ||
878 | 879 | ||
879 | /* Do cpuid and store the results */ | 880 | index_msb = get_count_order(num_threads_sharing); |
880 | for (j = 0; j < num_cache_leaves; j++) { | ||
881 | struct _cpuid4_info *this_leaf = CPUID4_INFO_IDX(cpu, j); | ||
882 | 881 | ||
883 | *retval = cpuid4_cache_lookup_regs(j, &this_leaf->base); | 882 | for_each_online_cpu(i) |
884 | if (unlikely(*retval < 0)) { | 883 | if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) { |
885 | int i; | 884 | struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i); |
886 | 885 | ||
887 | for (i = 0; i < j; i++) | 886 | if (i == cpu || !sib_cpu_ci->info_list) |
888 | cache_remove_shared_cpu_map(cpu, i); | 887 | continue;/* skip if itself or no cacheinfo */ |
889 | break; | 888 | sibling_leaf = sib_cpu_ci->info_list + index; |
889 | cpumask_set_cpu(i, &this_leaf->shared_cpu_map); | ||
890 | cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map); | ||
890 | } | 891 | } |
891 | cache_shared_cpu_map_setup(cpu, j); | ||
892 | } | ||
893 | } | 892 | } |
894 | 893 | ||
895 | static int detect_cache_attributes(unsigned int cpu) | 894 | static void ci_leaf_init(struct cacheinfo *this_leaf, |
895 | struct _cpuid4_info_regs *base) | ||
896 | { | 896 | { |
897 | int retval; | 897 | this_leaf->level = base->eax.split.level; |
898 | 898 | this_leaf->type = cache_type_map[base->eax.split.type]; | |
899 | if (num_cache_leaves == 0) | 899 | this_leaf->coherency_line_size = |
900 | return -ENOENT; | 900 | base->ebx.split.coherency_line_size + 1; |
901 | 901 | this_leaf->ways_of_associativity = | |
902 | per_cpu(ici_cpuid4_info, cpu) = kzalloc( | 902 | base->ebx.split.ways_of_associativity + 1; |
903 | sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); | 903 | this_leaf->size = base->size; |
904 | if (per_cpu(ici_cpuid4_info, cpu) == NULL) | 904 | this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1; |
905 | return -ENOMEM; | 905 | this_leaf->physical_line_partition = |
906 | 906 | base->ebx.split.physical_line_partition + 1; | |
907 | smp_call_function_single(cpu, get_cpu_leaves, &retval, true); | 907 | this_leaf->priv = base->nb; |
908 | if (retval) { | ||
909 | kfree(per_cpu(ici_cpuid4_info, cpu)); | ||
910 | per_cpu(ici_cpuid4_info, cpu) = NULL; | ||
911 | } | ||
912 | |||
913 | return retval; | ||
914 | } | 908 | } |
915 | 909 | ||
916 | #include <linux/kobject.h> | 910 | static int __init_cache_level(unsigned int cpu) |
917 | #include <linux/sysfs.h> | ||
918 | #include <linux/cpu.h> | ||
919 | |||
920 | /* pointer to kobject for cpuX/cache */ | ||
921 | static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject); | ||
922 | |||
923 | struct _index_kobject { | ||
924 | struct kobject kobj; | ||
925 | unsigned int cpu; | ||
926 | unsigned short index; | ||
927 | }; | ||
928 | |||
929 | /* pointer to array of kobjects for cpuX/cache/indexY */ | ||
930 | static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject); | ||
931 | #define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y])) | ||
932 | |||
933 | #define show_one_plus(file_name, object, val) \ | ||
934 | static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \ | ||
935 | unsigned int cpu) \ | ||
936 | { \ | ||
937 | return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \ | ||
938 | } | ||
939 | |||
940 | show_one_plus(level, base.eax.split.level, 0); | ||
941 | show_one_plus(coherency_line_size, base.ebx.split.coherency_line_size, 1); | ||
942 | show_one_plus(physical_line_partition, base.ebx.split.physical_line_partition, 1); | ||
943 | show_one_plus(ways_of_associativity, base.ebx.split.ways_of_associativity, 1); | ||
944 | show_one_plus(number_of_sets, base.ecx.split.number_of_sets, 1); | ||
945 | |||
946 | static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf, | ||
947 | unsigned int cpu) | ||
948 | { | ||
949 | return sprintf(buf, "%luK\n", this_leaf->base.size / 1024); | ||
950 | } | ||
951 | |||
952 | static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, | ||
953 | int type, char *buf) | ||
954 | { | ||
955 | const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); | ||
956 | int ret; | ||
957 | |||
958 | if (type) | ||
959 | ret = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", | ||
960 | cpumask_pr_args(mask)); | ||
961 | else | ||
962 | ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb", | ||
963 | cpumask_pr_args(mask)); | ||
964 | buf[ret++] = '\n'; | ||
965 | buf[ret] = '\0'; | ||
966 | return ret; | ||
967 | } | ||
968 | |||
969 | static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf, | ||
970 | unsigned int cpu) | ||
971 | { | 911 | { |
972 | return show_shared_cpu_map_func(leaf, 0, buf); | 912 | struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); |
973 | } | ||
974 | |||
975 | static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf, | ||
976 | unsigned int cpu) | ||
977 | { | ||
978 | return show_shared_cpu_map_func(leaf, 1, buf); | ||
979 | } | ||
980 | 913 | ||
981 | static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf, | 914 | if (!num_cache_leaves) |
982 | unsigned int cpu) | ||
983 | { | ||
984 | switch (this_leaf->base.eax.split.type) { | ||
985 | case CACHE_TYPE_DATA: | ||
986 | return sprintf(buf, "Data\n"); | ||
987 | case CACHE_TYPE_INST: | ||
988 | return sprintf(buf, "Instruction\n"); | ||
989 | case CACHE_TYPE_UNIFIED: | ||
990 | return sprintf(buf, "Unified\n"); | ||
991 | default: | ||
992 | return sprintf(buf, "Unknown\n"); | ||
993 | } | ||
994 | } | ||
995 | |||
996 | #define to_object(k) container_of(k, struct _index_kobject, kobj) | ||
997 | #define to_attr(a) container_of(a, struct _cache_attr, attr) | ||
998 | |||
999 | #define define_one_ro(_name) \ | ||
1000 | static struct _cache_attr _name = \ | ||
1001 | __ATTR(_name, 0444, show_##_name, NULL) | ||
1002 | |||
1003 | define_one_ro(level); | ||
1004 | define_one_ro(type); | ||
1005 | define_one_ro(coherency_line_size); | ||
1006 | define_one_ro(physical_line_partition); | ||
1007 | define_one_ro(ways_of_associativity); | ||
1008 | define_one_ro(number_of_sets); | ||
1009 | define_one_ro(size); | ||
1010 | define_one_ro(shared_cpu_map); | ||
1011 | define_one_ro(shared_cpu_list); | ||
1012 | |||
1013 | static struct attribute *default_attrs[] = { | ||
1014 | &type.attr, | ||
1015 | &level.attr, | ||
1016 | &coherency_line_size.attr, | ||
1017 | &physical_line_partition.attr, | ||
1018 | &ways_of_associativity.attr, | ||
1019 | &number_of_sets.attr, | ||
1020 | &size.attr, | ||
1021 | &shared_cpu_map.attr, | ||
1022 | &shared_cpu_list.attr, | ||
1023 | NULL | ||
1024 | }; | ||
1025 | |||
1026 | #ifdef CONFIG_AMD_NB | ||
1027 | static struct attribute **amd_l3_attrs(void) | ||
1028 | { | ||
1029 | static struct attribute **attrs; | ||
1030 | int n; | ||
1031 | |||
1032 | if (attrs) | ||
1033 | return attrs; | ||
1034 | |||
1035 | n = ARRAY_SIZE(default_attrs); | ||
1036 | |||
1037 | if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) | ||
1038 | n += 2; | ||
1039 | |||
1040 | if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) | ||
1041 | n += 1; | ||
1042 | |||
1043 | attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL); | ||
1044 | if (attrs == NULL) | ||
1045 | return attrs = default_attrs; | ||
1046 | |||
1047 | for (n = 0; default_attrs[n]; n++) | ||
1048 | attrs[n] = default_attrs[n]; | ||
1049 | |||
1050 | if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) { | ||
1051 | attrs[n++] = &cache_disable_0.attr; | ||
1052 | attrs[n++] = &cache_disable_1.attr; | ||
1053 | } | ||
1054 | |||
1055 | if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) | ||
1056 | attrs[n++] = &subcaches.attr; | ||
1057 | |||
1058 | return attrs; | ||
1059 | } | ||
1060 | #endif | ||
1061 | |||
1062 | static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) | ||
1063 | { | ||
1064 | struct _cache_attr *fattr = to_attr(attr); | ||
1065 | struct _index_kobject *this_leaf = to_object(kobj); | ||
1066 | ssize_t ret; | ||
1067 | |||
1068 | ret = fattr->show ? | ||
1069 | fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), | ||
1070 | buf, this_leaf->cpu) : | ||
1071 | 0; | ||
1072 | return ret; | ||
1073 | } | ||
1074 | |||
1075 | static ssize_t store(struct kobject *kobj, struct attribute *attr, | ||
1076 | const char *buf, size_t count) | ||
1077 | { | ||
1078 | struct _cache_attr *fattr = to_attr(attr); | ||
1079 | struct _index_kobject *this_leaf = to_object(kobj); | ||
1080 | ssize_t ret; | ||
1081 | |||
1082 | ret = fattr->store ? | ||
1083 | fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), | ||
1084 | buf, count, this_leaf->cpu) : | ||
1085 | 0; | ||
1086 | return ret; | ||
1087 | } | ||
1088 | |||
1089 | static const struct sysfs_ops sysfs_ops = { | ||
1090 | .show = show, | ||
1091 | .store = store, | ||
1092 | }; | ||
1093 | |||
1094 | static struct kobj_type ktype_cache = { | ||
1095 | .sysfs_ops = &sysfs_ops, | ||
1096 | .default_attrs = default_attrs, | ||
1097 | }; | ||
1098 | |||
1099 | static struct kobj_type ktype_percpu_entry = { | ||
1100 | .sysfs_ops = &sysfs_ops, | ||
1101 | }; | ||
1102 | |||
1103 | static void cpuid4_cache_sysfs_exit(unsigned int cpu) | ||
1104 | { | ||
1105 | kfree(per_cpu(ici_cache_kobject, cpu)); | ||
1106 | kfree(per_cpu(ici_index_kobject, cpu)); | ||
1107 | per_cpu(ici_cache_kobject, cpu) = NULL; | ||
1108 | per_cpu(ici_index_kobject, cpu) = NULL; | ||
1109 | free_cache_attributes(cpu); | ||
1110 | } | ||
1111 | |||
1112 | static int cpuid4_cache_sysfs_init(unsigned int cpu) | ||
1113 | { | ||
1114 | int err; | ||
1115 | |||
1116 | if (num_cache_leaves == 0) | ||
1117 | return -ENOENT; | 915 | return -ENOENT; |
1118 | 916 | if (!this_cpu_ci) | |
1119 | err = detect_cache_attributes(cpu); | 917 | return -EINVAL; |
1120 | if (err) | 918 | this_cpu_ci->num_levels = 3; |
1121 | return err; | 919 | this_cpu_ci->num_leaves = num_cache_leaves; |
1122 | |||
1123 | /* Allocate all required memory */ | ||
1124 | per_cpu(ici_cache_kobject, cpu) = | ||
1125 | kzalloc(sizeof(struct kobject), GFP_KERNEL); | ||
1126 | if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL)) | ||
1127 | goto err_out; | ||
1128 | |||
1129 | per_cpu(ici_index_kobject, cpu) = kzalloc( | ||
1130 | sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL); | ||
1131 | if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL)) | ||
1132 | goto err_out; | ||
1133 | |||
1134 | return 0; | 920 | return 0; |
1135 | |||
1136 | err_out: | ||
1137 | cpuid4_cache_sysfs_exit(cpu); | ||
1138 | return -ENOMEM; | ||
1139 | } | 921 | } |
1140 | 922 | ||
1141 | static DECLARE_BITMAP(cache_dev_map, NR_CPUS); | 923 | static int __populate_cache_leaves(unsigned int cpu) |
1142 | |||
1143 | /* Add/Remove cache interface for CPU device */ | ||
1144 | static int cache_add_dev(struct device *dev) | ||
1145 | { | 924 | { |
1146 | unsigned int cpu = dev->id; | 925 | unsigned int idx, ret; |
1147 | unsigned long i, j; | 926 | struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); |
1148 | struct _index_kobject *this_object; | 927 | struct cacheinfo *this_leaf = this_cpu_ci->info_list; |
1149 | struct _cpuid4_info *this_leaf; | 928 | struct _cpuid4_info_regs id4_regs = {}; |
1150 | int retval; | ||
1151 | |||
1152 | retval = cpuid4_cache_sysfs_init(cpu); | ||
1153 | if (unlikely(retval < 0)) | ||
1154 | return retval; | ||
1155 | |||
1156 | retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu), | ||
1157 | &ktype_percpu_entry, | ||
1158 | &dev->kobj, "%s", "cache"); | ||
1159 | if (retval < 0) { | ||
1160 | cpuid4_cache_sysfs_exit(cpu); | ||
1161 | return retval; | ||
1162 | } | ||
1163 | 929 | ||
1164 | for (i = 0; i < num_cache_leaves; i++) { | 930 | for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) { |
1165 | this_object = INDEX_KOBJECT_PTR(cpu, i); | 931 | ret = cpuid4_cache_lookup_regs(idx, &id4_regs); |
1166 | this_object->cpu = cpu; | 932 | if (ret) |
1167 | this_object->index = i; | 933 | return ret; |
1168 | 934 | ci_leaf_init(this_leaf++, &id4_regs); | |
1169 | this_leaf = CPUID4_INFO_IDX(cpu, i); | 935 | __cache_cpumap_setup(cpu, idx, &id4_regs); |
1170 | |||
1171 | ktype_cache.default_attrs = default_attrs; | ||
1172 | #ifdef CONFIG_AMD_NB | ||
1173 | if (this_leaf->base.nb) | ||
1174 | ktype_cache.default_attrs = amd_l3_attrs(); | ||
1175 | #endif | ||
1176 | retval = kobject_init_and_add(&(this_object->kobj), | ||
1177 | &ktype_cache, | ||
1178 | per_cpu(ici_cache_kobject, cpu), | ||
1179 | "index%1lu", i); | ||
1180 | if (unlikely(retval)) { | ||
1181 | for (j = 0; j < i; j++) | ||
1182 | kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj)); | ||
1183 | kobject_put(per_cpu(ici_cache_kobject, cpu)); | ||
1184 | cpuid4_cache_sysfs_exit(cpu); | ||
1185 | return retval; | ||
1186 | } | ||
1187 | kobject_uevent(&(this_object->kobj), KOBJ_ADD); | ||
1188 | } | 936 | } |
1189 | cpumask_set_cpu(cpu, to_cpumask(cache_dev_map)); | ||
1190 | |||
1191 | kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD); | ||
1192 | return 0; | 937 | return 0; |
1193 | } | 938 | } |
1194 | 939 | ||
1195 | static void cache_remove_dev(struct device *dev) | 940 | DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) |
1196 | { | 941 | DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) |
1197 | unsigned int cpu = dev->id; | ||
1198 | unsigned long i; | ||
1199 | |||
1200 | if (per_cpu(ici_cpuid4_info, cpu) == NULL) | ||
1201 | return; | ||
1202 | if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map))) | ||
1203 | return; | ||
1204 | cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map)); | ||
1205 | |||
1206 | for (i = 0; i < num_cache_leaves; i++) | ||
1207 | kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj)); | ||
1208 | kobject_put(per_cpu(ici_cache_kobject, cpu)); | ||
1209 | cpuid4_cache_sysfs_exit(cpu); | ||
1210 | } | ||
1211 | |||
1212 | static int cacheinfo_cpu_callback(struct notifier_block *nfb, | ||
1213 | unsigned long action, void *hcpu) | ||
1214 | { | ||
1215 | unsigned int cpu = (unsigned long)hcpu; | ||
1216 | struct device *dev; | ||
1217 | |||
1218 | dev = get_cpu_device(cpu); | ||
1219 | switch (action) { | ||
1220 | case CPU_ONLINE: | ||
1221 | case CPU_ONLINE_FROZEN: | ||
1222 | cache_add_dev(dev); | ||
1223 | break; | ||
1224 | case CPU_DEAD: | ||
1225 | case CPU_DEAD_FROZEN: | ||
1226 | cache_remove_dev(dev); | ||
1227 | break; | ||
1228 | } | ||
1229 | return NOTIFY_OK; | ||
1230 | } | ||
1231 | |||
1232 | static struct notifier_block cacheinfo_cpu_notifier = { | ||
1233 | .notifier_call = cacheinfo_cpu_callback, | ||
1234 | }; | ||
1235 | |||
1236 | static int __init cache_sysfs_init(void) | ||
1237 | { | ||
1238 | int i, err = 0; | ||
1239 | |||
1240 | if (num_cache_leaves == 0) | ||
1241 | return 0; | ||
1242 | |||
1243 | cpu_notifier_register_begin(); | ||
1244 | for_each_online_cpu(i) { | ||
1245 | struct device *dev = get_cpu_device(i); | ||
1246 | |||
1247 | err = cache_add_dev(dev); | ||
1248 | if (err) | ||
1249 | goto out; | ||
1250 | } | ||
1251 | __register_hotcpu_notifier(&cacheinfo_cpu_notifier); | ||
1252 | |||
1253 | out: | ||
1254 | cpu_notifier_register_done(); | ||
1255 | return err; | ||
1256 | } | ||
1257 | |||
1258 | device_initcall(cache_sysfs_init); | ||
1259 | |||
1260 | #endif | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 10b46906767f..fe32074b865b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -14,6 +14,7 @@ enum severity_level { | |||
14 | }; | 14 | }; |
15 | 15 | ||
16 | #define ATTR_LEN 16 | 16 | #define ATTR_LEN 16 |
17 | #define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */ | ||
17 | 18 | ||
18 | /* One object for each MCE bank, shared by all CPUs */ | 19 | /* One object for each MCE bank, shared by all CPUs */ |
19 | struct mce_bank { | 20 | struct mce_bank { |
@@ -23,20 +24,20 @@ struct mce_bank { | |||
23 | char attrname[ATTR_LEN]; /* attribute name */ | 24 | char attrname[ATTR_LEN]; /* attribute name */ |
24 | }; | 25 | }; |
25 | 26 | ||
26 | int mce_severity(struct mce *a, int tolerant, char **msg, bool is_excp); | 27 | extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp); |
27 | struct dentry *mce_get_debugfs_dir(void); | 28 | struct dentry *mce_get_debugfs_dir(void); |
28 | 29 | ||
29 | extern struct mce_bank *mce_banks; | 30 | extern struct mce_bank *mce_banks; |
30 | extern mce_banks_t mce_banks_ce_disabled; | 31 | extern mce_banks_t mce_banks_ce_disabled; |
31 | 32 | ||
32 | #ifdef CONFIG_X86_MCE_INTEL | 33 | #ifdef CONFIG_X86_MCE_INTEL |
33 | unsigned long mce_intel_adjust_timer(unsigned long interval); | 34 | unsigned long cmci_intel_adjust_timer(unsigned long interval); |
34 | void mce_intel_cmci_poll(void); | 35 | bool mce_intel_cmci_poll(void); |
35 | void mce_intel_hcpu_update(unsigned long cpu); | 36 | void mce_intel_hcpu_update(unsigned long cpu); |
36 | void cmci_disable_bank(int bank); | 37 | void cmci_disable_bank(int bank); |
37 | #else | 38 | #else |
38 | # define mce_intel_adjust_timer mce_adjust_timer_default | 39 | # define cmci_intel_adjust_timer mce_adjust_timer_default |
39 | static inline void mce_intel_cmci_poll(void) { } | 40 | static inline bool mce_intel_cmci_poll(void) { return false; } |
40 | static inline void mce_intel_hcpu_update(unsigned long cpu) { } | 41 | static inline void mce_intel_hcpu_update(unsigned long cpu) { } |
41 | static inline void cmci_disable_bank(int bank) { } | 42 | static inline void cmci_disable_bank(int bank) { } |
42 | #endif | 43 | #endif |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 8bb433043a7f..9c682c222071 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c | |||
@@ -186,7 +186,61 @@ static int error_context(struct mce *m) | |||
186 | return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL; | 186 | return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL; |
187 | } | 187 | } |
188 | 188 | ||
189 | int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp) | 189 | /* |
190 | * See AMD Error Scope Hierarchy table in a newer BKDG. For example | ||
191 | * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features" | ||
192 | */ | ||
193 | static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_excp) | ||
194 | { | ||
195 | enum context ctx = error_context(m); | ||
196 | |||
197 | /* Processor Context Corrupt, no need to fumble too much, die! */ | ||
198 | if (m->status & MCI_STATUS_PCC) | ||
199 | return MCE_PANIC_SEVERITY; | ||
200 | |||
201 | if (m->status & MCI_STATUS_UC) { | ||
202 | |||
203 | /* | ||
204 | * On older systems where overflow_recov flag is not present, we | ||
205 | * should simply panic if an error overflow occurs. If | ||
206 | * overflow_recov flag is present and set, then software can try | ||
207 | * to at least kill process to prolong system operation. | ||
208 | */ | ||
209 | if (mce_flags.overflow_recov) { | ||
210 | /* software can try to contain */ | ||
211 | if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL)) | ||
212 | return MCE_PANIC_SEVERITY; | ||
213 | |||
214 | /* kill current process */ | ||
215 | return MCE_AR_SEVERITY; | ||
216 | } else { | ||
217 | /* at least one error was not logged */ | ||
218 | if (m->status & MCI_STATUS_OVER) | ||
219 | return MCE_PANIC_SEVERITY; | ||
220 | } | ||
221 | |||
222 | /* | ||
223 | * For any other case, return MCE_UC_SEVERITY so that we log the | ||
224 | * error and exit #MC handler. | ||
225 | */ | ||
226 | return MCE_UC_SEVERITY; | ||
227 | } | ||
228 | |||
229 | /* | ||
230 | * deferred error: poll handler catches these and adds to mce_ring so | ||
231 | * memory-failure can take recovery actions. | ||
232 | */ | ||
233 | if (m->status & MCI_STATUS_DEFERRED) | ||
234 | return MCE_DEFERRED_SEVERITY; | ||
235 | |||
236 | /* | ||
237 | * corrected error: poll handler catches these and passes responsibility | ||
238 | * of decoding the error to EDAC | ||
239 | */ | ||
240 | return MCE_KEEP_SEVERITY; | ||
241 | } | ||
242 | |||
243 | static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_excp) | ||
190 | { | 244 | { |
191 | enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP); | 245 | enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP); |
192 | enum context ctx = error_context(m); | 246 | enum context ctx = error_context(m); |
@@ -216,6 +270,16 @@ int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp) | |||
216 | } | 270 | } |
217 | } | 271 | } |
218 | 272 | ||
273 | /* Default to mce_severity_intel */ | ||
274 | int (*mce_severity)(struct mce *m, int tolerant, char **msg, bool is_excp) = | ||
275 | mce_severity_intel; | ||
276 | |||
277 | void __init mcheck_vendor_init_severity(void) | ||
278 | { | ||
279 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) | ||
280 | mce_severity = mce_severity_amd; | ||
281 | } | ||
282 | |||
219 | #ifdef CONFIG_DEBUG_FS | 283 | #ifdef CONFIG_DEBUG_FS |
220 | static void *s_start(struct seq_file *f, loff_t *pos) | 284 | static void *s_start(struct seq_file *f, loff_t *pos) |
221 | { | 285 | { |
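The new mce_severity_amd() above is a straight decision ladder over the status bits, documented by the comments in the hunk. The sketch below restates that ladder as a standalone program so the ordering is easy to follow; struct fake_mce, grade_amd() and the severity enum are invented names, and plain booleans stand in for the MCi_STATUS/MCG_STATUS bit fields so no MSR layout is assumed.

/*
 * Userspace restatement of the AMD grading ladder added above. It mirrors
 * the order of checks in mce_severity_amd() without touching real MSRs.
 */
#include <stdio.h>
#include <stdbool.h>

enum severity { S_KEEP, S_DEFERRED, S_UC, S_AR, S_PANIC };

struct fake_mce {
	bool pcc, uc, over, deferred;	/* MCi_STATUS flags */
	bool ripv;			/* MCG_STATUS: return IP valid */
	bool in_kernel;			/* derived from error_context() */
};

static enum severity grade_amd(const struct fake_mce *m, bool overflow_recov)
{
	if (m->pcc)				/* processor context corrupt */
		return S_PANIC;
	if (m->uc) {
		if (overflow_recov) {
			if (!m->ripv && m->in_kernel)
				return S_PANIC;
			return S_AR;		/* try to kill the current process */
		}
		if (m->over)			/* at least one error was lost */
			return S_PANIC;
		return S_UC;			/* log and leave the #MC handler */
	}
	if (m->deferred)
		return S_DEFERRED;		/* picked up later by the poller */
	return S_KEEP;				/* corrected: leave decoding to EDAC */
}

int main(void)
{
	struct fake_mce m = { .uc = true, .ripv = true, .in_kernel = false };

	printf("severity = %d\n", grade_amd(&m, true));	/* expect S_AR */
	return 0;
}

Running it with an uncorrected but recoverable error (UC set, RIPV set, user context, overflow_recov available) grades it S_AR, matching the "kill current process" branch.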
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 3c036cb4a370..e535533d5ab8 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -60,11 +60,12 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex); | |||
60 | #define CREATE_TRACE_POINTS | 60 | #define CREATE_TRACE_POINTS |
61 | #include <trace/events/mce.h> | 61 | #include <trace/events/mce.h> |
62 | 62 | ||
63 | #define SPINUNIT 100 /* 100ns */ | 63 | #define SPINUNIT 100 /* 100ns */ |
64 | 64 | ||
65 | DEFINE_PER_CPU(unsigned, mce_exception_count); | 65 | DEFINE_PER_CPU(unsigned, mce_exception_count); |
66 | 66 | ||
67 | struct mce_bank *mce_banks __read_mostly; | 67 | struct mce_bank *mce_banks __read_mostly; |
68 | struct mce_vendor_flags mce_flags __read_mostly; | ||
68 | 69 | ||
69 | struct mca_config mca_cfg __read_mostly = { | 70 | struct mca_config mca_cfg __read_mostly = { |
70 | .bootlog = -1, | 71 | .bootlog = -1, |
@@ -89,9 +90,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); | |||
89 | static DEFINE_PER_CPU(struct mce, mces_seen); | 90 | static DEFINE_PER_CPU(struct mce, mces_seen); |
90 | static int cpu_missing; | 91 | static int cpu_missing; |
91 | 92 | ||
92 | /* CMCI storm detection filter */ | ||
93 | static DEFINE_PER_CPU(unsigned long, mce_polled_error); | ||
94 | |||
95 | /* | 93 | /* |
96 | * MCA banks polled by the period polling timer for corrected events. | 94 | * MCA banks polled by the period polling timer for corrected events. |
97 | * With Intel CMCI, this only has MCA banks which do not support CMCI (if any). | 95 | * With Intel CMCI, this only has MCA banks which do not support CMCI (if any). |
@@ -622,8 +620,9 @@ DEFINE_PER_CPU(unsigned, mce_poll_count); | |||
622 | * is already totally confused. In this case it's likely it will | 620 | * is already totally confused. In this case it's likely it will |
623 | * not fully execute the machine check handler either. | 621 | * not fully execute the machine check handler either. |
624 | */ | 622 | */ |
625 | void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | 623 | bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) |
626 | { | 624 | { |
625 | bool error_logged = false; | ||
627 | struct mce m; | 626 | struct mce m; |
628 | int severity; | 627 | int severity; |
629 | int i; | 628 | int i; |
@@ -646,7 +645,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
646 | if (!(m.status & MCI_STATUS_VAL)) | 645 | if (!(m.status & MCI_STATUS_VAL)) |
647 | continue; | 646 | continue; |
648 | 647 | ||
649 | this_cpu_write(mce_polled_error, 1); | 648 | |
650 | /* | 649 | /* |
651 | * Uncorrected or signalled events are handled by the exception | 650 | * Uncorrected or signalled events are handled by the exception |
652 | * handler when it is enabled, so don't process those here. | 651 | * handler when it is enabled, so don't process those here. |
@@ -679,8 +678,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
679 | * Don't get the IP here because it's unlikely to | 678 | * Don't get the IP here because it's unlikely to |
680 | * have anything to do with the actual error location. | 679 | * have anything to do with the actual error location. |
681 | */ | 680 | */ |
682 | if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) | 681 | if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) { |
682 | error_logged = true; | ||
683 | mce_log(&m); | 683 | mce_log(&m); |
684 | } | ||
684 | 685 | ||
685 | /* | 686 | /* |
686 | * Clear state for this bank. | 687 | * Clear state for this bank. |
@@ -694,6 +695,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
694 | */ | 695 | */ |
695 | 696 | ||
696 | sync_core(); | 697 | sync_core(); |
698 | |||
699 | return error_logged; | ||
697 | } | 700 | } |
698 | EXPORT_SYMBOL_GPL(machine_check_poll); | 701 | EXPORT_SYMBOL_GPL(machine_check_poll); |
699 | 702 | ||
@@ -813,7 +816,7 @@ static void mce_reign(void) | |||
813 | * other CPUs. | 816 | * other CPUs. |
814 | */ | 817 | */ |
815 | if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) | 818 | if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) |
816 | mce_panic("Fatal Machine check", m, msg); | 819 | mce_panic("Fatal machine check", m, msg); |
817 | 820 | ||
818 | /* | 821 | /* |
819 | * For UC somewhere we let the CPU who detects it handle it. | 822 | * For UC somewhere we let the CPU who detects it handle it. |
@@ -826,7 +829,7 @@ static void mce_reign(void) | |||
826 | * source or one CPU is hung. Panic. | 829 | * source or one CPU is hung. Panic. |
827 | */ | 830 | */ |
828 | if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3) | 831 | if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3) |
829 | mce_panic("Machine check from unknown source", NULL, NULL); | 832 | mce_panic("Fatal machine check from unknown source", NULL, NULL); |
830 | 833 | ||
831 | /* | 834 | /* |
832 | * Now clear all the mces_seen so that they don't reappear on | 835 | * Now clear all the mces_seen so that they don't reappear on |
@@ -1258,7 +1261,7 @@ void mce_log_therm_throt_event(__u64 status) | |||
1258 | * poller finds an MCE, poll 2x faster. When the poller finds no more | 1261 | * poller finds an MCE, poll 2x faster. When the poller finds no more |
1259 | * errors, poll 2x slower (up to check_interval seconds). | 1262 | * errors, poll 2x slower (up to check_interval seconds). |
1260 | */ | 1263 | */ |
1261 | static unsigned long check_interval = 5 * 60; /* 5 minutes */ | 1264 | static unsigned long check_interval = INITIAL_CHECK_INTERVAL; |
1262 | 1265 | ||
1263 | static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ | 1266 | static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ |
1264 | static DEFINE_PER_CPU(struct timer_list, mce_timer); | 1267 | static DEFINE_PER_CPU(struct timer_list, mce_timer); |
@@ -1268,49 +1271,57 @@ static unsigned long mce_adjust_timer_default(unsigned long interval) | |||
1268 | return interval; | 1271 | return interval; |
1269 | } | 1272 | } |
1270 | 1273 | ||
1271 | static unsigned long (*mce_adjust_timer)(unsigned long interval) = | 1274 | static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; |
1272 | mce_adjust_timer_default; | ||
1273 | 1275 | ||
1274 | static int cmc_error_seen(void) | 1276 | static void __restart_timer(struct timer_list *t, unsigned long interval) |
1275 | { | 1277 | { |
1276 | unsigned long *v = this_cpu_ptr(&mce_polled_error); | 1278 | unsigned long when = jiffies + interval; |
1279 | unsigned long flags; | ||
1280 | |||
1281 | local_irq_save(flags); | ||
1277 | 1282 | ||
1278 | return test_and_clear_bit(0, v); | 1283 | if (timer_pending(t)) { |
1284 | if (time_before(when, t->expires)) | ||
1285 | mod_timer_pinned(t, when); | ||
1286 | } else { | ||
1287 | t->expires = round_jiffies(when); | ||
1288 | add_timer_on(t, smp_processor_id()); | ||
1289 | } | ||
1290 | |||
1291 | local_irq_restore(flags); | ||
1279 | } | 1292 | } |
1280 | 1293 | ||
1281 | static void mce_timer_fn(unsigned long data) | 1294 | static void mce_timer_fn(unsigned long data) |
1282 | { | 1295 | { |
1283 | struct timer_list *t = this_cpu_ptr(&mce_timer); | 1296 | struct timer_list *t = this_cpu_ptr(&mce_timer); |
1297 | int cpu = smp_processor_id(); | ||
1284 | unsigned long iv; | 1298 | unsigned long iv; |
1285 | int notify; | ||
1286 | 1299 | ||
1287 | WARN_ON(smp_processor_id() != data); | 1300 | WARN_ON(cpu != data); |
1301 | |||
1302 | iv = __this_cpu_read(mce_next_interval); | ||
1288 | 1303 | ||
1289 | if (mce_available(this_cpu_ptr(&cpu_info))) { | 1304 | if (mce_available(this_cpu_ptr(&cpu_info))) { |
1290 | machine_check_poll(MCP_TIMESTAMP, | 1305 | machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks)); |
1291 | this_cpu_ptr(&mce_poll_banks)); | 1306 | |
1292 | mce_intel_cmci_poll(); | 1307 | if (mce_intel_cmci_poll()) { |
1308 | iv = mce_adjust_timer(iv); | ||
1309 | goto done; | ||
1310 | } | ||
1293 | } | 1311 | } |
1294 | 1312 | ||
1295 | /* | 1313 | /* |
1296 | * Alert userspace if needed. If we logged an MCE, reduce the | 1314 | * Alert userspace if needed. If we logged an MCE, reduce the polling |
1297 | * polling interval, otherwise increase the polling interval. | 1315 | * interval, otherwise increase the polling interval. |
1298 | */ | 1316 | */ |
1299 | iv = __this_cpu_read(mce_next_interval); | 1317 | if (mce_notify_irq()) |
1300 | notify = mce_notify_irq(); | ||
1301 | notify |= cmc_error_seen(); | ||
1302 | if (notify) { | ||
1303 | iv = max(iv / 2, (unsigned long) HZ/100); | 1318 | iv = max(iv / 2, (unsigned long) HZ/100); |
1304 | } else { | 1319 | else |
1305 | iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); | 1320 | iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); |
1306 | iv = mce_adjust_timer(iv); | 1321 | |
1307 | } | 1322 | done: |
1308 | __this_cpu_write(mce_next_interval, iv); | 1323 | __this_cpu_write(mce_next_interval, iv); |
1309 | /* Might have become 0 after CMCI storm subsided */ | 1324 | __restart_timer(t, iv); |
1310 | if (iv) { | ||
1311 | t->expires = jiffies + iv; | ||
1312 | add_timer_on(t, smp_processor_id()); | ||
1313 | } | ||
1314 | } | 1325 | } |
1315 | 1326 | ||
1316 | /* | 1327 | /* |
@@ -1319,16 +1330,10 @@ static void mce_timer_fn(unsigned long data) | |||
1319 | void mce_timer_kick(unsigned long interval) | 1330 | void mce_timer_kick(unsigned long interval) |
1320 | { | 1331 | { |
1321 | struct timer_list *t = this_cpu_ptr(&mce_timer); | 1332 | struct timer_list *t = this_cpu_ptr(&mce_timer); |
1322 | unsigned long when = jiffies + interval; | ||
1323 | unsigned long iv = __this_cpu_read(mce_next_interval); | 1333 | unsigned long iv = __this_cpu_read(mce_next_interval); |
1324 | 1334 | ||
1325 | if (timer_pending(t)) { | 1335 | __restart_timer(t, interval); |
1326 | if (time_before(when, t->expires)) | 1336 | |
1327 | mod_timer_pinned(t, when); | ||
1328 | } else { | ||
1329 | t->expires = round_jiffies(when); | ||
1330 | add_timer_on(t, smp_processor_id()); | ||
1331 | } | ||
1332 | if (interval < iv) | 1337 | if (interval < iv) |
1333 | __this_cpu_write(mce_next_interval, interval); | 1338 | __this_cpu_write(mce_next_interval, interval); |
1334 | } | 1339 | } |
@@ -1525,45 +1530,46 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) | |||
1525 | * Various K7s with broken bank 0 around. Always disable | 1530 | * Various K7s with broken bank 0 around. Always disable |
1526 | * by default. | 1531 | * by default. |
1527 | */ | 1532 | */ |
1528 | if (c->x86 == 6 && cfg->banks > 0) | 1533 | if (c->x86 == 6 && cfg->banks > 0) |
1529 | mce_banks[0].ctl = 0; | 1534 | mce_banks[0].ctl = 0; |
1530 | 1535 | ||
1531 | /* | 1536 | /* |
1532 | * Turn off MC4_MISC thresholding banks on those models since | 1537 | * overflow_recov is supported for F15h Models 00h-0fh |
1533 | * they're not supported there. | 1538 | * even though we don't have a CPUID bit for it. |
1534 | */ | 1539 | */ |
1535 | if (c->x86 == 0x15 && | 1540 | if (c->x86 == 0x15 && c->x86_model <= 0xf) |
1536 | (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) { | 1541 | mce_flags.overflow_recov = 1; |
1537 | int i; | 1542 | |
1538 | u64 val, hwcr; | 1543 | /* |
1539 | bool need_toggle; | 1544 | * Turn off MC4_MISC thresholding banks on those models since |
1540 | u32 msrs[] = { | 1545 | * they're not supported there. |
1546 | */ | ||
1547 | if (c->x86 == 0x15 && | ||
1548 | (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) { | ||
1549 | int i; | ||
1550 | u64 hwcr; | ||
1551 | bool need_toggle; | ||
1552 | u32 msrs[] = { | ||
1541 | 0x00000413, /* MC4_MISC0 */ | 1553 | 0x00000413, /* MC4_MISC0 */ |
1542 | 0xc0000408, /* MC4_MISC1 */ | 1554 | 0xc0000408, /* MC4_MISC1 */ |
1543 | }; | 1555 | }; |
1544 | 1556 | ||
1545 | rdmsrl(MSR_K7_HWCR, hwcr); | 1557 | rdmsrl(MSR_K7_HWCR, hwcr); |
1546 | 1558 | ||
1547 | /* McStatusWrEn has to be set */ | 1559 | /* McStatusWrEn has to be set */ |
1548 | need_toggle = !(hwcr & BIT(18)); | 1560 | need_toggle = !(hwcr & BIT(18)); |
1549 | 1561 | ||
1550 | if (need_toggle) | 1562 | if (need_toggle) |
1551 | wrmsrl(MSR_K7_HWCR, hwcr | BIT(18)); | 1563 | wrmsrl(MSR_K7_HWCR, hwcr | BIT(18)); |
1552 | 1564 | ||
1553 | for (i = 0; i < ARRAY_SIZE(msrs); i++) { | 1565 | /* Clear CntP bit safely */ |
1554 | rdmsrl(msrs[i], val); | 1566 | for (i = 0; i < ARRAY_SIZE(msrs); i++) |
1567 | msr_clear_bit(msrs[i], 62); | ||
1555 | 1568 | ||
1556 | /* CntP bit set? */ | 1569 | /* restore old settings */ |
1557 | if (val & BIT_64(62)) { | 1570 | if (need_toggle) |
1558 | val &= ~BIT_64(62); | 1571 | wrmsrl(MSR_K7_HWCR, hwcr); |
1559 | wrmsrl(msrs[i], val); | 1572 | } |
1560 | } | ||
1561 | } | ||
1562 | |||
1563 | /* restore old settings */ | ||
1564 | if (need_toggle) | ||
1565 | wrmsrl(MSR_K7_HWCR, hwcr); | ||
1566 | } | ||
1567 | } | 1573 | } |
1568 | 1574 | ||
1569 | if (c->x86_vendor == X86_VENDOR_INTEL) { | 1575 | if (c->x86_vendor == X86_VENDOR_INTEL) { |
@@ -1629,10 +1635,11 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) | |||
1629 | switch (c->x86_vendor) { | 1635 | switch (c->x86_vendor) { |
1630 | case X86_VENDOR_INTEL: | 1636 | case X86_VENDOR_INTEL: |
1631 | mce_intel_feature_init(c); | 1637 | mce_intel_feature_init(c); |
1632 | mce_adjust_timer = mce_intel_adjust_timer; | 1638 | mce_adjust_timer = cmci_intel_adjust_timer; |
1633 | break; | 1639 | break; |
1634 | case X86_VENDOR_AMD: | 1640 | case X86_VENDOR_AMD: |
1635 | mce_amd_feature_init(c); | 1641 | mce_amd_feature_init(c); |
1642 | mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1; | ||
1636 | break; | 1643 | break; |
1637 | default: | 1644 | default: |
1638 | break; | 1645 | break; |
@@ -2017,6 +2024,7 @@ __setup("mce", mcheck_enable); | |||
2017 | int __init mcheck_init(void) | 2024 | int __init mcheck_init(void) |
2018 | { | 2025 | { |
2019 | mcheck_intel_therm_init(); | 2026 | mcheck_intel_therm_init(); |
2027 | mcheck_vendor_init_severity(); | ||
2020 | 2028 | ||
2021 | return 0; | 2029 | return 0; |
2022 | } | 2030 | } |
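The timer rework in mce.c keeps the long-standing back-off policy described in the comment above check_interval: halve the poll interval (never below HZ/100) when the last poll logged an MCE, otherwise double it up to check_interval seconds. The simulation below only shows that arithmetic; next_interval() is a made-up helper, HZ is fixed at 1000 for the example, and round_jiffies_relative() as well as the vendor mce_adjust_timer hook are left out.

/*
 * Minimal simulation of the adaptive polling interval used by mce_timer_fn():
 * halve on a logged error, double on a quiet poll, clamped at both ends.
 */
#include <stdio.h>

#define HZ		1000UL			/* arbitrary for this sketch */
#define CHECK_INTERVAL	(5UL * 60)		/* seconds, as in mce.c */

static unsigned long next_interval(unsigned long iv, int error_logged)
{
	unsigned long min_iv = HZ / 100;
	unsigned long max_iv = CHECK_INTERVAL * HZ;

	if (error_logged)
		iv = (iv / 2 > min_iv) ? iv / 2 : min_iv;
	else
		iv = (iv * 2 < max_iv) ? iv * 2 : max_iv;
	return iv;
}

int main(void)
{
	unsigned long iv = CHECK_INTERVAL * HZ;
	int i;

	for (i = 0; i < 5; i++) {
		iv = next_interval(iv, 1);	/* errors seen: poll faster */
		printf("next poll in %lu jiffies\n", iv);
	}
	return 0;
}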
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index f1c3769bbd64..55ad9b37cae8 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -79,7 +79,7 @@ static inline bool is_shared_bank(int bank) | |||
79 | return (bank == 4); | 79 | return (bank == 4); |
80 | } | 80 | } |
81 | 81 | ||
82 | static const char * const bank4_names(struct threshold_block *b) | 82 | static const char *bank4_names(const struct threshold_block *b) |
83 | { | 83 | { |
84 | switch (b->address) { | 84 | switch (b->address) { |
85 | /* MSR4_MISC0 */ | 85 | /* MSR4_MISC0 */ |
@@ -250,6 +250,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
250 | if (!b.interrupt_capable) | 250 | if (!b.interrupt_capable) |
251 | goto init; | 251 | goto init; |
252 | 252 | ||
253 | b.interrupt_enable = 1; | ||
253 | new = (high & MASK_LVTOFF_HI) >> 20; | 254 | new = (high & MASK_LVTOFF_HI) >> 20; |
254 | offset = setup_APIC_mce(offset, new); | 255 | offset = setup_APIC_mce(offset, new); |
255 | 256 | ||
@@ -322,6 +323,8 @@ static void amd_threshold_interrupt(void) | |||
322 | log: | 323 | log: |
323 | mce_setup(&m); | 324 | mce_setup(&m); |
324 | rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status); | 325 | rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status); |
326 | if (!(m.status & MCI_STATUS_VAL)) | ||
327 | return; | ||
325 | m.misc = ((u64)high << 32) | low; | 328 | m.misc = ((u64)high << 32) | low; |
326 | m.bank = bank; | 329 | m.bank = bank; |
327 | mce_log(&m); | 330 | mce_log(&m); |
@@ -497,10 +500,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, | |||
497 | b->interrupt_capable = lvt_interrupt_supported(bank, high); | 500 | b->interrupt_capable = lvt_interrupt_supported(bank, high); |
498 | b->threshold_limit = THRESHOLD_MAX; | 501 | b->threshold_limit = THRESHOLD_MAX; |
499 | 502 | ||
500 | if (b->interrupt_capable) | 503 | if (b->interrupt_capable) { |
501 | threshold_ktype.default_attrs[2] = &interrupt_enable.attr; | 504 | threshold_ktype.default_attrs[2] = &interrupt_enable.attr; |
502 | else | 505 | b->interrupt_enable = 1; |
506 | } else { | ||
503 | threshold_ktype.default_attrs[2] = NULL; | 507 | threshold_ktype.default_attrs[2] = NULL; |
508 | } | ||
504 | 509 | ||
505 | INIT_LIST_HEAD(&b->miscj); | 510 | INIT_LIST_HEAD(&b->miscj); |
506 | 511 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index b3c97bafc123..b4a41cf030ed 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -39,6 +39,15 @@ | |||
39 | static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); | 39 | static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); |
40 | 40 | ||
41 | /* | 41 | /* |
42 | * CMCI storm detection backoff counter | ||
43 | * | ||
44 | * During storm, we reset this counter to INITIAL_CHECK_INTERVAL in case we've | ||
45 | * encountered an error. If not, we decrement it by one. We signal the end of | ||
46 | * the CMCI storm when it reaches 0. | ||
47 | */ | ||
48 | static DEFINE_PER_CPU(int, cmci_backoff_cnt); | ||
49 | |||
50 | /* | ||
42 | * cmci_discover_lock protects against parallel discovery attempts | 51 | * cmci_discover_lock protects against parallel discovery attempts |
43 | * which could race against each other. | 52 | * which could race against each other. |
44 | */ | 53 | */ |
@@ -46,7 +55,7 @@ static DEFINE_RAW_SPINLOCK(cmci_discover_lock); | |||
46 | 55 | ||
47 | #define CMCI_THRESHOLD 1 | 56 | #define CMCI_THRESHOLD 1 |
48 | #define CMCI_POLL_INTERVAL (30 * HZ) | 57 | #define CMCI_POLL_INTERVAL (30 * HZ) |
49 | #define CMCI_STORM_INTERVAL (1 * HZ) | 58 | #define CMCI_STORM_INTERVAL (HZ) |
50 | #define CMCI_STORM_THRESHOLD 15 | 59 | #define CMCI_STORM_THRESHOLD 15 |
51 | 60 | ||
52 | static DEFINE_PER_CPU(unsigned long, cmci_time_stamp); | 61 | static DEFINE_PER_CPU(unsigned long, cmci_time_stamp); |
@@ -82,11 +91,21 @@ static int cmci_supported(int *banks) | |||
82 | return !!(cap & MCG_CMCI_P); | 91 | return !!(cap & MCG_CMCI_P); |
83 | } | 92 | } |
84 | 93 | ||
85 | void mce_intel_cmci_poll(void) | 94 | bool mce_intel_cmci_poll(void) |
86 | { | 95 | { |
87 | if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE) | 96 | if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE) |
88 | return; | 97 | return false; |
89 | machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); | 98 | |
99 | /* | ||
100 | * Reset the counter if we've logged an error in the last poll | ||
101 | * during the storm. | ||
102 | */ | ||
103 | if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned))) | ||
104 | this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL); | ||
105 | else | ||
106 | this_cpu_dec(cmci_backoff_cnt); | ||
107 | |||
108 | return true; | ||
90 | } | 109 | } |
91 | 110 | ||
92 | void mce_intel_hcpu_update(unsigned long cpu) | 111 | void mce_intel_hcpu_update(unsigned long cpu) |
@@ -97,31 +116,32 @@ void mce_intel_hcpu_update(unsigned long cpu) | |||
97 | per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE; | 116 | per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE; |
98 | } | 117 | } |
99 | 118 | ||
100 | unsigned long mce_intel_adjust_timer(unsigned long interval) | 119 | unsigned long cmci_intel_adjust_timer(unsigned long interval) |
101 | { | 120 | { |
102 | int r; | 121 | if ((this_cpu_read(cmci_backoff_cnt) > 0) && |
103 | 122 | (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) { | |
104 | if (interval < CMCI_POLL_INTERVAL) | 123 | mce_notify_irq(); |
105 | return interval; | 124 | return CMCI_STORM_INTERVAL; |
125 | } | ||
106 | 126 | ||
107 | switch (__this_cpu_read(cmci_storm_state)) { | 127 | switch (__this_cpu_read(cmci_storm_state)) { |
108 | case CMCI_STORM_ACTIVE: | 128 | case CMCI_STORM_ACTIVE: |
129 | |||
109 | /* | 130 | /* |
110 | * We switch back to interrupt mode once the poll timer has | 131 | * We switch back to interrupt mode once the poll timer has |
111 | * silenced itself. That means no events recorded and the | 132 | * silenced itself. That means no events recorded and the timer |
112 | * timer interval is back to our poll interval. | 133 | * interval is back to our poll interval. |
113 | */ | 134 | */ |
114 | __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED); | 135 | __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED); |
115 | r = atomic_sub_return(1, &cmci_storm_on_cpus); | 136 | if (!atomic_sub_return(1, &cmci_storm_on_cpus)) |
116 | if (r == 0) | ||
117 | pr_notice("CMCI storm subsided: switching to interrupt mode\n"); | 137 | pr_notice("CMCI storm subsided: switching to interrupt mode\n"); |
138 | |||
118 | /* FALLTHROUGH */ | 139 | /* FALLTHROUGH */ |
119 | 140 | ||
120 | case CMCI_STORM_SUBSIDED: | 141 | case CMCI_STORM_SUBSIDED: |
121 | /* | 142 | /* |
122 | * We wait for all cpus to go back to SUBSIDED | 143 | * We wait for all CPUs to go back to SUBSIDED state. When that |
123 | * state. When that happens we switch back to | 144 | * happens we switch back to interrupt mode. |
124 | * interrupt mode. | ||
125 | */ | 145 | */ |
126 | if (!atomic_read(&cmci_storm_on_cpus)) { | 146 | if (!atomic_read(&cmci_storm_on_cpus)) { |
127 | __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE); | 147 | __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE); |
@@ -130,10 +150,8 @@ unsigned long mce_intel_adjust_timer(unsigned long interval) | |||
130 | } | 150 | } |
131 | return CMCI_POLL_INTERVAL; | 151 | return CMCI_POLL_INTERVAL; |
132 | default: | 152 | default: |
133 | /* | 153 | |
134 | * We have shiny weather. Let the poll do whatever it | 154 | /* We have shiny weather. Let the poll do whatever it thinks. */ |
135 | * thinks. | ||
136 | */ | ||
137 | return interval; | 155 | return interval; |
138 | } | 156 | } |
139 | } | 157 | } |
@@ -178,7 +196,8 @@ static bool cmci_storm_detect(void) | |||
178 | cmci_storm_disable_banks(); | 196 | cmci_storm_disable_banks(); |
179 | __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); | 197 | __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); |
180 | r = atomic_add_return(1, &cmci_storm_on_cpus); | 198 | r = atomic_add_return(1, &cmci_storm_on_cpus); |
181 | mce_timer_kick(CMCI_POLL_INTERVAL); | 199 | mce_timer_kick(CMCI_STORM_INTERVAL); |
200 | this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL); | ||
182 | 201 | ||
183 | if (r == 1) | 202 | if (r == 1) |
184 | pr_notice("CMCI storm detected: switching to poll mode\n"); | 203 | pr_notice("CMCI storm detected: switching to poll mode\n"); |
@@ -195,6 +214,7 @@ static void intel_threshold_interrupt(void) | |||
195 | { | 214 | { |
196 | if (cmci_storm_detect()) | 215 | if (cmci_storm_detect()) |
197 | return; | 216 | return; |
217 | |||
198 | machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); | 218 | machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); |
199 | mce_notify_irq(); | 219 | mce_notify_irq(); |
200 | } | 220 | } |
@@ -286,6 +306,7 @@ void cmci_recheck(void) | |||
286 | 306 | ||
287 | if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks)) | 307 | if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks)) |
288 | return; | 308 | return; |
309 | |||
289 | local_irq_save(flags); | 310 | local_irq_save(flags); |
290 | machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); | 311 | machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); |
291 | local_irq_restore(flags); | 312 | local_irq_restore(flags); |
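The cmci_backoff_cnt logic added above behaves like a leaky counter: every storm-mode poll that logs an error reloads it to INITIAL_CHECK_INTERVAL, every quiet poll decrements it, and cmci_intel_adjust_timer() keeps the one-second storm interval only while it stays positive. The toy model below shows that behaviour in isolation; storm_poll() is an invented name and the loop stands in for the per-CPU timer.

/*
 * Toy model of the cmci_backoff_cnt handling: reload on a noisy poll,
 * drain by one on each quiet poll, storm mode ends when it reaches zero.
 */
#include <stdio.h>
#include <stdbool.h>

static int cmci_backoff_cnt;

static void storm_poll(bool error_logged)
{
	if (error_logged)
		cmci_backoff_cnt = 5 * 60;	/* INITIAL_CHECK_INTERVAL */
	else
		cmci_backoff_cnt--;
}

int main(void)
{
	int polls = 0;

	cmci_backoff_cnt = 5 * 60;		/* storm just detected */
	while (cmci_backoff_cnt > 0) {		/* stay in 1 s poll mode */
		storm_poll(false);		/* no further errors in this run */
		polls++;
	}
	printf("storm considered over after %d quiet polls\n", polls);
	return 0;
}

With no further errors the counter drains after 5 * 60 quiet polls, i.e. roughly five minutes of one-second polling before control returns to the normal adjust path.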
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index bfbbe6195e2d..12829c3ced3c 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c | |||
@@ -21,7 +21,6 @@ | |||
21 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | 21 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
22 | 22 | ||
23 | #include <linux/firmware.h> | 23 | #include <linux/firmware.h> |
24 | #include <linux/pci_ids.h> | ||
25 | #include <linux/uaccess.h> | 24 | #include <linux/uaccess.h> |
26 | #include <linux/vmalloc.h> | 25 | #include <linux/vmalloc.h> |
27 | #include <linux/kernel.h> | 26 | #include <linux/kernel.h> |
diff --git a/arch/x86/kernel/cpu/microcode/core_early.c b/arch/x86/kernel/cpu/microcode/core_early.c index d45df4bd16ab..a413a69cbd74 100644 --- a/arch/x86/kernel/cpu/microcode/core_early.c +++ b/arch/x86/kernel/cpu/microcode/core_early.c | |||
@@ -23,57 +23,6 @@ | |||
23 | #include <asm/processor.h> | 23 | #include <asm/processor.h> |
24 | #include <asm/cmdline.h> | 24 | #include <asm/cmdline.h> |
25 | 25 | ||
26 | #define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24)) | ||
27 | #define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u') | ||
28 | #define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I') | ||
29 | #define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l') | ||
30 | #define CPUID_AMD1 QCHAR('A', 'u', 't', 'h') | ||
31 | #define CPUID_AMD2 QCHAR('e', 'n', 't', 'i') | ||
32 | #define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D') | ||
33 | |||
34 | #define CPUID_IS(a, b, c, ebx, ecx, edx) \ | ||
35 | (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c)))) | ||
36 | |||
37 | /* | ||
38 | * In early loading microcode phase on BSP, boot_cpu_data is not set up yet. | ||
39 | * x86_vendor() gets vendor id for BSP. | ||
40 | * | ||
41 | * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify | ||
42 | * coding, we still use x86_vendor() to get vendor id for AP. | ||
43 | * | ||
44 | * x86_vendor() gets vendor information directly through cpuid. | ||
45 | */ | ||
46 | static int x86_vendor(void) | ||
47 | { | ||
48 | u32 eax = 0x00000000; | ||
49 | u32 ebx, ecx = 0, edx; | ||
50 | |||
51 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
52 | |||
53 | if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx)) | ||
54 | return X86_VENDOR_INTEL; | ||
55 | |||
56 | if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx)) | ||
57 | return X86_VENDOR_AMD; | ||
58 | |||
59 | return X86_VENDOR_UNKNOWN; | ||
60 | } | ||
61 | |||
62 | static int x86_family(void) | ||
63 | { | ||
64 | u32 eax = 0x00000001; | ||
65 | u32 ebx, ecx = 0, edx; | ||
66 | int x86; | ||
67 | |||
68 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
69 | |||
70 | x86 = (eax >> 8) & 0xf; | ||
71 | if (x86 == 15) | ||
72 | x86 += (eax >> 20) & 0xff; | ||
73 | |||
74 | return x86; | ||
75 | } | ||
76 | |||
77 | static bool __init check_loader_disabled_bsp(void) | 26 | static bool __init check_loader_disabled_bsp(void) |
78 | { | 27 | { |
79 | #ifdef CONFIG_X86_32 | 28 | #ifdef CONFIG_X86_32 |
@@ -96,7 +45,7 @@ static bool __init check_loader_disabled_bsp(void) | |||
96 | 45 | ||
97 | void __init load_ucode_bsp(void) | 46 | void __init load_ucode_bsp(void) |
98 | { | 47 | { |
99 | int vendor, x86; | 48 | int vendor, family; |
100 | 49 | ||
101 | if (check_loader_disabled_bsp()) | 50 | if (check_loader_disabled_bsp()) |
102 | return; | 51 | return; |
@@ -105,15 +54,15 @@ void __init load_ucode_bsp(void) | |||
105 | return; | 54 | return; |
106 | 55 | ||
107 | vendor = x86_vendor(); | 56 | vendor = x86_vendor(); |
108 | x86 = x86_family(); | 57 | family = x86_family(); |
109 | 58 | ||
110 | switch (vendor) { | 59 | switch (vendor) { |
111 | case X86_VENDOR_INTEL: | 60 | case X86_VENDOR_INTEL: |
112 | if (x86 >= 6) | 61 | if (family >= 6) |
113 | load_ucode_intel_bsp(); | 62 | load_ucode_intel_bsp(); |
114 | break; | 63 | break; |
115 | case X86_VENDOR_AMD: | 64 | case X86_VENDOR_AMD: |
116 | if (x86 >= 0x10) | 65 | if (family >= 0x10) |
117 | load_ucode_amd_bsp(); | 66 | load_ucode_amd_bsp(); |
118 | break; | 67 | break; |
119 | default: | 68 | default: |
@@ -132,7 +81,7 @@ static bool check_loader_disabled_ap(void) | |||
132 | 81 | ||
133 | void load_ucode_ap(void) | 82 | void load_ucode_ap(void) |
134 | { | 83 | { |
135 | int vendor, x86; | 84 | int vendor, family; |
136 | 85 | ||
137 | if (check_loader_disabled_ap()) | 86 | if (check_loader_disabled_ap()) |
138 | return; | 87 | return; |
@@ -141,15 +90,15 @@ void load_ucode_ap(void) | |||
141 | return; | 90 | return; |
142 | 91 | ||
143 | vendor = x86_vendor(); | 92 | vendor = x86_vendor(); |
144 | x86 = x86_family(); | 93 | family = x86_family(); |
145 | 94 | ||
146 | switch (vendor) { | 95 | switch (vendor) { |
147 | case X86_VENDOR_INTEL: | 96 | case X86_VENDOR_INTEL: |
148 | if (x86 >= 6) | 97 | if (family >= 6) |
149 | load_ucode_intel_ap(); | 98 | load_ucode_intel_ap(); |
150 | break; | 99 | break; |
151 | case X86_VENDOR_AMD: | 100 | case X86_VENDOR_AMD: |
152 | if (x86 >= 0x10) | 101 | if (family >= 0x10) |
153 | load_ucode_amd_ap(); | 102 | load_ucode_amd_ap(); |
154 | break; | 103 | break; |
155 | default: | 104 | default: |
@@ -179,18 +128,18 @@ int __init save_microcode_in_initrd(void) | |||
179 | 128 | ||
180 | void reload_early_microcode(void) | 129 | void reload_early_microcode(void) |
181 | { | 130 | { |
182 | int vendor, x86; | 131 | int vendor, family; |
183 | 132 | ||
184 | vendor = x86_vendor(); | 133 | vendor = x86_vendor(); |
185 | x86 = x86_family(); | 134 | family = x86_family(); |
186 | 135 | ||
187 | switch (vendor) { | 136 | switch (vendor) { |
188 | case X86_VENDOR_INTEL: | 137 | case X86_VENDOR_INTEL: |
189 | if (x86 >= 6) | 138 | if (family >= 6) |
190 | reload_ucode_intel(); | 139 | reload_ucode_intel(); |
191 | break; | 140 | break; |
192 | case X86_VENDOR_AMD: | 141 | case X86_VENDOR_AMD: |
193 | if (x86 >= 0x10) | 142 | if (family >= 0x10) |
194 | reload_ucode_amd(); | 143 | reload_ucode_amd(); |
195 | break; | 144 | break; |
196 | default: | 145 | default: |
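The helpers deleted from core_early.c (and the get_x86_family()/get_x86_model() pair removed from intel_early.c further down) decoded the CPUID signature by hand; the rework moves that to shared code. The standalone sketch below reproduces the same decoding for reference: base family is bits 11:8, the extended family byte is added only when the base family is 0xf, and the extended model nibble applies to families 0x6 and 0xf. The sample signature value is made up.

/*
 * Standalone re-implementation of the signature decoding that the removed
 * early-loader helpers performed, for illustration only.
 */
#include <stdio.h>

static unsigned int x86_family(unsigned int sig)
{
	unsigned int fam = (sig >> 8) & 0xf;

	if (fam == 0xf)
		fam += (sig >> 20) & 0xff;	/* extended family */
	return fam;
}

static unsigned int x86_model(unsigned int sig)
{
	unsigned int fam = x86_family(sig);
	unsigned int model = (sig >> 4) & 0xf;

	if (fam == 0x6 || fam == 0xf)
		model += ((sig >> 16) & 0xf) << 4;	/* extended model */
	return model;
}

int main(void)
{
	unsigned int sig = 0x000306c3;	/* hypothetical CPUID(1).EAX value */

	printf("family 0x%x, model 0x%x\n", x86_family(sig), x86_model(sig));
	return 0;
}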
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 746e7fd08aad..a41beadb3db9 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c | |||
@@ -124,7 +124,7 @@ static int get_matching_mc(struct microcode_intel *mc_intel, int cpu) | |||
124 | cpf = cpu_sig.pf; | 124 | cpf = cpu_sig.pf; |
125 | crev = cpu_sig.rev; | 125 | crev = cpu_sig.rev; |
126 | 126 | ||
127 | return get_matching_microcode(csig, cpf, mc_intel, crev); | 127 | return get_matching_microcode(csig, cpf, crev, mc_intel); |
128 | } | 128 | } |
129 | 129 | ||
130 | static int apply_microcode_intel(int cpu) | 130 | static int apply_microcode_intel(int cpu) |
@@ -226,7 +226,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
226 | 226 | ||
227 | csig = uci->cpu_sig.sig; | 227 | csig = uci->cpu_sig.sig; |
228 | cpf = uci->cpu_sig.pf; | 228 | cpf = uci->cpu_sig.pf; |
229 | if (get_matching_microcode(csig, cpf, mc, new_rev)) { | 229 | if (get_matching_microcode(csig, cpf, new_rev, mc)) { |
230 | vfree(new_mc); | 230 | vfree(new_mc); |
231 | new_rev = mc_header.rev; | 231 | new_rev = mc_header.rev; |
232 | new_mc = mc; | 232 | new_mc = mc; |
diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c index 420eb933189c..2f49ab4ac0ae 100644 --- a/arch/x86/kernel/cpu/microcode/intel_early.c +++ b/arch/x86/kernel/cpu/microcode/intel_early.c | |||
@@ -16,6 +16,14 @@ | |||
16 | * as published by the Free Software Foundation; either version | 16 | * as published by the Free Software Foundation; either version |
17 | * 2 of the License, or (at your option) any later version. | 17 | * 2 of the License, or (at your option) any later version. |
18 | */ | 18 | */ |
19 | |||
20 | /* | ||
21 | * This needs to be before all headers so that pr_debug in printk.h doesn't turn | ||
22 | * printk calls into no_printk(). | ||
23 | * | ||
24 | *#define DEBUG | ||
25 | */ | ||
26 | |||
19 | #include <linux/module.h> | 27 | #include <linux/module.h> |
20 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
21 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
@@ -28,6 +36,9 @@ | |||
28 | #include <asm/tlbflush.h> | 36 | #include <asm/tlbflush.h> |
29 | #include <asm/setup.h> | 37 | #include <asm/setup.h> |
30 | 38 | ||
39 | #undef pr_fmt | ||
40 | #define pr_fmt(fmt) "microcode: " fmt | ||
41 | |||
31 | static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT]; | 42 | static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT]; |
32 | static struct mc_saved_data { | 43 | static struct mc_saved_data { |
33 | unsigned int mc_saved_count; | 44 | unsigned int mc_saved_count; |
@@ -35,50 +46,45 @@ static struct mc_saved_data { | |||
35 | } mc_saved_data; | 46 | } mc_saved_data; |
36 | 47 | ||
37 | static enum ucode_state | 48 | static enum ucode_state |
38 | generic_load_microcode_early(struct microcode_intel **mc_saved_p, | 49 | load_microcode_early(struct microcode_intel **saved, |
39 | unsigned int mc_saved_count, | 50 | unsigned int num_saved, struct ucode_cpu_info *uci) |
40 | struct ucode_cpu_info *uci) | ||
41 | { | 51 | { |
42 | struct microcode_intel *ucode_ptr, *new_mc = NULL; | 52 | struct microcode_intel *ucode_ptr, *new_mc = NULL; |
43 | int new_rev = uci->cpu_sig.rev; | 53 | struct microcode_header_intel *mc_hdr; |
44 | enum ucode_state state = UCODE_OK; | 54 | int new_rev, ret, i; |
45 | unsigned int mc_size; | ||
46 | struct microcode_header_intel *mc_header; | ||
47 | unsigned int csig = uci->cpu_sig.sig; | ||
48 | unsigned int cpf = uci->cpu_sig.pf; | ||
49 | int i; | ||
50 | 55 | ||
51 | for (i = 0; i < mc_saved_count; i++) { | 56 | new_rev = uci->cpu_sig.rev; |
52 | ucode_ptr = mc_saved_p[i]; | ||
53 | 57 | ||
54 | mc_header = (struct microcode_header_intel *)ucode_ptr; | 58 | for (i = 0; i < num_saved; i++) { |
55 | mc_size = get_totalsize(mc_header); | 59 | ucode_ptr = saved[i]; |
56 | if (get_matching_microcode(csig, cpf, ucode_ptr, new_rev)) { | 60 | mc_hdr = (struct microcode_header_intel *)ucode_ptr; |
57 | new_rev = mc_header->rev; | ||
58 | new_mc = ucode_ptr; | ||
59 | } | ||
60 | } | ||
61 | 61 | ||
62 | if (!new_mc) { | 62 | ret = get_matching_microcode(uci->cpu_sig.sig, |
63 | state = UCODE_NFOUND; | 63 | uci->cpu_sig.pf, |
64 | goto out; | 64 | new_rev, |
65 | ucode_ptr); | ||
66 | if (!ret) | ||
67 | continue; | ||
68 | |||
69 | new_rev = mc_hdr->rev; | ||
70 | new_mc = ucode_ptr; | ||
65 | } | 71 | } |
66 | 72 | ||
73 | if (!new_mc) | ||
74 | return UCODE_NFOUND; | ||
75 | |||
67 | uci->mc = (struct microcode_intel *)new_mc; | 76 | uci->mc = (struct microcode_intel *)new_mc; |
68 | out: | 77 | return UCODE_OK; |
69 | return state; | ||
70 | } | 78 | } |
71 | 79 | ||
72 | static void | 80 | static inline void |
73 | microcode_pointer(struct microcode_intel **mc_saved, | 81 | copy_initrd_ptrs(struct microcode_intel **mc_saved, unsigned long *initrd, |
74 | unsigned long *mc_saved_in_initrd, | 82 | unsigned long off, int num_saved) |
75 | unsigned long initrd_start, int mc_saved_count) | ||
76 | { | 83 | { |
77 | int i; | 84 | int i; |
78 | 85 | ||
79 | for (i = 0; i < mc_saved_count; i++) | 86 | for (i = 0; i < num_saved; i++) |
80 | mc_saved[i] = (struct microcode_intel *) | 87 | mc_saved[i] = (struct microcode_intel *)(initrd[i] + off); |
81 | (mc_saved_in_initrd[i] + initrd_start); | ||
82 | } | 88 | } |
83 | 89 | ||
84 | #ifdef CONFIG_X86_32 | 90 | #ifdef CONFIG_X86_32 |
@@ -102,55 +108,27 @@ microcode_phys(struct microcode_intel **mc_saved_tmp, | |||
102 | #endif | 108 | #endif |
103 | 109 | ||
104 | static enum ucode_state | 110 | static enum ucode_state |
105 | load_microcode(struct mc_saved_data *mc_saved_data, | 111 | load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, |
106 | unsigned long *mc_saved_in_initrd, | 112 | unsigned long initrd_start, struct ucode_cpu_info *uci) |
107 | unsigned long initrd_start, | ||
108 | struct ucode_cpu_info *uci) | ||
109 | { | 113 | { |
110 | struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; | 114 | struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; |
111 | unsigned int count = mc_saved_data->mc_saved_count; | 115 | unsigned int count = mc_saved_data->mc_saved_count; |
112 | 116 | ||
113 | if (!mc_saved_data->mc_saved) { | 117 | if (!mc_saved_data->mc_saved) { |
114 | microcode_pointer(mc_saved_tmp, mc_saved_in_initrd, | 118 | copy_initrd_ptrs(mc_saved_tmp, initrd, initrd_start, count); |
115 | initrd_start, count); | ||
116 | 119 | ||
117 | return generic_load_microcode_early(mc_saved_tmp, count, uci); | 120 | return load_microcode_early(mc_saved_tmp, count, uci); |
118 | } else { | 121 | } else { |
119 | #ifdef CONFIG_X86_32 | 122 | #ifdef CONFIG_X86_32 |
120 | microcode_phys(mc_saved_tmp, mc_saved_data); | 123 | microcode_phys(mc_saved_tmp, mc_saved_data); |
121 | return generic_load_microcode_early(mc_saved_tmp, count, uci); | 124 | return load_microcode_early(mc_saved_tmp, count, uci); |
122 | #else | 125 | #else |
123 | return generic_load_microcode_early(mc_saved_data->mc_saved, | 126 | return load_microcode_early(mc_saved_data->mc_saved, |
124 | count, uci); | 127 | count, uci); |
125 | #endif | 128 | #endif |
126 | } | 129 | } |
127 | } | 130 | } |
128 | 131 | ||
129 | static u8 get_x86_family(unsigned long sig) | ||
130 | { | ||
131 | u8 x86; | ||
132 | |||
133 | x86 = (sig >> 8) & 0xf; | ||
134 | |||
135 | if (x86 == 0xf) | ||
136 | x86 += (sig >> 20) & 0xff; | ||
137 | |||
138 | return x86; | ||
139 | } | ||
140 | |||
141 | static u8 get_x86_model(unsigned long sig) | ||
142 | { | ||
143 | u8 x86, x86_model; | ||
144 | |||
145 | x86 = get_x86_family(sig); | ||
146 | x86_model = (sig >> 4) & 0xf; | ||
147 | |||
148 | if (x86 == 0x6 || x86 == 0xf) | ||
149 | x86_model += ((sig >> 16) & 0xf) << 4; | ||
150 | |||
151 | return x86_model; | ||
152 | } | ||
153 | |||
154 | /* | 132 | /* |
155 | * Given CPU signature and a microcode patch, this function finds if the | 133 | * Given CPU signature and a microcode patch, this function finds if the |
156 | * microcode patch has matching family and model with the CPU. | 134 | * microcode patch has matching family and model with the CPU. |
@@ -159,42 +137,40 @@ static enum ucode_state | |||
159 | matching_model_microcode(struct microcode_header_intel *mc_header, | 137 | matching_model_microcode(struct microcode_header_intel *mc_header, |
160 | unsigned long sig) | 138 | unsigned long sig) |
161 | { | 139 | { |
162 | u8 x86, x86_model; | 140 | unsigned int fam, model; |
163 | u8 x86_ucode, x86_model_ucode; | 141 | unsigned int fam_ucode, model_ucode; |
164 | struct extended_sigtable *ext_header; | 142 | struct extended_sigtable *ext_header; |
165 | unsigned long total_size = get_totalsize(mc_header); | 143 | unsigned long total_size = get_totalsize(mc_header); |
166 | unsigned long data_size = get_datasize(mc_header); | 144 | unsigned long data_size = get_datasize(mc_header); |
167 | int ext_sigcount, i; | 145 | int ext_sigcount, i; |
168 | struct extended_signature *ext_sig; | 146 | struct extended_signature *ext_sig; |
169 | 147 | ||
170 | x86 = get_x86_family(sig); | 148 | fam = __x86_family(sig); |
171 | x86_model = get_x86_model(sig); | 149 | model = x86_model(sig); |
172 | 150 | ||
173 | x86_ucode = get_x86_family(mc_header->sig); | 151 | fam_ucode = __x86_family(mc_header->sig); |
174 | x86_model_ucode = get_x86_model(mc_header->sig); | 152 | model_ucode = x86_model(mc_header->sig); |
175 | 153 | ||
176 | if (x86 == x86_ucode && x86_model == x86_model_ucode) | 154 | if (fam == fam_ucode && model == model_ucode) |
177 | return UCODE_OK; | 155 | return UCODE_OK; |
178 | 156 | ||
179 | /* Look for ext. headers: */ | 157 | /* Look for ext. headers: */ |
180 | if (total_size <= data_size + MC_HEADER_SIZE) | 158 | if (total_size <= data_size + MC_HEADER_SIZE) |
181 | return UCODE_NFOUND; | 159 | return UCODE_NFOUND; |
182 | 160 | ||
183 | ext_header = (struct extended_sigtable *) | 161 | ext_header = (void *) mc_header + data_size + MC_HEADER_SIZE; |
184 | mc_header + data_size + MC_HEADER_SIZE; | 162 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE; |
185 | ext_sigcount = ext_header->count; | 163 | ext_sigcount = ext_header->count; |
186 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE; | ||
187 | 164 | ||
188 | for (i = 0; i < ext_sigcount; i++) { | 165 | for (i = 0; i < ext_sigcount; i++) { |
189 | x86_ucode = get_x86_family(ext_sig->sig); | 166 | fam_ucode = __x86_family(ext_sig->sig); |
190 | x86_model_ucode = get_x86_model(ext_sig->sig); | 167 | model_ucode = x86_model(ext_sig->sig); |
191 | 168 | ||
192 | if (x86 == x86_ucode && x86_model == x86_model_ucode) | 169 | if (fam == fam_ucode && model == model_ucode) |
193 | return UCODE_OK; | 170 | return UCODE_OK; |
194 | 171 | ||
195 | ext_sig++; | 172 | ext_sig++; |
196 | } | 173 | } |
197 | |||
198 | return UCODE_NFOUND; | 174 | return UCODE_NFOUND; |
199 | } | 175 | } |
200 | 176 | ||
@@ -204,7 +180,7 @@ save_microcode(struct mc_saved_data *mc_saved_data, | |||
204 | unsigned int mc_saved_count) | 180 | unsigned int mc_saved_count) |
205 | { | 181 | { |
206 | int i, j; | 182 | int i, j; |
207 | struct microcode_intel **mc_saved_p; | 183 | struct microcode_intel **saved_ptr; |
208 | int ret; | 184 | int ret; |
209 | 185 | ||
210 | if (!mc_saved_count) | 186 | if (!mc_saved_count) |
@@ -213,39 +189,45 @@ save_microcode(struct mc_saved_data *mc_saved_data, | |||
213 | /* | 189 | /* |
214 | * Copy new microcode data. | 190 | * Copy new microcode data. |
215 | */ | 191 | */ |
216 | mc_saved_p = kmalloc(mc_saved_count*sizeof(struct microcode_intel *), | 192 | saved_ptr = kcalloc(mc_saved_count, sizeof(struct microcode_intel *), GFP_KERNEL); |
217 | GFP_KERNEL); | 193 | if (!saved_ptr) |
218 | if (!mc_saved_p) | ||
219 | return -ENOMEM; | 194 | return -ENOMEM; |
220 | 195 | ||
221 | for (i = 0; i < mc_saved_count; i++) { | 196 | for (i = 0; i < mc_saved_count; i++) { |
222 | struct microcode_intel *mc = mc_saved_src[i]; | 197 | struct microcode_header_intel *mc_hdr; |
223 | struct microcode_header_intel *mc_header = &mc->hdr; | 198 | struct microcode_intel *mc; |
224 | unsigned long mc_size = get_totalsize(mc_header); | 199 | unsigned long size; |
225 | mc_saved_p[i] = kmalloc(mc_size, GFP_KERNEL); | 200 | |
226 | if (!mc_saved_p[i]) { | ||
227 | ret = -ENOMEM; | ||
228 | goto err; | ||
229 | } | ||
230 | if (!mc_saved_src[i]) { | 201 | if (!mc_saved_src[i]) { |
231 | ret = -EINVAL; | 202 | ret = -EINVAL; |
232 | goto err; | 203 | goto err; |
233 | } | 204 | } |
234 | memcpy(mc_saved_p[i], mc, mc_size); | 205 | |
206 | mc = mc_saved_src[i]; | ||
207 | mc_hdr = &mc->hdr; | ||
208 | size = get_totalsize(mc_hdr); | ||
209 | |||
210 | saved_ptr[i] = kmalloc(size, GFP_KERNEL); | ||
211 | if (!saved_ptr[i]) { | ||
212 | ret = -ENOMEM; | ||
213 | goto err; | ||
214 | } | ||
215 | |||
216 | memcpy(saved_ptr[i], mc, size); | ||
235 | } | 217 | } |
236 | 218 | ||
237 | /* | 219 | /* |
238 | * Point to newly saved microcode. | 220 | * Point to newly saved microcode. |
239 | */ | 221 | */ |
240 | mc_saved_data->mc_saved = mc_saved_p; | 222 | mc_saved_data->mc_saved = saved_ptr; |
241 | mc_saved_data->mc_saved_count = mc_saved_count; | 223 | mc_saved_data->mc_saved_count = mc_saved_count; |
242 | 224 | ||
243 | return 0; | 225 | return 0; |
244 | 226 | ||
245 | err: | 227 | err: |
246 | for (j = 0; j <= i; j++) | 228 | for (j = 0; j <= i; j++) |
247 | kfree(mc_saved_p[j]); | 229 | kfree(saved_ptr[j]); |
248 | kfree(mc_saved_p); | 230 | kfree(saved_ptr); |
249 | 231 | ||
250 | return ret; | 232 | return ret; |
251 | } | 233 | } |
@@ -257,48 +239,45 @@ err: | |||
257 | * - or if it is a newly discovered microcode patch. | 239 | * - or if it is a newly discovered microcode patch. |
258 | * | 240 | * |
259 | * The microcode patch should have matching model with CPU. | 241 | * The microcode patch should have matching model with CPU. |
242 | * | ||
243 | * Returns: The updated number @num_saved of saved microcode patches. | ||
260 | */ | 244 | */ |
261 | static void _save_mc(struct microcode_intel **mc_saved, u8 *ucode_ptr, | 245 | static unsigned int _save_mc(struct microcode_intel **mc_saved, |
262 | unsigned int *mc_saved_count_p) | 246 | u8 *ucode_ptr, unsigned int num_saved) |
263 | { | 247 | { |
264 | int i; | 248 | struct microcode_header_intel *mc_hdr, *mc_saved_hdr; |
265 | int found = 0; | 249 | unsigned int sig, pf, new_rev; |
266 | unsigned int mc_saved_count = *mc_saved_count_p; | 250 | int found = 0, i; |
267 | struct microcode_header_intel *mc_header; | 251 | |
252 | mc_hdr = (struct microcode_header_intel *)ucode_ptr; | ||
253 | |||
254 | for (i = 0; i < num_saved; i++) { | ||
255 | mc_saved_hdr = (struct microcode_header_intel *)mc_saved[i]; | ||
256 | sig = mc_saved_hdr->sig; | ||
257 | pf = mc_saved_hdr->pf; | ||
258 | new_rev = mc_hdr->rev; | ||
259 | |||
260 | if (!get_matching_sig(sig, pf, new_rev, ucode_ptr)) | ||
261 | continue; | ||
262 | |||
263 | found = 1; | ||
264 | |||
265 | if (!revision_is_newer(mc_hdr, new_rev)) | ||
266 | continue; | ||
268 | 267 | ||
269 | mc_header = (struct microcode_header_intel *)ucode_ptr; | ||
270 | for (i = 0; i < mc_saved_count; i++) { | ||
271 | unsigned int sig, pf; | ||
272 | unsigned int new_rev; | ||
273 | struct microcode_header_intel *mc_saved_header = | ||
274 | (struct microcode_header_intel *)mc_saved[i]; | ||
275 | sig = mc_saved_header->sig; | ||
276 | pf = mc_saved_header->pf; | ||
277 | new_rev = mc_header->rev; | ||
278 | |||
279 | if (get_matching_sig(sig, pf, ucode_ptr, new_rev)) { | ||
280 | found = 1; | ||
281 | if (update_match_revision(mc_header, new_rev)) { | ||
282 | /* | ||
283 | * Found an older ucode saved before. | ||
284 | * Replace the older one with this newer | ||
285 | * one. | ||
286 | */ | ||
287 | mc_saved[i] = | ||
288 | (struct microcode_intel *)ucode_ptr; | ||
289 | break; | ||
290 | } | ||
291 | } | ||
292 | } | ||
293 | if (i >= mc_saved_count && !found) | ||
294 | /* | 268 | /* |
295 | * This ucode is first time discovered in ucode file. | 269 | * Found an older ucode saved earlier. Replace it with |
296 | * Save it to memory. | 270 | * this newer one. |
297 | */ | 271 | */ |
298 | mc_saved[mc_saved_count++] = | 272 | mc_saved[i] = (struct microcode_intel *)ucode_ptr; |
299 | (struct microcode_intel *)ucode_ptr; | 273 | break; |
274 | } | ||
275 | |||
276 | /* Newly detected microcode, save it to memory. */ | ||
277 | if (i >= num_saved && !found) | ||
278 | mc_saved[num_saved++] = (struct microcode_intel *)ucode_ptr; | ||
300 | 279 | ||
301 | *mc_saved_count_p = mc_saved_count; | 280 | return num_saved; |
302 | } | 281 | } |
303 | 282 | ||
304 | /* | 283 | /* |
@@ -346,7 +325,7 @@ get_matching_model_microcode(int cpu, unsigned long start, | |||
346 | continue; | 325 | continue; |
347 | } | 326 | } |
348 | 327 | ||
349 | _save_mc(mc_saved_tmp, ucode_ptr, &mc_saved_count); | 328 | mc_saved_count = _save_mc(mc_saved_tmp, ucode_ptr, mc_saved_count); |
350 | 329 | ||
351 | ucode_ptr += mc_size; | 330 | ucode_ptr += mc_size; |
352 | } | 331 | } |
@@ -372,7 +351,7 @@ out: | |||
372 | static int collect_cpu_info_early(struct ucode_cpu_info *uci) | 351 | static int collect_cpu_info_early(struct ucode_cpu_info *uci) |
373 | { | 352 | { |
374 | unsigned int val[2]; | 353 | unsigned int val[2]; |
375 | u8 x86, x86_model; | 354 | unsigned int family, model; |
376 | struct cpu_signature csig; | 355 | struct cpu_signature csig; |
377 | unsigned int eax, ebx, ecx, edx; | 356 | unsigned int eax, ebx, ecx, edx; |
378 | 357 | ||
@@ -387,10 +366,10 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci) | |||
387 | native_cpuid(&eax, &ebx, &ecx, &edx); | 366 | native_cpuid(&eax, &ebx, &ecx, &edx); |
388 | csig.sig = eax; | 367 | csig.sig = eax; |
389 | 368 | ||
390 | x86 = get_x86_family(csig.sig); | 369 | family = __x86_family(csig.sig); |
391 | x86_model = get_x86_model(csig.sig); | 370 | model = x86_model(csig.sig); |
392 | 371 | ||
393 | if ((x86_model >= 5) || (x86 > 6)) { | 372 | if ((model >= 5) || (family > 6)) { |
394 | /* get processor flags from MSR 0x17 */ | 373 | /* get processor flags from MSR 0x17 */ |
395 | native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); | 374 | native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); |
396 | csig.pf = 1 << ((val[1] >> 18) & 7); | 375 | csig.pf = 1 << ((val[1] >> 18) & 7); |
@@ -429,8 +408,7 @@ static void __ref show_saved_mc(void) | |||
429 | sig = uci.cpu_sig.sig; | 408 | sig = uci.cpu_sig.sig; |
430 | pf = uci.cpu_sig.pf; | 409 | pf = uci.cpu_sig.pf; |
431 | rev = uci.cpu_sig.rev; | 410 | rev = uci.cpu_sig.rev; |
432 | pr_debug("CPU%d: sig=0x%x, pf=0x%x, rev=0x%x\n", | 411 | pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev); |
433 | smp_processor_id(), sig, pf, rev); | ||
434 | 412 | ||
435 | for (i = 0; i < mc_saved_data.mc_saved_count; i++) { | 413 | for (i = 0; i < mc_saved_data.mc_saved_count; i++) { |
436 | struct microcode_header_intel *mc_saved_header; | 414 | struct microcode_header_intel *mc_saved_header; |
@@ -457,8 +435,7 @@ static void __ref show_saved_mc(void) | |||
457 | if (total_size <= data_size + MC_HEADER_SIZE) | 435 | if (total_size <= data_size + MC_HEADER_SIZE) |
458 | continue; | 436 | continue; |
459 | 437 | ||
460 | ext_header = (struct extended_sigtable *) | 438 | ext_header = (void *) mc_saved_header + data_size + MC_HEADER_SIZE; |
461 | mc_saved_header + data_size + MC_HEADER_SIZE; | ||
462 | ext_sigcount = ext_header->count; | 439 | ext_sigcount = ext_header->count; |
463 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE; | 440 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE; |
464 | 441 | ||
@@ -515,8 +492,7 @@ int save_mc_for_early(u8 *mc) | |||
515 | * Save the microcode patch mc in mc_save_tmp structure if it's a newer | 492 | * Save the microcode patch mc in mc_save_tmp structure if it's a newer |
516 | * version. | 493 | * version. |
517 | */ | 494 | */ |
518 | 495 | mc_saved_count = _save_mc(mc_saved_tmp, mc, mc_saved_count); | |
519 | _save_mc(mc_saved_tmp, mc, &mc_saved_count); | ||
520 | 496 | ||
521 | /* | 497 | /* |
522 | * Save the mc_save_tmp in global mc_saved_data. | 498 | * Save the mc_save_tmp in global mc_saved_data. |
@@ -548,12 +524,10 @@ EXPORT_SYMBOL_GPL(save_mc_for_early); | |||
548 | 524 | ||
549 | static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin"; | 525 | static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin"; |
550 | static __init enum ucode_state | 526 | static __init enum ucode_state |
551 | scan_microcode(unsigned long start, unsigned long end, | 527 | scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, |
552 | struct mc_saved_data *mc_saved_data, | 528 | unsigned long start, unsigned long size, |
553 | unsigned long *mc_saved_in_initrd, | 529 | struct ucode_cpu_info *uci) |
554 | struct ucode_cpu_info *uci) | ||
555 | { | 530 | { |
556 | unsigned int size = end - start + 1; | ||
557 | struct cpio_data cd; | 531 | struct cpio_data cd; |
558 | long offset = 0; | 532 | long offset = 0; |
559 | #ifdef CONFIG_X86_32 | 533 | #ifdef CONFIG_X86_32 |
@@ -569,10 +543,8 @@ scan_microcode(unsigned long start, unsigned long end, | |||
569 | if (!cd.data) | 543 | if (!cd.data) |
570 | return UCODE_ERROR; | 544 | return UCODE_ERROR; |
571 | 545 | ||
572 | |||
573 | return get_matching_model_microcode(0, start, cd.data, cd.size, | 546 | return get_matching_model_microcode(0, start, cd.data, cd.size, |
574 | mc_saved_data, mc_saved_in_initrd, | 547 | mc_saved_data, initrd, uci); |
575 | uci); | ||
576 | } | 548 | } |
577 | 549 | ||
578 | /* | 550 | /* |
@@ -704,7 +676,7 @@ int __init save_microcode_in_initrd_intel(void) | |||
704 | if (count == 0) | 676 | if (count == 0) |
705 | return ret; | 677 | return ret; |
706 | 678 | ||
707 | microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count); | 679 | copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, initrd_start, count); |
708 | ret = save_microcode(&mc_saved_data, mc_saved, count); | 680 | ret = save_microcode(&mc_saved_data, mc_saved, count); |
709 | if (ret) | 681 | if (ret) |
710 | pr_err("Cannot save microcode patches from initrd.\n"); | 682 | pr_err("Cannot save microcode patches from initrd.\n"); |
@@ -716,52 +688,44 @@ int __init save_microcode_in_initrd_intel(void) | |||
716 | 688 | ||
717 | static void __init | 689 | static void __init |
718 | _load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data, | 690 | _load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data, |
719 | unsigned long *mc_saved_in_initrd, | 691 | unsigned long *initrd, |
720 | unsigned long initrd_start_early, | 692 | unsigned long start, unsigned long size) |
721 | unsigned long initrd_end_early, | ||
722 | struct ucode_cpu_info *uci) | ||
723 | { | 693 | { |
694 | struct ucode_cpu_info uci; | ||
724 | enum ucode_state ret; | 695 | enum ucode_state ret; |
725 | 696 | ||
726 | collect_cpu_info_early(uci); | 697 | collect_cpu_info_early(&uci); |
727 | scan_microcode(initrd_start_early, initrd_end_early, mc_saved_data, | ||
728 | mc_saved_in_initrd, uci); | ||
729 | 698 | ||
730 | ret = load_microcode(mc_saved_data, mc_saved_in_initrd, | 699 | ret = scan_microcode(mc_saved_data, initrd, start, size, &uci); |
731 | initrd_start_early, uci); | 700 | if (ret != UCODE_OK) |
701 | return; | ||
732 | 702 | ||
733 | if (ret == UCODE_OK) | 703 | ret = load_microcode(mc_saved_data, initrd, start, &uci); |
734 | apply_microcode_early(uci, true); | 704 | if (ret != UCODE_OK) |
705 | return; | ||
706 | |||
707 | apply_microcode_early(&uci, true); | ||
735 | } | 708 | } |
736 | 709 | ||
737 | void __init | 710 | void __init load_ucode_intel_bsp(void) |
738 | load_ucode_intel_bsp(void) | ||
739 | { | 711 | { |
740 | u64 ramdisk_image, ramdisk_size; | 712 | u64 start, size; |
741 | unsigned long initrd_start_early, initrd_end_early; | ||
742 | struct ucode_cpu_info uci; | ||
743 | #ifdef CONFIG_X86_32 | 713 | #ifdef CONFIG_X86_32 |
744 | struct boot_params *boot_params_p; | 714 | struct boot_params *p; |
745 | 715 | ||
746 | boot_params_p = (struct boot_params *)__pa_nodebug(&boot_params); | 716 | p = (struct boot_params *)__pa_nodebug(&boot_params); |
747 | ramdisk_image = boot_params_p->hdr.ramdisk_image; | 717 | start = p->hdr.ramdisk_image; |
748 | ramdisk_size = boot_params_p->hdr.ramdisk_size; | 718 | size = p->hdr.ramdisk_size; |
749 | initrd_start_early = ramdisk_image; | ||
750 | initrd_end_early = initrd_start_early + ramdisk_size; | ||
751 | 719 | ||
752 | _load_ucode_intel_bsp( | 720 | _load_ucode_intel_bsp( |
753 | (struct mc_saved_data *)__pa_nodebug(&mc_saved_data), | 721 | (struct mc_saved_data *)__pa_nodebug(&mc_saved_data), |
754 | (unsigned long *)__pa_nodebug(&mc_saved_in_initrd), | 722 | (unsigned long *)__pa_nodebug(&mc_saved_in_initrd), |
755 | initrd_start_early, initrd_end_early, &uci); | 723 | start, size); |
756 | #else | 724 | #else |
757 | ramdisk_image = boot_params.hdr.ramdisk_image; | 725 | start = boot_params.hdr.ramdisk_image + PAGE_OFFSET; |
758 | ramdisk_size = boot_params.hdr.ramdisk_size; | 726 | size = boot_params.hdr.ramdisk_size; |
759 | initrd_start_early = ramdisk_image + PAGE_OFFSET; | 727 | |
760 | initrd_end_early = initrd_start_early + ramdisk_size; | 728 | _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size); |
761 | |||
762 | _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, | ||
763 | initrd_start_early, initrd_end_early, | ||
764 | &uci); | ||
765 | #endif | 729 | #endif |
766 | } | 730 | } |
767 | 731 | ||
@@ -771,6 +735,7 @@ void load_ucode_intel_ap(void) | |||
771 | struct ucode_cpu_info uci; | 735 | struct ucode_cpu_info uci; |
772 | unsigned long *mc_saved_in_initrd_p; | 736 | unsigned long *mc_saved_in_initrd_p; |
773 | unsigned long initrd_start_addr; | 737 | unsigned long initrd_start_addr; |
738 | enum ucode_state ret; | ||
774 | #ifdef CONFIG_X86_32 | 739 | #ifdef CONFIG_X86_32 |
775 | unsigned long *initrd_start_p; | 740 | unsigned long *initrd_start_p; |
776 | 741 | ||
@@ -793,8 +758,12 @@ void load_ucode_intel_ap(void) | |||
793 | return; | 758 | return; |
794 | 759 | ||
795 | collect_cpu_info_early(&uci); | 760 | collect_cpu_info_early(&uci); |
796 | load_microcode(mc_saved_data_p, mc_saved_in_initrd_p, | 761 | ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p, |
797 | initrd_start_addr, &uci); | 762 | initrd_start_addr, &uci); |
763 | |||
764 | if (ret != UCODE_OK) | ||
765 | return; | ||
766 | |||
798 | apply_microcode_early(&uci, true); | 767 | apply_microcode_early(&uci, true); |
799 | } | 768 | } |
800 | 769 | ||
@@ -808,8 +777,8 @@ void reload_ucode_intel(void) | |||
808 | 777 | ||
809 | collect_cpu_info_early(&uci); | 778 | collect_cpu_info_early(&uci); |
810 | 779 | ||
811 | ret = generic_load_microcode_early(mc_saved_data.mc_saved, | 780 | ret = load_microcode_early(mc_saved_data.mc_saved, |
812 | mc_saved_data.mc_saved_count, &uci); | 781 | mc_saved_data.mc_saved_count, &uci); |
813 | if (ret != UCODE_OK) | 782 | if (ret != UCODE_OK) |
814 | return; | 783 | return; |
815 | 784 | ||
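The family/model helpers removed above are replaced by shared __x86_family()/x86_model() helpers, which presumably decode the CPUID(1).EAX signature exactly as the local copies did. A minimal user-space sketch of that decoding, with an illustrative signature value (the helper names below are stand-ins, not the kernel's):

	#include <stdio.h>

	/* Mirrors the removed get_x86_family()/get_x86_model() bit layout. */
	static unsigned int sig_family(unsigned long sig)
	{
		unsigned int fam = (sig >> 8) & 0xf;

		if (fam == 0xf)				/* extended family */
			fam += (sig >> 20) & 0xff;
		return fam;
	}

	static unsigned int sig_model(unsigned long sig)
	{
		unsigned int fam = sig_family(sig);
		unsigned int model = (sig >> 4) & 0xf;

		if (fam == 0x6 || fam == 0xf)		/* extended model */
			model += ((sig >> 16) & 0xf) << 4;
		return model;
	}

	int main(void)
	{
		unsigned long sig = 0x000306c3;		/* illustrative signature */

		printf("family 0x%x, model 0x%x\n", sig_family(sig), sig_model(sig));
		return 0;
	}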
diff --git a/arch/x86/kernel/cpu/microcode/intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c index ce69320d0179..cd47a510a3f1 100644 --- a/arch/x86/kernel/cpu/microcode/intel_lib.c +++ b/arch/x86/kernel/cpu/microcode/intel_lib.c | |||
@@ -38,12 +38,6 @@ update_match_cpu(unsigned int csig, unsigned int cpf, | |||
38 | return (!sigmatch(sig, csig, pf, cpf)) ? 0 : 1; | 38 | return (!sigmatch(sig, csig, pf, cpf)) ? 0 : 1; |
39 | } | 39 | } |
40 | 40 | ||
41 | int | ||
42 | update_match_revision(struct microcode_header_intel *mc_header, int rev) | ||
43 | { | ||
44 | return (mc_header->rev <= rev) ? 0 : 1; | ||
45 | } | ||
46 | |||
47 | int microcode_sanity_check(void *mc, int print_err) | 41 | int microcode_sanity_check(void *mc, int print_err) |
48 | { | 42 | { |
49 | unsigned long total_size, data_size, ext_table_size; | 43 | unsigned long total_size, data_size, ext_table_size; |
@@ -128,10 +122,9 @@ int microcode_sanity_check(void *mc, int print_err) | |||
128 | EXPORT_SYMBOL_GPL(microcode_sanity_check); | 122 | EXPORT_SYMBOL_GPL(microcode_sanity_check); |
129 | 123 | ||
130 | /* | 124 | /* |
131 | * return 0 - no update found | 125 | * Returns 1 if update has been found, 0 otherwise. |
132 | * return 1 - found update | ||
133 | */ | 126 | */ |
134 | int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev) | 127 | int get_matching_sig(unsigned int csig, int cpf, int rev, void *mc) |
135 | { | 128 | { |
136 | struct microcode_header_intel *mc_header = mc; | 129 | struct microcode_header_intel *mc_header = mc; |
137 | struct extended_sigtable *ext_header; | 130 | struct extended_sigtable *ext_header; |
@@ -159,16 +152,15 @@ int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev) | |||
159 | } | 152 | } |
160 | 153 | ||
161 | /* | 154 | /* |
162 | * return 0 - no update found | 155 | * Returns 1 if update has been found, 0 otherwise. |
163 | * return 1 - found update | ||
164 | */ | 156 | */ |
165 | int get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev) | 157 | int get_matching_microcode(unsigned int csig, int cpf, int rev, void *mc) |
166 | { | 158 | { |
167 | struct microcode_header_intel *mc_header = mc; | 159 | struct microcode_header_intel *mc_hdr = mc; |
168 | 160 | ||
169 | if (!update_match_revision(mc_header, rev)) | 161 | if (!revision_is_newer(mc_hdr, rev)) |
170 | return 0; | 162 | return 0; |
171 | 163 | ||
172 | return get_matching_sig(csig, cpf, mc, rev); | 164 | return get_matching_sig(csig, cpf, rev, mc); |
173 | } | 165 | } |
174 | EXPORT_SYMBOL_GPL(get_matching_microcode); | 166 | EXPORT_SYMBOL_GPL(get_matching_microcode); |
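The dropped update_match_revision() returned non-zero only for a strictly newer patch revision; revision_is_newer(), used in its place above, presumably encodes the same comparison as a header-level helper. A stand-alone sketch of that test (the struct here is a stub, not the real header layout):

	#include <stdio.h>

	struct microcode_header_intel {
		unsigned int rev;	/* stub: only the revision field matters here */
	};

	/* Same test the removed update_match_revision() performed. */
	static int revision_is_newer(struct microcode_header_intel *mc_hdr, int rev)
	{
		return mc_hdr->rev > (unsigned int)rev;
	}

	int main(void)
	{
		struct microcode_header_intel hdr = { .rev = 0x1c };

		/* newer than 0x1b, not newer than itself */
		printf("%d %d\n", revision_is_newer(&hdr, 0x1b),
				  revision_is_newer(&hdr, 0x1c));
		return 0;
	}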
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh index 36d99a337b49..3f20710a5b23 100644 --- a/arch/x86/kernel/cpu/mkcapflags.sh +++ b/arch/x86/kernel/cpu/mkcapflags.sh | |||
@@ -6,7 +6,7 @@ | |||
6 | IN=$1 | 6 | IN=$1 |
7 | OUT=$2 | 7 | OUT=$2 |
8 | 8 | ||
9 | function dump_array() | 9 | dump_array() |
10 | { | 10 | { |
11 | ARRAY=$1 | 11 | ARRAY=$1 |
12 | SIZE=$2 | 12 | SIZE=$2 |
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index a041e094b8b9..d76f13d6d8d6 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c | |||
@@ -404,11 +404,10 @@ static const struct file_operations mtrr_fops = { | |||
404 | static int mtrr_seq_show(struct seq_file *seq, void *offset) | 404 | static int mtrr_seq_show(struct seq_file *seq, void *offset) |
405 | { | 405 | { |
406 | char factor; | 406 | char factor; |
407 | int i, max, len; | 407 | int i, max; |
408 | mtrr_type type; | 408 | mtrr_type type; |
409 | unsigned long base, size; | 409 | unsigned long base, size; |
410 | 410 | ||
411 | len = 0; | ||
412 | max = num_var_ranges; | 411 | max = num_var_ranges; |
413 | for (i = 0; i < max; i++) { | 412 | for (i = 0; i < max; i++) { |
414 | mtrr_if->get(i, &base, &size, &type); | 413 | mtrr_if->get(i, &base, &size, &type); |
@@ -425,11 +424,10 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) | |||
425 | size >>= 20 - PAGE_SHIFT; | 424 | size >>= 20 - PAGE_SHIFT; |
426 | } | 425 | } |
427 | /* Base can be > 32bit */ | 426 | /* Base can be > 32bit */ |
428 | len += seq_printf(seq, "reg%02i: base=0x%06lx000 " | 427 | seq_printf(seq, "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n", |
429 | "(%5luMB), size=%5lu%cB, count=%d: %s\n", | 428 | i, base, base >> (20 - PAGE_SHIFT), |
430 | i, base, base >> (20 - PAGE_SHIFT), size, | 429 | size, factor, |
431 | factor, mtrr_usage_table[i], | 430 | mtrr_usage_table[i], mtrr_attrib_to_str(type)); |
432 | mtrr_attrib_to_str(type)); | ||
433 | } | 431 | } |
434 | return 0; | 432 | return 0; |
435 | } | 433 | } |
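The len accumulator goes away because nothing consumes seq_printf()'s return value; the output itself is unchanged. For reference, the MB scaling it prints operates on page counts, roughly as in this sketch (assuming 4 KiB pages, i.e. PAGE_SHIFT == 12; the sample size is illustrative):

	#include <stdio.h>

	#define PAGE_SHIFT 12				/* 4 KiB pages, as on x86 */

	int main(void)
	{
		unsigned long size = 0x80000;		/* size in pages: 2 GiB */
		char factor;

		if (size < (0x100000 >> PAGE_SHIFT)) {	/* below 1 MiB: print KiB */
			factor = 'K';
			size <<= PAGE_SHIFT - 10;
		} else {				/* otherwise print MiB */
			factor = 'M';
			size >>= 20 - PAGE_SHIFT;
		}
		printf("size=%5lu%cB\n", size, factor);
		return 0;
	}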
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 689e35760924..87848ebe2bb7 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -2236,24 +2236,24 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) | |||
2236 | static unsigned long code_segment_base(struct pt_regs *regs) | 2236 | static unsigned long code_segment_base(struct pt_regs *regs) |
2237 | { | 2237 | { |
2238 | /* | 2238 | /* |
2239 | * For IA32 we look at the GDT/LDT segment base to convert the | ||
2240 | * effective IP to a linear address. | ||
2241 | */ | ||
2242 | |||
2243 | #ifdef CONFIG_X86_32 | ||
2244 | /* | ||
2239 | * If we are in VM86 mode, add the segment offset to convert to a | 2245 | * If we are in VM86 mode, add the segment offset to convert to a |
2240 | * linear address. | 2246 | * linear address. |
2241 | */ | 2247 | */ |
2242 | if (regs->flags & X86_VM_MASK) | 2248 | if (regs->flags & X86_VM_MASK) |
2243 | return 0x10 * regs->cs; | 2249 | return 0x10 * regs->cs; |
2244 | 2250 | ||
2245 | /* | ||
2246 | * For IA32 we look at the GDT/LDT segment base to convert the | ||
2247 | * effective IP to a linear address. | ||
2248 | */ | ||
2249 | #ifdef CONFIG_X86_32 | ||
2250 | if (user_mode(regs) && regs->cs != __USER_CS) | 2251 | if (user_mode(regs) && regs->cs != __USER_CS) |
2251 | return get_segment_base(regs->cs); | 2252 | return get_segment_base(regs->cs); |
2252 | #else | 2253 | #else |
2253 | if (test_thread_flag(TIF_IA32)) { | 2254 | if (user_mode(regs) && !user_64bit_mode(regs) && |
2254 | if (user_mode(regs) && regs->cs != __USER32_CS) | 2255 | regs->cs != __USER32_CS) |
2255 | return get_segment_base(regs->cs); | 2256 | return get_segment_base(regs->cs); |
2256 | } | ||
2257 | #endif | 2257 | #endif |
2258 | return 0; | 2258 | return 0; |
2259 | } | 2259 | } |
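For the VM86 case above, a segment register holds a real-mode paragraph number, so the returned base is simply cs * 16 and a linear code address is that base plus the 16-bit IP. A small sketch of the arithmetic (the cs:ip values are illustrative):

	#include <stdio.h>

	/* VM86/real-mode segmentation: base = segment * 16. */
	static unsigned long vm86_code_base(unsigned long cs)
	{
		return 0x10 * cs;
	}

	int main(void)
	{
		unsigned long cs = 0xf000, ip = 0xfff0;	/* illustrative cs:ip */

		printf("base=0x%lx, linear=0x%lx\n",
		       vm86_code_base(cs), vm86_code_base(cs) + ip);
		return 0;
	}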
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index aceb2f90c716..c76d3e37c6e1 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
@@ -105,7 +105,7 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs) | |||
105 | #ifdef CONFIG_X86_32 | 105 | #ifdef CONFIG_X86_32 |
106 | struct pt_regs fixed_regs; | 106 | struct pt_regs fixed_regs; |
107 | 107 | ||
108 | if (!user_mode_vm(regs)) { | 108 | if (!user_mode(regs)) { |
109 | crash_fixup_ss_esp(&fixed_regs, regs); | 109 | crash_fixup_ss_esp(&fixed_regs, regs); |
110 | regs = &fixed_regs; | 110 | regs = &fixed_regs; |
111 | } | 111 | } |
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 3d3503351242..6367a780cc8c 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c | |||
@@ -286,13 +286,13 @@ static void __init x86_flattree_get_config(void) | |||
286 | initial_boot_params = dt = early_memremap(initial_dtb, map_len); | 286 | initial_boot_params = dt = early_memremap(initial_dtb, map_len); |
287 | size = of_get_flat_dt_size(); | 287 | size = of_get_flat_dt_size(); |
288 | if (map_len < size) { | 288 | if (map_len < size) { |
289 | early_iounmap(dt, map_len); | 289 | early_memunmap(dt, map_len); |
290 | initial_boot_params = dt = early_memremap(initial_dtb, size); | 290 | initial_boot_params = dt = early_memremap(initial_dtb, size); |
291 | map_len = size; | 291 | map_len = size; |
292 | } | 292 | } |
293 | 293 | ||
294 | unflatten_and_copy_device_tree(); | 294 | unflatten_and_copy_device_tree(); |
295 | early_iounmap(dt, map_len); | 295 | early_memunmap(dt, map_len); |
296 | } | 296 | } |
297 | #else | 297 | #else |
298 | static inline void x86_flattree_get_config(void) { } | 298 | static inline void x86_flattree_get_config(void) { } |
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index cf3df1d8d039..9c30acfadae2 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -25,10 +25,12 @@ unsigned int code_bytes = 64; | |||
25 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | 25 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; |
26 | static int die_counter; | 26 | static int die_counter; |
27 | 27 | ||
28 | static void printk_stack_address(unsigned long address, int reliable) | 28 | static void printk_stack_address(unsigned long address, int reliable, |
29 | void *data) | ||
29 | { | 30 | { |
30 | pr_cont(" [<%p>] %s%pB\n", | 31 | printk("%s [<%p>] %s%pB\n", |
31 | (void *)address, reliable ? "" : "? ", (void *)address); | 32 | (char *)data, (void *)address, reliable ? "" : "? ", |
33 | (void *)address); | ||
32 | } | 34 | } |
33 | 35 | ||
34 | void printk_address(unsigned long address) | 36 | void printk_address(unsigned long address) |
@@ -155,8 +157,7 @@ static int print_trace_stack(void *data, char *name) | |||
155 | static void print_trace_address(void *data, unsigned long addr, int reliable) | 157 | static void print_trace_address(void *data, unsigned long addr, int reliable) |
156 | { | 158 | { |
157 | touch_nmi_watchdog(); | 159 | touch_nmi_watchdog(); |
158 | printk(data); | 160 | printk_stack_address(addr, reliable, data); |
159 | printk_stack_address(addr, reliable); | ||
160 | } | 161 | } |
161 | 162 | ||
162 | static const struct stacktrace_ops print_trace_ops = { | 163 | static const struct stacktrace_ops print_trace_ops = { |
@@ -278,7 +279,7 @@ int __die(const char *str, struct pt_regs *regs, long err) | |||
278 | print_modules(); | 279 | print_modules(); |
279 | show_regs(regs); | 280 | show_regs(regs); |
280 | #ifdef CONFIG_X86_32 | 281 | #ifdef CONFIG_X86_32 |
281 | if (user_mode_vm(regs)) { | 282 | if (user_mode(regs)) { |
282 | sp = regs->sp; | 283 | sp = regs->sp; |
283 | ss = regs->ss & 0xffff; | 284 | ss = regs->ss & 0xffff; |
284 | } else { | 285 | } else { |
@@ -307,7 +308,7 @@ void die(const char *str, struct pt_regs *regs, long err) | |||
307 | unsigned long flags = oops_begin(); | 308 | unsigned long flags = oops_begin(); |
308 | int sig = SIGSEGV; | 309 | int sig = SIGSEGV; |
309 | 310 | ||
310 | if (!user_mode_vm(regs)) | 311 | if (!user_mode(regs)) |
311 | report_bug(regs->ip, regs); | 312 | report_bug(regs->ip, regs); |
312 | 313 | ||
313 | if (__die(str, regs, err)) | 314 | if (__die(str, regs, err)) |
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 5abd4cd4230c..464ffd69b92e 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -108,9 +108,12 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
108 | for (i = 0; i < kstack_depth_to_print; i++) { | 108 | for (i = 0; i < kstack_depth_to_print; i++) { |
109 | if (kstack_end(stack)) | 109 | if (kstack_end(stack)) |
110 | break; | 110 | break; |
111 | if (i && ((i % STACKSLOTS_PER_LINE) == 0)) | 111 | if ((i % STACKSLOTS_PER_LINE) == 0) { |
112 | pr_cont("\n"); | 112 | if (i != 0) |
113 | pr_cont(" %08lx", *stack++); | 113 | pr_cont("\n"); |
114 | printk("%s %08lx", log_lvl, *stack++); | ||
115 | } else | ||
116 | pr_cont(" %08lx", *stack++); | ||
114 | touch_nmi_watchdog(); | 117 | touch_nmi_watchdog(); |
115 | } | 118 | } |
116 | pr_cont("\n"); | 119 | pr_cont("\n"); |
@@ -123,13 +126,13 @@ void show_regs(struct pt_regs *regs) | |||
123 | int i; | 126 | int i; |
124 | 127 | ||
125 | show_regs_print_info(KERN_EMERG); | 128 | show_regs_print_info(KERN_EMERG); |
126 | __show_regs(regs, !user_mode_vm(regs)); | 129 | __show_regs(regs, !user_mode(regs)); |
127 | 130 | ||
128 | /* | 131 | /* |
129 | * When in-kernel, we also print out the stack and code at the | 132 | * When in-kernel, we also print out the stack and code at the |
130 | * time of the fault.. | 133 | * time of the fault.. |
131 | */ | 134 | */ |
132 | if (!user_mode_vm(regs)) { | 135 | if (!user_mode(regs)) { |
133 | unsigned int code_prologue = code_bytes * 43 / 64; | 136 | unsigned int code_prologue = code_bytes * 43 / 64; |
134 | unsigned int code_len = code_bytes; | 137 | unsigned int code_len = code_bytes; |
135 | unsigned char c; | 138 | unsigned char c; |
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index ff86f19b5758..5f1c6266eb30 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -280,12 +280,15 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
280 | pr_cont(" <EOI> "); | 280 | pr_cont(" <EOI> "); |
281 | } | 281 | } |
282 | } else { | 282 | } else { |
283 | if (((long) stack & (THREAD_SIZE-1)) == 0) | 283 | if (kstack_end(stack)) |
284 | break; | 284 | break; |
285 | } | 285 | } |
286 | if (i && ((i % STACKSLOTS_PER_LINE) == 0)) | 286 | if ((i % STACKSLOTS_PER_LINE) == 0) { |
287 | pr_cont("\n"); | 287 | if (i != 0) |
288 | pr_cont(" %016lx", *stack++); | 288 | pr_cont("\n"); |
289 | printk("%s %016lx", log_lvl, *stack++); | ||
290 | } else | ||
291 | pr_cont(" %016lx", *stack++); | ||
289 | touch_nmi_watchdog(); | 292 | touch_nmi_watchdog(); |
290 | } | 293 | } |
291 | preempt_enable(); | 294 | preempt_enable(); |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 46201deee923..7d46bb260334 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -661,7 +661,7 @@ void __init parse_e820_ext(u64 phys_addr, u32 data_len) | |||
661 | extmap = (struct e820entry *)(sdata->data); | 661 | extmap = (struct e820entry *)(sdata->data); |
662 | __append_e820_map(extmap, entries); | 662 | __append_e820_map(extmap, entries); |
663 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 663 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
664 | early_iounmap(sdata, data_len); | 664 | early_memunmap(sdata, data_len); |
665 | printk(KERN_INFO "e820: extended physical RAM map:\n"); | 665 | printk(KERN_INFO "e820: extended physical RAM map:\n"); |
666 | e820_print_map("extended"); | 666 | e820_print_map("extended"); |
667 | } | 667 | } |
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index a62536a1be88..49ff55ef9b26 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
@@ -95,20 +95,6 @@ static unsigned long early_serial_base = 0x3f8; /* ttyS0 */ | |||
95 | #define DLL 0 /* Divisor Latch Low */ | 95 | #define DLL 0 /* Divisor Latch Low */ |
96 | #define DLH 1 /* Divisor latch High */ | 96 | #define DLH 1 /* Divisor latch High */ |
97 | 97 | ||
98 | static void mem32_serial_out(unsigned long addr, int offset, int value) | ||
99 | { | ||
100 | uint32_t *vaddr = (uint32_t *)addr; | ||
101 | /* shift implied by pointer type */ | ||
102 | writel(value, vaddr + offset); | ||
103 | } | ||
104 | |||
105 | static unsigned int mem32_serial_in(unsigned long addr, int offset) | ||
106 | { | ||
107 | uint32_t *vaddr = (uint32_t *)addr; | ||
108 | /* shift implied by pointer type */ | ||
109 | return readl(vaddr + offset); | ||
110 | } | ||
111 | |||
112 | static unsigned int io_serial_in(unsigned long addr, int offset) | 98 | static unsigned int io_serial_in(unsigned long addr, int offset) |
113 | { | 99 | { |
114 | return inb(addr + offset); | 100 | return inb(addr + offset); |
@@ -205,6 +191,20 @@ static __init void early_serial_init(char *s) | |||
205 | } | 191 | } |
206 | 192 | ||
207 | #ifdef CONFIG_PCI | 193 | #ifdef CONFIG_PCI |
194 | static void mem32_serial_out(unsigned long addr, int offset, int value) | ||
195 | { | ||
196 | u32 *vaddr = (u32 *)addr; | ||
197 | /* shift implied by pointer type */ | ||
198 | writel(value, vaddr + offset); | ||
199 | } | ||
200 | |||
201 | static unsigned int mem32_serial_in(unsigned long addr, int offset) | ||
202 | { | ||
203 | u32 *vaddr = (u32 *)addr; | ||
204 | /* shift implied by pointer type */ | ||
205 | return readl(vaddr + offset); | ||
206 | } | ||
207 | |||
208 | /* | 208 | /* |
209 | * early_pci_serial_init() | 209 | * early_pci_serial_init() |
210 | * | 210 | * |
@@ -217,8 +217,8 @@ static __init void early_pci_serial_init(char *s) | |||
217 | unsigned divisor; | 217 | unsigned divisor; |
218 | unsigned long baud = DEFAULT_BAUD; | 218 | unsigned long baud = DEFAULT_BAUD; |
219 | u8 bus, slot, func; | 219 | u8 bus, slot, func; |
220 | uint32_t classcode, bar0; | 220 | u32 classcode, bar0; |
221 | uint16_t cmdreg; | 221 | u16 cmdreg; |
222 | char *e; | 222 | char *e; |
223 | 223 | ||
224 | 224 | ||
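The "shift implied by pointer type" comment in the moved mem32_serial_*() helpers refers to C pointer arithmetic: on a u32 *, vaddr + offset advances by offset * 4 bytes, matching 32-bit register spacing on memory-mapped UARTs. A user-space sketch of the same scaling, with a plain array standing in for the ioremapped window (the real helpers use readl()/writel()):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t regs[8] = { 0 };	/* stands in for the MMIO window */
		uint32_t *vaddr = regs;
		int offset = 3;			/* third 32-bit register slot */

		vaddr[offset] = 0x3;		/* same cell as *(vaddr + offset) */
		printf("byte offset = %td, value = 0x%x\n",
		       (char *)(vaddr + offset) - (char *)vaddr, vaddr[offset]);
		return 0;
	}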
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 31e2d5bf3e38..1c309763e321 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -395,10 +395,13 @@ sysenter_past_esp: | |||
395 | /*CFI_REL_OFFSET cs, 0*/ | 395 | /*CFI_REL_OFFSET cs, 0*/ |
396 | /* | 396 | /* |
397 | * Push current_thread_info()->sysenter_return to the stack. | 397 | * Push current_thread_info()->sysenter_return to the stack. |
398 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words | 398 | * A tiny bit of offset fixup is necessary: TI_sysenter_return |
399 | * pushed above; +8 corresponds to copy_thread's esp0 setting. | 399 | * is relative to thread_info, which is at the bottom of the |
400 | * kernel stack page. 4*4 means the 4 words pushed above; | ||
401 | * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack; | ||
402 | * and THREAD_SIZE takes us to the bottom. | ||
400 | */ | 403 | */ |
401 | pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp) | 404 | pushl_cfi ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp) |
402 | CFI_REL_OFFSET eip, 0 | 405 | CFI_REL_OFFSET eip, 0 |
403 | 406 | ||
404 | pushl_cfi %eax | 407 | pushl_cfi %eax |
@@ -432,7 +435,7 @@ sysenter_after_call: | |||
432 | TRACE_IRQS_OFF | 435 | TRACE_IRQS_OFF |
433 | movl TI_flags(%ebp), %ecx | 436 | movl TI_flags(%ebp), %ecx |
434 | testl $_TIF_ALLWORK_MASK, %ecx | 437 | testl $_TIF_ALLWORK_MASK, %ecx |
435 | jne sysexit_audit | 438 | jnz sysexit_audit |
436 | sysenter_exit: | 439 | sysenter_exit: |
437 | /* if something modifies registers it must also disable sysexit */ | 440 | /* if something modifies registers it must also disable sysexit */ |
438 | movl PT_EIP(%esp), %edx | 441 | movl PT_EIP(%esp), %edx |
@@ -460,7 +463,7 @@ sysenter_audit: | |||
460 | 463 | ||
461 | sysexit_audit: | 464 | sysexit_audit: |
462 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx | 465 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx |
463 | jne syscall_exit_work | 466 | jnz syscall_exit_work |
464 | TRACE_IRQS_ON | 467 | TRACE_IRQS_ON |
465 | ENABLE_INTERRUPTS(CLBR_ANY) | 468 | ENABLE_INTERRUPTS(CLBR_ANY) |
466 | movl %eax,%edx /* second arg, syscall return value */ | 469 | movl %eax,%edx /* second arg, syscall return value */ |
@@ -472,7 +475,7 @@ sysexit_audit: | |||
472 | TRACE_IRQS_OFF | 475 | TRACE_IRQS_OFF |
473 | movl TI_flags(%ebp), %ecx | 476 | movl TI_flags(%ebp), %ecx |
474 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx | 477 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx |
475 | jne syscall_exit_work | 478 | jnz syscall_exit_work |
476 | movl PT_EAX(%esp),%eax /* reload syscall return value */ | 479 | movl PT_EAX(%esp),%eax /* reload syscall return value */ |
477 | jmp sysenter_exit | 480 | jmp sysenter_exit |
478 | #endif | 481 | #endif |
@@ -510,7 +513,7 @@ syscall_exit: | |||
510 | TRACE_IRQS_OFF | 513 | TRACE_IRQS_OFF |
511 | movl TI_flags(%ebp), %ecx | 514 | movl TI_flags(%ebp), %ecx |
512 | testl $_TIF_ALLWORK_MASK, %ecx # current->work | 515 | testl $_TIF_ALLWORK_MASK, %ecx # current->work |
513 | jne syscall_exit_work | 516 | jnz syscall_exit_work |
514 | 517 | ||
515 | restore_all: | 518 | restore_all: |
516 | TRACE_IRQS_IRET | 519 | TRACE_IRQS_IRET |
@@ -612,7 +615,7 @@ work_notifysig: # deal with pending signals and | |||
612 | #ifdef CONFIG_VM86 | 615 | #ifdef CONFIG_VM86 |
613 | testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) | 616 | testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) |
614 | movl %esp, %eax | 617 | movl %esp, %eax |
615 | jne work_notifysig_v86 # returning to kernel-space or | 618 | jnz work_notifysig_v86 # returning to kernel-space or |
616 | # vm86-space | 619 | # vm86-space |
617 | 1: | 620 | 1: |
618 | #else | 621 | #else |
@@ -720,43 +723,22 @@ END(sysenter_badsys) | |||
720 | .endm | 723 | .endm |
721 | 724 | ||
722 | /* | 725 | /* |
723 | * Build the entry stubs and pointer table with some assembler magic. | 726 | * Build the entry stubs with some assembler magic. |
724 | * We pack 7 stubs into a single 32-byte chunk, which will fit in a | 727 | * We pack 1 stub into every 8-byte block. |
725 | * single cache line on all modern x86 implementations. | ||
726 | */ | 728 | */ |
727 | .section .init.rodata,"a" | 729 | .align 8 |
728 | ENTRY(interrupt) | ||
729 | .section .entry.text, "ax" | ||
730 | .p2align 5 | ||
731 | .p2align CONFIG_X86_L1_CACHE_SHIFT | ||
732 | ENTRY(irq_entries_start) | 730 | ENTRY(irq_entries_start) |
733 | RING0_INT_FRAME | 731 | RING0_INT_FRAME |
734 | vector=FIRST_EXTERNAL_VECTOR | 732 | vector=FIRST_EXTERNAL_VECTOR |
735 | .rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7 | 733 | .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) |
736 | .balign 32 | 734 | pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */ |
737 | .rept 7 | 735 | vector=vector+1 |
738 | .if vector < FIRST_SYSTEM_VECTOR | 736 | jmp common_interrupt |
739 | .if vector <> FIRST_EXTERNAL_VECTOR | ||
740 | CFI_ADJUST_CFA_OFFSET -4 | 737 | CFI_ADJUST_CFA_OFFSET -4 |
741 | .endif | 738 | .align 8 |
742 | 1: pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */ | 739 | .endr |
743 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 | ||
744 | jmp 2f | ||
745 | .endif | ||
746 | .previous | ||
747 | .long 1b | ||
748 | .section .entry.text, "ax" | ||
749 | vector=vector+1 | ||
750 | .endif | ||
751 | .endr | ||
752 | 2: jmp common_interrupt | ||
753 | .endr | ||
754 | END(irq_entries_start) | 740 | END(irq_entries_start) |
755 | 741 | ||
756 | .previous | ||
757 | END(interrupt) | ||
758 | .previous | ||
759 | |||
760 | /* | 742 | /* |
761 | * the CPU automatically disables interrupts when executing an IRQ vector, | 743 | * the CPU automatically disables interrupts when executing an IRQ vector, |
762 | * so IRQ-flags tracing has to follow that: | 744 | * so IRQ-flags tracing has to follow that: |
@@ -816,15 +798,9 @@ ENTRY(simd_coprocessor_error) | |||
816 | pushl_cfi $0 | 798 | pushl_cfi $0 |
817 | #ifdef CONFIG_X86_INVD_BUG | 799 | #ifdef CONFIG_X86_INVD_BUG |
818 | /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ | 800 | /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ |
819 | 661: pushl_cfi $do_general_protection | 801 | ALTERNATIVE "pushl_cfi $do_general_protection", \ |
820 | 662: | 802 | "pushl $do_simd_coprocessor_error", \ |
821 | .section .altinstructions,"a" | 803 | X86_FEATURE_XMM |
822 | altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f | ||
823 | .previous | ||
824 | .section .altinstr_replacement,"ax" | ||
825 | 663: pushl $do_simd_coprocessor_error | ||
826 | 664: | ||
827 | .previous | ||
828 | #else | 804 | #else |
829 | pushl_cfi $do_simd_coprocessor_error | 805 | pushl_cfi $do_simd_coprocessor_error |
830 | #endif | 806 | #endif |
@@ -1240,20 +1216,13 @@ error_code: | |||
1240 | /*CFI_REL_OFFSET es, 0*/ | 1216 | /*CFI_REL_OFFSET es, 0*/ |
1241 | pushl_cfi %ds | 1217 | pushl_cfi %ds |
1242 | /*CFI_REL_OFFSET ds, 0*/ | 1218 | /*CFI_REL_OFFSET ds, 0*/ |
1243 | pushl_cfi %eax | 1219 | pushl_cfi_reg eax |
1244 | CFI_REL_OFFSET eax, 0 | 1220 | pushl_cfi_reg ebp |
1245 | pushl_cfi %ebp | 1221 | pushl_cfi_reg edi |
1246 | CFI_REL_OFFSET ebp, 0 | 1222 | pushl_cfi_reg esi |
1247 | pushl_cfi %edi | 1223 | pushl_cfi_reg edx |
1248 | CFI_REL_OFFSET edi, 0 | 1224 | pushl_cfi_reg ecx |
1249 | pushl_cfi %esi | 1225 | pushl_cfi_reg ebx |
1250 | CFI_REL_OFFSET esi, 0 | ||
1251 | pushl_cfi %edx | ||
1252 | CFI_REL_OFFSET edx, 0 | ||
1253 | pushl_cfi %ecx | ||
1254 | CFI_REL_OFFSET ecx, 0 | ||
1255 | pushl_cfi %ebx | ||
1256 | CFI_REL_OFFSET ebx, 0 | ||
1257 | cld | 1226 | cld |
1258 | movl $(__KERNEL_PERCPU), %ecx | 1227 | movl $(__KERNEL_PERCPU), %ecx |
1259 | movl %ecx, %fs | 1228 | movl %ecx, %fs |
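With the irq_entries_start rewrite above, every stub is a push plus a jump padded to a fixed 8-byte slot, so the stub for a given vector can presumably be located by plain arithmetic instead of through the old 'interrupt' pointer table. A sketch of that indexing with a made-up base address:

	#include <stdio.h>

	#define FIRST_EXTERNAL_VECTOR	0x20	/* as in the x86 irq_vectors header */

	int main(void)
	{
		/* Illustrative address; in the kernel this is the symbol
		 * irq_entries_start, with stubs .align 8 apart. */
		unsigned long irq_entries_start = 0xc15de000UL;
		int vector = 0x31;
		unsigned long stub;

		stub = irq_entries_start + 8 * (vector - FIRST_EXTERNAL_VECTOR);
		printf("stub for vector 0x%x at 0x%lx\n", vector, stub);
		return 0;
	}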
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 2babb393915e..c7b238494b31 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -14,27 +14,14 @@ | |||
14 | * NOTE: This code handles signal-recognition, which happens every time | 14 | * NOTE: This code handles signal-recognition, which happens every time |
15 | * after an interrupt and after each system call. | 15 | * after an interrupt and after each system call. |
16 | * | 16 | * |
17 | * Normal syscalls and interrupts don't save a full stack frame, this is | ||
18 | * only done for syscall tracing, signals or fork/exec et.al. | ||
19 | * | ||
20 | * A note on terminology: | 17 | * A note on terminology: |
21 | * - top of stack: Architecture defined interrupt frame from SS to RIP | 18 | * - iret frame: Architecture defined interrupt frame from SS to RIP |
22 | * at the top of the kernel process stack. | 19 | * at the top of the kernel process stack. |
23 | * - partial stack frame: partially saved registers up to R11. | ||
24 | * - full stack frame: Like partial stack frame, but all register saved. | ||
25 | * | 20 | * |
26 | * Some macro usage: | 21 | * Some macro usage: |
27 | * - CFI macros are used to generate dwarf2 unwind information for better | 22 | * - CFI macros are used to generate dwarf2 unwind information for better |
28 | * backtraces. They don't change any code. | 23 | * backtraces. They don't change any code. |
29 | * - SAVE_ALL/RESTORE_ALL - Save/restore all registers | ||
30 | * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify. | ||
31 | * There are unfortunately lots of special cases where some registers | ||
32 | * not touched. The macro is a big mess that should be cleaned up. | ||
33 | * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS. | ||
34 | * Gives a full stack frame. | ||
35 | * - ENTRY/END Define functions in the symbol table. | 24 | * - ENTRY/END Define functions in the symbol table. |
36 | * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack | ||
37 | * frame that is otherwise undefined after a SYSCALL | ||
38 | * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. | 25 | * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. |
39 | * - idtentry - Define exception entry points. | 26 | * - idtentry - Define exception entry points. |
40 | */ | 27 | */ |
@@ -70,10 +57,6 @@ | |||
70 | .section .entry.text, "ax" | 57 | .section .entry.text, "ax" |
71 | 58 | ||
72 | 59 | ||
73 | #ifndef CONFIG_PREEMPT | ||
74 | #define retint_kernel retint_restore_args | ||
75 | #endif | ||
76 | |||
77 | #ifdef CONFIG_PARAVIRT | 60 | #ifdef CONFIG_PARAVIRT |
78 | ENTRY(native_usergs_sysret64) | 61 | ENTRY(native_usergs_sysret64) |
79 | swapgs | 62 | swapgs |
@@ -82,9 +65,9 @@ ENDPROC(native_usergs_sysret64) | |||
82 | #endif /* CONFIG_PARAVIRT */ | 65 | #endif /* CONFIG_PARAVIRT */ |
83 | 66 | ||
84 | 67 | ||
85 | .macro TRACE_IRQS_IRETQ offset=ARGOFFSET | 68 | .macro TRACE_IRQS_IRETQ |
86 | #ifdef CONFIG_TRACE_IRQFLAGS | 69 | #ifdef CONFIG_TRACE_IRQFLAGS |
87 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ | 70 | bt $9,EFLAGS(%rsp) /* interrupts off? */ |
88 | jnc 1f | 71 | jnc 1f |
89 | TRACE_IRQS_ON | 72 | TRACE_IRQS_ON |
90 | 1: | 73 | 1: |
@@ -116,8 +99,8 @@ ENDPROC(native_usergs_sysret64) | |||
116 | call debug_stack_reset | 99 | call debug_stack_reset |
117 | .endm | 100 | .endm |
118 | 101 | ||
119 | .macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET | 102 | .macro TRACE_IRQS_IRETQ_DEBUG |
120 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ | 103 | bt $9,EFLAGS(%rsp) /* interrupts off? */ |
121 | jnc 1f | 104 | jnc 1f |
122 | TRACE_IRQS_ON_DEBUG | 105 | TRACE_IRQS_ON_DEBUG |
123 | 1: | 106 | 1: |
@@ -130,34 +113,7 @@ ENDPROC(native_usergs_sysret64) | |||
130 | #endif | 113 | #endif |
131 | 114 | ||
132 | /* | 115 | /* |
133 | * C code is not supposed to know about undefined top of stack. Every time | 116 | * empty frame |
134 | * a C function with an pt_regs argument is called from the SYSCALL based | ||
135 | * fast path FIXUP_TOP_OF_STACK is needed. | ||
136 | * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs | ||
137 | * manipulation. | ||
138 | */ | ||
139 | |||
140 | /* %rsp:at FRAMEEND */ | ||
141 | .macro FIXUP_TOP_OF_STACK tmp offset=0 | ||
142 | movq PER_CPU_VAR(old_rsp),\tmp | ||
143 | movq \tmp,RSP+\offset(%rsp) | ||
144 | movq $__USER_DS,SS+\offset(%rsp) | ||
145 | movq $__USER_CS,CS+\offset(%rsp) | ||
146 | movq RIP+\offset(%rsp),\tmp /* get rip */ | ||
147 | movq \tmp,RCX+\offset(%rsp) /* copy it to rcx as sysret would do */ | ||
148 | movq R11+\offset(%rsp),\tmp /* get eflags */ | ||
149 | movq \tmp,EFLAGS+\offset(%rsp) | ||
150 | .endm | ||
151 | |||
152 | .macro RESTORE_TOP_OF_STACK tmp offset=0 | ||
153 | movq RSP+\offset(%rsp),\tmp | ||
154 | movq \tmp,PER_CPU_VAR(old_rsp) | ||
155 | movq EFLAGS+\offset(%rsp),\tmp | ||
156 | movq \tmp,R11+\offset(%rsp) | ||
157 | .endm | ||
158 | |||
159 | /* | ||
160 | * initial frame state for interrupts (and exceptions without error code) | ||
161 | */ | 117 | */ |
162 | .macro EMPTY_FRAME start=1 offset=0 | 118 | .macro EMPTY_FRAME start=1 offset=0 |
163 | .if \start | 119 | .if \start |
@@ -173,12 +129,12 @@ ENDPROC(native_usergs_sysret64) | |||
173 | * initial frame state for interrupts (and exceptions without error code) | 129 | * initial frame state for interrupts (and exceptions without error code) |
174 | */ | 130 | */ |
175 | .macro INTR_FRAME start=1 offset=0 | 131 | .macro INTR_FRAME start=1 offset=0 |
176 | EMPTY_FRAME \start, SS+8+\offset-RIP | 132 | EMPTY_FRAME \start, 5*8+\offset |
177 | /*CFI_REL_OFFSET ss, SS+\offset-RIP*/ | 133 | /*CFI_REL_OFFSET ss, 4*8+\offset*/ |
178 | CFI_REL_OFFSET rsp, RSP+\offset-RIP | 134 | CFI_REL_OFFSET rsp, 3*8+\offset |
179 | /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/ | 135 | /*CFI_REL_OFFSET rflags, 2*8+\offset*/ |
180 | /*CFI_REL_OFFSET cs, CS+\offset-RIP*/ | 136 | /*CFI_REL_OFFSET cs, 1*8+\offset*/ |
181 | CFI_REL_OFFSET rip, RIP+\offset-RIP | 137 | CFI_REL_OFFSET rip, 0*8+\offset |
182 | .endm | 138 | .endm |
183 | 139 | ||
184 | /* | 140 | /* |
@@ -186,30 +142,23 @@ ENDPROC(native_usergs_sysret64) | |||
186 | * with vector already pushed) | 142 | * with vector already pushed) |
187 | */ | 143 | */ |
188 | .macro XCPT_FRAME start=1 offset=0 | 144 | .macro XCPT_FRAME start=1 offset=0 |
189 | INTR_FRAME \start, RIP+\offset-ORIG_RAX | 145 | INTR_FRAME \start, 1*8+\offset |
190 | .endm | ||
191 | |||
192 | /* | ||
193 | * frame that enables calling into C. | ||
194 | */ | ||
195 | .macro PARTIAL_FRAME start=1 offset=0 | ||
196 | XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET | ||
197 | CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET | ||
198 | CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET | ||
199 | CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET | ||
200 | CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET | ||
201 | CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET | ||
202 | CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET | ||
203 | CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET | ||
204 | CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET | ||
205 | CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET | ||
206 | .endm | 146 | .endm |
207 | 147 | ||
208 | /* | 148 | /* |
209 | * frame that enables passing a complete pt_regs to a C function. | 149 | * frame that enables passing a complete pt_regs to a C function. |
210 | */ | 150 | */ |
211 | .macro DEFAULT_FRAME start=1 offset=0 | 151 | .macro DEFAULT_FRAME start=1 offset=0 |
212 | PARTIAL_FRAME \start, R11+\offset-R15 | 152 | XCPT_FRAME \start, ORIG_RAX+\offset |
153 | CFI_REL_OFFSET rdi, RDI+\offset | ||
154 | CFI_REL_OFFSET rsi, RSI+\offset | ||
155 | CFI_REL_OFFSET rdx, RDX+\offset | ||
156 | CFI_REL_OFFSET rcx, RCX+\offset | ||
157 | CFI_REL_OFFSET rax, RAX+\offset | ||
158 | CFI_REL_OFFSET r8, R8+\offset | ||
159 | CFI_REL_OFFSET r9, R9+\offset | ||
160 | CFI_REL_OFFSET r10, R10+\offset | ||
161 | CFI_REL_OFFSET r11, R11+\offset | ||
213 | CFI_REL_OFFSET rbx, RBX+\offset | 162 | CFI_REL_OFFSET rbx, RBX+\offset |
214 | CFI_REL_OFFSET rbp, RBP+\offset | 163 | CFI_REL_OFFSET rbp, RBP+\offset |
215 | CFI_REL_OFFSET r12, R12+\offset | 164 | CFI_REL_OFFSET r12, R12+\offset |
@@ -218,105 +167,30 @@ ENDPROC(native_usergs_sysret64) | |||
218 | CFI_REL_OFFSET r15, R15+\offset | 167 | CFI_REL_OFFSET r15, R15+\offset |
219 | .endm | 168 | .endm |
220 | 169 | ||
221 | ENTRY(save_paranoid) | ||
222 | XCPT_FRAME 1 RDI+8 | ||
223 | cld | ||
224 | movq %rdi, RDI+8(%rsp) | ||
225 | movq %rsi, RSI+8(%rsp) | ||
226 | movq_cfi rdx, RDX+8 | ||
227 | movq_cfi rcx, RCX+8 | ||
228 | movq_cfi rax, RAX+8 | ||
229 | movq %r8, R8+8(%rsp) | ||
230 | movq %r9, R9+8(%rsp) | ||
231 | movq %r10, R10+8(%rsp) | ||
232 | movq %r11, R11+8(%rsp) | ||
233 | movq_cfi rbx, RBX+8 | ||
234 | movq %rbp, RBP+8(%rsp) | ||
235 | movq %r12, R12+8(%rsp) | ||
236 | movq %r13, R13+8(%rsp) | ||
237 | movq %r14, R14+8(%rsp) | ||
238 | movq %r15, R15+8(%rsp) | ||
239 | movl $1,%ebx | ||
240 | movl $MSR_GS_BASE,%ecx | ||
241 | rdmsr | ||
242 | testl %edx,%edx | ||
243 | js 1f /* negative -> in kernel */ | ||
244 | SWAPGS | ||
245 | xorl %ebx,%ebx | ||
246 | 1: ret | ||
247 | CFI_ENDPROC | ||
248 | END(save_paranoid) | ||
249 | |||
250 | /* | 170 | /* |
251 | * A newly forked process directly context switches into this address. | 171 | * 64bit SYSCALL instruction entry. Up to 6 arguments in registers. |
252 | * | 172 | * |
253 | * rdi: prev task we switched from | 173 | * 64bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, |
254 | */ | 174 | * then loads new ss, cs, and rip from previously programmed MSRs. |
255 | ENTRY(ret_from_fork) | 175 | * rflags gets masked by a value from another MSR (so CLD and CLAC |
256 | DEFAULT_FRAME | 176 | * are not needed). SYSCALL does not save anything on the stack |
257 | 177 | * and does not change rsp. | |
258 | LOCK ; btr $TIF_FORK,TI_flags(%r8) | ||
259 | |||
260 | pushq_cfi $0x0002 | ||
261 | popfq_cfi # reset kernel eflags | ||
262 | |||
263 | call schedule_tail # rdi: 'prev' task parameter | ||
264 | |||
265 | GET_THREAD_INFO(%rcx) | ||
266 | |||
267 | RESTORE_REST | ||
268 | |||
269 | testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? | ||
270 | jz 1f | ||
271 | |||
272 | /* | ||
273 | * By the time we get here, we have no idea whether our pt_regs, | ||
274 | * ti flags, and ti status came from the 64-bit SYSCALL fast path, | ||
275 | * the slow path, or one of the ia32entry paths. | ||
276 | * Use int_ret_from_sys_call to return, since it can safely handle | ||
277 | * all of the above. | ||
278 | */ | ||
279 | jmp int_ret_from_sys_call | ||
280 | |||
281 | 1: | ||
282 | subq $REST_SKIP, %rsp # leave space for volatiles | ||
283 | CFI_ADJUST_CFA_OFFSET REST_SKIP | ||
284 | movq %rbp, %rdi | ||
285 | call *%rbx | ||
286 | movl $0, RAX(%rsp) | ||
287 | RESTORE_REST | ||
288 | jmp int_ret_from_sys_call | ||
289 | CFI_ENDPROC | ||
290 | END(ret_from_fork) | ||
291 | |||
292 | /* | ||
293 | * System call entry. Up to 6 arguments in registers are supported. | ||
294 | * | 178 | * |
295 | * SYSCALL does not save anything on the stack and does not change the | 179 | * Registers on entry: |
296 | * stack pointer. However, it does mask the flags register for us, so | ||
297 | * CLD and CLAC are not needed. | ||
298 | */ | ||
299 | |||
300 | /* | ||
301 | * Register setup: | ||
302 | * rax system call number | 180 | * rax system call number |
181 | * rcx return address | ||
182 | * r11 saved rflags (note: r11 is callee-clobbered register in C ABI) | ||
303 | * rdi arg0 | 183 | * rdi arg0 |
304 | * rcx return address for syscall/sysret, C arg3 | ||
305 | * rsi arg1 | 184 | * rsi arg1 |
306 | * rdx arg2 | 185 | * rdx arg2 |
307 | * r10 arg3 (--> moved to rcx for C) | 186 | * r10 arg3 (needs to be moved to rcx to conform to C ABI) |
308 | * r8 arg4 | 187 | * r8 arg4 |
309 | * r9 arg5 | 188 | * r9 arg5 |
310 | * r11 eflags for syscall/sysret, temporary for C | 189 | * (note: r12-r15,rbp,rbx are callee-preserved in C ABI) |
311 | * r12-r15,rbp,rbx saved by C code, not touched. | ||
312 | * | 190 | * |
313 | * Interrupts are off on entry. | ||
314 | * Only called from user space. | 191 | * Only called from user space. |
315 | * | 192 | * |
316 | * XXX if we had a free scratch register we could save the RSP into the stack frame | 193 | * When user can change pt_regs->foo always force IRET. That is because |
317 | * and report it properly in ps. Unfortunately we haven't. | ||
318 | * | ||
319 | * When user can change the frames always force IRET. That is because | ||
320 | * it deals with uncanonical addresses better. SYSRET has trouble | 194 | * it deals with uncanonical addresses better. SYSRET has trouble |
321 | * with them due to bugs in both AMD and Intel CPUs. | 195 | * with them due to bugs in both AMD and Intel CPUs. |
322 | */ | 196 | */ |
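As a rough illustration of the register convention just described (not from the patch itself): the same ABI is visible from userspace when issuing a raw SYSCALL. The sketch below assumes Linux x86-64 and __NR_write == 1; note that arg3 would have to travel in %r10, because SYSCALL itself clobbers %rcx and %r11 — exactly the values the entry code stashes into pt_regs->ip and pt_regs->flags.

/* Minimal sketch of the 64-bit SYSCALL register ABI (illustrative only). */
static long raw_syscall3(long nr, long a0, long a1, long a2)
{
	long ret;
	__asm__ volatile ("syscall"
			  : "=a" (ret)			/* return value comes back in rax */
			  : "a" (nr), "D" (a0), "S" (a1), "d" (a2)
			  : "rcx", "r11", "memory");	/* clobbered by SYSCALL itself */
	return ret;
}

int main(void)
{
	raw_syscall3(1 /* __NR_write on x86-64 */, 1, (long)"hello\n", 6);
	return 0;
}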
@@ -324,9 +198,15 @@ END(ret_from_fork) | |||
324 | ENTRY(system_call) | 198 | ENTRY(system_call) |
325 | CFI_STARTPROC simple | 199 | CFI_STARTPROC simple |
326 | CFI_SIGNAL_FRAME | 200 | CFI_SIGNAL_FRAME |
327 | CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET | 201 | CFI_DEF_CFA rsp,0 |
328 | CFI_REGISTER rip,rcx | 202 | CFI_REGISTER rip,rcx |
329 | /*CFI_REGISTER rflags,r11*/ | 203 | /*CFI_REGISTER rflags,r11*/ |
204 | |||
205 | /* | ||
206 | * Interrupts are off on entry. | ||
207 | * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, | ||
208 | * it is too small to ever cause noticeable irq latency. | ||
209 | */ | ||
330 | SWAPGS_UNSAFE_STACK | 210 | SWAPGS_UNSAFE_STACK |
331 | /* | 211 | /* |
332 | * A hypervisor implementation might want to use a label | 212 | * A hypervisor implementation might want to use a label |
@@ -335,18 +215,38 @@ ENTRY(system_call) | |||
335 | */ | 215 | */ |
336 | GLOBAL(system_call_after_swapgs) | 216 | GLOBAL(system_call_after_swapgs) |
337 | 217 | ||
338 | movq %rsp,PER_CPU_VAR(old_rsp) | 218 | movq %rsp,PER_CPU_VAR(rsp_scratch) |
339 | movq PER_CPU_VAR(kernel_stack),%rsp | 219 | movq PER_CPU_VAR(kernel_stack),%rsp |
220 | |||
221 | /* Construct struct pt_regs on stack */ | ||
222 | pushq_cfi $__USER_DS /* pt_regs->ss */ | ||
223 | pushq_cfi PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */ | ||
340 | /* | 224 | /* |
341 | * No need to follow this irqs off/on section - it's straight | 225 | * Re-enable interrupts. |
342 | * and short: | 226 | * We use 'rsp_scratch' as a scratch space, hence irq-off block above |
227 | * must execute atomically in the face of possible interrupt-driven | ||
228 | * task preemption. We must enable interrupts only after we're done | ||
229 | * with using rsp_scratch: | ||
343 | */ | 230 | */ |
344 | ENABLE_INTERRUPTS(CLBR_NONE) | 231 | ENABLE_INTERRUPTS(CLBR_NONE) |
345 | SAVE_ARGS 8, 0, rax_enosys=1 | 232 | pushq_cfi %r11 /* pt_regs->flags */ |
346 | movq_cfi rax,(ORIG_RAX-ARGOFFSET) | 233 | pushq_cfi $__USER_CS /* pt_regs->cs */ |
347 | movq %rcx,RIP-ARGOFFSET(%rsp) | 234 | pushq_cfi %rcx /* pt_regs->ip */ |
348 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 235 | CFI_REL_OFFSET rip,0 |
349 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 236 | pushq_cfi_reg rax /* pt_regs->orig_ax */ |
237 | pushq_cfi_reg rdi /* pt_regs->di */ | ||
238 | pushq_cfi_reg rsi /* pt_regs->si */ | ||
239 | pushq_cfi_reg rdx /* pt_regs->dx */ | ||
240 | pushq_cfi_reg rcx /* pt_regs->cx */ | ||
241 | pushq_cfi $-ENOSYS /* pt_regs->ax */ | ||
242 | pushq_cfi_reg r8 /* pt_regs->r8 */ | ||
243 | pushq_cfi_reg r9 /* pt_regs->r9 */ | ||
244 | pushq_cfi_reg r10 /* pt_regs->r10 */ | ||
245 | pushq_cfi_reg r11 /* pt_regs->r11 */ | ||
246 | sub $(6*8),%rsp /* pt_regs->bp,bx,r12-15 not saved */ | ||
247 | CFI_ADJUST_CFA_OFFSET 6*8 | ||
248 | |||
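For readers following the push sequence above: the pushes build a struct pt_regs from its highest member (ss) downwards, and the final "sub $(6*8),%rsp" only reserves room for the callee-saved members without writing them. A rough reference sketch of the layout being constructed (field order as in the x86-64 struct pt_regs of this era; a sketch, not part of the patch):

struct pt_regs_sketch {				/* lowest address == top of stack */
	unsigned long r15, r14, r13, r12, bp, bx;	/* "extra" regs: space reserved, not written here */
	unsigned long r11, r10, r9, r8;			/* caller-clobbered ("C") regs ... */
	unsigned long ax, cx, dx, si, di;		/* ... saved by the pushes above */
	unsigned long orig_ax;				/* syscall number (or error code) */
	unsigned long ip, cs, flags, sp, ss;		/* SYSCALL state / hardware iret frame */
};
/* 21 slots of 8 bytes each, i.e. SIZEOF_PTREGS == 168 on x86-64. */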
249 | testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) | ||
350 | jnz tracesys | 250 | jnz tracesys |
351 | system_call_fastpath: | 251 | system_call_fastpath: |
352 | #if __SYSCALL_MASK == ~0 | 252 | #if __SYSCALL_MASK == ~0 |
@@ -355,18 +255,21 @@ system_call_fastpath: | |||
355 | andl $__SYSCALL_MASK,%eax | 255 | andl $__SYSCALL_MASK,%eax |
356 | cmpl $__NR_syscall_max,%eax | 256 | cmpl $__NR_syscall_max,%eax |
357 | #endif | 257 | #endif |
358 | ja ret_from_sys_call /* and return regs->ax */ | 258 | ja 1f /* return -ENOSYS (already in pt_regs->ax) */ |
359 | movq %r10,%rcx | 259 | movq %r10,%rcx |
360 | call *sys_call_table(,%rax,8) # XXX: rip relative | 260 | call *sys_call_table(,%rax,8) |
361 | movq %rax,RAX-ARGOFFSET(%rsp) | 261 | movq %rax,RAX(%rsp) |
262 | 1: | ||
362 | /* | 263 | /* |
363 | * Syscall return path ending with SYSRET (fast path) | 264 | * Syscall return path ending with SYSRET (fast path). |
364 | * Has incomplete stack frame and undefined top of stack. | 265 | * Has incompletely filled pt_regs. |
365 | */ | 266 | */ |
366 | ret_from_sys_call: | ||
367 | LOCKDEP_SYS_EXIT | 267 | LOCKDEP_SYS_EXIT |
268 | /* | ||
269 | * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, | ||
270 | * it is too small to ever cause noticeable irq latency. | ||
271 | */ | ||
368 | DISABLE_INTERRUPTS(CLBR_NONE) | 272 | DISABLE_INTERRUPTS(CLBR_NONE) |
369 | TRACE_IRQS_OFF | ||
370 | 273 | ||
371 | /* | 274 | /* |
372 | * We must check ti flags with interrupts (or at least preemption) | 275 | * We must check ti flags with interrupts (or at least preemption) |
@@ -376,72 +279,73 @@ ret_from_sys_call: | |||
376 | * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is | 279 | * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is |
377 | * very bad. | 280 | * very bad. |
378 | */ | 281 | */ |
379 | testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 282 | testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
380 | jnz int_ret_from_sys_call_fixup /* Go the the slow path */ | 283 | jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */ |
381 | 284 | ||
382 | CFI_REMEMBER_STATE | 285 | CFI_REMEMBER_STATE |
383 | /* | 286 | |
384 | * sysretq will re-enable interrupts: | 287 | RESTORE_C_REGS_EXCEPT_RCX_R11 |
385 | */ | 288 | movq RIP(%rsp),%rcx |
386 | TRACE_IRQS_ON | ||
387 | movq RIP-ARGOFFSET(%rsp),%rcx | ||
388 | CFI_REGISTER rip,rcx | 289 | CFI_REGISTER rip,rcx |
389 | RESTORE_ARGS 1,-ARG_SKIP,0 | 290 | movq EFLAGS(%rsp),%r11 |
390 | /*CFI_REGISTER rflags,r11*/ | 291 | /*CFI_REGISTER rflags,r11*/ |
391 | movq PER_CPU_VAR(old_rsp), %rsp | 292 | movq RSP(%rsp),%rsp |
293 | /* | ||
294 | * 64bit SYSRET restores rip from rcx, | ||
295 | * rflags from r11 (but RF and VM bits are forced to 0), | ||
296 | * cs and ss are loaded from MSRs. | ||
297 | * Restoration of rflags re-enables interrupts. | ||
298 | */ | ||
392 | USERGS_SYSRET64 | 299 | USERGS_SYSRET64 |
393 | 300 | ||
394 | CFI_RESTORE_STATE | 301 | CFI_RESTORE_STATE |
395 | 302 | ||
396 | int_ret_from_sys_call_fixup: | 303 | /* Do syscall entry tracing */ |
397 | FIXUP_TOP_OF_STACK %r11, -ARGOFFSET | ||
398 | jmp int_ret_from_sys_call_irqs_off | ||
399 | |||
400 | /* Do syscall tracing */ | ||
401 | tracesys: | 304 | tracesys: |
402 | leaq -REST_SKIP(%rsp), %rdi | 305 | movq %rsp, %rdi |
403 | movq $AUDIT_ARCH_X86_64, %rsi | 306 | movl $AUDIT_ARCH_X86_64, %esi |
404 | call syscall_trace_enter_phase1 | 307 | call syscall_trace_enter_phase1 |
405 | test %rax, %rax | 308 | test %rax, %rax |
406 | jnz tracesys_phase2 /* if needed, run the slow path */ | 309 | jnz tracesys_phase2 /* if needed, run the slow path */ |
407 | LOAD_ARGS 0 /* else restore clobbered regs */ | 310 | RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */ |
311 | movq ORIG_RAX(%rsp), %rax | ||
408 | jmp system_call_fastpath /* and return to the fast path */ | 312 | jmp system_call_fastpath /* and return to the fast path */ |
409 | 313 | ||
410 | tracesys_phase2: | 314 | tracesys_phase2: |
411 | SAVE_REST | 315 | SAVE_EXTRA_REGS |
412 | FIXUP_TOP_OF_STACK %rdi | ||
413 | movq %rsp, %rdi | 316 | movq %rsp, %rdi |
414 | movq $AUDIT_ARCH_X86_64, %rsi | 317 | movl $AUDIT_ARCH_X86_64, %esi |
415 | movq %rax,%rdx | 318 | movq %rax,%rdx |
416 | call syscall_trace_enter_phase2 | 319 | call syscall_trace_enter_phase2 |
417 | 320 | ||
418 | /* | 321 | /* |
419 | * Reload arg registers from stack in case ptrace changed them. | 322 | * Reload registers from stack in case ptrace changed them. |
420 | * We don't reload %rax because syscall_trace_entry_phase2() returned | 323 | * We don't reload %rax because syscall_trace_entry_phase2() returned |
421 | * the value it wants us to use in the table lookup. | 324 | * the value it wants us to use in the table lookup. |
422 | */ | 325 | */ |
423 | LOAD_ARGS ARGOFFSET, 1 | 326 | RESTORE_C_REGS_EXCEPT_RAX |
424 | RESTORE_REST | 327 | RESTORE_EXTRA_REGS |
425 | #if __SYSCALL_MASK == ~0 | 328 | #if __SYSCALL_MASK == ~0 |
426 | cmpq $__NR_syscall_max,%rax | 329 | cmpq $__NR_syscall_max,%rax |
427 | #else | 330 | #else |
428 | andl $__SYSCALL_MASK,%eax | 331 | andl $__SYSCALL_MASK,%eax |
429 | cmpl $__NR_syscall_max,%eax | 332 | cmpl $__NR_syscall_max,%eax |
430 | #endif | 333 | #endif |
431 | ja int_ret_from_sys_call /* RAX(%rsp) is already set */ | 334 | ja 1f /* return -ENOSYS (already in pt_regs->ax) */ |
432 | movq %r10,%rcx /* fixup for C */ | 335 | movq %r10,%rcx /* fixup for C */ |
433 | call *sys_call_table(,%rax,8) | 336 | call *sys_call_table(,%rax,8) |
434 | movq %rax,RAX-ARGOFFSET(%rsp) | 337 | movq %rax,RAX(%rsp) |
435 | /* Use IRET because user could have changed frame */ | 338 | 1: |
339 | /* Use IRET because user could have changed pt_regs->foo */ | ||
436 | 340 | ||
437 | /* | 341 | /* |
438 | * Syscall return path ending with IRET. | 342 | * Syscall return path ending with IRET. |
439 | * Has correct top of stack, but partial stack frame. | 343 | * Has correct iret frame. |
440 | */ | 344 | */ |
441 | GLOBAL(int_ret_from_sys_call) | 345 | GLOBAL(int_ret_from_sys_call) |
442 | DISABLE_INTERRUPTS(CLBR_NONE) | 346 | DISABLE_INTERRUPTS(CLBR_NONE) |
347 | int_ret_from_sys_call_irqs_off: /* jumps come here from the irqs-off SYSRET path */ | ||
443 | TRACE_IRQS_OFF | 348 | TRACE_IRQS_OFF |
444 | int_ret_from_sys_call_irqs_off: | ||
445 | movl $_TIF_ALLWORK_MASK,%edi | 349 | movl $_TIF_ALLWORK_MASK,%edi |
446 | /* edi: mask to check */ | 350 | /* edi: mask to check */ |
447 | GLOBAL(int_with_check) | 351 | GLOBAL(int_with_check) |
@@ -450,8 +354,8 @@ GLOBAL(int_with_check) | |||
450 | movl TI_flags(%rcx),%edx | 354 | movl TI_flags(%rcx),%edx |
451 | andl %edi,%edx | 355 | andl %edi,%edx |
452 | jnz int_careful | 356 | jnz int_careful |
453 | andl $~TS_COMPAT,TI_status(%rcx) | 357 | andl $~TS_COMPAT,TI_status(%rcx) |
454 | jmp retint_swapgs | 358 | jmp syscall_return |
455 | 359 | ||
456 | /* Either reschedule or signal or syscall exit tracking needed. */ | 360 | /* Either reschedule or signal or syscall exit tracking needed. */ |
457 | /* First do a reschedule test. */ | 361 | /* First do a reschedule test. */ |
@@ -468,12 +372,11 @@ int_careful: | |||
468 | TRACE_IRQS_OFF | 372 | TRACE_IRQS_OFF |
469 | jmp int_with_check | 373 | jmp int_with_check |
470 | 374 | ||
471 | /* handle signals and tracing -- both require a full stack frame */ | 375 | /* handle signals and tracing -- both require a full pt_regs */ |
472 | int_very_careful: | 376 | int_very_careful: |
473 | TRACE_IRQS_ON | 377 | TRACE_IRQS_ON |
474 | ENABLE_INTERRUPTS(CLBR_NONE) | 378 | ENABLE_INTERRUPTS(CLBR_NONE) |
475 | int_check_syscall_exit_work: | 379 | SAVE_EXTRA_REGS |
476 | SAVE_REST | ||
477 | /* Check for syscall exit trace */ | 380 | /* Check for syscall exit trace */ |
478 | testl $_TIF_WORK_SYSCALL_EXIT,%edx | 381 | testl $_TIF_WORK_SYSCALL_EXIT,%edx |
479 | jz int_signal | 382 | jz int_signal |
@@ -492,86 +395,192 @@ int_signal: | |||
492 | call do_notify_resume | 395 | call do_notify_resume |
493 | 1: movl $_TIF_WORK_MASK,%edi | 396 | 1: movl $_TIF_WORK_MASK,%edi |
494 | int_restore_rest: | 397 | int_restore_rest: |
495 | RESTORE_REST | 398 | RESTORE_EXTRA_REGS |
496 | DISABLE_INTERRUPTS(CLBR_NONE) | 399 | DISABLE_INTERRUPTS(CLBR_NONE) |
497 | TRACE_IRQS_OFF | 400 | TRACE_IRQS_OFF |
498 | jmp int_with_check | 401 | jmp int_with_check |
402 | |||
403 | syscall_return: | ||
404 | /* The IRETQ could re-enable interrupts: */ | ||
405 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
406 | TRACE_IRQS_IRETQ | ||
407 | |||
408 | /* | ||
409 | * Try to use SYSRET instead of IRET if we're returning to | ||
410 | * a completely clean 64-bit userspace context. | ||
411 | */ | ||
412 | movq RCX(%rsp),%rcx | ||
413 | cmpq %rcx,RIP(%rsp) /* RCX == RIP */ | ||
414 | jne opportunistic_sysret_failed | ||
415 | |||
416 | /* | ||
417 | * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP | ||
418 | * in kernel space. This essentially lets the user take over | ||
419 | * the kernel, since userspace controls RSP. It's not worth | ||
420 | * testing for canonicalness exactly -- this check detects any | ||
421 | * of the 17 high bits set, which is true for non-canonical | ||
422 | * or kernel addresses. (This will pessimize vsyscall=native. | ||
423 | * Big deal.) | ||
424 | * | ||
425 | * If virtual addresses ever become wider, this will need | ||
426 | * to be updated to remain correct on both old and new CPUs. | ||
427 | */ | ||
428 | .ifne __VIRTUAL_MASK_SHIFT - 47 | ||
429 | .error "virtual address width changed -- SYSRET checks need update" | ||
430 | .endif | ||
431 | shr $__VIRTUAL_MASK_SHIFT, %rcx | ||
432 | jnz opportunistic_sysret_failed | ||
433 | |||
434 | cmpq $__USER_CS,CS(%rsp) /* CS must match SYSRET */ | ||
435 | jne opportunistic_sysret_failed | ||
436 | |||
437 | movq R11(%rsp),%r11 | ||
438 | cmpq %r11,EFLAGS(%rsp) /* R11 == RFLAGS */ | ||
439 | jne opportunistic_sysret_failed | ||
440 | |||
441 | /* | ||
442 | * SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET, | ||
443 | * restoring TF results in a trap from userspace immediately after | ||
444 | * SYSRET. This would cause an infinite loop whenever #DB happens | ||
445 | * with register state that satisfies the opportunistic SYSRET | ||
446 | * conditions. For example, single-stepping this user code: | ||
447 | * | ||
448 | * movq $stuck_here,%rcx | ||
449 | * pushfq | ||
450 | * popq %r11 | ||
451 | * stuck_here: | ||
452 | * | ||
453 | * would never get past 'stuck_here'. | ||
454 | */ | ||
455 | testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11 | ||
456 | jnz opportunistic_sysret_failed | ||
457 | |||
458 | /* nothing to check for RSP */ | ||
459 | |||
460 | cmpq $__USER_DS,SS(%rsp) /* SS must match SYSRET */ | ||
461 | jne opportunistic_sysret_failed | ||
462 | |||
463 | /* | ||
464 | * We win! This label is here just for ease of understanding | ||
465 | * perf profiles. Nothing jumps here. | ||
466 | */ | ||
467 | syscall_return_via_sysret: | ||
468 | CFI_REMEMBER_STATE | ||
469 | /* r11 is already restored (see code above) */ | ||
470 | RESTORE_C_REGS_EXCEPT_R11 | ||
471 | movq RSP(%rsp),%rsp | ||
472 | USERGS_SYSRET64 | ||
473 | CFI_RESTORE_STATE | ||
474 | |||
475 | opportunistic_sysret_failed: | ||
476 | SWAPGS | ||
477 | jmp restore_c_regs_and_iret | ||
499 | CFI_ENDPROC | 478 | CFI_ENDPROC |
500 | END(system_call) | 479 | END(system_call) |
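Condensed into C, the opportunistic-SYSRET eligibility test added above looks roughly like the sketch below. This is illustrative only; the selector values and flag bits are assumptions spelled out in the defines, and the field names simply mirror the saved pt_regs slots.

#include <stdbool.h>
#include <stdint.h>

#define VIRTUAL_MASK_SHIFT 47			/* 48-bit virtual addresses assumed */
#define USER_CS   0x33				/* assumed __USER_CS value, for illustration */
#define USER_DS   0x2b				/* assumed __USER_DS value, for illustration */
#define EFLAGS_RF (1UL << 16)
#define EFLAGS_TF (1UL << 8)

struct saved_regs { uint64_t ip, cs, flags, sp, ss, cx, r11; };

static bool sysret_is_safe(const struct saved_regs *r)
{
	if (r->cx != r->ip)			/* SYSRET reloads RIP from RCX */
		return false;
	if (r->cx >> VIRTUAL_MASK_SHIFT)	/* any high bit set: non-canonical or kernel RIP */
		return false;
	if (r->cs != USER_CS || r->ss != USER_DS)
		return false;
	if (r->r11 != r->flags)			/* SYSRET reloads RFLAGS from R11 */
		return false;
	if (r->r11 & (EFLAGS_RF | EFLAGS_TF))	/* can't restore RF; TF would trap-loop */
		return false;
	return true;				/* RSP needs no check: SYSRET doesn't touch it */
}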
501 | 480 | ||
481 | |||
502 | .macro FORK_LIKE func | 482 | .macro FORK_LIKE func |
503 | ENTRY(stub_\func) | 483 | ENTRY(stub_\func) |
504 | CFI_STARTPROC | 484 | CFI_STARTPROC |
505 | popq %r11 /* save return address */ | 485 | DEFAULT_FRAME 0, 8 /* offset 8: return address */ |
506 | PARTIAL_FRAME 0 | 486 | SAVE_EXTRA_REGS 8 |
507 | SAVE_REST | 487 | jmp sys_\func |
508 | pushq %r11 /* put it back on stack */ | ||
509 | FIXUP_TOP_OF_STACK %r11, 8 | ||
510 | DEFAULT_FRAME 0 8 /* offset 8: return address */ | ||
511 | call sys_\func | ||
512 | RESTORE_TOP_OF_STACK %r11, 8 | ||
513 | ret $REST_SKIP /* pop extended registers */ | ||
514 | CFI_ENDPROC | 488 | CFI_ENDPROC |
515 | END(stub_\func) | 489 | END(stub_\func) |
516 | .endm | 490 | .endm |
517 | 491 | ||
518 | .macro FIXED_FRAME label,func | ||
519 | ENTRY(\label) | ||
520 | CFI_STARTPROC | ||
521 | PARTIAL_FRAME 0 8 /* offset 8: return address */ | ||
522 | FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET | ||
523 | call \func | ||
524 | RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET | ||
525 | ret | ||
526 | CFI_ENDPROC | ||
527 | END(\label) | ||
528 | .endm | ||
529 | |||
530 | FORK_LIKE clone | 492 | FORK_LIKE clone |
531 | FORK_LIKE fork | 493 | FORK_LIKE fork |
532 | FORK_LIKE vfork | 494 | FORK_LIKE vfork |
533 | FIXED_FRAME stub_iopl, sys_iopl | ||
534 | 495 | ||
535 | ENTRY(stub_execve) | 496 | ENTRY(stub_execve) |
536 | CFI_STARTPROC | 497 | CFI_STARTPROC |
537 | addq $8, %rsp | 498 | DEFAULT_FRAME 0, 8 |
538 | PARTIAL_FRAME 0 | 499 | call sys_execve |
539 | SAVE_REST | 500 | return_from_execve: |
540 | FIXUP_TOP_OF_STACK %r11 | 501 | testl %eax, %eax |
541 | call sys_execve | 502 | jz 1f |
542 | movq %rax,RAX(%rsp) | 503 | /* exec failed, can use fast SYSRET code path in this case */ |
543 | RESTORE_REST | 504 | ret |
544 | jmp int_ret_from_sys_call | 505 | 1: |
506 | /* must use IRET code path (pt_regs->cs may have changed) */ | ||
507 | addq $8, %rsp | ||
508 | CFI_ADJUST_CFA_OFFSET -8 | ||
509 | ZERO_EXTRA_REGS | ||
510 | movq %rax,RAX(%rsp) | ||
511 | jmp int_ret_from_sys_call | ||
545 | CFI_ENDPROC | 512 | CFI_ENDPROC |
546 | END(stub_execve) | 513 | END(stub_execve) |
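The "testl %eax, %eax" above boils down to: a successful execve has rewritten the whole register image (pt_regs->cs included, e.g. for a 32-bit binary), so only the IRET path can restore it, while a failed execve left the frame untouched and the fast SYSRET return stays valid. A one-line sketch, illustrative only:

/* sys_execve() returns 0 on success, -errno on failure (illustrative). */
static inline int execve_must_use_iret(long retval)
{
	return retval == 0;	/* success: pt_regs now describes the new program */
}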
547 | 514 | /* | |
548 | ENTRY(stub_execveat) | 515 | * Remaining execve stubs are only 7 bytes long. |
516 | * ENTRY() often aligns to 16 bytes, which in this case has no benefits. | ||
517 | */ | ||
518 | .align 8 | ||
519 | GLOBAL(stub_execveat) | ||
549 | CFI_STARTPROC | 520 | CFI_STARTPROC |
550 | addq $8, %rsp | 521 | DEFAULT_FRAME 0, 8 |
551 | PARTIAL_FRAME 0 | 522 | call sys_execveat |
552 | SAVE_REST | 523 | jmp return_from_execve |
553 | FIXUP_TOP_OF_STACK %r11 | ||
554 | call sys_execveat | ||
555 | RESTORE_TOP_OF_STACK %r11 | ||
556 | movq %rax,RAX(%rsp) | ||
557 | RESTORE_REST | ||
558 | jmp int_ret_from_sys_call | ||
559 | CFI_ENDPROC | 524 | CFI_ENDPROC |
560 | END(stub_execveat) | 525 | END(stub_execveat) |
561 | 526 | ||
527 | #ifdef CONFIG_X86_X32_ABI | ||
528 | .align 8 | ||
529 | GLOBAL(stub_x32_execve) | ||
530 | CFI_STARTPROC | ||
531 | DEFAULT_FRAME 0, 8 | ||
532 | call compat_sys_execve | ||
533 | jmp return_from_execve | ||
534 | CFI_ENDPROC | ||
535 | END(stub_x32_execve) | ||
536 | .align 8 | ||
537 | GLOBAL(stub_x32_execveat) | ||
538 | CFI_STARTPROC | ||
539 | DEFAULT_FRAME 0, 8 | ||
540 | call compat_sys_execveat | ||
541 | jmp return_from_execve | ||
542 | CFI_ENDPROC | ||
543 | END(stub_x32_execveat) | ||
544 | #endif | ||
545 | |||
546 | #ifdef CONFIG_IA32_EMULATION | ||
547 | .align 8 | ||
548 | GLOBAL(stub32_execve) | ||
549 | CFI_STARTPROC | ||
550 | call compat_sys_execve | ||
551 | jmp return_from_execve | ||
552 | CFI_ENDPROC | ||
553 | END(stub32_execve) | ||
554 | .align 8 | ||
555 | GLOBAL(stub32_execveat) | ||
556 | CFI_STARTPROC | ||
557 | call compat_sys_execveat | ||
558 | jmp return_from_execve | ||
559 | CFI_ENDPROC | ||
560 | END(stub32_execveat) | ||
561 | #endif | ||
562 | |||
562 | /* | 563 | /* |
563 | * sigreturn is special because it needs to restore all registers on return. | 564 | * sigreturn is special because it needs to restore all registers on return. |
564 | * This cannot be done with SYSRET, so use the IRET return path instead. | 565 | * This cannot be done with SYSRET, so use the IRET return path instead. |
565 | */ | 566 | */ |
566 | ENTRY(stub_rt_sigreturn) | 567 | ENTRY(stub_rt_sigreturn) |
567 | CFI_STARTPROC | 568 | CFI_STARTPROC |
568 | addq $8, %rsp | 569 | DEFAULT_FRAME 0, 8 |
569 | PARTIAL_FRAME 0 | 570 | /* |
570 | SAVE_REST | 571 | * SAVE_EXTRA_REGS result is not normally needed: |
571 | FIXUP_TOP_OF_STACK %r11 | 572 | * sigreturn overwrites all pt_regs->GPREGS. |
573 | * But sigreturn can fail (!), and there is no easy way to detect that. | ||
574 | * To make sure RESTORE_EXTRA_REGS doesn't restore garbage on error, | ||
575 | * we SAVE_EXTRA_REGS here. | ||
576 | */ | ||
577 | SAVE_EXTRA_REGS 8 | ||
572 | call sys_rt_sigreturn | 578 | call sys_rt_sigreturn |
573 | movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer | 579 | return_from_stub: |
574 | RESTORE_REST | 580 | addq $8, %rsp |
581 | CFI_ADJUST_CFA_OFFSET -8 | ||
582 | RESTORE_EXTRA_REGS | ||
583 | movq %rax,RAX(%rsp) | ||
575 | jmp int_ret_from_sys_call | 584 | jmp int_ret_from_sys_call |
576 | CFI_ENDPROC | 585 | CFI_ENDPROC |
577 | END(stub_rt_sigreturn) | 586 | END(stub_rt_sigreturn) |
@@ -579,86 +588,70 @@ END(stub_rt_sigreturn) | |||
579 | #ifdef CONFIG_X86_X32_ABI | 588 | #ifdef CONFIG_X86_X32_ABI |
580 | ENTRY(stub_x32_rt_sigreturn) | 589 | ENTRY(stub_x32_rt_sigreturn) |
581 | CFI_STARTPROC | 590 | CFI_STARTPROC |
582 | addq $8, %rsp | 591 | DEFAULT_FRAME 0, 8 |
583 | PARTIAL_FRAME 0 | 592 | SAVE_EXTRA_REGS 8 |
584 | SAVE_REST | ||
585 | FIXUP_TOP_OF_STACK %r11 | ||
586 | call sys32_x32_rt_sigreturn | 593 | call sys32_x32_rt_sigreturn |
587 | movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer | 594 | jmp return_from_stub |
588 | RESTORE_REST | ||
589 | jmp int_ret_from_sys_call | ||
590 | CFI_ENDPROC | 595 | CFI_ENDPROC |
591 | END(stub_x32_rt_sigreturn) | 596 | END(stub_x32_rt_sigreturn) |
597 | #endif | ||
592 | 598 | ||
593 | ENTRY(stub_x32_execve) | 599 | /* |
594 | CFI_STARTPROC | 600 | * A newly forked process directly context switches into this address. |
595 | addq $8, %rsp | 601 | * |
596 | PARTIAL_FRAME 0 | 602 | * rdi: prev task we switched from |
597 | SAVE_REST | 603 | */ |
598 | FIXUP_TOP_OF_STACK %r11 | 604 | ENTRY(ret_from_fork) |
599 | call compat_sys_execve | 605 | DEFAULT_FRAME |
600 | RESTORE_TOP_OF_STACK %r11 | ||
601 | movq %rax,RAX(%rsp) | ||
602 | RESTORE_REST | ||
603 | jmp int_ret_from_sys_call | ||
604 | CFI_ENDPROC | ||
605 | END(stub_x32_execve) | ||
606 | 606 | ||
607 | ENTRY(stub_x32_execveat) | 607 | LOCK ; btr $TIF_FORK,TI_flags(%r8) |
608 | CFI_STARTPROC | 608 | |
609 | addq $8, %rsp | 609 | pushq_cfi $0x0002 |
610 | PARTIAL_FRAME 0 | 610 | popfq_cfi # reset kernel eflags |
611 | SAVE_REST | 611 | |
612 | FIXUP_TOP_OF_STACK %r11 | 612 | call schedule_tail # rdi: 'prev' task parameter |
613 | call compat_sys_execveat | 613 | |
614 | RESTORE_TOP_OF_STACK %r11 | 614 | RESTORE_EXTRA_REGS |
615 | movq %rax,RAX(%rsp) | 615 | |
616 | RESTORE_REST | 616 | testl $3,CS(%rsp) # from kernel_thread? |
617 | |||
618 | /* | ||
619 | * By the time we get here, we have no idea whether our pt_regs, | ||
620 | * ti flags, and ti status came from the 64-bit SYSCALL fast path, | ||
621 | * the slow path, or one of the ia32entry paths. | ||
622 | * Use IRET code path to return, since it can safely handle | ||
623 | * all of the above. | ||
624 | */ | ||
625 | jnz int_ret_from_sys_call | ||
626 | |||
627 | /* We came from kernel_thread */ | ||
628 | /* nb: we depend on RESTORE_EXTRA_REGS above */ | ||
629 | movq %rbp, %rdi | ||
630 | call *%rbx | ||
631 | movl $0, RAX(%rsp) | ||
632 | RESTORE_EXTRA_REGS | ||
617 | jmp int_ret_from_sys_call | 633 | jmp int_ret_from_sys_call |
618 | CFI_ENDPROC | 634 | CFI_ENDPROC |
619 | END(stub_x32_execveat) | 635 | END(ret_from_fork) |
620 | |||
621 | #endif | ||
622 | 636 | ||
623 | /* | 637 | /* |
624 | * Build the entry stubs and pointer table with some assembler magic. | 638 | * Build the entry stubs with some assembler magic. |
625 | * We pack 7 stubs into a single 32-byte chunk, which will fit in a | 639 | * We pack 1 stub into every 8-byte block. |
626 | * single cache line on all modern x86 implementations. | ||
627 | */ | 640 | */ |
628 | .section .init.rodata,"a" | 641 | .align 8 |
629 | ENTRY(interrupt) | ||
630 | .section .entry.text | ||
631 | .p2align 5 | ||
632 | .p2align CONFIG_X86_L1_CACHE_SHIFT | ||
633 | ENTRY(irq_entries_start) | 642 | ENTRY(irq_entries_start) |
634 | INTR_FRAME | 643 | INTR_FRAME |
635 | vector=FIRST_EXTERNAL_VECTOR | 644 | vector=FIRST_EXTERNAL_VECTOR |
636 | .rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7 | 645 | .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) |
637 | .balign 32 | 646 | pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */ |
638 | .rept 7 | 647 | vector=vector+1 |
639 | .if vector < FIRST_SYSTEM_VECTOR | 648 | jmp common_interrupt |
640 | .if vector <> FIRST_EXTERNAL_VECTOR | ||
641 | CFI_ADJUST_CFA_OFFSET -8 | 649 | CFI_ADJUST_CFA_OFFSET -8 |
642 | .endif | 650 | .align 8 |
643 | 1: pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */ | 651 | .endr |
644 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 | ||
645 | jmp 2f | ||
646 | .endif | ||
647 | .previous | ||
648 | .quad 1b | ||
649 | .section .entry.text | ||
650 | vector=vector+1 | ||
651 | .endif | ||
652 | .endr | ||
653 | 2: jmp common_interrupt | ||
654 | .endr | ||
655 | CFI_ENDPROC | 652 | CFI_ENDPROC |
656 | END(irq_entries_start) | 653 | END(irq_entries_start) |
657 | 654 | ||
658 | .previous | ||
659 | END(interrupt) | ||
660 | .previous | ||
661 | |||
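Why the new stubs stay tiny: for external vectors 0x20..0xff the pushed immediate ~vector + 0x80 equals 127 - vector, which always fits in a signed byte, so each stub is a short push plus a jmp and packs into one 8-byte slot. common_interrupt then adds -0x80, recovering ~vector in the [-256, -1] range, which the C handler complements back. A small self-checking sketch (illustrative only, not from the patch):

#include <assert.h>
#include <stdio.h>

int main(void)
{
	for (int vector = 0x20; vector <= 0xff; vector++) {
		int pushed = ~vector + 0x80;			/* == 127 - vector */
		assert(pushed >= -128 && pushed <= 127);	/* fits in a signed byte */
		int adjusted = pushed - 0x80;			/* addq $-0x80,(%rsp) */
		assert(adjusted == ~vector);			/* now in [-256, -1] */
		assert(~adjusted == vector);			/* handler recovers the vector */
	}
	printf("all external vectors encode in a signed byte\n");
	return 0;
}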
662 | /* | 655 | /* |
663 | * Interrupt entry/exit. | 656 | * Interrupt entry/exit. |
664 | * | 657 | * |
@@ -669,47 +662,45 @@ END(interrupt) | |||
669 | 662 | ||
670 | /* 0(%rsp): ~(interrupt number) */ | 663 | /* 0(%rsp): ~(interrupt number) */ |
671 | .macro interrupt func | 664 | .macro interrupt func |
672 | /* reserve pt_regs for scratch regs and rbp */ | ||
673 | subq $ORIG_RAX-RBP, %rsp | ||
674 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP | ||
675 | cld | 665 | cld |
676 | /* start from rbp in pt_regs and jump over */ | 666 | /* |
677 | movq_cfi rdi, (RDI-RBP) | 667 | * Since nothing in interrupt handling code touches r12...r15 members |
678 | movq_cfi rsi, (RSI-RBP) | 668 | * of "struct pt_regs", and since interrupts can nest, we can save |
679 | movq_cfi rdx, (RDX-RBP) | 669 | * four stack slots and simultaneously provide |
680 | movq_cfi rcx, (RCX-RBP) | 670 | * an unwind-friendly stack layout by saving "truncated" pt_regs |
681 | movq_cfi rax, (RAX-RBP) | 671 | * exactly up to rbp slot, without these members. |
682 | movq_cfi r8, (R8-RBP) | 672 | */ |
683 | movq_cfi r9, (R9-RBP) | 673 | ALLOC_PT_GPREGS_ON_STACK -RBP |
684 | movq_cfi r10, (R10-RBP) | 674 | SAVE_C_REGS -RBP |
685 | movq_cfi r11, (R11-RBP) | 675 | /* this goes to 0(%rsp) for unwinder, not for saving the value: */ |
686 | 676 | SAVE_EXTRA_REGS_RBP -RBP | |
687 | /* Save rbp so that we can unwind from get_irq_regs() */ | 677 | |
688 | movq_cfi rbp, 0 | 678 | leaq -RBP(%rsp),%rdi /* arg1 for \func (pointer to pt_regs) */ |
689 | |||
690 | /* Save previous stack value */ | ||
691 | movq %rsp, %rsi | ||
692 | 679 | ||
693 | leaq -RBP(%rsp),%rdi /* arg1 for handler */ | 680 | testl $3, CS-RBP(%rsp) |
694 | testl $3, CS-RBP(%rsi) | ||
695 | je 1f | 681 | je 1f |
696 | SWAPGS | 682 | SWAPGS |
683 | 1: | ||
697 | /* | 684 | /* |
685 | * Save previous stack pointer, optionally switch to interrupt stack. | ||
698 | * irq_count is used to check if a CPU is already on an interrupt stack | 686 | * irq_count is used to check if a CPU is already on an interrupt stack |
699 | * or not. While this is essentially redundant with preempt_count it is | 687 | * or not. While this is essentially redundant with preempt_count it is |
700 | * a little cheaper to use a separate counter in the PDA (short of | 688 | * a little cheaper to use a separate counter in the PDA (short of |
701 | * moving irq_enter into assembly, which would be too much work) | 689 | * moving irq_enter into assembly, which would be too much work) |
702 | */ | 690 | */ |
703 | 1: incl PER_CPU_VAR(irq_count) | 691 | movq %rsp, %rsi |
692 | incl PER_CPU_VAR(irq_count) | ||
704 | cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp | 693 | cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp |
705 | CFI_DEF_CFA_REGISTER rsi | 694 | CFI_DEF_CFA_REGISTER rsi |
706 | |||
707 | /* Store previous stack value */ | ||
708 | pushq %rsi | 695 | pushq %rsi |
696 | /* | ||
697 | * For debugger: | ||
698 | * "CFA (Current Frame Address) is the value on stack + offset" | ||
699 | */ | ||
709 | CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ | 700 | CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ |
710 | 0x77 /* DW_OP_breg7 */, 0, \ | 701 | 0x77 /* DW_OP_breg7 (rsp) */, 0, \ |
711 | 0x06 /* DW_OP_deref */, \ | 702 | 0x06 /* DW_OP_deref */, \ |
712 | 0x08 /* DW_OP_const1u */, SS+8-RBP, \ | 703 | 0x08 /* DW_OP_const1u */, SIZEOF_PTREGS-RBP, \ |
713 | 0x22 /* DW_OP_plus */ | 704 | 0x22 /* DW_OP_plus */ |
714 | /* We entered an interrupt context - irqs are off: */ | 705 | /* We entered an interrupt context - irqs are off: */ |
715 | TRACE_IRQS_OFF | 706 | TRACE_IRQS_OFF |
@@ -727,7 +718,7 @@ common_interrupt: | |||
727 | ASM_CLAC | 718 | ASM_CLAC |
728 | addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ | 719 | addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ |
729 | interrupt do_IRQ | 720 | interrupt do_IRQ |
730 | /* 0(%rsp): old_rsp-ARGOFFSET */ | 721 | /* 0(%rsp): old RSP */ |
731 | ret_from_intr: | 722 | ret_from_intr: |
732 | DISABLE_INTERRUPTS(CLBR_NONE) | 723 | DISABLE_INTERRUPTS(CLBR_NONE) |
733 | TRACE_IRQS_OFF | 724 | TRACE_IRQS_OFF |
@@ -735,19 +726,18 @@ ret_from_intr: | |||
735 | 726 | ||
736 | /* Restore saved previous stack */ | 727 | /* Restore saved previous stack */ |
737 | popq %rsi | 728 | popq %rsi |
738 | CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */ | 729 | CFI_DEF_CFA rsi,SIZEOF_PTREGS-RBP /* reg/off reset after def_cfa_expr */ |
739 | leaq ARGOFFSET-RBP(%rsi), %rsp | 730 | /* return code expects complete pt_regs - adjust rsp accordingly: */ |
731 | leaq -RBP(%rsi),%rsp | ||
740 | CFI_DEF_CFA_REGISTER rsp | 732 | CFI_DEF_CFA_REGISTER rsp |
741 | CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET | 733 | CFI_ADJUST_CFA_OFFSET RBP |
742 | 734 | ||
743 | exit_intr: | 735 | testl $3,CS(%rsp) |
744 | GET_THREAD_INFO(%rcx) | ||
745 | testl $3,CS-ARGOFFSET(%rsp) | ||
746 | je retint_kernel | 736 | je retint_kernel |
747 | |||
748 | /* Interrupt came from user space */ | 737 | /* Interrupt came from user space */ |
738 | |||
739 | GET_THREAD_INFO(%rcx) | ||
749 | /* | 740 | /* |
750 | * Has a correct top of stack, but a partial stack frame | ||
751 | * %rcx: thread info. Interrupts off. | 741 | * %rcx: thread info. Interrupts off. |
752 | */ | 742 | */ |
753 | retint_with_reschedule: | 743 | retint_with_reschedule: |
@@ -766,70 +756,34 @@ retint_swapgs: /* return to user-space */ | |||
766 | DISABLE_INTERRUPTS(CLBR_ANY) | 756 | DISABLE_INTERRUPTS(CLBR_ANY) |
767 | TRACE_IRQS_IRETQ | 757 | TRACE_IRQS_IRETQ |
768 | 758 | ||
769 | /* | ||
770 | * Try to use SYSRET instead of IRET if we're returning to | ||
771 | * a completely clean 64-bit userspace context. | ||
772 | */ | ||
773 | movq (RCX-R11)(%rsp), %rcx | ||
774 | cmpq %rcx,(RIP-R11)(%rsp) /* RCX == RIP */ | ||
775 | jne opportunistic_sysret_failed | ||
776 | |||
777 | /* | ||
778 | * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP | ||
779 | * in kernel space. This essentially lets the user take over | ||
780 | * the kernel, since userspace controls RSP. It's not worth | ||
781 | * testing for canonicalness exactly -- this check detects any | ||
782 | * of the 17 high bits set, which is true for non-canonical | ||
783 | * or kernel addresses. (This will pessimize vsyscall=native. | ||
784 | * Big deal.) | ||
785 | * | ||
786 | * If virtual addresses ever become wider, this will need | ||
787 | * to be updated to remain correct on both old and new CPUs. | ||
788 | */ | ||
789 | .ifne __VIRTUAL_MASK_SHIFT - 47 | ||
790 | .error "virtual address width changed -- sysret checks need update" | ||
791 | .endif | ||
792 | shr $__VIRTUAL_MASK_SHIFT, %rcx | ||
793 | jnz opportunistic_sysret_failed | ||
794 | |||
795 | cmpq $__USER_CS,(CS-R11)(%rsp) /* CS must match SYSRET */ | ||
796 | jne opportunistic_sysret_failed | ||
797 | |||
798 | movq (R11-ARGOFFSET)(%rsp), %r11 | ||
799 | cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp) /* R11 == RFLAGS */ | ||
800 | jne opportunistic_sysret_failed | ||
801 | |||
802 | testq $X86_EFLAGS_RF,%r11 /* sysret can't restore RF */ | ||
803 | jnz opportunistic_sysret_failed | ||
804 | |||
805 | /* nothing to check for RSP */ | ||
806 | |||
807 | cmpq $__USER_DS,(SS-ARGOFFSET)(%rsp) /* SS must match SYSRET */ | ||
808 | jne opportunistic_sysret_failed | ||
809 | |||
810 | /* | ||
811 | * We win! This label is here just for ease of understanding | ||
812 | * perf profiles. Nothing jumps here. | ||
813 | */ | ||
814 | irq_return_via_sysret: | ||
815 | CFI_REMEMBER_STATE | ||
816 | RESTORE_ARGS 1,8,1 | ||
817 | movq (RSP-RIP)(%rsp),%rsp | ||
818 | USERGS_SYSRET64 | ||
819 | CFI_RESTORE_STATE | ||
820 | |||
821 | opportunistic_sysret_failed: | ||
822 | SWAPGS | 759 | SWAPGS |
823 | jmp restore_args | 760 | jmp restore_c_regs_and_iret |
824 | 761 | ||
825 | retint_restore_args: /* return to kernel space */ | 762 | /* Returning to kernel space */ |
826 | DISABLE_INTERRUPTS(CLBR_ANY) | 763 | retint_kernel: |
764 | #ifdef CONFIG_PREEMPT | ||
765 | /* Interrupts are off */ | ||
766 | /* Check if we need preemption */ | ||
767 | bt $9,EFLAGS(%rsp) /* interrupts were off? */ | ||
768 | jnc 1f | ||
769 | 0: cmpl $0,PER_CPU_VAR(__preempt_count) | ||
770 | jnz 1f | ||
771 | call preempt_schedule_irq | ||
772 | jmp 0b | ||
773 | 1: | ||
774 | #endif | ||
827 | /* | 775 | /* |
828 | * The iretq could re-enable interrupts: | 776 | * The iretq could re-enable interrupts: |
829 | */ | 777 | */ |
830 | TRACE_IRQS_IRETQ | 778 | TRACE_IRQS_IRETQ |
831 | restore_args: | 779 | |
832 | RESTORE_ARGS 1,8,1 | 780 | /* |
781 | * At this label, code paths which return to kernel and to user, | ||
782 | * which come from interrupts/exception and from syscalls, merge. | ||
783 | */ | ||
784 | restore_c_regs_and_iret: | ||
785 | RESTORE_C_REGS | ||
786 | REMOVE_PT_GPREGS_FROM_STACK 8 | ||
833 | 787 | ||
834 | irq_return: | 788 | irq_return: |
835 | INTERRUPT_RETURN | 789 | INTERRUPT_RETURN |
@@ -900,28 +854,17 @@ retint_signal: | |||
900 | jz retint_swapgs | 854 | jz retint_swapgs |
901 | TRACE_IRQS_ON | 855 | TRACE_IRQS_ON |
902 | ENABLE_INTERRUPTS(CLBR_NONE) | 856 | ENABLE_INTERRUPTS(CLBR_NONE) |
903 | SAVE_REST | 857 | SAVE_EXTRA_REGS |
904 | movq $-1,ORIG_RAX(%rsp) | 858 | movq $-1,ORIG_RAX(%rsp) |
905 | xorl %esi,%esi # oldset | 859 | xorl %esi,%esi # oldset |
906 | movq %rsp,%rdi # &pt_regs | 860 | movq %rsp,%rdi # &pt_regs |
907 | call do_notify_resume | 861 | call do_notify_resume |
908 | RESTORE_REST | 862 | RESTORE_EXTRA_REGS |
909 | DISABLE_INTERRUPTS(CLBR_NONE) | 863 | DISABLE_INTERRUPTS(CLBR_NONE) |
910 | TRACE_IRQS_OFF | 864 | TRACE_IRQS_OFF |
911 | GET_THREAD_INFO(%rcx) | 865 | GET_THREAD_INFO(%rcx) |
912 | jmp retint_with_reschedule | 866 | jmp retint_with_reschedule |
913 | 867 | ||
914 | #ifdef CONFIG_PREEMPT | ||
915 | /* Returning to kernel space. Check if we need preemption */ | ||
916 | /* rcx: threadinfo. interrupts off. */ | ||
917 | ENTRY(retint_kernel) | ||
918 | cmpl $0,PER_CPU_VAR(__preempt_count) | ||
919 | jnz retint_restore_args | ||
920 | bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ | ||
921 | jnc retint_restore_args | ||
922 | call preempt_schedule_irq | ||
923 | jmp exit_intr | ||
924 | #endif | ||
925 | CFI_ENDPROC | 868 | CFI_ENDPROC |
926 | END(common_interrupt) | 869 | END(common_interrupt) |
927 | 870 | ||
@@ -1010,7 +953,7 @@ apicinterrupt IRQ_WORK_VECTOR \ | |||
1010 | /* | 953 | /* |
1011 | * Exception entry points. | 954 | * Exception entry points. |
1012 | */ | 955 | */ |
1013 | #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) | 956 | #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8) |
1014 | 957 | ||
1015 | .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 | 958 | .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 |
1016 | ENTRY(\sym) | 959 | ENTRY(\sym) |
@@ -1032,8 +975,7 @@ ENTRY(\sym) | |||
1032 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 975 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
1033 | .endif | 976 | .endif |
1034 | 977 | ||
1035 | subq $ORIG_RAX-R15, %rsp | 978 | ALLOC_PT_GPREGS_ON_STACK |
1036 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | ||
1037 | 979 | ||
1038 | .if \paranoid | 980 | .if \paranoid |
1039 | .if \paranoid == 1 | 981 | .if \paranoid == 1 |
@@ -1041,10 +983,11 @@ ENTRY(\sym) | |||
1041 | testl $3, CS(%rsp) /* If coming from userspace, switch */ | 983 | testl $3, CS(%rsp) /* If coming from userspace, switch */ |
1042 | jnz 1f /* stacks. */ | 984 | jnz 1f /* stacks. */ |
1043 | .endif | 985 | .endif |
1044 | call save_paranoid | 986 | call paranoid_entry |
1045 | .else | 987 | .else |
1046 | call error_entry | 988 | call error_entry |
1047 | .endif | 989 | .endif |
990 | /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */ | ||
1048 | 991 | ||
1049 | DEFAULT_FRAME 0 | 992 | DEFAULT_FRAME 0 |
1050 | 993 | ||
@@ -1066,19 +1009,20 @@ ENTRY(\sym) | |||
1066 | .endif | 1009 | .endif |
1067 | 1010 | ||
1068 | .if \shift_ist != -1 | 1011 | .if \shift_ist != -1 |
1069 | subq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist) | 1012 | subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) |
1070 | .endif | 1013 | .endif |
1071 | 1014 | ||
1072 | call \do_sym | 1015 | call \do_sym |
1073 | 1016 | ||
1074 | .if \shift_ist != -1 | 1017 | .if \shift_ist != -1 |
1075 | addq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist) | 1018 | addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) |
1076 | .endif | 1019 | .endif |
1077 | 1020 | ||
1021 | /* these procedures expect "no swapgs" flag in ebx */ | ||
1078 | .if \paranoid | 1022 | .if \paranoid |
1079 | jmp paranoid_exit /* %ebx: no swapgs flag */ | 1023 | jmp paranoid_exit |
1080 | .else | 1024 | .else |
1081 | jmp error_exit /* %ebx: no swapgs flag */ | 1025 | jmp error_exit |
1082 | .endif | 1026 | .endif |
1083 | 1027 | ||
1084 | .if \paranoid == 1 | 1028 | .if \paranoid == 1 |
@@ -1282,7 +1226,9 @@ ENTRY(xen_failsafe_callback) | |||
1282 | addq $0x30,%rsp | 1226 | addq $0x30,%rsp |
1283 | CFI_ADJUST_CFA_OFFSET -0x30 | 1227 | CFI_ADJUST_CFA_OFFSET -0x30 |
1284 | pushq_cfi $-1 /* orig_ax = -1 => not a system call */ | 1228 | pushq_cfi $-1 /* orig_ax = -1 => not a system call */ |
1285 | SAVE_ALL | 1229 | ALLOC_PT_GPREGS_ON_STACK |
1230 | SAVE_C_REGS | ||
1231 | SAVE_EXTRA_REGS | ||
1286 | jmp error_exit | 1232 | jmp error_exit |
1287 | CFI_ENDPROC | 1233 | CFI_ENDPROC |
1288 | END(xen_failsafe_callback) | 1234 | END(xen_failsafe_callback) |
@@ -1314,59 +1260,66 @@ idtentry async_page_fault do_async_page_fault has_error_code=1 | |||
1314 | idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip) | 1260 | idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip) |
1315 | #endif | 1261 | #endif |
1316 | 1262 | ||
1317 | /* | 1263 | /* |
1318 | * "Paranoid" exit path from exception stack. This is invoked | 1264 | * Save all registers in pt_regs, and switch gs if needed. |
1319 | * only on return from non-NMI IST interrupts that came | 1265 | * Use slow, but surefire "are we in kernel?" check. |
1320 | * from kernel space. | 1266 | * Return: ebx=0: need swapgs on exit, ebx=1: otherwise |
1321 | * | 1267 | */ |
1322 | * We may be returning to very strange contexts (e.g. very early | 1268 | ENTRY(paranoid_entry) |
1323 | * in syscall entry), so checking for preemption here would | 1269 | XCPT_FRAME 1 15*8 |
1324 | * be complicated. Fortunately, we there's no good reason | 1270 | cld |
1325 | * to try to handle preemption here. | 1271 | SAVE_C_REGS 8 |
1326 | */ | 1272 | SAVE_EXTRA_REGS 8 |
1273 | movl $1,%ebx | ||
1274 | movl $MSR_GS_BASE,%ecx | ||
1275 | rdmsr | ||
1276 | testl %edx,%edx | ||
1277 | js 1f /* negative -> in kernel */ | ||
1278 | SWAPGS | ||
1279 | xorl %ebx,%ebx | ||
1280 | 1: ret | ||
1281 | CFI_ENDPROC | ||
1282 | END(paranoid_entry) | ||
1327 | 1283 | ||
1328 | /* ebx: no swapgs flag */ | 1284 | /* |
1285 | * "Paranoid" exit path from exception stack. This is invoked | ||
1286 | * only on return from non-NMI IST interrupts that came | ||
1287 | * from kernel space. | ||
1288 | * | ||
1289 | * We may be returning to very strange contexts (e.g. very early | ||
1290 | * in syscall entry), so checking for preemption here would | ||
1291 | * be complicated. Fortunately, there's no good reason | ||
1292 | * to try to handle preemption here. | ||
1293 | */ | ||
1294 | /* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */ | ||
1329 | ENTRY(paranoid_exit) | 1295 | ENTRY(paranoid_exit) |
1330 | DEFAULT_FRAME | 1296 | DEFAULT_FRAME |
1331 | DISABLE_INTERRUPTS(CLBR_NONE) | 1297 | DISABLE_INTERRUPTS(CLBR_NONE) |
1332 | TRACE_IRQS_OFF_DEBUG | 1298 | TRACE_IRQS_OFF_DEBUG |
1333 | testl %ebx,%ebx /* swapgs needed? */ | 1299 | testl %ebx,%ebx /* swapgs needed? */ |
1334 | jnz paranoid_restore | 1300 | jnz paranoid_exit_no_swapgs |
1335 | TRACE_IRQS_IRETQ 0 | 1301 | TRACE_IRQS_IRETQ |
1336 | SWAPGS_UNSAFE_STACK | 1302 | SWAPGS_UNSAFE_STACK |
1337 | RESTORE_ALL 8 | 1303 | jmp paranoid_exit_restore |
1338 | INTERRUPT_RETURN | 1304 | paranoid_exit_no_swapgs: |
1339 | paranoid_restore: | 1305 | TRACE_IRQS_IRETQ_DEBUG |
1340 | TRACE_IRQS_IRETQ_DEBUG 0 | 1306 | paranoid_exit_restore: |
1341 | RESTORE_ALL 8 | 1307 | RESTORE_EXTRA_REGS |
1308 | RESTORE_C_REGS | ||
1309 | REMOVE_PT_GPREGS_FROM_STACK 8 | ||
1342 | INTERRUPT_RETURN | 1310 | INTERRUPT_RETURN |
1343 | CFI_ENDPROC | 1311 | CFI_ENDPROC |
1344 | END(paranoid_exit) | 1312 | END(paranoid_exit) |
1345 | 1313 | ||
1346 | /* | 1314 | /* |
1347 | * Exception entry point. This expects an error code/orig_rax on the stack. | 1315 | * Save all registers in pt_regs, and switch gs if needed. |
1348 | * returns in "no swapgs flag" in %ebx. | 1316 | * Return: ebx=0: need swapgs on exit, ebx=1: otherwise |
1349 | */ | 1317 | */ |
1350 | ENTRY(error_entry) | 1318 | ENTRY(error_entry) |
1351 | XCPT_FRAME | 1319 | XCPT_FRAME 1 15*8 |
1352 | CFI_ADJUST_CFA_OFFSET 15*8 | ||
1353 | /* oldrax contains error code */ | ||
1354 | cld | 1320 | cld |
1355 | movq %rdi, RDI+8(%rsp) | 1321 | SAVE_C_REGS 8 |
1356 | movq %rsi, RSI+8(%rsp) | 1322 | SAVE_EXTRA_REGS 8 |
1357 | movq %rdx, RDX+8(%rsp) | ||
1358 | movq %rcx, RCX+8(%rsp) | ||
1359 | movq %rax, RAX+8(%rsp) | ||
1360 | movq %r8, R8+8(%rsp) | ||
1361 | movq %r9, R9+8(%rsp) | ||
1362 | movq %r10, R10+8(%rsp) | ||
1363 | movq %r11, R11+8(%rsp) | ||
1364 | movq_cfi rbx, RBX+8 | ||
1365 | movq %rbp, RBP+8(%rsp) | ||
1366 | movq %r12, R12+8(%rsp) | ||
1367 | movq %r13, R13+8(%rsp) | ||
1368 | movq %r14, R14+8(%rsp) | ||
1369 | movq %r15, R15+8(%rsp) | ||
1370 | xorl %ebx,%ebx | 1323 | xorl %ebx,%ebx |
1371 | testl $3,CS+8(%rsp) | 1324 | testl $3,CS+8(%rsp) |
1372 | je error_kernelspace | 1325 | je error_kernelspace |
@@ -1376,12 +1329,12 @@ error_sti: | |||
1376 | TRACE_IRQS_OFF | 1329 | TRACE_IRQS_OFF |
1377 | ret | 1330 | ret |
1378 | 1331 | ||
1379 | /* | 1332 | /* |
1380 | * There are two places in the kernel that can potentially fault with | 1333 | * There are two places in the kernel that can potentially fault with |
1381 | * usergs. Handle them here. B stepping K8s sometimes report a | 1334 | * usergs. Handle them here. B stepping K8s sometimes report a |
1382 | * truncated RIP for IRET exceptions returning to compat mode. Check | 1335 | * truncated RIP for IRET exceptions returning to compat mode. Check |
1383 | * for these here too. | 1336 | * for these here too. |
1384 | */ | 1337 | */ |
1385 | error_kernelspace: | 1338 | error_kernelspace: |
1386 | CFI_REL_OFFSET rcx, RCX+8 | 1339 | CFI_REL_OFFSET rcx, RCX+8 |
1387 | incl %ebx | 1340 | incl %ebx |
@@ -1411,11 +1364,11 @@ error_bad_iret: | |||
1411 | END(error_entry) | 1364 | END(error_entry) |
1412 | 1365 | ||
1413 | 1366 | ||
1414 | /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ | 1367 | /* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */ |
1415 | ENTRY(error_exit) | 1368 | ENTRY(error_exit) |
1416 | DEFAULT_FRAME | 1369 | DEFAULT_FRAME |
1417 | movl %ebx,%eax | 1370 | movl %ebx,%eax |
1418 | RESTORE_REST | 1371 | RESTORE_EXTRA_REGS |
1419 | DISABLE_INTERRUPTS(CLBR_NONE) | 1372 | DISABLE_INTERRUPTS(CLBR_NONE) |
1420 | TRACE_IRQS_OFF | 1373 | TRACE_IRQS_OFF |
1421 | GET_THREAD_INFO(%rcx) | 1374 | GET_THREAD_INFO(%rcx) |
@@ -1430,19 +1383,7 @@ ENTRY(error_exit) | |||
1430 | CFI_ENDPROC | 1383 | CFI_ENDPROC |
1431 | END(error_exit) | 1384 | END(error_exit) |
1432 | 1385 | ||
1433 | /* | 1386 | /* Runs on exception stack */ |
1434 | * Test if a given stack is an NMI stack or not. | ||
1435 | */ | ||
1436 | .macro test_in_nmi reg stack nmi_ret normal_ret | ||
1437 | cmpq %\reg, \stack | ||
1438 | ja \normal_ret | ||
1439 | subq $EXCEPTION_STKSZ, %\reg | ||
1440 | cmpq %\reg, \stack | ||
1441 | jb \normal_ret | ||
1442 | jmp \nmi_ret | ||
1443 | .endm | ||
1444 | |||
1445 | /* runs on exception stack */ | ||
1446 | ENTRY(nmi) | 1387 | ENTRY(nmi) |
1447 | INTR_FRAME | 1388 | INTR_FRAME |
1448 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1389 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
@@ -1478,7 +1419,7 @@ ENTRY(nmi) | |||
1478 | * NMI. | 1419 | * NMI. |
1479 | */ | 1420 | */ |
1480 | 1421 | ||
1481 | /* Use %rdx as out temp variable throughout */ | 1422 | /* Use %rdx as our temp variable throughout */ |
1482 | pushq_cfi %rdx | 1423 | pushq_cfi %rdx |
1483 | CFI_REL_OFFSET rdx, 0 | 1424 | CFI_REL_OFFSET rdx, 0 |
1484 | 1425 | ||
@@ -1503,8 +1444,17 @@ ENTRY(nmi) | |||
1503 | * We check the variable because the first NMI could be in a | 1444 | * We check the variable because the first NMI could be in a |
1504 | * breakpoint routine using a breakpoint stack. | 1445 | * breakpoint routine using a breakpoint stack. |
1505 | */ | 1446 | */ |
1506 | lea 6*8(%rsp), %rdx | 1447 | lea 6*8(%rsp), %rdx |
1507 | test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi | 1448 | /* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */ |
1449 | cmpq %rdx, 4*8(%rsp) | ||
1450 | /* If the stack pointer is above the NMI stack, this is a normal NMI */ | ||
1451 | ja first_nmi | ||
1452 | subq $EXCEPTION_STKSZ, %rdx | ||
1453 | cmpq %rdx, 4*8(%rsp) | ||
1454 | /* If it is below the NMI stack, it is a normal NMI */ | ||
1455 | jb first_nmi | ||
1456 | /* Ah, it is within the NMI stack, treat it as nested */ | ||
1457 | |||
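In C terms, the open-coded comparison above (the replacement for the old test_in_nmi macro) asks whether the interrupted stack pointer lies within the current NMI stack. Illustrative sketch only; EXCEPTION_STKSZ is assumed to be 4 KiB here:

#define EXCEPTION_STKSZ 4096UL	/* assumed for illustration */

/* saved_rsp is the value pushed by the CPU (4*8(%rsp) above); nmi_stack_ref
 * is the "lea 6*8(%rsp)" reference into the NMI stack. */
static int nmi_was_nested(unsigned long saved_rsp, unsigned long nmi_stack_ref)
{
	if (saved_rsp > nmi_stack_ref)				/* above the NMI stack: normal NMI */
		return 0;
	if (saved_rsp < nmi_stack_ref - EXCEPTION_STKSZ)	/* below it: normal NMI */
		return 0;
	return 1;						/* within it: nested NMI */
}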
1508 | CFI_REMEMBER_STATE | 1458 | CFI_REMEMBER_STATE |
1509 | 1459 | ||
1510 | nested_nmi: | 1460 | nested_nmi: |
@@ -1597,7 +1547,7 @@ first_nmi: | |||
1597 | .rept 5 | 1547 | .rept 5 |
1598 | pushq_cfi 11*8(%rsp) | 1548 | pushq_cfi 11*8(%rsp) |
1599 | .endr | 1549 | .endr |
1600 | CFI_DEF_CFA_OFFSET SS+8-RIP | 1550 | CFI_DEF_CFA_OFFSET 5*8 |
1601 | 1551 | ||
1602 | /* Everything up to here is safe from nested NMIs */ | 1552 | /* Everything up to here is safe from nested NMIs */ |
1603 | 1553 | ||
@@ -1625,7 +1575,7 @@ repeat_nmi: | |||
1625 | pushq_cfi -6*8(%rsp) | 1575 | pushq_cfi -6*8(%rsp) |
1626 | .endr | 1576 | .endr |
1627 | subq $(5*8), %rsp | 1577 | subq $(5*8), %rsp |
1628 | CFI_DEF_CFA_OFFSET SS+8-RIP | 1578 | CFI_DEF_CFA_OFFSET 5*8 |
1629 | end_repeat_nmi: | 1579 | end_repeat_nmi: |
1630 | 1580 | ||
1631 | /* | 1581 | /* |
@@ -1634,16 +1584,16 @@ end_repeat_nmi: | |||
1634 | * so that we repeat another NMI. | 1584 | * so that we repeat another NMI. |
1635 | */ | 1585 | */ |
1636 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1586 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
1637 | subq $ORIG_RAX-R15, %rsp | 1587 | ALLOC_PT_GPREGS_ON_STACK |
1638 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1588 | |
1639 | /* | 1589 | /* |
1640 | * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit | 1590 | * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit |
1641 | * as we should not be calling schedule in NMI context. | 1591 | * as we should not be calling schedule in NMI context. |
1642 | * Even with normal interrupts enabled. An NMI should not be | 1592 | * Even with normal interrupts enabled. An NMI should not be |
1643 | * setting NEED_RESCHED or anything that normal interrupts and | 1593 | * setting NEED_RESCHED or anything that normal interrupts and |
1644 | * exceptions might do. | 1594 | * exceptions might do. |
1645 | */ | 1595 | */ |
1646 | call save_paranoid | 1596 | call paranoid_entry |
1647 | DEFAULT_FRAME 0 | 1597 | DEFAULT_FRAME 0 |
1648 | 1598 | ||
1649 | /* | 1599 | /* |
@@ -1674,8 +1624,10 @@ end_repeat_nmi: | |||
1674 | nmi_swapgs: | 1624 | nmi_swapgs: |
1675 | SWAPGS_UNSAFE_STACK | 1625 | SWAPGS_UNSAFE_STACK |
1676 | nmi_restore: | 1626 | nmi_restore: |
1627 | RESTORE_EXTRA_REGS | ||
1628 | RESTORE_C_REGS | ||
1677 | /* Pop the extra iret frame at once */ | 1629 | /* Pop the extra iret frame at once */ |
1678 | RESTORE_ALL 6*8 | 1630 | REMOVE_PT_GPREGS_FROM_STACK 6*8 |
1679 | 1631 | ||
1680 | /* Clear the NMI executing stack variable */ | 1632 | /* Clear the NMI executing stack variable */ |
1681 | movq $0, 5*8(%rsp) | 1633 | movq $0, 5*8(%rsp) |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index c4f8d4659070..2b55ee6db053 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -177,9 +177,6 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) | |||
177 | */ | 177 | */ |
178 | load_ucode_bsp(); | 178 | load_ucode_bsp(); |
179 | 179 | ||
180 | if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG) | ||
181 | early_printk("Kernel alive\n"); | ||
182 | |||
183 | clear_page(init_level4_pgt); | 180 | clear_page(init_level4_pgt); |
184 | /* set init_level4_pgt kernel high mapping*/ | 181 | /* set init_level4_pgt kernel high mapping*/ |
185 | init_level4_pgt[511] = early_level4_pgt[511]; | 182 | init_level4_pgt[511] = early_level4_pgt[511]; |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index f36bd42d6f0c..d031bad9e07e 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <asm/cpufeature.h> | 22 | #include <asm/cpufeature.h> |
23 | #include <asm/percpu.h> | 23 | #include <asm/percpu.h> |
24 | #include <asm/nops.h> | 24 | #include <asm/nops.h> |
25 | #include <asm/bootparam.h> | ||
25 | 26 | ||
26 | /* Physical address */ | 27 | /* Physical address */ |
27 | #define pa(X) ((X) - __PAGE_OFFSET) | 28 | #define pa(X) ((X) - __PAGE_OFFSET) |
@@ -90,7 +91,7 @@ ENTRY(startup_32) | |||
90 | 91 | ||
91 | /* test KEEP_SEGMENTS flag to see if the bootloader is asking | 92 | /* test KEEP_SEGMENTS flag to see if the bootloader is asking |
92 | us to not reload segments */ | 93 | us to not reload segments */ |
93 | testb $(1<<6), BP_loadflags(%esi) | 94 | testb $KEEP_SEGMENTS, BP_loadflags(%esi) |
94 | jnz 2f | 95 | jnz 2f |
95 | 96 | ||
96 | /* | 97 | /* |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 6fd514d9f69a..ae6588b301c2 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit | 2 | * linux/arch/x86/kernel/head_64.S -- start in 32bit and switch to 64bit |
3 | * | 3 | * |
4 | * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE | 4 | * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE |
5 | * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> | 5 | * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> |
@@ -56,7 +56,7 @@ startup_64: | |||
56 | * %rsi holds a physical pointer to real_mode_data. | 56 | * %rsi holds a physical pointer to real_mode_data. |
57 | * | 57 | * |
58 | * We come here either directly from a 64bit bootloader, or from | 58 | * We come here either directly from a 64bit bootloader, or from |
59 | * arch/x86_64/boot/compressed/head.S. | 59 | * arch/x86/boot/compressed/head_64.S. |
60 | * | 60 | * |
61 | * We only come here initially at boot nothing else comes here. | 61 | * We only come here initially at boot nothing else comes here. |
62 | * | 62 | * |
@@ -146,7 +146,7 @@ startup_64: | |||
146 | leaq level2_kernel_pgt(%rip), %rdi | 146 | leaq level2_kernel_pgt(%rip), %rdi |
147 | leaq 4096(%rdi), %r8 | 147 | leaq 4096(%rdi), %r8 |
148 | /* See if it is a valid page table entry */ | 148 | /* See if it is a valid page table entry */ |
149 | 1: testq $1, 0(%rdi) | 149 | 1: testb $1, 0(%rdi) |
150 | jz 2f | 150 | jz 2f |
151 | addq %rbp, 0(%rdi) | 151 | addq %rbp, 0(%rdi) |
152 | /* Go to the next page */ | 152 | /* Go to the next page */ |
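The testq -> testb change in the hunk above is purely an encoding optimization: bit 0 (_PAGE_PRESENT) sits in the low byte of the page table entry, so a one-byte test reads the same information with a shorter instruction. A quick sketch (illustrative only):

/* Both forms report the same thing: bit 0 lives in the low byte. */
static int present_full(unsigned long entry) { return (entry & 1UL) != 0; }
static int present_byte(unsigned long entry) { return ((unsigned char)entry & 1) != 0; }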
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index d5651fce0b71..367f39d35e9c 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -42,8 +42,8 @@ void kernel_fpu_enable(void) | |||
42 | * be set (so that the clts/stts pair does nothing that is | 42 | * be set (so that the clts/stts pair does nothing that is |
43 | * visible in the interrupted kernel thread). | 43 | * visible in the interrupted kernel thread). |
44 | * | 44 | * |
45 | * Except for the eagerfpu case when we return 1 unless we've already | 45 | * Except for the eagerfpu case when we return true; in the likely case |
46 | * been eager and saved the state in kernel_fpu_begin(). | 46 | * the thread has FPU but we are not going to set/clear TS. |
47 | */ | 47 | */ |
48 | static inline bool interrupted_kernel_fpu_idle(void) | 48 | static inline bool interrupted_kernel_fpu_idle(void) |
49 | { | 49 | { |
@@ -51,7 +51,7 @@ static inline bool interrupted_kernel_fpu_idle(void) | |||
51 | return false; | 51 | return false; |
52 | 52 | ||
53 | if (use_eager_fpu()) | 53 | if (use_eager_fpu()) |
54 | return __thread_has_fpu(current); | 54 | return true; |
55 | 55 | ||
56 | return !__thread_has_fpu(current) && | 56 | return !__thread_has_fpu(current) && |
57 | (read_cr0() & X86_CR0_TS); | 57 | (read_cr0() & X86_CR0_TS); |
@@ -68,7 +68,7 @@ static inline bool interrupted_kernel_fpu_idle(void) | |||
68 | static inline bool interrupted_user_mode(void) | 68 | static inline bool interrupted_user_mode(void) |
69 | { | 69 | { |
70 | struct pt_regs *regs = get_irq_regs(); | 70 | struct pt_regs *regs = get_irq_regs(); |
71 | return regs && user_mode_vm(regs); | 71 | return regs && user_mode(regs); |
72 | } | 72 | } |
73 | 73 | ||
74 | /* | 74 | /* |
@@ -94,9 +94,10 @@ void __kernel_fpu_begin(void) | |||
94 | 94 | ||
95 | if (__thread_has_fpu(me)) { | 95 | if (__thread_has_fpu(me)) { |
96 | __save_init_fpu(me); | 96 | __save_init_fpu(me); |
97 | } else if (!use_eager_fpu()) { | 97 | } else { |
98 | this_cpu_write(fpu_owner_task, NULL); | 98 | this_cpu_write(fpu_owner_task, NULL); |
99 | clts(); | 99 | if (!use_eager_fpu()) |
100 | clts(); | ||
100 | } | 101 | } |
101 | } | 102 | } |
102 | EXPORT_SYMBOL(__kernel_fpu_begin); | 103 | EXPORT_SYMBOL(__kernel_fpu_begin); |
@@ -107,7 +108,7 @@ void __kernel_fpu_end(void) | |||
107 | 108 | ||
108 | if (__thread_has_fpu(me)) { | 109 | if (__thread_has_fpu(me)) { |
109 | if (WARN_ON(restore_fpu_checking(me))) | 110 | if (WARN_ON(restore_fpu_checking(me))) |
110 | drop_init_fpu(me); | 111 | fpu_reset_state(me); |
111 | } else if (!use_eager_fpu()) { | 112 | } else if (!use_eager_fpu()) { |
112 | stts(); | 113 | stts(); |
113 | } | 114 | } |
@@ -120,10 +121,13 @@ void unlazy_fpu(struct task_struct *tsk) | |||
120 | { | 121 | { |
121 | preempt_disable(); | 122 | preempt_disable(); |
122 | if (__thread_has_fpu(tsk)) { | 123 | if (__thread_has_fpu(tsk)) { |
123 | __save_init_fpu(tsk); | 124 | if (use_eager_fpu()) { |
124 | __thread_fpu_end(tsk); | 125 | __save_fpu(tsk); |
125 | } else | 126 | } else { |
126 | tsk->thread.fpu_counter = 0; | 127 | __save_init_fpu(tsk); |
128 | __thread_fpu_end(tsk); | ||
129 | } | ||
130 | } | ||
127 | preempt_enable(); | 131 | preempt_enable(); |
128 | } | 132 | } |
129 | EXPORT_SYMBOL(unlazy_fpu); | 133 | EXPORT_SYMBOL(unlazy_fpu); |
@@ -221,11 +225,12 @@ void fpu_finit(struct fpu *fpu) | |||
221 | return; | 225 | return; |
222 | } | 226 | } |
223 | 227 | ||
228 | memset(fpu->state, 0, xstate_size); | ||
229 | |||
224 | if (cpu_has_fxsr) { | 230 | if (cpu_has_fxsr) { |
225 | fx_finit(&fpu->state->fxsave); | 231 | fx_finit(&fpu->state->fxsave); |
226 | } else { | 232 | } else { |
227 | struct i387_fsave_struct *fp = &fpu->state->fsave; | 233 | struct i387_fsave_struct *fp = &fpu->state->fsave; |
228 | memset(fp, 0, xstate_size); | ||
229 | fp->cwd = 0xffff037fu; | 234 | fp->cwd = 0xffff037fu; |
230 | fp->swd = 0xffff0000u; | 235 | fp->swd = 0xffff0000u; |
231 | fp->twd = 0xffffffffu; | 236 | fp->twd = 0xffffffffu; |
@@ -247,7 +252,7 @@ int init_fpu(struct task_struct *tsk) | |||
247 | if (tsk_used_math(tsk)) { | 252 | if (tsk_used_math(tsk)) { |
248 | if (cpu_has_fpu && tsk == current) | 253 | if (cpu_has_fpu && tsk == current) |
249 | unlazy_fpu(tsk); | 254 | unlazy_fpu(tsk); |
250 | tsk->thread.fpu.last_cpu = ~0; | 255 | task_disable_lazy_fpu_restore(tsk); |
251 | return 0; | 256 | return 0; |
252 | } | 257 | } |
253 | 258 | ||
@@ -336,6 +341,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, | |||
336 | unsigned int pos, unsigned int count, | 341 | unsigned int pos, unsigned int count, |
337 | void *kbuf, void __user *ubuf) | 342 | void *kbuf, void __user *ubuf) |
338 | { | 343 | { |
344 | struct xsave_struct *xsave = &target->thread.fpu.state->xsave; | ||
339 | int ret; | 345 | int ret; |
340 | 346 | ||
341 | if (!cpu_has_xsave) | 347 | if (!cpu_has_xsave) |
@@ -350,14 +356,12 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, | |||
350 | * memory layout in the thread struct, so that we can copy the entire | 356 | * memory layout in the thread struct, so that we can copy the entire |
351 | * xstateregs to the user using one user_regset_copyout(). | 357 | * xstateregs to the user using one user_regset_copyout(). |
352 | */ | 358 | */ |
353 | memcpy(&target->thread.fpu.state->fxsave.sw_reserved, | 359 | memcpy(&xsave->i387.sw_reserved, |
354 | xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); | 360 | xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); |
355 | |||
356 | /* | 361 | /* |
357 | * Copy the xstate memory layout. | 362 | * Copy the xstate memory layout. |
358 | */ | 363 | */ |
359 | ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 364 | ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); |
360 | &target->thread.fpu.state->xsave, 0, -1); | ||
361 | return ret; | 365 | return ret; |
362 | } | 366 | } |
363 | 367 | ||
@@ -365,8 +369,8 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, | |||
365 | unsigned int pos, unsigned int count, | 369 | unsigned int pos, unsigned int count, |
366 | const void *kbuf, const void __user *ubuf) | 370 | const void *kbuf, const void __user *ubuf) |
367 | { | 371 | { |
372 | struct xsave_struct *xsave = &target->thread.fpu.state->xsave; | ||
368 | int ret; | 373 | int ret; |
369 | struct xsave_hdr_struct *xsave_hdr; | ||
370 | 374 | ||
371 | if (!cpu_has_xsave) | 375 | if (!cpu_has_xsave) |
372 | return -ENODEV; | 376 | return -ENODEV; |
@@ -375,22 +379,16 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, | |||
375 | if (ret) | 379 | if (ret) |
376 | return ret; | 380 | return ret; |
377 | 381 | ||
378 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 382 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); |
379 | &target->thread.fpu.state->xsave, 0, -1); | ||
380 | |||
381 | /* | 383 | /* |
382 | * mxcsr reserved bits must be masked to zero for security reasons. | 384 | * mxcsr reserved bits must be masked to zero for security reasons. |
383 | */ | 385 | */ |
384 | target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; | 386 | xsave->i387.mxcsr &= mxcsr_feature_mask; |
385 | 387 | xsave->xsave_hdr.xstate_bv &= pcntxt_mask; | |
386 | xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr; | ||
387 | |||
388 | xsave_hdr->xstate_bv &= pcntxt_mask; | ||
389 | /* | 388 | /* |
390 | * These bits must be zero. | 389 | * These bits must be zero. |
391 | */ | 390 | */ |
392 | memset(xsave_hdr->reserved, 0, 48); | 391 | memset(&xsave->xsave_hdr.reserved, 0, 48); |
393 | |||
394 | return ret; | 392 | return ret; |
395 | } | 393 | } |
396 | 394 | ||
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 4ddaf66ea35f..37dae792dbbe 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c | |||
@@ -54,7 +54,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | |||
54 | * because the ->io_bitmap_max value must match the bitmap | 54 | * because the ->io_bitmap_max value must match the bitmap |
55 | * contents: | 55 | * contents: |
56 | */ | 56 | */ |
57 | tss = &per_cpu(init_tss, get_cpu()); | 57 | tss = &per_cpu(cpu_tss, get_cpu()); |
58 | 58 | ||
59 | if (turn_on) | 59 | if (turn_on) |
60 | bitmap_clear(t->io_bitmap_ptr, from, num); | 60 | bitmap_clear(t->io_bitmap_ptr, from, num); |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 67b1cbe0093a..e5952c225532 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -295,7 +295,7 @@ int check_irq_vectors_for_cpu_disable(void) | |||
295 | 295 | ||
296 | this_cpu = smp_processor_id(); | 296 | this_cpu = smp_processor_id(); |
297 | cpumask_copy(&online_new, cpu_online_mask); | 297 | cpumask_copy(&online_new, cpu_online_mask); |
298 | cpu_clear(this_cpu, online_new); | 298 | cpumask_clear_cpu(this_cpu, &online_new); |
299 | 299 | ||
300 | this_count = 0; | 300 | this_count = 0; |
301 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { | 301 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { |
@@ -307,7 +307,7 @@ int check_irq_vectors_for_cpu_disable(void) | |||
307 | 307 | ||
308 | data = irq_desc_get_irq_data(desc); | 308 | data = irq_desc_get_irq_data(desc); |
309 | cpumask_copy(&affinity_new, data->affinity); | 309 | cpumask_copy(&affinity_new, data->affinity); |
310 | cpu_clear(this_cpu, affinity_new); | 310 | cpumask_clear_cpu(this_cpu, &affinity_new); |
311 | 311 | ||
312 | /* Do not count inactive or per-cpu irqs. */ | 312 | /* Do not count inactive or per-cpu irqs. */ |
313 | if (!irq_has_action(irq) || irqd_is_per_cpu(data)) | 313 | if (!irq_has_action(irq) || irqd_is_per_cpu(data)) |
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 28d28f5eb8f4..f9fd86a7fcc7 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -165,7 +165,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) | |||
165 | if (unlikely(!desc)) | 165 | if (unlikely(!desc)) |
166 | return false; | 166 | return false; |
167 | 167 | ||
168 | if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) { | 168 | if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) { |
169 | if (unlikely(overflow)) | 169 | if (unlikely(overflow)) |
170 | print_stack_overflow(); | 170 | print_stack_overflow(); |
171 | desc->handle_irq(irq, desc); | 171 | desc->handle_irq(irq, desc); |
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index e4b503d5558c..394e643d7830 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
@@ -44,7 +44,7 @@ static inline void stack_overflow_check(struct pt_regs *regs) | |||
44 | u64 estack_top, estack_bottom; | 44 | u64 estack_top, estack_bottom; |
45 | u64 curbase = (u64)task_stack_page(current); | 45 | u64 curbase = (u64)task_stack_page(current); |
46 | 46 | ||
47 | if (user_mode_vm(regs)) | 47 | if (user_mode(regs)) |
48 | return; | 48 | return; |
49 | 49 | ||
50 | if (regs->sp >= curbase + sizeof(struct thread_info) + | 50 | if (regs->sp >= curbase + sizeof(struct thread_info) + |
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 70e181ea1eac..cd10a6437264 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -178,7 +178,8 @@ void __init native_init_IRQ(void) | |||
178 | #endif | 178 | #endif |
179 | for_each_clear_bit_from(i, used_vectors, first_system_vector) { | 179 | for_each_clear_bit_from(i, used_vectors, first_system_vector) { |
180 | /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ | 180 | /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ |
181 | set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); | 181 | set_intr_gate(i, irq_entries_start + |
182 | 8 * (i - FIRST_EXTERNAL_VECTOR)); | ||
182 | } | 183 | } |
183 | #ifdef CONFIG_X86_LOCAL_APIC | 184 | #ifdef CONFIG_X86_LOCAL_APIC |
184 | for_each_clear_bit_from(i, used_vectors, NR_VECTORS) | 185 | for_each_clear_bit_from(i, used_vectors, NR_VECTORS) |
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 7ec1d5f8d283..d6178d9791db 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -72,7 +72,7 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = | |||
72 | { "bx", 8, offsetof(struct pt_regs, bx) }, | 72 | { "bx", 8, offsetof(struct pt_regs, bx) }, |
73 | { "cx", 8, offsetof(struct pt_regs, cx) }, | 73 | { "cx", 8, offsetof(struct pt_regs, cx) }, |
74 | { "dx", 8, offsetof(struct pt_regs, dx) }, | 74 | { "dx", 8, offsetof(struct pt_regs, dx) }, |
75 | { "si", 8, offsetof(struct pt_regs, dx) }, | 75 | { "si", 8, offsetof(struct pt_regs, si) }, |
76 | { "di", 8, offsetof(struct pt_regs, di) }, | 76 | { "di", 8, offsetof(struct pt_regs, di) }, |
77 | { "bp", 8, offsetof(struct pt_regs, bp) }, | 77 | { "bp", 8, offsetof(struct pt_regs, bp) }, |
78 | { "sp", 8, offsetof(struct pt_regs, sp) }, | 78 | { "sp", 8, offsetof(struct pt_regs, sp) }, |
@@ -126,11 +126,11 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) | |||
126 | #ifdef CONFIG_X86_32 | 126 | #ifdef CONFIG_X86_32 |
127 | switch (regno) { | 127 | switch (regno) { |
128 | case GDB_SS: | 128 | case GDB_SS: |
129 | if (!user_mode_vm(regs)) | 129 | if (!user_mode(regs)) |
130 | *(unsigned long *)mem = __KERNEL_DS; | 130 | *(unsigned long *)mem = __KERNEL_DS; |
131 | break; | 131 | break; |
132 | case GDB_SP: | 132 | case GDB_SP: |
133 | if (!user_mode_vm(regs)) | 133 | if (!user_mode(regs)) |
134 | *(unsigned long *)mem = kernel_stack_pointer(regs); | 134 | *(unsigned long *)mem = kernel_stack_pointer(regs); |
135 | break; | 135 | break; |
136 | case GDB_GS: | 136 | case GDB_GS: |
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 03189d86357d..1deffe6cc873 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c | |||
@@ -605,7 +605,7 @@ int kprobe_int3_handler(struct pt_regs *regs) | |||
605 | struct kprobe *p; | 605 | struct kprobe *p; |
606 | struct kprobe_ctlblk *kcb; | 606 | struct kprobe_ctlblk *kcb; |
607 | 607 | ||
608 | if (user_mode_vm(regs)) | 608 | if (user_mode(regs)) |
609 | return 0; | 609 | return 0; |
610 | 610 | ||
611 | addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); | 611 | addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); |
@@ -1010,7 +1010,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, | |||
1010 | struct die_args *args = data; | 1010 | struct die_args *args = data; |
1011 | int ret = NOTIFY_DONE; | 1011 | int ret = NOTIFY_DONE; |
1012 | 1012 | ||
1013 | if (args->regs && user_mode_vm(args->regs)) | 1013 | if (args->regs && user_mode(args->regs)) |
1014 | return ret; | 1014 | return ret; |
1015 | 1015 | ||
1016 | if (val == DIE_GPF) { | 1016 | if (val == DIE_GPF) { |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e354cc6446ab..9435620062df 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -513,7 +513,7 @@ void __init kvm_guest_init(void) | |||
513 | * can get false positives too easily, for example if the host is | 513 | * can get false positives too easily, for example if the host is |
514 | * overcommitted. | 514 | * overcommitted. |
515 | */ | 515 | */ |
516 | watchdog_enable_hardlockup_detector(false); | 516 | hardlockup_detector_disable(); |
517 | } | 517 | } |
518 | 518 | ||
519 | static noinline uint32_t __kvm_cpuid_base(void) | 519 | static noinline uint32_t __kvm_cpuid_base(void) |
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index d1ac80b72c72..005c03e93fc5 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c | |||
@@ -33,6 +33,7 @@ | |||
33 | 33 | ||
34 | #include <asm/page.h> | 34 | #include <asm/page.h> |
35 | #include <asm/pgtable.h> | 35 | #include <asm/pgtable.h> |
36 | #include <asm/setup.h> | ||
36 | 37 | ||
37 | #if 0 | 38 | #if 0 |
38 | #define DEBUGP(fmt, ...) \ | 39 | #define DEBUGP(fmt, ...) \ |
@@ -47,21 +48,13 @@ do { \ | |||
47 | 48 | ||
48 | #ifdef CONFIG_RANDOMIZE_BASE | 49 | #ifdef CONFIG_RANDOMIZE_BASE |
49 | static unsigned long module_load_offset; | 50 | static unsigned long module_load_offset; |
50 | static int randomize_modules = 1; | ||
51 | 51 | ||
52 | /* Mutex protects the module_load_offset. */ | 52 | /* Mutex protects the module_load_offset. */ |
53 | static DEFINE_MUTEX(module_kaslr_mutex); | 53 | static DEFINE_MUTEX(module_kaslr_mutex); |
54 | 54 | ||
55 | static int __init parse_nokaslr(char *p) | ||
56 | { | ||
57 | randomize_modules = 0; | ||
58 | return 0; | ||
59 | } | ||
60 | early_param("nokaslr", parse_nokaslr); | ||
61 | |||
62 | static unsigned long int get_module_load_offset(void) | 55 | static unsigned long int get_module_load_offset(void) |
63 | { | 56 | { |
64 | if (randomize_modules) { | 57 | if (kaslr_enabled()) { |
65 | mutex_lock(&module_kaslr_mutex); | 58 | mutex_lock(&module_kaslr_mutex); |
66 | /* | 59 | /* |
67 | * Calculate the module_load_offset the first time this | 60 | * Calculate the module_load_offset the first time this |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 548d25f00c90..c614dd492f5f 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -443,7 +443,7 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
443 | .ptep_modify_prot_start = __ptep_modify_prot_start, | 443 | .ptep_modify_prot_start = __ptep_modify_prot_start, |
444 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, | 444 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, |
445 | 445 | ||
446 | #if PAGETABLE_LEVELS >= 3 | 446 | #if CONFIG_PGTABLE_LEVELS >= 3 |
447 | #ifdef CONFIG_X86_PAE | 447 | #ifdef CONFIG_X86_PAE |
448 | .set_pte_atomic = native_set_pte_atomic, | 448 | .set_pte_atomic = native_set_pte_atomic, |
449 | .pte_clear = native_pte_clear, | 449 | .pte_clear = native_pte_clear, |
@@ -454,13 +454,13 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
454 | .pmd_val = PTE_IDENT, | 454 | .pmd_val = PTE_IDENT, |
455 | .make_pmd = PTE_IDENT, | 455 | .make_pmd = PTE_IDENT, |
456 | 456 | ||
457 | #if PAGETABLE_LEVELS == 4 | 457 | #if CONFIG_PGTABLE_LEVELS == 4 |
458 | .pud_val = PTE_IDENT, | 458 | .pud_val = PTE_IDENT, |
459 | .make_pud = PTE_IDENT, | 459 | .make_pud = PTE_IDENT, |
460 | 460 | ||
461 | .set_pgd = native_set_pgd, | 461 | .set_pgd = native_set_pgd, |
462 | #endif | 462 | #endif |
463 | #endif /* PAGETABLE_LEVELS >= 3 */ | 463 | #endif /* CONFIG_PGTABLE_LEVELS >= 3 */ |
464 | 464 | ||
465 | .pte_val = PTE_IDENT, | 465 | .pte_val = PTE_IDENT, |
466 | .pgd_val = PTE_IDENT, | 466 | .pgd_val = PTE_IDENT, |
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index 781861cc5ee8..da8cb987b973 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c | |||
@@ -131,10 +131,11 @@ void perf_get_regs_user(struct perf_regs *regs_user, | |||
131 | } | 131 | } |
132 | 132 | ||
133 | /* | 133 | /* |
134 | * RIP, flags, and the argument registers are usually saved. | 134 | * These registers are always saved on 64-bit syscall entry. |
135 | * orig_ax is probably okay, too. | 135 | * On 32-bit entry points, they are saved too except r8..r11. |
136 | */ | 136 | */ |
137 | regs_user_copy->ip = user_regs->ip; | 137 | regs_user_copy->ip = user_regs->ip; |
138 | regs_user_copy->ax = user_regs->ax; | ||
138 | regs_user_copy->cx = user_regs->cx; | 139 | regs_user_copy->cx = user_regs->cx; |
139 | regs_user_copy->dx = user_regs->dx; | 140 | regs_user_copy->dx = user_regs->dx; |
140 | regs_user_copy->si = user_regs->si; | 141 | regs_user_copy->si = user_regs->si; |
@@ -145,9 +146,12 @@ void perf_get_regs_user(struct perf_regs *regs_user, | |||
145 | regs_user_copy->r11 = user_regs->r11; | 146 | regs_user_copy->r11 = user_regs->r11; |
146 | regs_user_copy->orig_ax = user_regs->orig_ax; | 147 | regs_user_copy->orig_ax = user_regs->orig_ax; |
147 | regs_user_copy->flags = user_regs->flags; | 148 | regs_user_copy->flags = user_regs->flags; |
149 | regs_user_copy->sp = user_regs->sp; | ||
150 | regs_user_copy->cs = user_regs->cs; | ||
151 | regs_user_copy->ss = user_regs->ss; | ||
148 | 152 | ||
149 | /* | 153 | /* |
150 | * Don't even try to report the "rest" regs. | 154 | * Most system calls don't save these registers, don't report them. |
151 | */ | 155 | */ |
152 | regs_user_copy->bx = -1; | 156 | regs_user_copy->bx = -1; |
153 | regs_user_copy->bp = -1; | 157 | regs_user_copy->bp = -1; |
@@ -158,37 +162,13 @@ void perf_get_regs_user(struct perf_regs *regs_user, | |||
158 | 162 | ||
159 | /* | 163 | /* |
160 | * For this to be at all useful, we need a reasonable guess for | 164 | * For this to be at all useful, we need a reasonable guess for |
161 | * sp and the ABI. Be careful: we're in NMI context, and we're | 165 | * the ABI. Be careful: we're in NMI context, and we're |
162 | * considering current to be the current task, so we should | 166 | * considering current to be the current task, so we should |
163 | * be careful not to look at any other percpu variables that might | 167 | * be careful not to look at any other percpu variables that might |
164 | * change during context switches. | 168 | * change during context switches. |
165 | */ | 169 | */ |
166 | if (IS_ENABLED(CONFIG_IA32_EMULATION) && | 170 | regs_user->abi = user_64bit_mode(user_regs) ? |
167 | task_thread_info(current)->status & TS_COMPAT) { | 171 | PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32; |
168 | /* Easy case: we're in a compat syscall. */ | ||
169 | regs_user->abi = PERF_SAMPLE_REGS_ABI_32; | ||
170 | regs_user_copy->sp = user_regs->sp; | ||
171 | regs_user_copy->cs = user_regs->cs; | ||
172 | regs_user_copy->ss = user_regs->ss; | ||
173 | } else if (user_regs->orig_ax != -1) { | ||
174 | /* | ||
175 | * We're probably in a 64-bit syscall. | ||
176 | * Warning: this code is severely racy. At least it's better | ||
177 | * than just blindly copying user_regs. | ||
178 | */ | ||
179 | regs_user->abi = PERF_SAMPLE_REGS_ABI_64; | ||
180 | regs_user_copy->sp = this_cpu_read(old_rsp); | ||
181 | regs_user_copy->cs = __USER_CS; | ||
182 | regs_user_copy->ss = __USER_DS; | ||
183 | regs_user_copy->cx = -1; /* usually contains garbage */ | ||
184 | } else { | ||
185 | /* We're probably in an interrupt or exception. */ | ||
186 | regs_user->abi = user_64bit_mode(user_regs) ? | ||
187 | PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32; | ||
188 | regs_user_copy->sp = user_regs->sp; | ||
189 | regs_user_copy->cs = user_regs->cs; | ||
190 | regs_user_copy->ss = user_regs->ss; | ||
191 | } | ||
192 | 172 | ||
193 | regs_user->regs = regs_user_copy; | 173 | regs_user->regs = regs_user_copy; |
194 | } | 174 | } |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 046e2d620bbe..8213da62b1b7 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -9,7 +9,7 @@ | |||
9 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/pm.h> | 11 | #include <linux/pm.h> |
12 | #include <linux/clockchips.h> | 12 | #include <linux/tick.h> |
13 | #include <linux/random.h> | 13 | #include <linux/random.h> |
14 | #include <linux/user-return-notifier.h> | 14 | #include <linux/user-return-notifier.h> |
15 | #include <linux/dmi.h> | 15 | #include <linux/dmi.h> |
@@ -24,6 +24,7 @@ | |||
24 | #include <asm/syscalls.h> | 24 | #include <asm/syscalls.h> |
25 | #include <asm/idle.h> | 25 | #include <asm/idle.h> |
26 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
27 | #include <asm/mwait.h> | ||
27 | #include <asm/i387.h> | 28 | #include <asm/i387.h> |
28 | #include <asm/fpu-internal.h> | 29 | #include <asm/fpu-internal.h> |
29 | #include <asm/debugreg.h> | 30 | #include <asm/debugreg.h> |
@@ -37,7 +38,26 @@ | |||
37 | * section. Since TSS's are completely CPU-local, we want them | 38 | * section. Since TSS's are completely CPU-local, we want them |
38 | * on exact cacheline boundaries, to eliminate cacheline ping-pong. | 39 | * on exact cacheline boundaries, to eliminate cacheline ping-pong. |
39 | */ | 40 | */ |
40 | __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; | 41 | __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { |
42 | .x86_tss = { | ||
43 | .sp0 = TOP_OF_INIT_STACK, | ||
44 | #ifdef CONFIG_X86_32 | ||
45 | .ss0 = __KERNEL_DS, | ||
46 | .ss1 = __KERNEL_CS, | ||
47 | .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, | ||
48 | #endif | ||
49 | }, | ||
50 | #ifdef CONFIG_X86_32 | ||
51 | /* | ||
52 | * Note that the .io_bitmap member must be extra-big. This is because | ||
53 | * the CPU will access an additional byte beyond the end of the IO | ||
54 | * permission bitmap. The extra byte must be all 1 bits, and must | ||
55 | * be within the limit. | ||
56 | */ | ||
57 | .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, | ||
58 | #endif | ||
59 | }; | ||
60 | EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss); | ||
41 | 61 | ||
42 | #ifdef CONFIG_X86_64 | 62 | #ifdef CONFIG_X86_64 |
43 | static DEFINE_PER_CPU(unsigned char, is_idle); | 63 | static DEFINE_PER_CPU(unsigned char, is_idle); |
@@ -69,8 +89,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | |||
69 | 89 | ||
70 | dst->thread.fpu_counter = 0; | 90 | dst->thread.fpu_counter = 0; |
71 | dst->thread.fpu.has_fpu = 0; | 91 | dst->thread.fpu.has_fpu = 0; |
72 | dst->thread.fpu.last_cpu = ~0; | ||
73 | dst->thread.fpu.state = NULL; | 92 | dst->thread.fpu.state = NULL; |
93 | task_disable_lazy_fpu_restore(dst); | ||
74 | if (tsk_used_math(src)) { | 94 | if (tsk_used_math(src)) { |
75 | int err = fpu_alloc(&dst->thread.fpu); | 95 | int err = fpu_alloc(&dst->thread.fpu); |
76 | if (err) | 96 | if (err) |
@@ -109,7 +129,7 @@ void exit_thread(void) | |||
109 | unsigned long *bp = t->io_bitmap_ptr; | 129 | unsigned long *bp = t->io_bitmap_ptr; |
110 | 130 | ||
111 | if (bp) { | 131 | if (bp) { |
112 | struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); | 132 | struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu()); |
113 | 133 | ||
114 | t->io_bitmap_ptr = NULL; | 134 | t->io_bitmap_ptr = NULL; |
115 | clear_thread_flag(TIF_IO_BITMAP); | 135 | clear_thread_flag(TIF_IO_BITMAP); |
@@ -131,13 +151,18 @@ void flush_thread(void) | |||
131 | 151 | ||
132 | flush_ptrace_hw_breakpoint(tsk); | 152 | flush_ptrace_hw_breakpoint(tsk); |
133 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | 153 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); |
134 | drop_init_fpu(tsk); | 154 | |
135 | /* | 155 | if (!use_eager_fpu()) { |
136 | * Free the FPU state for non xsave platforms. They get reallocated | 156 | /* FPU state will be reallocated lazily at the first use. */ |
137 | * lazily at the first use. | 157 | drop_fpu(tsk); |
138 | */ | ||
139 | if (!use_eager_fpu()) | ||
140 | free_thread_xstate(tsk); | 158 | free_thread_xstate(tsk); |
159 | } else if (!used_math()) { | ||
160 | /* kthread execs. TODO: cleanup this horror. */ | ||
161 | if (WARN_ON(init_fpu(tsk))) | ||
162 | force_sig(SIGKILL, tsk); | ||
163 | user_fpu_begin(); | ||
164 | restore_init_xstate(); | ||
165 | } | ||
141 | } | 166 | } |
142 | 167 | ||
143 | static void hard_disable_TSC(void) | 168 | static void hard_disable_TSC(void) |
@@ -377,14 +402,11 @@ static void amd_e400_idle(void) | |||
377 | 402 | ||
378 | if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) { | 403 | if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) { |
379 | cpumask_set_cpu(cpu, amd_e400_c1e_mask); | 404 | cpumask_set_cpu(cpu, amd_e400_c1e_mask); |
380 | /* | 405 | /* Force broadcast so ACPI can not interfere. */ |
381 | * Force broadcast so ACPI can not interfere. | 406 | tick_broadcast_force(); |
382 | */ | ||
383 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, | ||
384 | &cpu); | ||
385 | pr_info("Switch to broadcast mode on CPU%d\n", cpu); | 407 | pr_info("Switch to broadcast mode on CPU%d\n", cpu); |
386 | } | 408 | } |
387 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); | 409 | tick_broadcast_enter(); |
388 | 410 | ||
389 | default_idle(); | 411 | default_idle(); |
390 | 412 | ||
@@ -393,12 +415,59 @@ static void amd_e400_idle(void) | |||
393 | * called with interrupts disabled. | 415 | * called with interrupts disabled. |
394 | */ | 416 | */ |
395 | local_irq_disable(); | 417 | local_irq_disable(); |
396 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); | 418 | tick_broadcast_exit(); |
397 | local_irq_enable(); | 419 | local_irq_enable(); |
398 | } else | 420 | } else |
399 | default_idle(); | 421 | default_idle(); |
400 | } | 422 | } |
401 | 423 | ||
424 | /* | ||
425 | * Intel Core2 and older machines prefer MWAIT over HALT for C1. | ||
426 | * We can't rely on cpuidle installing MWAIT, because it will not load | ||
427 | * on systems that support only C1 -- so the boot default must be MWAIT. | ||
428 | * | ||
429 | * Some AMD machines are the opposite, they depend on using HALT. | ||
430 | * | ||
431 | * So for default C1, which is used during boot until cpuidle loads, | ||
432 | * use MWAIT-C1 on Intel HW that has it, else use HALT. | ||
433 | */ | ||
434 | static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) | ||
435 | { | ||
436 | if (c->x86_vendor != X86_VENDOR_INTEL) | ||
437 | return 0; | ||
438 | |||
439 | if (!cpu_has(c, X86_FEATURE_MWAIT)) | ||
440 | return 0; | ||
441 | |||
442 | return 1; | ||
443 | } | ||
444 | |||
445 | /* | ||
446 | * MONITOR/MWAIT with no hints, used for the default C1 state. | ||
447 | * This invokes MWAIT with interrupts enabled and no flags, | ||
448 | * which is backwards compatible with the original MWAIT implementation. | ||
449 | */ | ||
450 | |||
451 | static void mwait_idle(void) | ||
452 | { | ||
453 | if (!current_set_polling_and_test()) { | ||
454 | if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { | ||
455 | smp_mb(); /* quirk */ | ||
456 | clflush((void *)¤t_thread_info()->flags); | ||
457 | smp_mb(); /* quirk */ | ||
458 | } | ||
459 | |||
460 | __monitor((void *)¤t_thread_info()->flags, 0, 0); | ||
461 | if (!need_resched()) | ||
462 | __sti_mwait(0, 0); | ||
463 | else | ||
464 | local_irq_enable(); | ||
465 | } else { | ||
466 | local_irq_enable(); | ||
467 | } | ||
468 | __current_clr_polling(); | ||
469 | } | ||
470 | |||
402 | void select_idle_routine(const struct cpuinfo_x86 *c) | 471 | void select_idle_routine(const struct cpuinfo_x86 *c) |
403 | { | 472 | { |
404 | #ifdef CONFIG_SMP | 473 | #ifdef CONFIG_SMP |
@@ -412,6 +481,9 @@ void select_idle_routine(const struct cpuinfo_x86 *c) | |||
412 | /* E400: APIC timer interrupt does not wake up CPU from C1e */ | 481 | /* E400: APIC timer interrupt does not wake up CPU from C1e */ |
413 | pr_info("using AMD E400 aware idle routine\n"); | 482 | pr_info("using AMD E400 aware idle routine\n"); |
414 | x86_idle = amd_e400_idle; | 483 | x86_idle = amd_e400_idle; |
484 | } else if (prefer_mwait_c1_over_halt(c)) { | ||
485 | pr_info("using mwait in idle threads\n"); | ||
486 | x86_idle = mwait_idle; | ||
415 | } else | 487 | } else |
416 | x86_idle = default_idle; | 488 | x86_idle = default_idle; |
417 | } | 489 | } |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 603c4f99cb5a..8ed2106b06da 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -73,7 +73,7 @@ void __show_regs(struct pt_regs *regs, int all) | |||
73 | unsigned long sp; | 73 | unsigned long sp; |
74 | unsigned short ss, gs; | 74 | unsigned short ss, gs; |
75 | 75 | ||
76 | if (user_mode_vm(regs)) { | 76 | if (user_mode(regs)) { |
77 | sp = regs->sp; | 77 | sp = regs->sp; |
78 | ss = regs->ss & 0xffff; | 78 | ss = regs->ss & 0xffff; |
79 | gs = get_user_gs(regs); | 79 | gs = get_user_gs(regs); |
@@ -206,11 +206,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | |||
206 | regs->ip = new_ip; | 206 | regs->ip = new_ip; |
207 | regs->sp = new_sp; | 207 | regs->sp = new_sp; |
208 | regs->flags = X86_EFLAGS_IF; | 208 | regs->flags = X86_EFLAGS_IF; |
209 | /* | 209 | force_iret(); |
210 | * force it to the iret return path by making it look as if there was | ||
211 | * some work pending. | ||
212 | */ | ||
213 | set_thread_flag(TIF_NOTIFY_RESUME); | ||
214 | } | 210 | } |
215 | EXPORT_SYMBOL_GPL(start_thread); | 211 | EXPORT_SYMBOL_GPL(start_thread); |
216 | 212 | ||
@@ -248,7 +244,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
248 | struct thread_struct *prev = &prev_p->thread, | 244 | struct thread_struct *prev = &prev_p->thread, |
249 | *next = &next_p->thread; | 245 | *next = &next_p->thread; |
250 | int cpu = smp_processor_id(); | 246 | int cpu = smp_processor_id(); |
251 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | 247 | struct tss_struct *tss = &per_cpu(cpu_tss, cpu); |
252 | fpu_switch_t fpu; | 248 | fpu_switch_t fpu; |
253 | 249 | ||
254 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ | 250 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ |
@@ -256,11 +252,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
256 | fpu = switch_fpu_prepare(prev_p, next_p, cpu); | 252 | fpu = switch_fpu_prepare(prev_p, next_p, cpu); |
257 | 253 | ||
258 | /* | 254 | /* |
259 | * Reload esp0. | ||
260 | */ | ||
261 | load_sp0(tss, next); | ||
262 | |||
263 | /* | ||
264 | * Save away %gs. No need to save %fs, as it was saved on the | 255 | * Save away %gs. No need to save %fs, as it was saved on the |
265 | * stack on entry. No need to save %es and %ds, as those are | 256 | * stack on entry. No need to save %es and %ds, as those are |
266 | * always kernel segments while inside the kernel. Doing this | 257 | * always kernel segments while inside the kernel. Doing this |
@@ -310,9 +301,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
310 | */ | 301 | */ |
311 | arch_end_context_switch(next_p); | 302 | arch_end_context_switch(next_p); |
312 | 303 | ||
304 | /* | ||
305 | * Reload esp0, kernel_stack, and current_top_of_stack. This changes | ||
306 | * current_thread_info(). | ||
307 | */ | ||
308 | load_sp0(tss, next); | ||
313 | this_cpu_write(kernel_stack, | 309 | this_cpu_write(kernel_stack, |
314 | (unsigned long)task_stack_page(next_p) + | 310 | (unsigned long)task_stack_page(next_p) + |
315 | THREAD_SIZE - KERNEL_STACK_OFFSET); | 311 | THREAD_SIZE); |
312 | this_cpu_write(cpu_current_top_of_stack, | ||
313 | (unsigned long)task_stack_page(next_p) + | ||
314 | THREAD_SIZE); | ||
316 | 315 | ||
317 | /* | 316 | /* |
318 | * Restore %gs if needed (which is common) | 317 | * Restore %gs if needed (which is common) |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 67fcc43577d2..4baaa972f52a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -52,7 +52,7 @@ | |||
52 | 52 | ||
53 | asmlinkage extern void ret_from_fork(void); | 53 | asmlinkage extern void ret_from_fork(void); |
54 | 54 | ||
55 | __visible DEFINE_PER_CPU(unsigned long, old_rsp); | 55 | __visible DEFINE_PER_CPU(unsigned long, rsp_scratch); |
56 | 56 | ||
57 | /* Prints also some state that isn't saved in the pt_regs */ | 57 | /* Prints also some state that isn't saved in the pt_regs */ |
58 | void __show_regs(struct pt_regs *regs, int all) | 58 | void __show_regs(struct pt_regs *regs, int all) |
@@ -161,7 +161,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
161 | p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; | 161 | p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; |
162 | childregs = task_pt_regs(p); | 162 | childregs = task_pt_regs(p); |
163 | p->thread.sp = (unsigned long) childregs; | 163 | p->thread.sp = (unsigned long) childregs; |
164 | p->thread.usersp = me->thread.usersp; | ||
165 | set_tsk_thread_flag(p, TIF_FORK); | 164 | set_tsk_thread_flag(p, TIF_FORK); |
166 | p->thread.io_bitmap_ptr = NULL; | 165 | p->thread.io_bitmap_ptr = NULL; |
167 | 166 | ||
@@ -207,7 +206,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
207 | */ | 206 | */ |
208 | if (clone_flags & CLONE_SETTLS) { | 207 | if (clone_flags & CLONE_SETTLS) { |
209 | #ifdef CONFIG_IA32_EMULATION | 208 | #ifdef CONFIG_IA32_EMULATION |
210 | if (test_thread_flag(TIF_IA32)) | 209 | if (is_ia32_task()) |
211 | err = do_set_thread_area(p, -1, | 210 | err = do_set_thread_area(p, -1, |
212 | (struct user_desc __user *)childregs->si, 0); | 211 | (struct user_desc __user *)childregs->si, 0); |
213 | else | 212 | else |
@@ -235,13 +234,12 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip, | |||
235 | loadsegment(es, _ds); | 234 | loadsegment(es, _ds); |
236 | loadsegment(ds, _ds); | 235 | loadsegment(ds, _ds); |
237 | load_gs_index(0); | 236 | load_gs_index(0); |
238 | current->thread.usersp = new_sp; | ||
239 | regs->ip = new_ip; | 237 | regs->ip = new_ip; |
240 | regs->sp = new_sp; | 238 | regs->sp = new_sp; |
241 | this_cpu_write(old_rsp, new_sp); | ||
242 | regs->cs = _cs; | 239 | regs->cs = _cs; |
243 | regs->ss = _ss; | 240 | regs->ss = _ss; |
244 | regs->flags = X86_EFLAGS_IF; | 241 | regs->flags = X86_EFLAGS_IF; |
242 | force_iret(); | ||
245 | } | 243 | } |
246 | 244 | ||
247 | void | 245 | void |
@@ -277,15 +275,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
277 | struct thread_struct *prev = &prev_p->thread; | 275 | struct thread_struct *prev = &prev_p->thread; |
278 | struct thread_struct *next = &next_p->thread; | 276 | struct thread_struct *next = &next_p->thread; |
279 | int cpu = smp_processor_id(); | 277 | int cpu = smp_processor_id(); |
280 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | 278 | struct tss_struct *tss = &per_cpu(cpu_tss, cpu); |
281 | unsigned fsindex, gsindex; | 279 | unsigned fsindex, gsindex; |
282 | fpu_switch_t fpu; | 280 | fpu_switch_t fpu; |
283 | 281 | ||
284 | fpu = switch_fpu_prepare(prev_p, next_p, cpu); | 282 | fpu = switch_fpu_prepare(prev_p, next_p, cpu); |
285 | 283 | ||
286 | /* Reload esp0 and ss1. */ | ||
287 | load_sp0(tss, next); | ||
288 | |||
289 | /* We must save %fs and %gs before load_TLS() because | 284 | /* We must save %fs and %gs before load_TLS() because |
290 | * %fs and %gs may be cleared by load_TLS(). | 285 | * %fs and %gs may be cleared by load_TLS(). |
291 | * | 286 | * |
@@ -401,8 +396,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
401 | /* | 396 | /* |
402 | * Switch the PDA and FPU contexts. | 397 | * Switch the PDA and FPU contexts. |
403 | */ | 398 | */ |
404 | prev->usersp = this_cpu_read(old_rsp); | ||
405 | this_cpu_write(old_rsp, next->usersp); | ||
406 | this_cpu_write(current_task, next_p); | 399 | this_cpu_write(current_task, next_p); |
407 | 400 | ||
408 | /* | 401 | /* |
@@ -413,9 +406,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
413 | task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count); | 406 | task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count); |
414 | this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count); | 407 | this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count); |
415 | 408 | ||
409 | /* Reload esp0 and ss1. This changes current_thread_info(). */ | ||
410 | load_sp0(tss, next); | ||
411 | |||
416 | this_cpu_write(kernel_stack, | 412 | this_cpu_write(kernel_stack, |
417 | (unsigned long)task_stack_page(next_p) + | 413 | (unsigned long)task_stack_page(next_p) + THREAD_SIZE); |
418 | THREAD_SIZE - KERNEL_STACK_OFFSET); | ||
419 | 414 | ||
420 | /* | 415 | /* |
421 | * Now maybe reload the debug registers and handle I/O bitmaps | 416 | * Now maybe reload the debug registers and handle I/O bitmaps |
@@ -602,6 +597,5 @@ long sys_arch_prctl(int code, unsigned long addr) | |||
602 | 597 | ||
603 | unsigned long KSTK_ESP(struct task_struct *task) | 598 | unsigned long KSTK_ESP(struct task_struct *task) |
604 | { | 599 | { |
605 | return (test_tsk_thread_flag(task, TIF_IA32)) ? | 600 | return task_pt_regs(task)->sp; |
606 | (task_pt_regs(task)->sp) : ((task)->thread.usersp); | ||
607 | } | 601 | } |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index e510618b2e91..a7bc79480719 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -364,18 +364,12 @@ static int set_segment_reg(struct task_struct *task, | |||
364 | case offsetof(struct user_regs_struct,cs): | 364 | case offsetof(struct user_regs_struct,cs): |
365 | if (unlikely(value == 0)) | 365 | if (unlikely(value == 0)) |
366 | return -EIO; | 366 | return -EIO; |
367 | #ifdef CONFIG_IA32_EMULATION | 367 | task_pt_regs(task)->cs = value; |
368 | if (test_tsk_thread_flag(task, TIF_IA32)) | ||
369 | task_pt_regs(task)->cs = value; | ||
370 | #endif | ||
371 | break; | 368 | break; |
372 | case offsetof(struct user_regs_struct,ss): | 369 | case offsetof(struct user_regs_struct,ss): |
373 | if (unlikely(value == 0)) | 370 | if (unlikely(value == 0)) |
374 | return -EIO; | 371 | return -EIO; |
375 | #ifdef CONFIG_IA32_EMULATION | 372 | task_pt_regs(task)->ss = value; |
376 | if (test_tsk_thread_flag(task, TIF_IA32)) | ||
377 | task_pt_regs(task)->ss = value; | ||
378 | #endif | ||
379 | break; | 373 | break; |
380 | } | 374 | } |
381 | 375 | ||
@@ -1421,7 +1415,7 @@ static void fill_sigtrap_info(struct task_struct *tsk, | |||
1421 | memset(info, 0, sizeof(*info)); | 1415 | memset(info, 0, sizeof(*info)); |
1422 | info->si_signo = SIGTRAP; | 1416 | info->si_signo = SIGTRAP; |
1423 | info->si_code = si_code; | 1417 | info->si_code = si_code; |
1424 | info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL; | 1418 | info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL; |
1425 | } | 1419 | } |
1426 | 1420 | ||
1427 | void user_single_step_siginfo(struct task_struct *tsk, | 1421 | void user_single_step_siginfo(struct task_struct *tsk, |
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 2f355d229a58..e5ecd20e72dd 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c | |||
@@ -141,7 +141,46 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, | |||
141 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); | 141 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); |
142 | } | 142 | } |
143 | 143 | ||
144 | static struct pvclock_vsyscall_time_info *pvclock_vdso_info; | ||
145 | |||
146 | static struct pvclock_vsyscall_time_info * | ||
147 | pvclock_get_vsyscall_user_time_info(int cpu) | ||
148 | { | ||
149 | if (!pvclock_vdso_info) { | ||
150 | BUG(); | ||
151 | return NULL; | ||
152 | } | ||
153 | |||
154 | return &pvclock_vdso_info[cpu]; | ||
155 | } | ||
156 | |||
157 | struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu) | ||
158 | { | ||
159 | return &pvclock_get_vsyscall_user_time_info(cpu)->pvti; | ||
160 | } | ||
161 | |||
144 | #ifdef CONFIG_X86_64 | 162 | #ifdef CONFIG_X86_64 |
163 | static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l, | ||
164 | void *v) | ||
165 | { | ||
166 | struct task_migration_notifier *mn = v; | ||
167 | struct pvclock_vsyscall_time_info *pvti; | ||
168 | |||
169 | pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu); | ||
170 | |||
171 | /* this is NULL when pvclock vsyscall is not initialized */ | ||
172 | if (unlikely(pvti == NULL)) | ||
173 | return NOTIFY_DONE; | ||
174 | |||
175 | pvti->migrate_count++; | ||
176 | |||
177 | return NOTIFY_DONE; | ||
178 | } | ||
179 | |||
180 | static struct notifier_block pvclock_migrate = { | ||
181 | .notifier_call = pvclock_task_migrate, | ||
182 | }; | ||
183 | |||
145 | /* | 184 | /* |
146 | * Initialize the generic pvclock vsyscall state. This will allocate | 185 | * Initialize the generic pvclock vsyscall state. This will allocate |
147 | * a/some page(s) for the per-vcpu pvclock information, set up a | 186 | * a/some page(s) for the per-vcpu pvclock information, set up a |
@@ -155,12 +194,17 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, | |||
155 | 194 | ||
156 | WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); | 195 | WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); |
157 | 196 | ||
197 | pvclock_vdso_info = i; | ||
198 | |||
158 | for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { | 199 | for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { |
159 | __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, | 200 | __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, |
160 | __pa(i) + (idx*PAGE_SIZE), | 201 | __pa(i) + (idx*PAGE_SIZE), |
161 | PAGE_KERNEL_VVAR); | 202 | PAGE_KERNEL_VVAR); |
162 | } | 203 | } |
163 | 204 | ||
205 | |||
206 | register_task_migration_notifier(&pvclock_migrate); | ||
207 | |||
164 | return 0; | 208 | return 0; |
165 | } | 209 | } |
166 | #endif | 210 | #endif |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index bae6c609888e..86db4bcd7ce5 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -183,6 +183,16 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
183 | }, | 183 | }, |
184 | }, | 184 | }, |
185 | 185 | ||
186 | /* ASRock */ | ||
187 | { /* Handle problems with rebooting on ASRock Q1900DC-ITX */ | ||
188 | .callback = set_pci_reboot, | ||
189 | .ident = "ASRock Q1900DC-ITX", | ||
190 | .matches = { | ||
191 | DMI_MATCH(DMI_BOARD_VENDOR, "ASRock"), | ||
192 | DMI_MATCH(DMI_BOARD_NAME, "Q1900DC-ITX"), | ||
193 | }, | ||
194 | }, | ||
195 | |||
186 | /* ASUS */ | 196 | /* ASUS */ |
187 | { /* Handle problems with rebooting on ASUS P4S800 */ | 197 | { /* Handle problems with rebooting on ASUS P4S800 */ |
188 | .callback = set_bios_reboot, | 198 | .callback = set_bios_reboot, |
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index e13f8e7c22a6..77630d57e7bf 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S | |||
@@ -226,23 +226,23 @@ swap_pages: | |||
226 | movl (%ebx), %ecx | 226 | movl (%ebx), %ecx |
227 | addl $4, %ebx | 227 | addl $4, %ebx |
228 | 1: | 228 | 1: |
229 | testl $0x1, %ecx /* is it a destination page */ | 229 | testb $0x1, %cl /* is it a destination page */ |
230 | jz 2f | 230 | jz 2f |
231 | movl %ecx, %edi | 231 | movl %ecx, %edi |
232 | andl $0xfffff000, %edi | 232 | andl $0xfffff000, %edi |
233 | jmp 0b | 233 | jmp 0b |
234 | 2: | 234 | 2: |
235 | testl $0x2, %ecx /* is it an indirection page */ | 235 | testb $0x2, %cl /* is it an indirection page */ |
236 | jz 2f | 236 | jz 2f |
237 | movl %ecx, %ebx | 237 | movl %ecx, %ebx |
238 | andl $0xfffff000, %ebx | 238 | andl $0xfffff000, %ebx |
239 | jmp 0b | 239 | jmp 0b |
240 | 2: | 240 | 2: |
241 | testl $0x4, %ecx /* is it the done indicator */ | 241 | testb $0x4, %cl /* is it the done indicator */ |
242 | jz 2f | 242 | jz 2f |
243 | jmp 3f | 243 | jmp 3f |
244 | 2: | 244 | 2: |
245 | testl $0x8, %ecx /* is it the source indicator */ | 245 | testb $0x8, %cl /* is it the source indicator */ |
246 | jz 0b /* Ignore it otherwise */ | 246 | jz 0b /* Ignore it otherwise */ |
247 | movl %ecx, %esi /* For every source page do a copy */ | 247 | movl %ecx, %esi /* For every source page do a copy */ |
248 | andl $0xfffff000, %esi | 248 | andl $0xfffff000, %esi |
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 3fd2c693e475..98111b38ebfd 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S | |||
@@ -123,7 +123,7 @@ identity_mapped: | |||
123 | * Set cr4 to a known state: | 123 | * Set cr4 to a known state: |
124 | * - physical address extension enabled | 124 | * - physical address extension enabled |
125 | */ | 125 | */ |
126 | movq $X86_CR4_PAE, %rax | 126 | movl $X86_CR4_PAE, %eax |
127 | movq %rax, %cr4 | 127 | movq %rax, %cr4 |
128 | 128 | ||
129 | jmp 1f | 129 | jmp 1f |
@@ -221,23 +221,23 @@ swap_pages: | |||
221 | movq (%rbx), %rcx | 221 | movq (%rbx), %rcx |
222 | addq $8, %rbx | 222 | addq $8, %rbx |
223 | 1: | 223 | 1: |
224 | testq $0x1, %rcx /* is it a destination page? */ | 224 | testb $0x1, %cl /* is it a destination page? */ |
225 | jz 2f | 225 | jz 2f |
226 | movq %rcx, %rdi | 226 | movq %rcx, %rdi |
227 | andq $0xfffffffffffff000, %rdi | 227 | andq $0xfffffffffffff000, %rdi |
228 | jmp 0b | 228 | jmp 0b |
229 | 2: | 229 | 2: |
230 | testq $0x2, %rcx /* is it an indirection page? */ | 230 | testb $0x2, %cl /* is it an indirection page? */ |
231 | jz 2f | 231 | jz 2f |
232 | movq %rcx, %rbx | 232 | movq %rcx, %rbx |
233 | andq $0xfffffffffffff000, %rbx | 233 | andq $0xfffffffffffff000, %rbx |
234 | jmp 0b | 234 | jmp 0b |
235 | 2: | 235 | 2: |
236 | testq $0x4, %rcx /* is it the done indicator? */ | 236 | testb $0x4, %cl /* is it the done indicator? */ |
237 | jz 2f | 237 | jz 2f |
238 | jmp 3f | 238 | jmp 3f |
239 | 2: | 239 | 2: |
240 | testq $0x8, %rcx /* is it the source indicator? */ | 240 | testb $0x8, %cl /* is it the source indicator? */ |
241 | jz 0b /* Ignore it otherwise */ | 241 | jz 0b /* Ignore it otherwise */ |
242 | movq %rcx, %rsi /* For every source page do a copy */ | 242 | movq %rcx, %rsi /* For every source page do a copy */ |
243 | andq $0xfffffffffffff000, %rsi | 243 | andq $0xfffffffffffff000, %rsi |
@@ -246,17 +246,17 @@ swap_pages: | |||
246 | movq %rsi, %rax | 246 | movq %rsi, %rax |
247 | 247 | ||
248 | movq %r10, %rdi | 248 | movq %r10, %rdi |
249 | movq $512, %rcx | 249 | movl $512, %ecx |
250 | rep ; movsq | 250 | rep ; movsq |
251 | 251 | ||
252 | movq %rax, %rdi | 252 | movq %rax, %rdi |
253 | movq %rdx, %rsi | 253 | movq %rdx, %rsi |
254 | movq $512, %rcx | 254 | movl $512, %ecx |
255 | rep ; movsq | 255 | rep ; movsq |
256 | 256 | ||
257 | movq %rdx, %rdi | 257 | movq %rdx, %rdi |
258 | movq %r10, %rsi | 258 | movq %r10, %rsi |
259 | movq $512, %rcx | 259 | movl $512, %ecx |
260 | rep ; movsq | 260 | rep ; movsq |
261 | 261 | ||
262 | lea PAGE_SIZE(%rax), %rsi | 262 | lea PAGE_SIZE(%rax), %rsi |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0a2421cca01f..d74ac33290ae 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -354,7 +354,7 @@ static void __init relocate_initrd(void) | |||
354 | mapaddr = ramdisk_image & PAGE_MASK; | 354 | mapaddr = ramdisk_image & PAGE_MASK; |
355 | p = early_memremap(mapaddr, clen+slop); | 355 | p = early_memremap(mapaddr, clen+slop); |
356 | memcpy(q, p+slop, clen); | 356 | memcpy(q, p+slop, clen); |
357 | early_iounmap(p, clen+slop); | 357 | early_memunmap(p, clen+slop); |
358 | q += clen; | 358 | q += clen; |
359 | ramdisk_image += clen; | 359 | ramdisk_image += clen; |
360 | ramdisk_size -= clen; | 360 | ramdisk_size -= clen; |
@@ -438,7 +438,7 @@ static void __init parse_setup_data(void) | |||
438 | data_len = data->len + sizeof(struct setup_data); | 438 | data_len = data->len + sizeof(struct setup_data); |
439 | data_type = data->type; | 439 | data_type = data->type; |
440 | pa_next = data->next; | 440 | pa_next = data->next; |
441 | early_iounmap(data, sizeof(*data)); | 441 | early_memunmap(data, sizeof(*data)); |
442 | 442 | ||
443 | switch (data_type) { | 443 | switch (data_type) { |
444 | case SETUP_E820_EXT: | 444 | case SETUP_E820_EXT: |
@@ -470,7 +470,7 @@ static void __init e820_reserve_setup_data(void) | |||
470 | E820_RAM, E820_RESERVED_KERN); | 470 | E820_RAM, E820_RESERVED_KERN); |
471 | found = 1; | 471 | found = 1; |
472 | pa_data = data->next; | 472 | pa_data = data->next; |
473 | early_iounmap(data, sizeof(*data)); | 473 | early_memunmap(data, sizeof(*data)); |
474 | } | 474 | } |
475 | if (!found) | 475 | if (!found) |
476 | return; | 476 | return; |
@@ -491,7 +491,7 @@ static void __init memblock_x86_reserve_range_setup_data(void) | |||
491 | data = early_memremap(pa_data, sizeof(*data)); | 491 | data = early_memremap(pa_data, sizeof(*data)); |
492 | memblock_reserve(pa_data, sizeof(*data) + data->len); | 492 | memblock_reserve(pa_data, sizeof(*data) + data->len); |
493 | pa_data = data->next; | 493 | pa_data = data->next; |
494 | early_iounmap(data, sizeof(*data)); | 494 | early_memunmap(data, sizeof(*data)); |
495 | } | 495 | } |
496 | } | 496 | } |
497 | 497 | ||
@@ -832,10 +832,15 @@ static void __init trim_low_memory_range(void) | |||
832 | static int | 832 | static int |
833 | dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) | 833 | dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) |
834 | { | 834 | { |
835 | pr_emerg("Kernel Offset: 0x%lx from 0x%lx " | 835 | if (kaslr_enabled()) { |
836 | "(relocation range: 0x%lx-0x%lx)\n", | 836 | pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n", |
837 | (unsigned long)&_text - __START_KERNEL, __START_KERNEL, | 837 | (unsigned long)&_text - __START_KERNEL, |
838 | __START_KERNEL_map, MODULES_VADDR-1); | 838 | __START_KERNEL, |
839 | __START_KERNEL_map, | ||
840 | MODULES_VADDR-1); | ||
841 | } else { | ||
842 | pr_emerg("Kernel Offset: disabled\n"); | ||
843 | } | ||
839 | 844 | ||
840 | return 0; | 845 | return 0; |
841 | } | 846 | } |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index e5042463c1bc..f9804080ccb3 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -61,8 +61,7 @@ | |||
61 | regs->seg = GET_SEG(seg) | 3; \ | 61 | regs->seg = GET_SEG(seg) | 3; \ |
62 | } while (0) | 62 | } while (0) |
63 | 63 | ||
64 | int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | 64 | int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) |
65 | unsigned long *pax) | ||
66 | { | 65 | { |
67 | void __user *buf; | 66 | void __user *buf; |
68 | unsigned int tmpflags; | 67 | unsigned int tmpflags; |
@@ -81,7 +80,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
81 | #endif /* CONFIG_X86_32 */ | 80 | #endif /* CONFIG_X86_32 */ |
82 | 81 | ||
83 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); | 82 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); |
84 | COPY(dx); COPY(cx); COPY(ip); | 83 | COPY(dx); COPY(cx); COPY(ip); COPY(ax); |
85 | 84 | ||
86 | #ifdef CONFIG_X86_64 | 85 | #ifdef CONFIG_X86_64 |
87 | COPY(r8); | 86 | COPY(r8); |
@@ -94,27 +93,20 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
94 | COPY(r15); | 93 | COPY(r15); |
95 | #endif /* CONFIG_X86_64 */ | 94 | #endif /* CONFIG_X86_64 */ |
96 | 95 | ||
97 | #ifdef CONFIG_X86_32 | ||
98 | COPY_SEG_CPL3(cs); | 96 | COPY_SEG_CPL3(cs); |
99 | COPY_SEG_CPL3(ss); | 97 | COPY_SEG_CPL3(ss); |
100 | #else /* !CONFIG_X86_32 */ | ||
101 | /* Kernel saves and restores only the CS segment register on signals, | ||
102 | * which is the bare minimum needed to allow mixed 32/64-bit code. | ||
103 | * App's signal handler can save/restore other segments if needed. */ | ||
104 | COPY_SEG_CPL3(cs); | ||
105 | #endif /* CONFIG_X86_32 */ | ||
106 | 98 | ||
107 | get_user_ex(tmpflags, &sc->flags); | 99 | get_user_ex(tmpflags, &sc->flags); |
108 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); | 100 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); |
109 | regs->orig_ax = -1; /* disable syscall checks */ | 101 | regs->orig_ax = -1; /* disable syscall checks */ |
110 | 102 | ||
111 | get_user_ex(buf, &sc->fpstate); | 103 | get_user_ex(buf, &sc->fpstate); |
112 | |||
113 | get_user_ex(*pax, &sc->ax); | ||
114 | } get_user_catch(err); | 104 | } get_user_catch(err); |
115 | 105 | ||
116 | err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32)); | 106 | err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32)); |
117 | 107 | ||
108 | force_iret(); | ||
109 | |||
118 | return err; | 110 | return err; |
119 | } | 111 | } |
120 | 112 | ||
@@ -162,8 +154,9 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, | |||
162 | #else /* !CONFIG_X86_32 */ | 154 | #else /* !CONFIG_X86_32 */ |
163 | put_user_ex(regs->flags, &sc->flags); | 155 | put_user_ex(regs->flags, &sc->flags); |
164 | put_user_ex(regs->cs, &sc->cs); | 156 | put_user_ex(regs->cs, &sc->cs); |
165 | put_user_ex(0, &sc->gs); | 157 | put_user_ex(0, &sc->__pad2); |
166 | put_user_ex(0, &sc->fs); | 158 | put_user_ex(0, &sc->__pad1); |
159 | put_user_ex(regs->ss, &sc->ss); | ||
167 | #endif /* CONFIG_X86_32 */ | 160 | #endif /* CONFIG_X86_32 */ |
168 | 161 | ||
169 | put_user_ex(fpstate, &sc->fpstate); | 162 | put_user_ex(fpstate, &sc->fpstate); |
@@ -457,9 +450,19 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, | |||
457 | 450 | ||
458 | regs->sp = (unsigned long)frame; | 451 | regs->sp = (unsigned long)frame; |
459 | 452 | ||
460 | /* Set up the CS register to run signal handlers in 64-bit mode, | 453 | /* |
461 | even if the handler happens to be interrupting 32-bit code. */ | 454 | * Set up the CS and SS registers to run signal handlers in |
455 | * 64-bit mode, even if the handler happens to be interrupting | ||
456 | * 32-bit or 16-bit code. | ||
457 | * | ||
458 | * SS is subtle. In 64-bit mode, we don't need any particular | ||
459 | * SS descriptor, but we do need SS to be valid. It's possible | ||
460 | * that the old SS is entirely bogus -- this can happen if the | ||
461 | * signal we're trying to deliver is #GP or #SS caused by a bad | ||
462 | * SS value. | ||
463 | */ | ||
462 | regs->cs = __USER_CS; | 464 | regs->cs = __USER_CS; |
465 | regs->ss = __USER_DS; | ||
463 | 466 | ||
464 | return 0; | 467 | return 0; |
465 | } | 468 | } |
@@ -539,7 +542,6 @@ asmlinkage unsigned long sys_sigreturn(void) | |||
539 | { | 542 | { |
540 | struct pt_regs *regs = current_pt_regs(); | 543 | struct pt_regs *regs = current_pt_regs(); |
541 | struct sigframe __user *frame; | 544 | struct sigframe __user *frame; |
542 | unsigned long ax; | ||
543 | sigset_t set; | 545 | sigset_t set; |
544 | 546 | ||
545 | frame = (struct sigframe __user *)(regs->sp - 8); | 547 | frame = (struct sigframe __user *)(regs->sp - 8); |
@@ -553,9 +555,9 @@ asmlinkage unsigned long sys_sigreturn(void) | |||
553 | 555 | ||
554 | set_current_blocked(&set); | 556 | set_current_blocked(&set); |
555 | 557 | ||
556 | if (restore_sigcontext(regs, &frame->sc, &ax)) | 558 | if (restore_sigcontext(regs, &frame->sc)) |
557 | goto badframe; | 559 | goto badframe; |
558 | return ax; | 560 | return regs->ax; |
559 | 561 | ||
560 | badframe: | 562 | badframe: |
561 | signal_fault(regs, frame, "sigreturn"); | 563 | signal_fault(regs, frame, "sigreturn"); |
@@ -568,7 +570,6 @@ asmlinkage long sys_rt_sigreturn(void) | |||
568 | { | 570 | { |
569 | struct pt_regs *regs = current_pt_regs(); | 571 | struct pt_regs *regs = current_pt_regs(); |
570 | struct rt_sigframe __user *frame; | 572 | struct rt_sigframe __user *frame; |
571 | unsigned long ax; | ||
572 | sigset_t set; | 573 | sigset_t set; |
573 | 574 | ||
574 | frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); | 575 | frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); |
@@ -579,37 +580,23 @@ asmlinkage long sys_rt_sigreturn(void) | |||
579 | 580 | ||
580 | set_current_blocked(&set); | 581 | set_current_blocked(&set); |
581 | 582 | ||
582 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | 583 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) |
583 | goto badframe; | 584 | goto badframe; |
584 | 585 | ||
585 | if (restore_altstack(&frame->uc.uc_stack)) | 586 | if (restore_altstack(&frame->uc.uc_stack)) |
586 | goto badframe; | 587 | goto badframe; |
587 | 588 | ||
588 | return ax; | 589 | return regs->ax; |
589 | 590 | ||
590 | badframe: | 591 | badframe: |
591 | signal_fault(regs, frame, "rt_sigreturn"); | 592 | signal_fault(regs, frame, "rt_sigreturn"); |
592 | return 0; | 593 | return 0; |
593 | } | 594 | } |
594 | 595 | ||
595 | /* | ||
596 | * OK, we're invoking a handler: | ||
597 | */ | ||
598 | static int signr_convert(int sig) | ||
599 | { | ||
600 | #ifdef CONFIG_X86_32 | ||
601 | struct thread_info *info = current_thread_info(); | ||
602 | |||
603 | if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32) | ||
604 | return info->exec_domain->signal_invmap[sig]; | ||
605 | #endif /* CONFIG_X86_32 */ | ||
606 | return sig; | ||
607 | } | ||
608 | |||
609 | static int | 596 | static int |
610 | setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) | 597 | setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) |
611 | { | 598 | { |
612 | int usig = signr_convert(ksig->sig); | 599 | int usig = ksig->sig; |
613 | sigset_t *set = sigmask_to_save(); | 600 | sigset_t *set = sigmask_to_save(); |
614 | compat_sigset_t *cset = (compat_sigset_t *) set; | 601 | compat_sigset_t *cset = (compat_sigset_t *) set; |
615 | 602 | ||
@@ -679,7 +666,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) | |||
679 | * Ensure the signal handler starts with the new fpu state. | 666 | * Ensure the signal handler starts with the new fpu state. |
680 | */ | 667 | */ |
681 | if (used_math()) | 668 | if (used_math()) |
682 | drop_init_fpu(current); | 669 | fpu_reset_state(current); |
683 | } | 670 | } |
684 | signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP)); | 671 | signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP)); |
685 | } | 672 | } |
@@ -780,7 +767,6 @@ asmlinkage long sys32_x32_rt_sigreturn(void) | |||
780 | struct pt_regs *regs = current_pt_regs(); | 767 | struct pt_regs *regs = current_pt_regs(); |
781 | struct rt_sigframe_x32 __user *frame; | 768 | struct rt_sigframe_x32 __user *frame; |
782 | sigset_t set; | 769 | sigset_t set; |
783 | unsigned long ax; | ||
784 | 770 | ||
785 | frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); | 771 | frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); |
786 | 772 | ||
@@ -791,13 +777,13 @@ asmlinkage long sys32_x32_rt_sigreturn(void) | |||
791 | 777 | ||
792 | set_current_blocked(&set); | 778 | set_current_blocked(&set); |
793 | 779 | ||
794 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | 780 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) |
795 | goto badframe; | 781 | goto badframe; |
796 | 782 | ||
797 | if (compat_restore_altstack(&frame->uc.uc_stack)) | 783 | if (compat_restore_altstack(&frame->uc.uc_stack)) |
798 | goto badframe; | 784 | goto badframe; |
799 | 785 | ||
800 | return ax; | 786 | return regs->ax; |
801 | 787 | ||
802 | badframe: | 788 | badframe: |
803 | signal_fault(regs, frame, "x32 rt_sigreturn"); | 789 | signal_fault(regs, frame, "x32 rt_sigreturn"); |
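All three sigreturn flavours now rely on restore_sigcontext() writing the saved ax into regs->ax itself, so the syscall simply returns regs->ax and the asm return path carries that value back to user space; the extra out-parameter and local copy become redundant. Roughly, the calling convention changes from the first form to the second (a sketch, not the exact source):

	/* before: the restored ax travels through a separate out-parameter */
	unsigned long ax;
	if (restore_sigcontext(regs, &frame->sc, &ax))
		goto badframe;
	return ax;

	/* after: restore_sigcontext() fills regs->ax like every other register */
	if (restore_sigcontext(regs, &frame->sc))
		goto badframe;
	return regs->ax;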
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index febc6aabc72e..50e547eac8cd 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -77,9 +77,6 @@ | |||
77 | #include <asm/realmode.h> | 77 | #include <asm/realmode.h> |
78 | #include <asm/misc.h> | 78 | #include <asm/misc.h> |
79 | 79 | ||
80 | /* State of each CPU */ | ||
81 | DEFINE_PER_CPU(int, cpu_state) = { 0 }; | ||
82 | |||
83 | /* Number of siblings per CPU package */ | 80 | /* Number of siblings per CPU package */ |
84 | int smp_num_siblings = 1; | 81 | int smp_num_siblings = 1; |
85 | EXPORT_SYMBOL(smp_num_siblings); | 82 | EXPORT_SYMBOL(smp_num_siblings); |
@@ -257,7 +254,7 @@ static void notrace start_secondary(void *unused) | |||
257 | lock_vector_lock(); | 254 | lock_vector_lock(); |
258 | set_cpu_online(smp_processor_id(), true); | 255 | set_cpu_online(smp_processor_id(), true); |
259 | unlock_vector_lock(); | 256 | unlock_vector_lock(); |
260 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | 257 | cpu_set_state_online(smp_processor_id()); |
261 | x86_platform.nmi_init(); | 258 | x86_platform.nmi_init(); |
262 | 259 | ||
263 | /* enable local interrupts */ | 260 | /* enable local interrupts */ |
@@ -779,6 +776,26 @@ out: | |||
779 | return boot_error; | 776 | return boot_error; |
780 | } | 777 | } |
781 | 778 | ||
779 | void common_cpu_up(unsigned int cpu, struct task_struct *idle) | ||
780 | { | ||
781 | /* Just in case we booted with a single CPU. */ | ||
782 | alternatives_enable_smp(); | ||
783 | |||
784 | per_cpu(current_task, cpu) = idle; | ||
785 | |||
786 | #ifdef CONFIG_X86_32 | ||
787 | /* Stack for startup_32 can be just as for start_secondary onwards */ | ||
788 | irq_ctx_init(cpu); | ||
789 | per_cpu(cpu_current_top_of_stack, cpu) = | ||
790 | (unsigned long)task_stack_page(idle) + THREAD_SIZE; | ||
791 | #else | ||
792 | clear_tsk_thread_flag(idle, TIF_FORK); | ||
793 | initial_gs = per_cpu_offset(cpu); | ||
794 | #endif | ||
795 | per_cpu(kernel_stack, cpu) = | ||
796 | (unsigned long)task_stack_page(idle) + THREAD_SIZE; | ||
797 | } | ||
798 | |||
782 | /* | 799 | /* |
783 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad | 800 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad |
784 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. | 801 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. |
@@ -796,23 +813,9 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) | |||
796 | int cpu0_nmi_registered = 0; | 813 | int cpu0_nmi_registered = 0; |
797 | unsigned long timeout; | 814 | unsigned long timeout; |
798 | 815 | ||
799 | /* Just in case we booted with a single CPU. */ | ||
800 | alternatives_enable_smp(); | ||
801 | |||
802 | idle->thread.sp = (unsigned long) (((struct pt_regs *) | 816 | idle->thread.sp = (unsigned long) (((struct pt_regs *) |
803 | (THREAD_SIZE + task_stack_page(idle))) - 1); | 817 | (THREAD_SIZE + task_stack_page(idle))) - 1); |
804 | per_cpu(current_task, cpu) = idle; | ||
805 | 818 | ||
806 | #ifdef CONFIG_X86_32 | ||
807 | /* Stack for startup_32 can be just as for start_secondary onwards */ | ||
808 | irq_ctx_init(cpu); | ||
809 | #else | ||
810 | clear_tsk_thread_flag(idle, TIF_FORK); | ||
811 | initial_gs = per_cpu_offset(cpu); | ||
812 | #endif | ||
813 | per_cpu(kernel_stack, cpu) = | ||
814 | (unsigned long)task_stack_page(idle) - | ||
815 | KERNEL_STACK_OFFSET + THREAD_SIZE; | ||
816 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); | 819 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); |
817 | initial_code = (unsigned long)start_secondary; | 820 | initial_code = (unsigned long)start_secondary; |
818 | stack_start = idle->thread.sp; | 821 | stack_start = idle->thread.sp; |
@@ -948,11 +951,16 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) | |||
948 | */ | 951 | */ |
949 | mtrr_save_state(); | 952 | mtrr_save_state(); |
950 | 953 | ||
951 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; | 954 | /* x86 CPUs take themselves offline, so delayed offline is OK. */ |
955 | err = cpu_check_up_prepare(cpu); | ||
956 | if (err && err != -EBUSY) | ||
957 | return err; | ||
952 | 958 | ||
953 | /* the FPU context is blank, nobody can own it */ | 959 | /* the FPU context is blank, nobody can own it */ |
954 | __cpu_disable_lazy_restore(cpu); | 960 | __cpu_disable_lazy_restore(cpu); |
955 | 961 | ||
962 | common_cpu_up(cpu, tidle); | ||
963 | |||
956 | err = do_boot_cpu(apicid, cpu, tidle); | 964 | err = do_boot_cpu(apicid, cpu, tidle); |
957 | if (err) { | 965 | if (err) { |
958 | pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu); | 966 | pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu); |
@@ -1086,8 +1094,6 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1086 | return SMP_NO_APIC; | 1094 | return SMP_NO_APIC; |
1087 | } | 1095 | } |
1088 | 1096 | ||
1089 | verify_local_APIC(); | ||
1090 | |||
1091 | /* | 1097 | /* |
1092 | * If SMP should be disabled, then really disable it! | 1098 | * If SMP should be disabled, then really disable it! |
1093 | */ | 1099 | */ |
@@ -1191,7 +1197,7 @@ void __init native_smp_prepare_boot_cpu(void) | |||
1191 | switch_to_new_gdt(me); | 1197 | switch_to_new_gdt(me); |
1192 | /* already set me in cpu_online_mask in boot_cpu_init() */ | 1198 | /* already set me in cpu_online_mask in boot_cpu_init() */ |
1193 | cpumask_set_cpu(me, cpu_callout_mask); | 1199 | cpumask_set_cpu(me, cpu_callout_mask); |
1194 | per_cpu(cpu_state, me) = CPU_ONLINE; | 1200 | cpu_set_state_online(me); |
1195 | } | 1201 | } |
1196 | 1202 | ||
1197 | void __init native_smp_cpus_done(unsigned int max_cpus) | 1203 | void __init native_smp_cpus_done(unsigned int max_cpus) |
@@ -1318,14 +1324,10 @@ static void __ref remove_cpu_from_maps(int cpu) | |||
1318 | numa_remove_cpu(cpu); | 1324 | numa_remove_cpu(cpu); |
1319 | } | 1325 | } |
1320 | 1326 | ||
1321 | static DEFINE_PER_CPU(struct completion, die_complete); | ||
1322 | |||
1323 | void cpu_disable_common(void) | 1327 | void cpu_disable_common(void) |
1324 | { | 1328 | { |
1325 | int cpu = smp_processor_id(); | 1329 | int cpu = smp_processor_id(); |
1326 | 1330 | ||
1327 | init_completion(&per_cpu(die_complete, smp_processor_id())); | ||
1328 | |||
1329 | remove_siblinginfo(cpu); | 1331 | remove_siblinginfo(cpu); |
1330 | 1332 | ||
1331 | /* It's now safe to remove this processor from the online map */ | 1333 | /* It's now safe to remove this processor from the online map */ |
@@ -1349,24 +1351,27 @@ int native_cpu_disable(void) | |||
1349 | return 0; | 1351 | return 0; |
1350 | } | 1352 | } |
1351 | 1353 | ||
1352 | void cpu_die_common(unsigned int cpu) | 1354 | int common_cpu_die(unsigned int cpu) |
1353 | { | 1355 | { |
1354 | wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ); | 1356 | int ret = 0; |
1355 | } | ||
1356 | 1357 | ||
1357 | void native_cpu_die(unsigned int cpu) | ||
1358 | { | ||
1359 | /* We don't do anything here: idle task is faking death itself. */ | 1358 | /* We don't do anything here: idle task is faking death itself. */ |
1360 | 1359 | ||
1361 | cpu_die_common(cpu); | ||
1362 | |||
1363 | /* They ack this in play_dead() by setting CPU_DEAD */ | 1360 | /* They ack this in play_dead() by setting CPU_DEAD */ |
1364 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { | 1361 | if (cpu_wait_death(cpu, 5)) { |
1365 | if (system_state == SYSTEM_RUNNING) | 1362 | if (system_state == SYSTEM_RUNNING) |
1366 | pr_info("CPU %u is now offline\n", cpu); | 1363 | pr_info("CPU %u is now offline\n", cpu); |
1367 | } else { | 1364 | } else { |
1368 | pr_err("CPU %u didn't die...\n", cpu); | 1365 | pr_err("CPU %u didn't die...\n", cpu); |
1366 | ret = -1; | ||
1369 | } | 1367 | } |
1368 | |||
1369 | return ret; | ||
1370 | } | ||
1371 | |||
1372 | void native_cpu_die(unsigned int cpu) | ||
1373 | { | ||
1374 | common_cpu_die(cpu); | ||
1370 | } | 1375 | } |
1371 | 1376 | ||
1372 | void play_dead_common(void) | 1377 | void play_dead_common(void) |
@@ -1375,10 +1380,8 @@ void play_dead_common(void) | |||
1375 | reset_lazy_tlbstate(); | 1380 | reset_lazy_tlbstate(); |
1376 | amd_e400_remove_cpu(raw_smp_processor_id()); | 1381 | amd_e400_remove_cpu(raw_smp_processor_id()); |
1377 | 1382 | ||
1378 | mb(); | ||
1379 | /* Ack it */ | 1383 | /* Ack it */ |
1380 | __this_cpu_write(cpu_state, CPU_DEAD); | 1384 | (void)cpu_report_death(); |
1381 | complete(&per_cpu(die_complete, smp_processor_id())); | ||
1382 | 1385 | ||
1383 | /* | 1386 | /* |
1384 | * With physical CPU hotplug, we should halt the cpu | 1387 | * With physical CPU hotplug, we should halt the cpu |
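The open-coded cpu_state per-cpu variable and die_complete completion are replaced by the generic CPU hotplug state tracking: the dying CPU acknowledges its exit with cpu_report_death(), the controlling CPU waits (with a timeout) for that acknowledgement in cpu_wait_death(), and bring-up goes through cpu_check_up_prepare()/cpu_set_state_online(). A sketch of how the two sides pair up:

	/* controlling CPU: native_cpu_die() -> common_cpu_die() */
	if (cpu_wait_death(cpu, 5)) {			/* true once the ack arrives      */
		pr_info("CPU %u is now offline\n", cpu);
	} else {
		pr_err("CPU %u didn't die...\n", cpu);	/* timed out waiting for the ack  */
	}

	/* dying CPU: play_dead_common() */
	/* ...tear down local state (TLB, APIC bookkeeping)... */
	(void)cpu_report_death();			/* the ack cpu_wait_death() sees  */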
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 30277e27431a..10e0272d789a 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c | |||
@@ -34,10 +34,26 @@ static unsigned long get_align_mask(void) | |||
34 | return va_align.mask; | 34 | return va_align.mask; |
35 | } | 35 | } |
36 | 36 | ||
37 | /* | ||
38 | * To avoid aliasing in the I$ on AMD F15h, the bits defined by the | ||
39 | * va_align.bits, [12:upper_bit), are set to a random value instead of | ||
40 | * being zeroed. This random value is computed once per boot. This form | ||
41 | * of ASLR is known as "per-boot ASLR". | ||
42 | * | ||
43 | * To achieve this, the random value is added to the info.align_offset | ||
44 | * value before calling vm_unmapped_area() or ORed directly into the | ||
45 | * address. | ||
46 | */ | ||
47 | static unsigned long get_align_bits(void) | ||
48 | { | ||
49 | return va_align.bits & get_align_mask(); | ||
50 | } | ||
51 | |||
37 | unsigned long align_vdso_addr(unsigned long addr) | 52 | unsigned long align_vdso_addr(unsigned long addr) |
38 | { | 53 | { |
39 | unsigned long align_mask = get_align_mask(); | 54 | unsigned long align_mask = get_align_mask(); |
40 | return (addr + align_mask) & ~align_mask; | 55 | addr = (addr + align_mask) & ~align_mask; |
56 | return addr | get_align_bits(); | ||
41 | } | 57 | } |
42 | 58 | ||
43 | static int __init control_va_addr_alignment(char *str) | 59 | static int __init control_va_addr_alignment(char *str) |
@@ -135,8 +151,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, | |||
135 | info.length = len; | 151 | info.length = len; |
136 | info.low_limit = begin; | 152 | info.low_limit = begin; |
137 | info.high_limit = end; | 153 | info.high_limit = end; |
138 | info.align_mask = filp ? get_align_mask() : 0; | 154 | info.align_mask = 0; |
139 | info.align_offset = pgoff << PAGE_SHIFT; | 155 | info.align_offset = pgoff << PAGE_SHIFT; |
156 | if (filp) { | ||
157 | info.align_mask = get_align_mask(); | ||
158 | info.align_offset += get_align_bits(); | ||
159 | } | ||
140 | return vm_unmapped_area(&info); | 160 | return vm_unmapped_area(&info); |
141 | } | 161 | } |
142 | 162 | ||
@@ -174,8 +194,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
174 | info.length = len; | 194 | info.length = len; |
175 | info.low_limit = PAGE_SIZE; | 195 | info.low_limit = PAGE_SIZE; |
176 | info.high_limit = mm->mmap_base; | 196 | info.high_limit = mm->mmap_base; |
177 | info.align_mask = filp ? get_align_mask() : 0; | 197 | info.align_mask = 0; |
178 | info.align_offset = pgoff << PAGE_SHIFT; | 198 | info.align_offset = pgoff << PAGE_SHIFT; |
199 | if (filp) { | ||
200 | info.align_mask = get_align_mask(); | ||
201 | info.align_offset += get_align_bits(); | ||
202 | } | ||
179 | addr = vm_unmapped_area(&info); | 203 | addr = vm_unmapped_area(&info); |
180 | if (!(addr & ~PAGE_MASK)) | 204 | if (!(addr & ~PAGE_MASK)) |
181 | return addr; | 205 | return addr; |
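The arithmetic behind get_align_bits() is easiest to see with concrete numbers: the address is rounded up to the alignment boundary and the per-boot random value is then ORed into the bits the mask just cleared, so executable mappings keep identical low bits (avoiding I$ aliasing on AMD F15h) without every process landing on the same slot. A stand-alone sketch of the same computation, with made-up values:

	#include <stdio.h>

	int main(void)
	{
		unsigned long align_mask = 0x7fff;              /* e.g. 32 KiB alignment       */
		unsigned long align_bits = 0x3000 & align_mask; /* per-boot random bit pattern */
		unsigned long addr = 0x7f0000012345UL;

		addr = (addr + align_mask) & ~align_mask;       /* round up: 0x7f0000018000    */
		addr |= align_bits;                             /* re-insert the random bits   */

		printf("%#lx\n", addr);                         /* prints 0x7f000001b000       */
		return 0;
	}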
diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/kernel/syscall_32.c index e9bcd57d8a9e..3777189c4a19 100644 --- a/arch/x86/kernel/syscall_32.c +++ b/arch/x86/kernel/syscall_32.c | |||
@@ -5,21 +5,29 @@ | |||
5 | #include <linux/cache.h> | 5 | #include <linux/cache.h> |
6 | #include <asm/asm-offsets.h> | 6 | #include <asm/asm-offsets.h> |
7 | 7 | ||
8 | #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ; | 8 | #ifdef CONFIG_IA32_EMULATION |
9 | #define SYM(sym, compat) compat | ||
10 | #else | ||
11 | #define SYM(sym, compat) sym | ||
12 | #define ia32_sys_call_table sys_call_table | ||
13 | #define __NR_ia32_syscall_max __NR_syscall_max | ||
14 | #endif | ||
15 | |||
16 | #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ; | ||
9 | #include <asm/syscalls_32.h> | 17 | #include <asm/syscalls_32.h> |
10 | #undef __SYSCALL_I386 | 18 | #undef __SYSCALL_I386 |
11 | 19 | ||
12 | #define __SYSCALL_I386(nr, sym, compat) [nr] = sym, | 20 | #define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat), |
13 | 21 | ||
14 | typedef asmlinkage void (*sys_call_ptr_t)(void); | 22 | typedef asmlinkage void (*sys_call_ptr_t)(void); |
15 | 23 | ||
16 | extern asmlinkage void sys_ni_syscall(void); | 24 | extern asmlinkage void sys_ni_syscall(void); |
17 | 25 | ||
18 | __visible const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { | 26 | __visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = { |
19 | /* | 27 | /* |
20 | * Smells like a compiler bug -- it doesn't work | 28 | * Smells like a compiler bug -- it doesn't work |
21 | * when the & below is removed. | 29 | * when the & below is removed. |
22 | */ | 30 | */ |
23 | [0 ... __NR_syscall_max] = &sys_ni_syscall, | 31 | [0 ... __NR_ia32_syscall_max] = &sys_ni_syscall, |
24 | #include <asm/syscalls_32.h> | 32 | #include <asm/syscalls_32.h> |
25 | }; | 33 | }; |
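The SYM() indirection lets a single syscalls_32.h listing generate either table: with CONFIG_IA32_EMULATION the compat entry points are selected and collected into ia32_sys_call_table; without it the native symbols are selected and the two #defines fold the names back to sys_call_table/__NR_syscall_max, so the native 32-bit build is unchanged. For a hypothetical entry __SYSCALL_I386(5, sys_open, compat_sys_open) the two expansions come out roughly as:

	/* CONFIG_IA32_EMULATION=y: SYM(sym, compat) selects the compat symbol */
	extern asmlinkage void compat_sys_open(void);
	/* table initializer: [5] = compat_sys_open, in ia32_sys_call_table    */

	/* CONFIG_IA32_EMULATION=n: SYM(sym, compat) selects the native symbol,
	 * and ia32_sys_call_table is #defined back to sys_call_table          */
	extern asmlinkage void sys_open(void);
	/* table initializer: [5] = sys_open, in sys_call_table                */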
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c index b79133abda48..5ecbfe5099da 100644 --- a/arch/x86/kernel/test_rodata.c +++ b/arch/x86/kernel/test_rodata.c | |||
@@ -57,7 +57,7 @@ int rodata_test(void) | |||
57 | /* test 3: check the value hasn't changed */ | 57 | /* test 3: check the value hasn't changed */ |
58 | /* If this test fails, we managed to overwrite the data */ | 58 | /* If this test fails, we managed to overwrite the data */ |
59 | if (!rodata_test_data) { | 59 | if (!rodata_test_data) { |
60 | printk(KERN_ERR "rodata_test: Test 3 failes (end data)\n"); | 60 | printk(KERN_ERR "rodata_test: Test 3 fails (end data)\n"); |
61 | return -ENODEV; | 61 | return -ENODEV; |
62 | } | 62 | } |
63 | /* test 4: check if the rodata section is 4Kb aligned */ | 63 | /* test 4: check if the rodata section is 4Kb aligned */ |
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 25adc0e16eaa..d39c09119db6 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c | |||
@@ -30,7 +30,7 @@ unsigned long profile_pc(struct pt_regs *regs) | |||
30 | { | 30 | { |
31 | unsigned long pc = instruction_pointer(regs); | 31 | unsigned long pc = instruction_pointer(regs); |
32 | 32 | ||
33 | if (!user_mode_vm(regs) && in_lock_functions(pc)) { | 33 | if (!user_mode(regs) && in_lock_functions(pc)) { |
34 | #ifdef CONFIG_FRAME_POINTER | 34 | #ifdef CONFIG_FRAME_POINTER |
35 | return *(unsigned long *)(regs->bp + sizeof(long)); | 35 | return *(unsigned long *)(regs->bp + sizeof(long)); |
36 | #else | 36 | #else |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 4ff5d162ff9f..324ab5247687 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -112,7 +112,7 @@ enum ctx_state ist_enter(struct pt_regs *regs) | |||
112 | { | 112 | { |
113 | enum ctx_state prev_state; | 113 | enum ctx_state prev_state; |
114 | 114 | ||
115 | if (user_mode_vm(regs)) { | 115 | if (user_mode(regs)) { |
116 | /* Other than that, we're just an exception. */ | 116 | /* Other than that, we're just an exception. */ |
117 | prev_state = exception_enter(); | 117 | prev_state = exception_enter(); |
118 | } else { | 118 | } else { |
@@ -123,7 +123,7 @@ enum ctx_state ist_enter(struct pt_regs *regs) | |||
123 | * but we need to notify RCU. | 123 | * but we need to notify RCU. |
124 | */ | 124 | */ |
125 | rcu_nmi_enter(); | 125 | rcu_nmi_enter(); |
126 | prev_state = IN_KERNEL; /* the value is irrelevant. */ | 126 | prev_state = CONTEXT_KERNEL; /* the value is irrelevant. */ |
127 | } | 127 | } |
128 | 128 | ||
129 | /* | 129 | /* |
@@ -146,7 +146,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) | |||
146 | /* Must be before exception_exit. */ | 146 | /* Must be before exception_exit. */ |
147 | preempt_count_sub(HARDIRQ_OFFSET); | 147 | preempt_count_sub(HARDIRQ_OFFSET); |
148 | 148 | ||
149 | if (user_mode_vm(regs)) | 149 | if (user_mode(regs)) |
150 | return exception_exit(prev_state); | 150 | return exception_exit(prev_state); |
151 | else | 151 | else |
152 | rcu_nmi_exit(); | 152 | rcu_nmi_exit(); |
@@ -158,7 +158,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) | |||
158 | * | 158 | * |
159 | * IST exception handlers normally cannot schedule. As a special | 159 | * IST exception handlers normally cannot schedule. As a special |
160 | * exception, if the exception interrupted userspace code (i.e. | 160 | * exception, if the exception interrupted userspace code (i.e. |
161 | * user_mode_vm(regs) would return true) and the exception was not | 161 | * user_mode(regs) would return true) and the exception was not |
162 | * a double fault, it can be safe to schedule. ist_begin_non_atomic() | 162 | * a double fault, it can be safe to schedule. ist_begin_non_atomic() |
163 | * begins a non-atomic section within an ist_enter()/ist_exit() region. | 163 | * begins a non-atomic section within an ist_enter()/ist_exit() region. |
164 | * Callers are responsible for enabling interrupts themselves inside | 164 | * Callers are responsible for enabling interrupts themselves inside |
@@ -167,15 +167,15 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) | |||
167 | */ | 167 | */ |
168 | void ist_begin_non_atomic(struct pt_regs *regs) | 168 | void ist_begin_non_atomic(struct pt_regs *regs) |
169 | { | 169 | { |
170 | BUG_ON(!user_mode_vm(regs)); | 170 | BUG_ON(!user_mode(regs)); |
171 | 171 | ||
172 | /* | 172 | /* |
173 | * Sanity check: we need to be on the normal thread stack. This | 173 | * Sanity check: we need to be on the normal thread stack. This |
174 | * will catch asm bugs and any attempt to use ist_preempt_enable | 174 | * will catch asm bugs and any attempt to use ist_preempt_enable |
175 | * from double_fault. | 175 | * from double_fault. |
176 | */ | 176 | */ |
177 | BUG_ON(((current_stack_pointer() ^ this_cpu_read_stable(kernel_stack)) | 177 | BUG_ON((unsigned long)(current_top_of_stack() - |
178 | & ~(THREAD_SIZE - 1)) != 0); | 178 | current_stack_pointer()) >= THREAD_SIZE); |
179 | 179 | ||
180 | preempt_count_sub(HARDIRQ_OFFSET); | 180 | preempt_count_sub(HARDIRQ_OFFSET); |
181 | } | 181 | } |
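The new sanity check works because the unsigned difference current_top_of_stack() - current_stack_pointer() can only be smaller than THREAD_SIZE when the saved stack pointer actually lies inside the current thread stack; for any other stack (IST, IRQ, or garbage) the subtraction either wraps around or yields a value far above THREAD_SIZE. A tiny stand-alone sketch with made-up addresses:

	#include <stdio.h>

	#define THREAD_SIZE (16UL * 1024)	/* assume 16 KiB thread stacks */

	int main(void)
	{
		unsigned long top = 0xffff880012348000UL;	    /* hypothetical stack top  */
		unsigned long sp_on_stack  = top - 0x200;	    /* inside the thread stack */
		unsigned long sp_elsewhere = 0xffff880099990000UL;  /* some other stack        */

		printf("%d\n", (top - sp_on_stack)  >= THREAD_SIZE); /* 0: check passes        */
		printf("%d\n", (top - sp_elsewhere) >= THREAD_SIZE); /* 1: would hit BUG_ON()  */
		return 0;
	}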
@@ -194,8 +194,7 @@ static nokprobe_inline int | |||
194 | do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, | 194 | do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, |
195 | struct pt_regs *regs, long error_code) | 195 | struct pt_regs *regs, long error_code) |
196 | { | 196 | { |
197 | #ifdef CONFIG_X86_32 | 197 | if (v8086_mode(regs)) { |
198 | if (regs->flags & X86_VM_MASK) { | ||
199 | /* | 198 | /* |
200 | * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86. | 199 | * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86. |
201 | * On nmi (interrupt 2), do_trap should not be called. | 200 | * On nmi (interrupt 2), do_trap should not be called. |
@@ -207,7 +206,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, | |||
207 | } | 206 | } |
208 | return -1; | 207 | return -1; |
209 | } | 208 | } |
210 | #endif | 209 | |
211 | if (!user_mode(regs)) { | 210 | if (!user_mode(regs)) { |
212 | if (!fixup_exception(regs)) { | 211 | if (!fixup_exception(regs)) { |
213 | tsk->thread.error_code = error_code; | 212 | tsk->thread.error_code = error_code; |
@@ -384,7 +383,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) | |||
384 | goto exit; | 383 | goto exit; |
385 | conditional_sti(regs); | 384 | conditional_sti(regs); |
386 | 385 | ||
387 | if (!user_mode_vm(regs)) | 386 | if (!user_mode(regs)) |
388 | die("bounds", regs, error_code); | 387 | die("bounds", regs, error_code); |
389 | 388 | ||
390 | if (!cpu_feature_enabled(X86_FEATURE_MPX)) { | 389 | if (!cpu_feature_enabled(X86_FEATURE_MPX)) { |
@@ -462,13 +461,11 @@ do_general_protection(struct pt_regs *regs, long error_code) | |||
462 | prev_state = exception_enter(); | 461 | prev_state = exception_enter(); |
463 | conditional_sti(regs); | 462 | conditional_sti(regs); |
464 | 463 | ||
465 | #ifdef CONFIG_X86_32 | 464 | if (v8086_mode(regs)) { |
466 | if (regs->flags & X86_VM_MASK) { | ||
467 | local_irq_enable(); | 465 | local_irq_enable(); |
468 | handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); | 466 | handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); |
469 | goto exit; | 467 | goto exit; |
470 | } | 468 | } |
471 | #endif | ||
472 | 469 | ||
473 | tsk = current; | 470 | tsk = current; |
474 | if (!user_mode(regs)) { | 471 | if (!user_mode(regs)) { |
@@ -587,7 +584,7 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) | |||
587 | /* Copy the remainder of the stack from the current stack. */ | 584 | /* Copy the remainder of the stack from the current stack. */ |
588 | memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip)); | 585 | memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip)); |
589 | 586 | ||
590 | BUG_ON(!user_mode_vm(&new_stack->regs)); | 587 | BUG_ON(!user_mode(&new_stack->regs)); |
591 | return new_stack; | 588 | return new_stack; |
592 | } | 589 | } |
593 | NOKPROBE_SYMBOL(fixup_bad_iret); | 590 | NOKPROBE_SYMBOL(fixup_bad_iret); |
@@ -637,7 +634,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) | |||
637 | * then it's very likely the result of an icebp/int01 trap. | 634 | * then it's very likely the result of an icebp/int01 trap. |
638 | * User wants a sigtrap for that. | 635 | * User wants a sigtrap for that. |
639 | */ | 636 | */ |
640 | if (!dr6 && user_mode_vm(regs)) | 637 | if (!dr6 && user_mode(regs)) |
641 | user_icebp = 1; | 638 | user_icebp = 1; |
642 | 639 | ||
643 | /* Catch kmemcheck conditions first of all! */ | 640 | /* Catch kmemcheck conditions first of all! */ |
@@ -673,7 +670,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) | |||
673 | /* It's safe to allow irq's after DR6 has been saved */ | 670 | /* It's safe to allow irq's after DR6 has been saved */ |
674 | preempt_conditional_sti(regs); | 671 | preempt_conditional_sti(regs); |
675 | 672 | ||
676 | if (regs->flags & X86_VM_MASK) { | 673 | if (v8086_mode(regs)) { |
677 | handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, | 674 | handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, |
678 | X86_TRAP_DB); | 675 | X86_TRAP_DB); |
679 | preempt_conditional_cli(regs); | 676 | preempt_conditional_cli(regs); |
@@ -721,7 +718,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) | |||
721 | return; | 718 | return; |
722 | conditional_sti(regs); | 719 | conditional_sti(regs); |
723 | 720 | ||
724 | if (!user_mode_vm(regs)) | 721 | if (!user_mode(regs)) |
725 | { | 722 | { |
726 | if (!fixup_exception(regs)) { | 723 | if (!fixup_exception(regs)) { |
727 | task->thread.error_code = error_code; | 724 | task->thread.error_code = error_code; |
@@ -734,7 +731,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) | |||
734 | /* | 731 | /* |
735 | * Save the info for the exception handler and clear the error. | 732 | * Save the info for the exception handler and clear the error. |
736 | */ | 733 | */ |
737 | save_init_fpu(task); | 734 | unlazy_fpu(task); |
738 | task->thread.trap_nr = trapnr; | 735 | task->thread.trap_nr = trapnr; |
739 | task->thread.error_code = error_code; | 736 | task->thread.error_code = error_code; |
740 | info.si_signo = SIGFPE; | 737 | info.si_signo = SIGFPE; |
@@ -863,7 +860,7 @@ void math_state_restore(void) | |||
863 | kernel_fpu_disable(); | 860 | kernel_fpu_disable(); |
864 | __thread_fpu_begin(tsk); | 861 | __thread_fpu_begin(tsk); |
865 | if (unlikely(restore_fpu_checking(tsk))) { | 862 | if (unlikely(restore_fpu_checking(tsk))) { |
866 | drop_init_fpu(tsk); | 863 | fpu_reset_state(tsk); |
867 | force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); | 864 | force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); |
868 | } else { | 865 | } else { |
869 | tsk->thread.fpu_counter++; | 866 | tsk->thread.fpu_counter++; |
@@ -925,9 +922,21 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) | |||
925 | /* Set of traps needed for early debugging. */ | 922 | /* Set of traps needed for early debugging. */ |
926 | void __init early_trap_init(void) | 923 | void __init early_trap_init(void) |
927 | { | 924 | { |
928 | set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); | 925 | /* |
926 | * Don't use IST to set DEBUG_STACK as it doesn't work until TSS | ||
927 | * is ready in cpu_init() <-- trap_init(). Before trap_init(), | ||
928 | * the CPU runs at ring 0, so it is impossible to hit an invalid | ||
929 | * stack. Using the original stack works well enough at this | ||
930 | * early stage. DEBUG_STACK will be set up after cpu_init() in | ||
931 | * trap_init(). | ||
932 | * | ||
933 | * We don't need to set trace_idt_table like set_intr_gate(), | ||
934 | * since we don't have trace_debug and it will be reset to | ||
935 | * 'debug' in trap_init() by set_intr_gate_ist(). | ||
936 | */ | ||
937 | set_intr_gate_notrace(X86_TRAP_DB, debug); | ||
929 | /* int3 can be called from all */ | 938 | /* int3 can be called from all */ |
930 | set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); | 939 | set_system_intr_gate(X86_TRAP_BP, &int3); |
931 | #ifdef CONFIG_X86_32 | 940 | #ifdef CONFIG_X86_32 |
932 | set_intr_gate(X86_TRAP_PF, page_fault); | 941 | set_intr_gate(X86_TRAP_PF, page_fault); |
933 | #endif | 942 | #endif |
@@ -1005,6 +1014,15 @@ void __init trap_init(void) | |||
1005 | */ | 1014 | */ |
1006 | cpu_init(); | 1015 | cpu_init(); |
1007 | 1016 | ||
1017 | /* | ||
1018 | * X86_TRAP_DB and X86_TRAP_BP have been set | ||
1019 | * in early_trap_init(). However, IST works only after | ||
1020 | * cpu_init() loads TSS. See comments in early_trap_init(). | ||
1021 | */ | ||
1022 | set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); | ||
1023 | /* int3 can be called from all */ | ||
1024 | set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); | ||
1025 | |||
1008 | x86_init.irqs.trap_init(); | 1026 | x86_init.irqs.trap_init(); |
1009 | 1027 | ||
1010 | #ifdef CONFIG_X86_64 | 1028 | #ifdef CONFIG_X86_64 |
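Taken together, the two traps.c hunks split the #DB/#BP gate setup into a pre-TSS and a post-TSS phase, because an IST gate only works once cpu_init() has loaded the TSS that holds the IST stack pointers. The resulting boot ordering is roughly as follows (early_trap_init() is assumed to be reached from early setup_arch() code):

	early_trap_init():
		set_intr_gate_notrace(X86_TRAP_DB, debug);	/* plain gate, current stack  */
		set_system_intr_gate(X86_TRAP_BP, &int3);
	...
	trap_init():
		cpu_init();					/* loads TSS, IST usable now  */
		set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
		set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);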
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 81f8adb0679e..0b81ad67da07 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c | |||
@@ -912,7 +912,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, | |||
912 | int ret = NOTIFY_DONE; | 912 | int ret = NOTIFY_DONE; |
913 | 913 | ||
914 | /* We are only interested in userspace traps */ | 914 | /* We are only interested in userspace traps */ |
915 | if (regs && !user_mode_vm(regs)) | 915 | if (regs && !user_mode(regs)) |
916 | return NOTIFY_DONE; | 916 | return NOTIFY_DONE; |
917 | 917 | ||
918 | switch (val) { | 918 | switch (val) { |
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index e8edcf52e069..fc9db6ef2a95 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -150,7 +150,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) | |||
150 | do_exit(SIGSEGV); | 150 | do_exit(SIGSEGV); |
151 | } | 151 | } |
152 | 152 | ||
153 | tss = &per_cpu(init_tss, get_cpu()); | 153 | tss = &per_cpu(cpu_tss, get_cpu()); |
154 | current->thread.sp0 = current->thread.saved_sp0; | 154 | current->thread.sp0 = current->thread.saved_sp0; |
155 | current->thread.sysenter_cs = __KERNEL_CS; | 155 | current->thread.sysenter_cs = __KERNEL_CS; |
156 | load_sp0(tss, ¤t->thread); | 156 | load_sp0(tss, ¤t->thread); |
@@ -318,7 +318,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
318 | tsk->thread.saved_fs = info->regs32->fs; | 318 | tsk->thread.saved_fs = info->regs32->fs; |
319 | tsk->thread.saved_gs = get_user_gs(info->regs32); | 319 | tsk->thread.saved_gs = get_user_gs(info->regs32); |
320 | 320 | ||
321 | tss = &per_cpu(init_tss, get_cpu()); | 321 | tss = &per_cpu(cpu_tss, get_cpu()); |
322 | tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; | 322 | tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; |
323 | if (cpu_has_sep) | 323 | if (cpu_has_sep) |
324 | tsk->thread.sysenter_cs = 0; | 324 | tsk->thread.sysenter_cs = 0; |
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index cdc6cf903078..87a815b85f3e 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -342,7 +342,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) | |||
342 | config_enabled(CONFIG_IA32_EMULATION)); | 342 | config_enabled(CONFIG_IA32_EMULATION)); |
343 | 343 | ||
344 | if (!buf) { | 344 | if (!buf) { |
345 | drop_init_fpu(tsk); | 345 | fpu_reset_state(tsk); |
346 | return 0; | 346 | return 0; |
347 | } | 347 | } |
348 | 348 | ||
@@ -416,7 +416,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) | |||
416 | */ | 416 | */ |
417 | user_fpu_begin(); | 417 | user_fpu_begin(); |
418 | if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) { | 418 | if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) { |
419 | drop_init_fpu(tsk); | 419 | fpu_reset_state(tsk); |
420 | return -1; | 420 | return -1; |
421 | } | 421 | } |
422 | } | 422 | } |
@@ -678,19 +678,13 @@ void xsave_init(void) | |||
678 | this_func(); | 678 | this_func(); |
679 | } | 679 | } |
680 | 680 | ||
681 | static inline void __init eager_fpu_init_bp(void) | 681 | /* |
682 | { | 682 | * setup_init_fpu_buf() is __init and it is OK to call it here because |
683 | current->thread.fpu.state = | 683 | * init_xstate_buf is set up only once, during boot. |
684 | alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct)); | 684 | */ |
685 | if (!init_xstate_buf) | 685 | void __init_refok eager_fpu_init(void) |
686 | setup_init_fpu_buf(); | ||
687 | } | ||
688 | |||
689 | void eager_fpu_init(void) | ||
690 | { | 686 | { |
691 | static __refdata void (*boot_func)(void) = eager_fpu_init_bp; | 687 | WARN_ON(used_math()); |
692 | |||
693 | clear_used_math(); | ||
694 | current_thread_info()->status = 0; | 688 | current_thread_info()->status = 0; |
695 | 689 | ||
696 | if (eagerfpu == ENABLE) | 690 | if (eagerfpu == ENABLE) |
@@ -701,21 +695,8 @@ void eager_fpu_init(void) | |||
701 | return; | 695 | return; |
702 | } | 696 | } |
703 | 697 | ||
704 | if (boot_func) { | 698 | if (!init_xstate_buf) |
705 | boot_func(); | 699 | setup_init_fpu_buf(); |
706 | boot_func = NULL; | ||
707 | } | ||
708 | |||
709 | /* | ||
710 | * This is same as math_state_restore(). But use_xsave() is | ||
711 | * not yet patched to use math_state_restore(). | ||
712 | */ | ||
713 | init_fpu(current); | ||
714 | __thread_fpu_begin(current); | ||
715 | if (cpu_has_xsave) | ||
716 | xrstor_state(init_xstate_buf, -1); | ||
717 | else | ||
718 | fxrstor_checking(&init_xstate_buf->i387); | ||
719 | } | 700 | } |
720 | 701 | ||
721 | /* | 702 | /* |
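__init_refok on eager_fpu_init() exists to silence the section-mismatch warning that modpost would otherwise emit for a non-__init function calling the __init setup_init_fpu_buf(); the reference is safe because init_xstate_buf is populated during boot, so the guarded call can never be reached after init memory has been freed. In outline:

	void __init_refok eager_fpu_init(void)		/* kept out of .init.text        */
	{
		...
		if (!init_xstate_buf)
			setup_init_fpu_buf();		/* __init; only runs during boot */
	}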
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 08f790dfadc9..16e8f962eaad 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
@@ -1,5 +1,5 @@ | |||
1 | 1 | ||
2 | ccflags-y += -Ivirt/kvm -Iarch/x86/kvm | 2 | ccflags-y += -Iarch/x86/kvm |
3 | 3 | ||
4 | CFLAGS_x86.o := -I. | 4 | CFLAGS_x86.o := -I. |
5 | CFLAGS_svm.o := -I. | 5 | CFLAGS_svm.o := -I. |
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 8a80737ee6e6..59b69f6a2844 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -104,6 +104,9 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) | |||
104 | ((best->eax & 0xff00) >> 8) != 0) | 104 | ((best->eax & 0xff00) >> 8) != 0) |
105 | return -EINVAL; | 105 | return -EINVAL; |
106 | 106 | ||
107 | /* Update physical-address width */ | ||
108 | vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); | ||
109 | |||
107 | kvm_pmu_cpuid_update(vcpu); | 110 | kvm_pmu_cpuid_update(vcpu); |
108 | return 0; | 111 | return 0; |
109 | } | 112 | } |
@@ -135,6 +138,21 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) | |||
135 | } | 138 | } |
136 | } | 139 | } |
137 | 140 | ||
141 | int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu) | ||
142 | { | ||
143 | struct kvm_cpuid_entry2 *best; | ||
144 | |||
145 | best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); | ||
146 | if (!best || best->eax < 0x80000008) | ||
147 | goto not_found; | ||
148 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | ||
149 | if (best) | ||
150 | return best->eax & 0xff; | ||
151 | not_found: | ||
152 | return 36; | ||
153 | } | ||
154 | EXPORT_SYMBOL_GPL(cpuid_query_maxphyaddr); | ||
155 | |||
138 | /* when an old userspace process fills a new kernel module */ | 156 | /* when an old userspace process fills a new kernel module */ |
139 | int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | 157 | int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, |
140 | struct kvm_cpuid *cpuid, | 158 | struct kvm_cpuid *cpuid, |
@@ -757,21 +775,6 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | |||
757 | } | 775 | } |
758 | EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); | 776 | EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); |
759 | 777 | ||
760 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | ||
761 | { | ||
762 | struct kvm_cpuid_entry2 *best; | ||
763 | |||
764 | best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); | ||
765 | if (!best || best->eax < 0x80000008) | ||
766 | goto not_found; | ||
767 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | ||
768 | if (best) | ||
769 | return best->eax & 0xff; | ||
770 | not_found: | ||
771 | return 36; | ||
772 | } | ||
773 | EXPORT_SYMBOL_GPL(cpuid_maxphyaddr); | ||
774 | |||
775 | /* | 778 | /* |
776 | * If no match is found, check whether we exceed the vCPU's limit | 779 | * If no match is found, check whether we exceed the vCPU's limit |
777 | * and return the content of the highest valid _standard_ leaf instead. | 780 | * and return the content of the highest valid _standard_ leaf instead. |
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 4452eedfaedd..c3b1ad9fca81 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h | |||
@@ -20,13 +20,19 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | |||
20 | struct kvm_cpuid_entry2 __user *entries); | 20 | struct kvm_cpuid_entry2 __user *entries); |
21 | void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); | 21 | void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); |
22 | 22 | ||
23 | int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu); | ||
24 | |||
25 | static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | ||
26 | { | ||
27 | return vcpu->arch.maxphyaddr; | ||
28 | } | ||
23 | 29 | ||
24 | static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) | 30 | static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) |
25 | { | 31 | { |
26 | struct kvm_cpuid_entry2 *best; | 32 | struct kvm_cpuid_entry2 *best; |
27 | 33 | ||
28 | if (!static_cpu_has(X86_FEATURE_XSAVE)) | 34 | if (!static_cpu_has(X86_FEATURE_XSAVE)) |
29 | return 0; | 35 | return false; |
30 | 36 | ||
31 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | 37 | best = kvm_find_cpuid_entry(vcpu, 1, 0); |
32 | return best && (best->ecx & bit(X86_FEATURE_XSAVE)); | 38 | return best && (best->ecx & bit(X86_FEATURE_XSAVE)); |
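Caching the physical-address width turns cpuid_maxphyaddr() into a plain field read on the hot path, while the CPUID-entry walk survives as cpuid_query_maxphyaddr() and is rerun by kvm_update_cpuid() whenever userspace rewrites the guest CPUID. The intended split, roughly (reserved_bits_fault() is a hypothetical stand-in for caller-specific handling):

	/* slow path: whenever the guest CPUID tables are (re)written */
	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);	/* walks leaf 0x80000008 */

	/* hot path: e.g. checking a guest physical address against the guest's width */
	if (gpa >> cpuid_maxphyaddr(vcpu))	/* now just reads vcpu->arch.maxphyaddr  */
		return reserved_bits_fault();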
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 106c01557f2b..630bcb0d7a04 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -248,27 +248,7 @@ struct mode_dual { | |||
248 | struct opcode mode64; | 248 | struct opcode mode64; |
249 | }; | 249 | }; |
250 | 250 | ||
251 | /* EFLAGS bit definitions. */ | ||
252 | #define EFLG_ID (1<<21) | ||
253 | #define EFLG_VIP (1<<20) | ||
254 | #define EFLG_VIF (1<<19) | ||
255 | #define EFLG_AC (1<<18) | ||
256 | #define EFLG_VM (1<<17) | ||
257 | #define EFLG_RF (1<<16) | ||
258 | #define EFLG_IOPL (3<<12) | ||
259 | #define EFLG_NT (1<<14) | ||
260 | #define EFLG_OF (1<<11) | ||
261 | #define EFLG_DF (1<<10) | ||
262 | #define EFLG_IF (1<<9) | ||
263 | #define EFLG_TF (1<<8) | ||
264 | #define EFLG_SF (1<<7) | ||
265 | #define EFLG_ZF (1<<6) | ||
266 | #define EFLG_AF (1<<4) | ||
267 | #define EFLG_PF (1<<2) | ||
268 | #define EFLG_CF (1<<0) | ||
269 | |||
270 | #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a | 251 | #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a |
271 | #define EFLG_RESERVED_ONE_MASK 2 | ||
272 | 252 | ||
273 | enum x86_transfer_type { | 253 | enum x86_transfer_type { |
274 | X86_TRANSFER_NONE, | 254 | X86_TRANSFER_NONE, |
@@ -317,7 +297,8 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) | |||
317 | * These EFLAGS bits are restored from saved value during emulation, and | 297 | * These EFLAGS bits are restored from saved value during emulation, and |
318 | * any changes are written back to the saved value after emulation. | 298 | * any changes are written back to the saved value after emulation. |
319 | */ | 299 | */ |
320 | #define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF) | 300 | #define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\ |
301 | X86_EFLAGS_PF|X86_EFLAGS_CF) | ||
321 | 302 | ||
322 | #ifdef CONFIG_X86_64 | 303 | #ifdef CONFIG_X86_64 |
323 | #define ON64(x) x | 304 | #define ON64(x) x |
@@ -478,6 +459,25 @@ static void assign_masked(ulong *dest, ulong src, ulong mask) | |||
478 | *dest = (*dest & ~mask) | (src & mask); | 459 | *dest = (*dest & ~mask) | (src & mask); |
479 | } | 460 | } |
480 | 461 | ||
462 | static void assign_register(unsigned long *reg, u64 val, int bytes) | ||
463 | { | ||
464 | /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */ | ||
465 | switch (bytes) { | ||
466 | case 1: | ||
467 | *(u8 *)reg = (u8)val; | ||
468 | break; | ||
469 | case 2: | ||
470 | *(u16 *)reg = (u16)val; | ||
471 | break; | ||
472 | case 4: | ||
473 | *reg = (u32)val; | ||
474 | break; /* 64b: zero-extend */ | ||
475 | case 8: | ||
476 | *reg = val; | ||
477 | break; | ||
478 | } | ||
479 | } | ||
480 | |||
481 | static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt) | 481 | static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt) |
482 | { | 482 | { |
483 | return (1UL << (ctxt->ad_bytes << 3)) - 1; | 483 | return (1UL << (ctxt->ad_bytes << 3)) - 1; |
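The "4-byte case *is* correct" remark refers to the x86-64 rule that writing a 32-bit destination zero-extends into the whole 64-bit register, while 8- and 16-bit writes leave the upper bits untouched; assign_register() mirrors that by casting the destination pointer for the narrow widths (which relies on x86 being little-endian) and assigning a truncated u32 for the 4-byte width. A stand-alone illustration of the same semantics:

	#include <stdio.h>
	#include <stdint.h>

	static void assign_register(unsigned long *reg, uint64_t val, int bytes)
	{
		switch (bytes) {
		case 1: *(uint8_t *)reg = (uint8_t)val;   break; /* upper 56 bits preserved */
		case 2: *(uint16_t *)reg = (uint16_t)val; break; /* upper 48 bits preserved */
		case 4: *reg = (uint32_t)val;             break; /* zero-extends to 64 bits */
		case 8: *reg = val;                       break;
		}
	}

	int main(void)
	{
		unsigned long reg = 0xffffffffffffffffUL;

		assign_register(&reg, 0x12, 2);
		printf("%#lx\n", reg);	/* 0xffffffffffff0012 */
		assign_register(&reg, 0x12, 4);
		printf("%#lx\n", reg);	/* 0x12: upper half cleared */
		return 0;
	}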
@@ -943,6 +943,22 @@ FASTOP2(xadd); | |||
943 | 943 | ||
944 | FASTOP2R(cmp, cmp_r); | 944 | FASTOP2R(cmp, cmp_r); |
945 | 945 | ||
946 | static int em_bsf_c(struct x86_emulate_ctxt *ctxt) | ||
947 | { | ||
948 | /* If src is zero, do not write back, but update flags */ | ||
949 | if (ctxt->src.val == 0) | ||
950 | ctxt->dst.type = OP_NONE; | ||
951 | return fastop(ctxt, em_bsf); | ||
952 | } | ||
953 | |||
954 | static int em_bsr_c(struct x86_emulate_ctxt *ctxt) | ||
955 | { | ||
956 | /* If src is zero, do not write back, but update flags */ | ||
957 | if (ctxt->src.val == 0) | ||
958 | ctxt->dst.type = OP_NONE; | ||
959 | return fastop(ctxt, em_bsr); | ||
960 | } | ||
961 | |||
946 | static u8 test_cc(unsigned int condition, unsigned long flags) | 962 | static u8 test_cc(unsigned int condition, unsigned long flags) |
947 | { | 963 | { |
948 | u8 rc; | 964 | u8 rc; |
@@ -1399,7 +1415,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | |||
1399 | unsigned int in_page, n; | 1415 | unsigned int in_page, n; |
1400 | unsigned int count = ctxt->rep_prefix ? | 1416 | unsigned int count = ctxt->rep_prefix ? |
1401 | address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1; | 1417 | address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1; |
1402 | in_page = (ctxt->eflags & EFLG_DF) ? | 1418 | in_page = (ctxt->eflags & X86_EFLAGS_DF) ? |
1403 | offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) : | 1419 | offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) : |
1404 | PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)); | 1420 | PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)); |
1405 | n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count); | 1421 | n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count); |
@@ -1412,7 +1428,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | |||
1412 | } | 1428 | } |
1413 | 1429 | ||
1414 | if (ctxt->rep_prefix && (ctxt->d & String) && | 1430 | if (ctxt->rep_prefix && (ctxt->d & String) && |
1415 | !(ctxt->eflags & EFLG_DF)) { | 1431 | !(ctxt->eflags & X86_EFLAGS_DF)) { |
1416 | ctxt->dst.data = rc->data + rc->pos; | 1432 | ctxt->dst.data = rc->data + rc->pos; |
1417 | ctxt->dst.type = OP_MEM_STR; | 1433 | ctxt->dst.type = OP_MEM_STR; |
1418 | ctxt->dst.count = (rc->end - rc->pos) / size; | 1434 | ctxt->dst.count = (rc->end - rc->pos) / size; |
@@ -1691,21 +1707,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1691 | 1707 | ||
1692 | static void write_register_operand(struct operand *op) | 1708 | static void write_register_operand(struct operand *op) |
1693 | { | 1709 | { |
1694 | /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */ | 1710 | return assign_register(op->addr.reg, op->val, op->bytes); |
1695 | switch (op->bytes) { | ||
1696 | case 1: | ||
1697 | *(u8 *)op->addr.reg = (u8)op->val; | ||
1698 | break; | ||
1699 | case 2: | ||
1700 | *(u16 *)op->addr.reg = (u16)op->val; | ||
1701 | break; | ||
1702 | case 4: | ||
1703 | *op->addr.reg = (u32)op->val; | ||
1704 | break; /* 64b: zero-extend */ | ||
1705 | case 8: | ||
1706 | *op->addr.reg = op->val; | ||
1707 | break; | ||
1708 | } | ||
1709 | } | 1711 | } |
1710 | 1712 | ||
1711 | static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op) | 1713 | static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op) |
@@ -1792,32 +1794,34 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
1792 | { | 1794 | { |
1793 | int rc; | 1795 | int rc; |
1794 | unsigned long val, change_mask; | 1796 | unsigned long val, change_mask; |
1795 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 1797 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT; |
1796 | int cpl = ctxt->ops->cpl(ctxt); | 1798 | int cpl = ctxt->ops->cpl(ctxt); |
1797 | 1799 | ||
1798 | rc = emulate_pop(ctxt, &val, len); | 1800 | rc = emulate_pop(ctxt, &val, len); |
1799 | if (rc != X86EMUL_CONTINUE) | 1801 | if (rc != X86EMUL_CONTINUE) |
1800 | return rc; | 1802 | return rc; |
1801 | 1803 | ||
1802 | change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF | 1804 | change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | |
1803 | | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_AC | EFLG_ID; | 1805 | X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF | |
1806 | X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT | | ||
1807 | X86_EFLAGS_AC | X86_EFLAGS_ID; | ||
1804 | 1808 | ||
1805 | switch(ctxt->mode) { | 1809 | switch(ctxt->mode) { |
1806 | case X86EMUL_MODE_PROT64: | 1810 | case X86EMUL_MODE_PROT64: |
1807 | case X86EMUL_MODE_PROT32: | 1811 | case X86EMUL_MODE_PROT32: |
1808 | case X86EMUL_MODE_PROT16: | 1812 | case X86EMUL_MODE_PROT16: |
1809 | if (cpl == 0) | 1813 | if (cpl == 0) |
1810 | change_mask |= EFLG_IOPL; | 1814 | change_mask |= X86_EFLAGS_IOPL; |
1811 | if (cpl <= iopl) | 1815 | if (cpl <= iopl) |
1812 | change_mask |= EFLG_IF; | 1816 | change_mask |= X86_EFLAGS_IF; |
1813 | break; | 1817 | break; |
1814 | case X86EMUL_MODE_VM86: | 1818 | case X86EMUL_MODE_VM86: |
1815 | if (iopl < 3) | 1819 | if (iopl < 3) |
1816 | return emulate_gp(ctxt, 0); | 1820 | return emulate_gp(ctxt, 0); |
1817 | change_mask |= EFLG_IF; | 1821 | change_mask |= X86_EFLAGS_IF; |
1818 | break; | 1822 | break; |
1819 | default: /* real mode */ | 1823 | default: /* real mode */ |
1820 | change_mask |= (EFLG_IOPL | EFLG_IF); | 1824 | change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF); |
1821 | break; | 1825 | break; |
1822 | } | 1826 | } |
1823 | 1827 | ||
@@ -1918,7 +1922,7 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt) | |||
1918 | 1922 | ||
1919 | static int em_pushf(struct x86_emulate_ctxt *ctxt) | 1923 | static int em_pushf(struct x86_emulate_ctxt *ctxt) |
1920 | { | 1924 | { |
1921 | ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM; | 1925 | ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM; |
1922 | return em_push(ctxt); | 1926 | return em_push(ctxt); |
1923 | } | 1927 | } |
1924 | 1928 | ||
@@ -1926,6 +1930,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) | |||
1926 | { | 1930 | { |
1927 | int rc = X86EMUL_CONTINUE; | 1931 | int rc = X86EMUL_CONTINUE; |
1928 | int reg = VCPU_REGS_RDI; | 1932 | int reg = VCPU_REGS_RDI; |
1933 | u32 val; | ||
1929 | 1934 | ||
1930 | while (reg >= VCPU_REGS_RAX) { | 1935 | while (reg >= VCPU_REGS_RAX) { |
1931 | if (reg == VCPU_REGS_RSP) { | 1936 | if (reg == VCPU_REGS_RSP) { |
@@ -1933,9 +1938,10 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) | |||
1933 | --reg; | 1938 | --reg; |
1934 | } | 1939 | } |
1935 | 1940 | ||
1936 | rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes); | 1941 | rc = emulate_pop(ctxt, &val, ctxt->op_bytes); |
1937 | if (rc != X86EMUL_CONTINUE) | 1942 | if (rc != X86EMUL_CONTINUE) |
1938 | break; | 1943 | break; |
1944 | assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes); | ||
1939 | --reg; | 1945 | --reg; |
1940 | } | 1946 | } |
1941 | return rc; | 1947 | return rc; |
@@ -1956,7 +1962,7 @@ static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) | |||
1956 | if (rc != X86EMUL_CONTINUE) | 1962 | if (rc != X86EMUL_CONTINUE) |
1957 | return rc; | 1963 | return rc; |
1958 | 1964 | ||
1959 | ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); | 1965 | ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC); |
1960 | 1966 | ||
1961 | ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS); | 1967 | ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS); |
1962 | rc = em_push(ctxt); | 1968 | rc = em_push(ctxt); |
@@ -2022,10 +2028,14 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt) | |||
2022 | unsigned long temp_eip = 0; | 2028 | unsigned long temp_eip = 0; |
2023 | unsigned long temp_eflags = 0; | 2029 | unsigned long temp_eflags = 0; |
2024 | unsigned long cs = 0; | 2030 | unsigned long cs = 0; |
2025 | unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_TF | | 2031 | unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | |
2026 | EFLG_IF | EFLG_DF | EFLG_OF | EFLG_IOPL | EFLG_NT | EFLG_RF | | 2032 | X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF | |
2027 | EFLG_AC | EFLG_ID | (1 << 1); /* Last one is the reserved bit */ | 2033 | X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF | |
2028 | unsigned long vm86_mask = EFLG_VM | EFLG_VIF | EFLG_VIP; | 2034 | X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF | |
2035 | X86_EFLAGS_AC | X86_EFLAGS_ID | | ||
2036 | X86_EFLAGS_FIXED; | ||
2037 | unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF | | ||
2038 | X86_EFLAGS_VIP; | ||
2029 | 2039 | ||
2030 | /* TODO: Add stack limit check */ | 2040 | /* TODO: Add stack limit check */ |
2031 | 2041 | ||
@@ -2054,7 +2064,6 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt) | |||
2054 | 2064 | ||
2055 | ctxt->_eip = temp_eip; | 2065 | ctxt->_eip = temp_eip; |
2056 | 2066 | ||
2057 | |||
2058 | if (ctxt->op_bytes == 4) | 2067 | if (ctxt->op_bytes == 4) |
2059 | ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask)); | 2068 | ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask)); |
2060 | else if (ctxt->op_bytes == 2) { | 2069 | else if (ctxt->op_bytes == 2) { |
@@ -2063,7 +2072,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt) | |||
2063 | } | 2072 | } |
2064 | 2073 | ||
2065 | ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */ | 2074 | ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */ |
2066 | ctxt->eflags |= EFLG_RESERVED_ONE_MASK; | 2075 | ctxt->eflags |= X86_EFLAGS_FIXED; |
2067 | ctxt->ops->set_nmi_mask(ctxt, false); | 2076 | ctxt->ops->set_nmi_mask(ctxt, false); |
2068 | 2077 | ||
2069 | return rc; | 2078 | return rc; |
@@ -2145,12 +2154,12 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) | |||
2145 | ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) { | 2154 | ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) { |
2146 | *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0); | 2155 | *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0); |
2147 | *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32); | 2156 | *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32); |
2148 | ctxt->eflags &= ~EFLG_ZF; | 2157 | ctxt->eflags &= ~X86_EFLAGS_ZF; |
2149 | } else { | 2158 | } else { |
2150 | ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) | | 2159 | ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) | |
2151 | (u32) reg_read(ctxt, VCPU_REGS_RBX); | 2160 | (u32) reg_read(ctxt, VCPU_REGS_RBX); |
2152 | 2161 | ||
2153 | ctxt->eflags |= EFLG_ZF; | 2162 | ctxt->eflags |= X86_EFLAGS_ZF; |
2154 | } | 2163 | } |
2155 | return X86EMUL_CONTINUE; | 2164 | return X86EMUL_CONTINUE; |
2156 | } | 2165 | } |
@@ -2222,7 +2231,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) | |||
2222 | ctxt->src.val = ctxt->dst.orig_val; | 2231 | ctxt->src.val = ctxt->dst.orig_val; |
2223 | fastop(ctxt, em_cmp); | 2232 | fastop(ctxt, em_cmp); |
2224 | 2233 | ||
2225 | if (ctxt->eflags & EFLG_ZF) { | 2234 | if (ctxt->eflags & X86_EFLAGS_ZF) { |
2226 | /* Success: write back to memory; no update of EAX */ | 2235 | /* Success: write back to memory; no update of EAX */ |
2227 | ctxt->src.type = OP_NONE; | 2236 | ctxt->src.type = OP_NONE; |
2228 | ctxt->dst.val = ctxt->src.orig_val; | 2237 | ctxt->dst.val = ctxt->src.orig_val; |
@@ -2381,14 +2390,14 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) | |||
2381 | 2390 | ||
2382 | ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); | 2391 | ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); |
2383 | ctxt->eflags &= ~msr_data; | 2392 | ctxt->eflags &= ~msr_data; |
2384 | ctxt->eflags |= EFLG_RESERVED_ONE_MASK; | 2393 | ctxt->eflags |= X86_EFLAGS_FIXED; |
2385 | #endif | 2394 | #endif |
2386 | } else { | 2395 | } else { |
2387 | /* legacy mode */ | 2396 | /* legacy mode */ |
2388 | ops->get_msr(ctxt, MSR_STAR, &msr_data); | 2397 | ops->get_msr(ctxt, MSR_STAR, &msr_data); |
2389 | ctxt->_eip = (u32)msr_data; | 2398 | ctxt->_eip = (u32)msr_data; |
2390 | 2399 | ||
2391 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF); | 2400 | ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); |
2392 | } | 2401 | } |
2393 | 2402 | ||
2394 | return X86EMUL_CONTINUE; | 2403 | return X86EMUL_CONTINUE; |
@@ -2425,8 +2434,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) | |||
2425 | if ((msr_data & 0xfffc) == 0x0) | 2434 | if ((msr_data & 0xfffc) == 0x0) |
2426 | return emulate_gp(ctxt, 0); | 2435 | return emulate_gp(ctxt, 0); |
2427 | 2436 | ||
2428 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF); | 2437 | ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); |
2429 | cs_sel = (u16)msr_data & ~SELECTOR_RPL_MASK; | 2438 | cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK; |
2430 | ss_sel = cs_sel + 8; | 2439 | ss_sel = cs_sel + 8; |
2431 | if (efer & EFER_LMA) { | 2440 | if (efer & EFER_LMA) { |
2432 | cs.d = 0; | 2441 | cs.d = 0; |
@@ -2493,8 +2502,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) | |||
2493 | return emulate_gp(ctxt, 0); | 2502 | return emulate_gp(ctxt, 0); |
2494 | break; | 2503 | break; |
2495 | } | 2504 | } |
2496 | cs_sel |= SELECTOR_RPL_MASK; | 2505 | cs_sel |= SEGMENT_RPL_MASK; |
2497 | ss_sel |= SELECTOR_RPL_MASK; | 2506 | ss_sel |= SEGMENT_RPL_MASK; |
2498 | 2507 | ||
2499 | ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); | 2508 | ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); |
2500 | ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); | 2509 | ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); |
@@ -2512,7 +2521,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) | |||
2512 | return false; | 2521 | return false; |
2513 | if (ctxt->mode == X86EMUL_MODE_VM86) | 2522 | if (ctxt->mode == X86EMUL_MODE_VM86) |
2514 | return true; | 2523 | return true; |
2515 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 2524 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT; |
2516 | return ctxt->ops->cpl(ctxt) > iopl; | 2525 | return ctxt->ops->cpl(ctxt) > iopl; |
2517 | } | 2526 | } |
2518 | 2527 | ||
@@ -2782,10 +2791,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | |||
2782 | return ret; | 2791 | return ret; |
2783 | ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, | 2792 | ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, |
2784 | X86_TRANSFER_TASK_SWITCH, NULL); | 2793 | X86_TRANSFER_TASK_SWITCH, NULL); |
2785 | if (ret != X86EMUL_CONTINUE) | ||
2786 | return ret; | ||
2787 | 2794 | ||
2788 | return X86EMUL_CONTINUE; | 2795 | return ret; |
2789 | } | 2796 | } |
2790 | 2797 | ||
2791 | static int task_switch_32(struct x86_emulate_ctxt *ctxt, | 2798 | static int task_switch_32(struct x86_emulate_ctxt *ctxt, |
@@ -2954,7 +2961,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2954 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg, | 2961 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg, |
2955 | struct operand *op) | 2962 | struct operand *op) |
2956 | { | 2963 | { |
2957 | int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count; | 2964 | int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count; |
2958 | 2965 | ||
2959 | register_address_increment(ctxt, reg, df * op->bytes); | 2966 | register_address_increment(ctxt, reg, df * op->bytes); |
2960 | op->addr.mem.ea = register_address(ctxt, reg); | 2967 | op->addr.mem.ea = register_address(ctxt, reg); |
@@ -3323,7 +3330,7 @@ static int em_clts(struct x86_emulate_ctxt *ctxt) | |||
3323 | return X86EMUL_CONTINUE; | 3330 | return X86EMUL_CONTINUE; |
3324 | } | 3331 | } |
3325 | 3332 | ||
3326 | static int em_vmcall(struct x86_emulate_ctxt *ctxt) | 3333 | static int em_hypercall(struct x86_emulate_ctxt *ctxt) |
3327 | { | 3334 | { |
3328 | int rc = ctxt->ops->fix_hypercall(ctxt); | 3335 | int rc = ctxt->ops->fix_hypercall(ctxt); |
3329 | 3336 | ||
@@ -3395,17 +3402,6 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt) | |||
3395 | return em_lgdt_lidt(ctxt, true); | 3402 | return em_lgdt_lidt(ctxt, true); |
3396 | } | 3403 | } |
3397 | 3404 | ||
3398 | static int em_vmmcall(struct x86_emulate_ctxt *ctxt) | ||
3399 | { | ||
3400 | int rc; | ||
3401 | |||
3402 | rc = ctxt->ops->fix_hypercall(ctxt); | ||
3403 | |||
3404 | /* Disable writeback. */ | ||
3405 | ctxt->dst.type = OP_NONE; | ||
3406 | return rc; | ||
3407 | } | ||
3408 | |||
3409 | static int em_lidt(struct x86_emulate_ctxt *ctxt) | 3405 | static int em_lidt(struct x86_emulate_ctxt *ctxt) |
3410 | { | 3406 | { |
3411 | return em_lgdt_lidt(ctxt, false); | 3407 | return em_lgdt_lidt(ctxt, false); |
@@ -3504,7 +3500,8 @@ static int em_sahf(struct x86_emulate_ctxt *ctxt) | |||
3504 | { | 3500 | { |
3505 | u32 flags; | 3501 | u32 flags; |
3506 | 3502 | ||
3507 | flags = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF; | 3503 | flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF | |
3504 | X86_EFLAGS_SF; | ||
3508 | flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8; | 3505 | flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8; |
3509 | 3506 | ||
3510 | ctxt->eflags &= ~0xffUL; | 3507 | ctxt->eflags &= ~0xffUL; |
@@ -3769,7 +3766,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) | |||
3769 | 3766 | ||
3770 | static const struct opcode group7_rm0[] = { | 3767 | static const struct opcode group7_rm0[] = { |
3771 | N, | 3768 | N, |
3772 | I(SrcNone | Priv | EmulateOnUD, em_vmcall), | 3769 | I(SrcNone | Priv | EmulateOnUD, em_hypercall), |
3773 | N, N, N, N, N, N, | 3770 | N, N, N, N, N, N, |
3774 | }; | 3771 | }; |
3775 | 3772 | ||
@@ -3781,7 +3778,7 @@ static const struct opcode group7_rm1[] = { | |||
3781 | 3778 | ||
3782 | static const struct opcode group7_rm3[] = { | 3779 | static const struct opcode group7_rm3[] = { |
3783 | DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa), | 3780 | DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa), |
3784 | II(SrcNone | Prot | EmulateOnUD, em_vmmcall, vmmcall), | 3781 | II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall), |
3785 | DIP(SrcNone | Prot | Priv, vmload, check_svme_pa), | 3782 | DIP(SrcNone | Prot | Priv, vmload, check_svme_pa), |
3786 | DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa), | 3783 | DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa), |
3787 | DIP(SrcNone | Prot | Priv, stgi, check_svme), | 3784 | DIP(SrcNone | Prot | Priv, stgi, check_svme), |
@@ -4192,7 +4189,8 @@ static const struct opcode twobyte_table[256] = { | |||
4192 | N, N, | 4189 | N, N, |
4193 | G(BitOp, group8), | 4190 | G(BitOp, group8), |
4194 | F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), | 4191 | F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), |
4195 | F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr), | 4192 | I(DstReg | SrcMem | ModRM, em_bsf_c), |
4193 | I(DstReg | SrcMem | ModRM, em_bsr_c), | ||
4196 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 4194 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
4197 | /* 0xC0 - 0xC7 */ | 4195 | /* 0xC0 - 0xC7 */ |
4198 | F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), | 4196 | F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), |
@@ -4759,9 +4757,9 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) | |||
4759 | if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) || | 4757 | if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) || |
4760 | (ctxt->b == 0xae) || (ctxt->b == 0xaf)) | 4758 | (ctxt->b == 0xae) || (ctxt->b == 0xaf)) |
4761 | && (((ctxt->rep_prefix == REPE_PREFIX) && | 4759 | && (((ctxt->rep_prefix == REPE_PREFIX) && |
4762 | ((ctxt->eflags & EFLG_ZF) == 0)) | 4760 | ((ctxt->eflags & X86_EFLAGS_ZF) == 0)) |
4763 | || ((ctxt->rep_prefix == REPNE_PREFIX) && | 4761 | || ((ctxt->rep_prefix == REPNE_PREFIX) && |
4764 | ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)))) | 4762 | ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF)))) |
4765 | return true; | 4763 | return true; |
4766 | 4764 | ||
4767 | return false; | 4765 | return false; |
@@ -4913,7 +4911,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
4913 | /* All REP prefixes have the same first termination condition */ | 4911 | /* All REP prefixes have the same first termination condition */ |
4914 | if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) { | 4912 | if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) { |
4915 | ctxt->eip = ctxt->_eip; | 4913 | ctxt->eip = ctxt->_eip; |
4916 | ctxt->eflags &= ~EFLG_RF; | 4914 | ctxt->eflags &= ~X86_EFLAGS_RF; |
4917 | goto done; | 4915 | goto done; |
4918 | } | 4916 | } |
4919 | } | 4917 | } |
@@ -4963,9 +4961,9 @@ special_insn: | |||
4963 | } | 4961 | } |
4964 | 4962 | ||
4965 | if (ctxt->rep_prefix && (ctxt->d & String)) | 4963 | if (ctxt->rep_prefix && (ctxt->d & String)) |
4966 | ctxt->eflags |= EFLG_RF; | 4964 | ctxt->eflags |= X86_EFLAGS_RF; |
4967 | else | 4965 | else |
4968 | ctxt->eflags &= ~EFLG_RF; | 4966 | ctxt->eflags &= ~X86_EFLAGS_RF; |
4969 | 4967 | ||
4970 | if (ctxt->execute) { | 4968 | if (ctxt->execute) { |
4971 | if (ctxt->d & Fastop) { | 4969 | if (ctxt->d & Fastop) { |
@@ -5014,7 +5012,7 @@ special_insn: | |||
5014 | rc = emulate_int(ctxt, ctxt->src.val); | 5012 | rc = emulate_int(ctxt, ctxt->src.val); |
5015 | break; | 5013 | break; |
5016 | case 0xce: /* into */ | 5014 | case 0xce: /* into */ |
5017 | if (ctxt->eflags & EFLG_OF) | 5015 | if (ctxt->eflags & X86_EFLAGS_OF) |
5018 | rc = emulate_int(ctxt, 4); | 5016 | rc = emulate_int(ctxt, 4); |
5019 | break; | 5017 | break; |
5020 | case 0xe9: /* jmp rel */ | 5018 | case 0xe9: /* jmp rel */ |
@@ -5027,19 +5025,19 @@ special_insn: | |||
5027 | break; | 5025 | break; |
5028 | case 0xf5: /* cmc */ | 5026 | case 0xf5: /* cmc */ |
5029 | /* complement carry flag from eflags reg */ | 5027 | /* complement carry flag from eflags reg */ |
5030 | ctxt->eflags ^= EFLG_CF; | 5028 | ctxt->eflags ^= X86_EFLAGS_CF; |
5031 | break; | 5029 | break; |
5032 | case 0xf8: /* clc */ | 5030 | case 0xf8: /* clc */ |
5033 | ctxt->eflags &= ~EFLG_CF; | 5031 | ctxt->eflags &= ~X86_EFLAGS_CF; |
5034 | break; | 5032 | break; |
5035 | case 0xf9: /* stc */ | 5033 | case 0xf9: /* stc */ |
5036 | ctxt->eflags |= EFLG_CF; | 5034 | ctxt->eflags |= X86_EFLAGS_CF; |
5037 | break; | 5035 | break; |
5038 | case 0xfc: /* cld */ | 5036 | case 0xfc: /* cld */ |
5039 | ctxt->eflags &= ~EFLG_DF; | 5037 | ctxt->eflags &= ~X86_EFLAGS_DF; |
5040 | break; | 5038 | break; |
5041 | case 0xfd: /* std */ | 5039 | case 0xfd: /* std */ |
5042 | ctxt->eflags |= EFLG_DF; | 5040 | ctxt->eflags |= X86_EFLAGS_DF; |
5043 | break; | 5041 | break; |
5044 | default: | 5042 | default: |
5045 | goto cannot_emulate; | 5043 | goto cannot_emulate; |
@@ -5100,7 +5098,7 @@ writeback: | |||
5100 | } | 5098 | } |
5101 | goto done; /* skip rip writeback */ | 5099 | goto done; /* skip rip writeback */ |
5102 | } | 5100 | } |
5103 | ctxt->eflags &= ~EFLG_RF; | 5101 | ctxt->eflags &= ~X86_EFLAGS_RF; |
5104 | } | 5102 | } |
5105 | 5103 | ||
5106 | ctxt->eip = ctxt->_eip; | 5104 | ctxt->eip = ctxt->_eip; |
@@ -5137,8 +5135,7 @@ twobyte_insn: | |||
5137 | case 0x40 ... 0x4f: /* cmov */ | 5135 | case 0x40 ... 0x4f: /* cmov */ |
5138 | if (test_cc(ctxt->b, ctxt->eflags)) | 5136 | if (test_cc(ctxt->b, ctxt->eflags)) |
5139 | ctxt->dst.val = ctxt->src.val; | 5137 | ctxt->dst.val = ctxt->src.val; |
5140 | else if (ctxt->mode != X86EMUL_MODE_PROT64 || | 5138 | else if (ctxt->op_bytes != 4) |
5141 | ctxt->op_bytes != 4) | ||
5142 | ctxt->dst.type = OP_NONE; /* no writeback */ | 5139 | ctxt->dst.type = OP_NONE; /* no writeback */ |
5143 | break; | 5140 | break; |
5144 | case 0x80 ... 0x8f: /* jnz rel, etc*/ | 5141 | case 0x80 ... 0x8f: /* jnz rel, etc*/ |
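The emulate.c hunks above retire the emulator's private EFLG_* aliases in favour of the architectural X86_EFLAGS_* names (and X86_EFLAGS_IOPL_BIT in place of the local IOPL_SHIFT); the bit positions themselves do not change. As a stand-alone illustration of the flag handling in the 0xf5/0xf8/0xf9/0xfc/0xfd cases, here is a minimal user-space sketch. The constants are restated locally with their architectural values rather than pulled from the kernel header, so the snippet builds outside the tree:

/*
 * Toy version of the single-byte flag instructions the emulator handles
 * above.  Bit positions follow the architectural EFLAGS layout.
 */
#include <stdio.h>

#define X86_EFLAGS_CF (1UL << 0)  /* carry */
#define X86_EFLAGS_ZF (1UL << 6)  /* zero */
#define X86_EFLAGS_DF (1UL << 10) /* direction */
#define X86_EFLAGS_OF (1UL << 11) /* overflow */

static unsigned long emulate_flag_insn(unsigned long eflags, unsigned char opcode)
{
	switch (opcode) {
	case 0xf5: /* cmc: complement carry */
		eflags ^= X86_EFLAGS_CF;
		break;
	case 0xf8: /* clc: clear carry */
		eflags &= ~X86_EFLAGS_CF;
		break;
	case 0xf9: /* stc: set carry */
		eflags |= X86_EFLAGS_CF;
		break;
	case 0xfc: /* cld: clear direction */
		eflags &= ~X86_EFLAGS_DF;
		break;
	case 0xfd: /* std: set direction */
		eflags |= X86_EFLAGS_DF;
		break;
	}
	return eflags;
}

int main(void)
{
	unsigned long eflags = 0x2;	/* reserved bit 1 is always set */

	eflags = emulate_flag_insn(eflags, 0xf9);	/* stc */
	eflags = emulate_flag_insn(eflags, 0xfd);	/* std */
	printf("eflags = %#lx\n", eflags);		/* expect 0x403 */
	return 0;
}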
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 298781d4cfb4..4dce6f8b6129 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -443,7 +443,8 @@ static inline int pit_in_range(gpa_t addr) | |||
443 | (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH)); | 443 | (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH)); |
444 | } | 444 | } |
445 | 445 | ||
446 | static int pit_ioport_write(struct kvm_io_device *this, | 446 | static int pit_ioport_write(struct kvm_vcpu *vcpu, |
447 | struct kvm_io_device *this, | ||
447 | gpa_t addr, int len, const void *data) | 448 | gpa_t addr, int len, const void *data) |
448 | { | 449 | { |
449 | struct kvm_pit *pit = dev_to_pit(this); | 450 | struct kvm_pit *pit = dev_to_pit(this); |
@@ -519,7 +520,8 @@ static int pit_ioport_write(struct kvm_io_device *this, | |||
519 | return 0; | 520 | return 0; |
520 | } | 521 | } |
521 | 522 | ||
522 | static int pit_ioport_read(struct kvm_io_device *this, | 523 | static int pit_ioport_read(struct kvm_vcpu *vcpu, |
524 | struct kvm_io_device *this, | ||
523 | gpa_t addr, int len, void *data) | 525 | gpa_t addr, int len, void *data) |
524 | { | 526 | { |
525 | struct kvm_pit *pit = dev_to_pit(this); | 527 | struct kvm_pit *pit = dev_to_pit(this); |
@@ -589,7 +591,8 @@ static int pit_ioport_read(struct kvm_io_device *this, | |||
589 | return 0; | 591 | return 0; |
590 | } | 592 | } |
591 | 593 | ||
592 | static int speaker_ioport_write(struct kvm_io_device *this, | 594 | static int speaker_ioport_write(struct kvm_vcpu *vcpu, |
595 | struct kvm_io_device *this, | ||
593 | gpa_t addr, int len, const void *data) | 596 | gpa_t addr, int len, const void *data) |
594 | { | 597 | { |
595 | struct kvm_pit *pit = speaker_to_pit(this); | 598 | struct kvm_pit *pit = speaker_to_pit(this); |
@@ -606,8 +609,9 @@ static int speaker_ioport_write(struct kvm_io_device *this, | |||
606 | return 0; | 609 | return 0; |
607 | } | 610 | } |
608 | 611 | ||
609 | static int speaker_ioport_read(struct kvm_io_device *this, | 612 | static int speaker_ioport_read(struct kvm_vcpu *vcpu, |
610 | gpa_t addr, int len, void *data) | 613 | struct kvm_io_device *this, |
614 | gpa_t addr, int len, void *data) | ||
611 | { | 615 | { |
612 | struct kvm_pit *pit = speaker_to_pit(this); | 616 | struct kvm_pit *pit = speaker_to_pit(this); |
613 | struct kvm_kpit_state *pit_state = &pit->pit_state; | 617 | struct kvm_kpit_state *pit_state = &pit->pit_state; |
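Across i8254.c, i8259.c, ioapic.c and lapic.c the in-kernel device callbacks gain a leading struct kvm_vcpu * argument, so the kvm_io_device_ops read/write hooks now know which vcpu issued the access (the include also moves from the local "iodev.h" to <kvm/iodev.h>). A hedged, stand-alone sketch of what the reworked ops table presumably looks like after this series; every type here is a stub, not the real definition from <kvm/iodev.h> or <linux/kvm_host.h>:

#include <stdio.h>
#include <stddef.h>

typedef unsigned long long gpa_t;	/* stand-in for the kernel's gpa_t */
struct kvm_vcpu { int vcpu_id; };
struct kvm_io_device;

struct kvm_io_device_ops {
	/* read/write now carry the vcpu that triggered the MMIO/PIO access */
	int (*read)(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
		    gpa_t addr, int len, void *val);
	int (*write)(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
		     gpa_t addr, int len, const void *val);
};

struct kvm_io_device {
	const struct kvm_io_device_ops *ops;
};

static int dummy_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
		      gpa_t addr, int len, void *val)
{
	printf("vcpu%d read %d byte(s) at %#llx\n", vcpu->vcpu_id, len, addr);
	return 0;
}

static const struct kvm_io_device_ops dummy_ops = {
	.read = dummy_read,
};

int main(void)
{
	struct kvm_vcpu vcpu = { .vcpu_id = 0 };
	struct kvm_io_device dev = { .ops = &dummy_ops };

	/* e.g. a 1-byte read of PIT channel 0 (port 0x40) */
	return dev.ops->read(&vcpu, &dev, 0x40, 1, NULL);
}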
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index dd1b16b611b0..c84990b42b5b 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h | |||
@@ -3,7 +3,7 @@ | |||
3 | 3 | ||
4 | #include <linux/kthread.h> | 4 | #include <linux/kthread.h> |
5 | 5 | ||
6 | #include "iodev.h" | 6 | #include <kvm/iodev.h> |
7 | 7 | ||
8 | struct kvm_kpit_channel_state { | 8 | struct kvm_kpit_channel_state { |
9 | u32 count; /* can be 65536 */ | 9 | u32 count; /* can be 65536 */ |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 9541ba34126b..fef922ff2635 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -529,42 +529,42 @@ static int picdev_read(struct kvm_pic *s, | |||
529 | return 0; | 529 | return 0; |
530 | } | 530 | } |
531 | 531 | ||
532 | static int picdev_master_write(struct kvm_io_device *dev, | 532 | static int picdev_master_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, |
533 | gpa_t addr, int len, const void *val) | 533 | gpa_t addr, int len, const void *val) |
534 | { | 534 | { |
535 | return picdev_write(container_of(dev, struct kvm_pic, dev_master), | 535 | return picdev_write(container_of(dev, struct kvm_pic, dev_master), |
536 | addr, len, val); | 536 | addr, len, val); |
537 | } | 537 | } |
538 | 538 | ||
539 | static int picdev_master_read(struct kvm_io_device *dev, | 539 | static int picdev_master_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, |
540 | gpa_t addr, int len, void *val) | 540 | gpa_t addr, int len, void *val) |
541 | { | 541 | { |
542 | return picdev_read(container_of(dev, struct kvm_pic, dev_master), | 542 | return picdev_read(container_of(dev, struct kvm_pic, dev_master), |
543 | addr, len, val); | 543 | addr, len, val); |
544 | } | 544 | } |
545 | 545 | ||
546 | static int picdev_slave_write(struct kvm_io_device *dev, | 546 | static int picdev_slave_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, |
547 | gpa_t addr, int len, const void *val) | 547 | gpa_t addr, int len, const void *val) |
548 | { | 548 | { |
549 | return picdev_write(container_of(dev, struct kvm_pic, dev_slave), | 549 | return picdev_write(container_of(dev, struct kvm_pic, dev_slave), |
550 | addr, len, val); | 550 | addr, len, val); |
551 | } | 551 | } |
552 | 552 | ||
553 | static int picdev_slave_read(struct kvm_io_device *dev, | 553 | static int picdev_slave_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, |
554 | gpa_t addr, int len, void *val) | 554 | gpa_t addr, int len, void *val) |
555 | { | 555 | { |
556 | return picdev_read(container_of(dev, struct kvm_pic, dev_slave), | 556 | return picdev_read(container_of(dev, struct kvm_pic, dev_slave), |
557 | addr, len, val); | 557 | addr, len, val); |
558 | } | 558 | } |
559 | 559 | ||
560 | static int picdev_eclr_write(struct kvm_io_device *dev, | 560 | static int picdev_eclr_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, |
561 | gpa_t addr, int len, const void *val) | 561 | gpa_t addr, int len, const void *val) |
562 | { | 562 | { |
563 | return picdev_write(container_of(dev, struct kvm_pic, dev_eclr), | 563 | return picdev_write(container_of(dev, struct kvm_pic, dev_eclr), |
564 | addr, len, val); | 564 | addr, len, val); |
565 | } | 565 | } |
566 | 566 | ||
567 | static int picdev_eclr_read(struct kvm_io_device *dev, | 567 | static int picdev_eclr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, |
568 | gpa_t addr, int len, void *val) | 568 | gpa_t addr, int len, void *val) |
569 | { | 569 | { |
570 | return picdev_read(container_of(dev, struct kvm_pic, dev_eclr), | 570 | return picdev_read(container_of(dev, struct kvm_pic, dev_eclr), |
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 46d4449772bc..28146f03c514 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c | |||
@@ -206,6 +206,8 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, | |||
206 | 206 | ||
207 | old_irr = ioapic->irr; | 207 | old_irr = ioapic->irr; |
208 | ioapic->irr |= mask; | 208 | ioapic->irr |= mask; |
209 | if (edge) | ||
210 | ioapic->irr_delivered &= ~mask; | ||
209 | if ((edge && old_irr == ioapic->irr) || | 211 | if ((edge && old_irr == ioapic->irr) || |
210 | (!edge && entry.fields.remote_irr)) { | 212 | (!edge && entry.fields.remote_irr)) { |
211 | ret = 0; | 213 | ret = 0; |
@@ -349,7 +351,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) | |||
349 | irqe.shorthand = 0; | 351 | irqe.shorthand = 0; |
350 | 352 | ||
351 | if (irqe.trig_mode == IOAPIC_EDGE_TRIG) | 353 | if (irqe.trig_mode == IOAPIC_EDGE_TRIG) |
352 | ioapic->irr &= ~(1 << irq); | 354 | ioapic->irr_delivered |= 1 << irq; |
353 | 355 | ||
354 | if (irq == RTC_GSI && line_status) { | 356 | if (irq == RTC_GSI && line_status) { |
355 | /* | 357 | /* |
@@ -473,13 +475,6 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, | |||
473 | } | 475 | } |
474 | } | 476 | } |
475 | 477 | ||
476 | bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) | ||
477 | { | ||
478 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | ||
479 | smp_rmb(); | ||
480 | return test_bit(vector, ioapic->handled_vectors); | ||
481 | } | ||
482 | |||
483 | void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode) | 478 | void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode) |
484 | { | 479 | { |
485 | struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; | 480 | struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; |
@@ -500,8 +495,8 @@ static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr) | |||
500 | (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); | 495 | (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); |
501 | } | 496 | } |
502 | 497 | ||
503 | static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, | 498 | static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, |
504 | void *val) | 499 | gpa_t addr, int len, void *val) |
505 | { | 500 | { |
506 | struct kvm_ioapic *ioapic = to_ioapic(this); | 501 | struct kvm_ioapic *ioapic = to_ioapic(this); |
507 | u32 result; | 502 | u32 result; |
@@ -543,8 +538,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, | |||
543 | return 0; | 538 | return 0; |
544 | } | 539 | } |
545 | 540 | ||
546 | static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, | 541 | static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, |
547 | const void *val) | 542 | gpa_t addr, int len, const void *val) |
548 | { | 543 | { |
549 | struct kvm_ioapic *ioapic = to_ioapic(this); | 544 | struct kvm_ioapic *ioapic = to_ioapic(this); |
550 | u32 data; | 545 | u32 data; |
@@ -599,6 +594,7 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) | |||
599 | ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; | 594 | ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; |
600 | ioapic->ioregsel = 0; | 595 | ioapic->ioregsel = 0; |
601 | ioapic->irr = 0; | 596 | ioapic->irr = 0; |
597 | ioapic->irr_delivered = 0; | ||
602 | ioapic->id = 0; | 598 | ioapic->id = 0; |
603 | memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS); | 599 | memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS); |
604 | rtc_irq_eoi_tracking_reset(ioapic); | 600 | rtc_irq_eoi_tracking_reset(ioapic); |
@@ -656,6 +652,7 @@ int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) | |||
656 | 652 | ||
657 | spin_lock(&ioapic->lock); | 653 | spin_lock(&ioapic->lock); |
658 | memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); | 654 | memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); |
655 | state->irr &= ~ioapic->irr_delivered; | ||
659 | spin_unlock(&ioapic->lock); | 656 | spin_unlock(&ioapic->lock); |
660 | return 0; | 657 | return 0; |
661 | } | 658 | } |
@@ -669,6 +666,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) | |||
669 | spin_lock(&ioapic->lock); | 666 | spin_lock(&ioapic->lock); |
670 | memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); | 667 | memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); |
671 | ioapic->irr = 0; | 668 | ioapic->irr = 0; |
669 | ioapic->irr_delivered = 0; | ||
672 | update_handled_vectors(ioapic); | 670 | update_handled_vectors(ioapic); |
673 | kvm_vcpu_request_scan_ioapic(kvm); | 671 | kvm_vcpu_request_scan_ioapic(kvm); |
674 | kvm_ioapic_inject_all(ioapic, state->irr); | 672 | kvm_ioapic_inject_all(ioapic, state->irr); |
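The ioapic.c changes stop clearing an edge-triggered pin's IRR bit when it is serviced; instead the new irr_delivered mask records the delivery, a fresh edge clears the delivered bit again, and kvm_get_ioapic() hides delivered bits from the IRR it reports. Since kvm_set_ioapic() re-injects whatever IRR userspace hands back via kvm_ioapic_inject_all(), this presumably keeps a save/restore (or migration) cycle from re-delivering an edge interrupt the guest already received. A toy model of just that bookkeeping, with the locking, line-level handling and RTC tracking omitted:

#include <stdio.h>

struct toy_ioapic {
	unsigned int irr;		/* pins currently asserted */
	unsigned int irr_delivered;	/* edge pins already injected */
};

/* Edge-triggered assert, mirroring the ioapic_set_irq() change above. */
static int toy_set_irq(struct toy_ioapic *io, int pin)
{
	unsigned int mask = 1u << pin;
	unsigned int old_irr = io->irr;

	io->irr |= mask;
	io->irr_delivered &= ~mask;
	return old_irr != io->irr;	/* 0 means the edge was coalesced */
}

/* Delivery to the LAPIC, mirroring the ioapic_service() change above. */
static void toy_service(struct toy_ioapic *io, int pin)
{
	io->irr_delivered |= 1u << pin;	/* irr itself stays set */
}

/* What KVM_GET_IRQCHIP reports, mirroring the kvm_get_ioapic() change. */
static unsigned int toy_saved_irr(const struct toy_ioapic *io)
{
	return io->irr & ~io->irr_delivered;
}

int main(void)
{
	struct toy_ioapic io = { 0, 0 };

	toy_set_irq(&io, 4);		/* pending, not yet injected */
	printf("saved irr = %#x\n", toy_saved_irr(&io));	/* 0x10 */
	toy_service(&io, 4);		/* injected */
	printf("saved irr = %#x\n", toy_saved_irr(&io));	/* 0x0 */
	return 0;
}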
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index c2e36d934af4..ca0b0b4e6256 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h | |||
@@ -3,7 +3,7 @@ | |||
3 | 3 | ||
4 | #include <linux/kvm_host.h> | 4 | #include <linux/kvm_host.h> |
5 | 5 | ||
6 | #include "iodev.h" | 6 | #include <kvm/iodev.h> |
7 | 7 | ||
8 | struct kvm; | 8 | struct kvm; |
9 | struct kvm_vcpu; | 9 | struct kvm_vcpu; |
@@ -77,6 +77,7 @@ struct kvm_ioapic { | |||
77 | struct rtc_status rtc_status; | 77 | struct rtc_status rtc_status; |
78 | struct delayed_work eoi_inject; | 78 | struct delayed_work eoi_inject; |
79 | u32 irq_eoi[IOAPIC_NUM_PINS]; | 79 | u32 irq_eoi[IOAPIC_NUM_PINS]; |
80 | u32 irr_delivered; | ||
80 | }; | 81 | }; |
81 | 82 | ||
82 | #ifdef DEBUG | 83 | #ifdef DEBUG |
@@ -97,13 +98,19 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) | |||
97 | return kvm->arch.vioapic; | 98 | return kvm->arch.vioapic; |
98 | } | 99 | } |
99 | 100 | ||
101 | static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) | ||
102 | { | ||
103 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | ||
104 | smp_rmb(); | ||
105 | return test_bit(vector, ioapic->handled_vectors); | ||
106 | } | ||
107 | |||
100 | void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); | 108 | void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); |
101 | bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | 109 | bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, |
102 | int short_hand, unsigned int dest, int dest_mode); | 110 | int short_hand, unsigned int dest, int dest_mode); |
103 | int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); | 111 | int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); |
104 | void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, | 112 | void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, |
105 | int trigger_mode); | 113 | int trigger_mode); |
106 | bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector); | ||
107 | int kvm_ioapic_init(struct kvm *kvm); | 114 | int kvm_ioapic_init(struct kvm *kvm); |
108 | void kvm_ioapic_destroy(struct kvm *kvm); | 115 | void kvm_ioapic_destroy(struct kvm *kvm); |
109 | int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, | 116 | int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, |
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 2d03568e9498..ad68c73008c5 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -27,7 +27,7 @@ | |||
27 | #include <linux/kvm_host.h> | 27 | #include <linux/kvm_host.h> |
28 | #include <linux/spinlock.h> | 28 | #include <linux/spinlock.h> |
29 | 29 | ||
30 | #include "iodev.h" | 30 | #include <kvm/iodev.h> |
31 | #include "ioapic.h" | 31 | #include "ioapic.h" |
32 | #include "lapic.h" | 32 | #include "lapic.h" |
33 | 33 | ||
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4ee827d7bf36..d67206a7b99a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -133,6 +133,28 @@ static inline int kvm_apic_id(struct kvm_lapic *apic) | |||
133 | return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; | 133 | return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; |
134 | } | 134 | } |
135 | 135 | ||
136 | /* The logical map is definitely wrong if we have multiple | ||
137 | * modes at the same time. (Physical map is always right.) | ||
138 | */ | ||
139 | static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map) | ||
140 | { | ||
141 | return !(map->mode & (map->mode - 1)); | ||
142 | } | ||
143 | |||
144 | static inline void | ||
145 | apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid) | ||
146 | { | ||
147 | unsigned lid_bits; | ||
148 | |||
149 | BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_CLUSTER != 4); | ||
150 | BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_FLAT != 8); | ||
151 | BUILD_BUG_ON(KVM_APIC_MODE_X2APIC != 16); | ||
152 | lid_bits = map->mode; | ||
153 | |||
154 | *cid = dest_id >> lid_bits; | ||
155 | *lid = dest_id & ((1 << lid_bits) - 1); | ||
156 | } | ||
157 | |||
136 | static void recalculate_apic_map(struct kvm *kvm) | 158 | static void recalculate_apic_map(struct kvm *kvm) |
137 | { | 159 | { |
138 | struct kvm_apic_map *new, *old = NULL; | 160 | struct kvm_apic_map *new, *old = NULL; |
@@ -146,48 +168,6 @@ static void recalculate_apic_map(struct kvm *kvm) | |||
146 | if (!new) | 168 | if (!new) |
147 | goto out; | 169 | goto out; |
148 | 170 | ||
149 | new->ldr_bits = 8; | ||
150 | /* flat mode is default */ | ||
151 | new->cid_shift = 8; | ||
152 | new->cid_mask = 0; | ||
153 | new->lid_mask = 0xff; | ||
154 | new->broadcast = APIC_BROADCAST; | ||
155 | |||
156 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
157 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
158 | |||
159 | if (!kvm_apic_present(vcpu)) | ||
160 | continue; | ||
161 | |||
162 | if (apic_x2apic_mode(apic)) { | ||
163 | new->ldr_bits = 32; | ||
164 | new->cid_shift = 16; | ||
165 | new->cid_mask = new->lid_mask = 0xffff; | ||
166 | new->broadcast = X2APIC_BROADCAST; | ||
167 | } else if (kvm_apic_get_reg(apic, APIC_LDR)) { | ||
168 | if (kvm_apic_get_reg(apic, APIC_DFR) == | ||
169 | APIC_DFR_CLUSTER) { | ||
170 | new->cid_shift = 4; | ||
171 | new->cid_mask = 0xf; | ||
172 | new->lid_mask = 0xf; | ||
173 | } else { | ||
174 | new->cid_shift = 8; | ||
175 | new->cid_mask = 0; | ||
176 | new->lid_mask = 0xff; | ||
177 | } | ||
178 | } | ||
179 | |||
180 | /* | ||
181 | * All APICs have to be configured in the same mode by an OS. | ||
182 | * We take advatage of this while building logical id loockup | ||
183 | * table. After reset APICs are in software disabled mode, so if | ||
184 | * we find apic with different setting we assume this is the mode | ||
185 | * OS wants all apics to be in; build lookup table accordingly. | ||
186 | */ | ||
187 | if (kvm_apic_sw_enabled(apic)) | ||
188 | break; | ||
189 | } | ||
190 | |||
191 | kvm_for_each_vcpu(i, vcpu, kvm) { | 171 | kvm_for_each_vcpu(i, vcpu, kvm) { |
192 | struct kvm_lapic *apic = vcpu->arch.apic; | 172 | struct kvm_lapic *apic = vcpu->arch.apic; |
193 | u16 cid, lid; | 173 | u16 cid, lid; |
@@ -198,11 +178,25 @@ static void recalculate_apic_map(struct kvm *kvm) | |||
198 | 178 | ||
199 | aid = kvm_apic_id(apic); | 179 | aid = kvm_apic_id(apic); |
200 | ldr = kvm_apic_get_reg(apic, APIC_LDR); | 180 | ldr = kvm_apic_get_reg(apic, APIC_LDR); |
201 | cid = apic_cluster_id(new, ldr); | ||
202 | lid = apic_logical_id(new, ldr); | ||
203 | 181 | ||
204 | if (aid < ARRAY_SIZE(new->phys_map)) | 182 | if (aid < ARRAY_SIZE(new->phys_map)) |
205 | new->phys_map[aid] = apic; | 183 | new->phys_map[aid] = apic; |
184 | |||
185 | if (apic_x2apic_mode(apic)) { | ||
186 | new->mode |= KVM_APIC_MODE_X2APIC; | ||
187 | } else if (ldr) { | ||
188 | ldr = GET_APIC_LOGICAL_ID(ldr); | ||
189 | if (kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT) | ||
190 | new->mode |= KVM_APIC_MODE_XAPIC_FLAT; | ||
191 | else | ||
192 | new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER; | ||
193 | } | ||
194 | |||
195 | if (!kvm_apic_logical_map_valid(new)) | ||
196 | continue; | ||
197 | |||
198 | apic_logical_id(new, ldr, &cid, &lid); | ||
199 | |||
206 | if (lid && cid < ARRAY_SIZE(new->logical_map)) | 200 | if (lid && cid < ARRAY_SIZE(new->logical_map)) |
207 | new->logical_map[cid][ffs(lid) - 1] = apic; | 201 | new->logical_map[cid][ffs(lid) - 1] = apic; |
208 | } | 202 | } |
@@ -588,15 +582,23 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) | |||
588 | apic_update_ppr(apic); | 582 | apic_update_ppr(apic); |
589 | } | 583 | } |
590 | 584 | ||
591 | static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest) | 585 | static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda) |
592 | { | 586 | { |
593 | return dest == (apic_x2apic_mode(apic) ? | 587 | if (apic_x2apic_mode(apic)) |
594 | X2APIC_BROADCAST : APIC_BROADCAST); | 588 | return mda == X2APIC_BROADCAST; |
589 | |||
590 | return GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST; | ||
595 | } | 591 | } |
596 | 592 | ||
597 | static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest) | 593 | static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda) |
598 | { | 594 | { |
599 | return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest); | 595 | if (kvm_apic_broadcast(apic, mda)) |
596 | return true; | ||
597 | |||
598 | if (apic_x2apic_mode(apic)) | ||
599 | return mda == kvm_apic_id(apic); | ||
600 | |||
601 | return mda == SET_APIC_DEST_FIELD(kvm_apic_id(apic)); | ||
600 | } | 602 | } |
601 | 603 | ||
602 | static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) | 604 | static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) |
@@ -613,6 +615,7 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) | |||
613 | && (logical_id & mda & 0xffff) != 0; | 615 | && (logical_id & mda & 0xffff) != 0; |
614 | 616 | ||
615 | logical_id = GET_APIC_LOGICAL_ID(logical_id); | 617 | logical_id = GET_APIC_LOGICAL_ID(logical_id); |
618 | mda = GET_APIC_DEST_FIELD(mda); | ||
616 | 619 | ||
617 | switch (kvm_apic_get_reg(apic, APIC_DFR)) { | 620 | switch (kvm_apic_get_reg(apic, APIC_DFR)) { |
618 | case APIC_DFR_FLAT: | 621 | case APIC_DFR_FLAT: |
@@ -627,10 +630,27 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) | |||
627 | } | 630 | } |
628 | } | 631 | } |
629 | 632 | ||
633 | /* KVM APIC implementation has two quirks | ||
634 | * - dest always begins at 0 while xAPIC MDA has offset 24, | ||
635 | * - IOxAPIC messages have to be delivered (directly) to x2APIC. | ||
636 | */ | ||
637 | static u32 kvm_apic_mda(unsigned int dest_id, struct kvm_lapic *source, | ||
638 | struct kvm_lapic *target) | ||
639 | { | ||
640 | bool ipi = source != NULL; | ||
641 | bool x2apic_mda = apic_x2apic_mode(ipi ? source : target); | ||
642 | |||
643 | if (!ipi && dest_id == APIC_BROADCAST && x2apic_mda) | ||
644 | return X2APIC_BROADCAST; | ||
645 | |||
646 | return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id); | ||
647 | } | ||
648 | |||
630 | bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | 649 | bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, |
631 | int short_hand, unsigned int dest, int dest_mode) | 650 | int short_hand, unsigned int dest, int dest_mode) |
632 | { | 651 | { |
633 | struct kvm_lapic *target = vcpu->arch.apic; | 652 | struct kvm_lapic *target = vcpu->arch.apic; |
653 | u32 mda = kvm_apic_mda(dest, source, target); | ||
634 | 654 | ||
635 | apic_debug("target %p, source %p, dest 0x%x, " | 655 | apic_debug("target %p, source %p, dest 0x%x, " |
636 | "dest_mode 0x%x, short_hand 0x%x\n", | 656 | "dest_mode 0x%x, short_hand 0x%x\n", |
@@ -640,9 +660,9 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | |||
640 | switch (short_hand) { | 660 | switch (short_hand) { |
641 | case APIC_DEST_NOSHORT: | 661 | case APIC_DEST_NOSHORT: |
642 | if (dest_mode == APIC_DEST_PHYSICAL) | 662 | if (dest_mode == APIC_DEST_PHYSICAL) |
643 | return kvm_apic_match_physical_addr(target, dest); | 663 | return kvm_apic_match_physical_addr(target, mda); |
644 | else | 664 | else |
645 | return kvm_apic_match_logical_addr(target, dest); | 665 | return kvm_apic_match_logical_addr(target, mda); |
646 | case APIC_DEST_SELF: | 666 | case APIC_DEST_SELF: |
647 | return target == source; | 667 | return target == source; |
648 | case APIC_DEST_ALLINC: | 668 | case APIC_DEST_ALLINC: |
@@ -664,6 +684,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | |||
664 | struct kvm_lapic **dst; | 684 | struct kvm_lapic **dst; |
665 | int i; | 685 | int i; |
666 | bool ret = false; | 686 | bool ret = false; |
687 | bool x2apic_ipi = src && apic_x2apic_mode(src); | ||
667 | 688 | ||
668 | *r = -1; | 689 | *r = -1; |
669 | 690 | ||
@@ -675,15 +696,15 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | |||
675 | if (irq->shorthand) | 696 | if (irq->shorthand) |
676 | return false; | 697 | return false; |
677 | 698 | ||
699 | if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST)) | ||
700 | return false; | ||
701 | |||
678 | rcu_read_lock(); | 702 | rcu_read_lock(); |
679 | map = rcu_dereference(kvm->arch.apic_map); | 703 | map = rcu_dereference(kvm->arch.apic_map); |
680 | 704 | ||
681 | if (!map) | 705 | if (!map) |
682 | goto out; | 706 | goto out; |
683 | 707 | ||
684 | if (irq->dest_id == map->broadcast) | ||
685 | goto out; | ||
686 | |||
687 | ret = true; | 708 | ret = true; |
688 | 709 | ||
689 | if (irq->dest_mode == APIC_DEST_PHYSICAL) { | 710 | if (irq->dest_mode == APIC_DEST_PHYSICAL) { |
@@ -692,16 +713,20 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | |||
692 | 713 | ||
693 | dst = &map->phys_map[irq->dest_id]; | 714 | dst = &map->phys_map[irq->dest_id]; |
694 | } else { | 715 | } else { |
695 | u32 mda = irq->dest_id << (32 - map->ldr_bits); | 716 | u16 cid; |
696 | u16 cid = apic_cluster_id(map, mda); | 717 | |
718 | if (!kvm_apic_logical_map_valid(map)) { | ||
719 | ret = false; | ||
720 | goto out; | ||
721 | } | ||
722 | |||
723 | apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap); | ||
697 | 724 | ||
698 | if (cid >= ARRAY_SIZE(map->logical_map)) | 725 | if (cid >= ARRAY_SIZE(map->logical_map)) |
699 | goto out; | 726 | goto out; |
700 | 727 | ||
701 | dst = map->logical_map[cid]; | 728 | dst = map->logical_map[cid]; |
702 | 729 | ||
703 | bitmap = apic_logical_id(map, mda); | ||
704 | |||
705 | if (irq->delivery_mode == APIC_DM_LOWEST) { | 730 | if (irq->delivery_mode == APIC_DM_LOWEST) { |
706 | int l = -1; | 731 | int l = -1; |
707 | for_each_set_bit(i, &bitmap, 16) { | 732 | for_each_set_bit(i, &bitmap, 16) { |
@@ -1037,7 +1062,7 @@ static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) | |||
1037 | addr < apic->base_address + LAPIC_MMIO_LENGTH; | 1062 | addr < apic->base_address + LAPIC_MMIO_LENGTH; |
1038 | } | 1063 | } |
1039 | 1064 | ||
1040 | static int apic_mmio_read(struct kvm_io_device *this, | 1065 | static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, |
1041 | gpa_t address, int len, void *data) | 1066 | gpa_t address, int len, void *data) |
1042 | { | 1067 | { |
1043 | struct kvm_lapic *apic = to_lapic(this); | 1068 | struct kvm_lapic *apic = to_lapic(this); |
@@ -1357,7 +1382,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) | |||
1357 | return ret; | 1382 | return ret; |
1358 | } | 1383 | } |
1359 | 1384 | ||
1360 | static int apic_mmio_write(struct kvm_io_device *this, | 1385 | static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, |
1361 | gpa_t address, int len, const void *data) | 1386 | gpa_t address, int len, const void *data) |
1362 | { | 1387 | { |
1363 | struct kvm_lapic *apic = to_lapic(this); | 1388 | struct kvm_lapic *apic = to_lapic(this); |
@@ -1497,8 +1522,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) | |||
1497 | return; | 1522 | return; |
1498 | } | 1523 | } |
1499 | 1524 | ||
1500 | if (!kvm_vcpu_is_bsp(apic->vcpu)) | ||
1501 | value &= ~MSR_IA32_APICBASE_BSP; | ||
1502 | vcpu->arch.apic_base = value; | 1525 | vcpu->arch.apic_base = value; |
1503 | 1526 | ||
1504 | /* update jump label if enable bit changes */ | 1527 | /* update jump label if enable bit changes */ |
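In the lapic.c rework the per-map ldr_bits/cid_shift/cid_mask/lid_mask fields give way to a single mode value: KVM_APIC_MODE_XAPIC_CLUSTER (4), KVM_APIC_MODE_XAPIC_FLAT (8) and KVM_APIC_MODE_X2APIC (16) are OR-ed together while scanning the vcpus, the logical map is trusted only when at most one mode bit is set, and that same value doubles as the number of logical-ID bits when a destination is split into cluster and in-cluster parts. A stand-alone sketch of the encoding, with the constant values as pinned down by the BUILD_BUG_ON()s in the hunk above:

#include <assert.h>
#include <stdio.h>

#define KVM_APIC_MODE_XAPIC_CLUSTER 4
#define KVM_APIC_MODE_XAPIC_FLAT    8
#define KVM_APIC_MODE_X2APIC       16

struct toy_apic_map {
	unsigned char mode;	/* OR of the modes seen while scanning vcpus */
};

/* Valid only if zero or a single mode bit is set (power-of-two test). */
static int logical_map_valid(const struct toy_apic_map *map)
{
	return !(map->mode & (map->mode - 1));
}

/* Split a logical destination into cluster id and in-cluster bitmap. */
static void logical_id(const struct toy_apic_map *map, unsigned int dest_id,
		       unsigned int *cid, unsigned int *lid)
{
	unsigned int lid_bits = map->mode;	/* 4, 8 or 16 */

	*cid = dest_id >> lid_bits;
	*lid = dest_id & ((1u << lid_bits) - 1);
}

int main(void)
{
	struct toy_apic_map map = { .mode = KVM_APIC_MODE_XAPIC_CLUSTER };
	unsigned int cid, lid;

	assert(logical_map_valid(&map));
	logical_id(&map, 0x28, &cid, &lid);	/* cluster 2, CPU bit 3 */
	printf("cid=%u lid=%#x\n", cid, lid);	/* cid=2 lid=0x8 */
	return 0;
}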
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 0bc6c656625b..9d28383fc1e7 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -1,7 +1,7 @@ | |||
1 | #ifndef __KVM_X86_LAPIC_H | 1 | #ifndef __KVM_X86_LAPIC_H |
2 | #define __KVM_X86_LAPIC_H | 2 | #define __KVM_X86_LAPIC_H |
3 | 3 | ||
4 | #include "iodev.h" | 4 | #include <kvm/iodev.h> |
5 | 5 | ||
6 | #include <linux/kvm_host.h> | 6 | #include <linux/kvm_host.h> |
7 | 7 | ||
@@ -148,21 +148,6 @@ static inline bool kvm_apic_vid_enabled(struct kvm *kvm) | |||
148 | return kvm_x86_ops->vm_has_apicv(kvm); | 148 | return kvm_x86_ops->vm_has_apicv(kvm); |
149 | } | 149 | } |
150 | 150 | ||
151 | static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) | ||
152 | { | ||
153 | u16 cid; | ||
154 | ldr >>= 32 - map->ldr_bits; | ||
155 | cid = (ldr >> map->cid_shift) & map->cid_mask; | ||
156 | |||
157 | return cid; | ||
158 | } | ||
159 | |||
160 | static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) | ||
161 | { | ||
162 | ldr >>= (32 - map->ldr_bits); | ||
163 | return ldr & map->lid_mask; | ||
164 | } | ||
165 | |||
166 | static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) | 151 | static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) |
167 | { | 152 | { |
168 | return vcpu->arch.apic->pending_events; | 153 | return vcpu->arch.apic->pending_events; |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index cee759299a35..146f295ee322 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -4465,6 +4465,79 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, | |||
4465 | kvm_flush_remote_tlbs(kvm); | 4465 | kvm_flush_remote_tlbs(kvm); |
4466 | } | 4466 | } |
4467 | 4467 | ||
4468 | static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, | ||
4469 | unsigned long *rmapp) | ||
4470 | { | ||
4471 | u64 *sptep; | ||
4472 | struct rmap_iterator iter; | ||
4473 | int need_tlb_flush = 0; | ||
4474 | pfn_t pfn; | ||
4475 | struct kvm_mmu_page *sp; | ||
4476 | |||
4477 | for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { | ||
4478 | BUG_ON(!(*sptep & PT_PRESENT_MASK)); | ||
4479 | |||
4480 | sp = page_header(__pa(sptep)); | ||
4481 | pfn = spte_to_pfn(*sptep); | ||
4482 | |||
4483 | /* | ||
4484 | * Only EPT supported for now; otherwise, one would need to | ||
4485 | * find out efficiently whether the guest page tables are | ||
4486 | * also using huge pages. | ||
4487 | */ | ||
4488 | if (sp->role.direct && | ||
4489 | !kvm_is_reserved_pfn(pfn) && | ||
4490 | PageTransCompound(pfn_to_page(pfn))) { | ||
4491 | drop_spte(kvm, sptep); | ||
4492 | sptep = rmap_get_first(*rmapp, &iter); | ||
4493 | need_tlb_flush = 1; | ||
4494 | } else | ||
4495 | sptep = rmap_get_next(&iter); | ||
4496 | } | ||
4497 | |||
4498 | return need_tlb_flush; | ||
4499 | } | ||
4500 | |||
4501 | void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, | ||
4502 | struct kvm_memory_slot *memslot) | ||
4503 | { | ||
4504 | bool flush = false; | ||
4505 | unsigned long *rmapp; | ||
4506 | unsigned long last_index, index; | ||
4507 | gfn_t gfn_start, gfn_end; | ||
4508 | |||
4509 | spin_lock(&kvm->mmu_lock); | ||
4510 | |||
4511 | gfn_start = memslot->base_gfn; | ||
4512 | gfn_end = memslot->base_gfn + memslot->npages - 1; | ||
4513 | |||
4514 | if (gfn_start >= gfn_end) | ||
4515 | goto out; | ||
4516 | |||
4517 | rmapp = memslot->arch.rmap[0]; | ||
4518 | last_index = gfn_to_index(gfn_end, memslot->base_gfn, | ||
4519 | PT_PAGE_TABLE_LEVEL); | ||
4520 | |||
4521 | for (index = 0; index <= last_index; ++index, ++rmapp) { | ||
4522 | if (*rmapp) | ||
4523 | flush |= kvm_mmu_zap_collapsible_spte(kvm, rmapp); | ||
4524 | |||
4525 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { | ||
4526 | if (flush) { | ||
4527 | kvm_flush_remote_tlbs(kvm); | ||
4528 | flush = false; | ||
4529 | } | ||
4530 | cond_resched_lock(&kvm->mmu_lock); | ||
4531 | } | ||
4532 | } | ||
4533 | |||
4534 | if (flush) | ||
4535 | kvm_flush_remote_tlbs(kvm); | ||
4536 | |||
4537 | out: | ||
4538 | spin_unlock(&kvm->mmu_lock); | ||
4539 | } | ||
4540 | |||
4468 | void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, | 4541 | void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, |
4469 | struct kvm_memory_slot *memslot) | 4542 | struct kvm_memory_slot *memslot) |
4470 | { | 4543 | { |
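The new kvm_mmu_zap_collapsible_sptes() walk holds mmu_lock across many rmap entries, so it batches the TLB flush in a local flag and, whenever it is about to break the lock with cond_resched_lock(), flushes first so no other CPU runs with a stale translation. A stripped-down sketch of that flush-before-resched pattern; the locking and flushing below are stand-in stubs, not kernel calls:

#include <stdbool.h>
#include <stdio.h>

static void flush_remote_tlbs(void)      { puts("flush"); }
static bool lock_is_contended(int i)     { return (i % 3) == 2; }
static void drop_and_reacquire_lock(void){ puts("resched"); }

static void walk_rmaps(int nr_entries)
{
	bool flush = false;

	for (int i = 0; i < nr_entries; i++) {
		/* pretend every entry had a collapsible mapping zapped */
		flush = true;

		if (lock_is_contended(i)) {
			/* flush before letting anyone else take the lock */
			if (flush) {
				flush_remote_tlbs();
				flush = false;
			}
			drop_and_reacquire_lock();
		}
	}

	if (flush)
		flush_remote_tlbs();
}

int main(void)
{
	walk_rmaps(7);
	return 0;
}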
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 8e6b7d869d2f..29fbf9dfdc54 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c | |||
@@ -38,7 +38,7 @@ static struct kvm_arch_event_perf_mapping { | |||
38 | }; | 38 | }; |
39 | 39 | ||
40 | /* mapping between fixed pmc index and arch_events array */ | 40 | /* mapping between fixed pmc index and arch_events array */ |
41 | int fixed_pmc_events[] = {1, 0, 7}; | 41 | static int fixed_pmc_events[] = {1, 0, 7}; |
42 | 42 | ||
43 | static bool pmc_is_gp(struct kvm_pmc *pmc) | 43 | static bool pmc_is_gp(struct kvm_pmc *pmc) |
44 | { | 44 | { |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index cc618c882f90..ce741b8650f6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -1261,7 +1261,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
1261 | 1261 | ||
1262 | svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | | 1262 | svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | |
1263 | MSR_IA32_APICBASE_ENABLE; | 1263 | MSR_IA32_APICBASE_ENABLE; |
1264 | if (kvm_vcpu_is_bsp(&svm->vcpu)) | 1264 | if (kvm_vcpu_is_reset_bsp(&svm->vcpu)) |
1265 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; | 1265 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; |
1266 | 1266 | ||
1267 | svm_init_osvw(&svm->vcpu); | 1267 | svm_init_osvw(&svm->vcpu); |
@@ -1929,14 +1929,12 @@ static int nop_on_interception(struct vcpu_svm *svm) | |||
1929 | static int halt_interception(struct vcpu_svm *svm) | 1929 | static int halt_interception(struct vcpu_svm *svm) |
1930 | { | 1930 | { |
1931 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; | 1931 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; |
1932 | skip_emulated_instruction(&svm->vcpu); | ||
1933 | return kvm_emulate_halt(&svm->vcpu); | 1932 | return kvm_emulate_halt(&svm->vcpu); |
1934 | } | 1933 | } |
1935 | 1934 | ||
1936 | static int vmmcall_interception(struct vcpu_svm *svm) | 1935 | static int vmmcall_interception(struct vcpu_svm *svm) |
1937 | { | 1936 | { |
1938 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 1937 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
1939 | skip_emulated_instruction(&svm->vcpu); | ||
1940 | kvm_emulate_hypercall(&svm->vcpu); | 1938 | kvm_emulate_hypercall(&svm->vcpu); |
1941 | return 1; | 1939 | return 1; |
1942 | } | 1940 | } |
@@ -2757,11 +2755,11 @@ static int invlpga_interception(struct vcpu_svm *svm) | |||
2757 | { | 2755 | { |
2758 | struct kvm_vcpu *vcpu = &svm->vcpu; | 2756 | struct kvm_vcpu *vcpu = &svm->vcpu; |
2759 | 2757 | ||
2760 | trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX], | 2758 | trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX), |
2761 | vcpu->arch.regs[VCPU_REGS_RAX]); | 2759 | kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); |
2762 | 2760 | ||
2763 | /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ | 2761 | /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ |
2764 | kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]); | 2762 | kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); |
2765 | 2763 | ||
2766 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 2764 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
2767 | skip_emulated_instruction(&svm->vcpu); | 2765 | skip_emulated_instruction(&svm->vcpu); |
@@ -2770,12 +2768,18 @@ static int invlpga_interception(struct vcpu_svm *svm) | |||
2770 | 2768 | ||
2771 | static int skinit_interception(struct vcpu_svm *svm) | 2769 | static int skinit_interception(struct vcpu_svm *svm) |
2772 | { | 2770 | { |
2773 | trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]); | 2771 | trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); |
2774 | 2772 | ||
2775 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | 2773 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); |
2776 | return 1; | 2774 | return 1; |
2777 | } | 2775 | } |
2778 | 2776 | ||
2777 | static int wbinvd_interception(struct vcpu_svm *svm) | ||
2778 | { | ||
2779 | kvm_emulate_wbinvd(&svm->vcpu); | ||
2780 | return 1; | ||
2781 | } | ||
2782 | |||
2779 | static int xsetbv_interception(struct vcpu_svm *svm) | 2783 | static int xsetbv_interception(struct vcpu_svm *svm) |
2780 | { | 2784 | { |
2781 | u64 new_bv = kvm_read_edx_eax(&svm->vcpu); | 2785 | u64 new_bv = kvm_read_edx_eax(&svm->vcpu); |
@@ -2902,7 +2906,8 @@ static int rdpmc_interception(struct vcpu_svm *svm) | |||
2902 | return 1; | 2906 | return 1; |
2903 | } | 2907 | } |
2904 | 2908 | ||
2905 | bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) | 2909 | static bool check_selective_cr0_intercepted(struct vcpu_svm *svm, |
2910 | unsigned long val) | ||
2906 | { | 2911 | { |
2907 | unsigned long cr0 = svm->vcpu.arch.cr0; | 2912 | unsigned long cr0 = svm->vcpu.arch.cr0; |
2908 | bool ret = false; | 2913 | bool ret = false; |
@@ -2940,7 +2945,10 @@ static int cr_interception(struct vcpu_svm *svm) | |||
2940 | return emulate_on_interception(svm); | 2945 | return emulate_on_interception(svm); |
2941 | 2946 | ||
2942 | reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; | 2947 | reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; |
2943 | cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0; | 2948 | if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE) |
2949 | cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0; | ||
2950 | else | ||
2951 | cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0; | ||
2944 | 2952 | ||
2945 | err = 0; | 2953 | err = 0; |
2946 | if (cr >= 16) { /* mov to cr */ | 2954 | if (cr >= 16) { /* mov to cr */ |
@@ -3133,7 +3141,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
3133 | 3141 | ||
3134 | static int rdmsr_interception(struct vcpu_svm *svm) | 3142 | static int rdmsr_interception(struct vcpu_svm *svm) |
3135 | { | 3143 | { |
3136 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 3144 | u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); |
3137 | u64 data; | 3145 | u64 data; |
3138 | 3146 | ||
3139 | if (svm_get_msr(&svm->vcpu, ecx, &data)) { | 3147 | if (svm_get_msr(&svm->vcpu, ecx, &data)) { |
@@ -3142,8 +3150,8 @@ static int rdmsr_interception(struct vcpu_svm *svm) | |||
3142 | } else { | 3150 | } else { |
3143 | trace_kvm_msr_read(ecx, data); | 3151 | trace_kvm_msr_read(ecx, data); |
3144 | 3152 | ||
3145 | svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff; | 3153 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, data & 0xffffffff); |
3146 | svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32; | 3154 | kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, data >> 32); |
3147 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; | 3155 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; |
3148 | skip_emulated_instruction(&svm->vcpu); | 3156 | skip_emulated_instruction(&svm->vcpu); |
3149 | } | 3157 | } |
@@ -3246,9 +3254,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
3246 | static int wrmsr_interception(struct vcpu_svm *svm) | 3254 | static int wrmsr_interception(struct vcpu_svm *svm) |
3247 | { | 3255 | { |
3248 | struct msr_data msr; | 3256 | struct msr_data msr; |
3249 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 3257 | u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); |
3250 | u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) | 3258 | u64 data = kvm_read_edx_eax(&svm->vcpu); |
3251 | | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); | ||
3252 | 3259 | ||
3253 | msr.data = data; | 3260 | msr.data = data; |
3254 | msr.index = ecx; | 3261 | msr.index = ecx; |
@@ -3325,7 +3332,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
3325 | [SVM_EXIT_READ_CR3] = cr_interception, | 3332 | [SVM_EXIT_READ_CR3] = cr_interception, |
3326 | [SVM_EXIT_READ_CR4] = cr_interception, | 3333 | [SVM_EXIT_READ_CR4] = cr_interception, |
3327 | [SVM_EXIT_READ_CR8] = cr_interception, | 3334 | [SVM_EXIT_READ_CR8] = cr_interception, |
3328 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, | 3335 | [SVM_EXIT_CR0_SEL_WRITE] = cr_interception, |
3329 | [SVM_EXIT_WRITE_CR0] = cr_interception, | 3336 | [SVM_EXIT_WRITE_CR0] = cr_interception, |
3330 | [SVM_EXIT_WRITE_CR3] = cr_interception, | 3337 | [SVM_EXIT_WRITE_CR3] = cr_interception, |
3331 | [SVM_EXIT_WRITE_CR4] = cr_interception, | 3338 | [SVM_EXIT_WRITE_CR4] = cr_interception, |
@@ -3376,7 +3383,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
3376 | [SVM_EXIT_STGI] = stgi_interception, | 3383 | [SVM_EXIT_STGI] = stgi_interception, |
3377 | [SVM_EXIT_CLGI] = clgi_interception, | 3384 | [SVM_EXIT_CLGI] = clgi_interception, |
3378 | [SVM_EXIT_SKINIT] = skinit_interception, | 3385 | [SVM_EXIT_SKINIT] = skinit_interception, |
3379 | [SVM_EXIT_WBINVD] = emulate_on_interception, | 3386 | [SVM_EXIT_WBINVD] = wbinvd_interception, |
3380 | [SVM_EXIT_MONITOR] = monitor_interception, | 3387 | [SVM_EXIT_MONITOR] = monitor_interception, |
3381 | [SVM_EXIT_MWAIT] = mwait_interception, | 3388 | [SVM_EXIT_MWAIT] = mwait_interception, |
3382 | [SVM_EXIT_XSETBV] = xsetbv_interception, | 3389 | [SVM_EXIT_XSETBV] = xsetbv_interception, |
@@ -3555,7 +3562,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
3555 | 3562 | ||
3556 | if (exit_code >= ARRAY_SIZE(svm_exit_handlers) | 3563 | if (exit_code >= ARRAY_SIZE(svm_exit_handlers) |
3557 | || !svm_exit_handlers[exit_code]) { | 3564 | || !svm_exit_handlers[exit_code]) { |
3558 | WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code); | 3565 | WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code); |
3559 | kvm_queue_exception(vcpu, UD_VECTOR); | 3566 | kvm_queue_exception(vcpu, UD_VECTOR); |
3560 | return 1; | 3567 | return 1; |
3561 | } | 3568 | } |
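The svm.c hunks swap direct svm->vcpu.arch.regs[] peeks for kvm_register_read()/kvm_register_write(). Those accessors (from KVM's register-cache header) go through availability/dirty bookkeeping instead of the raw array, so a register is fetched from hardware state on demand and marked for write-back when modified. The sketch below is a simplified model of that idea under those assumptions, not the kernel's actual implementation:

#include <stdio.h>

enum toy_reg { TOY_RAX, TOY_RCX, TOY_RDX, TOY_NR_REGS };

struct toy_vcpu {
	unsigned long regs[TOY_NR_REGS];
	unsigned long regs_avail;	/* bit set: regs[] is up to date */
	unsigned long regs_dirty;	/* bit set: must be written back */
};

/* Stand-in for fetching the value from the VMCB/VMCS on first use. */
static void toy_cache_reg(struct toy_vcpu *vcpu, enum toy_reg reg)
{
	vcpu->regs[reg] = 0xdeadbeef;	/* pretend hardware value */
	vcpu->regs_avail |= 1ul << reg;
}

static unsigned long toy_register_read(struct toy_vcpu *vcpu, enum toy_reg reg)
{
	if (!(vcpu->regs_avail & (1ul << reg)))
		toy_cache_reg(vcpu, reg);
	return vcpu->regs[reg];
}

static void toy_register_write(struct toy_vcpu *vcpu, enum toy_reg reg,
			       unsigned long val)
{
	vcpu->regs[reg] = val;
	vcpu->regs_avail |= 1ul << reg;
	vcpu->regs_dirty |= 1ul << reg;
}

int main(void)
{
	struct toy_vcpu vcpu = { 0 };

	toy_register_write(&vcpu, TOY_RCX, 0xc0000080);	/* e.g. an MSR index */
	printf("rcx=%#lx rax=%#lx\n",
	       toy_register_read(&vcpu, TOY_RCX),
	       toy_register_read(&vcpu, TOY_RAX));
	return 0;
}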
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ae4f6d35d19c..f5e8dce8046c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -2470,6 +2470,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
2470 | vmx->nested.nested_vmx_secondary_ctls_low = 0; | 2470 | vmx->nested.nested_vmx_secondary_ctls_low = 0; |
2471 | vmx->nested.nested_vmx_secondary_ctls_high &= | 2471 | vmx->nested.nested_vmx_secondary_ctls_high &= |
2472 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 2472 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
2473 | SECONDARY_EXEC_RDTSCP | | ||
2473 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | | 2474 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | |
2474 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | 2475 | SECONDARY_EXEC_APIC_REGISTER_VIRT | |
2475 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | | 2476 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
@@ -3268,8 +3269,8 @@ static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, | |||
3268 | * default value. | 3269 | * default value. |
3269 | */ | 3270 | */ |
3270 | if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS) | 3271 | if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS) |
3271 | save->selector &= ~SELECTOR_RPL_MASK; | 3272 | save->selector &= ~SEGMENT_RPL_MASK; |
3272 | save->dpl = save->selector & SELECTOR_RPL_MASK; | 3273 | save->dpl = save->selector & SEGMENT_RPL_MASK; |
3273 | save->s = 1; | 3274 | save->s = 1; |
3274 | } | 3275 | } |
3275 | vmx_set_segment(vcpu, save, seg); | 3276 | vmx_set_segment(vcpu, save, seg); |
@@ -3842,7 +3843,7 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu) | |||
3842 | unsigned int cs_rpl; | 3843 | unsigned int cs_rpl; |
3843 | 3844 | ||
3844 | vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); | 3845 | vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); |
3845 | cs_rpl = cs.selector & SELECTOR_RPL_MASK; | 3846 | cs_rpl = cs.selector & SEGMENT_RPL_MASK; |
3846 | 3847 | ||
3847 | if (cs.unusable) | 3848 | if (cs.unusable) |
3848 | return false; | 3849 | return false; |
@@ -3870,7 +3871,7 @@ static bool stack_segment_valid(struct kvm_vcpu *vcpu) | |||
3870 | unsigned int ss_rpl; | 3871 | unsigned int ss_rpl; |
3871 | 3872 | ||
3872 | vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); | 3873 | vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); |
3873 | ss_rpl = ss.selector & SELECTOR_RPL_MASK; | 3874 | ss_rpl = ss.selector & SEGMENT_RPL_MASK; |
3874 | 3875 | ||
3875 | if (ss.unusable) | 3876 | if (ss.unusable) |
3876 | return true; | 3877 | return true; |
@@ -3892,7 +3893,7 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg) | |||
3892 | unsigned int rpl; | 3893 | unsigned int rpl; |
3893 | 3894 | ||
3894 | vmx_get_segment(vcpu, &var, seg); | 3895 | vmx_get_segment(vcpu, &var, seg); |
3895 | rpl = var.selector & SELECTOR_RPL_MASK; | 3896 | rpl = var.selector & SEGMENT_RPL_MASK; |
3896 | 3897 | ||
3897 | if (var.unusable) | 3898 | if (var.unusable) |
3898 | return true; | 3899 | return true; |
@@ -3919,7 +3920,7 @@ static bool tr_valid(struct kvm_vcpu *vcpu) | |||
3919 | 3920 | ||
3920 | if (tr.unusable) | 3921 | if (tr.unusable) |
3921 | return false; | 3922 | return false; |
3922 | if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */ | 3923 | if (tr.selector & SEGMENT_TI_MASK) /* TI = 1 */ |
3923 | return false; | 3924 | return false; |
3924 | if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */ | 3925 | if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */ |
3925 | return false; | 3926 | return false; |
@@ -3937,7 +3938,7 @@ static bool ldtr_valid(struct kvm_vcpu *vcpu) | |||
3937 | 3938 | ||
3938 | if (ldtr.unusable) | 3939 | if (ldtr.unusable) |
3939 | return true; | 3940 | return true; |
3940 | if (ldtr.selector & SELECTOR_TI_MASK) /* TI = 1 */ | 3941 | if (ldtr.selector & SEGMENT_TI_MASK) /* TI = 1 */ |
3941 | return false; | 3942 | return false; |
3942 | if (ldtr.type != 2) | 3943 | if (ldtr.type != 2) |
3943 | return false; | 3944 | return false; |
@@ -3954,8 +3955,8 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) | |||
3954 | vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); | 3955 | vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); |
3955 | vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); | 3956 | vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); |
3956 | 3957 | ||
3957 | return ((cs.selector & SELECTOR_RPL_MASK) == | 3958 | return ((cs.selector & SEGMENT_RPL_MASK) == |
3958 | (ss.selector & SELECTOR_RPL_MASK)); | 3959 | (ss.selector & SEGMENT_RPL_MASK)); |
3959 | } | 3960 | } |
3960 | 3961 | ||
3961 | /* | 3962 | /* |
@@ -4711,7 +4712,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4711 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); | 4712 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); |
4712 | kvm_set_cr8(&vmx->vcpu, 0); | 4713 | kvm_set_cr8(&vmx->vcpu, 0); |
4713 | apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; | 4714 | apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; |
4714 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) | 4715 | if (kvm_vcpu_is_reset_bsp(&vmx->vcpu)) |
4715 | apic_base_msr.data |= MSR_IA32_APICBASE_BSP; | 4716 | apic_base_msr.data |= MSR_IA32_APICBASE_BSP; |
4716 | apic_base_msr.host_initiated = true; | 4717 | apic_base_msr.host_initiated = true; |
4717 | kvm_set_apic_base(&vmx->vcpu, &apic_base_msr); | 4718 | kvm_set_apic_base(&vmx->vcpu, &apic_base_msr); |
@@ -5006,7 +5007,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
5006 | if (emulate_instruction(vcpu, 0) == EMULATE_DONE) { | 5007 | if (emulate_instruction(vcpu, 0) == EMULATE_DONE) { |
5007 | if (vcpu->arch.halt_request) { | 5008 | if (vcpu->arch.halt_request) { |
5008 | vcpu->arch.halt_request = 0; | 5009 | vcpu->arch.halt_request = 0; |
5009 | return kvm_emulate_halt(vcpu); | 5010 | return kvm_vcpu_halt(vcpu); |
5010 | } | 5011 | } |
5011 | return 1; | 5012 | return 1; |
5012 | } | 5013 | } |
@@ -5071,6 +5072,10 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
5071 | } | 5072 | } |
5072 | 5073 | ||
5073 | if (is_invalid_opcode(intr_info)) { | 5074 | if (is_invalid_opcode(intr_info)) { |
5075 | if (is_guest_mode(vcpu)) { | ||
5076 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
5077 | return 1; | ||
5078 | } | ||
5074 | er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); | 5079 | er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); |
5075 | if (er != EMULATE_DONE) | 5080 | if (er != EMULATE_DONE) |
5076 | kvm_queue_exception(vcpu, UD_VECTOR); | 5081 | kvm_queue_exception(vcpu, UD_VECTOR); |
@@ -5090,9 +5095,10 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
5090 | !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) { | 5095 | !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) { |
5091 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 5096 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
5092 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; | 5097 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; |
5093 | vcpu->run->internal.ndata = 2; | 5098 | vcpu->run->internal.ndata = 3; |
5094 | vcpu->run->internal.data[0] = vect_info; | 5099 | vcpu->run->internal.data[0] = vect_info; |
5095 | vcpu->run->internal.data[1] = intr_info; | 5100 | vcpu->run->internal.data[1] = intr_info; |
5101 | vcpu->run->internal.data[2] = error_code; | ||
5096 | return 0; | 5102 | return 0; |
5097 | } | 5103 | } |
5098 | 5104 | ||
@@ -5533,13 +5539,11 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu) | |||
5533 | 5539 | ||
5534 | static int handle_halt(struct kvm_vcpu *vcpu) | 5540 | static int handle_halt(struct kvm_vcpu *vcpu) |
5535 | { | 5541 | { |
5536 | skip_emulated_instruction(vcpu); | ||
5537 | return kvm_emulate_halt(vcpu); | 5542 | return kvm_emulate_halt(vcpu); |
5538 | } | 5543 | } |
5539 | 5544 | ||
5540 | static int handle_vmcall(struct kvm_vcpu *vcpu) | 5545 | static int handle_vmcall(struct kvm_vcpu *vcpu) |
5541 | { | 5546 | { |
5542 | skip_emulated_instruction(vcpu); | ||
5543 | kvm_emulate_hypercall(vcpu); | 5547 | kvm_emulate_hypercall(vcpu); |
5544 | return 1; | 5548 | return 1; |
5545 | } | 5549 | } |
@@ -5570,7 +5574,6 @@ static int handle_rdpmc(struct kvm_vcpu *vcpu) | |||
5570 | 5574 | ||
5571 | static int handle_wbinvd(struct kvm_vcpu *vcpu) | 5575 | static int handle_wbinvd(struct kvm_vcpu *vcpu) |
5572 | { | 5576 | { |
5573 | skip_emulated_instruction(vcpu); | ||
5574 | kvm_emulate_wbinvd(vcpu); | 5577 | kvm_emulate_wbinvd(vcpu); |
5575 | return 1; | 5578 | return 1; |
5576 | } | 5579 | } |
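The three exit handlers above (HLT, VMCALL, WBINVD) no longer skip the emulated instruction themselves; the skip moves into the common kvm_emulate_*() helpers, while no-skip variants (kvm_vcpu_halt(), kvm_emulate_wbinvd_noskip()) remain for callers that must not advance RIP, such as real-mode emulation or nested entry into a halted guest. Condensed from the x86.c hunks later in this patch:

    /* Condensed from the x86.c part of this patch: the "emulate" entry
     * points advance RIP first, then do the work via the no-skip variant. */
    int kvm_emulate_halt(struct kvm_vcpu *vcpu)
    {
    	kvm_x86_ops->skip_emulated_instruction(vcpu);
    	return kvm_vcpu_halt(vcpu);		/* does not touch RIP */
    }

    int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
    {
    	kvm_x86_ops->skip_emulated_instruction(vcpu);
    	return kvm_emulate_wbinvd_noskip(vcpu);
    }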
@@ -5828,7 +5831,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) | |||
5828 | gpa_t gpa; | 5831 | gpa_t gpa; |
5829 | 5832 | ||
5830 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); | 5833 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); |
5831 | if (!kvm_io_bus_write(vcpu->kvm, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { | 5834 | if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { |
5832 | skip_emulated_instruction(vcpu); | 5835 | skip_emulated_instruction(vcpu); |
5833 | return 1; | 5836 | return 1; |
5834 | } | 5837 | } |
@@ -5909,7 +5912,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
5909 | 5912 | ||
5910 | if (vcpu->arch.halt_request) { | 5913 | if (vcpu->arch.halt_request) { |
5911 | vcpu->arch.halt_request = 0; | 5914 | vcpu->arch.halt_request = 0; |
5912 | ret = kvm_emulate_halt(vcpu); | 5915 | ret = kvm_vcpu_halt(vcpu); |
5913 | goto out; | 5916 | goto out; |
5914 | } | 5917 | } |
5915 | 5918 | ||
@@ -7318,21 +7321,21 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, | |||
7318 | else if (port < 0x10000) | 7321 | else if (port < 0x10000) |
7319 | bitmap = vmcs12->io_bitmap_b; | 7322 | bitmap = vmcs12->io_bitmap_b; |
7320 | else | 7323 | else |
7321 | return 1; | 7324 | return true; |
7322 | bitmap += (port & 0x7fff) / 8; | 7325 | bitmap += (port & 0x7fff) / 8; |
7323 | 7326 | ||
7324 | if (last_bitmap != bitmap) | 7327 | if (last_bitmap != bitmap) |
7325 | if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1)) | 7328 | if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1)) |
7326 | return 1; | 7329 | return true; |
7327 | if (b & (1 << (port & 7))) | 7330 | if (b & (1 << (port & 7))) |
7328 | return 1; | 7331 | return true; |
7329 | 7332 | ||
7330 | port++; | 7333 | port++; |
7331 | size--; | 7334 | size--; |
7332 | last_bitmap = bitmap; | 7335 | last_bitmap = bitmap; |
7333 | } | 7336 | } |
7334 | 7337 | ||
7335 | return 0; | 7338 | return false; |
7336 | } | 7339 | } |
7337 | 7340 | ||
7338 | /* | 7341 | /* |
@@ -7348,7 +7351,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, | |||
7348 | gpa_t bitmap; | 7351 | gpa_t bitmap; |
7349 | 7352 | ||
7350 | if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) | 7353 | if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) |
7351 | return 1; | 7354 | return true; |
7352 | 7355 | ||
7353 | /* | 7356 | /* |
7354 | * The MSR_BITMAP page is divided into four 1024-byte bitmaps, | 7357 | * The MSR_BITMAP page is divided into four 1024-byte bitmaps, |
@@ -7367,10 +7370,10 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, | |||
7367 | if (msr_index < 1024*8) { | 7370 | if (msr_index < 1024*8) { |
7368 | unsigned char b; | 7371 | unsigned char b; |
7369 | if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1)) | 7372 | if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1)) |
7370 | return 1; | 7373 | return true; |
7371 | return 1 & (b >> (msr_index & 7)); | 7374 | return 1 & (b >> (msr_index & 7)); |
7372 | } else | 7375 | } else |
7373 | return 1; /* let L1 handle the wrong parameter */ | 7376 | return true; /* let L1 handle the wrong parameter */ |
7374 | } | 7377 | } |
7375 | 7378 | ||
7376 | /* | 7379 | /* |
@@ -7392,7 +7395,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, | |||
7392 | case 0: | 7395 | case 0: |
7393 | if (vmcs12->cr0_guest_host_mask & | 7396 | if (vmcs12->cr0_guest_host_mask & |
7394 | (val ^ vmcs12->cr0_read_shadow)) | 7397 | (val ^ vmcs12->cr0_read_shadow)) |
7395 | return 1; | 7398 | return true; |
7396 | break; | 7399 | break; |
7397 | case 3: | 7400 | case 3: |
7398 | if ((vmcs12->cr3_target_count >= 1 && | 7401 | if ((vmcs12->cr3_target_count >= 1 && |
@@ -7403,37 +7406,37 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, | |||
7403 | vmcs12->cr3_target_value2 == val) || | 7406 | vmcs12->cr3_target_value2 == val) || |
7404 | (vmcs12->cr3_target_count >= 4 && | 7407 | (vmcs12->cr3_target_count >= 4 && |
7405 | vmcs12->cr3_target_value3 == val)) | 7408 | vmcs12->cr3_target_value3 == val)) |
7406 | return 0; | 7409 | return false; |
7407 | if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING)) | 7410 | if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING)) |
7408 | return 1; | 7411 | return true; |
7409 | break; | 7412 | break; |
7410 | case 4: | 7413 | case 4: |
7411 | if (vmcs12->cr4_guest_host_mask & | 7414 | if (vmcs12->cr4_guest_host_mask & |
7412 | (vmcs12->cr4_read_shadow ^ val)) | 7415 | (vmcs12->cr4_read_shadow ^ val)) |
7413 | return 1; | 7416 | return true; |
7414 | break; | 7417 | break; |
7415 | case 8: | 7418 | case 8: |
7416 | if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING)) | 7419 | if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING)) |
7417 | return 1; | 7420 | return true; |
7418 | break; | 7421 | break; |
7419 | } | 7422 | } |
7420 | break; | 7423 | break; |
7421 | case 2: /* clts */ | 7424 | case 2: /* clts */ |
7422 | if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) && | 7425 | if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) && |
7423 | (vmcs12->cr0_read_shadow & X86_CR0_TS)) | 7426 | (vmcs12->cr0_read_shadow & X86_CR0_TS)) |
7424 | return 1; | 7427 | return true; |
7425 | break; | 7428 | break; |
7426 | case 1: /* mov from cr */ | 7429 | case 1: /* mov from cr */ |
7427 | switch (cr) { | 7430 | switch (cr) { |
7428 | case 3: | 7431 | case 3: |
7429 | if (vmcs12->cpu_based_vm_exec_control & | 7432 | if (vmcs12->cpu_based_vm_exec_control & |
7430 | CPU_BASED_CR3_STORE_EXITING) | 7433 | CPU_BASED_CR3_STORE_EXITING) |
7431 | return 1; | 7434 | return true; |
7432 | break; | 7435 | break; |
7433 | case 8: | 7436 | case 8: |
7434 | if (vmcs12->cpu_based_vm_exec_control & | 7437 | if (vmcs12->cpu_based_vm_exec_control & |
7435 | CPU_BASED_CR8_STORE_EXITING) | 7438 | CPU_BASED_CR8_STORE_EXITING) |
7436 | return 1; | 7439 | return true; |
7437 | break; | 7440 | break; |
7438 | } | 7441 | } |
7439 | break; | 7442 | break; |
@@ -7444,14 +7447,14 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, | |||
7444 | */ | 7447 | */ |
7445 | if (vmcs12->cr0_guest_host_mask & 0xe & | 7448 | if (vmcs12->cr0_guest_host_mask & 0xe & |
7446 | (val ^ vmcs12->cr0_read_shadow)) | 7449 | (val ^ vmcs12->cr0_read_shadow)) |
7447 | return 1; | 7450 | return true; |
7448 | if ((vmcs12->cr0_guest_host_mask & 0x1) && | 7451 | if ((vmcs12->cr0_guest_host_mask & 0x1) && |
7449 | !(vmcs12->cr0_read_shadow & 0x1) && | 7452 | !(vmcs12->cr0_read_shadow & 0x1) && |
7450 | (val & 0x1)) | 7453 | (val & 0x1)) |
7451 | return 1; | 7454 | return true; |
7452 | break; | 7455 | break; |
7453 | } | 7456 | } |
7454 | return 0; | 7457 | return false; |
7455 | } | 7458 | } |
7456 | 7459 | ||
7457 | /* | 7460 | /* |
@@ -7474,48 +7477,48 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
7474 | KVM_ISA_VMX); | 7477 | KVM_ISA_VMX); |
7475 | 7478 | ||
7476 | if (vmx->nested.nested_run_pending) | 7479 | if (vmx->nested.nested_run_pending) |
7477 | return 0; | 7480 | return false; |
7478 | 7481 | ||
7479 | if (unlikely(vmx->fail)) { | 7482 | if (unlikely(vmx->fail)) { |
7480 | pr_info_ratelimited("%s failed vm entry %x\n", __func__, | 7483 | pr_info_ratelimited("%s failed vm entry %x\n", __func__, |
7481 | vmcs_read32(VM_INSTRUCTION_ERROR)); | 7484 | vmcs_read32(VM_INSTRUCTION_ERROR)); |
7482 | return 1; | 7485 | return true; |
7483 | } | 7486 | } |
7484 | 7487 | ||
7485 | switch (exit_reason) { | 7488 | switch (exit_reason) { |
7486 | case EXIT_REASON_EXCEPTION_NMI: | 7489 | case EXIT_REASON_EXCEPTION_NMI: |
7487 | if (!is_exception(intr_info)) | 7490 | if (!is_exception(intr_info)) |
7488 | return 0; | 7491 | return false; |
7489 | else if (is_page_fault(intr_info)) | 7492 | else if (is_page_fault(intr_info)) |
7490 | return enable_ept; | 7493 | return enable_ept; |
7491 | else if (is_no_device(intr_info) && | 7494 | else if (is_no_device(intr_info) && |
7492 | !(vmcs12->guest_cr0 & X86_CR0_TS)) | 7495 | !(vmcs12->guest_cr0 & X86_CR0_TS)) |
7493 | return 0; | 7496 | return false; |
7494 | return vmcs12->exception_bitmap & | 7497 | return vmcs12->exception_bitmap & |
7495 | (1u << (intr_info & INTR_INFO_VECTOR_MASK)); | 7498 | (1u << (intr_info & INTR_INFO_VECTOR_MASK)); |
7496 | case EXIT_REASON_EXTERNAL_INTERRUPT: | 7499 | case EXIT_REASON_EXTERNAL_INTERRUPT: |
7497 | return 0; | 7500 | return false; |
7498 | case EXIT_REASON_TRIPLE_FAULT: | 7501 | case EXIT_REASON_TRIPLE_FAULT: |
7499 | return 1; | 7502 | return true; |
7500 | case EXIT_REASON_PENDING_INTERRUPT: | 7503 | case EXIT_REASON_PENDING_INTERRUPT: |
7501 | return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING); | 7504 | return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING); |
7502 | case EXIT_REASON_NMI_WINDOW: | 7505 | case EXIT_REASON_NMI_WINDOW: |
7503 | return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING); | 7506 | return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING); |
7504 | case EXIT_REASON_TASK_SWITCH: | 7507 | case EXIT_REASON_TASK_SWITCH: |
7505 | return 1; | 7508 | return true; |
7506 | case EXIT_REASON_CPUID: | 7509 | case EXIT_REASON_CPUID: |
7507 | if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa) | 7510 | if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa) |
7508 | return 0; | 7511 | return false; |
7509 | return 1; | 7512 | return true; |
7510 | case EXIT_REASON_HLT: | 7513 | case EXIT_REASON_HLT: |
7511 | return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING); | 7514 | return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING); |
7512 | case EXIT_REASON_INVD: | 7515 | case EXIT_REASON_INVD: |
7513 | return 1; | 7516 | return true; |
7514 | case EXIT_REASON_INVLPG: | 7517 | case EXIT_REASON_INVLPG: |
7515 | return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); | 7518 | return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); |
7516 | case EXIT_REASON_RDPMC: | 7519 | case EXIT_REASON_RDPMC: |
7517 | return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); | 7520 | return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); |
7518 | case EXIT_REASON_RDTSC: | 7521 | case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: |
7519 | return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); | 7522 | return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); |
7520 | case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: | 7523 | case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: |
7521 | case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD: | 7524 | case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD: |
@@ -7527,7 +7530,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
7527 | * VMX instructions trap unconditionally. This allows L1 to | 7530 | * VMX instructions trap unconditionally. This allows L1 to |
7528 | * emulate them for its L2 guest, i.e., allows 3-level nesting! | 7531 | * emulate them for its L2 guest, i.e., allows 3-level nesting! |
7529 | */ | 7532 | */ |
7530 | return 1; | 7533 | return true; |
7531 | case EXIT_REASON_CR_ACCESS: | 7534 | case EXIT_REASON_CR_ACCESS: |
7532 | return nested_vmx_exit_handled_cr(vcpu, vmcs12); | 7535 | return nested_vmx_exit_handled_cr(vcpu, vmcs12); |
7533 | case EXIT_REASON_DR_ACCESS: | 7536 | case EXIT_REASON_DR_ACCESS: |
@@ -7538,7 +7541,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
7538 | case EXIT_REASON_MSR_WRITE: | 7541 | case EXIT_REASON_MSR_WRITE: |
7539 | return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); | 7542 | return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); |
7540 | case EXIT_REASON_INVALID_STATE: | 7543 | case EXIT_REASON_INVALID_STATE: |
7541 | return 1; | 7544 | return true; |
7542 | case EXIT_REASON_MWAIT_INSTRUCTION: | 7545 | case EXIT_REASON_MWAIT_INSTRUCTION: |
7543 | return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING); | 7546 | return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING); |
7544 | case EXIT_REASON_MONITOR_INSTRUCTION: | 7547 | case EXIT_REASON_MONITOR_INSTRUCTION: |
@@ -7548,7 +7551,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
7548 | nested_cpu_has2(vmcs12, | 7551 | nested_cpu_has2(vmcs12, |
7549 | SECONDARY_EXEC_PAUSE_LOOP_EXITING); | 7552 | SECONDARY_EXEC_PAUSE_LOOP_EXITING); |
7550 | case EXIT_REASON_MCE_DURING_VMENTRY: | 7553 | case EXIT_REASON_MCE_DURING_VMENTRY: |
7551 | return 0; | 7554 | return false; |
7552 | case EXIT_REASON_TPR_BELOW_THRESHOLD: | 7555 | case EXIT_REASON_TPR_BELOW_THRESHOLD: |
7553 | return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW); | 7556 | return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW); |
7554 | case EXIT_REASON_APIC_ACCESS: | 7557 | case EXIT_REASON_APIC_ACCESS: |
@@ -7557,7 +7560,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
7557 | case EXIT_REASON_APIC_WRITE: | 7560 | case EXIT_REASON_APIC_WRITE: |
7558 | case EXIT_REASON_EOI_INDUCED: | 7561 | case EXIT_REASON_EOI_INDUCED: |
7559 | /* apic_write and eoi_induced should exit unconditionally. */ | 7562 | /* apic_write and eoi_induced should exit unconditionally. */ |
7560 | return 1; | 7563 | return true; |
7561 | case EXIT_REASON_EPT_VIOLATION: | 7564 | case EXIT_REASON_EPT_VIOLATION: |
7562 | /* | 7565 | /* |
7563 | * L0 always deals with the EPT violation. If nested EPT is | 7566 | * L0 always deals with the EPT violation. If nested EPT is |
@@ -7565,7 +7568,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
7565 | * missing in the guest EPT table (EPT12), the EPT violation | 7568 | * missing in the guest EPT table (EPT12), the EPT violation |
7566 | * will be injected with nested_ept_inject_page_fault() | 7569 | * will be injected with nested_ept_inject_page_fault() |
7567 | */ | 7570 | */ |
7568 | return 0; | 7571 | return false; |
7569 | case EXIT_REASON_EPT_MISCONFIG: | 7572 | case EXIT_REASON_EPT_MISCONFIG: |
7570 | /* | 7573 | /* |
7571 | * L2 never directly uses L1's EPT, but rather L0's own EPT | 7574 | * L2 never directly uses L1's EPT, but rather L0's own EPT |
@@ -7573,11 +7576,11 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
7573 | * (EPT on EPT). So any problems with the structure of the | 7576 | * (EPT on EPT). So any problems with the structure of the |
7574 | * table are L0's fault. | 7577 | * table are L0's fault. |
7575 | */ | 7578 | */ |
7576 | return 0; | 7579 | return false; |
7577 | case EXIT_REASON_WBINVD: | 7580 | case EXIT_REASON_WBINVD: |
7578 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); | 7581 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); |
7579 | case EXIT_REASON_XSETBV: | 7582 | case EXIT_REASON_XSETBV: |
7580 | return 1; | 7583 | return true; |
7581 | case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS: | 7584 | case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS: |
7582 | /* | 7585 | /* |
7583 | * This should never happen, since it is not possible to | 7586 | * This should never happen, since it is not possible to |
@@ -7587,7 +7590,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
7587 | */ | 7590 | */ |
7588 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); | 7591 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); |
7589 | default: | 7592 | default: |
7590 | return 1; | 7593 | return true; |
7591 | } | 7594 | } |
7592 | } | 7595 | } |
7593 | 7596 | ||
@@ -8522,6 +8525,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | |||
8522 | exec_control); | 8525 | exec_control); |
8523 | } | 8526 | } |
8524 | } | 8527 | } |
8528 | if (nested && !vmx->rdtscp_enabled) | ||
8529 | vmx->nested.nested_vmx_secondary_ctls_high &= | ||
8530 | ~SECONDARY_EXEC_RDTSCP; | ||
8525 | } | 8531 | } |
8526 | 8532 | ||
8527 | /* Exposing INVPCID only when PCID is exposed */ | 8533 | /* Exposing INVPCID only when PCID is exposed */ |
@@ -8622,10 +8628,11 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | |||
8622 | struct vmcs12 *vmcs12) | 8628 | struct vmcs12 *vmcs12) |
8623 | { | 8629 | { |
8624 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 8630 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
8631 | int maxphyaddr = cpuid_maxphyaddr(vcpu); | ||
8625 | 8632 | ||
8626 | if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { | 8633 | if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { |
8627 | /* TODO: Also verify bits beyond physical address width are 0 */ | 8634 | if (!PAGE_ALIGNED(vmcs12->apic_access_addr) || |
8628 | if (!PAGE_ALIGNED(vmcs12->apic_access_addr)) | 8635 | vmcs12->apic_access_addr >> maxphyaddr) |
8629 | return false; | 8636 | return false; |
8630 | 8637 | ||
8631 | /* | 8638 | /* |
@@ -8641,8 +8648,8 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | |||
8641 | } | 8648 | } |
8642 | 8649 | ||
8643 | if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { | 8650 | if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { |
8644 | /* TODO: Also verify bits beyond physical address width are 0 */ | 8651 | if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr) || |
8645 | if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr)) | 8652 | vmcs12->virtual_apic_page_addr >> maxphyaddr) |
8646 | return false; | 8653 | return false; |
8647 | 8654 | ||
8648 | if (vmx->nested.virtual_apic_page) /* shouldn't happen */ | 8655 | if (vmx->nested.virtual_apic_page) /* shouldn't happen */ |
@@ -8665,7 +8672,8 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | |||
8665 | } | 8672 | } |
8666 | 8673 | ||
8667 | if (nested_cpu_has_posted_intr(vmcs12)) { | 8674 | if (nested_cpu_has_posted_intr(vmcs12)) { |
8668 | if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64)) | 8675 | if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64) || |
8676 | vmcs12->posted_intr_desc_addr >> maxphyaddr) | ||
8669 | return false; | 8677 | return false; |
8670 | 8678 | ||
8671 | if (vmx->nested.pi_desc_page) { /* shouldn't happen */ | 8679 | if (vmx->nested.pi_desc_page) { /* shouldn't happen */ |
@@ -8864,9 +8872,9 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu, | |||
8864 | 8872 | ||
8865 | static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu, | 8873 | static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu, |
8866 | unsigned long count_field, | 8874 | unsigned long count_field, |
8867 | unsigned long addr_field, | 8875 | unsigned long addr_field) |
8868 | int maxphyaddr) | ||
8869 | { | 8876 | { |
8877 | int maxphyaddr; | ||
8870 | u64 count, addr; | 8878 | u64 count, addr; |
8871 | 8879 | ||
8872 | if (vmcs12_read_any(vcpu, count_field, &count) || | 8880 | if (vmcs12_read_any(vcpu, count_field, &count) || |
@@ -8876,6 +8884,7 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu, | |||
8876 | } | 8884 | } |
8877 | if (count == 0) | 8885 | if (count == 0) |
8878 | return 0; | 8886 | return 0; |
8887 | maxphyaddr = cpuid_maxphyaddr(vcpu); | ||
8879 | if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr || | 8888 | if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr || |
8880 | (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) { | 8889 | (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) { |
8881 | pr_warn_ratelimited( | 8890 | pr_warn_ratelimited( |
@@ -8889,19 +8898,16 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu, | |||
8889 | static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu, | 8898 | static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu, |
8890 | struct vmcs12 *vmcs12) | 8899 | struct vmcs12 *vmcs12) |
8891 | { | 8900 | { |
8892 | int maxphyaddr; | ||
8893 | |||
8894 | if (vmcs12->vm_exit_msr_load_count == 0 && | 8901 | if (vmcs12->vm_exit_msr_load_count == 0 && |
8895 | vmcs12->vm_exit_msr_store_count == 0 && | 8902 | vmcs12->vm_exit_msr_store_count == 0 && |
8896 | vmcs12->vm_entry_msr_load_count == 0) | 8903 | vmcs12->vm_entry_msr_load_count == 0) |
8897 | return 0; /* Fast path */ | 8904 | return 0; /* Fast path */ |
8898 | maxphyaddr = cpuid_maxphyaddr(vcpu); | ||
8899 | if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT, | 8905 | if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT, |
8900 | VM_EXIT_MSR_LOAD_ADDR, maxphyaddr) || | 8906 | VM_EXIT_MSR_LOAD_ADDR) || |
8901 | nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT, | 8907 | nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT, |
8902 | VM_EXIT_MSR_STORE_ADDR, maxphyaddr) || | 8908 | VM_EXIT_MSR_STORE_ADDR) || |
8903 | nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT, | 8909 | nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT, |
8904 | VM_ENTRY_MSR_LOAD_ADDR, maxphyaddr)) | 8910 | VM_ENTRY_MSR_LOAD_ADDR)) |
8905 | return -EINVAL; | 8911 | return -EINVAL; |
8906 | return 0; | 8912 | return 0; |
8907 | } | 8913 | } |
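The nested-VMX hunks above all add the same validation: an address handed down by L1 (APIC-access page, virtual-APIC page, posted-interrupt descriptor, MSR switch areas) is now rejected when it is misaligned or has bits set above the CPUID-reported physical-address width, replacing the old "TODO" comments. The check, factored out as a standalone sketch (the helper name is made up; the patch inlines this logic at each call site):

    /* Hypothetical helper mirroring the checks added in this patch:
     * a guest-physical address is acceptable only if it is aligned and
     * fits within the vCPU's MAXPHYADDR (CPUID leaf 0x80000008). */
    #include <stdbool.h>
    #include <stdint.h>

    static bool nested_gpa_ok(uint64_t gpa, uint64_t align, int maxphyaddr)
    {
    	if (gpa & (align - 1))		/* e.g. 4096 for pages, 16 or 64 for descriptors */
    		return false;
    	if (gpa >> maxphyaddr)		/* bits above MAXPHYADDR must be zero */
    		return false;
    	return true;
    }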
@@ -9151,8 +9157,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
9151 | exec_control &= ~SECONDARY_EXEC_RDTSCP; | 9157 | exec_control &= ~SECONDARY_EXEC_RDTSCP; |
9152 | /* Take the following fields only from vmcs12 */ | 9158 | /* Take the following fields only from vmcs12 */ |
9153 | exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 9159 | exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
9160 | SECONDARY_EXEC_RDTSCP | | ||
9154 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | | 9161 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
9155 | SECONDARY_EXEC_APIC_REGISTER_VIRT); | 9162 | SECONDARY_EXEC_APIC_REGISTER_VIRT); |
9156 | if (nested_cpu_has(vmcs12, | 9163 | if (nested_cpu_has(vmcs12, |
9157 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) | 9164 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) |
9158 | exec_control |= vmcs12->secondary_vm_exec_control; | 9165 | exec_control |= vmcs12->secondary_vm_exec_control; |
@@ -9385,7 +9392,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
9385 | } | 9392 | } |
9386 | 9393 | ||
9387 | if (!nested_get_vmcs12_pages(vcpu, vmcs12)) { | 9394 | if (!nested_get_vmcs12_pages(vcpu, vmcs12)) { |
9388 | /*TODO: Also verify bits beyond physical address width are 0*/ | ||
9389 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 9395 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); |
9390 | return 1; | 9396 | return 1; |
9391 | } | 9397 | } |
@@ -9524,7 +9530,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
9524 | vmcs12->launch_state = 1; | 9530 | vmcs12->launch_state = 1; |
9525 | 9531 | ||
9526 | if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) | 9532 | if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) |
9527 | return kvm_emulate_halt(vcpu); | 9533 | return kvm_vcpu_halt(vcpu); |
9528 | 9534 | ||
9529 | vmx->nested.nested_run_pending = 1; | 9535 | vmx->nested.nested_run_pending = 1; |
9530 | 9536 | ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0ee725f1896d..e1a81267f3f6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -801,6 +801,17 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | |||
801 | } | 801 | } |
802 | EXPORT_SYMBOL_GPL(kvm_get_cr8); | 802 | EXPORT_SYMBOL_GPL(kvm_get_cr8); |
803 | 803 | ||
804 | static void kvm_update_dr0123(struct kvm_vcpu *vcpu) | ||
805 | { | ||
806 | int i; | ||
807 | |||
808 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | ||
809 | for (i = 0; i < KVM_NR_DB_REGS; i++) | ||
810 | vcpu->arch.eff_db[i] = vcpu->arch.db[i]; | ||
811 | vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD; | ||
812 | } | ||
813 | } | ||
814 | |||
804 | static void kvm_update_dr6(struct kvm_vcpu *vcpu) | 815 | static void kvm_update_dr6(struct kvm_vcpu *vcpu) |
805 | { | 816 | { |
806 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | 817 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) |
@@ -3149,6 +3160,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | |||
3149 | return -EINVAL; | 3160 | return -EINVAL; |
3150 | 3161 | ||
3151 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); | 3162 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); |
3163 | kvm_update_dr0123(vcpu); | ||
3152 | vcpu->arch.dr6 = dbgregs->dr6; | 3164 | vcpu->arch.dr6 = dbgregs->dr6; |
3153 | kvm_update_dr6(vcpu); | 3165 | kvm_update_dr6(vcpu); |
3154 | vcpu->arch.dr7 = dbgregs->dr7; | 3166 | vcpu->arch.dr7 = dbgregs->dr7; |
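The new kvm_update_dr0123() call makes a KVM_SET_DEBUGREGS ioctl take effect even while the guest owns the hardware debug registers: eff_db[] is refreshed and KVM_DEBUGREG_RELOAD makes vcpu_enter_guest() reload DR0-DR3 on the next entry (see the hunk further down). A rough userspace sketch of the path this affects, assuming vcpu_fd came from KVM_CREATE_VCPU:

    /* Sketch only: update guest DR0 from userspace via KVM_SET_DEBUGREGS. */
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int set_guest_dr0(int vcpu_fd, __u64 addr)
    {
    	struct kvm_debugregs dbg;

    	memset(&dbg, 0, sizeof(dbg));
    	if (ioctl(vcpu_fd, KVM_GET_DEBUGREGS, &dbg) < 0)
    		return -1;
    	dbg.db[0] = addr;	/* picked up by kvm_update_dr0123() */
    	return ioctl(vcpu_fd, KVM_SET_DEBUGREGS, &dbg);
    }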
@@ -4114,8 +4126,8 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, | |||
4114 | do { | 4126 | do { |
4115 | n = min(len, 8); | 4127 | n = min(len, 8); |
4116 | if (!(vcpu->arch.apic && | 4128 | if (!(vcpu->arch.apic && |
4117 | !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v)) | 4129 | !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v)) |
4118 | && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) | 4130 | && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v)) |
4119 | break; | 4131 | break; |
4120 | handled += n; | 4132 | handled += n; |
4121 | addr += n; | 4133 | addr += n; |
@@ -4134,8 +4146,9 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) | |||
4134 | do { | 4146 | do { |
4135 | n = min(len, 8); | 4147 | n = min(len, 8); |
4136 | if (!(vcpu->arch.apic && | 4148 | if (!(vcpu->arch.apic && |
4137 | !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v)) | 4149 | !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev, |
4138 | && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) | 4150 | addr, n, v)) |
4151 | && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) | ||
4139 | break; | 4152 | break; |
4140 | trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); | 4153 | trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); |
4141 | handled += n; | 4154 | handled += n; |
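These MMIO helpers pick up a tree-wide interface change: kvm_io_bus_read()/kvm_io_bus_write() and the kvm_iodevice callbacks now take the issuing vCPU rather than struct kvm, so in-kernel device models can tell which vCPU performed the access. The resulting prototypes, as implied by the call sites above (declared in include/linux/kvm_host.h in the same tree):

    int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
    		     gpa_t addr, int len, const void *val);
    int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
    		    gpa_t addr, int len, void *val);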
@@ -4475,7 +4488,8 @@ mmio: | |||
4475 | return X86EMUL_CONTINUE; | 4488 | return X86EMUL_CONTINUE; |
4476 | } | 4489 | } |
4477 | 4490 | ||
4478 | int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, | 4491 | static int emulator_read_write(struct x86_emulate_ctxt *ctxt, |
4492 | unsigned long addr, | ||
4479 | void *val, unsigned int bytes, | 4493 | void *val, unsigned int bytes, |
4480 | struct x86_exception *exception, | 4494 | struct x86_exception *exception, |
4481 | const struct read_write_emulator_ops *ops) | 4495 | const struct read_write_emulator_ops *ops) |
@@ -4538,7 +4552,7 @@ static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, | |||
4538 | exception, &read_emultor); | 4552 | exception, &read_emultor); |
4539 | } | 4553 | } |
4540 | 4554 | ||
4541 | int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, | 4555 | static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, |
4542 | unsigned long addr, | 4556 | unsigned long addr, |
4543 | const void *val, | 4557 | const void *val, |
4544 | unsigned int bytes, | 4558 | unsigned int bytes, |
@@ -4629,10 +4643,10 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | |||
4629 | int r; | 4643 | int r; |
4630 | 4644 | ||
4631 | if (vcpu->arch.pio.in) | 4645 | if (vcpu->arch.pio.in) |
4632 | r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, | 4646 | r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port, |
4633 | vcpu->arch.pio.size, pd); | 4647 | vcpu->arch.pio.size, pd); |
4634 | else | 4648 | else |
4635 | r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, | 4649 | r = kvm_io_bus_write(vcpu, KVM_PIO_BUS, |
4636 | vcpu->arch.pio.port, vcpu->arch.pio.size, | 4650 | vcpu->arch.pio.port, vcpu->arch.pio.size, |
4637 | pd); | 4651 | pd); |
4638 | return r; | 4652 | return r; |
@@ -4705,7 +4719,7 @@ static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address) | |||
4705 | kvm_mmu_invlpg(emul_to_vcpu(ctxt), address); | 4719 | kvm_mmu_invlpg(emul_to_vcpu(ctxt), address); |
4706 | } | 4720 | } |
4707 | 4721 | ||
4708 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) | 4722 | int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu) |
4709 | { | 4723 | { |
4710 | if (!need_emulate_wbinvd(vcpu)) | 4724 | if (!need_emulate_wbinvd(vcpu)) |
4711 | return X86EMUL_CONTINUE; | 4725 | return X86EMUL_CONTINUE; |
@@ -4722,19 +4736,29 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) | |||
4722 | wbinvd(); | 4736 | wbinvd(); |
4723 | return X86EMUL_CONTINUE; | 4737 | return X86EMUL_CONTINUE; |
4724 | } | 4738 | } |
4739 | |||
4740 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) | ||
4741 | { | ||
4742 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
4743 | return kvm_emulate_wbinvd_noskip(vcpu); | ||
4744 | } | ||
4725 | EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); | 4745 | EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); |
4726 | 4746 | ||
4747 | |||
4748 | |||
4727 | static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) | 4749 | static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) |
4728 | { | 4750 | { |
4729 | kvm_emulate_wbinvd(emul_to_vcpu(ctxt)); | 4751 | kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt)); |
4730 | } | 4752 | } |
4731 | 4753 | ||
4732 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) | 4754 | static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, |
4755 | unsigned long *dest) | ||
4733 | { | 4756 | { |
4734 | return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); | 4757 | return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); |
4735 | } | 4758 | } |
4736 | 4759 | ||
4737 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | 4760 | static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, |
4761 | unsigned long value) | ||
4738 | { | 4762 | { |
4739 | 4763 | ||
4740 | return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value); | 4764 | return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value); |
@@ -5816,7 +5840,7 @@ void kvm_arch_exit(void) | |||
5816 | free_percpu(shared_msrs); | 5840 | free_percpu(shared_msrs); |
5817 | } | 5841 | } |
5818 | 5842 | ||
5819 | int kvm_emulate_halt(struct kvm_vcpu *vcpu) | 5843 | int kvm_vcpu_halt(struct kvm_vcpu *vcpu) |
5820 | { | 5844 | { |
5821 | ++vcpu->stat.halt_exits; | 5845 | ++vcpu->stat.halt_exits; |
5822 | if (irqchip_in_kernel(vcpu->kvm)) { | 5846 | if (irqchip_in_kernel(vcpu->kvm)) { |
@@ -5827,6 +5851,13 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) | |||
5827 | return 0; | 5851 | return 0; |
5828 | } | 5852 | } |
5829 | } | 5853 | } |
5854 | EXPORT_SYMBOL_GPL(kvm_vcpu_halt); | ||
5855 | |||
5856 | int kvm_emulate_halt(struct kvm_vcpu *vcpu) | ||
5857 | { | ||
5858 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
5859 | return kvm_vcpu_halt(vcpu); | ||
5860 | } | ||
5830 | EXPORT_SYMBOL_GPL(kvm_emulate_halt); | 5861 | EXPORT_SYMBOL_GPL(kvm_emulate_halt); |
5831 | 5862 | ||
5832 | int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | 5863 | int kvm_hv_hypercall(struct kvm_vcpu *vcpu) |
@@ -5903,7 +5934,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) | |||
5903 | lapic_irq.dest_id = apicid; | 5934 | lapic_irq.dest_id = apicid; |
5904 | 5935 | ||
5905 | lapic_irq.delivery_mode = APIC_DM_REMRD; | 5936 | lapic_irq.delivery_mode = APIC_DM_REMRD; |
5906 | kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL); | 5937 | kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); |
5907 | } | 5938 | } |
5908 | 5939 | ||
5909 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | 5940 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) |
@@ -5911,6 +5942,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | |||
5911 | unsigned long nr, a0, a1, a2, a3, ret; | 5942 | unsigned long nr, a0, a1, a2, a3, ret; |
5912 | int op_64_bit, r = 1; | 5943 | int op_64_bit, r = 1; |
5913 | 5944 | ||
5945 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
5946 | |||
5914 | if (kvm_hv_hypercall_enabled(vcpu->kvm)) | 5947 | if (kvm_hv_hypercall_enabled(vcpu->kvm)) |
5915 | return kvm_hv_hypercall(vcpu); | 5948 | return kvm_hv_hypercall(vcpu); |
5916 | 5949 | ||
@@ -6164,7 +6197,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, | |||
6164 | } | 6197 | } |
6165 | 6198 | ||
6166 | /* | 6199 | /* |
6167 | * Returns 1 to let __vcpu_run() continue the guest execution loop without | 6200 | * Returns 1 to let vcpu_run() continue the guest execution loop without |
6168 | * exiting to the userspace. Otherwise, the value will be returned to the | 6201 | * exiting to the userspace. Otherwise, the value will be returned to the |
6169 | * userspace. | 6202 | * userspace. |
6170 | */ | 6203 | */ |
@@ -6301,6 +6334,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6301 | set_debugreg(vcpu->arch.eff_db[2], 2); | 6334 | set_debugreg(vcpu->arch.eff_db[2], 2); |
6302 | set_debugreg(vcpu->arch.eff_db[3], 3); | 6335 | set_debugreg(vcpu->arch.eff_db[3], 3); |
6303 | set_debugreg(vcpu->arch.dr6, 6); | 6336 | set_debugreg(vcpu->arch.dr6, 6); |
6337 | vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; | ||
6304 | } | 6338 | } |
6305 | 6339 | ||
6306 | trace_kvm_entry(vcpu->vcpu_id); | 6340 | trace_kvm_entry(vcpu->vcpu_id); |
@@ -6382,42 +6416,47 @@ out: | |||
6382 | return r; | 6416 | return r; |
6383 | } | 6417 | } |
6384 | 6418 | ||
6419 | static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) | ||
6420 | { | ||
6421 | if (!kvm_arch_vcpu_runnable(vcpu)) { | ||
6422 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | ||
6423 | kvm_vcpu_block(vcpu); | ||
6424 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | ||
6425 | if (!kvm_check_request(KVM_REQ_UNHALT, vcpu)) | ||
6426 | return 1; | ||
6427 | } | ||
6428 | |||
6429 | kvm_apic_accept_events(vcpu); | ||
6430 | switch(vcpu->arch.mp_state) { | ||
6431 | case KVM_MP_STATE_HALTED: | ||
6432 | vcpu->arch.pv.pv_unhalted = false; | ||
6433 | vcpu->arch.mp_state = | ||
6434 | KVM_MP_STATE_RUNNABLE; | ||
6435 | case KVM_MP_STATE_RUNNABLE: | ||
6436 | vcpu->arch.apf.halted = false; | ||
6437 | break; | ||
6438 | case KVM_MP_STATE_INIT_RECEIVED: | ||
6439 | break; | ||
6440 | default: | ||
6441 | return -EINTR; | ||
6442 | break; | ||
6443 | } | ||
6444 | return 1; | ||
6445 | } | ||
6385 | 6446 | ||
6386 | static int __vcpu_run(struct kvm_vcpu *vcpu) | 6447 | static int vcpu_run(struct kvm_vcpu *vcpu) |
6387 | { | 6448 | { |
6388 | int r; | 6449 | int r; |
6389 | struct kvm *kvm = vcpu->kvm; | 6450 | struct kvm *kvm = vcpu->kvm; |
6390 | 6451 | ||
6391 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | 6452 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
6392 | 6453 | ||
6393 | r = 1; | 6454 | for (;;) { |
6394 | while (r > 0) { | ||
6395 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && | 6455 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && |
6396 | !vcpu->arch.apf.halted) | 6456 | !vcpu->arch.apf.halted) |
6397 | r = vcpu_enter_guest(vcpu); | 6457 | r = vcpu_enter_guest(vcpu); |
6398 | else { | 6458 | else |
6399 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 6459 | r = vcpu_block(kvm, vcpu); |
6400 | kvm_vcpu_block(vcpu); | ||
6401 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | ||
6402 | if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) { | ||
6403 | kvm_apic_accept_events(vcpu); | ||
6404 | switch(vcpu->arch.mp_state) { | ||
6405 | case KVM_MP_STATE_HALTED: | ||
6406 | vcpu->arch.pv.pv_unhalted = false; | ||
6407 | vcpu->arch.mp_state = | ||
6408 | KVM_MP_STATE_RUNNABLE; | ||
6409 | case KVM_MP_STATE_RUNNABLE: | ||
6410 | vcpu->arch.apf.halted = false; | ||
6411 | break; | ||
6412 | case KVM_MP_STATE_INIT_RECEIVED: | ||
6413 | break; | ||
6414 | default: | ||
6415 | r = -EINTR; | ||
6416 | break; | ||
6417 | } | ||
6418 | } | ||
6419 | } | ||
6420 | |||
6421 | if (r <= 0) | 6460 | if (r <= 0) |
6422 | break; | 6461 | break; |
6423 | 6462 | ||
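The rewrite above separates "run the guest" from "wait while halted": vcpu_run() now loops unconditionally, choosing vcpu_enter_guest() or the new vcpu_block() each iteration, and leaves only when one of them returns <= 0 or an exit to userspace (signal, requested interrupt window) is due. Stripped of SRCU and bookkeeping, the control flow reduces to:

    /* Condensed control flow of vcpu_run() as introduced above; runnable()
     * stands in for the RUNNABLE-and-not-apf.halted test. */
    static int vcpu_run(struct kvm_vcpu *vcpu)
    {
    	int r;

    	for (;;) {
    		if (runnable(vcpu))
    			r = vcpu_enter_guest(vcpu);
    		else
    			r = vcpu_block(vcpu->kvm, vcpu);
    		if (r <= 0)
    			break;		/* return to userspace */
    		/* also break on pending signals or a requested irq window */
    	}
    	return r;
    }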
@@ -6429,6 +6468,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
6429 | r = -EINTR; | 6468 | r = -EINTR; |
6430 | vcpu->run->exit_reason = KVM_EXIT_INTR; | 6469 | vcpu->run->exit_reason = KVM_EXIT_INTR; |
6431 | ++vcpu->stat.request_irq_exits; | 6470 | ++vcpu->stat.request_irq_exits; |
6471 | break; | ||
6432 | } | 6472 | } |
6433 | 6473 | ||
6434 | kvm_check_async_pf_completion(vcpu); | 6474 | kvm_check_async_pf_completion(vcpu); |
@@ -6437,6 +6477,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
6437 | r = -EINTR; | 6477 | r = -EINTR; |
6438 | vcpu->run->exit_reason = KVM_EXIT_INTR; | 6478 | vcpu->run->exit_reason = KVM_EXIT_INTR; |
6439 | ++vcpu->stat.signal_exits; | 6479 | ++vcpu->stat.signal_exits; |
6480 | break; | ||
6440 | } | 6481 | } |
6441 | if (need_resched()) { | 6482 | if (need_resched()) { |
6442 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 6483 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
@@ -6568,7 +6609,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
6568 | } else | 6609 | } else |
6569 | WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); | 6610 | WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); |
6570 | 6611 | ||
6571 | r = __vcpu_run(vcpu); | 6612 | r = vcpu_run(vcpu); |
6572 | 6613 | ||
6573 | out: | 6614 | out: |
6574 | post_kvm_run_save(vcpu); | 6615 | post_kvm_run_save(vcpu); |
@@ -7075,11 +7116,14 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) | |||
7075 | kvm_clear_exception_queue(vcpu); | 7116 | kvm_clear_exception_queue(vcpu); |
7076 | 7117 | ||
7077 | memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); | 7118 | memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); |
7119 | kvm_update_dr0123(vcpu); | ||
7078 | vcpu->arch.dr6 = DR6_INIT; | 7120 | vcpu->arch.dr6 = DR6_INIT; |
7079 | kvm_update_dr6(vcpu); | 7121 | kvm_update_dr6(vcpu); |
7080 | vcpu->arch.dr7 = DR7_FIXED_1; | 7122 | vcpu->arch.dr7 = DR7_FIXED_1; |
7081 | kvm_update_dr7(vcpu); | 7123 | kvm_update_dr7(vcpu); |
7082 | 7124 | ||
7125 | vcpu->arch.cr2 = 0; | ||
7126 | |||
7083 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 7127 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
7084 | vcpu->arch.apf.msr_val = 0; | 7128 | vcpu->arch.apf.msr_val = 0; |
7085 | vcpu->arch.st.msr_val = 0; | 7129 | vcpu->arch.st.msr_val = 0; |
@@ -7240,7 +7284,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
7240 | 7284 | ||
7241 | vcpu->arch.pv.pv_unhalted = false; | 7285 | vcpu->arch.pv.pv_unhalted = false; |
7242 | vcpu->arch.emulate_ctxt.ops = &emulate_ops; | 7286 | vcpu->arch.emulate_ctxt.ops = &emulate_ops; |
7243 | if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) | 7287 | if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu)) |
7244 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 7288 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
7245 | else | 7289 | else |
7246 | vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; | 7290 | vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; |
@@ -7288,6 +7332,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
7288 | vcpu->arch.guest_supported_xcr0 = 0; | 7332 | vcpu->arch.guest_supported_xcr0 = 0; |
7289 | vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; | 7333 | vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; |
7290 | 7334 | ||
7335 | vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); | ||
7336 | |||
7291 | kvm_async_pf_hash_reset(vcpu); | 7337 | kvm_async_pf_hash_reset(vcpu); |
7292 | kvm_pmu_init(vcpu); | 7338 | kvm_pmu_init(vcpu); |
7293 | 7339 | ||
@@ -7428,7 +7474,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, | |||
7428 | 7474 | ||
7429 | for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { | 7475 | for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { |
7430 | if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) { | 7476 | if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) { |
7431 | kvm_kvfree(free->arch.rmap[i]); | 7477 | kvfree(free->arch.rmap[i]); |
7432 | free->arch.rmap[i] = NULL; | 7478 | free->arch.rmap[i] = NULL; |
7433 | } | 7479 | } |
7434 | if (i == 0) | 7480 | if (i == 0) |
@@ -7436,7 +7482,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, | |||
7436 | 7482 | ||
7437 | if (!dont || free->arch.lpage_info[i - 1] != | 7483 | if (!dont || free->arch.lpage_info[i - 1] != |
7438 | dont->arch.lpage_info[i - 1]) { | 7484 | dont->arch.lpage_info[i - 1]) { |
7439 | kvm_kvfree(free->arch.lpage_info[i - 1]); | 7485 | kvfree(free->arch.lpage_info[i - 1]); |
7440 | free->arch.lpage_info[i - 1] = NULL; | 7486 | free->arch.lpage_info[i - 1] = NULL; |
7441 | } | 7487 | } |
7442 | } | 7488 | } |
@@ -7490,12 +7536,12 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
7490 | 7536 | ||
7491 | out_free: | 7537 | out_free: |
7492 | for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { | 7538 | for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { |
7493 | kvm_kvfree(slot->arch.rmap[i]); | 7539 | kvfree(slot->arch.rmap[i]); |
7494 | slot->arch.rmap[i] = NULL; | 7540 | slot->arch.rmap[i] = NULL; |
7495 | if (i == 0) | 7541 | if (i == 0) |
7496 | continue; | 7542 | continue; |
7497 | 7543 | ||
7498 | kvm_kvfree(slot->arch.lpage_info[i - 1]); | 7544 | kvfree(slot->arch.lpage_info[i - 1]); |
7499 | slot->arch.lpage_info[i - 1] = NULL; | 7545 | slot->arch.lpage_info[i - 1] = NULL; |
7500 | } | 7546 | } |
7501 | return -ENOMEM; | 7547 | return -ENOMEM; |
@@ -7618,6 +7664,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
7618 | new = id_to_memslot(kvm->memslots, mem->slot); | 7664 | new = id_to_memslot(kvm->memslots, mem->slot); |
7619 | 7665 | ||
7620 | /* | 7666 | /* |
7667 | * Dirty logging tracks sptes in 4k granularity, meaning that large | ||
7668 | * sptes have to be split. If live migration is successful, the guest | ||
7669 | * in the source machine will be destroyed and large sptes will be | ||
7670 | * created in the destination. However, if the guest continues to run | ||
7671 | * in the source machine (for example if live migration fails), small | ||
7672 | * sptes will remain around and cause bad performance. | ||
7673 | * | ||
7674 | * Scan sptes if dirty logging has been stopped, dropping those | ||
7675 | * which can be collapsed into a single large-page spte. Later | ||
7676 | * page faults will create the large-page sptes. | ||
7677 | */ | ||
7678 | if ((change != KVM_MR_DELETE) && | ||
7679 | (old->flags & KVM_MEM_LOG_DIRTY_PAGES) && | ||
7680 | !(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) | ||
7681 | kvm_mmu_zap_collapsible_sptes(kvm, new); | ||
7682 | |||
7683 | /* | ||
7621 | * Set up write protection and/or dirty logging for the new slot. | 7684 | * Set up write protection and/or dirty logging for the new slot. |
7622 | * | 7685 | * |
7623 | * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of old slot have | 7686 | * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of old slot have |
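The comment block added above explains why kvm_mmu_zap_collapsible_sptes() is keyed to the dirty-log flag transition. That transition is driven from userspace by re-registering the memslot without KVM_MEM_LOG_DIRTY_PAGES once migration is abandoned; a rough sketch of that side, with slot/gpa/size/hva assumed to match the original registration:

    /* Sketch: userspace turning dirty logging back off for one memslot. */
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int stop_dirty_logging(int vm_fd, __u32 slot, __u64 gpa,
    			      __u64 size, __u64 hva)
    {
    	struct kvm_userspace_memory_region mr = {
    		.slot = slot,
    		.flags = 0,	/* KVM_MEM_LOG_DIRTY_PAGES cleared */
    		.guest_phys_addr = gpa,
    		.memory_size = size,
    		.userspace_addr = hva,
    	};

    	/* With this patch, KVM zaps the 4k sptes created for dirty tracking
    	 * so later faults can rebuild large-page mappings. */
    	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &mr);
    }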
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index ac4453d8520e..717908b16037 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -868,7 +868,8 @@ static void __init lguest_init_IRQ(void) | |||
868 | /* Some systems map "vectors" to interrupts weirdly. Not us! */ | 868 | /* Some systems map "vectors" to interrupts weirdly. Not us! */ |
869 | __this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR); | 869 | __this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR); |
870 | if (i != SYSCALL_VECTOR) | 870 | if (i != SYSCALL_VECTOR) |
871 | set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); | 871 | set_intr_gate(i, irq_entries_start + |
872 | 8 * (i - FIRST_EXTERNAL_VECTOR)); | ||
872 | } | 873 | } |
873 | 874 | ||
874 | /* | 875 | /* |
@@ -1076,6 +1077,7 @@ static void lguest_load_sp0(struct tss_struct *tss, | |||
1076 | { | 1077 | { |
1077 | lazy_hcall3(LHCALL_SET_STACK, __KERNEL_DS | 0x1, thread->sp0, | 1078 | lazy_hcall3(LHCALL_SET_STACK, __KERNEL_DS | 0x1, thread->sp0, |
1078 | THREAD_SIZE / PAGE_SIZE); | 1079 | THREAD_SIZE / PAGE_SIZE); |
1080 | tss->x86_tss.sp0 = thread->sp0; | ||
1079 | } | 1081 | } |
1080 | 1082 | ||
1081 | /* Let's just say, I wouldn't do debugging under a Guest. */ | 1083 | /* Let's just say, I wouldn't do debugging under a Guest. */ |
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index f5cc9eb1d51b..082a85167a5b 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S | |||
@@ -13,16 +13,6 @@ | |||
13 | #include <asm/alternative-asm.h> | 13 | #include <asm/alternative-asm.h> |
14 | #include <asm/dwarf2.h> | 14 | #include <asm/dwarf2.h> |
15 | 15 | ||
16 | .macro SAVE reg | ||
17 | pushl_cfi %\reg | ||
18 | CFI_REL_OFFSET \reg, 0 | ||
19 | .endm | ||
20 | |||
21 | .macro RESTORE reg | ||
22 | popl_cfi %\reg | ||
23 | CFI_RESTORE \reg | ||
24 | .endm | ||
25 | |||
26 | .macro read64 reg | 16 | .macro read64 reg |
27 | movl %ebx, %eax | 17 | movl %ebx, %eax |
28 | movl %ecx, %edx | 18 | movl %ecx, %edx |
@@ -67,10 +57,10 @@ ENDPROC(atomic64_xchg_cx8) | |||
67 | .macro addsub_return func ins insc | 57 | .macro addsub_return func ins insc |
68 | ENTRY(atomic64_\func\()_return_cx8) | 58 | ENTRY(atomic64_\func\()_return_cx8) |
69 | CFI_STARTPROC | 59 | CFI_STARTPROC |
70 | SAVE ebp | 60 | pushl_cfi_reg ebp |
71 | SAVE ebx | 61 | pushl_cfi_reg ebx |
72 | SAVE esi | 62 | pushl_cfi_reg esi |
73 | SAVE edi | 63 | pushl_cfi_reg edi |
74 | 64 | ||
75 | movl %eax, %esi | 65 | movl %eax, %esi |
76 | movl %edx, %edi | 66 | movl %edx, %edi |
@@ -89,10 +79,10 @@ ENTRY(atomic64_\func\()_return_cx8) | |||
89 | 10: | 79 | 10: |
90 | movl %ebx, %eax | 80 | movl %ebx, %eax |
91 | movl %ecx, %edx | 81 | movl %ecx, %edx |
92 | RESTORE edi | 82 | popl_cfi_reg edi |
93 | RESTORE esi | 83 | popl_cfi_reg esi |
94 | RESTORE ebx | 84 | popl_cfi_reg ebx |
95 | RESTORE ebp | 85 | popl_cfi_reg ebp |
96 | ret | 86 | ret |
97 | CFI_ENDPROC | 87 | CFI_ENDPROC |
98 | ENDPROC(atomic64_\func\()_return_cx8) | 88 | ENDPROC(atomic64_\func\()_return_cx8) |
@@ -104,7 +94,7 @@ addsub_return sub sub sbb | |||
104 | .macro incdec_return func ins insc | 94 | .macro incdec_return func ins insc |
105 | ENTRY(atomic64_\func\()_return_cx8) | 95 | ENTRY(atomic64_\func\()_return_cx8) |
106 | CFI_STARTPROC | 96 | CFI_STARTPROC |
107 | SAVE ebx | 97 | pushl_cfi_reg ebx |
108 | 98 | ||
109 | read64 %esi | 99 | read64 %esi |
110 | 1: | 100 | 1: |
@@ -119,7 +109,7 @@ ENTRY(atomic64_\func\()_return_cx8) | |||
119 | 10: | 109 | 10: |
120 | movl %ebx, %eax | 110 | movl %ebx, %eax |
121 | movl %ecx, %edx | 111 | movl %ecx, %edx |
122 | RESTORE ebx | 112 | popl_cfi_reg ebx |
123 | ret | 113 | ret |
124 | CFI_ENDPROC | 114 | CFI_ENDPROC |
125 | ENDPROC(atomic64_\func\()_return_cx8) | 115 | ENDPROC(atomic64_\func\()_return_cx8) |
@@ -130,7 +120,7 @@ incdec_return dec sub sbb | |||
130 | 120 | ||
131 | ENTRY(atomic64_dec_if_positive_cx8) | 121 | ENTRY(atomic64_dec_if_positive_cx8) |
132 | CFI_STARTPROC | 122 | CFI_STARTPROC |
133 | SAVE ebx | 123 | pushl_cfi_reg ebx |
134 | 124 | ||
135 | read64 %esi | 125 | read64 %esi |
136 | 1: | 126 | 1: |
@@ -146,18 +136,18 @@ ENTRY(atomic64_dec_if_positive_cx8) | |||
146 | 2: | 136 | 2: |
147 | movl %ebx, %eax | 137 | movl %ebx, %eax |
148 | movl %ecx, %edx | 138 | movl %ecx, %edx |
149 | RESTORE ebx | 139 | popl_cfi_reg ebx |
150 | ret | 140 | ret |
151 | CFI_ENDPROC | 141 | CFI_ENDPROC |
152 | ENDPROC(atomic64_dec_if_positive_cx8) | 142 | ENDPROC(atomic64_dec_if_positive_cx8) |
153 | 143 | ||
154 | ENTRY(atomic64_add_unless_cx8) | 144 | ENTRY(atomic64_add_unless_cx8) |
155 | CFI_STARTPROC | 145 | CFI_STARTPROC |
156 | SAVE ebp | 146 | pushl_cfi_reg ebp |
157 | SAVE ebx | 147 | pushl_cfi_reg ebx |
158 | /* these just push these two parameters on the stack */ | 148 | /* these just push these two parameters on the stack */ |
159 | SAVE edi | 149 | pushl_cfi_reg edi |
160 | SAVE ecx | 150 | pushl_cfi_reg ecx |
161 | 151 | ||
162 | movl %eax, %ebp | 152 | movl %eax, %ebp |
163 | movl %edx, %edi | 153 | movl %edx, %edi |
@@ -179,8 +169,8 @@ ENTRY(atomic64_add_unless_cx8) | |||
179 | 3: | 169 | 3: |
180 | addl $8, %esp | 170 | addl $8, %esp |
181 | CFI_ADJUST_CFA_OFFSET -8 | 171 | CFI_ADJUST_CFA_OFFSET -8 |
182 | RESTORE ebx | 172 | popl_cfi_reg ebx |
183 | RESTORE ebp | 173 | popl_cfi_reg ebp |
184 | ret | 174 | ret |
185 | 4: | 175 | 4: |
186 | cmpl %edx, 4(%esp) | 176 | cmpl %edx, 4(%esp) |
@@ -192,7 +182,7 @@ ENDPROC(atomic64_add_unless_cx8) | |||
192 | 182 | ||
193 | ENTRY(atomic64_inc_not_zero_cx8) | 183 | ENTRY(atomic64_inc_not_zero_cx8) |
194 | CFI_STARTPROC | 184 | CFI_STARTPROC |
195 | SAVE ebx | 185 | pushl_cfi_reg ebx |
196 | 186 | ||
197 | read64 %esi | 187 | read64 %esi |
198 | 1: | 188 | 1: |
@@ -209,7 +199,7 @@ ENTRY(atomic64_inc_not_zero_cx8) | |||
209 | 199 | ||
210 | movl $1, %eax | 200 | movl $1, %eax |
211 | 3: | 201 | 3: |
212 | RESTORE ebx | 202 | popl_cfi_reg ebx |
213 | ret | 203 | ret |
214 | CFI_ENDPROC | 204 | CFI_ENDPROC |
215 | ENDPROC(atomic64_inc_not_zero_cx8) | 205 | ENDPROC(atomic64_inc_not_zero_cx8) |
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index e78b8eee6615..9bc944a91274 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S | |||
@@ -51,10 +51,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) | |||
51 | */ | 51 | */ |
52 | ENTRY(csum_partial) | 52 | ENTRY(csum_partial) |
53 | CFI_STARTPROC | 53 | CFI_STARTPROC |
54 | pushl_cfi %esi | 54 | pushl_cfi_reg esi |
55 | CFI_REL_OFFSET esi, 0 | 55 | pushl_cfi_reg ebx |
56 | pushl_cfi %ebx | ||
57 | CFI_REL_OFFSET ebx, 0 | ||
58 | movl 20(%esp),%eax # Function arg: unsigned int sum | 56 | movl 20(%esp),%eax # Function arg: unsigned int sum |
59 | movl 16(%esp),%ecx # Function arg: int len | 57 | movl 16(%esp),%ecx # Function arg: int len |
60 | movl 12(%esp),%esi # Function arg: unsigned char *buff | 58 | movl 12(%esp),%esi # Function arg: unsigned char *buff |
@@ -127,14 +125,12 @@ ENTRY(csum_partial) | |||
127 | 6: addl %ecx,%eax | 125 | 6: addl %ecx,%eax |
128 | adcl $0, %eax | 126 | adcl $0, %eax |
129 | 7: | 127 | 7: |
130 | testl $1, 12(%esp) | 128 | testb $1, 12(%esp) |
131 | jz 8f | 129 | jz 8f |
132 | roll $8, %eax | 130 | roll $8, %eax |
133 | 8: | 131 | 8: |
134 | popl_cfi %ebx | 132 | popl_cfi_reg ebx |
135 | CFI_RESTORE ebx | 133 | popl_cfi_reg esi |
136 | popl_cfi %esi | ||
137 | CFI_RESTORE esi | ||
138 | ret | 134 | ret |
139 | CFI_ENDPROC | 135 | CFI_ENDPROC |
140 | ENDPROC(csum_partial) | 136 | ENDPROC(csum_partial) |
@@ -145,10 +141,8 @@ ENDPROC(csum_partial) | |||
145 | 141 | ||
146 | ENTRY(csum_partial) | 142 | ENTRY(csum_partial) |
147 | CFI_STARTPROC | 143 | CFI_STARTPROC |
148 | pushl_cfi %esi | 144 | pushl_cfi_reg esi |
149 | CFI_REL_OFFSET esi, 0 | 145 | pushl_cfi_reg ebx |
150 | pushl_cfi %ebx | ||
151 | CFI_REL_OFFSET ebx, 0 | ||
152 | movl 20(%esp),%eax # Function arg: unsigned int sum | 146 | movl 20(%esp),%eax # Function arg: unsigned int sum |
153 | movl 16(%esp),%ecx # Function arg: int len | 147 | movl 16(%esp),%ecx # Function arg: int len |
154 | movl 12(%esp),%esi # Function arg: const unsigned char *buf | 148 | movl 12(%esp),%esi # Function arg: const unsigned char *buf |
@@ -251,14 +245,12 @@ ENTRY(csum_partial) | |||
251 | addl %ebx,%eax | 245 | addl %ebx,%eax |
252 | adcl $0,%eax | 246 | adcl $0,%eax |
253 | 80: | 247 | 80: |
254 | testl $1, 12(%esp) | 248 | testb $1, 12(%esp) |
255 | jz 90f | 249 | jz 90f |
256 | roll $8, %eax | 250 | roll $8, %eax |
257 | 90: | 251 | 90: |
258 | popl_cfi %ebx | 252 | popl_cfi_reg ebx |
259 | CFI_RESTORE ebx | 253 | popl_cfi_reg esi |
260 | popl_cfi %esi | ||
261 | CFI_RESTORE esi | ||
262 | ret | 254 | ret |
263 | CFI_ENDPROC | 255 | CFI_ENDPROC |
264 | ENDPROC(csum_partial) | 256 | ENDPROC(csum_partial) |
@@ -298,12 +290,9 @@ ENTRY(csum_partial_copy_generic) | |||
298 | CFI_STARTPROC | 290 | CFI_STARTPROC |
299 | subl $4,%esp | 291 | subl $4,%esp |
300 | CFI_ADJUST_CFA_OFFSET 4 | 292 | CFI_ADJUST_CFA_OFFSET 4 |
301 | pushl_cfi %edi | 293 | pushl_cfi_reg edi |
302 | CFI_REL_OFFSET edi, 0 | 294 | pushl_cfi_reg esi |
303 | pushl_cfi %esi | 295 | pushl_cfi_reg ebx |
304 | CFI_REL_OFFSET esi, 0 | ||
305 | pushl_cfi %ebx | ||
306 | CFI_REL_OFFSET ebx, 0 | ||
307 | movl ARGBASE+16(%esp),%eax # sum | 296 | movl ARGBASE+16(%esp),%eax # sum |
308 | movl ARGBASE+12(%esp),%ecx # len | 297 | movl ARGBASE+12(%esp),%ecx # len |
309 | movl ARGBASE+4(%esp),%esi # src | 298 | movl ARGBASE+4(%esp),%esi # src |
@@ -412,12 +401,9 @@ DST( movb %cl, (%edi) ) | |||
412 | 401 | ||
413 | .previous | 402 | .previous |
414 | 403 | ||
415 | popl_cfi %ebx | 404 | popl_cfi_reg ebx |
416 | CFI_RESTORE ebx | 405 | popl_cfi_reg esi |
417 | popl_cfi %esi | 406 | popl_cfi_reg edi |
418 | CFI_RESTORE esi | ||
419 | popl_cfi %edi | ||
420 | CFI_RESTORE edi | ||
421 | popl_cfi %ecx # equivalent to addl $4,%esp | 407 | popl_cfi %ecx # equivalent to addl $4,%esp |
422 | ret | 408 | ret |
423 | CFI_ENDPROC | 409 | CFI_ENDPROC |
@@ -441,12 +427,9 @@ ENDPROC(csum_partial_copy_generic) | |||
441 | 427 | ||
442 | ENTRY(csum_partial_copy_generic) | 428 | ENTRY(csum_partial_copy_generic) |
443 | CFI_STARTPROC | 429 | CFI_STARTPROC |
444 | pushl_cfi %ebx | 430 | pushl_cfi_reg ebx |
445 | CFI_REL_OFFSET ebx, 0 | 431 | pushl_cfi_reg edi |
446 | pushl_cfi %edi | 432 | pushl_cfi_reg esi |
447 | CFI_REL_OFFSET edi, 0 | ||
448 | pushl_cfi %esi | ||
449 | CFI_REL_OFFSET esi, 0 | ||
450 | movl ARGBASE+4(%esp),%esi #src | 433 | movl ARGBASE+4(%esp),%esi #src |
451 | movl ARGBASE+8(%esp),%edi #dst | 434 | movl ARGBASE+8(%esp),%edi #dst |
452 | movl ARGBASE+12(%esp),%ecx #len | 435 | movl ARGBASE+12(%esp),%ecx #len |
@@ -506,12 +489,9 @@ DST( movb %dl, (%edi) ) | |||
506 | jmp 7b | 489 | jmp 7b |
507 | .previous | 490 | .previous |
508 | 491 | ||
509 | popl_cfi %esi | 492 | popl_cfi_reg esi |
510 | CFI_RESTORE esi | 493 | popl_cfi_reg edi |
511 | popl_cfi %edi | 494 | popl_cfi_reg ebx |
512 | CFI_RESTORE edi | ||
513 | popl_cfi %ebx | ||
514 | CFI_RESTORE ebx | ||
515 | ret | 495 | ret |
516 | CFI_ENDPROC | 496 | CFI_ENDPROC |
517 | ENDPROC(csum_partial_copy_generic) | 497 | ENDPROC(csum_partial_copy_generic) |
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index f2145cfa12a6..e67e579c93bd 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S | |||
@@ -1,31 +1,35 @@ | |||
1 | #include <linux/linkage.h> | 1 | #include <linux/linkage.h> |
2 | #include <asm/dwarf2.h> | 2 | #include <asm/dwarf2.h> |
3 | #include <asm/cpufeature.h> | ||
3 | #include <asm/alternative-asm.h> | 4 | #include <asm/alternative-asm.h> |
4 | 5 | ||
5 | /* | 6 | /* |
6 | * Zero a page. | 7 | * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is |
7 | * rdi page | 8 | * recommended to use these when possible, and they are used by default. |
8 | */ | 9 | * If enhanced REP MOVSB/STOSB is not available, try to use fast string. |
9 | ENTRY(clear_page_c) | 10 | * Otherwise, use the original function. |
11 | */ | ||
12 | |||
13 | /* | ||
14 | * Zero a page. | ||
15 | * %rdi - page | ||
16 | */ | ||
17 | ENTRY(clear_page) | ||
10 | CFI_STARTPROC | 18 | CFI_STARTPROC |
19 | |||
20 | ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \ | ||
21 | "jmp clear_page_c_e", X86_FEATURE_ERMS | ||
22 | |||
11 | movl $4096/8,%ecx | 23 | movl $4096/8,%ecx |
12 | xorl %eax,%eax | 24 | xorl %eax,%eax |
13 | rep stosq | 25 | rep stosq |
14 | ret | 26 | ret |
15 | CFI_ENDPROC | 27 | CFI_ENDPROC |
16 | ENDPROC(clear_page_c) | 28 | ENDPROC(clear_page) |
17 | 29 | ||
18 | ENTRY(clear_page_c_e) | 30 | ENTRY(clear_page_orig) |
19 | CFI_STARTPROC | 31 | CFI_STARTPROC |
20 | movl $4096,%ecx | ||
21 | xorl %eax,%eax | ||
22 | rep stosb | ||
23 | ret | ||
24 | CFI_ENDPROC | ||
25 | ENDPROC(clear_page_c_e) | ||
26 | 32 | ||
27 | ENTRY(clear_page) | ||
28 | CFI_STARTPROC | ||
29 | xorl %eax,%eax | 33 | xorl %eax,%eax |
30 | movl $4096/64,%ecx | 34 | movl $4096/64,%ecx |
31 | .p2align 4 | 35 | .p2align 4 |
@@ -45,29 +49,13 @@ ENTRY(clear_page) | |||
45 | nop | 49 | nop |
46 | ret | 50 | ret |
47 | CFI_ENDPROC | 51 | CFI_ENDPROC |
48 | .Lclear_page_end: | 52 | ENDPROC(clear_page_orig) |
49 | ENDPROC(clear_page) | ||
50 | |||
51 | /* | ||
52 | * Some CPUs support enhanced REP MOVSB/STOSB instructions. | ||
53 | * It is recommended to use this when possible. | ||
54 | * If enhanced REP MOVSB/STOSB is not available, try to use fast string. | ||
55 | * Otherwise, use original function. | ||
56 | * | ||
57 | */ | ||
58 | 53 | ||
59 | #include <asm/cpufeature.h> | 54 | ENTRY(clear_page_c_e) |
60 | 55 | CFI_STARTPROC | |
61 | .section .altinstr_replacement,"ax" | 56 | movl $4096,%ecx |
62 | 1: .byte 0xeb /* jmp <disp8> */ | 57 | xorl %eax,%eax |
63 | .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ | 58 | rep stosb |
64 | 2: .byte 0xeb /* jmp <disp8> */ | 59 | ret |
65 | .byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */ | 60 | CFI_ENDPROC |
66 | 3: | 61 | ENDPROC(clear_page_c_e) |
67 | .previous | ||
68 | .section .altinstructions,"a" | ||
69 | altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\ | ||
70 | .Lclear_page_end-clear_page, 2b-1b | ||
71 | altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \ | ||
72 | .Lclear_page_end-clear_page,3b-2b | ||
73 | .previous | ||
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index 176cca67212b..8239dbcbf984 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S | |||
@@ -2,23 +2,26 @@ | |||
2 | 2 | ||
3 | #include <linux/linkage.h> | 3 | #include <linux/linkage.h> |
4 | #include <asm/dwarf2.h> | 4 | #include <asm/dwarf2.h> |
5 | #include <asm/cpufeature.h> | ||
5 | #include <asm/alternative-asm.h> | 6 | #include <asm/alternative-asm.h> |
6 | 7 | ||
8 | /* | ||
9 | * Some CPUs run faster using the string copy instructions (sane microcode). | ||
10 | * It is also a lot simpler. Use this when possible. But, don't use streaming | ||
11 | * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the | ||
12 | * prefetch distance based on SMP/UP. | ||
13 | */ | ||
7 | ALIGN | 14 | ALIGN |
8 | copy_page_rep: | 15 | ENTRY(copy_page) |
9 | CFI_STARTPROC | 16 | CFI_STARTPROC |
17 | ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD | ||
10 | movl $4096/8, %ecx | 18 | movl $4096/8, %ecx |
11 | rep movsq | 19 | rep movsq |
12 | ret | 20 | ret |
13 | CFI_ENDPROC | 21 | CFI_ENDPROC |
14 | ENDPROC(copy_page_rep) | 22 | ENDPROC(copy_page) |
15 | |||
16 | /* | ||
17 | * Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD. | ||
18 | * Could vary the prefetch distance based on SMP/UP. | ||
19 | */ | ||
20 | 23 | ||
21 | ENTRY(copy_page) | 24 | ENTRY(copy_page_regs) |
22 | CFI_STARTPROC | 25 | CFI_STARTPROC |
23 | subq $2*8, %rsp | 26 | subq $2*8, %rsp |
24 | CFI_ADJUST_CFA_OFFSET 2*8 | 27 | CFI_ADJUST_CFA_OFFSET 2*8 |
@@ -90,21 +93,5 @@ ENTRY(copy_page) | |||
90 | addq $2*8, %rsp | 93 | addq $2*8, %rsp |
91 | CFI_ADJUST_CFA_OFFSET -2*8 | 94 | CFI_ADJUST_CFA_OFFSET -2*8 |
92 | ret | 95 | ret |
93 | .Lcopy_page_end: | ||
94 | CFI_ENDPROC | 96 | CFI_ENDPROC |
95 | ENDPROC(copy_page) | 97 | ENDPROC(copy_page_regs) |
96 | |||
97 | /* Some CPUs run faster using the string copy instructions. | ||
98 | It is also a lot simpler. Use this when possible */ | ||
99 | |||
100 | #include <asm/cpufeature.h> | ||
101 | |||
102 | .section .altinstr_replacement,"ax" | ||
103 | 1: .byte 0xeb /* jmp <disp8> */ | ||
104 | .byte (copy_page_rep - copy_page) - (2f - 1b) /* offset */ | ||
105 | 2: | ||
106 | .previous | ||
107 | .section .altinstructions,"a" | ||
108 | altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD, \ | ||
109 | .Lcopy_page_end-copy_page, 2b-1b | ||
110 | .previous | ||
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index dee945d55594..fa997dfaef24 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
@@ -8,9 +8,6 @@ | |||
8 | 8 | ||
9 | #include <linux/linkage.h> | 9 | #include <linux/linkage.h> |
10 | #include <asm/dwarf2.h> | 10 | #include <asm/dwarf2.h> |
11 | |||
12 | #define FIX_ALIGNMENT 1 | ||
13 | |||
14 | #include <asm/current.h> | 11 | #include <asm/current.h> |
15 | #include <asm/asm-offsets.h> | 12 | #include <asm/asm-offsets.h> |
16 | #include <asm/thread_info.h> | 13 | #include <asm/thread_info.h> |
@@ -19,33 +16,7 @@ | |||
19 | #include <asm/asm.h> | 16 | #include <asm/asm.h> |
20 | #include <asm/smap.h> | 17 | #include <asm/smap.h> |
21 | 18 | ||
22 | /* | ||
23 | * By placing feature2 after feature1 in altinstructions section, we logically | ||
24 | * implement: | ||
25 | * If CPU has feature2, jmp to alt2 is used | ||
26 | * else if CPU has feature1, jmp to alt1 is used | ||
27 | * else jmp to orig is used. | ||
28 | */ | ||
29 | .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2 | ||
30 | 0: | ||
31 | .byte 0xe9 /* 32bit jump */ | ||
32 | .long \orig-1f /* by default jump to orig */ | ||
33 | 1: | ||
34 | .section .altinstr_replacement,"ax" | ||
35 | 2: .byte 0xe9 /* near jump with 32bit immediate */ | ||
36 | .long \alt1-1b /* offset */ /* or alternatively to alt1 */ | ||
37 | 3: .byte 0xe9 /* near jump with 32bit immediate */ | ||
38 | .long \alt2-1b /* offset */ /* or alternatively to alt2 */ | ||
39 | .previous | ||
40 | |||
41 | .section .altinstructions,"a" | ||
42 | altinstruction_entry 0b,2b,\feature1,5,5 | ||
43 | altinstruction_entry 0b,3b,\feature2,5,5 | ||
44 | .previous | ||
45 | .endm | ||
46 | |||
47 | .macro ALIGN_DESTINATION | 19 | .macro ALIGN_DESTINATION |
48 | #ifdef FIX_ALIGNMENT | ||
49 | /* check for bad alignment of destination */ | 20 | /* check for bad alignment of destination */ |
50 | movl %edi,%ecx | 21 | movl %edi,%ecx |
51 | andl $7,%ecx | 22 | andl $7,%ecx |
@@ -67,7 +38,6 @@ | |||
67 | 38 | ||
68 | _ASM_EXTABLE(100b,103b) | 39 | _ASM_EXTABLE(100b,103b) |
69 | _ASM_EXTABLE(101b,103b) | 40 | _ASM_EXTABLE(101b,103b) |
70 | #endif | ||
71 | .endm | 41 | .endm |
72 | 42 | ||
73 | /* Standard copy_to_user with segment limit checking */ | 43 | /* Standard copy_to_user with segment limit checking */ |
@@ -79,9 +49,11 @@ ENTRY(_copy_to_user) | |||
79 | jc bad_to_user | 49 | jc bad_to_user |
80 | cmpq TI_addr_limit(%rax),%rcx | 50 | cmpq TI_addr_limit(%rax),%rcx |
81 | ja bad_to_user | 51 | ja bad_to_user |
82 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ | 52 | ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \ |
83 | copy_user_generic_unrolled,copy_user_generic_string, \ | 53 | "jmp copy_user_generic_string", \ |
84 | copy_user_enhanced_fast_string | 54 | X86_FEATURE_REP_GOOD, \ |
55 | "jmp copy_user_enhanced_fast_string", \ | ||
56 | X86_FEATURE_ERMS | ||
85 | CFI_ENDPROC | 57 | CFI_ENDPROC |
86 | ENDPROC(_copy_to_user) | 58 | ENDPROC(_copy_to_user) |
87 | 59 | ||
@@ -94,9 +66,11 @@ ENTRY(_copy_from_user) | |||
94 | jc bad_from_user | 66 | jc bad_from_user |
95 | cmpq TI_addr_limit(%rax),%rcx | 67 | cmpq TI_addr_limit(%rax),%rcx |
96 | ja bad_from_user | 68 | ja bad_from_user |
97 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ | 69 | ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \ |
98 | copy_user_generic_unrolled,copy_user_generic_string, \ | 70 | "jmp copy_user_generic_string", \ |
99 | copy_user_enhanced_fast_string | 71 | X86_FEATURE_REP_GOOD, \ |
72 | "jmp copy_user_enhanced_fast_string", \ | ||
73 | X86_FEATURE_ERMS | ||
100 | CFI_ENDPROC | 74 | CFI_ENDPROC |
101 | ENDPROC(_copy_from_user) | 75 | ENDPROC(_copy_from_user) |
102 | 76 | ||
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S index 2419d5fefae3..9734182966f3 100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S | |||
@@ -196,7 +196,7 @@ ENTRY(csum_partial_copy_generic) | |||
196 | 196 | ||
197 | /* handle last odd byte */ | 197 | /* handle last odd byte */ |
198 | .Lhandle_1: | 198 | .Lhandle_1: |
199 | testl $1, %r10d | 199 | testb $1, %r10b |
200 | jz .Lende | 200 | jz .Lende |
201 | xorl %ebx, %ebx | 201 | xorl %ebx, %ebx |
202 | source | 202 | source |
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 1313ae6b478b..8f72b334aea0 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c | |||
@@ -52,6 +52,13 @@ | |||
52 | */ | 52 | */ |
53 | void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) | 53 | void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) |
54 | { | 54 | { |
55 | /* | ||
56 | * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid | ||
57 | * even if the input buffer is long enough to hold them. | ||
58 | */ | ||
59 | if (buf_len > MAX_INSN_SIZE) | ||
60 | buf_len = MAX_INSN_SIZE; | ||
61 | |||
55 | memset(insn, 0, sizeof(*insn)); | 62 | memset(insn, 0, sizeof(*insn)); |
56 | insn->kaddr = kaddr; | 63 | insn->kaddr = kaddr; |
57 | insn->end_kaddr = kaddr + buf_len; | 64 | insn->end_kaddr = kaddr + buf_len; |
@@ -164,6 +171,12 @@ found: | |||
164 | /* VEX.W overrides opnd_size */ | 171 | /* VEX.W overrides opnd_size */ |
165 | insn->opnd_bytes = 8; | 172 | insn->opnd_bytes = 8; |
166 | } else { | 173 | } else { |
174 | /* | ||
175 | * For VEX2, fake VEX3-like byte#2. | ||
176 | * Makes it easier to decode vex.W, vex.vvvv, | ||
177 | * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. | ||
178 | */ | ||
179 | insn->vex_prefix.bytes[2] = b2 & 0x7f; | ||
167 | insn->vex_prefix.nbytes = 2; | 180 | insn->vex_prefix.nbytes = 2; |
168 | insn->next_byte += 2; | 181 | insn->next_byte += 2; |
169 | } | 182 | } |
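The two comments added to insn.c above document behaviour rather than introduce it: insn_init() now clamps the caller-supplied buffer length to MAX_INSN_SIZE (15 bytes), and a 2-byte VEX prefix gets a synthesized VEX3-style third byte so that vex.W/vvvv/L/pp can be decoded uniformly later. A minimal caller-side sketch of why the clamp matters (the helper name and error handling below are illustrative assumptions, not part of this patch; only insn_init(), insn_get_length(), insn_complete() and struct insn from <asm/insn.h> are taken from the tree):

	#include <linux/errno.h>
	#include <asm/insn.h>

	/* Illustration only: decode the first instruction in a large buffer. */
	static int decode_first_insn(const void *kaddr, int buf_len, int x86_64)
	{
		struct insn insn;

		/*
		 * Even if buf_len is, say, PAGE_SIZE, insn_init() now caps the
		 * usable window at MAX_INSN_SIZE, so a malformed byte stream
		 * can never be reported as an over-long instruction.
		 */
		insn_init(&insn, kaddr, buf_len, x86_64);
		insn_get_length(&insn);

		return insn_complete(&insn) ? insn.length : -EINVAL;
	}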
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 89b53c9968e7..b046664f5a1c 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S | |||
@@ -1,12 +1,20 @@ | |||
1 | /* Copyright 2002 Andi Kleen */ | 1 | /* Copyright 2002 Andi Kleen */ |
2 | 2 | ||
3 | #include <linux/linkage.h> | 3 | #include <linux/linkage.h> |
4 | |||
5 | #include <asm/cpufeature.h> | 4 | #include <asm/cpufeature.h> |
6 | #include <asm/dwarf2.h> | 5 | #include <asm/dwarf2.h> |
7 | #include <asm/alternative-asm.h> | 6 | #include <asm/alternative-asm.h> |
8 | 7 | ||
9 | /* | 8 | /* |
9 | * We build a jump to memcpy_orig by default which gets NOPped out on | ||
10 | * the majority of x86 CPUs which set REP_GOOD. In addition, on CPUs | ||
11 | * which have the enhanced REP MOVSB/STOSB feature (ERMS), those NOPs | ||
12 | * are changed to a jmp to memcpy_erms which does the REP; MOVSB mem copy. | ||
13 | */ | ||
14 | |||
15 | .weak memcpy | ||
16 | |||
17 | /* | ||
10 | * memcpy - Copy a memory block. | 18 | * memcpy - Copy a memory block. |
11 | * | 19 | * |
12 | * Input: | 20 | * Input: |
@@ -17,15 +25,11 @@ | |||
17 | * Output: | 25 | * Output: |
18 | * rax original destination | 26 | * rax original destination |
19 | */ | 27 | */ |
28 | ENTRY(__memcpy) | ||
29 | ENTRY(memcpy) | ||
30 | ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ | ||
31 | "jmp memcpy_erms", X86_FEATURE_ERMS | ||
20 | 32 | ||
21 | /* | ||
22 | * memcpy_c() - fast string ops (REP MOVSQ) based variant. | ||
23 | * | ||
24 | * This gets patched over the unrolled variant (below) via the | ||
25 | * alternative instructions framework: | ||
26 | */ | ||
27 | .section .altinstr_replacement, "ax", @progbits | ||
28 | .Lmemcpy_c: | ||
29 | movq %rdi, %rax | 33 | movq %rdi, %rax |
30 | movq %rdx, %rcx | 34 | movq %rdx, %rcx |
31 | shrq $3, %rcx | 35 | shrq $3, %rcx |
@@ -34,29 +38,21 @@ | |||
34 | movl %edx, %ecx | 38 | movl %edx, %ecx |
35 | rep movsb | 39 | rep movsb |
36 | ret | 40 | ret |
37 | .Lmemcpy_e: | 41 | ENDPROC(memcpy) |
38 | .previous | 42 | ENDPROC(__memcpy) |
39 | 43 | ||
40 | /* | 44 | /* |
41 | * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than | 45 | * memcpy_erms() - enhanced fast string memcpy. This is faster and |
42 | * memcpy_c. Use memcpy_c_e when possible. | 46 | * simpler than memcpy. Use memcpy_erms when possible. |
43 | * | ||
44 | * This gets patched over the unrolled variant (below) via the | ||
45 | * alternative instructions framework: | ||
46 | */ | 47 | */ |
47 | .section .altinstr_replacement, "ax", @progbits | 48 | ENTRY(memcpy_erms) |
48 | .Lmemcpy_c_e: | ||
49 | movq %rdi, %rax | 49 | movq %rdi, %rax |
50 | movq %rdx, %rcx | 50 | movq %rdx, %rcx |
51 | rep movsb | 51 | rep movsb |
52 | ret | 52 | ret |
53 | .Lmemcpy_e_e: | 53 | ENDPROC(memcpy_erms) |
54 | .previous | ||
55 | |||
56 | .weak memcpy | ||
57 | 54 | ||
58 | ENTRY(__memcpy) | 55 | ENTRY(memcpy_orig) |
59 | ENTRY(memcpy) | ||
60 | CFI_STARTPROC | 56 | CFI_STARTPROC |
61 | movq %rdi, %rax | 57 | movq %rdi, %rax |
62 | 58 | ||
@@ -183,26 +179,4 @@ ENTRY(memcpy) | |||
183 | .Lend: | 179 | .Lend: |
184 | retq | 180 | retq |
185 | CFI_ENDPROC | 181 | CFI_ENDPROC |
186 | ENDPROC(memcpy) | 182 | ENDPROC(memcpy_orig) |
187 | ENDPROC(__memcpy) | ||
188 | |||
189 | /* | ||
190 | * Some CPUs are adding enhanced REP MOVSB/STOSB feature | ||
191 | * If the feature is supported, memcpy_c_e() is the first choice. | ||
192 | * If enhanced rep movsb copy is not available, use fast string copy | ||
193 | * memcpy_c() when possible. This is faster and code is simpler than | ||
194 | * original memcpy(). | ||
195 | * Otherwise, original memcpy() is used. | ||
196 | * In .altinstructions section, ERMS feature is placed after REP_GOOD | ||
197 | * feature to implement the right patch order. | ||
198 | * | ||
199 | * Replace only beginning, memcpy is used to apply alternatives, | ||
200 | * so it is silly to overwrite itself with nops - reboot is the | ||
201 | * only outcome... | ||
202 | */ | ||
203 | .section .altinstructions, "a" | ||
204 | altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\ | ||
205 | .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c | ||
206 | altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \ | ||
207 | .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e | ||
208 | .previous | ||
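For reference, the single ALTERNATIVE_2 line at the memcpy entry point replaces the hand-written .altinstr_replacement/.altinstructions blocks deleted at the end of this hunk. The boot-time patching it requests amounts to the selection below, sketched in C purely for illustration (the selector function is hypothetical; the three symbols are the ones defined in memcpy_64.S, and the feature flags are the ones named in the diff):

	#include <linux/types.h>

	extern void *memcpy(void *to, const void *from, size_t len);
	extern void *memcpy_orig(void *to, const void *from, size_t len);
	extern void *memcpy_erms(void *to, const void *from, size_t len);

	typedef void *(*memcpy_fn_t)(void *, const void *, size_t);

	/* Hypothetical helper: what the patched-in jumps amount to at runtime. */
	static memcpy_fn_t select_memcpy(bool has_rep_good, bool has_erms)
	{
		if (has_erms)		/* X86_FEATURE_ERMS: REP MOVSB variant */
			return memcpy_erms;
		if (has_rep_good)	/* X86_FEATURE_REP_GOOD: the jump is NOPped
					 * out and the REP MOVSQ body of memcpy runs */
			return memcpy;
		return memcpy_orig;	/* neither flag: unrolled fallback */
	}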
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 9c4b530575da..0f8a0d0331b9 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S | |||
@@ -5,7 +5,6 @@ | |||
5 | * This assembly file is re-written from memmove_64.c file. | 5 | * This assembly file is re-written from memmove_64.c file. |
6 | * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com> | 6 | * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com> |
7 | */ | 7 | */ |
8 | #define _STRING_C | ||
9 | #include <linux/linkage.h> | 8 | #include <linux/linkage.h> |
10 | #include <asm/dwarf2.h> | 9 | #include <asm/dwarf2.h> |
11 | #include <asm/cpufeature.h> | 10 | #include <asm/cpufeature.h> |
@@ -44,6 +43,8 @@ ENTRY(__memmove) | |||
44 | jg 2f | 43 | jg 2f |
45 | 44 | ||
46 | .Lmemmove_begin_forward: | 45 | .Lmemmove_begin_forward: |
46 | ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS | ||
47 | |||
47 | /* | 48 | /* |
48 | * The movsq instruction has a high startup latency, | 49 | * The movsq instruction has a high startup latency, |
49 | * so we handle small sizes with general registers. | 50 | * so we handle small sizes with general registers. |
@@ -207,21 +208,5 @@ ENTRY(__memmove) | |||
207 | 13: | 208 | 13: |
208 | retq | 209 | retq |
209 | CFI_ENDPROC | 210 | CFI_ENDPROC |
210 | |||
211 | .section .altinstr_replacement,"ax" | ||
212 | .Lmemmove_begin_forward_efs: | ||
213 | /* Forward moving data. */ | ||
214 | movq %rdx, %rcx | ||
215 | rep movsb | ||
216 | retq | ||
217 | .Lmemmove_end_forward_efs: | ||
218 | .previous | ||
219 | |||
220 | .section .altinstructions,"a" | ||
221 | altinstruction_entry .Lmemmove_begin_forward, \ | ||
222 | .Lmemmove_begin_forward_efs,X86_FEATURE_ERMS, \ | ||
223 | .Lmemmove_end_forward-.Lmemmove_begin_forward, \ | ||
224 | .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs | ||
225 | .previous | ||
226 | ENDPROC(__memmove) | 211 | ENDPROC(__memmove) |
227 | ENDPROC(memmove) | 212 | ENDPROC(memmove) |
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 6f44935c6a60..93118fb23976 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S | |||
@@ -5,19 +5,30 @@ | |||
5 | #include <asm/cpufeature.h> | 5 | #include <asm/cpufeature.h> |
6 | #include <asm/alternative-asm.h> | 6 | #include <asm/alternative-asm.h> |
7 | 7 | ||
8 | .weak memset | ||
9 | |||
8 | /* | 10 | /* |
9 | * ISO C memset - set a memory block to a byte value. This function uses fast | 11 | * ISO C memset - set a memory block to a byte value. This function uses fast |
10 | * string to get better performance than the original function. The code is | 12 | * string to get better performance than the original function. The code is |
11 | simpler and shorter than the original function as well. | 13 | simpler and shorter than the original function as well. |
12 | * | 14 | * |
13 | * rdi destination | 15 | * rdi destination |
14 | * rsi value (char) | 16 | * rsi value (char) |
15 | * rdx count (bytes) | 17 | * rdx count (bytes) |
16 | * | 18 | * |
17 | * rax original destination | 19 | * rax original destination |
18 | */ | 20 | */ |
19 | .section .altinstr_replacement, "ax", @progbits | 21 | ENTRY(memset) |
20 | .Lmemset_c: | 22 | ENTRY(__memset) |
23 | /* | ||
24 | * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended | ||
25 | * to use it when possible. If not available, use fast string instructions. | ||
26 | * | ||
27 | * Otherwise, use original memset function. | ||
28 | */ | ||
29 | ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \ | ||
30 | "jmp memset_erms", X86_FEATURE_ERMS | ||
31 | |||
21 | movq %rdi,%r9 | 32 | movq %rdi,%r9 |
22 | movq %rdx,%rcx | 33 | movq %rdx,%rcx |
23 | andl $7,%edx | 34 | andl $7,%edx |
@@ -31,8 +42,8 @@ | |||
31 | rep stosb | 42 | rep stosb |
32 | movq %r9,%rax | 43 | movq %r9,%rax |
33 | ret | 44 | ret |
34 | .Lmemset_e: | 45 | ENDPROC(memset) |
35 | .previous | 46 | ENDPROC(__memset) |
36 | 47 | ||
37 | /* | 48 | /* |
38 | * ISO C memset - set a memory block to a byte value. This function uses | 49 | * ISO C memset - set a memory block to a byte value. This function uses |
@@ -45,21 +56,16 @@ | |||
45 | * | 56 | * |
46 | * rax original destination | 57 | * rax original destination |
47 | */ | 58 | */ |
48 | .section .altinstr_replacement, "ax", @progbits | 59 | ENTRY(memset_erms) |
49 | .Lmemset_c_e: | ||
50 | movq %rdi,%r9 | 60 | movq %rdi,%r9 |
51 | movb %sil,%al | 61 | movb %sil,%al |
52 | movq %rdx,%rcx | 62 | movq %rdx,%rcx |
53 | rep stosb | 63 | rep stosb |
54 | movq %r9,%rax | 64 | movq %r9,%rax |
55 | ret | 65 | ret |
56 | .Lmemset_e_e: | 66 | ENDPROC(memset_erms) |
57 | .previous | ||
58 | |||
59 | .weak memset | ||
60 | 67 | ||
61 | ENTRY(memset) | 68 | ENTRY(memset_orig) |
62 | ENTRY(__memset) | ||
63 | CFI_STARTPROC | 69 | CFI_STARTPROC |
64 | movq %rdi,%r10 | 70 | movq %rdi,%r10 |
65 | 71 | ||
@@ -134,23 +140,4 @@ ENTRY(__memset) | |||
134 | jmp .Lafter_bad_alignment | 140 | jmp .Lafter_bad_alignment |
135 | .Lfinal: | 141 | .Lfinal: |
136 | CFI_ENDPROC | 142 | CFI_ENDPROC |
137 | ENDPROC(memset) | 143 | ENDPROC(memset_orig) |
138 | ENDPROC(__memset) | ||
139 | |||
140 | /* Some CPUs support enhanced REP MOVSB/STOSB feature. | ||
141 | * It is recommended to use this when possible. | ||
142 | * | ||
143 | * If enhanced REP MOVSB/STOSB feature is not available, use fast string | ||
144 | * instructions. | ||
145 | * | ||
146 | * Otherwise, use original memset function. | ||
147 | * | ||
148 | * In .altinstructions section, ERMS feature is placed after REP_GOOD | ||
149 | * feature to implement the right patch order. | ||
150 | */ | ||
151 | .section .altinstructions,"a" | ||
152 | altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\ | ||
153 | .Lfinal-__memset,.Lmemset_e-.Lmemset_c | ||
154 | altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \ | ||
155 | .Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e | ||
156 | .previous | ||
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index f6d13eefad10..3ca5218fbece 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S | |||
@@ -14,8 +14,8 @@ | |||
14 | .macro op_safe_regs op | 14 | .macro op_safe_regs op |
15 | ENTRY(\op\()_safe_regs) | 15 | ENTRY(\op\()_safe_regs) |
16 | CFI_STARTPROC | 16 | CFI_STARTPROC |
17 | pushq_cfi %rbx | 17 | pushq_cfi_reg rbx |
18 | pushq_cfi %rbp | 18 | pushq_cfi_reg rbp |
19 | movq %rdi, %r10 /* Save pointer */ | 19 | movq %rdi, %r10 /* Save pointer */ |
20 | xorl %r11d, %r11d /* Return value */ | 20 | xorl %r11d, %r11d /* Return value */ |
21 | movl (%rdi), %eax | 21 | movl (%rdi), %eax |
@@ -35,8 +35,8 @@ ENTRY(\op\()_safe_regs) | |||
35 | movl %ebp, 20(%r10) | 35 | movl %ebp, 20(%r10) |
36 | movl %esi, 24(%r10) | 36 | movl %esi, 24(%r10) |
37 | movl %edi, 28(%r10) | 37 | movl %edi, 28(%r10) |
38 | popq_cfi %rbp | 38 | popq_cfi_reg rbp |
39 | popq_cfi %rbx | 39 | popq_cfi_reg rbx |
40 | ret | 40 | ret |
41 | 3: | 41 | 3: |
42 | CFI_RESTORE_STATE | 42 | CFI_RESTORE_STATE |
@@ -53,10 +53,10 @@ ENDPROC(\op\()_safe_regs) | |||
53 | .macro op_safe_regs op | 53 | .macro op_safe_regs op |
54 | ENTRY(\op\()_safe_regs) | 54 | ENTRY(\op\()_safe_regs) |
55 | CFI_STARTPROC | 55 | CFI_STARTPROC |
56 | pushl_cfi %ebx | 56 | pushl_cfi_reg ebx |
57 | pushl_cfi %ebp | 57 | pushl_cfi_reg ebp |
58 | pushl_cfi %esi | 58 | pushl_cfi_reg esi |
59 | pushl_cfi %edi | 59 | pushl_cfi_reg edi |
60 | pushl_cfi $0 /* Return value */ | 60 | pushl_cfi $0 /* Return value */ |
61 | pushl_cfi %eax | 61 | pushl_cfi %eax |
62 | movl 4(%eax), %ecx | 62 | movl 4(%eax), %ecx |
@@ -80,10 +80,10 @@ ENTRY(\op\()_safe_regs) | |||
80 | movl %esi, 24(%eax) | 80 | movl %esi, 24(%eax) |
81 | movl %edi, 28(%eax) | 81 | movl %edi, 28(%eax) |
82 | popl_cfi %eax | 82 | popl_cfi %eax |
83 | popl_cfi %edi | 83 | popl_cfi_reg edi |
84 | popl_cfi %esi | 84 | popl_cfi_reg esi |
85 | popl_cfi %ebp | 85 | popl_cfi_reg ebp |
86 | popl_cfi %ebx | 86 | popl_cfi_reg ebx |
87 | ret | 87 | ret |
88 | 3: | 88 | 3: |
89 | CFI_RESTORE_STATE | 89 | CFI_RESTORE_STATE |
diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S index 5dff5f042468..2322abe4da3b 100644 --- a/arch/x86/lib/rwsem.S +++ b/arch/x86/lib/rwsem.S | |||
@@ -34,10 +34,10 @@ | |||
34 | */ | 34 | */ |
35 | 35 | ||
36 | #define save_common_regs \ | 36 | #define save_common_regs \ |
37 | pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0 | 37 | pushl_cfi_reg ecx |
38 | 38 | ||
39 | #define restore_common_regs \ | 39 | #define restore_common_regs \ |
40 | popl_cfi %ecx; CFI_RESTORE ecx | 40 | popl_cfi_reg ecx |
41 | 41 | ||
42 | /* Avoid uglifying the argument copying x86-64 needs to do. */ | 42 | /* Avoid uglifying the argument copying x86-64 needs to do. */ |
43 | .macro movq src, dst | 43 | .macro movq src, dst |
@@ -64,22 +64,22 @@ | |||
64 | */ | 64 | */ |
65 | 65 | ||
66 | #define save_common_regs \ | 66 | #define save_common_regs \ |
67 | pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \ | 67 | pushq_cfi_reg rdi; \ |
68 | pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \ | 68 | pushq_cfi_reg rsi; \ |
69 | pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \ | 69 | pushq_cfi_reg rcx; \ |
70 | pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \ | 70 | pushq_cfi_reg r8; \ |
71 | pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \ | 71 | pushq_cfi_reg r9; \ |
72 | pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \ | 72 | pushq_cfi_reg r10; \ |
73 | pushq_cfi %r11; CFI_REL_OFFSET r11, 0 | 73 | pushq_cfi_reg r11 |
74 | 74 | ||
75 | #define restore_common_regs \ | 75 | #define restore_common_regs \ |
76 | popq_cfi %r11; CFI_RESTORE r11; \ | 76 | popq_cfi_reg r11; \ |
77 | popq_cfi %r10; CFI_RESTORE r10; \ | 77 | popq_cfi_reg r10; \ |
78 | popq_cfi %r9; CFI_RESTORE r9; \ | 78 | popq_cfi_reg r9; \ |
79 | popq_cfi %r8; CFI_RESTORE r8; \ | 79 | popq_cfi_reg r8; \ |
80 | popq_cfi %rcx; CFI_RESTORE rcx; \ | 80 | popq_cfi_reg rcx; \ |
81 | popq_cfi %rsi; CFI_RESTORE rsi; \ | 81 | popq_cfi_reg rsi; \ |
82 | popq_cfi %rdi; CFI_RESTORE rdi | 82 | popq_cfi_reg rdi |
83 | 83 | ||
84 | #endif | 84 | #endif |
85 | 85 | ||
@@ -87,12 +87,10 @@ | |||
87 | ENTRY(call_rwsem_down_read_failed) | 87 | ENTRY(call_rwsem_down_read_failed) |
88 | CFI_STARTPROC | 88 | CFI_STARTPROC |
89 | save_common_regs | 89 | save_common_regs |
90 | __ASM_SIZE(push,_cfi) %__ASM_REG(dx) | 90 | __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx) |
91 | CFI_REL_OFFSET __ASM_REG(dx), 0 | ||
92 | movq %rax,%rdi | 91 | movq %rax,%rdi |
93 | call rwsem_down_read_failed | 92 | call rwsem_down_read_failed |
94 | __ASM_SIZE(pop,_cfi) %__ASM_REG(dx) | 93 | __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx) |
95 | CFI_RESTORE __ASM_REG(dx) | ||
96 | restore_common_regs | 94 | restore_common_regs |
97 | ret | 95 | ret |
98 | CFI_ENDPROC | 96 | CFI_ENDPROC |
@@ -124,12 +122,10 @@ ENDPROC(call_rwsem_wake) | |||
124 | ENTRY(call_rwsem_downgrade_wake) | 122 | ENTRY(call_rwsem_downgrade_wake) |
125 | CFI_STARTPROC | 123 | CFI_STARTPROC |
126 | save_common_regs | 124 | save_common_regs |
127 | __ASM_SIZE(push,_cfi) %__ASM_REG(dx) | 125 | __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx) |
128 | CFI_REL_OFFSET __ASM_REG(dx), 0 | ||
129 | movq %rax,%rdi | 126 | movq %rax,%rdi |
130 | call rwsem_downgrade_wake | 127 | call rwsem_downgrade_wake |
131 | __ASM_SIZE(pop,_cfi) %__ASM_REG(dx) | 128 | __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx) |
132 | CFI_RESTORE __ASM_REG(dx) | ||
133 | restore_common_regs | 129 | restore_common_regs |
134 | ret | 130 | ret |
135 | CFI_ENDPROC | 131 | CFI_ENDPROC |
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S index e28cdaf5ac2c..5eb715087b80 100644 --- a/arch/x86/lib/thunk_32.S +++ b/arch/x86/lib/thunk_32.S | |||
@@ -13,12 +13,9 @@ | |||
13 | .globl \name | 13 | .globl \name |
14 | \name: | 14 | \name: |
15 | CFI_STARTPROC | 15 | CFI_STARTPROC |
16 | pushl_cfi %eax | 16 | pushl_cfi_reg eax |
17 | CFI_REL_OFFSET eax, 0 | 17 | pushl_cfi_reg ecx |
18 | pushl_cfi %ecx | 18 | pushl_cfi_reg edx |
19 | CFI_REL_OFFSET ecx, 0 | ||
20 | pushl_cfi %edx | ||
21 | CFI_REL_OFFSET edx, 0 | ||
22 | 19 | ||
23 | .if \put_ret_addr_in_eax | 20 | .if \put_ret_addr_in_eax |
24 | /* Place EIP in the arg1 */ | 21 | /* Place EIP in the arg1 */ |
@@ -26,12 +23,9 @@ | |||
26 | .endif | 23 | .endif |
27 | 24 | ||
28 | call \func | 25 | call \func |
29 | popl_cfi %edx | 26 | popl_cfi_reg edx |
30 | CFI_RESTORE edx | 27 | popl_cfi_reg ecx |
31 | popl_cfi %ecx | 28 | popl_cfi_reg eax |
32 | CFI_RESTORE ecx | ||
33 | popl_cfi %eax | ||
34 | CFI_RESTORE eax | ||
35 | ret | 29 | ret |
36 | CFI_ENDPROC | 30 | CFI_ENDPROC |
37 | _ASM_NOKPROBE(\name) | 31 | _ASM_NOKPROBE(\name) |
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index b30b5ebd614a..f89ba4e93025 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S | |||
@@ -17,9 +17,18 @@ | |||
17 | CFI_STARTPROC | 17 | CFI_STARTPROC |
18 | 18 | ||
19 | /* this one pushes 9 elems, the next one would be %rIP */ | 19 | /* this one pushes 9 elems, the next one would be %rIP */ |
20 | SAVE_ARGS | 20 | pushq_cfi_reg rdi |
21 | pushq_cfi_reg rsi | ||
22 | pushq_cfi_reg rdx | ||
23 | pushq_cfi_reg rcx | ||
24 | pushq_cfi_reg rax | ||
25 | pushq_cfi_reg r8 | ||
26 | pushq_cfi_reg r9 | ||
27 | pushq_cfi_reg r10 | ||
28 | pushq_cfi_reg r11 | ||
21 | 29 | ||
22 | .if \put_ret_addr_in_rdi | 30 | .if \put_ret_addr_in_rdi |
31 | /* 9*8(%rsp) is return addr on stack */ | ||
23 | movq_cfi_restore 9*8, rdi | 32 | movq_cfi_restore 9*8, rdi |
24 | .endif | 33 | .endif |
25 | 34 | ||
@@ -45,11 +54,22 @@ | |||
45 | #endif | 54 | #endif |
46 | #endif | 55 | #endif |
47 | 56 | ||
48 | /* SAVE_ARGS below is used only for the .cfi directives it contains. */ | 57 | #if defined(CONFIG_TRACE_IRQFLAGS) \ |
58 | || defined(CONFIG_DEBUG_LOCK_ALLOC) \ | ||
59 | || defined(CONFIG_PREEMPT) | ||
49 | CFI_STARTPROC | 60 | CFI_STARTPROC |
50 | SAVE_ARGS | 61 | CFI_ADJUST_CFA_OFFSET 9*8 |
51 | restore: | 62 | restore: |
52 | RESTORE_ARGS | 63 | popq_cfi_reg r11 |
64 | popq_cfi_reg r10 | ||
65 | popq_cfi_reg r9 | ||
66 | popq_cfi_reg r8 | ||
67 | popq_cfi_reg rax | ||
68 | popq_cfi_reg rcx | ||
69 | popq_cfi_reg rdx | ||
70 | popq_cfi_reg rsi | ||
71 | popq_cfi_reg rdi | ||
53 | ret | 72 | ret |
54 | CFI_ENDPROC | 73 | CFI_ENDPROC |
55 | _ASM_NOKPROBE(restore) | 74 | _ASM_NOKPROBE(restore) |
75 | #endif | ||
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index c905e89e19fe..1f33b3d1fd68 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c | |||
@@ -69,21 +69,20 @@ EXPORT_SYMBOL(copy_in_user); | |||
69 | * it is not necessary to optimize tail handling. | 69 | * it is not necessary to optimize tail handling. |
70 | */ | 70 | */ |
71 | __visible unsigned long | 71 | __visible unsigned long |
72 | copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) | 72 | copy_user_handle_tail(char *to, char *from, unsigned len) |
73 | { | 73 | { |
74 | char c; | ||
75 | unsigned zero_len; | ||
76 | |||
77 | for (; len; --len, to++) { | 74 | for (; len; --len, to++) { |
75 | char c; | ||
76 | |||
78 | if (__get_user_nocheck(c, from++, sizeof(char))) | 77 | if (__get_user_nocheck(c, from++, sizeof(char))) |
79 | break; | 78 | break; |
80 | if (__put_user_nocheck(c, to, sizeof(char))) | 79 | if (__put_user_nocheck(c, to, sizeof(char))) |
81 | break; | 80 | break; |
82 | } | 81 | } |
83 | |||
84 | for (c = 0, zero_len = len; zerorest && zero_len; --zero_len) | ||
85 | if (__put_user_nocheck(c, to++, sizeof(char))) | ||
86 | break; | ||
87 | clac(); | 82 | clac(); |
83 | |||
84 | /* If the destination is a kernel buffer, we always clear the end */ | ||
85 | if ((unsigned long)to >= TASK_SIZE_MAX) | ||
86 | memset(to, 0, len); | ||
88 | return len; | 87 | return len; |
89 | } | 88 | } |
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 1a2be7c6895d..816488c0b97e 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt | |||
@@ -273,6 +273,9 @@ dd: ESC | |||
273 | de: ESC | 273 | de: ESC |
274 | df: ESC | 274 | df: ESC |
275 | # 0xe0 - 0xef | 275 | # 0xe0 - 0xef |
276 | # Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix | ||
277 | # in 64-bit mode. AMD CPUs accept the 0x66 prefix; it causes RIP truncation | ||
278 | # to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. | ||
276 | e0: LOOPNE/LOOPNZ Jb (f64) | 279 | e0: LOOPNE/LOOPNZ Jb (f64) |
277 | e1: LOOPE/LOOPZ Jb (f64) | 280 | e1: LOOPE/LOOPZ Jb (f64) |
278 | e2: LOOP Jb (f64) | 281 | e2: LOOP Jb (f64) |
@@ -281,6 +284,10 @@ e4: IN AL,Ib | |||
281 | e5: IN eAX,Ib | 284 | e5: IN eAX,Ib |
282 | e6: OUT Ib,AL | 285 | e6: OUT Ib,AL |
283 | e7: OUT Ib,eAX | 286 | e7: OUT Ib,eAX |
287 | # With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset | ||
288 | # in "near" jumps and calls is 16-bit. For CALL, | ||
289 | # push of return address is 16-bit wide, RSP is decremented by 2 | ||
290 | # but is not truncated to 16 bits, unlike RIP. | ||
284 | e8: CALL Jz (f64) | 291 | e8: CALL Jz (f64) |
285 | e9: JMP-near Jz (f64) | 292 | e9: JMP-near Jz (f64) |
286 | ea: JMP-far Ap (i64) | 293 | ea: JMP-far Ap (i64) |
@@ -456,6 +463,7 @@ AVXcode: 1 | |||
456 | 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) | 463 | 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) |
457 | 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) | 464 | 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) |
458 | # 0x0f 0x80-0x8f | 465 | # 0x0f 0x80-0x8f |
466 | # Note: "forced64" is Intel CPU behavior (see comment about CALL insn). | ||
459 | 80: JO Jz (f64) | 467 | 80: JO Jz (f64) |
460 | 81: JNO Jz (f64) | 468 | 81: JNO Jz (f64) |
461 | 82: JB/JC/JNAE Jz (f64) | 469 | 82: JB/JC/JNAE Jz (f64) |
@@ -842,6 +850,7 @@ EndTable | |||
842 | GrpTable: Grp5 | 850 | GrpTable: Grp5 |
843 | 0: INC Ev | 851 | 0: INC Ev |
844 | 1: DEC Ev | 852 | 1: DEC Ev |
853 | # Note: "forced64" is Intel CPU behavior (see comment about CALL insn). | ||
845 | 2: CALLN Ev (f64) | 854 | 2: CALLN Ev (f64) |
846 | 3: CALLF Ep | 855 | 3: CALLF Ep |
847 | 4: JMPN Ev (f64) | 856 | 4: JMPN Ev (f64) |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index c4cc74006c61..a482d105172b 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -32,6 +32,4 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o | |||
32 | obj-$(CONFIG_ACPI_NUMA) += srat.o | 32 | obj-$(CONFIG_ACPI_NUMA) += srat.o |
33 | obj-$(CONFIG_NUMA_EMU) += numa_emulation.o | 33 | obj-$(CONFIG_NUMA_EMU) += numa_emulation.o |
34 | 34 | ||
35 | obj-$(CONFIG_MEMTEST) += memtest.o | ||
36 | |||
37 | obj-$(CONFIG_X86_INTEL_MPX) += mpx.o | 35 | obj-$(CONFIG_X86_INTEL_MPX) += mpx.o |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ede025fb46f1..181c53bac3a7 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -59,7 +59,7 @@ static nokprobe_inline int kprobes_fault(struct pt_regs *regs) | |||
59 | int ret = 0; | 59 | int ret = 0; |
60 | 60 | ||
61 | /* kprobe_running() needs smp_processor_id() */ | 61 | /* kprobe_running() needs smp_processor_id() */ |
62 | if (kprobes_built_in() && !user_mode_vm(regs)) { | 62 | if (kprobes_built_in() && !user_mode(regs)) { |
63 | preempt_disable(); | 63 | preempt_disable(); |
64 | if (kprobe_running() && kprobe_fault_handler(regs, 14)) | 64 | if (kprobe_running() && kprobe_fault_handler(regs, 14)) |
65 | ret = 1; | 65 | ret = 1; |
@@ -148,7 +148,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) | |||
148 | instr = (void *)convert_ip_to_linear(current, regs); | 148 | instr = (void *)convert_ip_to_linear(current, regs); |
149 | max_instr = instr + 15; | 149 | max_instr = instr + 15; |
150 | 150 | ||
151 | if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) | 151 | if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE_MAX) |
152 | return 0; | 152 | return 0; |
153 | 153 | ||
154 | while (instr < max_instr) { | 154 | while (instr < max_instr) { |
@@ -1035,7 +1035,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) | |||
1035 | if (error_code & PF_USER) | 1035 | if (error_code & PF_USER) |
1036 | return false; | 1036 | return false; |
1037 | 1037 | ||
1038 | if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC)) | 1038 | if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC)) |
1039 | return false; | 1039 | return false; |
1040 | 1040 | ||
1041 | return true; | 1041 | return true; |
@@ -1140,7 +1140,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, | |||
1140 | * User-mode registers count as a user access even for any | 1140 | * User-mode registers count as a user access even for any |
1141 | * potential system fault or CPU buglet: | 1141 | * potential system fault or CPU buglet: |
1142 | */ | 1142 | */ |
1143 | if (user_mode_vm(regs)) { | 1143 | if (user_mode(regs)) { |
1144 | local_irq_enable(); | 1144 | local_irq_enable(); |
1145 | error_code |= PF_USER; | 1145 | error_code |= PF_USER; |
1146 | flags |= FAULT_FLAG_USER; | 1146 | flags |= FAULT_FLAG_USER; |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index a110efca6d06..1d553186c434 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -29,29 +29,33 @@ | |||
29 | 29 | ||
30 | /* | 30 | /* |
31 | * Tables translating between page_cache_type_t and pte encoding. | 31 | * Tables translating between page_cache_type_t and pte encoding. |
32 | * Minimal supported modes are defined statically, modified if more supported | 32 | * |
33 | * cache modes are available. | 33 | * Minimal supported modes are defined statically, they are modified |
34 | * Index into __cachemode2pte_tbl is the cachemode. | 34 | * during bootup if more supported cache modes are available. |
35 | * Index into __pte2cachemode_tbl are the caching attribute bits of the pte | 35 | * |
36 | * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2. | 36 | * Index into __cachemode2pte_tbl[] is the cachemode. |
37 | * | ||
38 | * Index into __pte2cachemode_tbl[] are the caching attribute bits of the pte | ||
39 | * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2. | ||
37 | */ | 40 | */ |
38 | uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = { | 41 | uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = { |
39 | [_PAGE_CACHE_MODE_WB] = 0, | 42 | [_PAGE_CACHE_MODE_WB ] = 0 | 0 , |
40 | [_PAGE_CACHE_MODE_WC] = _PAGE_PWT, | 43 | [_PAGE_CACHE_MODE_WC ] = _PAGE_PWT | 0 , |
41 | [_PAGE_CACHE_MODE_UC_MINUS] = _PAGE_PCD, | 44 | [_PAGE_CACHE_MODE_UC_MINUS] = 0 | _PAGE_PCD, |
42 | [_PAGE_CACHE_MODE_UC] = _PAGE_PCD | _PAGE_PWT, | 45 | [_PAGE_CACHE_MODE_UC ] = _PAGE_PWT | _PAGE_PCD, |
43 | [_PAGE_CACHE_MODE_WT] = _PAGE_PCD, | 46 | [_PAGE_CACHE_MODE_WT ] = 0 | _PAGE_PCD, |
44 | [_PAGE_CACHE_MODE_WP] = _PAGE_PCD, | 47 | [_PAGE_CACHE_MODE_WP ] = 0 | _PAGE_PCD, |
45 | }; | 48 | }; |
46 | EXPORT_SYMBOL(__cachemode2pte_tbl); | 49 | EXPORT_SYMBOL(__cachemode2pte_tbl); |
50 | |||
47 | uint8_t __pte2cachemode_tbl[8] = { | 51 | uint8_t __pte2cachemode_tbl[8] = { |
48 | [__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB, | 52 | [__pte2cm_idx( 0 | 0 | 0 )] = _PAGE_CACHE_MODE_WB, |
49 | [__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC, | 53 | [__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_WC, |
50 | [__pte2cm_idx(_PAGE_PCD)] = _PAGE_CACHE_MODE_UC_MINUS, | 54 | [__pte2cm_idx( 0 | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC_MINUS, |
51 | [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD)] = _PAGE_CACHE_MODE_UC, | 55 | [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC, |
52 | [__pte2cm_idx(_PAGE_PAT)] = _PAGE_CACHE_MODE_WB, | 56 | [__pte2cm_idx( 0 | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB, |
53 | [__pte2cm_idx(_PAGE_PWT | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC, | 57 | [__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC, |
54 | [__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, | 58 | [__pte2cm_idx(0 | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, |
55 | [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, | 59 | [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, |
56 | }; | 60 | }; |
57 | EXPORT_SYMBOL(__pte2cachemode_tbl); | 61 | EXPORT_SYMBOL(__pte2cachemode_tbl); |
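The realigned initializers above make the encoding visible: __pte2cachemode_tbl[] is indexed by the pte's _PAGE_PWT, _PAGE_PCD and _PAGE_PAT attribute bits packed into index bits 0, 1 and 2, exactly as the reworded comment states. A small illustrative helper for that packing (the function below is an assumption made for this sketch; in the tree the packing is done by the existing __pte2cm_idx() macro):

	#include <asm/pgtable_types.h>

	/* Illustration only: build the __pte2cachemode_tbl[] index. */
	static unsigned int pte_cachemode_index(unsigned long prot)
	{
		return (!!(prot & _PAGE_PWT) << 0) |
		       (!!(prot & _PAGE_PCD) << 1) |
		       (!!(prot & _PAGE_PAT) << 2);
	}

	/*
	 * e.g. pte_cachemode_index(_PAGE_PWT | _PAGE_PCD) == 3, and the
	 * table above maps index 3 to _PAGE_CACHE_MODE_UC.
	 */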
@@ -131,21 +135,7 @@ void __init early_alloc_pgt_buf(void) | |||
131 | 135 | ||
132 | int after_bootmem; | 136 | int after_bootmem; |
133 | 137 | ||
134 | int direct_gbpages | 138 | early_param_on_off("gbpages", "nogbpages", direct_gbpages, CONFIG_X86_DIRECT_GBPAGES); |
135 | #ifdef CONFIG_DIRECT_GBPAGES | ||
136 | = 1 | ||
137 | #endif | ||
138 | ; | ||
139 | |||
140 | static void __init init_gbpages(void) | ||
141 | { | ||
142 | #ifdef CONFIG_X86_64 | ||
143 | if (direct_gbpages && cpu_has_gbpages) | ||
144 | printk(KERN_INFO "Using GB pages for direct mapping\n"); | ||
145 | else | ||
146 | direct_gbpages = 0; | ||
147 | #endif | ||
148 | } | ||
149 | 139 | ||
150 | struct map_range { | 140 | struct map_range { |
151 | unsigned long start; | 141 | unsigned long start; |
@@ -157,16 +147,12 @@ static int page_size_mask; | |||
157 | 147 | ||
158 | static void __init probe_page_size_mask(void) | 148 | static void __init probe_page_size_mask(void) |
159 | { | 149 | { |
160 | init_gbpages(); | ||
161 | |||
162 | #if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK) | 150 | #if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK) |
163 | /* | 151 | /* |
164 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | 152 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. |
165 | * This will simplify cpa(), which otherwise needs to support splitting | 153 | * This will simplify cpa(), which otherwise needs to support splitting |
166 | * large pages into small in interrupt context, etc. | 154 | * large pages into small in interrupt context, etc. |
167 | */ | 155 | */ |
168 | if (direct_gbpages) | ||
169 | page_size_mask |= 1 << PG_LEVEL_1G; | ||
170 | if (cpu_has_pse) | 156 | if (cpu_has_pse) |
171 | page_size_mask |= 1 << PG_LEVEL_2M; | 157 | page_size_mask |= 1 << PG_LEVEL_2M; |
172 | #endif | 158 | #endif |
@@ -179,6 +165,15 @@ static void __init probe_page_size_mask(void) | |||
179 | if (cpu_has_pge) { | 165 | if (cpu_has_pge) { |
180 | cr4_set_bits_and_update_boot(X86_CR4_PGE); | 166 | cr4_set_bits_and_update_boot(X86_CR4_PGE); |
181 | __supported_pte_mask |= _PAGE_GLOBAL; | 167 | __supported_pte_mask |= _PAGE_GLOBAL; |
168 | } else | ||
169 | __supported_pte_mask &= ~_PAGE_GLOBAL; | ||
170 | |||
171 | /* Enable 1 GB linear kernel mappings if available: */ | ||
172 | if (direct_gbpages && cpu_has_gbpages) { | ||
173 | printk(KERN_INFO "Using GB pages for direct mapping\n"); | ||
174 | page_size_mask |= 1 << PG_LEVEL_1G; | ||
175 | } else { | ||
176 | direct_gbpages = 0; | ||
182 | } | 177 | } |
183 | } | 178 | } |
184 | 179 | ||
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 30eb05ae7061..3fba623e3ba5 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -130,20 +130,6 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, | |||
130 | return 0; | 130 | return 0; |
131 | } | 131 | } |
132 | 132 | ||
133 | static int __init parse_direct_gbpages_off(char *arg) | ||
134 | { | ||
135 | direct_gbpages = 0; | ||
136 | return 0; | ||
137 | } | ||
138 | early_param("nogbpages", parse_direct_gbpages_off); | ||
139 | |||
140 | static int __init parse_direct_gbpages_on(char *arg) | ||
141 | { | ||
142 | direct_gbpages = 1; | ||
143 | return 0; | ||
144 | } | ||
145 | early_param("gbpages", parse_direct_gbpages_on); | ||
146 | |||
147 | /* | 133 | /* |
148 | * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the | 134 | * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the |
149 | * physical space so we can cache the place of the first one and move | 135 | * physical space so we can cache the place of the first one and move |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index fdf617c00e2f..5ead4d6cf3a7 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -67,8 +67,13 @@ static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, | |||
67 | 67 | ||
68 | /* | 68 | /* |
69 | * Remap an arbitrary physical address space into the kernel virtual | 69 | * Remap an arbitrary physical address space into the kernel virtual |
70 | * address space. Needed when the kernel wants to access high addresses | 70 | * address space. It transparently creates kernel huge I/O mapping when |
71 | * directly. | 71 | * the physical address is aligned by a huge page size (1GB or 2MB) and |
72 | * the requested size is at least the huge page size. | ||
73 | * | ||
74 | * NOTE: MTRRs can override PAT memory types with a 4KB granularity. | ||
75 | * Therefore, the mapping code falls back to use a smaller page toward 4KB | ||
76 | * when a mapping range is covered by non-WB type of MTRRs. | ||
72 | * | 77 | * |
73 | * NOTE! We need to allow non-page-aligned mappings too: we will obviously | 78 | * NOTE! We need to allow non-page-aligned mappings too: we will obviously |
74 | * have to convert them into an offset in a page-aligned mapping, but the | 79 | * have to convert them into an offset in a page-aligned mapping, but the |
@@ -326,6 +331,20 @@ void iounmap(volatile void __iomem *addr) | |||
326 | } | 331 | } |
327 | EXPORT_SYMBOL(iounmap); | 332 | EXPORT_SYMBOL(iounmap); |
328 | 333 | ||
334 | int arch_ioremap_pud_supported(void) | ||
335 | { | ||
336 | #ifdef CONFIG_X86_64 | ||
337 | return cpu_has_gbpages; | ||
338 | #else | ||
339 | return 0; | ||
340 | #endif | ||
341 | } | ||
342 | |||
343 | int arch_ioremap_pmd_supported(void) | ||
344 | { | ||
345 | return cpu_has_pse; | ||
346 | } | ||
347 | |||
329 | /* | 348 | /* |
330 | * Convert a physical pointer to a virtual kernel pointer for /dev/mem | 349 | * Convert a physical pointer to a virtual kernel pointer for /dev/mem |
331 | * access | 350 | * access |
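The expanded ioremap() comment above states the precondition for a huge I/O mapping: the physical address must be aligned to the huge page size (1GB or 2MB) and the requested size must cover at least one such page, with non-WB MTRR ranges forcing a fall-back to smaller pages. For the 2MB (PMD) case that test reduces to something like the sketch below; the helper name is invented for illustration and is not what arch_ioremap_pmd_supported() itself checks (that only reports cpu_has_pse):

	#include <linux/kernel.h>
	#include <linux/types.h>
	#include <asm/pgtable_types.h>

	/* Illustration only: could a PMD-sized (2MB) I/O mapping be used here? */
	static bool pmd_mapping_possible(phys_addr_t phys, unsigned long size)
	{
		return IS_ALIGNED(phys, PMD_SIZE) && size >= PMD_SIZE;
	}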
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c deleted file mode 100644 index 1e9da795767a..000000000000 --- a/arch/x86/mm/memtest.c +++ /dev/null | |||
@@ -1,118 +0,0 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <linux/string.h> | ||
4 | #include <linux/types.h> | ||
5 | #include <linux/mm.h> | ||
6 | #include <linux/smp.h> | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/pfn.h> | ||
9 | #include <linux/memblock.h> | ||
10 | |||
11 | static u64 patterns[] __initdata = { | ||
12 | /* The first entry has to be 0 to leave memtest with zeroed memory */ | ||
13 | 0, | ||
14 | 0xffffffffffffffffULL, | ||
15 | 0x5555555555555555ULL, | ||
16 | 0xaaaaaaaaaaaaaaaaULL, | ||
17 | 0x1111111111111111ULL, | ||
18 | 0x2222222222222222ULL, | ||
19 | 0x4444444444444444ULL, | ||
20 | 0x8888888888888888ULL, | ||
21 | 0x3333333333333333ULL, | ||
22 | 0x6666666666666666ULL, | ||
23 | 0x9999999999999999ULL, | ||
24 | 0xccccccccccccccccULL, | ||
25 | 0x7777777777777777ULL, | ||
26 | 0xbbbbbbbbbbbbbbbbULL, | ||
27 | 0xddddddddddddddddULL, | ||
28 | 0xeeeeeeeeeeeeeeeeULL, | ||
29 | 0x7a6c7258554e494cULL, /* yeah ;-) */ | ||
30 | }; | ||
31 | |||
32 | static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) | ||
33 | { | ||
34 | printk(KERN_INFO " %016llx bad mem addr %010llx - %010llx reserved\n", | ||
35 | (unsigned long long) pattern, | ||
36 | (unsigned long long) start_bad, | ||
37 | (unsigned long long) end_bad); | ||
38 | memblock_reserve(start_bad, end_bad - start_bad); | ||
39 | } | ||
40 | |||
41 | static void __init memtest(u64 pattern, u64 start_phys, u64 size) | ||
42 | { | ||
43 | u64 *p, *start, *end; | ||
44 | u64 start_bad, last_bad; | ||
45 | u64 start_phys_aligned; | ||
46 | const size_t incr = sizeof(pattern); | ||
47 | |||
48 | start_phys_aligned = ALIGN(start_phys, incr); | ||
49 | start = __va(start_phys_aligned); | ||
50 | end = start + (size - (start_phys_aligned - start_phys)) / incr; | ||
51 | start_bad = 0; | ||
52 | last_bad = 0; | ||
53 | |||
54 | for (p = start; p < end; p++) | ||
55 | *p = pattern; | ||
56 | |||
57 | for (p = start; p < end; p++, start_phys_aligned += incr) { | ||
58 | if (*p == pattern) | ||
59 | continue; | ||
60 | if (start_phys_aligned == last_bad + incr) { | ||
61 | last_bad += incr; | ||
62 | continue; | ||
63 | } | ||
64 | if (start_bad) | ||
65 | reserve_bad_mem(pattern, start_bad, last_bad + incr); | ||
66 | start_bad = last_bad = start_phys_aligned; | ||
67 | } | ||
68 | if (start_bad) | ||
69 | reserve_bad_mem(pattern, start_bad, last_bad + incr); | ||
70 | } | ||
71 | |||
72 | static void __init do_one_pass(u64 pattern, u64 start, u64 end) | ||
73 | { | ||
74 | u64 i; | ||
75 | phys_addr_t this_start, this_end; | ||
76 | |||
77 | for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) { | ||
78 | this_start = clamp_t(phys_addr_t, this_start, start, end); | ||
79 | this_end = clamp_t(phys_addr_t, this_end, start, end); | ||
80 | if (this_start < this_end) { | ||
81 | printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", | ||
82 | (unsigned long long)this_start, | ||
83 | (unsigned long long)this_end, | ||
84 | (unsigned long long)cpu_to_be64(pattern)); | ||
85 | memtest(pattern, this_start, this_end - this_start); | ||
86 | } | ||
87 | } | ||
88 | } | ||
89 | |||
90 | /* default is disabled */ | ||
91 | static int memtest_pattern __initdata; | ||
92 | |||
93 | static int __init parse_memtest(char *arg) | ||
94 | { | ||
95 | if (arg) | ||
96 | memtest_pattern = simple_strtoul(arg, NULL, 0); | ||
97 | else | ||
98 | memtest_pattern = ARRAY_SIZE(patterns); | ||
99 | |||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | early_param("memtest", parse_memtest); | ||
104 | |||
105 | void __init early_memtest(unsigned long start, unsigned long end) | ||
106 | { | ||
107 | unsigned int i; | ||
108 | unsigned int idx = 0; | ||
109 | |||
110 | if (!memtest_pattern) | ||
111 | return; | ||
112 | |||
113 | printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern); | ||
114 | for (i = memtest_pattern-1; i < UINT_MAX; --i) { | ||
115 | idx = i % ARRAY_SIZE(patterns); | ||
116 | do_one_pass(patterns[idx], start, end); | ||
117 | } | ||
118 | } | ||
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index df4552bd239e..9d518d693b4b 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c | |||
@@ -65,24 +65,23 @@ static int mmap_is_legacy(void) | |||
65 | return sysctl_legacy_va_layout; | 65 | return sysctl_legacy_va_layout; |
66 | } | 66 | } |
67 | 67 | ||
68 | static unsigned long mmap_rnd(void) | 68 | unsigned long arch_mmap_rnd(void) |
69 | { | 69 | { |
70 | unsigned long rnd = 0; | 70 | unsigned long rnd; |
71 | 71 | ||
72 | /* | 72 | /* |
73 | * 8 bits of randomness in 32bit mmaps, 20 address space bits | 73 | * 8 bits of randomness in 32bit mmaps, 20 address space bits |
74 | * 28 bits of randomness in 64bit mmaps, 40 address space bits | 74 | * 28 bits of randomness in 64bit mmaps, 40 address space bits |
75 | */ | 75 | */ |
76 | if (current->flags & PF_RANDOMIZE) { | 76 | if (mmap_is_ia32()) |
77 | if (mmap_is_ia32()) | 77 | rnd = (unsigned long)get_random_int() % (1<<8); |
78 | rnd = get_random_int() % (1<<8); | 78 | else |
79 | else | 79 | rnd = (unsigned long)get_random_int() % (1<<28); |
80 | rnd = get_random_int() % (1<<28); | 80 | |
81 | } | ||
82 | return rnd << PAGE_SHIFT; | 81 | return rnd << PAGE_SHIFT; |
83 | } | 82 | } |
84 | 83 | ||
85 | static unsigned long mmap_base(void) | 84 | static unsigned long mmap_base(unsigned long rnd) |
86 | { | 85 | { |
87 | unsigned long gap = rlimit(RLIMIT_STACK); | 86 | unsigned long gap = rlimit(RLIMIT_STACK); |
88 | 87 | ||
@@ -91,19 +90,19 @@ static unsigned long mmap_base(void) | |||
91 | else if (gap > MAX_GAP) | 90 | else if (gap > MAX_GAP) |
92 | gap = MAX_GAP; | 91 | gap = MAX_GAP; |
93 | 92 | ||
94 | return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd()); | 93 | return PAGE_ALIGN(TASK_SIZE - gap - rnd); |
95 | } | 94 | } |
96 | 95 | ||
97 | /* | 96 | /* |
98 | * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 | 97 | * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 |
99 | * does, but not when emulating X86_32 | 98 | * does, but not when emulating X86_32 |
100 | */ | 99 | */ |
101 | static unsigned long mmap_legacy_base(void) | 100 | static unsigned long mmap_legacy_base(unsigned long rnd) |
102 | { | 101 | { |
103 | if (mmap_is_ia32()) | 102 | if (mmap_is_ia32()) |
104 | return TASK_UNMAPPED_BASE; | 103 | return TASK_UNMAPPED_BASE; |
105 | else | 104 | else |
106 | return TASK_UNMAPPED_BASE + mmap_rnd(); | 105 | return TASK_UNMAPPED_BASE + rnd; |
107 | } | 106 | } |
108 | 107 | ||
109 | /* | 108 | /* |
@@ -112,13 +111,18 @@ static unsigned long mmap_legacy_base(void) | |||
112 | */ | 111 | */ |
113 | void arch_pick_mmap_layout(struct mm_struct *mm) | 112 | void arch_pick_mmap_layout(struct mm_struct *mm) |
114 | { | 113 | { |
115 | mm->mmap_legacy_base = mmap_legacy_base(); | 114 | unsigned long random_factor = 0UL; |
116 | mm->mmap_base = mmap_base(); | 115 | |
116 | if (current->flags & PF_RANDOMIZE) | ||
117 | random_factor = arch_mmap_rnd(); | ||
118 | |||
119 | mm->mmap_legacy_base = mmap_legacy_base(random_factor); | ||
117 | 120 | ||
118 | if (mmap_is_legacy()) { | 121 | if (mmap_is_legacy()) { |
119 | mm->mmap_base = mm->mmap_legacy_base; | 122 | mm->mmap_base = mm->mmap_legacy_base; |
120 | mm->get_unmapped_area = arch_get_unmapped_area; | 123 | mm->get_unmapped_area = arch_get_unmapped_area; |
121 | } else { | 124 | } else { |
125 | mm->mmap_base = mmap_base(random_factor); | ||
122 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; | 126 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; |
123 | } | 127 | } |
124 | } | 128 | } |
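The entropy figures quoted in arch_mmap_rnd() follow from the final shift: the random page count is scaled by PAGE_SHIFT, so with 4 KiB pages 8 random bits perturb 20 address bits (a 1 MiB window) and 28 random bits perturb 40 address bits (a 1 TiB window). A worked check of that arithmetic, assuming PAGE_SHIFT == 12 (the variables below are only for illustration):

	/* 32-bit task: rnd is taken modulo 1 << 8 */
	unsigned long max_rnd_32 = ((1UL << 8)  - 1) << 12;	/* 0xff000: offsets span 2^20 bytes */
	/* 64-bit task: rnd is taken modulo 1 << 28 */
	unsigned long max_rnd_64 = ((1UL << 28) - 1) << 12;	/* 0xfffffff000: offsets span 2^40 bytes */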
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index cd4785bbacb9..4053bb58bf92 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c | |||
@@ -482,9 +482,16 @@ static void __init numa_clear_kernel_node_hotplug(void) | |||
482 | &memblock.reserved, mb->nid); | 482 | &memblock.reserved, mb->nid); |
483 | } | 483 | } |
484 | 484 | ||
485 | /* Mark all kernel nodes. */ | 485 | /* |
486 | * Mark all kernel nodes. | ||
487 | * | ||
488 | * When booting with mem=nn[kMG] or in a kdump kernel, numa_meminfo | ||
489 | * may not include all the memblock.reserved memory ranges because | ||
490 | * trim_snb_memory() reserves specific pages for Sandy Bridge graphics. | ||
491 | */ | ||
486 | for_each_memblock(reserved, r) | 492 | for_each_memblock(reserved, r) |
487 | node_set(r->nid, numa_kernel_nodes); | 493 | if (r->nid != MAX_NUMNODES) |
494 | node_set(r->nid, numa_kernel_nodes); | ||
488 | 495 | ||
489 | /* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */ | 496 | /* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */ |
490 | for (i = 0; i < numa_meminfo.nr_blks; i++) { | 497 | for (i = 0; i < numa_meminfo.nr_blks; i++) { |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 536ea2fb6e33..89af288ec674 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -81,11 +81,9 @@ void arch_report_meminfo(struct seq_file *m) | |||
81 | seq_printf(m, "DirectMap4M: %8lu kB\n", | 81 | seq_printf(m, "DirectMap4M: %8lu kB\n", |
82 | direct_pages_count[PG_LEVEL_2M] << 12); | 82 | direct_pages_count[PG_LEVEL_2M] << 12); |
83 | #endif | 83 | #endif |
84 | #ifdef CONFIG_X86_64 | ||
85 | if (direct_gbpages) | 84 | if (direct_gbpages) |
86 | seq_printf(m, "DirectMap1G: %8lu kB\n", | 85 | seq_printf(m, "DirectMap1G: %8lu kB\n", |
87 | direct_pages_count[PG_LEVEL_1G] << 20); | 86 | direct_pages_count[PG_LEVEL_1G] << 20); |
88 | #endif | ||
89 | } | 87 | } |
90 | #else | 88 | #else |
91 | static inline void split_page_count(int level) { } | 89 | static inline void split_page_count(int level) { } |
@@ -1654,13 +1652,11 @@ int set_memory_ro(unsigned long addr, int numpages) | |||
1654 | { | 1652 | { |
1655 | return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0); | 1653 | return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0); |
1656 | } | 1654 | } |
1657 | EXPORT_SYMBOL_GPL(set_memory_ro); | ||
1658 | 1655 | ||
1659 | int set_memory_rw(unsigned long addr, int numpages) | 1656 | int set_memory_rw(unsigned long addr, int numpages) |
1660 | { | 1657 | { |
1661 | return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0); | 1658 | return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0); |
1662 | } | 1659 | } |
1663 | EXPORT_SYMBOL_GPL(set_memory_rw); | ||
1664 | 1660 | ||
1665 | int set_memory_np(unsigned long addr, int numpages) | 1661 | int set_memory_np(unsigned long addr, int numpages) |
1666 | { | 1662 | { |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 7ac68698406c..35af6771a95a 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -610,7 +610,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, | |||
610 | } | 610 | } |
611 | 611 | ||
612 | #ifdef CONFIG_STRICT_DEVMEM | 612 | #ifdef CONFIG_STRICT_DEVMEM |
613 | /* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/ | 613 | /* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */ |
614 | static inline int range_is_allowed(unsigned long pfn, unsigned long size) | 614 | static inline int range_is_allowed(unsigned long pfn, unsigned long size) |
615 | { | 615 | { |
616 | return 1; | 616 | return 1; |
@@ -628,8 +628,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) | |||
628 | 628 | ||
629 | while (cursor < to) { | 629 | while (cursor < to) { |
630 | if (!devmem_is_allowed(pfn)) { | 630 | if (!devmem_is_allowed(pfn)) { |
631 | printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n", | 631 | printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n", |
632 | current->comm, from, to - 1); | 632 | current->comm, from, to - 1); |
633 | return 0; | 633 | return 0; |
634 | } | 634 | } |
635 | cursor += PAGE_SIZE; | 635 | cursor += PAGE_SIZE; |
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 7b22adaad4f1..0b97d2c75df3 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <asm/pgtable.h> | 4 | #include <asm/pgtable.h> |
5 | #include <asm/tlb.h> | 5 | #include <asm/tlb.h> |
6 | #include <asm/fixmap.h> | 6 | #include <asm/fixmap.h> |
7 | #include <asm/mtrr.h> | ||
7 | 8 | ||
8 | #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO | 9 | #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO |
9 | 10 | ||
@@ -58,7 +59,7 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) | |||
58 | tlb_remove_page(tlb, pte); | 59 | tlb_remove_page(tlb, pte); |
59 | } | 60 | } |
60 | 61 | ||
61 | #if PAGETABLE_LEVELS > 2 | 62 | #if CONFIG_PGTABLE_LEVELS > 2 |
62 | void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) | 63 | void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) |
63 | { | 64 | { |
64 | struct page *page = virt_to_page(pmd); | 65 | struct page *page = virt_to_page(pmd); |
@@ -74,14 +75,14 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) | |||
74 | tlb_remove_page(tlb, page); | 75 | tlb_remove_page(tlb, page); |
75 | } | 76 | } |
76 | 77 | ||
77 | #if PAGETABLE_LEVELS > 3 | 78 | #if CONFIG_PGTABLE_LEVELS > 3 |
78 | void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) | 79 | void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) |
79 | { | 80 | { |
80 | paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); | 81 | paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); |
81 | tlb_remove_page(tlb, virt_to_page(pud)); | 82 | tlb_remove_page(tlb, virt_to_page(pud)); |
82 | } | 83 | } |
83 | #endif /* PAGETABLE_LEVELS > 3 */ | 84 | #endif /* CONFIG_PGTABLE_LEVELS > 3 */ |
84 | #endif /* PAGETABLE_LEVELS > 2 */ | 85 | #endif /* CONFIG_PGTABLE_LEVELS > 2 */ |
85 | 86 | ||
86 | static inline void pgd_list_add(pgd_t *pgd) | 87 | static inline void pgd_list_add(pgd_t *pgd) |
87 | { | 88 | { |
@@ -117,9 +118,9 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) | |||
117 | /* If the pgd points to a shared pagetable level (either the | 118 | /* If the pgd points to a shared pagetable level (either the |
118 | ptes in non-PAE, or shared PMD in PAE), then just copy the | 119 | ptes in non-PAE, or shared PMD in PAE), then just copy the |
119 | references from swapper_pg_dir. */ | 120 | references from swapper_pg_dir. */ |
120 | if (PAGETABLE_LEVELS == 2 || | 121 | if (CONFIG_PGTABLE_LEVELS == 2 || |
121 | (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) || | 122 | (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) || |
122 | PAGETABLE_LEVELS == 4) { | 123 | CONFIG_PGTABLE_LEVELS == 4) { |
123 | clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, | 124 | clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, |
124 | swapper_pg_dir + KERNEL_PGD_BOUNDARY, | 125 | swapper_pg_dir + KERNEL_PGD_BOUNDARY, |
125 | KERNEL_PGD_PTRS); | 126 | KERNEL_PGD_PTRS); |
@@ -275,12 +276,87 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) | |||
275 | } | 276 | } |
276 | } | 277 | } |
277 | 278 | ||
279 | /* | ||
280 | * Xen paravirt assumes the pgd table fits in one page, and the 64-bit | ||
281 | * kernel makes the same one-page assumption. | ||
282 | * | ||
283 | * But a kernel using PAE paging that is not running as a Xen domain | ||
284 | * only needs 32 bytes for its pgd instead of a whole page. | ||
285 | */ | ||
286 | #ifdef CONFIG_X86_PAE | ||
287 | |||
288 | #include <linux/slab.h> | ||
289 | |||
290 | #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) | ||
291 | #define PGD_ALIGN 32 | ||
292 | |||
293 | static struct kmem_cache *pgd_cache; | ||
294 | |||
295 | static int __init pgd_cache_init(void) | ||
296 | { | ||
297 | /* | ||
298 | * When a PAE kernel is running as a Xen domain, it does not use a | ||
299 | * shared kernel pmd, and that requires a whole page for the pgd. | ||
300 | */ | ||
301 | if (!SHARED_KERNEL_PMD) | ||
302 | return 0; | ||
303 | |||
304 | /* | ||
305 | * When a PAE kernel is not running as a Xen domain, it uses a | ||
306 | * shared kernel pmd. A shared kernel pmd does not require a whole | ||
307 | * page for the pgd; 32 bytes are enough. So at boot time we create | ||
308 | * a 32-byte slab cache for pgd table allocations. | ||
309 | */ | ||
310 | pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN, | ||
311 | SLAB_PANIC, NULL); | ||
312 | if (!pgd_cache) | ||
313 | return -ENOMEM; | ||
314 | |||
315 | return 0; | ||
316 | } | ||
317 | core_initcall(pgd_cache_init); | ||
318 | |||
319 | static inline pgd_t *_pgd_alloc(void) | ||
320 | { | ||
321 | /* | ||
322 | * If SHARED_KERNEL_PMD is not set, the PAE kernel is running as a | ||
323 | * Xen domain, so allocate a whole page for the pgd. | ||
324 | */ | ||
325 | if (!SHARED_KERNEL_PMD) | ||
326 | return (pgd_t *)__get_free_page(PGALLOC_GFP); | ||
327 | |||
328 | /* | ||
329 | * Otherwise the PAE kernel is not running as a Xen domain, so a | ||
330 | * 32-byte slab allocation is enough for the pgd and saves memory. | ||
331 | */ | ||
332 | return kmem_cache_alloc(pgd_cache, PGALLOC_GFP); | ||
333 | } | ||
334 | |||
335 | static inline void _pgd_free(pgd_t *pgd) | ||
336 | { | ||
337 | if (!SHARED_KERNEL_PMD) | ||
338 | free_page((unsigned long)pgd); | ||
339 | else | ||
340 | kmem_cache_free(pgd_cache, pgd); | ||
341 | } | ||
342 | #else | ||
343 | static inline pgd_t *_pgd_alloc(void) | ||
344 | { | ||
345 | return (pgd_t *)__get_free_page(PGALLOC_GFP); | ||
346 | } | ||
347 | |||
348 | static inline void _pgd_free(pgd_t *pgd) | ||
349 | { | ||
350 | free_page((unsigned long)pgd); | ||
351 | } | ||
352 | #endif /* CONFIG_X86_PAE */ | ||
353 | |||
278 | pgd_t *pgd_alloc(struct mm_struct *mm) | 354 | pgd_t *pgd_alloc(struct mm_struct *mm) |
279 | { | 355 | { |
280 | pgd_t *pgd; | 356 | pgd_t *pgd; |
281 | pmd_t *pmds[PREALLOCATED_PMDS]; | 357 | pmd_t *pmds[PREALLOCATED_PMDS]; |
282 | 358 | ||
283 | pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); | 359 | pgd = _pgd_alloc(); |
284 | 360 | ||
285 | if (pgd == NULL) | 361 | if (pgd == NULL) |
286 | goto out; | 362 | goto out; |
@@ -310,7 +386,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |||
310 | out_free_pmds: | 386 | out_free_pmds: |
311 | free_pmds(mm, pmds); | 387 | free_pmds(mm, pmds); |
312 | out_free_pgd: | 388 | out_free_pgd: |
313 | free_page((unsigned long)pgd); | 389 | _pgd_free(pgd); |
314 | out: | 390 | out: |
315 | return NULL; | 391 | return NULL; |
316 | } | 392 | } |
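The _pgd_alloc()/_pgd_free() pair added above relies on a boot-time slab cache sized and aligned for the 32-byte PAE pgd. As a minimal sketch of the same kmem_cache pattern, detached from the Xen/PAE specifics and using hypothetical names:

	#include <linux/slab.h>
	#include <linux/init.h>
	#include <linux/errno.h>

	#define DEMO_OBJ_SIZE	32
	#define DEMO_OBJ_ALIGN	32

	static struct kmem_cache *demo_cache;

	static int __init demo_cache_init(void)
	{
		/* SLAB_PANIC aborts boot loudly if the cache cannot be created. */
		demo_cache = kmem_cache_create("demo_cache", DEMO_OBJ_SIZE,
					       DEMO_OBJ_ALIGN, SLAB_PANIC, NULL);
		return demo_cache ? 0 : -ENOMEM;
	}
	core_initcall(demo_cache_init);

	static void *demo_alloc(void)
	{
		/* __GFP_ZERO mirrors the zeroing that PGALLOC_GFP requests. */
		return kmem_cache_alloc(demo_cache, GFP_KERNEL | __GFP_ZERO);
	}

	static void demo_free(void *obj)
	{
		kmem_cache_free(demo_cache, obj);
	}

The real code keeps the full-page path as well, because a PAE kernel running as a Xen domain (no SHARED_KERNEL_PMD) still needs a page-sized pgd.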
@@ -320,7 +396,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) | |||
320 | pgd_mop_up_pmds(mm, pgd); | 396 | pgd_mop_up_pmds(mm, pgd); |
321 | pgd_dtor(pgd); | 397 | pgd_dtor(pgd); |
322 | paravirt_pgd_free(mm, pgd); | 398 | paravirt_pgd_free(mm, pgd); |
323 | free_page((unsigned long)pgd); | 399 | _pgd_free(pgd); |
324 | } | 400 | } |
325 | 401 | ||
326 | /* | 402 | /* |
@@ -485,3 +561,67 @@ void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, | |||
485 | { | 561 | { |
486 | __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags)); | 562 | __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags)); |
487 | } | 563 | } |
564 | |||
565 | #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP | ||
566 | int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) | ||
567 | { | ||
568 | u8 mtrr; | ||
569 | |||
570 | /* | ||
571 | * Do not use a huge page when the range is covered by MTRRs of a | ||
572 | * non-WB memory type. | ||
573 | */ | ||
574 | mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE); | ||
575 | if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF)) | ||
576 | return 0; | ||
577 | |||
578 | prot = pgprot_4k_2_large(prot); | ||
579 | |||
580 | set_pte((pte_t *)pud, pfn_pte( | ||
581 | (u64)addr >> PAGE_SHIFT, | ||
582 | __pgprot(pgprot_val(prot) | _PAGE_PSE))); | ||
583 | |||
584 | return 1; | ||
585 | } | ||
586 | |||
587 | int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) | ||
588 | { | ||
589 | u8 mtrr; | ||
590 | |||
591 | /* | ||
592 | * Do not use a huge page when the range is covered by MTRRs of a | ||
593 | * non-WB memory type. | ||
594 | */ | ||
595 | mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE); | ||
596 | if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF)) | ||
597 | return 0; | ||
598 | |||
599 | prot = pgprot_4k_2_large(prot); | ||
600 | |||
601 | set_pte((pte_t *)pmd, pfn_pte( | ||
602 | (u64)addr >> PAGE_SHIFT, | ||
603 | __pgprot(pgprot_val(prot) | _PAGE_PSE))); | ||
604 | |||
605 | return 1; | ||
606 | } | ||
607 | |||
608 | int pud_clear_huge(pud_t *pud) | ||
609 | { | ||
610 | if (pud_large(*pud)) { | ||
611 | pud_clear(pud); | ||
612 | return 1; | ||
613 | } | ||
614 | |||
615 | return 0; | ||
616 | } | ||
617 | |||
618 | int pmd_clear_huge(pmd_t *pmd) | ||
619 | { | ||
620 | if (pmd_large(*pmd)) { | ||
621 | pmd_clear(pmd); | ||
622 | return 1; | ||
623 | } | ||
624 | |||
625 | return 0; | ||
626 | } | ||
627 | #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ | ||
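pud_set_huge() and pmd_set_huge() return 1 when the huge entry was installed and 0 when the caller must fall back to a finer granularity (for instance because the range is covered by a non-WB MTRR). A rough sketch of how a pmd-level caller might use the helper; this is hypothetical code, including the demo_map_ptes() fallback, not part of this patch:

	#include <linux/kernel.h>
	#include <asm/pgtable.h>

	/* Illustrative only: try a single 2M mapping, otherwise use 4K PTEs. */
	static int demo_map_pmd(pmd_t *pmd, unsigned long addr, unsigned long end,
				phys_addr_t phys, pgprot_t prot)
	{
		if ((end - addr) == PMD_SIZE &&
		    IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(phys, PMD_SIZE) &&
		    pmd_set_huge(pmd, phys, prot))
			return 0;	/* covered by one huge entry */

		return demo_map_ptes(pmd, addr, end, phys, prot);
	}

pud_set_huge() follows the same shape at PUD_SIZE granularity; pud_clear_huge()/pmd_clear_huge() are the matching teardown helpers used before repopulating a range with smaller pages.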
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 5d04be5efb64..4e664bdb535a 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c | |||
@@ -111,7 +111,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) | |||
111 | { | 111 | { |
112 | struct stack_frame *head = (struct stack_frame *)frame_pointer(regs); | 112 | struct stack_frame *head = (struct stack_frame *)frame_pointer(regs); |
113 | 113 | ||
114 | if (!user_mode_vm(regs)) { | 114 | if (!user_mode(regs)) { |
115 | unsigned long stack = kernel_stack_pointer(regs); | 115 | unsigned long stack = kernel_stack_pointer(regs); |
116 | if (depth) | 116 | if (depth) |
117 | dump_trace(NULL, regs, (unsigned long *)stack, 0, | 117 | dump_trace(NULL, regs, (unsigned long *)stack, 0, |
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 2fb384724ebb..8fd6f44aee83 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -490,7 +490,9 @@ void pcibios_scan_root(int busnum) | |||
490 | if (!bus) { | 490 | if (!bus) { |
491 | pci_free_resource_list(&resources); | 491 | pci_free_resource_list(&resources); |
492 | kfree(sd); | 492 | kfree(sd); |
493 | return; | ||
493 | } | 494 | } |
495 | pci_bus_add_devices(bus); | ||
494 | } | 496 | } |
495 | 497 | ||
496 | void __init pcibios_set_cache_line_size(void) | 498 | void __init pcibios_set_cache_line_size(void) |
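The pcibios_scan_root() fix above does two things: it bails out (instead of falling through) when the root bus could not be created, and it calls pci_bus_add_devices() so the freshly scanned devices are actually registered with the driver core. A hedged sketch of that scan-then-add pattern, with a hypothetical caller and simplified error handling:

	static void demo_scan_and_add(int busnum, void *sysdata,
				      struct list_head *resources)
	{
		struct pci_bus *bus;

		bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops,
					sysdata, resources);
		if (!bus) {
			pci_free_resource_list(resources);	/* nothing scanned */
			return;
		}
		pci_bus_add_devices(bus);	/* bind drivers only after the scan */
	}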
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index d143d216d52b..d7f997f7c26d 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c | |||
@@ -67,7 +67,7 @@ void __init efi_bgrt_init(void) | |||
67 | 67 | ||
68 | image = efi_lookup_mapped_addr(bgrt_tab->image_address); | 68 | image = efi_lookup_mapped_addr(bgrt_tab->image_address); |
69 | if (!image) { | 69 | if (!image) { |
70 | image = early_memremap(bgrt_tab->image_address, | 70 | image = early_ioremap(bgrt_tab->image_address, |
71 | sizeof(bmp_header)); | 71 | sizeof(bmp_header)); |
72 | ioremapped = true; | 72 | ioremapped = true; |
73 | if (!image) { | 73 | if (!image) { |
@@ -89,7 +89,7 @@ void __init efi_bgrt_init(void) | |||
89 | } | 89 | } |
90 | 90 | ||
91 | if (ioremapped) { | 91 | if (ioremapped) { |
92 | image = early_memremap(bgrt_tab->image_address, | 92 | image = early_ioremap(bgrt_tab->image_address, |
93 | bmp_header.size); | 93 | bmp_header.size); |
94 | if (!image) { | 94 | if (!image) { |
95 | pr_err("Ignoring BGRT: failed to map image memory\n"); | 95 | pr_err("Ignoring BGRT: failed to map image memory\n"); |
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index dbc8627a5cdf..02744df576d5 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
@@ -85,12 +85,20 @@ static efi_status_t __init phys_efi_set_virtual_address_map( | |||
85 | efi_memory_desc_t *virtual_map) | 85 | efi_memory_desc_t *virtual_map) |
86 | { | 86 | { |
87 | efi_status_t status; | 87 | efi_status_t status; |
88 | unsigned long flags; | ||
89 | pgd_t *save_pgd; | ||
88 | 90 | ||
89 | efi_call_phys_prolog(); | 91 | save_pgd = efi_call_phys_prolog(); |
92 | |||
93 | /* Disable interrupts around EFI calls: */ | ||
94 | local_irq_save(flags); | ||
90 | status = efi_call_phys(efi_phys.set_virtual_address_map, | 95 | status = efi_call_phys(efi_phys.set_virtual_address_map, |
91 | memory_map_size, descriptor_size, | 96 | memory_map_size, descriptor_size, |
92 | descriptor_version, virtual_map); | 97 | descriptor_version, virtual_map); |
93 | efi_call_phys_epilog(); | 98 | local_irq_restore(flags); |
99 | |||
100 | efi_call_phys_epilog(save_pgd); | ||
101 | |||
94 | return status; | 102 | return status; |
95 | } | 103 | } |
96 | 104 | ||
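Taken together with the efi_32.c and efi_64.c hunks further down, this hunk changes the calling convention: the prolog now returns whatever page-table state must be restored (NULL meaning nothing to undo), the epilog consumes it, and interrupt masking is owned by this caller instead of the prolog/epilog pair. Schematically, using the names from this patch:

	pgd_t *save_pgd;
	unsigned long flags;

	save_pgd = efi_call_phys_prolog();	/* may be NULL on 64-bit without old_map */

	local_irq_save(flags);			/* IRQs masked only around the EFI call */
	status = efi_call_phys(efi_phys.set_virtual_address_map, ...);
	local_irq_restore(flags);

	efi_call_phys_epilog(save_pgd);		/* no-op when save_pgd is NULL */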
@@ -491,7 +499,8 @@ void __init efi_init(void) | |||
491 | if (efi_memmap_init()) | 499 | if (efi_memmap_init()) |
492 | return; | 500 | return; |
493 | 501 | ||
494 | print_efi_memmap(); | 502 | if (efi_enabled(EFI_DBG)) |
503 | print_efi_memmap(); | ||
495 | } | 504 | } |
496 | 505 | ||
497 | void __init efi_late_init(void) | 506 | void __init efi_late_init(void) |
@@ -939,6 +948,8 @@ static int __init arch_parse_efi_cmdline(char *str) | |||
939 | { | 948 | { |
940 | if (parse_option_str(str, "old_map")) | 949 | if (parse_option_str(str, "old_map")) |
941 | set_bit(EFI_OLD_MEMMAP, &efi.flags); | 950 | set_bit(EFI_OLD_MEMMAP, &efi.flags); |
951 | if (parse_option_str(str, "debug")) | ||
952 | set_bit(EFI_DBG, &efi.flags); | ||
942 | 953 | ||
943 | return 0; | 954 | return 0; |
944 | } | 955 | } |
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 40e7cda52936..ed5b67338294 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c | |||
@@ -33,11 +33,10 @@ | |||
33 | 33 | ||
34 | /* | 34 | /* |
35 | * To make EFI call EFI runtime service in physical addressing mode we need | 35 | * To make EFI call EFI runtime service in physical addressing mode we need |
36 | * prolog/epilog before/after the invocation to disable interrupt, to | 36 | * prolog/epilog before/after the invocation to claim the EFI runtime service |
37 | * claim EFI runtime service handler exclusively and to duplicate a memory in | 37 | * handler exclusively and to duplicate a memory mapping in low memory space, |
38 | * low memory space say 0 - 3G. | 38 | * say 0 - 3G. |
39 | */ | 39 | */ |
40 | static unsigned long efi_rt_eflags; | ||
41 | 40 | ||
42 | void efi_sync_low_kernel_mappings(void) {} | 41 | void efi_sync_low_kernel_mappings(void) {} |
43 | void __init efi_dump_pagetable(void) {} | 42 | void __init efi_dump_pagetable(void) {} |
@@ -57,21 +56,24 @@ void __init efi_map_region(efi_memory_desc_t *md) | |||
57 | void __init efi_map_region_fixed(efi_memory_desc_t *md) {} | 56 | void __init efi_map_region_fixed(efi_memory_desc_t *md) {} |
58 | void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} | 57 | void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} |
59 | 58 | ||
60 | void __init efi_call_phys_prolog(void) | 59 | pgd_t * __init efi_call_phys_prolog(void) |
61 | { | 60 | { |
62 | struct desc_ptr gdt_descr; | 61 | struct desc_ptr gdt_descr; |
62 | pgd_t *save_pgd; | ||
63 | 63 | ||
64 | local_irq_save(efi_rt_eflags); | 64 | /* Current pgd is swapper_pg_dir, we'll restore it later: */ |
65 | 65 | save_pgd = swapper_pg_dir; | |
66 | load_cr3(initial_page_table); | 66 | load_cr3(initial_page_table); |
67 | __flush_tlb_all(); | 67 | __flush_tlb_all(); |
68 | 68 | ||
69 | gdt_descr.address = __pa(get_cpu_gdt_table(0)); | 69 | gdt_descr.address = __pa(get_cpu_gdt_table(0)); |
70 | gdt_descr.size = GDT_SIZE - 1; | 70 | gdt_descr.size = GDT_SIZE - 1; |
71 | load_gdt(&gdt_descr); | 71 | load_gdt(&gdt_descr); |
72 | |||
73 | return save_pgd; | ||
72 | } | 74 | } |
73 | 75 | ||
74 | void __init efi_call_phys_epilog(void) | 76 | void __init efi_call_phys_epilog(pgd_t *save_pgd) |
75 | { | 77 | { |
76 | struct desc_ptr gdt_descr; | 78 | struct desc_ptr gdt_descr; |
77 | 79 | ||
@@ -79,10 +81,8 @@ void __init efi_call_phys_epilog(void) | |||
79 | gdt_descr.size = GDT_SIZE - 1; | 81 | gdt_descr.size = GDT_SIZE - 1; |
80 | load_gdt(&gdt_descr); | 82 | load_gdt(&gdt_descr); |
81 | 83 | ||
82 | load_cr3(swapper_pg_dir); | 84 | load_cr3(save_pgd); |
83 | __flush_tlb_all(); | 85 | __flush_tlb_all(); |
84 | |||
85 | local_irq_restore(efi_rt_eflags); | ||
86 | } | 86 | } |
87 | 87 | ||
88 | void __init efi_runtime_mkexec(void) | 88 | void __init efi_runtime_mkexec(void) |
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 17e80d829df0..a0ac0f9c307f 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c | |||
@@ -41,9 +41,6 @@ | |||
41 | #include <asm/realmode.h> | 41 | #include <asm/realmode.h> |
42 | #include <asm/time.h> | 42 | #include <asm/time.h> |
43 | 43 | ||
44 | static pgd_t *save_pgd __initdata; | ||
45 | static unsigned long efi_flags __initdata; | ||
46 | |||
47 | /* | 44 | /* |
48 | * We allocate runtime services regions bottom-up, starting from -4G, i.e. | 45 | * We allocate runtime services regions bottom-up, starting from -4G, i.e. |
49 | * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G. | 46 | * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G. |
@@ -78,17 +75,18 @@ static void __init early_code_mapping_set_exec(int executable) | |||
78 | } | 75 | } |
79 | } | 76 | } |
80 | 77 | ||
81 | void __init efi_call_phys_prolog(void) | 78 | pgd_t * __init efi_call_phys_prolog(void) |
82 | { | 79 | { |
83 | unsigned long vaddress; | 80 | unsigned long vaddress; |
81 | pgd_t *save_pgd; | ||
82 | |||
84 | int pgd; | 83 | int pgd; |
85 | int n_pgds; | 84 | int n_pgds; |
86 | 85 | ||
87 | if (!efi_enabled(EFI_OLD_MEMMAP)) | 86 | if (!efi_enabled(EFI_OLD_MEMMAP)) |
88 | return; | 87 | return NULL; |
89 | 88 | ||
90 | early_code_mapping_set_exec(1); | 89 | early_code_mapping_set_exec(1); |
91 | local_irq_save(efi_flags); | ||
92 | 90 | ||
93 | n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); | 91 | n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); |
94 | save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL); | 92 | save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL); |
@@ -99,24 +97,29 @@ void __init efi_call_phys_prolog(void) | |||
99 | set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress)); | 97 | set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress)); |
100 | } | 98 | } |
101 | __flush_tlb_all(); | 99 | __flush_tlb_all(); |
100 | |||
101 | return save_pgd; | ||
102 | } | 102 | } |
103 | 103 | ||
104 | void __init efi_call_phys_epilog(void) | 104 | void __init efi_call_phys_epilog(pgd_t *save_pgd) |
105 | { | 105 | { |
106 | /* | 106 | /* |
107 | * After the lock is released, the original page table is restored. | 107 | * After the lock is released, the original page table is restored. |
108 | */ | 108 | */ |
109 | int pgd; | 109 | int pgd_idx; |
110 | int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); | 110 | int nr_pgds; |
111 | 111 | ||
112 | if (!efi_enabled(EFI_OLD_MEMMAP)) | 112 | if (!save_pgd) |
113 | return; | 113 | return; |
114 | 114 | ||
115 | for (pgd = 0; pgd < n_pgds; pgd++) | 115 | nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); |
116 | set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); | 116 | |
117 | for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) | ||
118 | set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); | ||
119 | |||
117 | kfree(save_pgd); | 120 | kfree(save_pgd); |
121 | |||
118 | __flush_tlb_all(); | 122 | __flush_tlb_all(); |
119 | local_irq_restore(efi_flags); | ||
120 | early_code_mapping_set_exec(0); | 123 | early_code_mapping_set_exec(0); |
121 | } | 124 | } |
122 | 125 | ||
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c index c9a0838890e2..278e4da4222f 100644 --- a/arch/x86/platform/intel-quark/imr_selftest.c +++ b/arch/x86/platform/intel-quark/imr_selftest.c | |||
@@ -11,6 +11,7 @@ | |||
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <asm-generic/sections.h> | 13 | #include <asm-generic/sections.h> |
14 | #include <asm/cpu_device_id.h> | ||
14 | #include <asm/imr.h> | 15 | #include <asm/imr.h> |
15 | #include <linux/init.h> | 16 | #include <linux/init.h> |
16 | #include <linux/mm.h> | 17 | #include <linux/mm.h> |
@@ -101,6 +102,12 @@ static void __init imr_self_test(void) | |||
101 | } | 102 | } |
102 | } | 103 | } |
103 | 104 | ||
105 | static const struct x86_cpu_id imr_ids[] __initconst = { | ||
106 | { X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */ | ||
107 | {} | ||
108 | }; | ||
109 | MODULE_DEVICE_TABLE(x86cpu, imr_ids); | ||
110 | |||
104 | /** | 111 | /** |
105 | * imr_self_test_init - entry point for IMR driver. | 112 | * imr_self_test_init - entry point for IMR driver. |
106 | * | 113 | * |
@@ -108,7 +115,8 @@ static void __init imr_self_test(void) | |||
108 | */ | 115 | */ |
109 | static int __init imr_self_test_init(void) | 116 | static int __init imr_self_test_init(void) |
110 | { | 117 | { |
111 | imr_self_test(); | 118 | if (x86_match_cpu(imr_ids)) |
119 | imr_self_test(); | ||
112 | return 0; | 120 | return 0; |
113 | } | 121 | } |
114 | 122 | ||
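x86_match_cpu() walks a NULL-terminated table of vendor/family/model entries and returns the matching entry (or NULL), which is what now gates the self-test to the Quark X1000. A minimal sketch of the same gating pattern, with example values that are assumptions rather than anything from this patch:

	#include <asm/cpu_device_id.h>
	#include <linux/errno.h>
	#include <linux/init.h>

	static const struct x86_cpu_id demo_ids[] __initconst = {
		{ X86_VENDOR_INTEL, 6, 0x3c },	/* vendor, family, model (example) */
		{}				/* terminator */
	};

	static int __init demo_init(void)
	{
		if (!x86_match_cpu(demo_ids))
			return -ENODEV;		/* quietly skip on other CPUs */

		/* ... hardware-specific initialisation ... */
		return 0;
	}
	device_initcall(demo_init);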
diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c index 9a2e590dd202..7fa8b3b53bc0 100644 --- a/arch/x86/platform/olpc/olpc-xo1-sci.c +++ b/arch/x86/platform/olpc/olpc-xo1-sci.c | |||
@@ -61,7 +61,7 @@ static void battery_status_changed(void) | |||
61 | 61 | ||
62 | if (psy) { | 62 | if (psy) { |
63 | power_supply_changed(psy); | 63 | power_supply_changed(psy); |
64 | put_device(psy->dev); | 64 | power_supply_put(psy); |
65 | } | 65 | } |
66 | } | 66 | } |
67 | 67 | ||
@@ -71,7 +71,7 @@ static void ac_status_changed(void) | |||
71 | 71 | ||
72 | if (psy) { | 72 | if (psy) { |
73 | power_supply_changed(psy); | 73 | power_supply_changed(psy); |
74 | put_device(psy->dev); | 74 | power_supply_put(psy); |
75 | } | 75 | } |
76 | } | 76 | } |
77 | 77 | ||
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c index 08e350e757dc..55130846ac87 100644 --- a/arch/x86/platform/olpc/olpc-xo15-sci.c +++ b/arch/x86/platform/olpc/olpc-xo15-sci.c | |||
@@ -83,7 +83,7 @@ static void battery_status_changed(void) | |||
83 | 83 | ||
84 | if (psy) { | 84 | if (psy) { |
85 | power_supply_changed(psy); | 85 | power_supply_changed(psy); |
86 | put_device(psy->dev); | 86 | power_supply_put(psy); |
87 | } | 87 | } |
88 | } | 88 | } |
89 | 89 | ||
@@ -93,7 +93,7 @@ static void ac_status_changed(void) | |||
93 | 93 | ||
94 | if (psy) { | 94 | if (psy) { |
95 | power_supply_changed(psy); | 95 | power_supply_changed(psy); |
96 | put_device(psy->dev); | 96 | power_supply_put(psy); |
97 | } | 97 | } |
98 | } | 98 | } |
99 | 99 | ||
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 994798548b1a..3b6ec42718e4 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c | |||
@@ -415,7 +415,7 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp) | |||
415 | struct reset_args reset_args; | 415 | struct reset_args reset_args; |
416 | 416 | ||
417 | reset_args.sender = sender; | 417 | reset_args.sender = sender; |
418 | cpus_clear(*mask); | 418 | cpumask_clear(mask); |
419 | /* find a single cpu for each uvhub in this distribution mask */ | 419 | /* find a single cpu for each uvhub in this distribution mask */ |
420 | maskbits = sizeof(struct pnmask) * BITSPERBYTE; | 420 | maskbits = sizeof(struct pnmask) * BITSPERBYTE; |
421 | /* each bit is a pnode relative to the partition base pnode */ | 421 | /* each bit is a pnode relative to the partition base pnode */ |
@@ -425,7 +425,7 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp) | |||
425 | continue; | 425 | continue; |
426 | apnode = pnode + bcp->partition_base_pnode; | 426 | apnode = pnode + bcp->partition_base_pnode; |
427 | cpu = pnode_to_first_cpu(apnode, smaster); | 427 | cpu = pnode_to_first_cpu(apnode, smaster); |
428 | cpu_set(cpu, *mask); | 428 | cpumask_set_cpu(cpu, mask); |
429 | } | 429 | } |
430 | 430 | ||
431 | /* IPI all cpus; preemption is already disabled */ | 431 | /* IPI all cpus; preemption is already disabled */ |
@@ -1126,7 +1126,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
1126 | /* don't actually do a shootdown of the local cpu */ | 1126 | /* don't actually do a shootdown of the local cpu */ |
1127 | cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); | 1127 | cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); |
1128 | 1128 | ||
1129 | if (cpu_isset(cpu, *cpumask)) | 1129 | if (cpumask_test_cpu(cpu, cpumask)) |
1130 | stat->s_ntargself++; | 1130 | stat->s_ntargself++; |
1131 | 1131 | ||
1132 | bau_desc = bcp->descriptor_base; | 1132 | bau_desc = bcp->descriptor_base; |
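The tlb_uv.c hunks above swap the old cpu_*/cpus_* macros, which dereference a struct cpumask, for the pointer-based cpumask_* accessors that are the preferred interface. A small illustrative helper (hypothetical, not from this patch) showing the replacements side by side:

	#include <linux/cpumask.h>
	#include <linux/printk.h>

	static void demo_single_cpu_mask(struct cpumask *mask, unsigned int cpu)
	{
		cpumask_clear(mask);			/* old: cpus_clear(*mask)     */
		cpumask_set_cpu(cpu, mask);		/* old: cpu_set(cpu, *mask)   */

		if (cpumask_test_cpu(cpu, mask))	/* old: cpu_isset(cpu, *mask) */
			pr_debug("cpu %u is the only bit set\n", cpu);
	}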
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 3e32ed5648a0..757678fb26e1 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c | |||
@@ -134,7 +134,7 @@ static void do_fpu_end(void) | |||
134 | static void fix_processor_context(void) | 134 | static void fix_processor_context(void) |
135 | { | 135 | { |
136 | int cpu = smp_processor_id(); | 136 | int cpu = smp_processor_id(); |
137 | struct tss_struct *t = &per_cpu(init_tss, cpu); | 137 | struct tss_struct *t = &per_cpu(cpu_tss, cpu); |
138 | #ifdef CONFIG_X86_64 | 138 | #ifdef CONFIG_X86_64 |
139 | struct desc_struct *desc = get_cpu_gdt_table(cpu); | 139 | struct desc_struct *desc = get_cpu_gdt_table(cpu); |
140 | tss_desc tss; | 140 | tss_desc tss; |
diff --git a/arch/x86/syscalls/Makefile b/arch/x86/syscalls/Makefile index 3323c2745248..a55abb9f6c5e 100644 --- a/arch/x86/syscalls/Makefile +++ b/arch/x86/syscalls/Makefile | |||
@@ -19,6 +19,9 @@ quiet_cmd_syshdr = SYSHDR $@ | |||
19 | quiet_cmd_systbl = SYSTBL $@ | 19 | quiet_cmd_systbl = SYSTBL $@ |
20 | cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@ | 20 | cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@ |
21 | 21 | ||
22 | quiet_cmd_hypercalls = HYPERCALLS $@ | ||
23 | cmd_hypercalls = $(CONFIG_SHELL) '$<' $@ $(filter-out $<,$^) | ||
24 | |||
22 | syshdr_abi_unistd_32 := i386 | 25 | syshdr_abi_unistd_32 := i386 |
23 | $(uapi)/unistd_32.h: $(syscall32) $(syshdr) | 26 | $(uapi)/unistd_32.h: $(syscall32) $(syshdr) |
24 | $(call if_changed,syshdr) | 27 | $(call if_changed,syshdr) |
@@ -47,10 +50,16 @@ $(out)/syscalls_32.h: $(syscall32) $(systbl) | |||
47 | $(out)/syscalls_64.h: $(syscall64) $(systbl) | 50 | $(out)/syscalls_64.h: $(syscall64) $(systbl) |
48 | $(call if_changed,systbl) | 51 | $(call if_changed,systbl) |
49 | 52 | ||
53 | $(out)/xen-hypercalls.h: $(srctree)/scripts/xen-hypercalls.sh | ||
54 | $(call if_changed,hypercalls) | ||
55 | |||
56 | $(out)/xen-hypercalls.h: $(srctree)/include/xen/interface/xen*.h | ||
57 | |||
50 | uapisyshdr-y += unistd_32.h unistd_64.h unistd_x32.h | 58 | uapisyshdr-y += unistd_32.h unistd_64.h unistd_x32.h |
51 | syshdr-y += syscalls_32.h | 59 | syshdr-y += syscalls_32.h |
52 | syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h unistd_64_x32.h | 60 | syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h unistd_64_x32.h |
53 | syshdr-$(CONFIG_X86_64) += syscalls_64.h | 61 | syshdr-$(CONFIG_X86_64) += syscalls_64.h |
62 | syshdr-$(CONFIG_XEN) += xen-hypercalls.h | ||
54 | 63 | ||
55 | targets += $(uapisyshdr-y) $(syshdr-y) | 64 | targets += $(uapisyshdr-y) $(syshdr-y) |
56 | 65 | ||
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index b3560ece1c9f..ef8187f9d28d 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl | |||
@@ -119,7 +119,7 @@ | |||
119 | 110 i386 iopl sys_iopl | 119 | 110 i386 iopl sys_iopl |
120 | 111 i386 vhangup sys_vhangup | 120 | 111 i386 vhangup sys_vhangup |
121 | 112 i386 idle | 121 | 112 i386 idle |
122 | 113 i386 vm86old sys_vm86old sys32_vm86_warning | 122 | 113 i386 vm86old sys_vm86old sys_ni_syscall |
123 | 114 i386 wait4 sys_wait4 compat_sys_wait4 | 123 | 114 i386 wait4 sys_wait4 compat_sys_wait4 |
124 | 115 i386 swapoff sys_swapoff | 124 | 115 i386 swapoff sys_swapoff |
125 | 116 i386 sysinfo sys_sysinfo compat_sys_sysinfo | 125 | 116 i386 sysinfo sys_sysinfo compat_sys_sysinfo |
@@ -172,7 +172,7 @@ | |||
172 | 163 i386 mremap sys_mremap | 172 | 163 i386 mremap sys_mremap |
173 | 164 i386 setresuid sys_setresuid16 | 173 | 164 i386 setresuid sys_setresuid16 |
174 | 165 i386 getresuid sys_getresuid16 | 174 | 165 i386 getresuid sys_getresuid16 |
175 | 166 i386 vm86 sys_vm86 sys32_vm86_warning | 175 | 166 i386 vm86 sys_vm86 sys_ni_syscall |
176 | 167 i386 query_module | 176 | 167 i386 query_module |
177 | 168 i386 poll sys_poll | 177 | 168 i386 poll sys_poll |
178 | 169 i386 nfsservctl | 178 | 169 i386 nfsservctl |
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 8d656fbb57aa..9ef32d5f1b19 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl | |||
@@ -178,7 +178,7 @@ | |||
178 | 169 common reboot sys_reboot | 178 | 169 common reboot sys_reboot |
179 | 170 common sethostname sys_sethostname | 179 | 170 common sethostname sys_sethostname |
180 | 171 common setdomainname sys_setdomainname | 180 | 171 common setdomainname sys_setdomainname |
181 | 172 common iopl stub_iopl | 181 | 172 common iopl sys_iopl |
182 | 173 common ioperm sys_ioperm | 182 | 173 common ioperm sys_ioperm |
183 | 174 64 create_module | 183 | 174 64 create_module |
184 | 175 common init_module sys_init_module | 184 | 175 common init_module sys_init_module |
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index eafa324eb7a5..acb384d24669 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile | |||
@@ -21,7 +21,6 @@ obj-$(CONFIG_BINFMT_ELF) += elfcore.o | |||
21 | 21 | ||
22 | subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o | 22 | subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o |
23 | subarch-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += ../lib/rwsem.o | 23 | subarch-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += ../lib/rwsem.o |
24 | subarch-$(CONFIG_HIGHMEM) += ../mm/highmem_32.o | ||
25 | 24 | ||
26 | else | 25 | else |
27 | 26 | ||
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index 2d7d9a1f5b53..7e8a1a650435 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h | |||
@@ -36,22 +36,11 @@ | |||
36 | #endif /* CONFIG_X86_PPRO_FENCE */ | 36 | #endif /* CONFIG_X86_PPRO_FENCE */ |
37 | #define dma_wmb() barrier() | 37 | #define dma_wmb() barrier() |
38 | 38 | ||
39 | #ifdef CONFIG_SMP | ||
40 | |||
41 | #define smp_mb() mb() | ||
42 | #define smp_rmb() dma_rmb() | ||
43 | #define smp_wmb() barrier() | ||
44 | #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) | ||
45 | |||
46 | #else /* CONFIG_SMP */ | ||
47 | |||
48 | #define smp_mb() barrier() | 39 | #define smp_mb() barrier() |
49 | #define smp_rmb() barrier() | 40 | #define smp_rmb() barrier() |
50 | #define smp_wmb() barrier() | 41 | #define smp_wmb() barrier() |
51 | #define set_mb(var, value) do { var = value; barrier(); } while (0) | 42 | #define set_mb(var, value) do { var = value; barrier(); } while (0) |
52 | 43 | ||
53 | #endif /* CONFIG_SMP */ | ||
54 | |||
55 | #define read_barrier_depends() do { } while (0) | 44 | #define read_barrier_depends() do { } while (0) |
56 | #define smp_read_barrier_depends() do { } while (0) | 45 | #define smp_read_barrier_depends() do { } while (0) |
57 | 46 | ||
@@ -64,8 +53,8 @@ | |||
64 | */ | 53 | */ |
65 | static inline void rdtsc_barrier(void) | 54 | static inline void rdtsc_barrier(void) |
66 | { | 55 | { |
67 | alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); | 56 | alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, |
68 | alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); | 57 | "lfence", X86_FEATURE_LFENCE_RDTSC); |
69 | } | 58 | } |
70 | 59 | ||
71 | #endif | 60 | #endif |
diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h index 25a1022dd793..0a656b727b1a 100644 --- a/arch/x86/um/asm/elf.h +++ b/arch/x86/um/asm/elf.h | |||
@@ -210,7 +210,7 @@ extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu); | |||
210 | 210 | ||
211 | #define ELF_EXEC_PAGESIZE 4096 | 211 | #define ELF_EXEC_PAGESIZE 4096 |
212 | 212 | ||
213 | #define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) | 213 | #define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) |
214 | 214 | ||
215 | extern long elf_aux_hwcap; | 215 | extern long elf_aux_hwcap; |
216 | #define ELF_HWCAP (elf_aux_hwcap) | 216 | #define ELF_HWCAP (elf_aux_hwcap) |
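One practical reason for preferring TASK_SIZE / 3 * 2 over 2 * TASK_SIZE / 3 is overflow: if the computation happens in a 32-bit type, multiplying first can wrap before the division. A small user-space demonstration (the 0xC0000000 task size is an assumed 3G/1G split, purely illustrative):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t task_size = 0xC0000000u;	/* assumed 32-bit TASK_SIZE */

		/* 2 * task_size wraps past 2^32 before the division ...        */
		printf("2 * T / 3 = %#x\n", 2 * task_size / 3);	/* 0x2aaaaaaa */

		/* ... dividing first keeps every intermediate value in range. */
		printf("T / 3 * 2 = %#x\n", task_size / 3 * 2);	/* 0x80000000 */
		return 0;
	}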
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c index 8e08176f0bcb..5c0b711d2433 100644 --- a/arch/x86/um/ldt.c +++ b/arch/x86/um/ldt.c | |||
@@ -8,9 +8,7 @@ | |||
8 | #include <linux/slab.h> | 8 | #include <linux/slab.h> |
9 | #include <asm/unistd.h> | 9 | #include <asm/unistd.h> |
10 | #include <os.h> | 10 | #include <os.h> |
11 | #include <proc_mm.h> | ||
12 | #include <skas.h> | 11 | #include <skas.h> |
13 | #include <skas_ptrace.h> | ||
14 | #include <sysdep/tls.h> | 12 | #include <sysdep/tls.h> |
15 | 13 | ||
16 | extern int modify_ldt(int func, void *ptr, unsigned long bytecount); | 14 | extern int modify_ldt(int func, void *ptr, unsigned long bytecount); |
@@ -19,105 +17,20 @@ static long write_ldt_entry(struct mm_id *mm_idp, int func, | |||
19 | struct user_desc *desc, void **addr, int done) | 17 | struct user_desc *desc, void **addr, int done) |
20 | { | 18 | { |
21 | long res; | 19 | long res; |
22 | 20 | void *stub_addr; | |
23 | if (proc_mm) { | 21 | res = syscall_stub_data(mm_idp, (unsigned long *)desc, |
24 | /* | 22 | (sizeof(*desc) + sizeof(long) - 1) & |
25 | * This is a special handling for the case, that the mm to | 23 | ~(sizeof(long) - 1), |
26 | * modify isn't current->active_mm. | 24 | addr, &stub_addr); |
27 | * If this is called directly by modify_ldt, | 25 | if (!res) { |
28 | * (current->active_mm->context.skas.u == mm_idp) | 26 | unsigned long args[] = { func, |
29 | * will be true. So no call to __switch_mm(mm_idp) is done. | 27 | (unsigned long)stub_addr, |
30 | * If this is called in case of init_new_ldt or PTRACE_LDT, | 28 | sizeof(*desc), |
31 | * mm_idp won't belong to current->active_mm, but child->mm. | 29 | 0, 0, 0 }; |
32 | * So we need to switch child's mm into our userspace, then | 30 | res = run_syscall_stub(mm_idp, __NR_modify_ldt, args, |
33 | * later switch back. | 31 | 0, addr, done); |
34 | * | ||
35 | * Note: I'm unsure: should interrupts be disabled here? | ||
36 | */ | ||
37 | if (!current->active_mm || current->active_mm == &init_mm || | ||
38 | mm_idp != ¤t->active_mm->context.id) | ||
39 | __switch_mm(mm_idp); | ||
40 | } | ||
41 | |||
42 | if (ptrace_ldt) { | ||
43 | struct ptrace_ldt ldt_op = (struct ptrace_ldt) { | ||
44 | .func = func, | ||
45 | .ptr = desc, | ||
46 | .bytecount = sizeof(*desc)}; | ||
47 | u32 cpu; | ||
48 | int pid; | ||
49 | |||
50 | if (!proc_mm) | ||
51 | pid = mm_idp->u.pid; | ||
52 | else { | ||
53 | cpu = get_cpu(); | ||
54 | pid = userspace_pid[cpu]; | ||
55 | } | ||
56 | |||
57 | res = os_ptrace_ldt(pid, 0, (unsigned long) &ldt_op); | ||
58 | |||
59 | if (proc_mm) | ||
60 | put_cpu(); | ||
61 | } | ||
62 | else { | ||
63 | void *stub_addr; | ||
64 | res = syscall_stub_data(mm_idp, (unsigned long *)desc, | ||
65 | (sizeof(*desc) + sizeof(long) - 1) & | ||
66 | ~(sizeof(long) - 1), | ||
67 | addr, &stub_addr); | ||
68 | if (!res) { | ||
69 | unsigned long args[] = { func, | ||
70 | (unsigned long)stub_addr, | ||
71 | sizeof(*desc), | ||
72 | 0, 0, 0 }; | ||
73 | res = run_syscall_stub(mm_idp, __NR_modify_ldt, args, | ||
74 | 0, addr, done); | ||
75 | } | ||
76 | } | 32 | } |
77 | 33 | ||
78 | if (proc_mm) { | ||
79 | /* | ||
80 | * This is the second part of special handling, that makes | ||
81 | * PTRACE_LDT possible to implement. | ||
82 | */ | ||
83 | if (current->active_mm && current->active_mm != &init_mm && | ||
84 | mm_idp != ¤t->active_mm->context.id) | ||
85 | __switch_mm(¤t->active_mm->context.id); | ||
86 | } | ||
87 | |||
88 | return res; | ||
89 | } | ||
90 | |||
91 | static long read_ldt_from_host(void __user * ptr, unsigned long bytecount) | ||
92 | { | ||
93 | int res, n; | ||
94 | struct ptrace_ldt ptrace_ldt = (struct ptrace_ldt) { | ||
95 | .func = 0, | ||
96 | .bytecount = bytecount, | ||
97 | .ptr = kmalloc(bytecount, GFP_KERNEL)}; | ||
98 | u32 cpu; | ||
99 | |||
100 | if (ptrace_ldt.ptr == NULL) | ||
101 | return -ENOMEM; | ||
102 | |||
103 | /* | ||
104 | * This is called from sys_modify_ldt only, so userspace_pid gives | ||
105 | * us the right number | ||
106 | */ | ||
107 | |||
108 | cpu = get_cpu(); | ||
109 | res = os_ptrace_ldt(userspace_pid[cpu], 0, (unsigned long) &ptrace_ldt); | ||
110 | put_cpu(); | ||
111 | if (res < 0) | ||
112 | goto out; | ||
113 | |||
114 | n = copy_to_user(ptr, ptrace_ldt.ptr, res); | ||
115 | if (n != 0) | ||
116 | res = -EFAULT; | ||
117 | |||
118 | out: | ||
119 | kfree(ptrace_ldt.ptr); | ||
120 | |||
121 | return res; | 34 | return res; |
122 | } | 35 | } |
123 | 36 | ||
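With the SKAS3/PTRACE_LDT branches removed, write_ldt_entry() always marshals a struct user_desc through the syscall stub, which then issues modify_ldt() inside the child mm with the familiar func/ptr/bytecount triple. For reference, the same triple as seen from ordinary user space looks roughly like this (host-side illustration, not part of the patch):

	#include <asm/ldt.h>		/* struct user_desc */
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int demo_install_ldt_entry(unsigned int slot, unsigned long base)
	{
		struct user_desc desc;

		memset(&desc, 0, sizeof(desc));
		desc.entry_number   = slot;
		desc.base_addr      = base;
		desc.limit          = 0xfffff;
		desc.seg_32bit      = 1;
		desc.limit_in_pages = 1;

		/* func 0x11 (or legacy 1) writes an entry; func 0 reads the LDT. */
		return syscall(SYS_modify_ldt, 0x11, &desc, sizeof(desc));
	}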
@@ -145,9 +58,6 @@ static int read_ldt(void __user * ptr, unsigned long bytecount) | |||
145 | bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; | 58 | bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; |
146 | err = bytecount; | 59 | err = bytecount; |
147 | 60 | ||
148 | if (ptrace_ldt) | ||
149 | return read_ldt_from_host(ptr, bytecount); | ||
150 | |||
151 | mutex_lock(&ldt->lock); | 61 | mutex_lock(&ldt->lock); |
152 | if (ldt->entry_count <= LDT_DIRECT_ENTRIES) { | 62 | if (ldt->entry_count <= LDT_DIRECT_ENTRIES) { |
153 | size = LDT_ENTRY_SIZE*LDT_DIRECT_ENTRIES; | 63 | size = LDT_ENTRY_SIZE*LDT_DIRECT_ENTRIES; |
@@ -229,17 +139,11 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int func) | |||
229 | goto out; | 139 | goto out; |
230 | } | 140 | } |
231 | 141 | ||
232 | if (!ptrace_ldt) | 142 | mutex_lock(&ldt->lock); |
233 | mutex_lock(&ldt->lock); | ||
234 | 143 | ||
235 | err = write_ldt_entry(mm_idp, func, &ldt_info, &addr, 1); | 144 | err = write_ldt_entry(mm_idp, func, &ldt_info, &addr, 1); |
236 | if (err) | 145 | if (err) |
237 | goto out_unlock; | 146 | goto out_unlock; |
238 | else if (ptrace_ldt) { | ||
239 | /* With PTRACE_LDT available, this is used as a flag only */ | ||
240 | ldt->entry_count = 1; | ||
241 | goto out; | ||
242 | } | ||
243 | 147 | ||
244 | if (ldt_info.entry_number >= ldt->entry_count && | 148 | if (ldt_info.entry_number >= ldt->entry_count && |
245 | ldt_info.entry_number >= LDT_DIRECT_ENTRIES) { | 149 | ldt_info.entry_number >= LDT_DIRECT_ENTRIES) { |
@@ -393,91 +297,56 @@ long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm) | |||
393 | int i; | 297 | int i; |
394 | long page, err=0; | 298 | long page, err=0; |
395 | void *addr = NULL; | 299 | void *addr = NULL; |
396 | struct proc_mm_op copy; | ||
397 | 300 | ||
398 | 301 | ||
399 | if (!ptrace_ldt) | 302 | mutex_init(&new_mm->arch.ldt.lock); |
400 | mutex_init(&new_mm->arch.ldt.lock); | ||
401 | 303 | ||
402 | if (!from_mm) { | 304 | if (!from_mm) { |
403 | memset(&desc, 0, sizeof(desc)); | 305 | memset(&desc, 0, sizeof(desc)); |
404 | /* | 306 | /* |
405 | * We have to initialize a clean ldt. | 307 | * Now we try to retrieve info about the ldt, we |
308 | * inherited from the host. All ldt-entries found | ||
309 | * will be reset in the following loop | ||
406 | */ | 310 | */ |
407 | if (proc_mm) { | 311 | ldt_get_host_info(); |
408 | /* | 312 | for (num_p=host_ldt_entries; *num_p != -1; num_p++) { |
409 | * If the new mm was created using proc_mm, host's | 313 | desc.entry_number = *num_p; |
410 | * default-ldt currently is assigned, which normally | 314 | err = write_ldt_entry(&new_mm->id, 1, &desc, |
411 | * contains the call-gates for lcall7 and lcall27. | 315 | &addr, *(num_p + 1) == -1); |
412 | * To remove these gates, we simply write an empty | 316 | if (err) |
413 | * entry as number 0 to the host. | 317 | break; |
414 | */ | ||
415 | err = write_ldt_entry(&new_mm->id, 1, &desc, &addr, 1); | ||
416 | } | ||
417 | else{ | ||
418 | /* | ||
419 | * Now we try to retrieve info about the ldt, we | ||
420 | * inherited from the host. All ldt-entries found | ||
421 | * will be reset in the following loop | ||
422 | */ | ||
423 | ldt_get_host_info(); | ||
424 | for (num_p=host_ldt_entries; *num_p != -1; num_p++) { | ||
425 | desc.entry_number = *num_p; | ||
426 | err = write_ldt_entry(&new_mm->id, 1, &desc, | ||
427 | &addr, *(num_p + 1) == -1); | ||
428 | if (err) | ||
429 | break; | ||
430 | } | ||
431 | } | 318 | } |
432 | new_mm->arch.ldt.entry_count = 0; | 319 | new_mm->arch.ldt.entry_count = 0; |
433 | 320 | ||
434 | goto out; | 321 | goto out; |
435 | } | 322 | } |
436 | 323 | ||
437 | if (proc_mm) { | 324 | /* |
438 | /* | 325 | * Our local LDT is used to supply the data for |
439 | * We have a valid from_mm, so we now have to copy the LDT of | 326 | * modify_ldt(READLDT), if PTRACE_LDT isn't available, |
440 | * from_mm to new_mm, because using proc_mm an new mm with | 327 | * i.e., we have to use the stub for modify_ldt, which |
441 | * an empty/default LDT was created in new_mm() | 328 | * can't handle the big read buffer of up to 64kB. |
442 | */ | 329 | */ |
443 | copy = ((struct proc_mm_op) { .op = MM_COPY_SEGMENTS, | 330 | mutex_lock(&from_mm->arch.ldt.lock); |
444 | .u = | 331 | if (from_mm->arch.ldt.entry_count <= LDT_DIRECT_ENTRIES) |
445 | { .copy_segments = | 332 | memcpy(new_mm->arch.ldt.u.entries, from_mm->arch.ldt.u.entries, |
446 | from_mm->id.u.mm_fd } } ); | 333 | sizeof(new_mm->arch.ldt.u.entries)); |
447 | i = os_write_file(new_mm->id.u.mm_fd, ©, sizeof(copy)); | 334 | else { |
448 | if (i != sizeof(copy)) | 335 | i = from_mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE; |
449 | printk(KERN_ERR "new_mm : /proc/mm copy_segments " | 336 | while (i-->0) { |
450 | "failed, err = %d\n", -i); | 337 | page = __get_free_page(GFP_KERNEL|__GFP_ZERO); |
451 | } | 338 | if (!page) { |
452 | 339 | err = -ENOMEM; | |
453 | if (!ptrace_ldt) { | 340 | break; |
454 | /* | ||
455 | * Our local LDT is used to supply the data for | ||
456 | * modify_ldt(READLDT), if PTRACE_LDT isn't available, | ||
457 | * i.e., we have to use the stub for modify_ldt, which | ||
458 | * can't handle the big read buffer of up to 64kB. | ||
459 | */ | ||
460 | mutex_lock(&from_mm->arch.ldt.lock); | ||
461 | if (from_mm->arch.ldt.entry_count <= LDT_DIRECT_ENTRIES) | ||
462 | memcpy(new_mm->arch.ldt.u.entries, from_mm->arch.ldt.u.entries, | ||
463 | sizeof(new_mm->arch.ldt.u.entries)); | ||
464 | else { | ||
465 | i = from_mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE; | ||
466 | while (i-->0) { | ||
467 | page = __get_free_page(GFP_KERNEL|__GFP_ZERO); | ||
468 | if (!page) { | ||
469 | err = -ENOMEM; | ||
470 | break; | ||
471 | } | ||
472 | new_mm->arch.ldt.u.pages[i] = | ||
473 | (struct ldt_entry *) page; | ||
474 | memcpy(new_mm->arch.ldt.u.pages[i], | ||
475 | from_mm->arch.ldt.u.pages[i], PAGE_SIZE); | ||
476 | } | 341 | } |
342 | new_mm->arch.ldt.u.pages[i] = | ||
343 | (struct ldt_entry *) page; | ||
344 | memcpy(new_mm->arch.ldt.u.pages[i], | ||
345 | from_mm->arch.ldt.u.pages[i], PAGE_SIZE); | ||
477 | } | 346 | } |
478 | new_mm->arch.ldt.entry_count = from_mm->arch.ldt.entry_count; | ||
479 | mutex_unlock(&from_mm->arch.ldt.lock); | ||
480 | } | 347 | } |
348 | new_mm->arch.ldt.entry_count = from_mm->arch.ldt.entry_count; | ||
349 | mutex_unlock(&from_mm->arch.ldt.lock); | ||
481 | 350 | ||
482 | out: | 351 | out: |
483 | return err; | 352 | return err; |
@@ -488,7 +357,7 @@ void free_ldt(struct mm_context *mm) | |||
488 | { | 357 | { |
489 | int i; | 358 | int i; |
490 | 359 | ||
491 | if (!ptrace_ldt && mm->arch.ldt.entry_count > LDT_DIRECT_ENTRIES) { | 360 | if (mm->arch.ldt.entry_count > LDT_DIRECT_ENTRIES) { |
492 | i = mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE; | 361 | i = mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE; |
493 | while (i-- > 0) | 362 | while (i-- > 0) |
494 | free_page((long) mm->arch.ldt.u.pages[i]); | 363 | free_page((long) mm->arch.ldt.u.pages[i]); |
diff --git a/arch/x86/um/shared/sysdep/faultinfo_32.h b/arch/x86/um/shared/sysdep/faultinfo_32.h index a26086b8a800..b6f2437ec29c 100644 --- a/arch/x86/um/shared/sysdep/faultinfo_32.h +++ b/arch/x86/um/shared/sysdep/faultinfo_32.h | |||
@@ -27,9 +27,6 @@ struct faultinfo { | |||
27 | /* This is Page Fault */ | 27 | /* This is Page Fault */ |
28 | #define SEGV_IS_FIXABLE(fi) ((fi)->trap_no == 14) | 28 | #define SEGV_IS_FIXABLE(fi) ((fi)->trap_no == 14) |
29 | 29 | ||
30 | /* SKAS3 has no trap_no on i386, but get_skas_faultinfo() sets it to 0. */ | ||
31 | #define SEGV_MAYBE_FIXABLE(fi) ((fi)->trap_no == 0 && ptrace_faultinfo) | ||
32 | |||
33 | #define PTRACE_FULL_FAULTINFO 0 | 30 | #define PTRACE_FULL_FAULTINFO 0 |
34 | 31 | ||
35 | #endif | 32 | #endif |
diff --git a/arch/x86/um/shared/sysdep/faultinfo_64.h b/arch/x86/um/shared/sysdep/faultinfo_64.h index f811cbe15d62..ee88f88974ea 100644 --- a/arch/x86/um/shared/sysdep/faultinfo_64.h +++ b/arch/x86/um/shared/sysdep/faultinfo_64.h | |||
@@ -27,9 +27,6 @@ struct faultinfo { | |||
27 | /* This is Page Fault */ | 27 | /* This is Page Fault */ |
28 | #define SEGV_IS_FIXABLE(fi) ((fi)->trap_no == 14) | 28 | #define SEGV_IS_FIXABLE(fi) ((fi)->trap_no == 14) |
29 | 29 | ||
30 | /* No broken SKAS API, which doesn't pass trap_no, here. */ | ||
31 | #define SEGV_MAYBE_FIXABLE(fi) 0 | ||
32 | |||
33 | #define PTRACE_FULL_FAULTINFO 1 | 30 | #define PTRACE_FULL_FAULTINFO 1 |
34 | 31 | ||
35 | #endif | 32 | #endif |
diff --git a/arch/x86/um/shared/sysdep/skas_ptrace.h b/arch/x86/um/shared/sysdep/skas_ptrace.h deleted file mode 100644 index 453febe98993..000000000000 --- a/arch/x86/um/shared/sysdep/skas_ptrace.h +++ /dev/null | |||
@@ -1,22 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) | ||
3 | * Licensed under the GPL | ||
4 | */ | ||
5 | |||
6 | #ifndef __SYSDEP_X86_SKAS_PTRACE_H | ||
7 | #define __SYSDEP_X86_SKAS_PTRACE_H | ||
8 | |||
9 | struct ptrace_faultinfo { | ||
10 | int is_write; | ||
11 | unsigned long addr; | ||
12 | }; | ||
13 | |||
14 | struct ptrace_ldt { | ||
15 | int func; | ||
16 | void *ptr; | ||
17 | unsigned long bytecount; | ||
18 | }; | ||
19 | |||
20 | #define PTRACE_LDT 54 | ||
21 | |||
22 | #endif | ||
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 0c8c32bfd792..592491d1d70d 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c | |||
@@ -549,13 +549,6 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, | |||
549 | if (err) | 549 | if (err) |
550 | return err; | 550 | return err; |
551 | 551 | ||
552 | /* Set up registers for signal handler */ | ||
553 | { | ||
554 | struct exec_domain *ed = current_thread_info()->exec_domain; | ||
555 | if (unlikely(ed && ed->signal_invmap && sig < 32)) | ||
556 | sig = ed->signal_invmap[sig]; | ||
557 | } | ||
558 | |||
559 | PT_REGS_SP(regs) = (unsigned long) frame; | 552 | PT_REGS_SP(regs) = (unsigned long) frame; |
560 | PT_REGS_DI(regs) = sig; | 553 | PT_REGS_DI(regs) = sig; |
561 | /* In case the signal handler was declared without prototypes */ | 554 | /* In case the signal handler was declared without prototypes */ |
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c index 5cdfa9db2217..a75d8700472a 100644 --- a/arch/x86/um/sys_call_table_64.c +++ b/arch/x86/um/sys_call_table_64.c | |||
@@ -16,7 +16,7 @@ | |||
16 | */ | 16 | */ |
17 | 17 | ||
18 | /* Not going to be implemented by UML, since we have no hardware. */ | 18 | /* Not going to be implemented by UML, since we have no hardware. */ |
19 | #define stub_iopl sys_ni_syscall | 19 | #define sys_iopl sys_ni_syscall |
20 | #define sys_ioperm sys_ni_syscall | 20 | #define sys_ioperm sys_ni_syscall |
21 | 21 | ||
22 | /* | 22 | /* |
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 7b9be9822724..275a3a8b78af 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile | |||
@@ -51,7 +51,7 @@ VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ | |||
51 | $(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE | 51 | $(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE |
52 | $(call if_changed,vdso) | 52 | $(call if_changed,vdso) |
53 | 53 | ||
54 | HOST_EXTRACFLAGS += -I$(srctree)/tools/include | 54 | HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi |
55 | hostprogs-y += vdso2c | 55 | hostprogs-y += vdso2c |
56 | 56 | ||
57 | quiet_cmd_vdso2c = VDSO2C $@ | 57 | quiet_cmd_vdso2c = VDSO2C $@ |
@@ -206,4 +206,4 @@ $(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE | |||
206 | PHONY += vdso_install $(vdso_img_insttargets) | 206 | PHONY += vdso_install $(vdso_img_insttargets) |
207 | vdso_install: $(vdso_img_insttargets) FORCE | 207 | vdso_install: $(vdso_img_insttargets) FORCE |
208 | 208 | ||
209 | clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64* | 209 | clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64* vdso-image-*.c vdsox32.so* |
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 9793322751e0..40d2473836c9 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c | |||
@@ -82,18 +82,15 @@ static notrace cycle_t vread_pvclock(int *mode) | |||
82 | cycle_t ret; | 82 | cycle_t ret; |
83 | u64 last; | 83 | u64 last; |
84 | u32 version; | 84 | u32 version; |
85 | u32 migrate_count; | ||
85 | u8 flags; | 86 | u8 flags; |
86 | unsigned cpu, cpu1; | 87 | unsigned cpu, cpu1; |
87 | 88 | ||
88 | 89 | ||
89 | /* | 90 | /* |
90 | * Note: hypervisor must guarantee that: | 91 | * When looping to get a consistent (time-info, tsc) pair, we |
91 | * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. | 92 | * also need to deal with the possibility we can switch vcpus, |
92 | * 2. that per-CPU pvclock time info is updated if the | 93 | * so make sure we always re-fetch time-info for the current vcpu. |
93 | * underlying CPU changes. | ||
94 | * 3. that version is increased whenever underlying CPU | ||
95 | * changes. | ||
96 | * | ||
97 | */ | 94 | */ |
98 | do { | 95 | do { |
99 | cpu = __getcpu() & VGETCPU_CPU_MASK; | 96 | cpu = __getcpu() & VGETCPU_CPU_MASK; |
@@ -102,20 +99,27 @@ static notrace cycle_t vread_pvclock(int *mode) | |||
102 | * __getcpu() calls (Gleb). | 99 | * __getcpu() calls (Gleb). |
103 | */ | 100 | */ |
104 | 101 | ||
105 | pvti = get_pvti(cpu); | 102 | /* Make sure migrate_count will change if we leave the VCPU. */ |
103 | do { | ||
104 | pvti = get_pvti(cpu); | ||
105 | migrate_count = pvti->migrate_count; | ||
106 | |||
107 | cpu1 = cpu; | ||
108 | cpu = __getcpu() & VGETCPU_CPU_MASK; | ||
109 | } while (unlikely(cpu != cpu1)); | ||
106 | 110 | ||
107 | version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); | 111 | version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); |
108 | 112 | ||
109 | /* | 113 | /* |
110 | * Test we're still on the cpu as well as the version. | 114 | * Test we're still on the cpu as well as the version. |
111 | * We could have been migrated just after the first | 115 | * - We must read TSC of pvti's VCPU. |
112 | * vgetcpu but before fetching the version, so we | 116 | * - KVM doesn't follow the versioning protocol, so data could |
113 | * wouldn't notice a version change. | 117 | * change before version if we left the VCPU. |
114 | */ | 118 | */ |
115 | cpu1 = __getcpu() & VGETCPU_CPU_MASK; | 119 | smp_rmb(); |
116 | } while (unlikely(cpu != cpu1 || | 120 | } while (unlikely((pvti->pvti.version & 1) || |
117 | (pvti->pvti.version & 1) || | 121 | pvti->pvti.version != version || |
118 | pvti->pvti.version != version)); | 122 | pvti->migrate_count != migrate_count)); |
119 | 123 | ||
120 | if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) | 124 | if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) |
121 | *mode = VCLOCK_NONE; | 125 | *mode = VCLOCK_NONE; |
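The reworked loop is essentially a seqcount-style retry: snapshot version and migrate_count, read the time info, then re-check both, with an smp_rmb() in between because KVM may update the data before bumping the version. Stripped of the vDSO details, the shape is the following (schematic; struct demo_time_info and read_snapshot() are hypothetical):

	#include <linux/types.h>

	static u64 demo_read_consistent(struct demo_time_info *ti)
	{
		u32 version, migrate;
		u64 val;

		do {
			version = ti->version;		/* odd while an update is in flight */
			migrate = ti->migrate_count;	/* changes if we left this vCPU     */

			val = read_snapshot(ti);	/* hypothetical data read           */

			smp_rmb();			/* order data reads before re-check */
		} while ((version & 1) ||
			 version != ti->version ||
			 migrate != ti->migrate_count);

		return val;
	}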
diff --git a/arch/x86/vdso/vdso32/syscall.S b/arch/x86/vdso/vdso32/syscall.S index 5415b5613d55..6b286bb5251c 100644 --- a/arch/x86/vdso/vdso32/syscall.S +++ b/arch/x86/vdso/vdso32/syscall.S | |||
@@ -19,8 +19,6 @@ __kernel_vsyscall: | |||
19 | .Lpush_ebp: | 19 | .Lpush_ebp: |
20 | movl %ecx, %ebp | 20 | movl %ecx, %ebp |
21 | syscall | 21 | syscall |
22 | movl $__USER32_DS, %ecx | ||
23 | movl %ecx, %ss | ||
24 | movl %ebp, %ecx | 22 | movl %ebp, %ecx |
25 | popl %ebp | 23 | popl %ebp |
26 | .Lpop_ebp: | 24 | .Lpop_ebp: |
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 7005ced5d1ad..70e060ad879a 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <xen/xen.h> | 7 | #include <xen/xen.h> |
8 | #include <xen/interface/physdev.h> | 8 | #include <xen/interface/physdev.h> |
9 | #include "xen-ops.h" | 9 | #include "xen-ops.h" |
10 | #include "smp.h" | ||
10 | 11 | ||
11 | static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) | 12 | static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) |
12 | { | 13 | { |
@@ -28,7 +29,186 @@ static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) | |||
28 | return 0xfd; | 29 | return 0xfd; |
29 | } | 30 | } |
30 | 31 | ||
32 | static unsigned long xen_set_apic_id(unsigned int x) | ||
33 | { | ||
34 | WARN_ON(1); | ||
35 | return x; | ||
36 | } | ||
37 | |||
38 | static unsigned int xen_get_apic_id(unsigned long x) | ||
39 | { | ||
40 | return ((x)>>24) & 0xFFu; | ||
41 | } | ||
42 | |||
43 | static u32 xen_apic_read(u32 reg) | ||
44 | { | ||
45 | struct xen_platform_op op = { | ||
46 | .cmd = XENPF_get_cpuinfo, | ||
47 | .interface_version = XENPF_INTERFACE_VERSION, | ||
48 | .u.pcpu_info.xen_cpuid = 0, | ||
49 | }; | ||
50 | int ret = 0; | ||
51 | |||
52 | /* Shouldn't need this as APIC is turned off for PV, and we only | ||
53 | * get called on the bootup processor. But just in case. */ | ||
54 | if (!xen_initial_domain() || smp_processor_id()) | ||
55 | return 0; | ||
56 | |||
57 | if (reg == APIC_LVR) | ||
58 | return 0x10; | ||
59 | #ifdef CONFIG_X86_32 | ||
60 | if (reg == APIC_LDR) | ||
61 | return SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); | ||
62 | #endif | ||
63 | if (reg != APIC_ID) | ||
64 | return 0; | ||
65 | |||
66 | ret = HYPERVISOR_dom0_op(&op); | ||
67 | if (ret) | ||
68 | return 0; | ||
69 | |||
70 | return op.u.pcpu_info.apic_id << 24; | ||
71 | } | ||
72 | |||
73 | static void xen_apic_write(u32 reg, u32 val) | ||
74 | { | ||
75 | /* Warn to see if there are any stray references */ | ||
76 | WARN(1,"register: %x, value: %x\n", reg, val); | ||
77 | } | ||
78 | |||
79 | static u64 xen_apic_icr_read(void) | ||
80 | { | ||
81 | return 0; | ||
82 | } | ||
83 | |||
84 | static void xen_apic_icr_write(u32 low, u32 id) | ||
85 | { | ||
86 | /* Warn to see if there are any stray references */ | ||
87 | WARN_ON(1); | ||
88 | } | ||
89 | |||
90 | static u32 xen_safe_apic_wait_icr_idle(void) | ||
91 | { | ||
92 | return 0; | ||
93 | } | ||
94 | |||
95 | static int xen_apic_probe_pv(void) | ||
96 | { | ||
97 | if (xen_pv_domain()) | ||
98 | return 1; | ||
99 | |||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | static int xen_madt_oem_check(char *oem_id, char *oem_table_id) | ||
104 | { | ||
105 | return xen_pv_domain(); | ||
106 | } | ||
107 | |||
108 | static int xen_id_always_valid(int apicid) | ||
109 | { | ||
110 | return 1; | ||
111 | } | ||
112 | |||
113 | static int xen_id_always_registered(void) | ||
114 | { | ||
115 | return 1; | ||
116 | } | ||
117 | |||
118 | static int xen_phys_pkg_id(int initial_apic_id, int index_msb) | ||
119 | { | ||
120 | return initial_apic_id >> index_msb; | ||
121 | } | ||
122 | |||
123 | #ifdef CONFIG_X86_32 | ||
124 | static int xen_x86_32_early_logical_apicid(int cpu) | ||
125 | { | ||
126 | /* Match with APIC_LDR read. Otherwise setup_local_APIC complains. */ | ||
127 | return 1 << cpu; | ||
128 | } | ||
129 | #endif | ||
130 | |||
131 | static void xen_noop(void) | ||
132 | { | ||
133 | } | ||
134 | |||
135 | static void xen_silent_inquire(int apicid) | ||
136 | { | ||
137 | } | ||
138 | |||
139 | static struct apic xen_pv_apic = { | ||
140 | .name = "Xen PV", | ||
141 | .probe = xen_apic_probe_pv, | ||
142 | .acpi_madt_oem_check = xen_madt_oem_check, | ||
143 | .apic_id_valid = xen_id_always_valid, | ||
144 | .apic_id_registered = xen_id_always_registered, | ||
145 | |||
146 | /* .irq_delivery_mode - used in native_compose_msi_msg only */ | ||
147 | /* .irq_dest_mode - used in native_compose_msi_msg only */ | ||
148 | |||
149 | .target_cpus = default_target_cpus, | ||
150 | .disable_esr = 0, | ||
151 | /* .dest_logical - default_send_IPI_ use it but we use our own. */ | ||
152 | .check_apicid_used = default_check_apicid_used, /* Used on 32-bit */ | ||
153 | |||
154 | .vector_allocation_domain = flat_vector_allocation_domain, | ||
155 | .init_apic_ldr = xen_noop, /* setup_local_APIC calls it */ | ||
156 | |||
157 | .ioapic_phys_id_map = default_ioapic_phys_id_map, /* Used on 32-bit */ | ||
158 | .setup_apic_routing = NULL, | ||
159 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | ||
160 | .apicid_to_cpu_present = physid_set_mask_of_physid, /* Used on 32-bit */ | ||
161 | .check_phys_apicid_present = default_check_phys_apicid_present, /* smp_sanity_check needs it */ | ||
162 | .phys_pkg_id = xen_phys_pkg_id, /* detect_ht */ | ||
163 | |||
164 | .get_apic_id = xen_get_apic_id, | ||
165 | .set_apic_id = xen_set_apic_id, /* Can be NULL on 32-bit. */ | ||
166 | .apic_id_mask = 0xFF << 24, /* Used by verify_local_APIC. Match with what xen_get_apic_id does. */ | ||
167 | |||
168 | .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, | ||
169 | |||
170 | #ifdef CONFIG_SMP | ||
171 | .send_IPI_mask = xen_send_IPI_mask, | ||
172 | .send_IPI_mask_allbutself = xen_send_IPI_mask_allbutself, | ||
173 | .send_IPI_allbutself = xen_send_IPI_allbutself, | ||
174 | .send_IPI_all = xen_send_IPI_all, | ||
175 | .send_IPI_self = xen_send_IPI_self, | ||
176 | #endif | ||
177 | /* .wait_for_init_deassert - used by AP bootup (smp_callin), which we don't use */ | ||
178 | .inquire_remote_apic = xen_silent_inquire, | ||
179 | |||
180 | .read = xen_apic_read, | ||
181 | .write = xen_apic_write, | ||
182 | .eoi_write = xen_apic_write, | ||
183 | |||
184 | .icr_read = xen_apic_icr_read, | ||
185 | .icr_write = xen_apic_icr_write, | ||
186 | .wait_icr_idle = xen_noop, | ||
187 | .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle, | ||
188 | |||
189 | #ifdef CONFIG_X86_32 | ||
190 | /* generic_processor_info and setup_local_APIC. */ | ||
191 | .x86_32_early_logical_apicid = xen_x86_32_early_logical_apicid, | ||
192 | #endif | ||
193 | }; | ||
194 | |||
195 | static void __init xen_apic_check(void) | ||
196 | { | ||
197 | if (apic == &xen_pv_apic) | ||
198 | return; | ||
199 | |||
200 | pr_info("Switched APIC routing from %s to %s.\n", apic->name, | ||
201 | xen_pv_apic.name); | ||
202 | apic = &xen_pv_apic; | ||
203 | } | ||
31 | void __init xen_init_apic(void) | 204 | void __init xen_init_apic(void) |
32 | { | 205 | { |
33 | x86_io_apic_ops.read = xen_io_apic_read; | 206 | x86_io_apic_ops.read = xen_io_apic_read; |
207 | /* On PV guests the APIC CPUID bit is disabled so none of the | ||
208 | * routines end up executing. */ | ||
209 | if (!xen_initial_domain()) | ||
210 | apic = &xen_pv_apic; | ||
211 | |||
212 | x86_platform.apic_post_init = xen_apic_check; | ||
34 | } | 213 | } |
214 | apic_driver(xen_pv_apic); | ||
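The hunk above consolidates the Xen PV APIC handling into a standalone driver: non-dom0 PV guests install it directly in xen_init_apic(), while xen_apic_check() runs later as the apic_post_init hook and switches the routing if the generic probe picked a different driver. A minimal sketch of that select-then-verify pattern, using stand-in types rather than the kernel's apic machinery:

    /* Sketch only: stand-ins for the global 'apic' pointer and the PV driver. */
    struct apic_driver_sketch {
            const char *name;
    };

    static struct apic_driver_sketch xen_pv = { "Xen PV" };
    static struct apic_driver_sketch *cur_apic;      /* whichever driver probed first */

    static void apic_post_init_sketch(void)          /* plays the role of xen_apic_check() */
    {
            if (cur_apic == &xen_pv)
                    return;                          /* the probe already selected us */
            cur_apic = &xen_pv;                      /* otherwise force PV routing */
    }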
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 5240f563076d..94578efd3067 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -912,6 +912,7 @@ static void xen_load_sp0(struct tss_struct *tss, | |||
912 | mcs = xen_mc_entry(0); | 912 | mcs = xen_mc_entry(0); |
913 | MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); | 913 | MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); |
914 | xen_mc_issue(PARAVIRT_LAZY_CPU); | 914 | xen_mc_issue(PARAVIRT_LAZY_CPU); |
915 | tss->x86_tss.sp0 = thread->sp0; | ||
915 | } | 916 | } |
916 | 917 | ||
917 | static void xen_set_iopl_mask(unsigned mask) | 918 | static void xen_set_iopl_mask(unsigned mask) |
@@ -927,92 +928,6 @@ static void xen_io_delay(void) | |||
927 | { | 928 | { |
928 | } | 929 | } |
929 | 930 | ||
930 | #ifdef CONFIG_X86_LOCAL_APIC | ||
931 | static unsigned long xen_set_apic_id(unsigned int x) | ||
932 | { | ||
933 | WARN_ON(1); | ||
934 | return x; | ||
935 | } | ||
936 | static unsigned int xen_get_apic_id(unsigned long x) | ||
937 | { | ||
938 | return ((x)>>24) & 0xFFu; | ||
939 | } | ||
940 | static u32 xen_apic_read(u32 reg) | ||
941 | { | ||
942 | struct xen_platform_op op = { | ||
943 | .cmd = XENPF_get_cpuinfo, | ||
944 | .interface_version = XENPF_INTERFACE_VERSION, | ||
945 | .u.pcpu_info.xen_cpuid = 0, | ||
946 | }; | ||
947 | int ret = 0; | ||
948 | |||
949 | /* Shouldn't need this as APIC is turned off for PV, and we only | ||
950 | * get called on the bootup processor. But just in case. */ | ||
951 | if (!xen_initial_domain() || smp_processor_id()) | ||
952 | return 0; | ||
953 | |||
954 | if (reg == APIC_LVR) | ||
955 | return 0x10; | ||
956 | |||
957 | if (reg != APIC_ID) | ||
958 | return 0; | ||
959 | |||
960 | ret = HYPERVISOR_dom0_op(&op); | ||
961 | if (ret) | ||
962 | return 0; | ||
963 | |||
964 | return op.u.pcpu_info.apic_id << 24; | ||
965 | } | ||
966 | |||
967 | static void xen_apic_write(u32 reg, u32 val) | ||
968 | { | ||
969 | /* Warn to see if there's any stray references */ | ||
970 | WARN_ON(1); | ||
971 | } | ||
972 | |||
973 | static u64 xen_apic_icr_read(void) | ||
974 | { | ||
975 | return 0; | ||
976 | } | ||
977 | |||
978 | static void xen_apic_icr_write(u32 low, u32 id) | ||
979 | { | ||
980 | /* Warn to see if there's any stray references */ | ||
981 | WARN_ON(1); | ||
982 | } | ||
983 | |||
984 | static void xen_apic_wait_icr_idle(void) | ||
985 | { | ||
986 | return; | ||
987 | } | ||
988 | |||
989 | static u32 xen_safe_apic_wait_icr_idle(void) | ||
990 | { | ||
991 | return 0; | ||
992 | } | ||
993 | |||
994 | static void set_xen_basic_apic_ops(void) | ||
995 | { | ||
996 | apic->read = xen_apic_read; | ||
997 | apic->write = xen_apic_write; | ||
998 | apic->icr_read = xen_apic_icr_read; | ||
999 | apic->icr_write = xen_apic_icr_write; | ||
1000 | apic->wait_icr_idle = xen_apic_wait_icr_idle; | ||
1001 | apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle; | ||
1002 | apic->set_apic_id = xen_set_apic_id; | ||
1003 | apic->get_apic_id = xen_get_apic_id; | ||
1004 | |||
1005 | #ifdef CONFIG_SMP | ||
1006 | apic->send_IPI_allbutself = xen_send_IPI_allbutself; | ||
1007 | apic->send_IPI_mask_allbutself = xen_send_IPI_mask_allbutself; | ||
1008 | apic->send_IPI_mask = xen_send_IPI_mask; | ||
1009 | apic->send_IPI_all = xen_send_IPI_all; | ||
1010 | apic->send_IPI_self = xen_send_IPI_self; | ||
1011 | #endif | ||
1012 | } | ||
1013 | |||
1014 | #endif | ||
1015 | |||
1016 | static void xen_clts(void) | 931 | static void xen_clts(void) |
1017 | { | 932 | { |
1018 | struct multicall_space mcs; | 933 | struct multicall_space mcs; |
@@ -1618,7 +1533,7 @@ asmlinkage __visible void __init xen_start_kernel(void) | |||
1618 | /* | 1533 | /* |
1619 | * set up the basic apic ops. | 1534 | * set up the basic apic ops. |
1620 | */ | 1535 | */ |
1621 | set_xen_basic_apic_ops(); | 1536 | xen_init_apic(); |
1622 | #endif | 1537 | #endif |
1623 | 1538 | ||
1624 | if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { | 1539 | if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { |
@@ -1731,8 +1646,6 @@ asmlinkage __visible void __init xen_start_kernel(void) | |||
1731 | if (HYPERVISOR_dom0_op(&op) == 0) | 1646 | if (HYPERVISOR_dom0_op(&op) == 0) |
1732 | boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags; | 1647 | boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags; |
1733 | 1648 | ||
1734 | xen_init_apic(); | ||
1735 | |||
1736 | /* Make sure ACS will be enabled */ | 1649 | /* Make sure ACS will be enabled */ |
1737 | pci_request_acs(); | 1650 | pci_request_acs(); |
1738 | 1651 | ||
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index adca9e2b6553..dd151b2045b0 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -502,7 +502,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd) | |||
502 | } | 502 | } |
503 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); | 503 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); |
504 | 504 | ||
505 | #if PAGETABLE_LEVELS == 4 | 505 | #if CONFIG_PGTABLE_LEVELS == 4 |
506 | __visible pudval_t xen_pud_val(pud_t pud) | 506 | __visible pudval_t xen_pud_val(pud_t pud) |
507 | { | 507 | { |
508 | return pte_mfn_to_pfn(pud.pud); | 508 | return pte_mfn_to_pfn(pud.pud); |
@@ -589,7 +589,7 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val) | |||
589 | 589 | ||
590 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 590 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
591 | } | 591 | } |
592 | #endif /* PAGETABLE_LEVELS == 4 */ | 592 | #endif /* CONFIG_PGTABLE_LEVELS == 4 */ |
593 | 593 | ||
594 | /* | 594 | /* |
595 | * (Yet another) pagetable walker. This one is intended for pinning a | 595 | * (Yet another) pagetable walker. This one is intended for pinning a |
@@ -1628,7 +1628,7 @@ static void xen_release_pmd(unsigned long pfn) | |||
1628 | xen_release_ptpage(pfn, PT_PMD); | 1628 | xen_release_ptpage(pfn, PT_PMD); |
1629 | } | 1629 | } |
1630 | 1630 | ||
1631 | #if PAGETABLE_LEVELS == 4 | 1631 | #if CONFIG_PGTABLE_LEVELS == 4 |
1632 | static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) | 1632 | static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) |
1633 | { | 1633 | { |
1634 | xen_alloc_ptpage(mm, pfn, PT_PUD); | 1634 | xen_alloc_ptpage(mm, pfn, PT_PUD); |
@@ -2046,7 +2046,7 @@ static void __init xen_post_allocator_init(void) | |||
2046 | pv_mmu_ops.set_pte = xen_set_pte; | 2046 | pv_mmu_ops.set_pte = xen_set_pte; |
2047 | pv_mmu_ops.set_pmd = xen_set_pmd; | 2047 | pv_mmu_ops.set_pmd = xen_set_pmd; |
2048 | pv_mmu_ops.set_pud = xen_set_pud; | 2048 | pv_mmu_ops.set_pud = xen_set_pud; |
2049 | #if PAGETABLE_LEVELS == 4 | 2049 | #if CONFIG_PGTABLE_LEVELS == 4 |
2050 | pv_mmu_ops.set_pgd = xen_set_pgd; | 2050 | pv_mmu_ops.set_pgd = xen_set_pgd; |
2051 | #endif | 2051 | #endif |
2052 | 2052 | ||
@@ -2056,7 +2056,7 @@ static void __init xen_post_allocator_init(void) | |||
2056 | pv_mmu_ops.alloc_pmd = xen_alloc_pmd; | 2056 | pv_mmu_ops.alloc_pmd = xen_alloc_pmd; |
2057 | pv_mmu_ops.release_pte = xen_release_pte; | 2057 | pv_mmu_ops.release_pte = xen_release_pte; |
2058 | pv_mmu_ops.release_pmd = xen_release_pmd; | 2058 | pv_mmu_ops.release_pmd = xen_release_pmd; |
2059 | #if PAGETABLE_LEVELS == 4 | 2059 | #if CONFIG_PGTABLE_LEVELS == 4 |
2060 | pv_mmu_ops.alloc_pud = xen_alloc_pud; | 2060 | pv_mmu_ops.alloc_pud = xen_alloc_pud; |
2061 | pv_mmu_ops.release_pud = xen_release_pud; | 2061 | pv_mmu_ops.release_pud = xen_release_pud; |
2062 | #endif | 2062 | #endif |
@@ -2122,14 +2122,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { | |||
2122 | .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), | 2122 | .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), |
2123 | .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), | 2123 | .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), |
2124 | 2124 | ||
2125 | #if PAGETABLE_LEVELS == 4 | 2125 | #if CONFIG_PGTABLE_LEVELS == 4 |
2126 | .pud_val = PV_CALLEE_SAVE(xen_pud_val), | 2126 | .pud_val = PV_CALLEE_SAVE(xen_pud_val), |
2127 | .make_pud = PV_CALLEE_SAVE(xen_make_pud), | 2127 | .make_pud = PV_CALLEE_SAVE(xen_make_pud), |
2128 | .set_pgd = xen_set_pgd_hyper, | 2128 | .set_pgd = xen_set_pgd_hyper, |
2129 | 2129 | ||
2130 | .alloc_pud = xen_alloc_pmd_init, | 2130 | .alloc_pud = xen_alloc_pmd_init, |
2131 | .release_pud = xen_release_pmd_init, | 2131 | .release_pud = xen_release_pmd_init, |
2132 | #endif /* PAGETABLE_LEVELS == 4 */ | 2132 | #endif /* CONFIG_PGTABLE_LEVELS == 4 */ |
2133 | 2133 | ||
2134 | .activate_mm = xen_activate_mm, | 2134 | .activate_mm = xen_activate_mm, |
2135 | .dup_mmap = xen_dup_mmap, | 2135 | .dup_mmap = xen_dup_mmap, |
@@ -2436,99 +2436,11 @@ void __init xen_hvm_init_mmu_ops(void) | |||
2436 | } | 2436 | } |
2437 | #endif | 2437 | #endif |
2438 | 2438 | ||
2439 | #ifdef CONFIG_XEN_PVH | ||
2440 | /* | ||
2441 | * Map foreign gfn (fgfn), to local pfn (lpfn). This for the user | ||
2442 | * space creating new guest on pvh dom0 and needing to map domU pages. | ||
2443 | */ | ||
2444 | static int xlate_add_to_p2m(unsigned long lpfn, unsigned long fgfn, | ||
2445 | unsigned int domid) | ||
2446 | { | ||
2447 | int rc, err = 0; | ||
2448 | xen_pfn_t gpfn = lpfn; | ||
2449 | xen_ulong_t idx = fgfn; | ||
2450 | |||
2451 | struct xen_add_to_physmap_range xatp = { | ||
2452 | .domid = DOMID_SELF, | ||
2453 | .foreign_domid = domid, | ||
2454 | .size = 1, | ||
2455 | .space = XENMAPSPACE_gmfn_foreign, | ||
2456 | }; | ||
2457 | set_xen_guest_handle(xatp.idxs, &idx); | ||
2458 | set_xen_guest_handle(xatp.gpfns, &gpfn); | ||
2459 | set_xen_guest_handle(xatp.errs, &err); | ||
2460 | |||
2461 | rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); | ||
2462 | if (rc < 0) | ||
2463 | return rc; | ||
2464 | return err; | ||
2465 | } | ||
2466 | |||
2467 | static int xlate_remove_from_p2m(unsigned long spfn, int count) | ||
2468 | { | ||
2469 | struct xen_remove_from_physmap xrp; | ||
2470 | int i, rc; | ||
2471 | |||
2472 | for (i = 0; i < count; i++) { | ||
2473 | xrp.domid = DOMID_SELF; | ||
2474 | xrp.gpfn = spfn+i; | ||
2475 | rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); | ||
2476 | if (rc) | ||
2477 | break; | ||
2478 | } | ||
2479 | return rc; | ||
2480 | } | ||
2481 | |||
2482 | struct xlate_remap_data { | ||
2483 | unsigned long fgfn; /* foreign domain's gfn */ | ||
2484 | pgprot_t prot; | ||
2485 | domid_t domid; | ||
2486 | int index; | ||
2487 | struct page **pages; | ||
2488 | }; | ||
2489 | |||
2490 | static int xlate_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, | ||
2491 | void *data) | ||
2492 | { | ||
2493 | int rc; | ||
2494 | struct xlate_remap_data *remap = data; | ||
2495 | unsigned long pfn = page_to_pfn(remap->pages[remap->index++]); | ||
2496 | pte_t pteval = pte_mkspecial(pfn_pte(pfn, remap->prot)); | ||
2497 | |||
2498 | rc = xlate_add_to_p2m(pfn, remap->fgfn, remap->domid); | ||
2499 | if (rc) | ||
2500 | return rc; | ||
2501 | native_set_pte(ptep, pteval); | ||
2502 | |||
2503 | return 0; | ||
2504 | } | ||
2505 | |||
2506 | static int xlate_remap_gfn_range(struct vm_area_struct *vma, | ||
2507 | unsigned long addr, unsigned long mfn, | ||
2508 | int nr, pgprot_t prot, unsigned domid, | ||
2509 | struct page **pages) | ||
2510 | { | ||
2511 | int err; | ||
2512 | struct xlate_remap_data pvhdata; | ||
2513 | |||
2514 | BUG_ON(!pages); | ||
2515 | |||
2516 | pvhdata.fgfn = mfn; | ||
2517 | pvhdata.prot = prot; | ||
2518 | pvhdata.domid = domid; | ||
2519 | pvhdata.index = 0; | ||
2520 | pvhdata.pages = pages; | ||
2521 | err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT, | ||
2522 | xlate_map_pte_fn, &pvhdata); | ||
2523 | flush_tlb_all(); | ||
2524 | return err; | ||
2525 | } | ||
2526 | #endif | ||
2527 | |||
2528 | #define REMAP_BATCH_SIZE 16 | 2439 | #define REMAP_BATCH_SIZE 16 |
2529 | 2440 | ||
2530 | struct remap_data { | 2441 | struct remap_data { |
2531 | unsigned long mfn; | 2442 | xen_pfn_t *mfn; |
2443 | bool contiguous; | ||
2532 | pgprot_t prot; | 2444 | pgprot_t prot; |
2533 | struct mmu_update *mmu_update; | 2445 | struct mmu_update *mmu_update; |
2534 | }; | 2446 | }; |
@@ -2537,7 +2449,14 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, | |||
2537 | unsigned long addr, void *data) | 2449 | unsigned long addr, void *data) |
2538 | { | 2450 | { |
2539 | struct remap_data *rmd = data; | 2451 | struct remap_data *rmd = data; |
2540 | pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot)); | 2452 | pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot)); |
2453 | |||
2454 | /* If we have a contigious range, just update the mfn itself, | ||
2455 | else update pointer to be "next mfn". */ | ||
2456 | if (rmd->contiguous) | ||
2457 | (*rmd->mfn)++; | ||
2458 | else | ||
2459 | rmd->mfn++; | ||
2541 | 2460 | ||
2542 | rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; | 2461 | rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; |
2543 | rmd->mmu_update->val = pte_val_ma(pte); | 2462 | rmd->mmu_update->val = pte_val_ma(pte); |
@@ -2546,26 +2465,26 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, | |||
2546 | return 0; | 2465 | return 0; |
2547 | } | 2466 | } |
2548 | 2467 | ||
2549 | int xen_remap_domain_mfn_range(struct vm_area_struct *vma, | 2468 | static int do_remap_mfn(struct vm_area_struct *vma, |
2550 | unsigned long addr, | 2469 | unsigned long addr, |
2551 | xen_pfn_t mfn, int nr, | 2470 | xen_pfn_t *mfn, int nr, |
2552 | pgprot_t prot, unsigned domid, | 2471 | int *err_ptr, pgprot_t prot, |
2553 | struct page **pages) | 2472 | unsigned domid, |
2554 | 2473 | struct page **pages) | |
2555 | { | 2474 | { |
2475 | int err = 0; | ||
2556 | struct remap_data rmd; | 2476 | struct remap_data rmd; |
2557 | struct mmu_update mmu_update[REMAP_BATCH_SIZE]; | 2477 | struct mmu_update mmu_update[REMAP_BATCH_SIZE]; |
2558 | int batch; | ||
2559 | unsigned long range; | 2478 | unsigned long range; |
2560 | int err = 0; | 2479 | int mapped = 0; |
2561 | 2480 | ||
2562 | BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); | 2481 | BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); |
2563 | 2482 | ||
2564 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | 2483 | if (xen_feature(XENFEAT_auto_translated_physmap)) { |
2565 | #ifdef CONFIG_XEN_PVH | 2484 | #ifdef CONFIG_XEN_PVH |
2566 | /* We need to update the local page tables and the xen HAP */ | 2485 | /* We need to update the local page tables and the xen HAP */ |
2567 | return xlate_remap_gfn_range(vma, addr, mfn, nr, prot, | 2486 | return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr, |
2568 | domid, pages); | 2487 | prot, domid, pages); |
2569 | #else | 2488 | #else |
2570 | return -EINVAL; | 2489 | return -EINVAL; |
2571 | #endif | 2490 | #endif |
@@ -2573,9 +2492,15 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, | |||
2573 | 2492 | ||
2574 | rmd.mfn = mfn; | 2493 | rmd.mfn = mfn; |
2575 | rmd.prot = prot; | 2494 | rmd.prot = prot; |
2495 | /* We use err_ptr to indicate whether we are doing a contiguous | ||
2496 | * mapping or a discontiguous mapping. */ | ||
2497 | rmd.contiguous = !err_ptr; | ||
2576 | 2498 | ||
2577 | while (nr) { | 2499 | while (nr) { |
2578 | batch = min(REMAP_BATCH_SIZE, nr); | 2500 | int index = 0; |
2501 | int done = 0; | ||
2502 | int batch = min(REMAP_BATCH_SIZE, nr); | ||
2503 | int batch_left = batch; | ||
2579 | range = (unsigned long)batch << PAGE_SHIFT; | 2504 | range = (unsigned long)batch << PAGE_SHIFT; |
2580 | 2505 | ||
2581 | rmd.mmu_update = mmu_update; | 2506 | rmd.mmu_update = mmu_update; |
@@ -2584,23 +2509,72 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, | |||
2584 | if (err) | 2509 | if (err) |
2585 | goto out; | 2510 | goto out; |
2586 | 2511 | ||
2587 | err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid); | 2512 | /* We record the error for each page that gives an error, but |
2588 | if (err < 0) | 2513 | * continue mapping until the whole set is done */ |
2589 | goto out; | 2514 | do { |
2515 | int i; | ||
2516 | |||
2517 | err = HYPERVISOR_mmu_update(&mmu_update[index], | ||
2518 | batch_left, &done, domid); | ||
2519 | |||
2520 | /* | ||
2521 | * @err_ptr may be the same buffer as @mfn, so | ||
2522 | * only clear it after each chunk of @mfn is | ||
2523 | * used. | ||
2524 | */ | ||
2525 | if (err_ptr) { | ||
2526 | for (i = index; i < index + done; i++) | ||
2527 | err_ptr[i] = 0; | ||
2528 | } | ||
2529 | if (err < 0) { | ||
2530 | if (!err_ptr) | ||
2531 | goto out; | ||
2532 | err_ptr[i] = err; | ||
2533 | done++; /* Skip failed frame. */ | ||
2534 | } else | ||
2535 | mapped += done; | ||
2536 | batch_left -= done; | ||
2537 | index += done; | ||
2538 | } while (batch_left); | ||
2590 | 2539 | ||
2591 | nr -= batch; | 2540 | nr -= batch; |
2592 | addr += range; | 2541 | addr += range; |
2542 | if (err_ptr) | ||
2543 | err_ptr += batch; | ||
2593 | } | 2544 | } |
2594 | |||
2595 | err = 0; | ||
2596 | out: | 2545 | out: |
2597 | 2546 | ||
2598 | xen_flush_tlb_all(); | 2547 | xen_flush_tlb_all(); |
2599 | 2548 | ||
2600 | return err; | 2549 | return err < 0 ? err : mapped; |
2550 | } | ||
2551 | |||
2552 | int xen_remap_domain_mfn_range(struct vm_area_struct *vma, | ||
2553 | unsigned long addr, | ||
2554 | xen_pfn_t mfn, int nr, | ||
2555 | pgprot_t prot, unsigned domid, | ||
2556 | struct page **pages) | ||
2557 | { | ||
2558 | return do_remap_mfn(vma, addr, &mfn, nr, NULL, prot, domid, pages); | ||
2601 | } | 2559 | } |
2602 | EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); | 2560 | EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); |
2603 | 2561 | ||
2562 | int xen_remap_domain_mfn_array(struct vm_area_struct *vma, | ||
2563 | unsigned long addr, | ||
2564 | xen_pfn_t *mfn, int nr, | ||
2565 | int *err_ptr, pgprot_t prot, | ||
2566 | unsigned domid, struct page **pages) | ||
2567 | { | ||
2568 | /* We BUG_ON because it's a programmer error to pass a NULL err_ptr, | ||
2569 | * and it is otherwise very hard to track down later what actually | ||
2570 | * caused the "wrong memory was mapped in" symptom. | ||
2571 | */ | ||
2572 | BUG_ON(err_ptr == NULL); | ||
2573 | return do_remap_mfn(vma, addr, mfn, nr, err_ptr, prot, domid, pages); | ||
2574 | } | ||
2575 | EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array); | ||
2576 | |||
2577 | |||
2604 | /* Returns: 0 success */ | 2578 | /* Returns: 0 success */ |
2605 | int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, | 2579 | int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, |
2606 | int numpgs, struct page **pages) | 2580 | int numpgs, struct page **pages) |
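Both exported wrappers above funnel into do_remap_mfn(); the only behavioural switch is whether err_ptr is NULL. A small standalone sketch of how the frame cursor advances in the two modes (plain C with hypothetical types, not the kernel's xen_pfn_t/pte plumbing):

    #include <stddef.h>

    /* Sketch only: contiguous mode increments the single starting frame in
     * place; array mode walks one frame (and one error slot) per page. */
    static void advance_frames(unsigned long *mfn, int nr, int *err_ptr)
    {
            int contiguous = (err_ptr == NULL);

            for (int i = 0; i < nr; i++) {
                    unsigned long frame = *mfn;      /* frame mapped for page i */

                    if (contiguous)
                            (*mfn)++;                /* same slot, next frame */
                    else
                            mfn++;                   /* next slot in the caller's array */

                    if (err_ptr)
                            err_ptr[i] = 0;          /* per-page status, cleared on success */
                    (void)frame;
            }
    }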
@@ -2609,22 +2583,7 @@ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, | |||
2609 | return 0; | 2583 | return 0; |
2610 | 2584 | ||
2611 | #ifdef CONFIG_XEN_PVH | 2585 | #ifdef CONFIG_XEN_PVH |
2612 | while (numpgs--) { | 2586 | return xen_xlate_unmap_gfn_range(vma, numpgs, pages); |
2613 | /* | ||
2614 | * The mmu has already cleaned up the process mmu | ||
2615 | * resources at this point (lookup_address will return | ||
2616 | * NULL). | ||
2617 | */ | ||
2618 | unsigned long pfn = page_to_pfn(pages[numpgs]); | ||
2619 | |||
2620 | xlate_remove_from_p2m(pfn, 1); | ||
2621 | } | ||
2622 | /* | ||
2623 | * We don't need to flush tlbs because as part of | ||
2624 | * xlate_remove_from_p2m, the hypervisor will do tlb flushes | ||
2625 | * after removing the p2m entries from the EPT/NPT | ||
2626 | */ | ||
2627 | return 0; | ||
2628 | #else | 2587 | #else |
2629 | return -EINVAL; | 2588 | return -EINVAL; |
2630 | #endif | 2589 | #endif |
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 9f93af56a5fc..b47124d4cd67 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
@@ -91,6 +91,12 @@ EXPORT_SYMBOL_GPL(xen_p2m_size); | |||
91 | unsigned long xen_max_p2m_pfn __read_mostly; | 91 | unsigned long xen_max_p2m_pfn __read_mostly; |
92 | EXPORT_SYMBOL_GPL(xen_max_p2m_pfn); | 92 | EXPORT_SYMBOL_GPL(xen_max_p2m_pfn); |
93 | 93 | ||
94 | #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT | ||
95 | #define P2M_LIMIT CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT | ||
96 | #else | ||
97 | #define P2M_LIMIT 0 | ||
98 | #endif | ||
99 | |||
94 | static DEFINE_SPINLOCK(p2m_update_lock); | 100 | static DEFINE_SPINLOCK(p2m_update_lock); |
95 | 101 | ||
96 | static unsigned long *p2m_mid_missing_mfn; | 102 | static unsigned long *p2m_mid_missing_mfn; |
@@ -385,9 +391,11 @@ static void __init xen_rebuild_p2m_list(unsigned long *p2m) | |||
385 | void __init xen_vmalloc_p2m_tree(void) | 391 | void __init xen_vmalloc_p2m_tree(void) |
386 | { | 392 | { |
387 | static struct vm_struct vm; | 393 | static struct vm_struct vm; |
394 | unsigned long p2m_limit; | ||
388 | 395 | ||
396 | p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE; | ||
389 | vm.flags = VM_ALLOC; | 397 | vm.flags = VM_ALLOC; |
390 | vm.size = ALIGN(sizeof(unsigned long) * xen_max_p2m_pfn, | 398 | vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit), |
391 | PMD_SIZE * PMDS_PER_MID_PAGE); | 399 | PMD_SIZE * PMDS_PER_MID_PAGE); |
392 | vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE); | 400 | vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE); |
393 | pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size); | 401 | pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size); |
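The new p2m_limit above converts CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT (given in GiB) into a number of page-frame entries, so the vmalloc'd p2m list is sized for the ballooned-up maximum as well as the boot-time xen_max_p2m_pfn. A quick worked example of that conversion, assuming 4 KiB pages and an example 8 GiB limit:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long page_size = 4096;     /* assumed 4 KiB pages */
            unsigned long long limit_gib = 8;        /* assumed example hotplug limit */
            unsigned long long p2m_limit = limit_gib * 1024 * 1024 * 1024 / page_size;

            /* 8 GiB / 4 KiB = 2097152 pfns; vm.size then covers
             * max(xen_max_p2m_pfn, p2m_limit) unsigned longs, PMD-aligned. */
            printf("p2m_limit = %llu pfns\n", p2m_limit);
            return 0;
    }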
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 08e8489c47f1..86484384492e 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -90,14 +90,10 @@ static void cpu_bringup(void) | |||
90 | 90 | ||
91 | set_cpu_online(cpu, true); | 91 | set_cpu_online(cpu, true); |
92 | 92 | ||
93 | this_cpu_write(cpu_state, CPU_ONLINE); | 93 | cpu_set_state_online(cpu); /* Implies full memory barrier. */ |
94 | |||
95 | wmb(); | ||
96 | 94 | ||
97 | /* We can take interrupts now: we're officially "up". */ | 95 | /* We can take interrupts now: we're officially "up". */ |
98 | local_irq_enable(); | 96 | local_irq_enable(); |
99 | |||
100 | wmb(); /* make sure everything is out */ | ||
101 | } | 97 | } |
102 | 98 | ||
103 | /* | 99 | /* |
@@ -445,21 +441,19 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) | |||
445 | { | 441 | { |
446 | int rc; | 442 | int rc; |
447 | 443 | ||
448 | per_cpu(current_task, cpu) = idle; | 444 | common_cpu_up(cpu, idle); |
449 | #ifdef CONFIG_X86_32 | ||
450 | irq_ctx_init(cpu); | ||
451 | #else | ||
452 | clear_tsk_thread_flag(idle, TIF_FORK); | ||
453 | #endif | ||
454 | per_cpu(kernel_stack, cpu) = | ||
455 | (unsigned long)task_stack_page(idle) - | ||
456 | KERNEL_STACK_OFFSET + THREAD_SIZE; | ||
457 | 445 | ||
458 | xen_setup_runstate_info(cpu); | 446 | xen_setup_runstate_info(cpu); |
459 | xen_setup_timer(cpu); | 447 | xen_setup_timer(cpu); |
460 | xen_init_lock_cpu(cpu); | 448 | xen_init_lock_cpu(cpu); |
461 | 449 | ||
462 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; | 450 | /* |
451 | * PV VCPUs are always successfully taken down (see 'while' loop | ||
452 | * in xen_cpu_die()), so -EBUSY is an error. | ||
453 | */ | ||
454 | rc = cpu_check_up_prepare(cpu); | ||
455 | if (rc) | ||
456 | return rc; | ||
463 | 457 | ||
464 | /* make sure interrupts start blocked */ | 458 | /* make sure interrupts start blocked */ |
465 | per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; | 459 | per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; |
@@ -468,10 +462,6 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) | |||
468 | if (rc) | 462 | if (rc) |
469 | return rc; | 463 | return rc; |
470 | 464 | ||
471 | if (num_online_cpus() == 1) | ||
472 | /* Just in case we booted with a single CPU. */ | ||
473 | alternatives_enable_smp(); | ||
474 | |||
475 | rc = xen_smp_intr_init(cpu); | 465 | rc = xen_smp_intr_init(cpu); |
476 | if (rc) | 466 | if (rc) |
477 | return rc; | 467 | return rc; |
@@ -479,10 +469,8 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) | |||
479 | rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); | 469 | rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); |
480 | BUG_ON(rc); | 470 | BUG_ON(rc); |
481 | 471 | ||
482 | while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { | 472 | while (cpu_report_state(cpu) != CPU_ONLINE) |
483 | HYPERVISOR_sched_op(SCHEDOP_yield, NULL); | 473 | HYPERVISOR_sched_op(SCHEDOP_yield, NULL); |
484 | barrier(); | ||
485 | } | ||
486 | 474 | ||
487 | return 0; | 475 | return 0; |
488 | } | 476 | } |
@@ -511,11 +499,11 @@ static void xen_cpu_die(unsigned int cpu) | |||
511 | schedule_timeout(HZ/10); | 499 | schedule_timeout(HZ/10); |
512 | } | 500 | } |
513 | 501 | ||
514 | cpu_die_common(cpu); | 502 | if (common_cpu_die(cpu) == 0) { |
515 | 503 | xen_smp_intr_free(cpu); | |
516 | xen_smp_intr_free(cpu); | 504 | xen_uninit_lock_cpu(cpu); |
517 | xen_uninit_lock_cpu(cpu); | 505 | xen_teardown_timer(cpu); |
518 | xen_teardown_timer(cpu); | 506 | } |
519 | } | 507 | } |
520 | 508 | ||
521 | static void xen_play_dead(void) /* used only with HOTPLUG_CPU */ | 509 | static void xen_play_dead(void) /* used only with HOTPLUG_CPU */ |
@@ -747,6 +735,16 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) | |||
747 | static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle) | 735 | static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle) |
748 | { | 736 | { |
749 | int rc; | 737 | int rc; |
738 | |||
739 | /* | ||
740 | * This can happen if CPU was offlined earlier and | ||
741 | * offlining timed out in common_cpu_die(). | ||
742 | */ | ||
743 | if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) { | ||
744 | xen_smp_intr_free(cpu); | ||
745 | xen_uninit_lock_cpu(cpu); | ||
746 | } | ||
747 | |||
750 | /* | 748 | /* |
751 | * xen_smp_intr_init() needs to run before native_cpu_up() | 749 | * xen_smp_intr_init() needs to run before native_cpu_up() |
752 | * so that IPI vectors are set up on the booting CPU before | 750 | * so that IPI vectors are set up on the booting CPU before |
@@ -768,12 +766,6 @@ static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle) | |||
768 | return rc; | 766 | return rc; |
769 | } | 767 | } |
770 | 768 | ||
771 | static void xen_hvm_cpu_die(unsigned int cpu) | ||
772 | { | ||
773 | xen_cpu_die(cpu); | ||
774 | native_cpu_die(cpu); | ||
775 | } | ||
776 | |||
777 | void __init xen_hvm_smp_init(void) | 769 | void __init xen_hvm_smp_init(void) |
778 | { | 770 | { |
779 | if (!xen_have_vector_callback) | 771 | if (!xen_have_vector_callback) |
@@ -781,7 +773,7 @@ void __init xen_hvm_smp_init(void) | |||
781 | smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; | 773 | smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; |
782 | smp_ops.smp_send_reschedule = xen_smp_send_reschedule; | 774 | smp_ops.smp_send_reschedule = xen_smp_send_reschedule; |
783 | smp_ops.cpu_up = xen_hvm_cpu_up; | 775 | smp_ops.cpu_up = xen_hvm_cpu_up; |
784 | smp_ops.cpu_die = xen_hvm_cpu_die; | 776 | smp_ops.cpu_die = xen_cpu_die; |
785 | smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; | 777 | smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; |
786 | smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; | 778 | smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; |
787 | smp_ops.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu; | 779 | smp_ops.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu; |
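The xen_cpu_up() hunk above replaces the open-coded per-cpu cpu_state polling with the common cpu_report_state()/cpu_set_state_online() handshake, still yielding to the hypervisor while the new vCPU comes up. A rough standalone sketch of that wait pattern, with stand-ins for the state accessor and the SCHEDOP_yield hypercall:

    #include <stdatomic.h>
    #include <stdbool.h>

    static _Atomic bool cpu_online;                  /* set by the new CPU in cpu_bringup() */

    static void yield_to_hypervisor(void)            /* stands in for SCHEDOP_yield */
    {
    }

    /* Sketch only: the boot CPU spins, but hands the physical CPU back to
     * the hypervisor on each iteration instead of busy-waiting. */
    static void wait_until_online(void)
    {
            while (!atomic_load(&cpu_online))
                    yield_to_hypervisor();
    }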
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index c4df9dbd63b7..d9497698645a 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c | |||
@@ -1,5 +1,5 @@ | |||
1 | #include <linux/types.h> | 1 | #include <linux/types.h> |
2 | #include <linux/clockchips.h> | 2 | #include <linux/tick.h> |
3 | 3 | ||
4 | #include <xen/interface/xen.h> | 4 | #include <xen/interface/xen.h> |
5 | #include <xen/grant_table.h> | 5 | #include <xen/grant_table.h> |
@@ -81,17 +81,14 @@ void xen_arch_post_suspend(int cancelled) | |||
81 | 81 | ||
82 | static void xen_vcpu_notify_restore(void *data) | 82 | static void xen_vcpu_notify_restore(void *data) |
83 | { | 83 | { |
84 | unsigned long reason = (unsigned long)data; | ||
85 | |||
86 | /* Boot processor notified via generic timekeeping_resume() */ | 84 | /* Boot processor notified via generic timekeeping_resume() */ |
87 | if ( smp_processor_id() == 0) | 85 | if (smp_processor_id() == 0) |
88 | return; | 86 | return; |
89 | 87 | ||
90 | clockevents_notify(reason, NULL); | 88 | tick_resume_local(); |
91 | } | 89 | } |
92 | 90 | ||
93 | void xen_arch_resume(void) | 91 | void xen_arch_resume(void) |
94 | { | 92 | { |
95 | on_each_cpu(xen_vcpu_notify_restore, | 93 | on_each_cpu(xen_vcpu_notify_restore, NULL, 1); |
96 | (void *)CLOCK_EVT_NOTIFY_RESUME, 1); | ||
97 | } | 94 | } |
diff --git a/arch/x86/xen/trace.c b/arch/x86/xen/trace.c index 520022d1a181..a702ec2f5931 100644 --- a/arch/x86/xen/trace.c +++ b/arch/x86/xen/trace.c | |||
@@ -1,54 +1,12 @@ | |||
1 | #include <linux/ftrace.h> | 1 | #include <linux/ftrace.h> |
2 | #include <xen/interface/xen.h> | 2 | #include <xen/interface/xen.h> |
3 | #include <xen/interface/xen-mca.h> | ||
3 | 4 | ||
4 | #define N(x) [__HYPERVISOR_##x] = "("#x")" | 5 | #define HYPERCALL(x) [__HYPERVISOR_##x] = "("#x")", |
5 | static const char *xen_hypercall_names[] = { | 6 | static const char *xen_hypercall_names[] = { |
6 | N(set_trap_table), | 7 | #include <asm/xen-hypercalls.h> |
7 | N(mmu_update), | ||
8 | N(set_gdt), | ||
9 | N(stack_switch), | ||
10 | N(set_callbacks), | ||
11 | N(fpu_taskswitch), | ||
12 | N(sched_op_compat), | ||
13 | N(dom0_op), | ||
14 | N(set_debugreg), | ||
15 | N(get_debugreg), | ||
16 | N(update_descriptor), | ||
17 | N(memory_op), | ||
18 | N(multicall), | ||
19 | N(update_va_mapping), | ||
20 | N(set_timer_op), | ||
21 | N(event_channel_op_compat), | ||
22 | N(xen_version), | ||
23 | N(console_io), | ||
24 | N(physdev_op_compat), | ||
25 | N(grant_table_op), | ||
26 | N(vm_assist), | ||
27 | N(update_va_mapping_otherdomain), | ||
28 | N(iret), | ||
29 | N(vcpu_op), | ||
30 | N(set_segment_base), | ||
31 | N(mmuext_op), | ||
32 | N(acm_op), | ||
33 | N(nmi_op), | ||
34 | N(sched_op), | ||
35 | N(callback_op), | ||
36 | N(xenoprof_op), | ||
37 | N(event_channel_op), | ||
38 | N(physdev_op), | ||
39 | N(hvm_op), | ||
40 | |||
41 | /* Architecture-specific hypercall definitions. */ | ||
42 | N(arch_0), | ||
43 | N(arch_1), | ||
44 | N(arch_2), | ||
45 | N(arch_3), | ||
46 | N(arch_4), | ||
47 | N(arch_5), | ||
48 | N(arch_6), | ||
49 | N(arch_7), | ||
50 | }; | 8 | }; |
51 | #undef N | 9 | #undef HYPERCALL |
52 | 10 | ||
53 | static const char *xen_hypercall_name(unsigned op) | 11 | static const char *xen_hypercall_name(unsigned op) |
54 | { | 12 | { |
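The rewrite above turns the hand-maintained name table into an X-macro: asm/xen-hypercalls.h lists every hypercall as HYPERCALL(name), and each includer defines HYPERCALL() to produce whatever it needs. A self-contained illustration of the pattern (the numbers and the inline list here stand in for the real header):

    /* Stand-in hypercall numbers for the sketch. */
    #define __HYPERVISOR_mmu_update 1
    #define __HYPERVISOR_sched_op   29

    #define HYPERCALL(x) [__HYPERVISOR_##x] = "(" #x ")",
    static const char *hypercall_names[] = {
            /* In the kernel this line is "#include <asm/xen-hypercalls.h>". */
            HYPERCALL(mmu_update)
            HYPERCALL(sched_op)
    };
    #undef HYPERCALL
    /* hypercall_names[1]  == "(mmu_update)"
     * hypercall_names[29] == "(sched_op)"; unlisted slots stay NULL. */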
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 53adefda4275..985fc3ee0973 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S | |||
@@ -68,11 +68,11 @@ ENTRY(xen_sysret64) | |||
68 | * We're already on the usermode stack at this point, but | 68 | * We're already on the usermode stack at this point, but |
69 | * still with the kernel gs, so we can easily switch back | 69 | * still with the kernel gs, so we can easily switch back |
70 | */ | 70 | */ |
71 | movq %rsp, PER_CPU_VAR(old_rsp) | 71 | movq %rsp, PER_CPU_VAR(rsp_scratch) |
72 | movq PER_CPU_VAR(kernel_stack), %rsp | 72 | movq PER_CPU_VAR(kernel_stack), %rsp |
73 | 73 | ||
74 | pushq $__USER_DS | 74 | pushq $__USER_DS |
75 | pushq PER_CPU_VAR(old_rsp) | 75 | pushq PER_CPU_VAR(rsp_scratch) |
76 | pushq %r11 | 76 | pushq %r11 |
77 | pushq $__USER_CS | 77 | pushq $__USER_CS |
78 | pushq %rcx | 78 | pushq %rcx |
@@ -87,11 +87,11 @@ ENTRY(xen_sysret32) | |||
87 | * We're already on the usermode stack at this point, but | 87 | * We're already on the usermode stack at this point, but |
88 | * still with the kernel gs, so we can easily switch back | 88 | * still with the kernel gs, so we can easily switch back |
89 | */ | 89 | */ |
90 | movq %rsp, PER_CPU_VAR(old_rsp) | 90 | movq %rsp, PER_CPU_VAR(rsp_scratch) |
91 | movq PER_CPU_VAR(kernel_stack), %rsp | 91 | movq PER_CPU_VAR(kernel_stack), %rsp |
92 | 92 | ||
93 | pushq $__USER32_DS | 93 | pushq $__USER32_DS |
94 | pushq PER_CPU_VAR(old_rsp) | 94 | pushq PER_CPU_VAR(rsp_scratch) |
95 | pushq %r11 | 95 | pushq %r11 |
96 | pushq $__USER32_CS | 96 | pushq $__USER32_CS |
97 | pushq %rcx | 97 | pushq %rcx |
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 674b222544b7..8afdfccf6086 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S | |||
@@ -12,6 +12,8 @@ | |||
12 | 12 | ||
13 | #include <xen/interface/elfnote.h> | 13 | #include <xen/interface/elfnote.h> |
14 | #include <xen/interface/features.h> | 14 | #include <xen/interface/features.h> |
15 | #include <xen/interface/xen.h> | ||
16 | #include <xen/interface/xen-mca.h> | ||
15 | #include <asm/xen/interface.h> | 17 | #include <asm/xen/interface.h> |
16 | 18 | ||
17 | #ifdef CONFIG_XEN_PVH | 19 | #ifdef CONFIG_XEN_PVH |
@@ -85,59 +87,14 @@ ENTRY(xen_pvh_early_cpu_init) | |||
85 | .pushsection .text | 87 | .pushsection .text |
86 | .balign PAGE_SIZE | 88 | .balign PAGE_SIZE |
87 | ENTRY(hypercall_page) | 89 | ENTRY(hypercall_page) |
88 | #define NEXT_HYPERCALL(x) \ | 90 | .skip PAGE_SIZE |
89 | ENTRY(xen_hypercall_##x) \ | 91 | |
90 | .skip 32 | 92 | #define HYPERCALL(n) \ |
91 | 93 | .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \ | |
92 | NEXT_HYPERCALL(set_trap_table) | 94 | .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32 |
93 | NEXT_HYPERCALL(mmu_update) | 95 | #include <asm/xen-hypercalls.h> |
94 | NEXT_HYPERCALL(set_gdt) | 96 | #undef HYPERCALL |
95 | NEXT_HYPERCALL(stack_switch) | 97 | |
96 | NEXT_HYPERCALL(set_callbacks) | ||
97 | NEXT_HYPERCALL(fpu_taskswitch) | ||
98 | NEXT_HYPERCALL(sched_op_compat) | ||
99 | NEXT_HYPERCALL(platform_op) | ||
100 | NEXT_HYPERCALL(set_debugreg) | ||
101 | NEXT_HYPERCALL(get_debugreg) | ||
102 | NEXT_HYPERCALL(update_descriptor) | ||
103 | NEXT_HYPERCALL(ni) | ||
104 | NEXT_HYPERCALL(memory_op) | ||
105 | NEXT_HYPERCALL(multicall) | ||
106 | NEXT_HYPERCALL(update_va_mapping) | ||
107 | NEXT_HYPERCALL(set_timer_op) | ||
108 | NEXT_HYPERCALL(event_channel_op_compat) | ||
109 | NEXT_HYPERCALL(xen_version) | ||
110 | NEXT_HYPERCALL(console_io) | ||
111 | NEXT_HYPERCALL(physdev_op_compat) | ||
112 | NEXT_HYPERCALL(grant_table_op) | ||
113 | NEXT_HYPERCALL(vm_assist) | ||
114 | NEXT_HYPERCALL(update_va_mapping_otherdomain) | ||
115 | NEXT_HYPERCALL(iret) | ||
116 | NEXT_HYPERCALL(vcpu_op) | ||
117 | NEXT_HYPERCALL(set_segment_base) | ||
118 | NEXT_HYPERCALL(mmuext_op) | ||
119 | NEXT_HYPERCALL(xsm_op) | ||
120 | NEXT_HYPERCALL(nmi_op) | ||
121 | NEXT_HYPERCALL(sched_op) | ||
122 | NEXT_HYPERCALL(callback_op) | ||
123 | NEXT_HYPERCALL(xenoprof_op) | ||
124 | NEXT_HYPERCALL(event_channel_op) | ||
125 | NEXT_HYPERCALL(physdev_op) | ||
126 | NEXT_HYPERCALL(hvm_op) | ||
127 | NEXT_HYPERCALL(sysctl) | ||
128 | NEXT_HYPERCALL(domctl) | ||
129 | NEXT_HYPERCALL(kexec_op) | ||
130 | NEXT_HYPERCALL(tmem_op) /* 38 */ | ||
131 | ENTRY(xen_hypercall_rsvr) | ||
132 | .skip 320 | ||
133 | NEXT_HYPERCALL(mca) /* 48 */ | ||
134 | NEXT_HYPERCALL(arch_1) | ||
135 | NEXT_HYPERCALL(arch_2) | ||
136 | NEXT_HYPERCALL(arch_3) | ||
137 | NEXT_HYPERCALL(arch_4) | ||
138 | NEXT_HYPERCALL(arch_5) | ||
139 | NEXT_HYPERCALL(arch_6) | ||
140 | .balign PAGE_SIZE | ||
141 | .popsection | 98 | .popsection |
142 | 99 | ||
143 | ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") | 100 | ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") |
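The same header drives xen-head.S above: instead of laying out one 32-byte stub per hypercall by hand, the page is reserved blank with ".skip PAGE_SIZE" and each xen_hypercall_<name> symbol is defined as an offset of hypercall_number * 32 into it (the hypervisor fills in the actual stubs at boot). A small C sketch of the address arithmetic, using an assumed helper name:

    #include <stdint.h>

    /* Sketch only: address of the 32-byte slot for a given hypercall number,
     * mirroring ".equ xen_hypercall_<n>, hypercall_page + __HYPERVISOR_<n> * 32". */
    static uintptr_t hypercall_entry(uintptr_t hypercall_page, unsigned int nr)
    {
            return hypercall_page + (uintptr_t)nr * 32;
    }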