Diffstat (limited to 'arch/x86')
191 files changed, 8526 insertions, 1356 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0952ecd60eca..3e97a3dd4129 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -125,6 +125,7 @@ config X86 | |||
125 | select RTC_LIB | 125 | select RTC_LIB |
126 | select HAVE_DEBUG_STACKOVERFLOW | 126 | select HAVE_DEBUG_STACKOVERFLOW |
127 | select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64 | 127 | select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64 |
128 | select HAVE_CC_STACKPROTECTOR | ||
128 | 129 | ||
129 | config INSTRUCTION_DECODER | 130 | config INSTRUCTION_DECODER |
130 | def_bool y | 131 | def_bool y |
@@ -278,13 +279,13 @@ config SMP | |||
278 | bool "Symmetric multi-processing support" | 279 | bool "Symmetric multi-processing support" |
279 | ---help--- | 280 | ---help--- |
280 | This enables support for systems with more than one CPU. If you have | 281 | This enables support for systems with more than one CPU. If you have |
281 | a system with only one CPU, like most personal computers, say N. If | 282 | a system with only one CPU, say N. If you have a system with more |
282 | you have a system with more than one CPU, say Y. | 283 | than one CPU, say Y. |
283 | 284 | ||
284 | If you say N here, the kernel will run on single and multiprocessor | 285 | If you say N here, the kernel will run on uni- and multiprocessor |
285 | machines, but will use only one CPU of a multiprocessor machine. If | 286 | machines, but will use only one CPU of a multiprocessor machine. If |
286 | you say Y here, the kernel will run on many, but not all, | 287 | you say Y here, the kernel will run on many, but not all, |
287 | singleprocessor machines. On a singleprocessor machine, the kernel | 288 | uniprocessor machines. On a uniprocessor machine, the kernel |
288 | will run faster if you say N here. | 289 | will run faster if you say N here. |
289 | 290 | ||
290 | Note that if you say Y here and choose architecture "586" or | 291 | Note that if you say Y here and choose architecture "586" or |
@@ -438,42 +439,26 @@ config X86_INTEL_CE | |||
438 | This option compiles in support for the CE4100 SOC for settop | 439 | This option compiles in support for the CE4100 SOC for settop |
439 | boxes and media devices. | 440 | boxes and media devices. |
440 | 441 | ||
441 | config X86_WANT_INTEL_MID | 442 | config X86_INTEL_MID |
442 | bool "Intel MID platform support" | 443 | bool "Intel MID platform support" |
443 | depends on X86_32 | 444 | depends on X86_32 |
444 | depends on X86_EXTENDED_PLATFORM | 445 | depends on X86_EXTENDED_PLATFORM |
445 | ---help--- | ||
446 | Select to build a kernel capable of supporting Intel MID platform | ||
447 | systems which do not have the PCI legacy interfaces (Moorestown, | ||
448 | Medfield). If you are building for a PC class system say N here. | ||
449 | |||
450 | if X86_WANT_INTEL_MID | ||
451 | |||
452 | config X86_INTEL_MID | ||
453 | bool | ||
454 | |||
455 | config X86_MDFLD | ||
456 | bool "Medfield MID platform" | ||
457 | depends on PCI | 446 | depends on PCI |
458 | depends on PCI_GOANY | 447 | depends on PCI_GOANY |
459 | depends on X86_IO_APIC | 448 | depends on X86_IO_APIC |
460 | select X86_INTEL_MID | ||
461 | select SFI | 449 | select SFI |
450 | select I2C | ||
462 | select DW_APB_TIMER | 451 | select DW_APB_TIMER |
463 | select APB_TIMER | 452 | select APB_TIMER |
464 | select I2C | ||
465 | select SPI | ||
466 | select INTEL_SCU_IPC | 453 | select INTEL_SCU_IPC |
467 | select X86_PLATFORM_DEVICES | ||
468 | select MFD_INTEL_MSIC | 454 | select MFD_INTEL_MSIC |
469 | ---help--- | 455 | ---help--- |
470 | Medfield is Intel's Low Power Intel Architecture (LPIA) based Moblin | 456 | Select to build a kernel capable of supporting Intel MID (Mobile |
471 | Internet Device(MID) platform. | 457 | Internet Device) platform systems which do not have the PCI legacy |
472 | Unlike standard x86 PCs, Medfield does not have many legacy devices | 458 | interfaces. If you are building for a PC class system say N here. |
473 | nor standard legacy replacement devices/features. e.g. Medfield does | ||
474 | not contain i8259, i8254, HPET, legacy BIOS, most of the io ports. | ||
475 | 459 | ||
476 | endif | 460 | Intel MID platforms are based on an Intel processor and chipset which |
461 | consume less power than most of the x86 derivatives. | ||
477 | 462 | ||
478 | config X86_INTEL_LPSS | 463 | config X86_INTEL_LPSS |
479 | bool "Intel Low Power Subsystem Support" | 464 | bool "Intel Low Power Subsystem Support" |
@@ -746,6 +731,7 @@ config APB_TIMER | |||
746 | # The code disables itself when not needed. | 731 | # The code disables itself when not needed. |
747 | config DMI | 732 | config DMI |
748 | default y | 733 | default y |
734 | select DMI_SCAN_MACHINE_NON_EFI_FALLBACK | ||
749 | bool "Enable DMI scanning" if EXPERT | 735 | bool "Enable DMI scanning" if EXPERT |
750 | ---help--- | 736 | ---help--- |
751 | Enabled scanning of DMI to identify machine quirks. Say Y | 737 | Enabled scanning of DMI to identify machine quirks. Say Y |
@@ -953,7 +939,7 @@ config X86_ANCIENT_MCE | |||
953 | depends on X86_32 && X86_MCE | 939 | depends on X86_32 && X86_MCE |
954 | ---help--- | 940 | ---help--- |
955 | Include support for machine check handling on old Pentium 5 or WinChip | 941 | Include support for machine check handling on old Pentium 5 or WinChip |
956 | systems. These typically need to be enabled explicitely on the command | 942 | systems. These typically need to be enabled explicitly on the command |
957 | line. | 943 | line. |
958 | 944 | ||
959 | config X86_MCE_THRESHOLD | 945 | config X86_MCE_THRESHOLD |
@@ -1080,10 +1066,6 @@ config MICROCODE_OLD_INTERFACE | |||
1080 | def_bool y | 1066 | def_bool y |
1081 | depends on MICROCODE | 1067 | depends on MICROCODE |
1082 | 1068 | ||
1083 | config MICROCODE_INTEL_LIB | ||
1084 | def_bool y | ||
1085 | depends on MICROCODE_INTEL | ||
1086 | |||
1087 | config MICROCODE_INTEL_EARLY | 1069 | config MICROCODE_INTEL_EARLY |
1088 | def_bool n | 1070 | def_bool n |
1089 | 1071 | ||
@@ -1617,22 +1599,6 @@ config SECCOMP | |||
1617 | 1599 | ||
1618 | If unsure, say Y. Only embedded should say N here. | 1600 | If unsure, say Y. Only embedded should say N here. |
1619 | 1601 | ||
1620 | config CC_STACKPROTECTOR | ||
1621 | bool "Enable -fstack-protector buffer overflow detection" | ||
1622 | ---help--- | ||
1623 | This option turns on the -fstack-protector GCC feature. This | ||
1624 | feature puts, at the beginning of functions, a canary value on | ||
1625 | the stack just before the return address, and validates | ||
1626 | the value just before actually returning. Stack based buffer | ||
1627 | overflows (that need to overwrite this return address) now also | ||
1628 | overwrite the canary, which gets detected and the attack is then | ||
1629 | neutralized via a kernel panic. | ||
1630 | |||
1631 | This feature requires gcc version 4.2 or above, or a distribution | ||
1632 | gcc with the feature backported. Older versions are automatically | ||
1633 | detected and for those versions, this configuration option is | ||
1634 | ignored. (and a warning is printed during bootup) | ||
1635 | |||
1636 | source kernel/Kconfig.hz | 1602 | source kernel/Kconfig.hz |
1637 | 1603 | ||
1638 | config KEXEC | 1604 | config KEXEC |
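The help text removed in this hunk (the option now comes from the HAVE_CC_STACKPROTECTOR select added at the top of this diff) describes the canary scheme in prose. As a rough illustration only, and not the code GCC actually emits, a user-space sketch of the same check:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Illustrative only: with -fstack-protector, GCC inserts an equivalent
 * prologue/epilogue check automatically; this mimics it by hand. */
static unsigned long __stack_chk_guard_demo = 0xdeadbeefUL;

static void copy_with_manual_canary(const char *src)
{
	unsigned long canary = __stack_chk_guard_demo;	/* prologue: place canary */
	char buf[16];

	strcpy(buf, src);	/* a stack-based overflow here would have to pass
				 * over the canary on its way to the return address */

	if (canary != __stack_chk_guard_demo) {		/* epilogue: validate canary */
		fprintf(stderr, "stack smashing detected\n");
		abort();	/* the kernel's handler panics instead */
	}
}

int main(void)
{
	copy_with_manual_canary("short and safe");
	return 0;
}
```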
@@ -1728,16 +1694,67 @@ config RELOCATABLE | |||
1728 | 1694 | ||
1729 | Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address | 1695 | Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address |
1730 | it has been loaded at and the compile time physical address | 1696 | it has been loaded at and the compile time physical address |
1731 | (CONFIG_PHYSICAL_START) is ignored. | 1697 | (CONFIG_PHYSICAL_START) is used as the minimum location. |
1732 | 1698 | ||
1733 | # Relocation on x86-32 needs some additional build support | 1699 | config RANDOMIZE_BASE |
1700 | bool "Randomize the address of the kernel image" | ||
1701 | depends on RELOCATABLE | ||
1702 | depends on !HIBERNATION | ||
1703 | default n | ||
1704 | ---help--- | ||
1705 | Randomizes the physical and virtual address at which the | ||
1706 | kernel image is decompressed, as a security feature that | ||
1707 | deters exploit attempts relying on knowledge of the location | ||
1708 | of kernel internals. | ||
1709 | |||
1710 | Entropy is generated using the RDRAND instruction if it is | ||
1711 | supported. If RDTSC is supported, it is used as well. If | ||
1712 | neither RDRAND nor RDTSC are supported, then randomness is | ||
1713 | read from the i8254 timer. | ||
1714 | |||
1715 | The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET, | ||
1716 | and aligned according to PHYSICAL_ALIGN. Since the kernel is | ||
1717 | built using 2GiB addressing, and PHYSICAL_ALIGN must be at a | ||
1718 | minimum of 2MiB, only 10 bits of entropy are theoretically | ||
1719 | possible. At best, due to page table layouts, 64-bit can use | ||
1720 | 9 bits of entropy and 32-bit uses 8 bits. | ||
1721 | |||
1722 | If unsure, say N. | ||
1723 | |||
1724 | config RANDOMIZE_BASE_MAX_OFFSET | ||
1725 | hex "Maximum kASLR offset allowed" if EXPERT | ||
1726 | depends on RANDOMIZE_BASE | ||
1727 | range 0x0 0x20000000 if X86_32 | ||
1728 | default "0x20000000" if X86_32 | ||
1729 | range 0x0 0x40000000 if X86_64 | ||
1730 | default "0x40000000" if X86_64 | ||
1731 | ---help--- | ||
1732 | The lesser of RANDOMIZE_BASE_MAX_OFFSET and available physical | ||
1733 | memory is used to determine the maximal offset in bytes that will | ||
1734 | be applied to the kernel when kernel Address Space Layout | ||
1735 | Randomization (kASLR) is active. This must be a multiple of | ||
1736 | PHYSICAL_ALIGN. | ||
1737 | |||
1738 | On 32-bit this is limited to 512MiB by page table layouts. The | ||
1739 | default is 512MiB. | ||
1740 | |||
1741 | On 64-bit this is limited by how the kernel fixmap page table is | ||
1742 | positioned, so this cannot be larger than 1GiB currently. Without | ||
1743 | RANDOMIZE_BASE, there is a 512MiB to 1.5GiB split between kernel | ||
1744 | and modules. When RANDOMIZE_BASE_MAX_OFFSET is above 512MiB, the | ||
1745 | modules area will shrink to compensate, up to the current maximum | ||
1746 | 1GiB to 1GiB split. The default is 1GiB. | ||
1747 | |||
1748 | If unsure, leave at the default value. | ||
1749 | |||
1750 | # Relocation on x86 needs some additional build support | ||
1734 | config X86_NEED_RELOCS | 1751 | config X86_NEED_RELOCS |
1735 | def_bool y | 1752 | def_bool y |
1736 | depends on X86_32 && RELOCATABLE | 1753 | depends on RANDOMIZE_BASE || (X86_32 && RELOCATABLE) |
1737 | 1754 | ||
1738 | config PHYSICAL_ALIGN | 1755 | config PHYSICAL_ALIGN |
1739 | hex "Alignment value to which kernel should be aligned" | 1756 | hex "Alignment value to which kernel should be aligned" |
1740 | default "0x1000000" | 1757 | default "0x200000" |
1741 | range 0x2000 0x1000000 if X86_32 | 1758 | range 0x2000 0x1000000 if X86_32 |
1742 | range 0x200000 0x1000000 if X86_64 | 1759 | range 0x200000 0x1000000 if X86_64 |
1743 | ---help--- | 1760 | ---help--- |
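The entropy figures quoted in the RANDOMIZE_BASE help text follow from dividing the maximum offset by the kernel alignment. A small standalone sketch of that arithmetic, using the defaults from this hunk (0x20000000 and 0x40000000 maximum offsets, 2 MiB alignment):

```c
#include <stdio.h>

/* Defaults taken from the Kconfig hunk above. */
#define ALIGN_2MIB		0x200000UL
#define MAX_OFFSET_X86_32	0x20000000UL	/* 512 MiB */
#define MAX_OFFSET_X86_64	0x40000000UL	/* 1 GiB   */

static unsigned int entropy_bits(unsigned long max_offset, unsigned long align)
{
	unsigned long slots = max_offset / align;
	unsigned int bits = 0;

	while (slots > 1) {	/* log2 of the number of aligned slots */
		slots >>= 1;
		bits++;
	}
	return bits;
}

int main(void)
{
	/* Prints 8 for 32-bit and 9 for 64-bit, matching the help text. */
	printf("x86_32: %u bits\n", entropy_bits(MAX_OFFSET_X86_32, ALIGN_2MIB));
	printf("x86_64: %u bits\n", entropy_bits(MAX_OFFSET_X86_64, ALIGN_2MIB));
	return 0;
}
```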
@@ -2393,6 +2410,14 @@ config X86_DMA_REMAP | |||
2393 | bool | 2410 | bool |
2394 | depends on STA2X11 | 2411 | depends on STA2X11 |
2395 | 2412 | ||
2413 | config IOSF_MBI | ||
2414 | bool | ||
2415 | depends on PCI | ||
2416 | ---help--- | ||
2417 | To be selected by modules requiring access to the Intel OnChip System | ||
2418 | Fabric (IOSF) Sideband MailBox Interface (MBI). For MBI platforms | ||
2419 | enumerable by PCI. | ||
2420 | |||
2396 | source "net/Kconfig" | 2421 | source "net/Kconfig" |
2397 | 2422 | ||
2398 | source "drivers/Kconfig" | 2423 | source "drivers/Kconfig" |
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 57d021507120..13b22e0f681d 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -89,13 +89,11 @@ else | |||
89 | KBUILD_CFLAGS += -maccumulate-outgoing-args | 89 | KBUILD_CFLAGS += -maccumulate-outgoing-args |
90 | endif | 90 | endif |
91 | 91 | ||
92 | # Make sure compiler does not have buggy stack-protector support. | ||
92 | ifdef CONFIG_CC_STACKPROTECTOR | 93 | ifdef CONFIG_CC_STACKPROTECTOR |
93 | cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh | 94 | cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh |
94 | ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y) | 95 | ifneq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y) |
95 | stackp-y := -fstack-protector | 96 | $(warning stack-protector enabled but compiler support broken) |
96 | KBUILD_CFLAGS += $(stackp-y) | ||
97 | else | ||
98 | $(warning stack protector enabled but no compiler support) | ||
99 | endif | 97 | endif |
100 | endif | 98 | endif |
101 | 99 | ||
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index d9c11956fce0..de7066918005 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -20,7 +20,7 @@ targets := vmlinux.bin setup.bin setup.elf bzImage | |||
20 | targets += fdimage fdimage144 fdimage288 image.iso mtools.conf | 20 | targets += fdimage fdimage144 fdimage288 image.iso mtools.conf |
21 | subdir- := compressed | 21 | subdir- := compressed |
22 | 22 | ||
23 | setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o | 23 | setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o |
24 | setup-y += early_serial_console.o edd.o header.o main.o mca.o memory.o | 24 | setup-y += early_serial_console.o edd.o header.o main.o mca.o memory.o |
25 | setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o | 25 | setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o |
26 | setup-y += video-mode.o version.o | 26 | setup-y += video-mode.o version.o |
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
index 1dfbf64e52a2..d401b4a262b0 100644
--- a/arch/x86/boot/bioscall.S
+++ b/arch/x86/boot/bioscall.S
@@ -1,6 +1,6 @@ | |||
1 | /* ----------------------------------------------------------------------- | 1 | /* ----------------------------------------------------------------------- |
2 | * | 2 | * |
3 | * Copyright 2009 Intel Corporation; author H. Peter Anvin | 3 | * Copyright 2009-2014 Intel Corporation; author H. Peter Anvin |
4 | * | 4 | * |
5 | * This file is part of the Linux kernel, and is made available under | 5 | * This file is part of the Linux kernel, and is made available under |
6 | * the terms of the GNU General Public License version 2 or (at your | 6 | * the terms of the GNU General Public License version 2 or (at your |
@@ -13,8 +13,8 @@ | |||
13 | * touching registers they shouldn't be. | 13 | * touching registers they shouldn't be. |
14 | */ | 14 | */ |
15 | 15 | ||
16 | .code16gcc | 16 | .code16 |
17 | .text | 17 | .section ".inittext","ax" |
18 | .globl intcall | 18 | .globl intcall |
19 | .type intcall, @function | 19 | .type intcall, @function |
20 | intcall: | 20 | intcall: |
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index ef72baeff484..50f8c5e0f37e 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -26,9 +26,8 @@ | |||
26 | #include <asm/boot.h> | 26 | #include <asm/boot.h> |
27 | #include <asm/setup.h> | 27 | #include <asm/setup.h> |
28 | #include "bitops.h" | 28 | #include "bitops.h" |
29 | #include <asm/cpufeature.h> | ||
30 | #include <asm/processor-flags.h> | ||
31 | #include "ctype.h" | 29 | #include "ctype.h" |
30 | #include "cpuflags.h" | ||
32 | 31 | ||
33 | /* Useful macros */ | 32 | /* Useful macros */ |
34 | #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) | 33 | #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) |
@@ -307,14 +306,7 @@ static inline int cmdline_find_option_bool(const char *option) | |||
307 | return __cmdline_find_option_bool(cmd_line_ptr, option); | 306 | return __cmdline_find_option_bool(cmd_line_ptr, option); |
308 | } | 307 | } |
309 | 308 | ||
310 | |||
311 | /* cpu.c, cpucheck.c */ | 309 | /* cpu.c, cpucheck.c */ |
312 | struct cpu_features { | ||
313 | int level; /* Family, or 64 for x86-64 */ | ||
314 | int model; | ||
315 | u32 flags[NCAPINTS]; | ||
316 | }; | ||
317 | extern struct cpu_features cpu; | ||
318 | int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); | 310 | int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); |
319 | int validate_cpu(void); | 311 | int validate_cpu(void); |
320 | 312 | ||
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index c8a6792e7842..0fcd9133790c 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -28,7 +28,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include | |||
28 | 28 | ||
29 | VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ | 29 | VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ |
30 | $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \ | 30 | $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \ |
31 | $(obj)/piggy.o | 31 | $(obj)/piggy.o $(obj)/cpuflags.o $(obj)/aslr.o |
32 | 32 | ||
33 | $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone | 33 | $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone |
34 | 34 | ||
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
new file mode 100644
index 000000000000..90a21f430117
--- /dev/null
+++ b/arch/x86/boot/compressed/aslr.c
@@ -0,0 +1,316 @@ | |||
1 | #include "misc.h" | ||
2 | |||
3 | #ifdef CONFIG_RANDOMIZE_BASE | ||
4 | #include <asm/msr.h> | ||
5 | #include <asm/archrandom.h> | ||
6 | #include <asm/e820.h> | ||
7 | |||
8 | #include <generated/compile.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/uts.h> | ||
11 | #include <linux/utsname.h> | ||
12 | #include <generated/utsrelease.h> | ||
13 | |||
14 | /* Simplified build-specific string for starting entropy. */ | ||
15 | static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" | ||
16 | LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; | ||
17 | |||
18 | #define I8254_PORT_CONTROL 0x43 | ||
19 | #define I8254_PORT_COUNTER0 0x40 | ||
20 | #define I8254_CMD_READBACK 0xC0 | ||
21 | #define I8254_SELECT_COUNTER0 0x02 | ||
22 | #define I8254_STATUS_NOTREADY 0x40 | ||
23 | static inline u16 i8254(void) | ||
24 | { | ||
25 | u16 status, timer; | ||
26 | |||
27 | do { | ||
28 | outb(I8254_PORT_CONTROL, | ||
29 | I8254_CMD_READBACK | I8254_SELECT_COUNTER0); | ||
30 | status = inb(I8254_PORT_COUNTER0); | ||
31 | timer = inb(I8254_PORT_COUNTER0); | ||
32 | timer |= inb(I8254_PORT_COUNTER0) << 8; | ||
33 | } while (status & I8254_STATUS_NOTREADY); | ||
34 | |||
35 | return timer; | ||
36 | } | ||
37 | |||
38 | static unsigned long rotate_xor(unsigned long hash, const void *area, | ||
39 | size_t size) | ||
40 | { | ||
41 | size_t i; | ||
42 | unsigned long *ptr = (unsigned long *)area; | ||
43 | |||
44 | for (i = 0; i < size / sizeof(hash); i++) { | ||
45 | /* Rotate by odd number of bits and XOR. */ | ||
46 | hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7); | ||
47 | hash ^= ptr[i]; | ||
48 | } | ||
49 | |||
50 | return hash; | ||
51 | } | ||
52 | |||
53 | /* Attempt to create a simple but unpredictable starting entropy. */ | ||
54 | static unsigned long get_random_boot(void) | ||
55 | { | ||
56 | unsigned long hash = 0; | ||
57 | |||
58 | hash = rotate_xor(hash, build_str, sizeof(build_str)); | ||
59 | hash = rotate_xor(hash, real_mode, sizeof(*real_mode)); | ||
60 | |||
61 | return hash; | ||
62 | } | ||
63 | |||
64 | static unsigned long get_random_long(void) | ||
65 | { | ||
66 | #ifdef CONFIG_X86_64 | ||
67 | const unsigned long mix_const = 0x5d6008cbf3848dd3UL; | ||
68 | #else | ||
69 | const unsigned long mix_const = 0x3f39e593UL; | ||
70 | #endif | ||
71 | unsigned long raw, random = get_random_boot(); | ||
72 | bool use_i8254 = true; | ||
73 | |||
74 | debug_putstr("KASLR using"); | ||
75 | |||
76 | if (has_cpuflag(X86_FEATURE_RDRAND)) { | ||
77 | debug_putstr(" RDRAND"); | ||
78 | if (rdrand_long(&raw)) { | ||
79 | random ^= raw; | ||
80 | use_i8254 = false; | ||
81 | } | ||
82 | } | ||
83 | |||
84 | if (has_cpuflag(X86_FEATURE_TSC)) { | ||
85 | debug_putstr(" RDTSC"); | ||
86 | rdtscll(raw); | ||
87 | |||
88 | random ^= raw; | ||
89 | use_i8254 = false; | ||
90 | } | ||
91 | |||
92 | if (use_i8254) { | ||
93 | debug_putstr(" i8254"); | ||
94 | random ^= i8254(); | ||
95 | } | ||
96 | |||
97 | /* Circular multiply for better bit diffusion */ | ||
98 | asm("mul %3" | ||
99 | : "=a" (random), "=d" (raw) | ||
100 | : "a" (random), "rm" (mix_const)); | ||
101 | random += raw; | ||
102 | |||
103 | debug_putstr("...\n"); | ||
104 | |||
105 | return random; | ||
106 | } | ||
107 | |||
108 | struct mem_vector { | ||
109 | unsigned long start; | ||
110 | unsigned long size; | ||
111 | }; | ||
112 | |||
113 | #define MEM_AVOID_MAX 5 | ||
114 | struct mem_vector mem_avoid[MEM_AVOID_MAX]; | ||
115 | |||
116 | static bool mem_contains(struct mem_vector *region, struct mem_vector *item) | ||
117 | { | ||
118 | /* Item at least partially before region. */ | ||
119 | if (item->start < region->start) | ||
120 | return false; | ||
121 | /* Item at least partially after region. */ | ||
122 | if (item->start + item->size > region->start + region->size) | ||
123 | return false; | ||
124 | return true; | ||
125 | } | ||
126 | |||
127 | static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) | ||
128 | { | ||
129 | /* Item one is entirely before item two. */ | ||
130 | if (one->start + one->size <= two->start) | ||
131 | return false; | ||
132 | /* Item one is entirely after item two. */ | ||
133 | if (one->start >= two->start + two->size) | ||
134 | return false; | ||
135 | return true; | ||
136 | } | ||
137 | |||
138 | static void mem_avoid_init(unsigned long input, unsigned long input_size, | ||
139 | unsigned long output, unsigned long output_size) | ||
140 | { | ||
141 | u64 initrd_start, initrd_size; | ||
142 | u64 cmd_line, cmd_line_size; | ||
143 | unsigned long unsafe, unsafe_len; | ||
144 | char *ptr; | ||
145 | |||
146 | /* | ||
147 | * Avoid the region that is unsafe to overlap during | ||
148 | * decompression (see calculations at top of misc.c). | ||
149 | */ | ||
150 | unsafe_len = (output_size >> 12) + 32768 + 18; | ||
151 | unsafe = (unsigned long)input + input_size - unsafe_len; | ||
152 | mem_avoid[0].start = unsafe; | ||
153 | mem_avoid[0].size = unsafe_len; | ||
154 | |||
155 | /* Avoid initrd. */ | ||
156 | initrd_start = (u64)real_mode->ext_ramdisk_image << 32; | ||
157 | initrd_start |= real_mode->hdr.ramdisk_image; | ||
158 | initrd_size = (u64)real_mode->ext_ramdisk_size << 32; | ||
159 | initrd_size |= real_mode->hdr.ramdisk_size; | ||
160 | mem_avoid[1].start = initrd_start; | ||
161 | mem_avoid[1].size = initrd_size; | ||
162 | |||
163 | /* Avoid kernel command line. */ | ||
164 | cmd_line = (u64)real_mode->ext_cmd_line_ptr << 32; | ||
165 | cmd_line |= real_mode->hdr.cmd_line_ptr; | ||
166 | /* Calculate size of cmd_line. */ | ||
167 | ptr = (char *)(unsigned long)cmd_line; | ||
168 | for (cmd_line_size = 0; ptr[cmd_line_size++]; ) | ||
169 | ; | ||
170 | mem_avoid[2].start = cmd_line; | ||
171 | mem_avoid[2].size = cmd_line_size; | ||
172 | |||
173 | /* Avoid heap memory. */ | ||
174 | mem_avoid[3].start = (unsigned long)free_mem_ptr; | ||
175 | mem_avoid[3].size = BOOT_HEAP_SIZE; | ||
176 | |||
177 | /* Avoid stack memory. */ | ||
178 | mem_avoid[4].start = (unsigned long)free_mem_end_ptr; | ||
179 | mem_avoid[4].size = BOOT_STACK_SIZE; | ||
180 | } | ||
181 | |||
182 | /* Does this memory vector overlap a known avoided area? */ | ||
183 | bool mem_avoid_overlap(struct mem_vector *img) | ||
184 | { | ||
185 | int i; | ||
186 | |||
187 | for (i = 0; i < MEM_AVOID_MAX; i++) { | ||
188 | if (mem_overlaps(img, &mem_avoid[i])) | ||
189 | return true; | ||
190 | } | ||
191 | |||
192 | return false; | ||
193 | } | ||
194 | |||
195 | unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET / CONFIG_PHYSICAL_ALIGN]; | ||
196 | unsigned long slot_max = 0; | ||
197 | |||
198 | static void slots_append(unsigned long addr) | ||
199 | { | ||
200 | /* Overflowing the slots list should be impossible. */ | ||
201 | if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET / | ||
202 | CONFIG_PHYSICAL_ALIGN) | ||
203 | return; | ||
204 | |||
205 | slots[slot_max++] = addr; | ||
206 | } | ||
207 | |||
208 | static unsigned long slots_fetch_random(void) | ||
209 | { | ||
210 | /* Handle case of no slots stored. */ | ||
211 | if (slot_max == 0) | ||
212 | return 0; | ||
213 | |||
214 | return slots[get_random_long() % slot_max]; | ||
215 | } | ||
216 | |||
217 | static void process_e820_entry(struct e820entry *entry, | ||
218 | unsigned long minimum, | ||
219 | unsigned long image_size) | ||
220 | { | ||
221 | struct mem_vector region, img; | ||
222 | |||
223 | /* Skip non-RAM entries. */ | ||
224 | if (entry->type != E820_RAM) | ||
225 | return; | ||
226 | |||
227 | /* Ignore entries entirely above our maximum. */ | ||
228 | if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET) | ||
229 | return; | ||
230 | |||
231 | /* Ignore entries entirely below our minimum. */ | ||
232 | if (entry->addr + entry->size < minimum) | ||
233 | return; | ||
234 | |||
235 | region.start = entry->addr; | ||
236 | region.size = entry->size; | ||
237 | |||
238 | /* Potentially raise address to minimum location. */ | ||
239 | if (region.start < minimum) | ||
240 | region.start = minimum; | ||
241 | |||
242 | /* Potentially raise address to meet alignment requirements. */ | ||
243 | region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); | ||
244 | |||
245 | /* Did we raise the address above the bounds of this e820 region? */ | ||
246 | if (region.start > entry->addr + entry->size) | ||
247 | return; | ||
248 | |||
249 | /* Reduce size by any delta from the original address. */ | ||
250 | region.size -= region.start - entry->addr; | ||
251 | |||
252 | /* Reduce maximum size to fit end of image within maximum limit. */ | ||
253 | if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET) | ||
254 | region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start; | ||
255 | |||
256 | /* Walk each aligned slot and check for avoided areas. */ | ||
257 | for (img.start = region.start, img.size = image_size ; | ||
258 | mem_contains(®ion, &img) ; | ||
259 | img.start += CONFIG_PHYSICAL_ALIGN) { | ||
260 | if (mem_avoid_overlap(&img)) | ||
261 | continue; | ||
262 | slots_append(img.start); | ||
263 | } | ||
264 | } | ||
265 | |||
266 | static unsigned long find_random_addr(unsigned long minimum, | ||
267 | unsigned long size) | ||
268 | { | ||
269 | int i; | ||
270 | unsigned long addr; | ||
271 | |||
272 | /* Make sure minimum is aligned. */ | ||
273 | minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); | ||
274 | |||
275 | /* Verify potential e820 positions, appending to slots list. */ | ||
276 | for (i = 0; i < real_mode->e820_entries; i++) { | ||
277 | process_e820_entry(&real_mode->e820_map[i], minimum, size); | ||
278 | } | ||
279 | |||
280 | return slots_fetch_random(); | ||
281 | } | ||
282 | |||
283 | unsigned char *choose_kernel_location(unsigned char *input, | ||
284 | unsigned long input_size, | ||
285 | unsigned char *output, | ||
286 | unsigned long output_size) | ||
287 | { | ||
288 | unsigned long choice = (unsigned long)output; | ||
289 | unsigned long random; | ||
290 | |||
291 | if (cmdline_find_option_bool("nokaslr")) { | ||
292 | debug_putstr("KASLR disabled...\n"); | ||
293 | goto out; | ||
294 | } | ||
295 | |||
296 | /* Record the various known unsafe memory ranges. */ | ||
297 | mem_avoid_init((unsigned long)input, input_size, | ||
298 | (unsigned long)output, output_size); | ||
299 | |||
300 | /* Walk e820 and find a random address. */ | ||
301 | random = find_random_addr(choice, output_size); | ||
302 | if (!random) { | ||
303 | debug_putstr("KASLR could not find suitable E820 region...\n"); | ||
304 | goto out; | ||
305 | } | ||
306 | |||
307 | /* Always enforce the minimum. */ | ||
308 | if (random < choice) | ||
309 | goto out; | ||
310 | |||
311 | choice = random; | ||
312 | out: | ||
313 | return (unsigned char *)choice; | ||
314 | } | ||
315 | |||
316 | #endif /* CONFIG_RANDOMIZE_BASE */ | ||
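aslr.c above seeds its starting entropy by folding the build string and boot_params through rotate_xor(). A user-space rendering of that mixing step, runnable outside the boot stub; the sample input string is an invented stand-in for the real build data:

```c
#include <stdio.h>
#include <string.h>

/* Same rotate-and-XOR folding as rotate_xor() in aslr.c: rotate the
 * accumulator by an odd number of bits, then XOR in the next word. */
static unsigned long rotate_xor(unsigned long hash, const void *area,
				size_t size)
{
	size_t i;
	const unsigned long *ptr = area;

	for (i = 0; i < size / sizeof(hash); i++) {
		hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
		hash ^= ptr[i];
	}

	return hash;
}

int main(void)
{
	/* Invented stand-in for the build string and boot_params used at boot. */
	static const char sample[] = "3.13.0 (user@host) (gcc) #1 SMP";
	unsigned long hash = 0;

	hash = rotate_xor(hash, sample, sizeof(sample));
	printf("starting entropy hash: %#lx\n", hash);
	return 0;
}
```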
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index bffd73b45b1f..b68e3033e6b9 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -1,6 +1,6 @@ | |||
1 | #include "misc.h" | 1 | #include "misc.h" |
2 | 2 | ||
3 | #ifdef CONFIG_EARLY_PRINTK | 3 | #if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE |
4 | 4 | ||
5 | static unsigned long fs; | 5 | static unsigned long fs; |
6 | static inline void set_fs(unsigned long seg) | 6 | static inline void set_fs(unsigned long seg) |
diff --git a/arch/x86/boot/compressed/cpuflags.c b/arch/x86/boot/compressed/cpuflags.c
new file mode 100644
index 000000000000..aa313466118b
--- /dev/null
+++ b/arch/x86/boot/compressed/cpuflags.c
@@ -0,0 +1,12 @@ | |||
1 | #ifdef CONFIG_RANDOMIZE_BASE | ||
2 | |||
3 | #include "../cpuflags.c" | ||
4 | |||
5 | bool has_cpuflag(int flag) | ||
6 | { | ||
7 | get_cpuflags(); | ||
8 | |||
9 | return test_bit(flag, cpu.flags); | ||
10 | } | ||
11 | |||
12 | #endif | ||
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 5d6f6891b188..9116aac232c7 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -117,9 +117,11 @@ preferred_addr: | |||
117 | addl %eax, %ebx | 117 | addl %eax, %ebx |
118 | notl %eax | 118 | notl %eax |
119 | andl %eax, %ebx | 119 | andl %eax, %ebx |
120 | #else | 120 | cmpl $LOAD_PHYSICAL_ADDR, %ebx |
121 | movl $LOAD_PHYSICAL_ADDR, %ebx | 121 | jge 1f |
122 | #endif | 122 | #endif |
123 | movl $LOAD_PHYSICAL_ADDR, %ebx | ||
124 | 1: | ||
123 | 125 | ||
124 | /* Target address to relocate to for decompression */ | 126 | /* Target address to relocate to for decompression */ |
125 | addl $z_extract_offset, %ebx | 127 | addl $z_extract_offset, %ebx |
@@ -191,14 +193,14 @@ relocated: | |||
191 | leal boot_heap(%ebx), %eax | 193 | leal boot_heap(%ebx), %eax |
192 | pushl %eax /* heap area */ | 194 | pushl %eax /* heap area */ |
193 | pushl %esi /* real mode pointer */ | 195 | pushl %esi /* real mode pointer */ |
194 | call decompress_kernel | 196 | call decompress_kernel /* returns kernel location in %eax */ |
195 | addl $24, %esp | 197 | addl $24, %esp |
196 | 198 | ||
197 | /* | 199 | /* |
198 | * Jump to the decompressed kernel. | 200 | * Jump to the decompressed kernel. |
199 | */ | 201 | */ |
200 | xorl %ebx, %ebx | 202 | xorl %ebx, %ebx |
201 | jmp *%ebp | 203 | jmp *%eax |
202 | 204 | ||
203 | /* | 205 | /* |
204 | * Stack and heap for uncompression | 206 | * Stack and heap for uncompression |
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index c337422b575d..c5c1ae0997e7 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -94,9 +94,11 @@ ENTRY(startup_32) | |||
94 | addl %eax, %ebx | 94 | addl %eax, %ebx |
95 | notl %eax | 95 | notl %eax |
96 | andl %eax, %ebx | 96 | andl %eax, %ebx |
97 | #else | 97 | cmpl $LOAD_PHYSICAL_ADDR, %ebx |
98 | movl $LOAD_PHYSICAL_ADDR, %ebx | 98 | jge 1f |
99 | #endif | 99 | #endif |
100 | movl $LOAD_PHYSICAL_ADDR, %ebx | ||
101 | 1: | ||
100 | 102 | ||
101 | /* Target address to relocate to for decompression */ | 103 | /* Target address to relocate to for decompression */ |
102 | addl $z_extract_offset, %ebx | 104 | addl $z_extract_offset, %ebx |
@@ -269,9 +271,11 @@ preferred_addr: | |||
269 | addq %rax, %rbp | 271 | addq %rax, %rbp |
270 | notq %rax | 272 | notq %rax |
271 | andq %rax, %rbp | 273 | andq %rax, %rbp |
272 | #else | 274 | cmpq $LOAD_PHYSICAL_ADDR, %rbp |
273 | movq $LOAD_PHYSICAL_ADDR, %rbp | 275 | jge 1f |
274 | #endif | 276 | #endif |
277 | movq $LOAD_PHYSICAL_ADDR, %rbp | ||
278 | 1: | ||
275 | 279 | ||
276 | /* Target address to relocate to for decompression */ | 280 | /* Target address to relocate to for decompression */ |
277 | leaq z_extract_offset(%rbp), %rbx | 281 | leaq z_extract_offset(%rbp), %rbx |
@@ -339,13 +343,13 @@ relocated: | |||
339 | movl $z_input_len, %ecx /* input_len */ | 343 | movl $z_input_len, %ecx /* input_len */ |
340 | movq %rbp, %r8 /* output target address */ | 344 | movq %rbp, %r8 /* output target address */ |
341 | movq $z_output_len, %r9 /* decompressed length */ | 345 | movq $z_output_len, %r9 /* decompressed length */ |
342 | call decompress_kernel | 346 | call decompress_kernel /* returns kernel location in %rax */ |
343 | popq %rsi | 347 | popq %rsi |
344 | 348 | ||
345 | /* | 349 | /* |
346 | * Jump to the decompressed kernel. | 350 | * Jump to the decompressed kernel. |
347 | */ | 351 | */ |
348 | jmp *%rbp | 352 | jmp *%rax |
349 | 353 | ||
350 | .code32 | 354 | .code32 |
351 | no_longmode: | 355 | no_longmode: |
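Both head_32.S and head_64.S now keep the aligned runtime address when it is at or above LOAD_PHYSICAL_ADDR, and only fall back to LOAD_PHYSICAL_ADDR otherwise. Roughly the same selection expressed in C, with illustrative stand-ins for the alignment and minimum address the assembly takes from the boot header:

```c
#include <stdio.h>

/* Rough C equivalent of the address selection in the head_32.S/head_64.S
 * hunks above: align the runtime load address, then treat
 * LOAD_PHYSICAL_ADDR as a floor rather than an unconditional override.
 * The constants are illustrative stand-ins, not the real build values. */
#define DEMO_ALIGN		0x200000UL	/* 2 MiB, as in CONFIG_PHYSICAL_ALIGN */
#define DEMO_LOAD_PHYSICAL_ADDR	0x1000000UL	/* hypothetical 16 MiB minimum */

static unsigned long choose_preferred_addr(unsigned long load_addr)
{
	unsigned long addr = (load_addr + DEMO_ALIGN - 1) & ~(DEMO_ALIGN - 1);

	if (addr < DEMO_LOAD_PHYSICAL_ADDR)	/* the new cmp/jge path */
		addr = DEMO_LOAD_PHYSICAL_ADDR;

	return addr;
}

int main(void)
{
	printf("loaded low  -> %#lx\n", choose_preferred_addr(0x100000UL));
	printf("loaded high -> %#lx\n", choose_preferred_addr(0x7fe00001UL));
	return 0;
}
```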
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 434f077d2c4d..196eaf373a06 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -112,14 +112,8 @@ struct boot_params *real_mode; /* Pointer to real-mode data */ | |||
112 | void *memset(void *s, int c, size_t n); | 112 | void *memset(void *s, int c, size_t n); |
113 | void *memcpy(void *dest, const void *src, size_t n); | 113 | void *memcpy(void *dest, const void *src, size_t n); |
114 | 114 | ||
115 | #ifdef CONFIG_X86_64 | 115 | memptr free_mem_ptr; |
116 | #define memptr long | 116 | memptr free_mem_end_ptr; |
117 | #else | ||
118 | #define memptr unsigned | ||
119 | #endif | ||
120 | |||
121 | static memptr free_mem_ptr; | ||
122 | static memptr free_mem_end_ptr; | ||
123 | 117 | ||
124 | static char *vidmem; | 118 | static char *vidmem; |
125 | static int vidport; | 119 | static int vidport; |
@@ -395,7 +389,7 @@ static void parse_elf(void *output) | |||
395 | free(phdrs); | 389 | free(phdrs); |
396 | } | 390 | } |
397 | 391 | ||
398 | asmlinkage void decompress_kernel(void *rmode, memptr heap, | 392 | asmlinkage void *decompress_kernel(void *rmode, memptr heap, |
399 | unsigned char *input_data, | 393 | unsigned char *input_data, |
400 | unsigned long input_len, | 394 | unsigned long input_len, |
401 | unsigned char *output, | 395 | unsigned char *output, |
@@ -422,6 +416,10 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, | |||
422 | free_mem_ptr = heap; /* Heap */ | 416 | free_mem_ptr = heap; /* Heap */ |
423 | free_mem_end_ptr = heap + BOOT_HEAP_SIZE; | 417 | free_mem_end_ptr = heap + BOOT_HEAP_SIZE; |
424 | 418 | ||
419 | output = choose_kernel_location(input_data, input_len, | ||
420 | output, output_len); | ||
421 | |||
422 | /* Validate memory location choices. */ | ||
425 | if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) | 423 | if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) |
426 | error("Destination address inappropriately aligned"); | 424 | error("Destination address inappropriately aligned"); |
427 | #ifdef CONFIG_X86_64 | 425 | #ifdef CONFIG_X86_64 |
@@ -441,5 +439,5 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, | |||
441 | parse_elf(output); | 439 | parse_elf(output); |
442 | handle_relocations(output, output_len); | 440 | handle_relocations(output, output_len); |
443 | debug_putstr("done.\nBooting the kernel.\n"); | 441 | debug_putstr("done.\nBooting the kernel.\n"); |
444 | return; | 442 | return output; |
445 | } | 443 | } |
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 674019d8e235..24e3e569a13c 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -23,7 +23,15 @@ | |||
23 | #define BOOT_BOOT_H | 23 | #define BOOT_BOOT_H |
24 | #include "../ctype.h" | 24 | #include "../ctype.h" |
25 | 25 | ||
26 | #ifdef CONFIG_X86_64 | ||
27 | #define memptr long | ||
28 | #else | ||
29 | #define memptr unsigned | ||
30 | #endif | ||
31 | |||
26 | /* misc.c */ | 32 | /* misc.c */ |
33 | extern memptr free_mem_ptr; | ||
34 | extern memptr free_mem_end_ptr; | ||
27 | extern struct boot_params *real_mode; /* Pointer to real-mode data */ | 35 | extern struct boot_params *real_mode; /* Pointer to real-mode data */ |
28 | void __putstr(const char *s); | 36 | void __putstr(const char *s); |
29 | #define error_putstr(__x) __putstr(__x) | 37 | #define error_putstr(__x) __putstr(__x) |
@@ -39,23 +47,40 @@ static inline void debug_putstr(const char *s) | |||
39 | 47 | ||
40 | #endif | 48 | #endif |
41 | 49 | ||
42 | #ifdef CONFIG_EARLY_PRINTK | 50 | #if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE |
43 | |||
44 | /* cmdline.c */ | 51 | /* cmdline.c */ |
45 | int cmdline_find_option(const char *option, char *buffer, int bufsize); | 52 | int cmdline_find_option(const char *option, char *buffer, int bufsize); |
46 | int cmdline_find_option_bool(const char *option); | 53 | int cmdline_find_option_bool(const char *option); |
54 | #endif | ||
47 | 55 | ||
48 | /* early_serial_console.c */ | ||
49 | extern int early_serial_base; | ||
50 | void console_init(void); | ||
51 | 56 | ||
57 | #if CONFIG_RANDOMIZE_BASE | ||
58 | /* aslr.c */ | ||
59 | unsigned char *choose_kernel_location(unsigned char *input, | ||
60 | unsigned long input_size, | ||
61 | unsigned char *output, | ||
62 | unsigned long output_size); | ||
63 | /* cpuflags.c */ | ||
64 | bool has_cpuflag(int flag); | ||
52 | #else | 65 | #else |
66 | static inline | ||
67 | unsigned char *choose_kernel_location(unsigned char *input, | ||
68 | unsigned long input_size, | ||
69 | unsigned char *output, | ||
70 | unsigned long output_size) | ||
71 | { | ||
72 | return output; | ||
73 | } | ||
74 | #endif | ||
53 | 75 | ||
76 | #ifdef CONFIG_EARLY_PRINTK | ||
54 | /* early_serial_console.c */ | 77 | /* early_serial_console.c */ |
78 | extern int early_serial_base; | ||
79 | void console_init(void); | ||
80 | #else | ||
55 | static const int early_serial_base; | 81 | static const int early_serial_base; |
56 | static inline void console_init(void) | 82 | static inline void console_init(void) |
57 | { } | 83 | { } |
58 | |||
59 | #endif | 84 | #endif |
60 | 85 | ||
61 | #endif | 86 | #endif |
diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S
index 11f272c6f5e9..1eb7d298b47d 100644
--- a/arch/x86/boot/copy.S
+++ b/arch/x86/boot/copy.S
@@ -14,7 +14,7 @@ | |||
14 | * Memory copy routines | 14 | * Memory copy routines |
15 | */ | 15 | */ |
16 | 16 | ||
17 | .code16gcc | 17 | .code16 |
18 | .text | 18 | .text |
19 | 19 | ||
20 | GLOBAL(memcpy) | 20 | GLOBAL(memcpy) |
@@ -30,7 +30,7 @@ GLOBAL(memcpy) | |||
30 | rep; movsb | 30 | rep; movsb |
31 | popw %di | 31 | popw %di |
32 | popw %si | 32 | popw %si |
33 | ret | 33 | retl |
34 | ENDPROC(memcpy) | 34 | ENDPROC(memcpy) |
35 | 35 | ||
36 | GLOBAL(memset) | 36 | GLOBAL(memset) |
@@ -45,25 +45,25 @@ GLOBAL(memset) | |||
45 | andw $3, %cx | 45 | andw $3, %cx |
46 | rep; stosb | 46 | rep; stosb |
47 | popw %di | 47 | popw %di |
48 | ret | 48 | retl |
49 | ENDPROC(memset) | 49 | ENDPROC(memset) |
50 | 50 | ||
51 | GLOBAL(copy_from_fs) | 51 | GLOBAL(copy_from_fs) |
52 | pushw %ds | 52 | pushw %ds |
53 | pushw %fs | 53 | pushw %fs |
54 | popw %ds | 54 | popw %ds |
55 | call memcpy | 55 | calll memcpy |
56 | popw %ds | 56 | popw %ds |
57 | ret | 57 | retl |
58 | ENDPROC(copy_from_fs) | 58 | ENDPROC(copy_from_fs) |
59 | 59 | ||
60 | GLOBAL(copy_to_fs) | 60 | GLOBAL(copy_to_fs) |
61 | pushw %es | 61 | pushw %es |
62 | pushw %fs | 62 | pushw %fs |
63 | popw %es | 63 | popw %es |
64 | call memcpy | 64 | calll memcpy |
65 | popw %es | 65 | popw %es |
66 | ret | 66 | retl |
67 | ENDPROC(copy_to_fs) | 67 | ENDPROC(copy_to_fs) |
68 | 68 | ||
69 | #if 0 /* Not currently used, but can be enabled as needed */ | 69 | #if 0 /* Not currently used, but can be enabled as needed */ |
@@ -71,17 +71,17 @@ GLOBAL(copy_from_gs) | |||
71 | pushw %ds | 71 | pushw %ds |
72 | pushw %gs | 72 | pushw %gs |
73 | popw %ds | 73 | popw %ds |
74 | call memcpy | 74 | calll memcpy |
75 | popw %ds | 75 | popw %ds |
76 | ret | 76 | retl |
77 | ENDPROC(copy_from_gs) | 77 | ENDPROC(copy_from_gs) |
78 | 78 | ||
79 | GLOBAL(copy_to_gs) | 79 | GLOBAL(copy_to_gs) |
80 | pushw %es | 80 | pushw %es |
81 | pushw %gs | 81 | pushw %gs |
82 | popw %es | 82 | popw %es |
83 | call memcpy | 83 | calll memcpy |
84 | popw %es | 84 | popw %es |
85 | ret | 85 | retl |
86 | ENDPROC(copy_to_gs) | 86 | ENDPROC(copy_to_gs) |
87 | #endif | 87 | #endif |
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index 4d3ff037201f..100a9a10076a 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -28,8 +28,6 @@ | |||
28 | #include <asm/required-features.h> | 28 | #include <asm/required-features.h> |
29 | #include <asm/msr-index.h> | 29 | #include <asm/msr-index.h> |
30 | 30 | ||
31 | struct cpu_features cpu; | ||
32 | static u32 cpu_vendor[3]; | ||
33 | static u32 err_flags[NCAPINTS]; | 31 | static u32 err_flags[NCAPINTS]; |
34 | 32 | ||
35 | static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY; | 33 | static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY; |
@@ -69,92 +67,8 @@ static int is_transmeta(void) | |||
69 | cpu_vendor[2] == A32('M', 'x', '8', '6'); | 67 | cpu_vendor[2] == A32('M', 'x', '8', '6'); |
70 | } | 68 | } |
71 | 69 | ||
72 | static int has_fpu(void) | ||
73 | { | ||
74 | u16 fcw = -1, fsw = -1; | ||
75 | u32 cr0; | ||
76 | |||
77 | asm("movl %%cr0,%0" : "=r" (cr0)); | ||
78 | if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { | ||
79 | cr0 &= ~(X86_CR0_EM|X86_CR0_TS); | ||
80 | asm volatile("movl %0,%%cr0" : : "r" (cr0)); | ||
81 | } | ||
82 | |||
83 | asm volatile("fninit ; fnstsw %0 ; fnstcw %1" | ||
84 | : "+m" (fsw), "+m" (fcw)); | ||
85 | |||
86 | return fsw == 0 && (fcw & 0x103f) == 0x003f; | ||
87 | } | ||
88 | |||
89 | static int has_eflag(u32 mask) | ||
90 | { | ||
91 | u32 f0, f1; | ||
92 | |||
93 | asm("pushfl ; " | ||
94 | "pushfl ; " | ||
95 | "popl %0 ; " | ||
96 | "movl %0,%1 ; " | ||
97 | "xorl %2,%1 ; " | ||
98 | "pushl %1 ; " | ||
99 | "popfl ; " | ||
100 | "pushfl ; " | ||
101 | "popl %1 ; " | ||
102 | "popfl" | ||
103 | : "=&r" (f0), "=&r" (f1) | ||
104 | : "ri" (mask)); | ||
105 | |||
106 | return !!((f0^f1) & mask); | ||
107 | } | ||
108 | |||
109 | static void get_flags(void) | ||
110 | { | ||
111 | u32 max_intel_level, max_amd_level; | ||
112 | u32 tfms; | ||
113 | |||
114 | if (has_fpu()) | ||
115 | set_bit(X86_FEATURE_FPU, cpu.flags); | ||
116 | |||
117 | if (has_eflag(X86_EFLAGS_ID)) { | ||
118 | asm("cpuid" | ||
119 | : "=a" (max_intel_level), | ||
120 | "=b" (cpu_vendor[0]), | ||
121 | "=d" (cpu_vendor[1]), | ||
122 | "=c" (cpu_vendor[2]) | ||
123 | : "a" (0)); | ||
124 | |||
125 | if (max_intel_level >= 0x00000001 && | ||
126 | max_intel_level <= 0x0000ffff) { | ||
127 | asm("cpuid" | ||
128 | : "=a" (tfms), | ||
129 | "=c" (cpu.flags[4]), | ||
130 | "=d" (cpu.flags[0]) | ||
131 | : "a" (0x00000001) | ||
132 | : "ebx"); | ||
133 | cpu.level = (tfms >> 8) & 15; | ||
134 | cpu.model = (tfms >> 4) & 15; | ||
135 | if (cpu.level >= 6) | ||
136 | cpu.model += ((tfms >> 16) & 0xf) << 4; | ||
137 | } | ||
138 | |||
139 | asm("cpuid" | ||
140 | : "=a" (max_amd_level) | ||
141 | : "a" (0x80000000) | ||
142 | : "ebx", "ecx", "edx"); | ||
143 | |||
144 | if (max_amd_level >= 0x80000001 && | ||
145 | max_amd_level <= 0x8000ffff) { | ||
146 | u32 eax = 0x80000001; | ||
147 | asm("cpuid" | ||
148 | : "+a" (eax), | ||
149 | "=c" (cpu.flags[6]), | ||
150 | "=d" (cpu.flags[1]) | ||
151 | : : "ebx"); | ||
152 | } | ||
153 | } | ||
154 | } | ||
155 | |||
156 | /* Returns a bitmask of which words we have error bits in */ | 70 | /* Returns a bitmask of which words we have error bits in */ |
157 | static int check_flags(void) | 71 | static int check_cpuflags(void) |
158 | { | 72 | { |
159 | u32 err; | 73 | u32 err; |
160 | int i; | 74 | int i; |
@@ -187,8 +101,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) | |||
187 | if (has_eflag(X86_EFLAGS_AC)) | 101 | if (has_eflag(X86_EFLAGS_AC)) |
188 | cpu.level = 4; | 102 | cpu.level = 4; |
189 | 103 | ||
190 | get_flags(); | 104 | get_cpuflags(); |
191 | err = check_flags(); | 105 | err = check_cpuflags(); |
192 | 106 | ||
193 | if (test_bit(X86_FEATURE_LM, cpu.flags)) | 107 | if (test_bit(X86_FEATURE_LM, cpu.flags)) |
194 | cpu.level = 64; | 108 | cpu.level = 64; |
@@ -207,8 +121,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) | |||
207 | eax &= ~(1 << 15); | 121 | eax &= ~(1 << 15); |
208 | asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); | 122 | asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); |
209 | 123 | ||
210 | get_flags(); /* Make sure it really did something */ | 124 | get_cpuflags(); /* Make sure it really did something */ |
211 | err = check_flags(); | 125 | err = check_cpuflags(); |
212 | } else if (err == 0x01 && | 126 | } else if (err == 0x01 && |
213 | !(err_flags[0] & ~(1 << X86_FEATURE_CX8)) && | 127 | !(err_flags[0] & ~(1 << X86_FEATURE_CX8)) && |
214 | is_centaur() && cpu.model >= 6) { | 128 | is_centaur() && cpu.model >= 6) { |
@@ -223,7 +137,7 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) | |||
223 | asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); | 137 | asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); |
224 | 138 | ||
225 | set_bit(X86_FEATURE_CX8, cpu.flags); | 139 | set_bit(X86_FEATURE_CX8, cpu.flags); |
226 | err = check_flags(); | 140 | err = check_cpuflags(); |
227 | } else if (err == 0x01 && is_transmeta()) { | 141 | } else if (err == 0x01 && is_transmeta()) { |
228 | /* Transmeta might have masked feature bits in word 0 */ | 142 | /* Transmeta might have masked feature bits in word 0 */ |
229 | 143 | ||
@@ -238,7 +152,7 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) | |||
238 | : : "ecx", "ebx"); | 152 | : : "ecx", "ebx"); |
239 | asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); | 153 | asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); |
240 | 154 | ||
241 | err = check_flags(); | 155 | err = check_cpuflags(); |
242 | } | 156 | } |
243 | 157 | ||
244 | if (err_flags_ptr) | 158 | if (err_flags_ptr) |
diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
new file mode 100644
index 000000000000..a9fcb7cfb241
--- /dev/null
+++ b/arch/x86/boot/cpuflags.c
@@ -0,0 +1,104 @@ | |||
1 | #include <linux/types.h> | ||
2 | #include "bitops.h" | ||
3 | |||
4 | #include <asm/processor-flags.h> | ||
5 | #include <asm/required-features.h> | ||
6 | #include <asm/msr-index.h> | ||
7 | #include "cpuflags.h" | ||
8 | |||
9 | struct cpu_features cpu; | ||
10 | u32 cpu_vendor[3]; | ||
11 | |||
12 | static bool loaded_flags; | ||
13 | |||
14 | static int has_fpu(void) | ||
15 | { | ||
16 | u16 fcw = -1, fsw = -1; | ||
17 | unsigned long cr0; | ||
18 | |||
19 | asm volatile("mov %%cr0,%0" : "=r" (cr0)); | ||
20 | if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { | ||
21 | cr0 &= ~(X86_CR0_EM|X86_CR0_TS); | ||
22 | asm volatile("mov %0,%%cr0" : : "r" (cr0)); | ||
23 | } | ||
24 | |||
25 | asm volatile("fninit ; fnstsw %0 ; fnstcw %1" | ||
26 | : "+m" (fsw), "+m" (fcw)); | ||
27 | |||
28 | return fsw == 0 && (fcw & 0x103f) == 0x003f; | ||
29 | } | ||
30 | |||
31 | int has_eflag(unsigned long mask) | ||
32 | { | ||
33 | unsigned long f0, f1; | ||
34 | |||
35 | asm volatile("pushf \n\t" | ||
36 | "pushf \n\t" | ||
37 | "pop %0 \n\t" | ||
38 | "mov %0,%1 \n\t" | ||
39 | "xor %2,%1 \n\t" | ||
40 | "push %1 \n\t" | ||
41 | "popf \n\t" | ||
42 | "pushf \n\t" | ||
43 | "pop %1 \n\t" | ||
44 | "popf" | ||
45 | : "=&r" (f0), "=&r" (f1) | ||
46 | : "ri" (mask)); | ||
47 | |||
48 | return !!((f0^f1) & mask); | ||
49 | } | ||
50 | |||
51 | /* Handle x86_32 PIC using ebx. */ | ||
52 | #if defined(__i386__) && defined(__PIC__) | ||
53 | # define EBX_REG "=r" | ||
54 | #else | ||
55 | # define EBX_REG "=b" | ||
56 | #endif | ||
57 | |||
58 | static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d) | ||
59 | { | ||
60 | asm volatile(".ifnc %%ebx,%3 ; movl %%ebx,%3 ; .endif \n\t" | ||
61 | "cpuid \n\t" | ||
62 | ".ifnc %%ebx,%3 ; xchgl %%ebx,%3 ; .endif \n\t" | ||
63 | : "=a" (*a), "=c" (*c), "=d" (*d), EBX_REG (*b) | ||
64 | : "a" (id) | ||
65 | ); | ||
66 | } | ||
67 | |||
68 | void get_cpuflags(void) | ||
69 | { | ||
70 | u32 max_intel_level, max_amd_level; | ||
71 | u32 tfms; | ||
72 | u32 ignored; | ||
73 | |||
74 | if (loaded_flags) | ||
75 | return; | ||
76 | loaded_flags = true; | ||
77 | |||
78 | if (has_fpu()) | ||
79 | set_bit(X86_FEATURE_FPU, cpu.flags); | ||
80 | |||
81 | if (has_eflag(X86_EFLAGS_ID)) { | ||
82 | cpuid(0x0, &max_intel_level, &cpu_vendor[0], &cpu_vendor[2], | ||
83 | &cpu_vendor[1]); | ||
84 | |||
85 | if (max_intel_level >= 0x00000001 && | ||
86 | max_intel_level <= 0x0000ffff) { | ||
87 | cpuid(0x1, &tfms, &ignored, &cpu.flags[4], | ||
88 | &cpu.flags[0]); | ||
89 | cpu.level = (tfms >> 8) & 15; | ||
90 | cpu.model = (tfms >> 4) & 15; | ||
91 | if (cpu.level >= 6) | ||
92 | cpu.model += ((tfms >> 16) & 0xf) << 4; | ||
93 | } | ||
94 | |||
95 | cpuid(0x80000000, &max_amd_level, &ignored, &ignored, | ||
96 | &ignored); | ||
97 | |||
98 | if (max_amd_level >= 0x80000001 && | ||
99 | max_amd_level <= 0x8000ffff) { | ||
100 | cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6], | ||
101 | &cpu.flags[1]); | ||
102 | } | ||
103 | } | ||
104 | } | ||
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h
new file mode 100644
index 000000000000..ea97697e51e4
--- /dev/null
+++ b/arch/x86/boot/cpuflags.h
@@ -0,0 +1,19 @@ | |||
1 | #ifndef BOOT_CPUFLAGS_H | ||
2 | #define BOOT_CPUFLAGS_H | ||
3 | |||
4 | #include <asm/cpufeature.h> | ||
5 | #include <asm/processor-flags.h> | ||
6 | |||
7 | struct cpu_features { | ||
8 | int level; /* Family, or 64 for x86-64 */ | ||
9 | int model; | ||
10 | u32 flags[NCAPINTS]; | ||
11 | }; | ||
12 | |||
13 | extern struct cpu_features cpu; | ||
14 | extern u32 cpu_vendor[3]; | ||
15 | |||
16 | int has_eflag(unsigned long mask); | ||
17 | void get_cpuflags(void); | ||
18 | |||
19 | #endif | ||
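The new cpuflags.c/cpuflags.h pair lets both the setup code (cpucheck.c) and the compressed stub (has_cpuflag() above) share one CPUID-derived feature table. A self-contained sketch of that lookup pattern, with the feature-bit constants and reported flags reduced to made-up stand-ins rather than the real <asm/cpufeature.h> values:

```c
#include <stdio.h>

/* Miniature stand-in for the cpu_features/test_bit machinery declared in
 * cpuflags.h; the real code fills cpu.flags from CPUID in get_cpuflags(). */
#define NCAPINTS_DEMO	10
#define FEATURE_TSC	(0 * 32 + 4)	/* word 0, bit 4 -- example values only */
#define FEATURE_LM	(1 * 32 + 29)	/* word 1, bit 29 -- example values only */

struct cpu_features_demo {
	int level;
	int model;
	unsigned int flags[NCAPINTS_DEMO];
};

static struct cpu_features_demo cpu;

static int test_bit_demo(int nr, const unsigned int *addr)
{
	return (addr[nr / 32] >> (nr % 32)) & 1;
}

/* Placeholder for get_cpuflags(): pretend CPUID reported TSC and LM. */
static void get_cpuflags_demo(void)
{
	cpu.flags[0] |= 1u << 4;
	cpu.flags[1] |= 1u << 29;
}

/* Same shape as has_cpuflag() in compressed/cpuflags.c. */
static int has_cpuflag_demo(int flag)
{
	get_cpuflags_demo();
	return test_bit_demo(flag, cpu.flags);
}

int main(void)
{
	printf("TSC: %d, LM: %d\n",
	       has_cpuflag_demo(FEATURE_TSC), has_cpuflag_demo(FEATURE_LM));
	return 0;
}
```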
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 9ec06a1f6d61..ec3b8ba68096 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -391,7 +391,14 @@ xloadflags: | |||
391 | #else | 391 | #else |
392 | # define XLF23 0 | 392 | # define XLF23 0 |
393 | #endif | 393 | #endif |
394 | .word XLF0 | XLF1 | XLF23 | 394 | |
395 | #if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC) | ||
396 | # define XLF4 XLF_EFI_KEXEC | ||
397 | #else | ||
398 | # define XLF4 0 | ||
399 | #endif | ||
400 | |||
401 | .word XLF0 | XLF1 | XLF23 | XLF4 | ||
395 | 402 | ||
396 | cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, | 403 | cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, |
397 | #added with boot protocol | 404 | #added with boot protocol |
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index e0fc24db234a..6ba54d640383 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -76,6 +76,7 @@ ifeq ($(avx2_supported),yes) | |||
76 | endif | 76 | endif |
77 | 77 | ||
78 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o | 78 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o |
79 | aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o | ||
79 | ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o | 80 | ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o |
80 | sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o | 81 | sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o |
81 | crc32c-intel-y := crc32c-intel_glue.o | 82 | crc32c-intel-y := crc32c-intel_glue.o |
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
new file mode 100644
index 000000000000..522ab68d1c88
--- /dev/null
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -0,0 +1,2811 @@ | |||
1 | ######################################################################## | ||
2 | # Copyright (c) 2013, Intel Corporation | ||
3 | # | ||
4 | # This software is available to you under a choice of one of two | ||
5 | # licenses. You may choose to be licensed under the terms of the GNU | ||
6 | # General Public License (GPL) Version 2, available from the file | ||
7 | # COPYING in the main directory of this source tree, or the | ||
8 | # OpenIB.org BSD license below: | ||
9 | # | ||
10 | # Redistribution and use in source and binary forms, with or without | ||
11 | # modification, are permitted provided that the following conditions are | ||
12 | # met: | ||
13 | # | ||
14 | # * Redistributions of source code must retain the above copyright | ||
15 | # notice, this list of conditions and the following disclaimer. | ||
16 | # | ||
17 | # * Redistributions in binary form must reproduce the above copyright | ||
18 | # notice, this list of conditions and the following disclaimer in the | ||
19 | # documentation and/or other materials provided with the | ||
20 | # distribution. | ||
21 | # | ||
22 | # * Neither the name of the Intel Corporation nor the names of its | ||
23 | # contributors may be used to endorse or promote products derived from | ||
24 | # this software without specific prior written permission. | ||
25 | # | ||
26 | # | ||
27 | # THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY | ||
28 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
29 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
30 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR | ||
31 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
32 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
33 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES# LOSS OF USE, DATA, OR | ||
34 | # PROFITS# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
35 | # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
36 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
37 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | ######################################################################## | ||
39 | ## | ||
40 | ## Authors: | ||
41 | ## Erdinc Ozturk <erdinc.ozturk@intel.com> | ||
42 | ## Vinodh Gopal <vinodh.gopal@intel.com> | ||
43 | ## James Guilford <james.guilford@intel.com> | ||
44 | ## Tim Chen <tim.c.chen@linux.intel.com> | ||
45 | ## | ||
46 | ## References: | ||
47 | ## This code was derived and highly optimized from the code described in paper: | ||
48 | ## Vinodh Gopal et. al. Optimized Galois-Counter-Mode Implementation | ||
49 | ## on Intel Architecture Processors. August, 2010 | ||
50 | ## The details of the implementation is explained in: | ||
51 | ## Erdinc Ozturk et. al. Enabling High-Performance Galois-Counter-Mode | ||
52 | ## on Intel Architecture Processors. October, 2012. | ||
53 | ## | ||
54 | ## Assumptions: | ||
55 | ## | ||
56 | ## | ||
57 | ## | ||
58 | ## iv: | ||
59 | ## 0 1 2 3 | ||
60 | ## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
61 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
62 | ## | Salt (From the SA) | | ||
63 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
64 | ## | Initialization Vector | | ||
65 | ## | (This is the sequence number from IPSec header) | | ||
66 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
67 | ## | 0x1 | | ||
68 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
69 | ## | ||
70 | ## | ||
71 | ## | ||
72 | ## AAD: | ||
73 | ## AAD padded to 128 bits with 0 | ||
74 | ## for example, assume AAD is a u32 vector | ||
75 | ## | ||
76 | ## if AAD is 8 bytes: | ||
77 | ## AAD[3] = {A0, A1}# | ||
78 | ## padded AAD in xmm register = {A1 A0 0 0} | ||
79 | ## | ||
80 | ## 0 1 2 3 | ||
81 | ## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
82 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
83 | ## | SPI (A1) | | ||
84 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
85 | ## | 32-bit Sequence Number (A0) | | ||
86 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
87 | ## | 0x0 | | ||
88 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
89 | ## | ||
90 | ## AAD Format with 32-bit Sequence Number | ||
91 | ## | ||
92 | ## if AAD is 12 bytes: | ||
93 | ## AAD[3] = {A0, A1, A2}; | ||
94 | ## padded AAD in xmm register = {A2 A1 A0 0} | ||
95 | ## | ||
96 | ## 0 1 2 3 | ||
97 | ## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
98 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
99 | ## | SPI (A2) | | ||
100 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
101 | ## | 64-bit Extended Sequence Number {A1,A0} | | ||
102 | ## | | | ||
103 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
104 | ## | 0x0 | | ||
105 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
106 | ## | ||
107 | ## AAD Format with 64-bit Extended Sequence Number | ||
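##
## As a small illustration (hypothetical helper, not part of this file,
## assuming a u8 typedef), the padding described above amounts to copying the
## AAD into a zeroed 16-byte buffer before it is loaded and byte-reflected:
##
##     u8 padded_aad[16] = { 0 };
##     memcpy(padded_aad, aad, aad_len);   /* aad_len = 8 or 12 (16 also supported) */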
108 | ## | ||
109 | ## | ||
110 | ## aadLen: | ||
111 | ## per the definition in the spec, aadLen can only be 8 or 12 bytes. | ||
112 | ## The code additionally supports an aadLen of 16 bytes. | ||
113 | ## | ||
114 | ## TLen: | ||
115 | ## per the definition in the spec, TLen can only be 8, 12 or 16 bytes. | ||
116 | ## | ||
117 | ## poly = x^128 + x^127 + x^126 + x^121 + 1 | ||
118 | ## throughout the code, one-tab and two-tab indentation is used: one tab is | ||
119 | ## for the GHASH part, two tabs are for the AES part. | ||
120 | ## | ||
121 | |||
122 | #include <linux/linkage.h> | ||
123 | #include <asm/inst.h> | ||
124 | |||
125 | .data | ||
126 | .align 16 | ||
127 | |||
128 | POLY: .octa 0xC2000000000000000000000000000001 | ||
129 | POLY2: .octa 0xC20000000000000000000001C2000000 | ||
130 | TWOONE: .octa 0x00000001000000000000000000000001 | ||
131 | |||
132 | # order of these constants should not change. | ||
133 | # more specifically, ALL_F should follow SHIFT_MASK, and ZERO should follow ALL_F | ||
134 | |||
135 | SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F | ||
136 | SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 | ||
137 | ALL_F: .octa 0xffffffffffffffffffffffffffffffff | ||
138 | ZERO: .octa 0x00000000000000000000000000000000 | ||
139 | ONE: .octa 0x00000000000000000000000000000001 | ||
140 | ONEf: .octa 0x01000000000000000000000000000000 | ||
141 | |||
142 | .text | ||
143 | |||
144 | |||
145 | ##define the fields of the gcm aes context | ||
146 | #{ | ||
147 | # u8 expanded_keys[16*11] store expanded keys | ||
148 | # u8 shifted_hkey_1[16] store HashKey <<1 mod poly here | ||
149 | # u8 shifted_hkey_2[16] store HashKey^2 <<1 mod poly here | ||
150 | # u8 shifted_hkey_3[16] store HashKey^3 <<1 mod poly here | ||
151 | # u8 shifted_hkey_4[16] store HashKey^4 <<1 mod poly here | ||
152 | # u8 shifted_hkey_5[16] store HashKey^5 <<1 mod poly here | ||
153 | # u8 shifted_hkey_6[16] store HashKey^6 <<1 mod poly here | ||
154 | # u8 shifted_hkey_7[16] store HashKey^7 <<1 mod poly here | ||
155 | # u8 shifted_hkey_8[16] store HashKey^8 <<1 mod poly here | ||
156 | # u8 shifted_hkey_1_k[16] store XOR HashKey <<1 mod poly here (for Karatsuba purposes) | ||
157 | # u8 shifted_hkey_2_k[16] store XOR HashKey^2 <<1 mod poly here (for Karatsuba purposes) | ||
158 | # u8 shifted_hkey_3_k[16] store XOR HashKey^3 <<1 mod poly here (for Karatsuba purposes) | ||
159 | # u8 shifted_hkey_4_k[16] store XOR HashKey^4 <<1 mod poly here (for Karatsuba purposes) | ||
160 | # u8 shifted_hkey_5_k[16] store XOR HashKey^5 <<1 mod poly here (for Karatsuba purposes) | ||
161 | # u8 shifted_hkey_6_k[16] store XOR HashKey^6 <<1 mod poly here (for Karatsuba purposes) | ||
162 | # u8 shifted_hkey_7_k[16] store XOR HashKey^7 <<1 mod poly here (for Karatsuba purposes) | ||
163 | # u8 shifted_hkey_8_k[16] store XOR HashKey^8 <<1 mod poly here (for Karatsuba purposes) | ||
164 | #} gcm_ctx; | ||
165 | |||
166 | HashKey = 16*11 # store HashKey <<1 mod poly here | ||
167 | HashKey_2 = 16*12 # store HashKey^2 <<1 mod poly here | ||
168 | HashKey_3 = 16*13 # store HashKey^3 <<1 mod poly here | ||
169 | HashKey_4 = 16*14 # store HashKey^4 <<1 mod poly here | ||
170 | HashKey_5 = 16*15 # store HashKey^5 <<1 mod poly here | ||
171 | HashKey_6 = 16*16 # store HashKey^6 <<1 mod poly here | ||
172 | HashKey_7 = 16*17 # store HashKey^7 <<1 mod poly here | ||
173 | HashKey_8 = 16*18 # store HashKey^8 <<1 mod poly here | ||
174 | HashKey_k = 16*19 # store XOR of HashKey <<1 mod poly here (for Karatsuba purposes) | ||
175 | HashKey_2_k = 16*20 # store XOR of HashKey^2 <<1 mod poly here (for Karatsuba purposes) | ||
176 | HashKey_3_k = 16*21 # store XOR of HashKey^3 <<1 mod poly here (for Karatsuba purposes) | ||
177 | HashKey_4_k = 16*22 # store XOR of HashKey^4 <<1 mod poly here (for Karatsuba purposes) | ||
178 | HashKey_5_k = 16*23 # store XOR of HashKey^5 <<1 mod poly here (for Karatsuba purposes) | ||
179 | HashKey_6_k = 16*24 # store XOR of HashKey^6 <<1 mod poly here (for Karatsuba purposes) | ||
180 | HashKey_7_k = 16*25 # store XOR of HashKey^7 <<1 mod poly here (for Karatsuba purposes) | ||
181 | HashKey_8_k = 16*26 # store XOR of HashKey^8 <<1 mod poly here (for Karatsuba purposes) | ||
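#
# Purely as an illustration (hypothetical names, not the kernel's gcm_data
# definition), the offsets above correspond to a C layout along these lines:
#
#       struct gcm_ctx_sketch {
#               u8 expanded_keys[16*11];        /* AES-128 round keys              */
#               u8 shifted_hkey[8][16];         /* HashKey^i << 1 mod poly, i=1..8  */
#               u8 shifted_hkey_k[8][16];       /* XOR of halves (Karatsuba)       */
#       };
#       /* offsetof(shifted_hkey[0])   == 16*11 == HashKey   */
#       /* offsetof(shifted_hkey_k[0]) == 16*19 == HashKey_k */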
182 | |||
183 | #define arg1 %rdi | ||
184 | #define arg2 %rsi | ||
185 | #define arg3 %rdx | ||
186 | #define arg4 %rcx | ||
187 | #define arg5 %r8 | ||
188 | #define arg6 %r9 | ||
189 | #define arg7 STACK_OFFSET+8*1(%r14) | ||
190 | #define arg8 STACK_OFFSET+8*2(%r14) | ||
191 | #define arg9 STACK_OFFSET+8*3(%r14) | ||
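#
# arg1..arg6 follow the x86-64 SysV register convention
# (%rdi, %rsi, %rdx, %rcx, %r8, %r9); arg7..arg9 are the caller's stack
# arguments, read through %r14, which is loaded with %rsp after the four
# register pushes that STACK_OFFSET accounts for.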
192 | |||
193 | i = 0 | ||
194 | j = 0 | ||
195 | |||
196 | out_order = 0 | ||
197 | in_order = 1 | ||
198 | DEC = 0 | ||
199 | ENC = 1 | ||
200 | |||
201 | .macro define_reg r n | ||
202 | reg_\r = %xmm\n | ||
203 | .endm | ||
204 | |||
205 | .macro setreg | ||
206 | .altmacro | ||
207 | define_reg i %i | ||
208 | define_reg j %j | ||
209 | .noaltmacro | ||
210 | .endm | ||
211 | |||
212 | # four registers are pushed onto the stack before %rsp is saved in %r14; STACK_OFFSET accounts for them when reading arg7-arg9 | ||
213 | STACK_OFFSET = 8*4 | ||
214 | |||
215 | TMP1 = 16*0 # Temporary storage for AAD | ||
216 | TMP2 = 16*1 # Temporary storage for AES State 2 (State 1 is stored in an XMM register) | ||
217 | TMP3 = 16*2 # Temporary storage for AES State 3 | ||
218 | TMP4 = 16*3 # Temporary storage for AES State 4 | ||
219 | TMP5 = 16*4 # Temporary storage for AES State 5 | ||
220 | TMP6 = 16*5 # Temporary storage for AES State 6 | ||
221 | TMP7 = 16*6 # Temporary storage for AES State 7 | ||
222 | TMP8 = 16*7 # Temporary storage for AES State 8 | ||
223 | |||
224 | VARIABLE_OFFSET = 16*8 | ||
225 | |||
226 | ################################ | ||
227 | # Utility Macros | ||
228 | ################################ | ||
229 | |||
230 | # Encryption of a single block | ||
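# (standard AES-128 flow: whitening with round key 0, nine full rounds, then
#  the final round; the round keys are read from the start of arg1)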
231 | .macro ENCRYPT_SINGLE_BLOCK XMM0 | ||
232 | vpxor (arg1), \XMM0, \XMM0 | ||
233 | i = 1 | ||
234 | setreg | ||
235 | .rep 9 | ||
236 | vaesenc 16*i(arg1), \XMM0, \XMM0 | ||
237 | i = (i+1) | ||
238 | setreg | ||
239 | .endr | ||
240 | vaesenclast 16*10(arg1), \XMM0, \XMM0 | ||
241 | .endm | ||
242 | |||
243 | #ifdef CONFIG_AS_AVX | ||
244 | ############################################################################### | ||
245 | # GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) | ||
246 | # Input: A and B (128-bits each, bit-reflected) | ||
247 | # Output: C = A*B*x mod poly, (i.e. >>1 ) | ||
248 | # To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input | ||
249 | # GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. | ||
250 | ############################################################################### | ||
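# As a rough C-intrinsics sketch of the Karatsuba step used below (illustration
# only, the final reduction is omitted; gh and hk are assumed __m128i values
# holding GH and HK, and <wmmintrin.h> provides the PCLMULQDQ intrinsics):
#
#       __m128i hi  = _mm_clmulepi64_si128(gh, hk, 0x11);          /* a1*b1 */
#       __m128i lo  = _mm_clmulepi64_si128(gh, hk, 0x00);          /* a0*b0 */
#       __m128i mid = _mm_clmulepi64_si128(
#                       _mm_xor_si128(gh, _mm_srli_si128(gh, 8)),
#                       _mm_xor_si128(hk, _mm_srli_si128(hk, 8)), 0x00);
#       mid = _mm_xor_si128(mid, _mm_xor_si128(hi, lo));   /* a0*b1 + a1*b0 */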
251 | .macro GHASH_MUL_AVX GH HK T1 T2 T3 T4 T5 | ||
252 | |||
253 | vpshufd $0b01001110, \GH, \T2 | ||
254 | vpshufd $0b01001110, \HK, \T3 | ||
255 | vpxor \GH , \T2, \T2 # T2 = (a1+a0) | ||
256 | vpxor \HK , \T3, \T3 # T3 = (b1+b0) | ||
257 | |||
258 | vpclmulqdq $0x11, \HK, \GH, \T1 # T1 = a1*b1 | ||
259 | vpclmulqdq $0x00, \HK, \GH, \GH # GH = a0*b0 | ||
260 | vpclmulqdq $0x00, \T3, \T2, \T2 # T2 = (a1+a0)*(b1+b0) | ||
261 | vpxor \GH, \T2,\T2 | ||
262 | vpxor \T1, \T2,\T2 # T2 = a0*b1+a1*b0 | ||
263 | |||
264 | vpslldq $8, \T2,\T3 # shift-L T3 2 DWs | ||
265 | vpsrldq $8, \T2,\T2 # shift-R T2 2 DWs | ||
266 | vpxor \T3, \GH, \GH | ||
267 | vpxor \T2, \T1, \T1 # <T1:GH> = GH x HK | ||
268 | |||
269 | #first phase of the reduction | ||
270 | vpslld $31, \GH, \T2 # packed right shifting << 31 | ||
271 | vpslld $30, \GH, \T3 # packed right shifting << 30 | ||
272 | vpslld $25, \GH, \T4 # packed right shifting << 25 | ||
273 | |||
274 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
275 | vpxor \T4, \T2, \T2 | ||
276 | |||
277 | vpsrldq $4, \T2, \T5 # shift-R T5 1 DW | ||
278 | |||
279 | vpslldq $12, \T2, \T2 # shift-L T2 3 DWs | ||
280 | vpxor \T2, \GH, \GH # first phase of the reduction complete | ||
281 | |||
282 | #second phase of the reduction | ||
283 | |||
284 | vpsrld $1,\GH, \T2 # packed left shifting >> 1 | ||
285 | vpsrld $2,\GH, \T3 # packed left shifting >> 2 | ||
286 | vpsrld $7,\GH, \T4 # packed left shifting >> 7 | ||
287 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
288 | vpxor \T4, \T2, \T2 | ||
289 | |||
290 | vpxor \T5, \T2, \T2 | ||
291 | vpxor \T2, \GH, \GH | ||
292 | vpxor \T1, \GH, \GH # the result is in GH | ||
293 | |||
294 | |||
295 | .endm | ||
296 | |||
297 | .macro PRECOMPUTE_AVX HK T1 T2 T3 T4 T5 T6 | ||
298 | |||
299 | # HashKey_i_k holds the XOR of the low and high parts of HashKey_i | ||
300 | vmovdqa \HK, \T5 | ||
301 | |||
302 | vpshufd $0b01001110, \T5, \T1 | ||
303 | vpxor \T5, \T1, \T1 | ||
304 | vmovdqa \T1, HashKey_k(arg1) | ||
305 | |||
306 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly | ||
307 | vmovdqa \T5, HashKey_2(arg1) # [HashKey_2] = HashKey^2<<1 mod poly | ||
308 | vpshufd $0b01001110, \T5, \T1 | ||
309 | vpxor \T5, \T1, \T1 | ||
310 | vmovdqa \T1, HashKey_2_k(arg1) | ||
311 | |||
312 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly | ||
313 | vmovdqa \T5, HashKey_3(arg1) | ||
314 | vpshufd $0b01001110, \T5, \T1 | ||
315 | vpxor \T5, \T1, \T1 | ||
316 | vmovdqa \T1, HashKey_3_k(arg1) | ||
317 | |||
318 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly | ||
319 | vmovdqa \T5, HashKey_4(arg1) | ||
320 | vpshufd $0b01001110, \T5, \T1 | ||
321 | vpxor \T5, \T1, \T1 | ||
322 | vmovdqa \T1, HashKey_4_k(arg1) | ||
323 | |||
324 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly | ||
325 | vmovdqa \T5, HashKey_5(arg1) | ||
326 | vpshufd $0b01001110, \T5, \T1 | ||
327 | vpxor \T5, \T1, \T1 | ||
328 | vmovdqa \T1, HashKey_5_k(arg1) | ||
329 | |||
330 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly | ||
331 | vmovdqa \T5, HashKey_6(arg1) | ||
332 | vpshufd $0b01001110, \T5, \T1 | ||
333 | vpxor \T5, \T1, \T1 | ||
334 | vmovdqa \T1, HashKey_6_k(arg1) | ||
335 | |||
336 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly | ||
337 | vmovdqa \T5, HashKey_7(arg1) | ||
338 | vpshufd $0b01001110, \T5, \T1 | ||
339 | vpxor \T5, \T1, \T1 | ||
340 | vmovdqa \T1, HashKey_7_k(arg1) | ||
341 | |||
342 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly | ||
343 | vmovdqa \T5, HashKey_8(arg1) | ||
344 | vpshufd $0b01001110, \T5, \T1 | ||
345 | vpxor \T5, \T1, \T1 | ||
346 | vmovdqa \T1, HashKey_8_k(arg1) | ||
347 | |||
348 | .endm | ||
349 | |||
350 | ## if a = number of total plaintext bytes | ||
351 | ## b = floor(a/16) | ||
352 | ## num_initial_blocks = b mod 8 | ||
353 | ## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext | ||
354 | ## r10, r11, r12, rax are clobbered | ||
355 | ## arg1, arg2, arg3, r14 are used as pointers only, not modified | ||
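##
## worked example (illustrative): for a = 200 plaintext bytes, b = floor(200/16) = 12,
## so num_initial_blocks = 12 mod 8 = 4; the next 8 full blocks are handled by the
## 8-blocks-at-a-time loop, and the final 8 bytes as a partial block.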
356 | |||
357 | .macro INITIAL_BLOCKS_AVX num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC | ||
358 | i = (8-\num_initial_blocks) | ||
359 | setreg | ||
360 | |||
361 | mov arg6, %r10 # r10 = AAD | ||
362 | mov arg7, %r12 # r12 = aadLen | ||
363 | |||
364 | |||
365 | mov %r12, %r11 | ||
366 | |||
367 | vpxor reg_i, reg_i, reg_i | ||
368 | _get_AAD_loop\@: | ||
369 | vmovd (%r10), \T1 | ||
370 | vpslldq $12, \T1, \T1 | ||
371 | vpsrldq $4, reg_i, reg_i | ||
372 | vpxor \T1, reg_i, reg_i | ||
373 | |||
374 | add $4, %r10 | ||
375 | sub $4, %r12 | ||
376 | jg _get_AAD_loop\@ | ||
377 | |||
378 | |||
379 | cmp $16, %r11 | ||
380 | je _get_AAD_loop2_done\@ | ||
381 | mov $16, %r12 | ||
382 | |||
383 | _get_AAD_loop2\@: | ||
384 | vpsrldq $4, reg_i, reg_i | ||
385 | sub $4, %r12 | ||
386 | cmp %r11, %r12 | ||
387 | jg _get_AAD_loop2\@ | ||
388 | |||
389 | _get_AAD_loop2_done\@: | ||
390 | |||
391 | #byte-reflect the AAD data | ||
392 | vpshufb SHUF_MASK(%rip), reg_i, reg_i | ||
393 | |||
394 | # initialize the data pointer offset as zero | ||
395 | xor %r11, %r11 | ||
396 | |||
397 | # start AES for num_initial_blocks blocks | ||
398 | mov arg5, %rax # rax = *Y0 | ||
399 | vmovdqu (%rax), \CTR # CTR = Y0 | ||
400 | vpshufb SHUF_MASK(%rip), \CTR, \CTR | ||
401 | |||
402 | |||
403 | i = (9-\num_initial_blocks) | ||
404 | setreg | ||
405 | .rep \num_initial_blocks | ||
406 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
407 | vmovdqa \CTR, reg_i | ||
408 | vpshufb SHUF_MASK(%rip), reg_i, reg_i # perform a 16Byte swap | ||
409 | i = (i+1) | ||
410 | setreg | ||
411 | .endr | ||
412 | |||
413 | vmovdqa (arg1), \T_key | ||
414 | i = (9-\num_initial_blocks) | ||
415 | setreg | ||
416 | .rep \num_initial_blocks | ||
417 | vpxor \T_key, reg_i, reg_i | ||
418 | i = (i+1) | ||
419 | setreg | ||
420 | .endr | ||
421 | |||
422 | j = 1 | ||
423 | setreg | ||
424 | .rep 9 | ||
425 | vmovdqa 16*j(arg1), \T_key | ||
426 | i = (9-\num_initial_blocks) | ||
427 | setreg | ||
428 | .rep \num_initial_blocks | ||
429 | vaesenc \T_key, reg_i, reg_i | ||
430 | i = (i+1) | ||
431 | setreg | ||
432 | .endr | ||
433 | |||
434 | j = (j+1) | ||
435 | setreg | ||
436 | .endr | ||
437 | |||
438 | |||
439 | vmovdqa 16*10(arg1), \T_key | ||
440 | i = (9-\num_initial_blocks) | ||
441 | setreg | ||
442 | .rep \num_initial_blocks | ||
443 | vaesenclast \T_key, reg_i, reg_i | ||
444 | i = (i+1) | ||
445 | setreg | ||
446 | .endr | ||
447 | |||
448 | i = (9-\num_initial_blocks) | ||
449 | setreg | ||
450 | .rep \num_initial_blocks | ||
451 | vmovdqu (arg3, %r11), \T1 | ||
452 | vpxor \T1, reg_i, reg_i | ||
453 | vmovdqu reg_i, (arg2 , %r11) # write back ciphertext for num_initial_blocks blocks | ||
454 | add $16, %r11 | ||
455 | .if \ENC_DEC == DEC | ||
456 | vmovdqa \T1, reg_i | ||
457 | .endif | ||
458 | vpshufb SHUF_MASK(%rip), reg_i, reg_i # prepare ciphertext for GHASH computations | ||
459 | i = (i+1) | ||
460 | setreg | ||
461 | .endr | ||
462 | |||
463 | |||
464 | i = (8-\num_initial_blocks) | ||
465 | j = (9-\num_initial_blocks) | ||
466 | setreg | ||
467 | GHASH_MUL_AVX reg_i, \T2, \T1, \T3, \T4, \T5, \T6 | ||
468 | |||
469 | .rep \num_initial_blocks | ||
470 | vpxor reg_i, reg_j, reg_j | ||
471 | GHASH_MUL_AVX reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks | ||
472 | i = (i+1) | ||
473 | j = (j+1) | ||
474 | setreg | ||
475 | .endr | ||
476 | # XMM8 has the combined result here | ||
477 | |||
478 | vmovdqa \XMM8, TMP1(%rsp) | ||
479 | vmovdqa \XMM8, \T3 | ||
480 | |||
481 | cmp $128, %r13 | ||
482 | jl _initial_blocks_done\@ # no need for precomputed constants | ||
483 | |||
484 | ############################################################################### | ||
485 | # prepare and encrypt eight counter blocks for the next 128 bytes of data | ||
486 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
487 | vmovdqa \CTR, \XMM1 | ||
488 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
489 | |||
490 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
491 | vmovdqa \CTR, \XMM2 | ||
492 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
493 | |||
494 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
495 | vmovdqa \CTR, \XMM3 | ||
496 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
497 | |||
498 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
499 | vmovdqa \CTR, \XMM4 | ||
500 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
501 | |||
502 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
503 | vmovdqa \CTR, \XMM5 | ||
504 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
505 | |||
506 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
507 | vmovdqa \CTR, \XMM6 | ||
508 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
509 | |||
510 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
511 | vmovdqa \CTR, \XMM7 | ||
512 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
513 | |||
514 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
515 | vmovdqa \CTR, \XMM8 | ||
516 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
517 | |||
518 | vmovdqa (arg1), \T_key | ||
519 | vpxor \T_key, \XMM1, \XMM1 | ||
520 | vpxor \T_key, \XMM2, \XMM2 | ||
521 | vpxor \T_key, \XMM3, \XMM3 | ||
522 | vpxor \T_key, \XMM4, \XMM4 | ||
523 | vpxor \T_key, \XMM5, \XMM5 | ||
524 | vpxor \T_key, \XMM6, \XMM6 | ||
525 | vpxor \T_key, \XMM7, \XMM7 | ||
526 | vpxor \T_key, \XMM8, \XMM8 | ||
527 | |||
528 | i = 1 | ||
529 | setreg | ||
530 | .rep 9 # do 9 rounds | ||
531 | vmovdqa 16*i(arg1), \T_key | ||
532 | vaesenc \T_key, \XMM1, \XMM1 | ||
533 | vaesenc \T_key, \XMM2, \XMM2 | ||
534 | vaesenc \T_key, \XMM3, \XMM3 | ||
535 | vaesenc \T_key, \XMM4, \XMM4 | ||
536 | vaesenc \T_key, \XMM5, \XMM5 | ||
537 | vaesenc \T_key, \XMM6, \XMM6 | ||
538 | vaesenc \T_key, \XMM7, \XMM7 | ||
539 | vaesenc \T_key, \XMM8, \XMM8 | ||
540 | i = (i+1) | ||
541 | setreg | ||
542 | .endr | ||
543 | |||
544 | |||
545 | vmovdqa 16*i(arg1), \T_key | ||
546 | vaesenclast \T_key, \XMM1, \XMM1 | ||
547 | vaesenclast \T_key, \XMM2, \XMM2 | ||
548 | vaesenclast \T_key, \XMM3, \XMM3 | ||
549 | vaesenclast \T_key, \XMM4, \XMM4 | ||
550 | vaesenclast \T_key, \XMM5, \XMM5 | ||
551 | vaesenclast \T_key, \XMM6, \XMM6 | ||
552 | vaesenclast \T_key, \XMM7, \XMM7 | ||
553 | vaesenclast \T_key, \XMM8, \XMM8 | ||
554 | |||
555 | vmovdqu (arg3, %r11), \T1 | ||
556 | vpxor \T1, \XMM1, \XMM1 | ||
557 | vmovdqu \XMM1, (arg2 , %r11) | ||
558 | .if \ENC_DEC == DEC | ||
559 | vmovdqa \T1, \XMM1 | ||
560 | .endif | ||
561 | |||
562 | vmovdqu 16*1(arg3, %r11), \T1 | ||
563 | vpxor \T1, \XMM2, \XMM2 | ||
564 | vmovdqu \XMM2, 16*1(arg2 , %r11) | ||
565 | .if \ENC_DEC == DEC | ||
566 | vmovdqa \T1, \XMM2 | ||
567 | .endif | ||
568 | |||
569 | vmovdqu 16*2(arg3, %r11), \T1 | ||
570 | vpxor \T1, \XMM3, \XMM3 | ||
571 | vmovdqu \XMM3, 16*2(arg2 , %r11) | ||
572 | .if \ENC_DEC == DEC | ||
573 | vmovdqa \T1, \XMM3 | ||
574 | .endif | ||
575 | |||
576 | vmovdqu 16*3(arg3, %r11), \T1 | ||
577 | vpxor \T1, \XMM4, \XMM4 | ||
578 | vmovdqu \XMM4, 16*3(arg2 , %r11) | ||
579 | .if \ENC_DEC == DEC | ||
580 | vmovdqa \T1, \XMM4 | ||
581 | .endif | ||
582 | |||
583 | vmovdqu 16*4(arg3, %r11), \T1 | ||
584 | vpxor \T1, \XMM5, \XMM5 | ||
585 | vmovdqu \XMM5, 16*4(arg2 , %r11) | ||
586 | .if \ENC_DEC == DEC | ||
587 | vmovdqa \T1, \XMM5 | ||
588 | .endif | ||
589 | |||
590 | vmovdqu 16*5(arg3, %r11), \T1 | ||
591 | vpxor \T1, \XMM6, \XMM6 | ||
592 | vmovdqu \XMM6, 16*5(arg2 , %r11) | ||
593 | .if \ENC_DEC == DEC | ||
594 | vmovdqa \T1, \XMM6 | ||
595 | .endif | ||
596 | |||
597 | vmovdqu 16*6(arg3, %r11), \T1 | ||
598 | vpxor \T1, \XMM7, \XMM7 | ||
599 | vmovdqu \XMM7, 16*6(arg2 , %r11) | ||
600 | .if \ENC_DEC == DEC | ||
601 | vmovdqa \T1, \XMM7 | ||
602 | .endif | ||
603 | |||
604 | vmovdqu 16*7(arg3, %r11), \T1 | ||
605 | vpxor \T1, \XMM8, \XMM8 | ||
606 | vmovdqu \XMM8, 16*7(arg2 , %r11) | ||
607 | .if \ENC_DEC == DEC | ||
608 | vmovdqa \T1, \XMM8 | ||
609 | .endif | ||
610 | |||
611 | add $128, %r11 | ||
612 | |||
613 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
614 | vpxor TMP1(%rsp), \XMM1, \XMM1 # combine GHASHed value with the corresponding ciphertext | ||
615 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
616 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
617 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
618 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
619 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
620 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
621 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
622 | |||
623 | ############################################################################### | ||
624 | |||
625 | _initial_blocks_done\@: | ||
626 | |||
627 | .endm | ||
628 | |||
629 | # encrypt 8 blocks at a time | ||
630 | # ghash the 8 previously encrypted ciphertext blocks | ||
631 | # arg1, arg2, arg3 are used as pointers only, not modified | ||
632 | # r11 is the data offset value | ||
633 | .macro GHASH_8_ENCRYPT_8_PARALLEL_AVX T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC | ||
634 | |||
635 | vmovdqa \XMM1, \T2 | ||
636 | vmovdqa \XMM2, TMP2(%rsp) | ||
637 | vmovdqa \XMM3, TMP3(%rsp) | ||
638 | vmovdqa \XMM4, TMP4(%rsp) | ||
639 | vmovdqa \XMM5, TMP5(%rsp) | ||
640 | vmovdqa \XMM6, TMP6(%rsp) | ||
641 | vmovdqa \XMM7, TMP7(%rsp) | ||
642 | vmovdqa \XMM8, TMP8(%rsp) | ||
643 | |||
644 | .if \loop_idx == in_order | ||
645 | vpaddd ONE(%rip), \CTR, \XMM1 # INCR CNT | ||
646 | vpaddd ONE(%rip), \XMM1, \XMM2 | ||
647 | vpaddd ONE(%rip), \XMM2, \XMM3 | ||
648 | vpaddd ONE(%rip), \XMM3, \XMM4 | ||
649 | vpaddd ONE(%rip), \XMM4, \XMM5 | ||
650 | vpaddd ONE(%rip), \XMM5, \XMM6 | ||
651 | vpaddd ONE(%rip), \XMM6, \XMM7 | ||
652 | vpaddd ONE(%rip), \XMM7, \XMM8 | ||
653 | vmovdqa \XMM8, \CTR | ||
654 | |||
655 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
656 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
657 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
658 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
659 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
660 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
661 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
662 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
663 | .else | ||
664 | vpaddd ONEf(%rip), \CTR, \XMM1 # INCR CNT | ||
665 | vpaddd ONEf(%rip), \XMM1, \XMM2 | ||
666 | vpaddd ONEf(%rip), \XMM2, \XMM3 | ||
667 | vpaddd ONEf(%rip), \XMM3, \XMM4 | ||
668 | vpaddd ONEf(%rip), \XMM4, \XMM5 | ||
669 | vpaddd ONEf(%rip), \XMM5, \XMM6 | ||
670 | vpaddd ONEf(%rip), \XMM6, \XMM7 | ||
671 | vpaddd ONEf(%rip), \XMM7, \XMM8 | ||
672 | vmovdqa \XMM8, \CTR | ||
673 | .endif | ||
674 | |||
675 | |||
676 | ####################################################################### | ||
677 | |||
678 | vmovdqu (arg1), \T1 | ||
679 | vpxor \T1, \XMM1, \XMM1 | ||
680 | vpxor \T1, \XMM2, \XMM2 | ||
681 | vpxor \T1, \XMM3, \XMM3 | ||
682 | vpxor \T1, \XMM4, \XMM4 | ||
683 | vpxor \T1, \XMM5, \XMM5 | ||
684 | vpxor \T1, \XMM6, \XMM6 | ||
685 | vpxor \T1, \XMM7, \XMM7 | ||
686 | vpxor \T1, \XMM8, \XMM8 | ||
687 | |||
688 | ####################################################################### | ||
689 | |||
690 | |||
691 | |||
692 | |||
693 | |||
694 | vmovdqu 16*1(arg1), \T1 | ||
695 | vaesenc \T1, \XMM1, \XMM1 | ||
696 | vaesenc \T1, \XMM2, \XMM2 | ||
697 | vaesenc \T1, \XMM3, \XMM3 | ||
698 | vaesenc \T1, \XMM4, \XMM4 | ||
699 | vaesenc \T1, \XMM5, \XMM5 | ||
700 | vaesenc \T1, \XMM6, \XMM6 | ||
701 | vaesenc \T1, \XMM7, \XMM7 | ||
702 | vaesenc \T1, \XMM8, \XMM8 | ||
703 | |||
704 | vmovdqu 16*2(arg1), \T1 | ||
705 | vaesenc \T1, \XMM1, \XMM1 | ||
706 | vaesenc \T1, \XMM2, \XMM2 | ||
707 | vaesenc \T1, \XMM3, \XMM3 | ||
708 | vaesenc \T1, \XMM4, \XMM4 | ||
709 | vaesenc \T1, \XMM5, \XMM5 | ||
710 | vaesenc \T1, \XMM6, \XMM6 | ||
711 | vaesenc \T1, \XMM7, \XMM7 | ||
712 | vaesenc \T1, \XMM8, \XMM8 | ||
713 | |||
714 | |||
715 | ####################################################################### | ||
716 | |||
717 | vmovdqa HashKey_8(arg1), \T5 | ||
718 | vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1 | ||
719 | vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0 | ||
720 | |||
721 | vpshufd $0b01001110, \T2, \T6 | ||
722 | vpxor \T2, \T6, \T6 | ||
723 | |||
724 | vmovdqa HashKey_8_k(arg1), \T5 | ||
725 | vpclmulqdq $0x00, \T5, \T6, \T6 | ||
726 | |||
727 | vmovdqu 16*3(arg1), \T1 | ||
728 | vaesenc \T1, \XMM1, \XMM1 | ||
729 | vaesenc \T1, \XMM2, \XMM2 | ||
730 | vaesenc \T1, \XMM3, \XMM3 | ||
731 | vaesenc \T1, \XMM4, \XMM4 | ||
732 | vaesenc \T1, \XMM5, \XMM5 | ||
733 | vaesenc \T1, \XMM6, \XMM6 | ||
734 | vaesenc \T1, \XMM7, \XMM7 | ||
735 | vaesenc \T1, \XMM8, \XMM8 | ||
736 | |||
737 | vmovdqa TMP2(%rsp), \T1 | ||
738 | vmovdqa HashKey_7(arg1), \T5 | ||
739 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
740 | vpxor \T3, \T4, \T4 | ||
741 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
742 | vpxor \T3, \T7, \T7 | ||
743 | |||
744 | vpshufd $0b01001110, \T1, \T3 | ||
745 | vpxor \T1, \T3, \T3 | ||
746 | vmovdqa HashKey_7_k(arg1), \T5 | ||
747 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
748 | vpxor \T3, \T6, \T6 | ||
749 | |||
750 | vmovdqu 16*4(arg1), \T1 | ||
751 | vaesenc \T1, \XMM1, \XMM1 | ||
752 | vaesenc \T1, \XMM2, \XMM2 | ||
753 | vaesenc \T1, \XMM3, \XMM3 | ||
754 | vaesenc \T1, \XMM4, \XMM4 | ||
755 | vaesenc \T1, \XMM5, \XMM5 | ||
756 | vaesenc \T1, \XMM6, \XMM6 | ||
757 | vaesenc \T1, \XMM7, \XMM7 | ||
758 | vaesenc \T1, \XMM8, \XMM8 | ||
759 | |||
760 | ####################################################################### | ||
761 | |||
762 | vmovdqa TMP3(%rsp), \T1 | ||
763 | vmovdqa HashKey_6(arg1), \T5 | ||
764 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
765 | vpxor \T3, \T4, \T4 | ||
766 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
767 | vpxor \T3, \T7, \T7 | ||
768 | |||
769 | vpshufd $0b01001110, \T1, \T3 | ||
770 | vpxor \T1, \T3, \T3 | ||
771 | vmovdqa HashKey_6_k(arg1), \T5 | ||
772 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
773 | vpxor \T3, \T6, \T6 | ||
774 | |||
775 | vmovdqu 16*5(arg1), \T1 | ||
776 | vaesenc \T1, \XMM1, \XMM1 | ||
777 | vaesenc \T1, \XMM2, \XMM2 | ||
778 | vaesenc \T1, \XMM3, \XMM3 | ||
779 | vaesenc \T1, \XMM4, \XMM4 | ||
780 | vaesenc \T1, \XMM5, \XMM5 | ||
781 | vaesenc \T1, \XMM6, \XMM6 | ||
782 | vaesenc \T1, \XMM7, \XMM7 | ||
783 | vaesenc \T1, \XMM8, \XMM8 | ||
784 | |||
785 | vmovdqa TMP4(%rsp), \T1 | ||
786 | vmovdqa HashKey_5(arg1), \T5 | ||
787 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
788 | vpxor \T3, \T4, \T4 | ||
789 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
790 | vpxor \T3, \T7, \T7 | ||
791 | |||
792 | vpshufd $0b01001110, \T1, \T3 | ||
793 | vpxor \T1, \T3, \T3 | ||
794 | vmovdqa HashKey_5_k(arg1), \T5 | ||
795 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
796 | vpxor \T3, \T6, \T6 | ||
797 | |||
798 | vmovdqu 16*6(arg1), \T1 | ||
799 | vaesenc \T1, \XMM1, \XMM1 | ||
800 | vaesenc \T1, \XMM2, \XMM2 | ||
801 | vaesenc \T1, \XMM3, \XMM3 | ||
802 | vaesenc \T1, \XMM4, \XMM4 | ||
803 | vaesenc \T1, \XMM5, \XMM5 | ||
804 | vaesenc \T1, \XMM6, \XMM6 | ||
805 | vaesenc \T1, \XMM7, \XMM7 | ||
806 | vaesenc \T1, \XMM8, \XMM8 | ||
807 | |||
808 | |||
809 | vmovdqa TMP5(%rsp), \T1 | ||
810 | vmovdqa HashKey_4(arg1), \T5 | ||
811 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
812 | vpxor \T3, \T4, \T4 | ||
813 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
814 | vpxor \T3, \T7, \T7 | ||
815 | |||
816 | vpshufd $0b01001110, \T1, \T3 | ||
817 | vpxor \T1, \T3, \T3 | ||
818 | vmovdqa HashKey_4_k(arg1), \T5 | ||
819 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
820 | vpxor \T3, \T6, \T6 | ||
821 | |||
822 | vmovdqu 16*7(arg1), \T1 | ||
823 | vaesenc \T1, \XMM1, \XMM1 | ||
824 | vaesenc \T1, \XMM2, \XMM2 | ||
825 | vaesenc \T1, \XMM3, \XMM3 | ||
826 | vaesenc \T1, \XMM4, \XMM4 | ||
827 | vaesenc \T1, \XMM5, \XMM5 | ||
828 | vaesenc \T1, \XMM6, \XMM6 | ||
829 | vaesenc \T1, \XMM7, \XMM7 | ||
830 | vaesenc \T1, \XMM8, \XMM8 | ||
831 | |||
832 | vmovdqa TMP6(%rsp), \T1 | ||
833 | vmovdqa HashKey_3(arg1), \T5 | ||
834 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
835 | vpxor \T3, \T4, \T4 | ||
836 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
837 | vpxor \T3, \T7, \T7 | ||
838 | |||
839 | vpshufd $0b01001110, \T1, \T3 | ||
840 | vpxor \T1, \T3, \T3 | ||
841 | vmovdqa HashKey_3_k(arg1), \T5 | ||
842 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
843 | vpxor \T3, \T6, \T6 | ||
844 | |||
845 | |||
846 | vmovdqu 16*8(arg1), \T1 | ||
847 | vaesenc \T1, \XMM1, \XMM1 | ||
848 | vaesenc \T1, \XMM2, \XMM2 | ||
849 | vaesenc \T1, \XMM3, \XMM3 | ||
850 | vaesenc \T1, \XMM4, \XMM4 | ||
851 | vaesenc \T1, \XMM5, \XMM5 | ||
852 | vaesenc \T1, \XMM6, \XMM6 | ||
853 | vaesenc \T1, \XMM7, \XMM7 | ||
854 | vaesenc \T1, \XMM8, \XMM8 | ||
855 | |||
856 | vmovdqa TMP7(%rsp), \T1 | ||
857 | vmovdqa HashKey_2(arg1), \T5 | ||
858 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
859 | vpxor \T3, \T4, \T4 | ||
860 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
861 | vpxor \T3, \T7, \T7 | ||
862 | |||
863 | vpshufd $0b01001110, \T1, \T3 | ||
864 | vpxor \T1, \T3, \T3 | ||
865 | vmovdqa HashKey_2_k(arg1), \T5 | ||
866 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
867 | vpxor \T3, \T6, \T6 | ||
868 | |||
869 | ####################################################################### | ||
870 | |||
871 | vmovdqu 16*9(arg1), \T5 | ||
872 | vaesenc \T5, \XMM1, \XMM1 | ||
873 | vaesenc \T5, \XMM2, \XMM2 | ||
874 | vaesenc \T5, \XMM3, \XMM3 | ||
875 | vaesenc \T5, \XMM4, \XMM4 | ||
876 | vaesenc \T5, \XMM5, \XMM5 | ||
877 | vaesenc \T5, \XMM6, \XMM6 | ||
878 | vaesenc \T5, \XMM7, \XMM7 | ||
879 | vaesenc \T5, \XMM8, \XMM8 | ||
880 | |||
881 | vmovdqa TMP8(%rsp), \T1 | ||
882 | vmovdqa HashKey(arg1), \T5 | ||
883 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
884 | vpxor \T3, \T4, \T4 | ||
885 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
886 | vpxor \T3, \T7, \T7 | ||
887 | |||
888 | vpshufd $0b01001110, \T1, \T3 | ||
889 | vpxor \T1, \T3, \T3 | ||
890 | vmovdqa HashKey_k(arg1), \T5 | ||
891 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
892 | vpxor \T3, \T6, \T6 | ||
893 | |||
894 | vpxor \T4, \T6, \T6 | ||
895 | vpxor \T7, \T6, \T6 | ||
896 | |||
897 | vmovdqu 16*10(arg1), \T5 | ||
898 | |||
899 | i = 0 | ||
900 | j = 1 | ||
901 | setreg | ||
902 | .rep 8 | ||
903 | vpxor 16*i(arg3, %r11), \T5, \T2 | ||
904 | .if \ENC_DEC == ENC | ||
905 | vaesenclast \T2, reg_j, reg_j | ||
906 | .else | ||
907 | vaesenclast \T2, reg_j, \T3 | ||
908 | vmovdqu 16*i(arg3, %r11), reg_j | ||
909 | vmovdqu \T3, 16*i(arg2, %r11) | ||
910 | .endif | ||
911 | i = (i+1) | ||
912 | j = (j+1) | ||
913 | setreg | ||
914 | .endr | ||
915 | ####################################################################### | ||
916 | |||
917 | |||
918 | vpslldq $8, \T6, \T3 # shift-L T3 2 DWs | ||
919 | vpsrldq $8, \T6, \T6 # shift-R T6 2 DWs | ||
920 | vpxor \T3, \T7, \T7 | ||
921 | vpxor \T4, \T6, \T6 # accumulate the results in T6:T7 | ||
922 | |||
923 | |||
924 | |||
925 | ####################################################################### | ||
926 | #first phase of the reduction | ||
927 | ####################################################################### | ||
928 | vpslld $31, \T7, \T2 # packed right shifting << 31 | ||
929 | vpslld $30, \T7, \T3 # packed right shifting << 30 | ||
930 | vpslld $25, \T7, \T4 # packed right shifting << 25 | ||
931 | |||
932 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
933 | vpxor \T4, \T2, \T2 | ||
934 | |||
935 | vpsrldq $4, \T2, \T1 # shift-R T1 1 DW | ||
936 | |||
937 | vpslldq $12, \T2, \T2 # shift-L T2 3 DWs | ||
938 | vpxor \T2, \T7, \T7 # first phase of the reduction complete | ||
939 | ####################################################################### | ||
940 | .if \ENC_DEC == ENC | ||
941 | vmovdqu \XMM1, 16*0(arg2,%r11) # Write to the Ciphertext buffer | ||
942 | vmovdqu \XMM2, 16*1(arg2,%r11) # Write to the Ciphertext buffer | ||
943 | vmovdqu \XMM3, 16*2(arg2,%r11) # Write to the Ciphertext buffer | ||
944 | vmovdqu \XMM4, 16*3(arg2,%r11) # Write to the Ciphertext buffer | ||
945 | vmovdqu \XMM5, 16*4(arg2,%r11) # Write to the Ciphertext buffer | ||
946 | vmovdqu \XMM6, 16*5(arg2,%r11) # Write to the Ciphertext buffer | ||
947 | vmovdqu \XMM7, 16*6(arg2,%r11) # Write to the Ciphertext buffer | ||
948 | vmovdqu \XMM8, 16*7(arg2,%r11) # Write to the Ciphertext buffer | ||
949 | .endif | ||
950 | |||
951 | ####################################################################### | ||
952 | #second phase of the reduction | ||
953 | vpsrld $1, \T7, \T2 # packed left shifting >> 1 | ||
954 | vpsrld $2, \T7, \T3 # packed left shifting >> 2 | ||
955 | vpsrld $7, \T7, \T4 # packed left shifting >> 7 | ||
956 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
957 | vpxor \T4, \T2, \T2 | ||
958 | |||
959 | vpxor \T1, \T2, \T2 | ||
960 | vpxor \T2, \T7, \T7 | ||
961 | vpxor \T7, \T6, \T6 # the result is in T6 | ||
962 | ####################################################################### | ||
963 | |||
964 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
965 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
966 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
967 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
968 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
969 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
970 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
971 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
972 | |||
973 | |||
974 | vpxor \T6, \XMM1, \XMM1 | ||
975 | |||
976 | |||
977 | |||
978 | .endm | ||
979 | |||
980 | |||
981 | # GHASH the last 8 ciphertext blocks. | ||
982 | .macro GHASH_LAST_8_AVX T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 | ||
983 | |||
984 | ## Karatsuba Method | ||
985 | |||
986 | |||
987 | vpshufd $0b01001110, \XMM1, \T2 | ||
988 | vpxor \XMM1, \T2, \T2 | ||
989 | vmovdqa HashKey_8(arg1), \T5 | ||
990 | vpclmulqdq $0x11, \T5, \XMM1, \T6 | ||
991 | vpclmulqdq $0x00, \T5, \XMM1, \T7 | ||
992 | |||
993 | vmovdqa HashKey_8_k(arg1), \T3 | ||
994 | vpclmulqdq $0x00, \T3, \T2, \XMM1 | ||
995 | |||
996 | ###################### | ||
997 | |||
998 | vpshufd $0b01001110, \XMM2, \T2 | ||
999 | vpxor \XMM2, \T2, \T2 | ||
1000 | vmovdqa HashKey_7(arg1), \T5 | ||
1001 | vpclmulqdq $0x11, \T5, \XMM2, \T4 | ||
1002 | vpxor \T4, \T6, \T6 | ||
1003 | |||
1004 | vpclmulqdq $0x00, \T5, \XMM2, \T4 | ||
1005 | vpxor \T4, \T7, \T7 | ||
1006 | |||
1007 | vmovdqa HashKey_7_k(arg1), \T3 | ||
1008 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1009 | vpxor \T2, \XMM1, \XMM1 | ||
1010 | |||
1011 | ###################### | ||
1012 | |||
1013 | vpshufd $0b01001110, \XMM3, \T2 | ||
1014 | vpxor \XMM3, \T2, \T2 | ||
1015 | vmovdqa HashKey_6(arg1), \T5 | ||
1016 | vpclmulqdq $0x11, \T5, \XMM3, \T4 | ||
1017 | vpxor \T4, \T6, \T6 | ||
1018 | |||
1019 | vpclmulqdq $0x00, \T5, \XMM3, \T4 | ||
1020 | vpxor \T4, \T7, \T7 | ||
1021 | |||
1022 | vmovdqa HashKey_6_k(arg1), \T3 | ||
1023 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1024 | vpxor \T2, \XMM1, \XMM1 | ||
1025 | |||
1026 | ###################### | ||
1027 | |||
1028 | vpshufd $0b01001110, \XMM4, \T2 | ||
1029 | vpxor \XMM4, \T2, \T2 | ||
1030 | vmovdqa HashKey_5(arg1), \T5 | ||
1031 | vpclmulqdq $0x11, \T5, \XMM4, \T4 | ||
1032 | vpxor \T4, \T6, \T6 | ||
1033 | |||
1034 | vpclmulqdq $0x00, \T5, \XMM4, \T4 | ||
1035 | vpxor \T4, \T7, \T7 | ||
1036 | |||
1037 | vmovdqa HashKey_5_k(arg1), \T3 | ||
1038 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1039 | vpxor \T2, \XMM1, \XMM1 | ||
1040 | |||
1041 | ###################### | ||
1042 | |||
1043 | vpshufd $0b01001110, \XMM5, \T2 | ||
1044 | vpxor \XMM5, \T2, \T2 | ||
1045 | vmovdqa HashKey_4(arg1), \T5 | ||
1046 | vpclmulqdq $0x11, \T5, \XMM5, \T4 | ||
1047 | vpxor \T4, \T6, \T6 | ||
1048 | |||
1049 | vpclmulqdq $0x00, \T5, \XMM5, \T4 | ||
1050 | vpxor \T4, \T7, \T7 | ||
1051 | |||
1052 | vmovdqa HashKey_4_k(arg1), \T3 | ||
1053 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1054 | vpxor \T2, \XMM1, \XMM1 | ||
1055 | |||
1056 | ###################### | ||
1057 | |||
1058 | vpshufd $0b01001110, \XMM6, \T2 | ||
1059 | vpxor \XMM6, \T2, \T2 | ||
1060 | vmovdqa HashKey_3(arg1), \T5 | ||
1061 | vpclmulqdq $0x11, \T5, \XMM6, \T4 | ||
1062 | vpxor \T4, \T6, \T6 | ||
1063 | |||
1064 | vpclmulqdq $0x00, \T5, \XMM6, \T4 | ||
1065 | vpxor \T4, \T7, \T7 | ||
1066 | |||
1067 | vmovdqa HashKey_3_k(arg1), \T3 | ||
1068 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1069 | vpxor \T2, \XMM1, \XMM1 | ||
1070 | |||
1071 | ###################### | ||
1072 | |||
1073 | vpshufd $0b01001110, \XMM7, \T2 | ||
1074 | vpxor \XMM7, \T2, \T2 | ||
1075 | vmovdqa HashKey_2(arg1), \T5 | ||
1076 | vpclmulqdq $0x11, \T5, \XMM7, \T4 | ||
1077 | vpxor \T4, \T6, \T6 | ||
1078 | |||
1079 | vpclmulqdq $0x00, \T5, \XMM7, \T4 | ||
1080 | vpxor \T4, \T7, \T7 | ||
1081 | |||
1082 | vmovdqa HashKey_2_k(arg1), \T3 | ||
1083 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1084 | vpxor \T2, \XMM1, \XMM1 | ||
1085 | |||
1086 | ###################### | ||
1087 | |||
1088 | vpshufd $0b01001110, \XMM8, \T2 | ||
1089 | vpxor \XMM8, \T2, \T2 | ||
1090 | vmovdqa HashKey(arg1), \T5 | ||
1091 | vpclmulqdq $0x11, \T5, \XMM8, \T4 | ||
1092 | vpxor \T4, \T6, \T6 | ||
1093 | |||
1094 | vpclmulqdq $0x00, \T5, \XMM8, \T4 | ||
1095 | vpxor \T4, \T7, \T7 | ||
1096 | |||
1097 | vmovdqa HashKey_k(arg1), \T3 | ||
1098 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1099 | |||
1100 | vpxor \T2, \XMM1, \XMM1 | ||
1101 | vpxor \T6, \XMM1, \XMM1 | ||
1102 | vpxor \T7, \XMM1, \T2 | ||
1103 | |||
1104 | |||
1105 | |||
1106 | |||
1107 | vpslldq $8, \T2, \T4 | ||
1108 | vpsrldq $8, \T2, \T2 | ||
1109 | |||
1110 | vpxor \T4, \T7, \T7 | ||
1111 | vpxor \T2, \T6, \T6 # <T6:T7> holds the result of | ||
1112 | # the accumulated carry-less multiplications | ||
1113 | |||
1114 | ####################################################################### | ||
1115 | #first phase of the reduction | ||
1116 | vpslld $31, \T7, \T2 # packed right shifting << 31 | ||
1117 | vpslld $30, \T7, \T3 # packed right shifting << 30 | ||
1118 | vpslld $25, \T7, \T4 # packed right shifting << 25 | ||
1119 | |||
1120 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
1121 | vpxor \T4, \T2, \T2 | ||
1122 | |||
1123 | vpsrldq $4, \T2, \T1 # shift-R T1 1 DW | ||
1124 | |||
1125 | vpslldq $12, \T2, \T2 # shift-L T2 3 DWs | ||
1126 | vpxor \T2, \T7, \T7 # first phase of the reduction complete | ||
1127 | ####################################################################### | ||
1128 | |||
1129 | |||
1130 | #second phase of the reduction | ||
1131 | vpsrld $1, \T7, \T2 # packed left shifting >> 1 | ||
1132 | vpsrld $2, \T7, \T3 # packed left shifting >> 2 | ||
1133 | vpsrld $7, \T7, \T4 # packed left shifting >> 7 | ||
1134 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
1135 | vpxor \T4, \T2, \T2 | ||
1136 | |||
1137 | vpxor \T1, \T2, \T2 | ||
1138 | vpxor \T2, \T7, \T7 | ||
1139 | vpxor \T7, \T6, \T6 # the result is in T6 | ||
1140 | |||
1141 | .endm | ||
1142 | |||
1143 | |||
1144 | # combined macro for the GCM encrypt and decrypt functions | ||
1145 | # clobbers all xmm registers | ||
1146 | # clobbers r10, r11, r12, r13, r14, r15 | ||
1147 | .macro GCM_ENC_DEC_AVX ENC_DEC | ||
1148 | |||
1149 | # the number of pushes must equal STACK_OFFSET/8 | ||
1150 | push %r12 | ||
1151 | push %r13 | ||
1152 | push %r14 | ||
1153 | push %r15 | ||
1154 | |||
1155 | mov %rsp, %r14 | ||
1156 | |||
1157 | |||
1158 | |||
1159 | |||
1160 | sub $VARIABLE_OFFSET, %rsp | ||
1161 | and $~63, %rsp # align rsp to 64 bytes | ||
1162 | |||
1163 | |||
1164 | vmovdqu HashKey(arg1), %xmm13 # xmm13 = HashKey | ||
1165 | |||
1166 | mov arg4, %r13 # save the number of bytes of plaintext/ciphertext | ||
1167 | and $-16, %r13 # r13 = r13 - (r13 mod 16) | ||
1168 | |||
1169 | mov %r13, %r12 | ||
1170 | shr $4, %r12 | ||
1171 | and $7, %r12 | ||
1172 | jz _initial_num_blocks_is_0\@ | ||
1173 | |||
1174 | cmp $7, %r12 | ||
1175 | je _initial_num_blocks_is_7\@ | ||
1176 | cmp $6, %r12 | ||
1177 | je _initial_num_blocks_is_6\@ | ||
1178 | cmp $5, %r12 | ||
1179 | je _initial_num_blocks_is_5\@ | ||
1180 | cmp $4, %r12 | ||
1181 | je _initial_num_blocks_is_4\@ | ||
1182 | cmp $3, %r12 | ||
1183 | je _initial_num_blocks_is_3\@ | ||
1184 | cmp $2, %r12 | ||
1185 | je _initial_num_blocks_is_2\@ | ||
1186 | |||
1187 | jmp _initial_num_blocks_is_1\@ | ||
1188 | |||
1189 | _initial_num_blocks_is_7\@: | ||
1190 | INITIAL_BLOCKS_AVX 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1191 | sub $16*7, %r13 | ||
1192 | jmp _initial_blocks_encrypted\@ | ||
1193 | |||
1194 | _initial_num_blocks_is_6\@: | ||
1195 | INITIAL_BLOCKS_AVX 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1196 | sub $16*6, %r13 | ||
1197 | jmp _initial_blocks_encrypted\@ | ||
1198 | |||
1199 | _initial_num_blocks_is_5\@: | ||
1200 | INITIAL_BLOCKS_AVX 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1201 | sub $16*5, %r13 | ||
1202 | jmp _initial_blocks_encrypted\@ | ||
1203 | |||
1204 | _initial_num_blocks_is_4\@: | ||
1205 | INITIAL_BLOCKS_AVX 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1206 | sub $16*4, %r13 | ||
1207 | jmp _initial_blocks_encrypted\@ | ||
1208 | |||
1209 | _initial_num_blocks_is_3\@: | ||
1210 | INITIAL_BLOCKS_AVX 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1211 | sub $16*3, %r13 | ||
1212 | jmp _initial_blocks_encrypted\@ | ||
1213 | |||
1214 | _initial_num_blocks_is_2\@: | ||
1215 | INITIAL_BLOCKS_AVX 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1216 | sub $16*2, %r13 | ||
1217 | jmp _initial_blocks_encrypted\@ | ||
1218 | |||
1219 | _initial_num_blocks_is_1\@: | ||
1220 | INITIAL_BLOCKS_AVX 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1221 | sub $16*1, %r13 | ||
1222 | jmp _initial_blocks_encrypted\@ | ||
1223 | |||
1224 | _initial_num_blocks_is_0\@: | ||
1225 | INITIAL_BLOCKS_AVX 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1226 | |||
1227 | |||
1228 | _initial_blocks_encrypted\@: | ||
1229 | cmp $0, %r13 | ||
1230 | je _zero_cipher_left\@ | ||
1231 | |||
1232 | sub $128, %r13 | ||
1233 | je _eight_cipher_left\@ | ||
1234 | |||
1235 | |||
1236 | |||
1237 | |||
1238 | vmovd %xmm9, %r15d | ||
1239 | and $255, %r15d | ||
1240 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1241 | |||
1242 | |||
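        # r15d tracks the low byte of the counter: as long as adding 8 cannot
        # carry out of that byte, the counter can stay byte-reflected and be
        # incremented with ONEf (out_order); otherwise the in_order path below
        # shuffles it back to normal order first so the carry propagates.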
1243 | _encrypt_by_8_new\@: | ||
1244 | cmp $(255-8), %r15d | ||
1245 | jg _encrypt_by_8\@ | ||
1246 | |||
1247 | |||
1248 | |||
1249 | add $8, %r15b | ||
1250 | GHASH_8_ENCRYPT_8_PARALLEL_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC | ||
1251 | add $128, %r11 | ||
1252 | sub $128, %r13 | ||
1253 | jne _encrypt_by_8_new\@ | ||
1254 | |||
1255 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1256 | jmp _eight_cipher_left\@ | ||
1257 | |||
1258 | _encrypt_by_8\@: | ||
1259 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1260 | add $8, %r15b | ||
1261 | GHASH_8_ENCRYPT_8_PARALLEL_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC | ||
1262 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1263 | add $128, %r11 | ||
1264 | sub $128, %r13 | ||
1265 | jne _encrypt_by_8_new\@ | ||
1266 | |||
1267 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1268 | |||
1269 | |||
1270 | |||
1271 | |||
1272 | _eight_cipher_left\@: | ||
1273 | GHASH_LAST_8_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8 | ||
1274 | |||
1275 | |||
1276 | _zero_cipher_left\@: | ||
1277 | cmp $16, arg4 | ||
1278 | jl _only_less_than_16\@ | ||
1279 | |||
1280 | mov arg4, %r13 | ||
1281 | and $15, %r13 # r13 = (arg4 mod 16) | ||
1282 | |||
1283 | je _multiple_of_16_bytes\@ | ||
1284 | |||
1285 | # handle the last <16 Byte block separately | ||
1286 | |||
1287 | |||
1288 | vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn | ||
1289 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1290 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) | ||
1291 | |||
1292 | sub $16, %r11 | ||
1293 | add %r13, %r11 | ||
1294 | vmovdqu (arg3, %r11), %xmm1 # receive the last <16 Byte block | ||
1295 | |||
1296 | lea SHIFT_MASK+16(%rip), %r12 | ||
1297 | sub %r13, %r12 # adjust the shuffle mask pointer to be | ||
1298 | # able to shift 16-r13 bytes (r13 is the | ||
1299 | # number of bytes in plaintext mod 16) | ||
1300 | vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask | ||
1301 | vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes | ||
1302 | jmp _final_ghash_mul\@ | ||
1303 | |||
1304 | _only_less_than_16\@: | ||
1305 | # check for 0 length | ||
1306 | mov arg4, %r13 | ||
1307 | and $15, %r13 # r13 = (arg4 mod 16) | ||
1308 | |||
1309 | je _multiple_of_16_bytes\@ | ||
1310 | |||
1311 | # handle the last <16 Byte block separately | ||
1312 | |||
1313 | |||
1314 | vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn | ||
1315 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1316 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) | ||
1317 | |||
1318 | |||
1319 | lea SHIFT_MASK+16(%rip), %r12 | ||
1320 | sub %r13, %r12 # adjust the shuffle mask pointer to be | ||
1321 | # able to shift 16-r13 bytes (r13 is the | ||
1322 | # number of bytes in plaintext mod 16) | ||
1323 | |||
1324 | _get_last_16_byte_loop\@: | ||
1325 | movb (arg3, %r11), %al | ||
1326 | movb %al, TMP1 (%rsp , %r11) | ||
1327 | add $1, %r11 | ||
1328 | cmp %r13, %r11 | ||
1329 | jne _get_last_16_byte_loop\@ | ||
1330 | |||
1331 | vmovdqu TMP1(%rsp), %xmm1 | ||
1332 | |||
1333 | sub $16, %r11 | ||
1334 | |||
1335 | _final_ghash_mul\@: | ||
1336 | .if \ENC_DEC == DEC | ||
1337 | vmovdqa %xmm1, %xmm2 | ||
1338 | vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) | ||
1339 | vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to | ||
1340 | # mask out top 16-r13 bytes of xmm9 | ||
1341 | vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 | ||
1342 | vpand %xmm1, %xmm2, %xmm2 | ||
1343 | vpshufb SHUF_MASK(%rip), %xmm2, %xmm2 | ||
1344 | vpxor %xmm2, %xmm14, %xmm14 | ||
1345 | #GHASH computation for the last <16 Byte block | ||
1346 | GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1347 | sub %r13, %r11 | ||
1348 | add $16, %r11 | ||
1349 | .else | ||
1350 | vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) | ||
1351 | vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to | ||
1352 | # mask out top 16-r13 bytes of xmm9 | ||
1353 | vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 | ||
1354 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1355 | vpxor %xmm9, %xmm14, %xmm14 | ||
1356 | #GHASH computation for the last <16 Byte block | ||
1357 | GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1358 | sub %r13, %r11 | ||
1359 | add $16, %r11 | ||
1360 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext | ||
1361 | .endif | ||
1362 | |||
1363 | |||
1364 | ############################# | ||
1365 | # output r13 Bytes | ||
1366 | vmovq %xmm9, %rax | ||
1367 | cmp $8, %r13 | ||
1368 | jle _less_than_8_bytes_left\@ | ||
1369 | |||
1370 | mov %rax, (arg2 , %r11) | ||
1371 | add $8, %r11 | ||
1372 | vpsrldq $8, %xmm9, %xmm9 | ||
1373 | vmovq %xmm9, %rax | ||
1374 | sub $8, %r13 | ||
1375 | |||
1376 | _less_than_8_bytes_left\@: | ||
1377 | movb %al, (arg2 , %r11) | ||
1378 | add $1, %r11 | ||
1379 | shr $8, %rax | ||
1380 | sub $1, %r13 | ||
1381 | jne _less_than_8_bytes_left\@ | ||
1382 | ############################# | ||
1383 | |||
1384 | _multiple_of_16_bytes\@: | ||
1385 | mov arg7, %r12 # r12 = aadLen (number of bytes) | ||
1386 | shl $3, %r12 # convert into number of bits | ||
1387 | vmovd %r12d, %xmm15 # len(A) in xmm15 | ||
1388 | |||
1389 | shl $3, arg4 # len(C) in bits (*8) | ||
1390 | vmovq arg4, %xmm1 | ||
1391 | vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000 | ||
1392 | vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C) | ||
1393 | |||
1394 | vpxor %xmm15, %xmm14, %xmm14 | ||
1395 | GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation | ||
1396 | vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap | ||
1397 | |||
1398 | mov arg5, %rax # rax = *Y0 | ||
1399 | vmovdqu (%rax), %xmm9 # xmm9 = Y0 | ||
1400 | |||
1401 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Y0) | ||
1402 | |||
1403 | vpxor %xmm14, %xmm9, %xmm9 | ||
1404 | |||
1405 | |||
1406 | |||
1407 | _return_T\@: | ||
1408 | mov arg8, %r10 # r10 = authTag | ||
1409 | mov arg9, %r11 # r11 = auth_tag_len | ||
1410 | |||
1411 | cmp $16, %r11 | ||
1412 | je _T_16\@ | ||
1413 | |||
1414 | cmp $12, %r11 | ||
1415 | je _T_12\@ | ||
1416 | |||
1417 | _T_8\@: | ||
1418 | vmovq %xmm9, %rax | ||
1419 | mov %rax, (%r10) | ||
1420 | jmp _return_T_done\@ | ||
1421 | _T_12\@: | ||
1422 | vmovq %xmm9, %rax | ||
1423 | mov %rax, (%r10) | ||
1424 | vpsrldq $8, %xmm9, %xmm9 | ||
1425 | vmovd %xmm9, %eax | ||
1426 | mov %eax, 8(%r10) | ||
1427 | jmp _return_T_done\@ | ||
1428 | |||
1429 | _T_16\@: | ||
1430 | vmovdqu %xmm9, (%r10) | ||
1431 | |||
1432 | _return_T_done\@: | ||
1433 | mov %r14, %rsp | ||
1434 | |||
1435 | pop %r15 | ||
1436 | pop %r14 | ||
1437 | pop %r13 | ||
1438 | pop %r12 | ||
1439 | .endm | ||
1440 | |||
1441 | |||
1442 | ############################################################# | ||
1443 | #void aesni_gcm_precomp_avx_gen2 | ||
1444 | # (gcm_data *my_ctx_data, | ||
1445 | # u8 *hash_subkey); /* H, the hash subkey input. Data starts on a 16-byte boundary. */ | ||
1446 | ############################################################# | ||
1447 | ENTRY(aesni_gcm_precomp_avx_gen2) | ||
1448 | # the number of pushes must equal STACK_OFFSET/8 | ||
1449 | push %r12 | ||
1450 | push %r13 | ||
1451 | push %r14 | ||
1452 | push %r15 | ||
1453 | |||
1454 | mov %rsp, %r14 | ||
1455 | |||
1456 | |||
1457 | |||
1458 | sub $VARIABLE_OFFSET, %rsp | ||
1459 | and $~63, %rsp # align rsp to 64 bytes | ||
1460 | |||
1461 | vmovdqu (arg2), %xmm6 # xmm6 = HashKey | ||
1462 | |||
1463 | vpshufb SHUF_MASK(%rip), %xmm6, %xmm6 | ||
1464 | ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey | ||
1465 | vmovdqa %xmm6, %xmm2 | ||
1466 | vpsllq $1, %xmm6, %xmm6 | ||
1467 | vpsrlq $63, %xmm2, %xmm2 | ||
1468 | vmovdqa %xmm2, %xmm1 | ||
1469 | vpslldq $8, %xmm2, %xmm2 | ||
1470 | vpsrldq $8, %xmm1, %xmm1 | ||
1471 | vpor %xmm2, %xmm6, %xmm6 | ||
1472 | #reduction | ||
1473 | vpshufd $0b00100100, %xmm1, %xmm2 | ||
1474 | vpcmpeqd TWOONE(%rip), %xmm2, %xmm2 | ||
1475 | vpand POLY(%rip), %xmm2, %xmm2 | ||
1476 | vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly | ||
1477 | ####################################################################### | ||
1478 | vmovdqa %xmm6, HashKey(arg1) # store HashKey<<1 mod poly | ||
1479 | |||
1480 | |||
1481 | PRECOMPUTE_AVX %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5 | ||
1482 | |||
1483 | mov %r14, %rsp | ||
1484 | |||
1485 | pop %r15 | ||
1486 | pop %r14 | ||
1487 | pop %r13 | ||
1488 | pop %r12 | ||
1489 | ret | ||
1490 | ENDPROC(aesni_gcm_precomp_avx_gen2) | ||
1491 | |||
1492 | ############################################################################### | ||
1493 | #void aesni_gcm_enc_avx_gen2( | ||
1494 | # gcm_data *my_ctx_data, /* aligned to 16 Bytes */ | ||
1495 | # u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */ | ||
1496 | # const u8 *in, /* Plaintext input */ | ||
1497 | # u64 plaintext_len, /* Length of data in Bytes for encryption. */ | ||
1498 | # u8 *iv, /* Pre-counter block j0: 4 byte salt | ||
1499 | # (from Security Association) concatenated with 8 byte | ||
1500 | # Initialisation Vector (from IPSec ESP Payload) | ||
1501 | # concatenated with 0x00000001. 16-byte aligned pointer. */ | ||
1502 | # const u8 *aad, /* Additional Authentication Data (AAD)*/ | ||
1503 | # u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ | ||
1504 | # u8 *auth_tag, /* Authenticated Tag output. */ | ||
1505 | # u64 auth_tag_len); /* Authenticated Tag Length in bytes. | ||
1506 | # Valid values are 16 (most likely), 12 or 8. */ | ||
1507 | ############################################################################### | ||
1508 | ENTRY(aesni_gcm_enc_avx_gen2) | ||
1509 | GCM_ENC_DEC_AVX ENC | ||
1510 | ret | ||
1511 | ENDPROC(aesni_gcm_enc_avx_gen2) | ||
1512 | |||
1513 | ############################################################################### | ||
1514 | #void aesni_gcm_dec_avx_gen2( | ||
1515 | # gcm_data *my_ctx_data, /* aligned to 16 Bytes */ | ||
1516 | # u8 *out, /* Plaintext output. Decrypt in-place is allowed. */ | ||
1517 | # const u8 *in, /* Ciphertext input */ | ||
1518 | # u64 plaintext_len, /* Length of data in Bytes for decryption. */ | ||
1519 | # u8 *iv, /* Pre-counter block j0: 4 byte salt | ||
1520 | # (from Security Association) concatenated with 8 byte | ||
1521 | # Initialisation Vector (from IPSec ESP Payload) | ||
1522 | # concatenated with 0x00000001. 16-byte aligned pointer. */ | ||
1523 | # const u8 *aad, /* Additional Authentication Data (AAD)*/ | ||
1524 | # u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ | ||
1525 | # u8 *auth_tag, /* Authenticated Tag output. */ | ||
1526 | # u64 auth_tag_len); /* Authenticated Tag Length in bytes. | ||
1527 | # Valid values are 16 (most likely), 12 or 8. */ | ||
1528 | ############################################################################### | ||
1529 | ENTRY(aesni_gcm_dec_avx_gen2) | ||
1530 | GCM_ENC_DEC_AVX DEC | ||
1531 | ret | ||
1532 | ENDPROC(aesni_gcm_dec_avx_gen2) | ||
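
###############################################################################
# A rough usage sketch from C (illustration only; in the kernel the callers
# live in the aesni-intel glue code, which also expands the AES key, derives
# the hash subkey H = E(K, 0^128) and manages the FPU context):
#
#       aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);         /* once per key         */
#       aesni_gcm_enc_avx_gen2(ctx, dst, src, len, j0,        /* j0 = pre-counter block */
#                              aad, aad_len, tag, 16);
#       aesni_gcm_dec_avx_gen2(ctx, dst, src, len, j0,
#                              aad, aad_len, tag, 16);
###############################################################################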
1533 | #endif /* CONFIG_AS_AVX */ | ||
1534 | |||
1535 | #ifdef CONFIG_AS_AVX2 | ||
1536 | ############################################################################### | ||
1537 | # GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) | ||
1538 | # Input: A and B (128-bits each, bit-reflected) | ||
1539 | # Output: C = A*B*x mod poly, (i.e. >>1 ) | ||
1540 | # To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input | ||
1541 | # GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. | ||
1542 | ############################################################################### | ||
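# Unlike the AVX version above, this variant uses four vpclmulqdq products
# (schoolbook multiply) instead of Karatsuba, and performs the reduction with
# carry-less multiplies against the precomputed POLY2 constant.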
1543 | .macro GHASH_MUL_AVX2 GH HK T1 T2 T3 T4 T5 | ||
1544 | |||
1545 | vpclmulqdq $0x11,\HK,\GH,\T1 # T1 = a1*b1 | ||
1546 | vpclmulqdq $0x00,\HK,\GH,\T2 # T2 = a0*b0 | ||
1547 | vpclmulqdq $0x01,\HK,\GH,\T3 # T3 = a1*b0 | ||
1548 | vpclmulqdq $0x10,\HK,\GH,\GH # GH = a0*b1 | ||
1549 | vpxor \T3, \GH, \GH | ||
1550 | |||
1551 | |||
1552 | vpsrldq $8 , \GH, \T3 # shift-R GH 2 DWs | ||
1553 | vpslldq $8 , \GH, \GH # shift-L GH 2 DWs | ||
1554 | |||
1555 | vpxor \T3, \T1, \T1 | ||
1556 | vpxor \T2, \GH, \GH | ||
1557 | |||
1558 | ####################################################################### | ||
1559 | #first phase of the reduction | ||
1560 | vmovdqa POLY2(%rip), \T3 | ||
1561 | |||
1562 | vpclmulqdq $0x01, \GH, \T3, \T2 | ||
1563 | vpslldq $8, \T2, \T2 # shift-L T2 2 DWs | ||
1564 | |||
1565 | vpxor \T2, \GH, \GH # first phase of the reduction complete | ||
1566 | ####################################################################### | ||
1567 | #second phase of the reduction | ||
1568 | vpclmulqdq $0x00, \GH, \T3, \T2 | ||
1569 | vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R) | ||
1570 | |||
1571 | vpclmulqdq $0x10, \GH, \T3, \GH | ||
1572 | vpslldq $4, \GH, \GH # shift-L GH 1 DW (Shift-L 1-DW to obtain result with no shifts) | ||
1573 | |||
1574 | vpxor \T2, \GH, \GH # second phase of the reduction complete | ||
1575 | ####################################################################### | ||
1576 | vpxor \T1, \GH, \GH # the result is in GH | ||
1577 | |||
1578 | |||
1579 | .endm | ||
1580 | |||
1581 | .macro PRECOMPUTE_AVX2 HK T1 T2 T3 T4 T5 T6 | ||
1582 | |||
1583 | # precompute HashKey^2 through HashKey^8, each <<1 mod poly (no Karatsuba _k values are needed here) | ||
1584 | vmovdqa \HK, \T5 | ||
1585 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly | ||
1586 | vmovdqa \T5, HashKey_2(arg1) # [HashKey_2] = HashKey^2<<1 mod poly | ||
1587 | |||
1588 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly | ||
1589 | vmovdqa \T5, HashKey_3(arg1) | ||
1590 | |||
1591 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly | ||
1592 | vmovdqa \T5, HashKey_4(arg1) | ||
1593 | |||
1594 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly | ||
1595 | vmovdqa \T5, HashKey_5(arg1) | ||
1596 | |||
1597 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly | ||
1598 | vmovdqa \T5, HashKey_6(arg1) | ||
1599 | |||
1600 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly | ||
1601 | vmovdqa \T5, HashKey_7(arg1) | ||
1602 | |||
1603 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly | ||
1604 | vmovdqa \T5, HashKey_8(arg1) | ||
1605 | |||
1606 | .endm | ||
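PRECOMPUTE_AVX2 stores HashKey^2 .. HashKey^8 (each <<1 mod poly, in the reflected representation) so the main loop can fold eight GHASH multiplications per pass. In terms of the reference sketch above, the table is just the consecutive powers of the hash subkey; a hypothetical host-side check could build it as:

	uint8_t h[16];		/* assumed to hold the hash subkey H */
	uint8_t hpow[8][16];	/* hpow[i] = H^(i+1); names are illustrative */

	memcpy(hpow[0], h, 16);
	for (int i = 1; i < 8; i++)
		gf128_mul(hpow[i - 1], h, hpow[i]);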
1607 | |||
1608 | |||
1609 | ## if a = number of total plaintext bytes | ||
1610 | ## b = floor(a/16) | ||
1611 | ## num_initial_blocks = b mod 8# | ||
1612 | ## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext | ||
1613 | ## r10, r11, r12, rax are clobbered | ||
1614 | ## arg1, arg2, arg3, r14 are used as a pointer only, not modified | ||
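## (e.g. for a 1000 byte message: b = floor(1000/16) = 62 full blocks,
## num_initial_blocks = 62 mod 8 = 6, and the remaining 56 blocks are then
## processed eight at a time by GHASH_8_ENCRYPT_8_PARALLEL_AVX2)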
1615 | |||
1616 | .macro INITIAL_BLOCKS_AVX2 num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER | ||
1617 | i = (8-\num_initial_blocks) | ||
1618 | setreg | ||
1619 | |||
1620 | mov arg6, %r10 # r10 = AAD | ||
1621 | mov arg7, %r12 # r12 = aadLen | ||
1622 | |||
1623 | |||
1624 | mov %r12, %r11 | ||
1625 | |||
1626 | vpxor reg_i, reg_i, reg_i | ||
1627 | _get_AAD_loop\@: | ||
1628 | vmovd (%r10), \T1 | ||
1629 | vpslldq $12, \T1, \T1 | ||
1630 | vpsrldq $4, reg_i, reg_i | ||
1631 | vpxor \T1, reg_i, reg_i | ||
1632 | |||
1633 | add $4, %r10 | ||
1634 | sub $4, %r12 | ||
1635 | jg _get_AAD_loop\@ | ||
1636 | |||
1637 | |||
1638 | cmp $16, %r11 | ||
1639 | je _get_AAD_loop2_done\@ | ||
1640 | mov $16, %r12 | ||
1641 | |||
1642 | _get_AAD_loop2\@: | ||
1643 | vpsrldq $4, reg_i, reg_i | ||
1644 | sub $4, %r12 | ||
1645 | cmp %r11, %r12 | ||
1646 | jg _get_AAD_loop2\@ | ||
1647 | |||
1648 | _get_AAD_loop2_done\@: | ||
1649 | |||
1650 | #byte-reflect the AAD data | ||
1651 | vpshufb SHUF_MASK(%rip), reg_i, reg_i | ||
1652 | |||
1653 | # initialize the data pointer offset as zero | ||
1654 | xor %r11, %r11 | ||
1655 | |||
1656 | # start AES for num_initial_blocks blocks | ||
1657 | mov arg5, %rax # rax = *Y0 | ||
1658 | vmovdqu (%rax), \CTR # CTR = Y0 | ||
1659 | vpshufb SHUF_MASK(%rip), \CTR, \CTR | ||
1660 | |||
1661 | |||
1662 | i = (9-\num_initial_blocks) | ||
1663 | setreg | ||
1664 | .rep \num_initial_blocks | ||
1665 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1666 | vmovdqa \CTR, reg_i | ||
1667 | vpshufb SHUF_MASK(%rip), reg_i, reg_i # perform a 16Byte swap | ||
1668 | i = (i+1) | ||
1669 | setreg | ||
1670 | .endr | ||
1671 | |||
1672 | vmovdqa (arg1), \T_key | ||
1673 | i = (9-\num_initial_blocks) | ||
1674 | setreg | ||
1675 | .rep \num_initial_blocks | ||
1676 | vpxor \T_key, reg_i, reg_i | ||
1677 | i = (i+1) | ||
1678 | setreg | ||
1679 | .endr | ||
1680 | |||
1681 | j = 1 | ||
1682 | setreg | ||
1683 | .rep 9 | ||
1684 | vmovdqa 16*j(arg1), \T_key | ||
1685 | i = (9-\num_initial_blocks) | ||
1686 | setreg | ||
1687 | .rep \num_initial_blocks | ||
1688 | vaesenc \T_key, reg_i, reg_i | ||
1689 | i = (i+1) | ||
1690 | setreg | ||
1691 | .endr | ||
1692 | |||
1693 | j = (j+1) | ||
1694 | setreg | ||
1695 | .endr | ||
1696 | |||
1697 | |||
1698 | vmovdqa 16*10(arg1), \T_key | ||
1699 | i = (9-\num_initial_blocks) | ||
1700 | setreg | ||
1701 | .rep \num_initial_blocks | ||
1702 | vaesenclast \T_key, reg_i, reg_i | ||
1703 | i = (i+1) | ||
1704 | setreg | ||
1705 | .endr | ||
1706 | |||
1707 | i = (9-\num_initial_blocks) | ||
1708 | setreg | ||
1709 | .rep \num_initial_blocks | ||
1710 | vmovdqu (arg3, %r11), \T1 | ||
1711 | vpxor \T1, reg_i, reg_i | ||
1712 | vmovdqu reg_i, (arg2 , %r11) # write back ciphertext for | ||
1713 | # num_initial_blocks blocks | ||
1714 | add $16, %r11 | ||
1715 | .if \ENC_DEC == DEC | ||
1716 | vmovdqa \T1, reg_i | ||
1717 | .endif | ||
1718 | vpshufb SHUF_MASK(%rip), reg_i, reg_i # prepare ciphertext for GHASH computations | ||
1719 | i = (i+1) | ||
1720 | setreg | ||
1721 | .endr | ||
1722 | |||
1723 | |||
1724 | i = (8-\num_initial_blocks) | ||
1725 | j = (9-\num_initial_blocks) | ||
1726 | setreg | ||
1727 | GHASH_MUL_AVX2 reg_i, \T2, \T1, \T3, \T4, \T5, \T6 | ||
1728 | |||
1729 | .rep \num_initial_blocks | ||
1730 | vpxor reg_i, reg_j, reg_j | ||
1731 | GHASH_MUL_AVX2 reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks | ||
1732 | i = (i+1) | ||
1733 | j = (j+1) | ||
1734 | setreg | ||
1735 | .endr | ||
1736 | # XMM8 has the combined result here | ||
1737 | |||
1738 | vmovdqa \XMM8, TMP1(%rsp) | ||
1739 | vmovdqa \XMM8, \T3 | ||
1740 | |||
1741 | cmp $128, %r13 | ||
1742 | jl _initial_blocks_done\@ # no need for precomputed constants | ||
1743 | |||
1744 | ############################################################################### | ||
1745 | # prepare (and byte-swap) 8 counter blocks for the next 8 parallel encryptions | ||
1746 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1747 | vmovdqa \CTR, \XMM1 | ||
1748 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
1749 | |||
1750 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1751 | vmovdqa \CTR, \XMM2 | ||
1752 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
1753 | |||
1754 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1755 | vmovdqa \CTR, \XMM3 | ||
1756 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
1757 | |||
1758 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1759 | vmovdqa \CTR, \XMM4 | ||
1760 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
1761 | |||
1762 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1763 | vmovdqa \CTR, \XMM5 | ||
1764 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
1765 | |||
1766 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1767 | vmovdqa \CTR, \XMM6 | ||
1768 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
1769 | |||
1770 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1771 | vmovdqa \CTR, \XMM7 | ||
1772 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
1773 | |||
1774 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1775 | vmovdqa \CTR, \XMM8 | ||
1776 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
1777 | |||
1778 | vmovdqa (arg1), \T_key | ||
1779 | vpxor \T_key, \XMM1, \XMM1 | ||
1780 | vpxor \T_key, \XMM2, \XMM2 | ||
1781 | vpxor \T_key, \XMM3, \XMM3 | ||
1782 | vpxor \T_key, \XMM4, \XMM4 | ||
1783 | vpxor \T_key, \XMM5, \XMM5 | ||
1784 | vpxor \T_key, \XMM6, \XMM6 | ||
1785 | vpxor \T_key, \XMM7, \XMM7 | ||
1786 | vpxor \T_key, \XMM8, \XMM8 | ||
1787 | |||
1788 | i = 1 | ||
1789 | setreg | ||
1790 | .rep 9 # do 9 rounds | ||
1791 | vmovdqa 16*i(arg1), \T_key | ||
1792 | vaesenc \T_key, \XMM1, \XMM1 | ||
1793 | vaesenc \T_key, \XMM2, \XMM2 | ||
1794 | vaesenc \T_key, \XMM3, \XMM3 | ||
1795 | vaesenc \T_key, \XMM4, \XMM4 | ||
1796 | vaesenc \T_key, \XMM5, \XMM5 | ||
1797 | vaesenc \T_key, \XMM6, \XMM6 | ||
1798 | vaesenc \T_key, \XMM7, \XMM7 | ||
1799 | vaesenc \T_key, \XMM8, \XMM8 | ||
1800 | i = (i+1) | ||
1801 | setreg | ||
1802 | .endr | ||
1803 | |||
1804 | |||
1805 | vmovdqa 16*i(arg1), \T_key | ||
1806 | vaesenclast \T_key, \XMM1, \XMM1 | ||
1807 | vaesenclast \T_key, \XMM2, \XMM2 | ||
1808 | vaesenclast \T_key, \XMM3, \XMM3 | ||
1809 | vaesenclast \T_key, \XMM4, \XMM4 | ||
1810 | vaesenclast \T_key, \XMM5, \XMM5 | ||
1811 | vaesenclast \T_key, \XMM6, \XMM6 | ||
1812 | vaesenclast \T_key, \XMM7, \XMM7 | ||
1813 | vaesenclast \T_key, \XMM8, \XMM8 | ||
1814 | |||
1815 | vmovdqu (arg3, %r11), \T1 | ||
1816 | vpxor \T1, \XMM1, \XMM1 | ||
1817 | vmovdqu \XMM1, (arg2 , %r11) | ||
1818 | .if \ENC_DEC == DEC | ||
1819 | vmovdqa \T1, \XMM1 | ||
1820 | .endif | ||
1821 | |||
1822 | vmovdqu 16*1(arg3, %r11), \T1 | ||
1823 | vpxor \T1, \XMM2, \XMM2 | ||
1824 | vmovdqu \XMM2, 16*1(arg2 , %r11) | ||
1825 | .if \ENC_DEC == DEC | ||
1826 | vmovdqa \T1, \XMM2 | ||
1827 | .endif | ||
1828 | |||
1829 | vmovdqu 16*2(arg3, %r11), \T1 | ||
1830 | vpxor \T1, \XMM3, \XMM3 | ||
1831 | vmovdqu \XMM3, 16*2(arg2 , %r11) | ||
1832 | .if \ENC_DEC == DEC | ||
1833 | vmovdqa \T1, \XMM3 | ||
1834 | .endif | ||
1835 | |||
1836 | vmovdqu 16*3(arg3, %r11), \T1 | ||
1837 | vpxor \T1, \XMM4, \XMM4 | ||
1838 | vmovdqu \XMM4, 16*3(arg2 , %r11) | ||
1839 | .if \ENC_DEC == DEC | ||
1840 | vmovdqa \T1, \XMM4 | ||
1841 | .endif | ||
1842 | |||
1843 | vmovdqu 16*4(arg3, %r11), \T1 | ||
1844 | vpxor \T1, \XMM5, \XMM5 | ||
1845 | vmovdqu \XMM5, 16*4(arg2 , %r11) | ||
1846 | .if \ENC_DEC == DEC | ||
1847 | vmovdqa \T1, \XMM5 | ||
1848 | .endif | ||
1849 | |||
1850 | vmovdqu 16*5(arg3, %r11), \T1 | ||
1851 | vpxor \T1, \XMM6, \XMM6 | ||
1852 | vmovdqu \XMM6, 16*5(arg2 , %r11) | ||
1853 | .if \ENC_DEC == DEC | ||
1854 | vmovdqa \T1, \XMM6 | ||
1855 | .endif | ||
1856 | |||
1857 | vmovdqu 16*6(arg3, %r11), \T1 | ||
1858 | vpxor \T1, \XMM7, \XMM7 | ||
1859 | vmovdqu \XMM7, 16*6(arg2 , %r11) | ||
1860 | .if \ENC_DEC == DEC | ||
1861 | vmovdqa \T1, \XMM7 | ||
1862 | .endif | ||
1863 | |||
1864 | vmovdqu 16*7(arg3, %r11), \T1 | ||
1865 | vpxor \T1, \XMM8, \XMM8 | ||
1866 | vmovdqu \XMM8, 16*7(arg2 , %r11) | ||
1867 | .if \ENC_DEC == DEC | ||
1868 | vmovdqa \T1, \XMM8 | ||
1869 | .endif | ||
1870 | |||
1871 | add $128, %r11 | ||
1872 | |||
1873 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
1874 | vpxor TMP1(%rsp), \XMM1, \XMM1 # combine GHASHed value with | ||
1875 | # the corresponding ciphertext | ||
1876 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
1877 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
1878 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
1879 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
1880 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
1881 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
1882 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
1883 | |||
1884 | ############################################################################### | ||
1885 | |||
1886 | _initial_blocks_done\@: | ||
1887 | |||
1888 | |||
1889 | .endm | ||
1890 | |||
1891 | |||
1892 | |||
1893 | # encrypt 8 blocks at a time | ||
1894 | # ghash the 8 previously encrypted ciphertext blocks | ||
1895 | # arg1, arg2, arg3 are used as pointers only, not modified | ||
1896 | # r11 is the data offset value | ||
1897 | .macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC | ||
1898 | |||
1899 | vmovdqa \XMM1, \T2 | ||
1900 | vmovdqa \XMM2, TMP2(%rsp) | ||
1901 | vmovdqa \XMM3, TMP3(%rsp) | ||
1902 | vmovdqa \XMM4, TMP4(%rsp) | ||
1903 | vmovdqa \XMM5, TMP5(%rsp) | ||
1904 | vmovdqa \XMM6, TMP6(%rsp) | ||
1905 | vmovdqa \XMM7, TMP7(%rsp) | ||
1906 | vmovdqa \XMM8, TMP8(%rsp) | ||
1907 | |||
1908 | .if \loop_idx == in_order | ||
1909 | vpaddd ONE(%rip), \CTR, \XMM1 # INCR CNT | ||
1910 | vpaddd ONE(%rip), \XMM1, \XMM2 | ||
1911 | vpaddd ONE(%rip), \XMM2, \XMM3 | ||
1912 | vpaddd ONE(%rip), \XMM3, \XMM4 | ||
1913 | vpaddd ONE(%rip), \XMM4, \XMM5 | ||
1914 | vpaddd ONE(%rip), \XMM5, \XMM6 | ||
1915 | vpaddd ONE(%rip), \XMM6, \XMM7 | ||
1916 | vpaddd ONE(%rip), \XMM7, \XMM8 | ||
1917 | vmovdqa \XMM8, \CTR | ||
1918 | |||
1919 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
1920 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
1921 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
1922 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
1923 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
1924 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
1925 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
1926 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
1927 | .else | ||
1928 | vpaddd ONEf(%rip), \CTR, \XMM1 # INCR CNT | ||
1929 | vpaddd ONEf(%rip), \XMM1, \XMM2 | ||
1930 | vpaddd ONEf(%rip), \XMM2, \XMM3 | ||
1931 | vpaddd ONEf(%rip), \XMM3, \XMM4 | ||
1932 | vpaddd ONEf(%rip), \XMM4, \XMM5 | ||
1933 | vpaddd ONEf(%rip), \XMM5, \XMM6 | ||
1934 | vpaddd ONEf(%rip), \XMM6, \XMM7 | ||
1935 | vpaddd ONEf(%rip), \XMM7, \XMM8 | ||
1936 | vmovdqa \XMM8, \CTR | ||
1937 | .endif | ||
1938 | |||
1939 | |||
1940 | ####################################################################### | ||
1941 | |||
1942 | vmovdqu (arg1), \T1 | ||
1943 | vpxor \T1, \XMM1, \XMM1 | ||
1944 | vpxor \T1, \XMM2, \XMM2 | ||
1945 | vpxor \T1, \XMM3, \XMM3 | ||
1946 | vpxor \T1, \XMM4, \XMM4 | ||
1947 | vpxor \T1, \XMM5, \XMM5 | ||
1948 | vpxor \T1, \XMM6, \XMM6 | ||
1949 | vpxor \T1, \XMM7, \XMM7 | ||
1950 | vpxor \T1, \XMM8, \XMM8 | ||
1951 | |||
1952 | ####################################################################### | ||
1953 | |||
1954 | |||
1955 | |||
1956 | |||
1957 | |||
1958 | vmovdqu 16*1(arg1), \T1 | ||
1959 | vaesenc \T1, \XMM1, \XMM1 | ||
1960 | vaesenc \T1, \XMM2, \XMM2 | ||
1961 | vaesenc \T1, \XMM3, \XMM3 | ||
1962 | vaesenc \T1, \XMM4, \XMM4 | ||
1963 | vaesenc \T1, \XMM5, \XMM5 | ||
1964 | vaesenc \T1, \XMM6, \XMM6 | ||
1965 | vaesenc \T1, \XMM7, \XMM7 | ||
1966 | vaesenc \T1, \XMM8, \XMM8 | ||
1967 | |||
1968 | vmovdqu 16*2(arg1), \T1 | ||
1969 | vaesenc \T1, \XMM1, \XMM1 | ||
1970 | vaesenc \T1, \XMM2, \XMM2 | ||
1971 | vaesenc \T1, \XMM3, \XMM3 | ||
1972 | vaesenc \T1, \XMM4, \XMM4 | ||
1973 | vaesenc \T1, \XMM5, \XMM5 | ||
1974 | vaesenc \T1, \XMM6, \XMM6 | ||
1975 | vaesenc \T1, \XMM7, \XMM7 | ||
1976 | vaesenc \T1, \XMM8, \XMM8 | ||
1977 | |||
1978 | |||
1979 | ####################################################################### | ||
1980 | |||
1981 | vmovdqa HashKey_8(arg1), \T5 | ||
1982 | vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1 | ||
1983 | vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0 | ||
1984 | vpclmulqdq $0x01, \T5, \T2, \T6 # T6 = a1*b0 | ||
1985 | vpclmulqdq $0x10, \T5, \T2, \T5 # T5 = a0*b1 | ||
1986 | vpxor \T5, \T6, \T6 | ||
1987 | |||
1988 | vmovdqu 16*3(arg1), \T1 | ||
1989 | vaesenc \T1, \XMM1, \XMM1 | ||
1990 | vaesenc \T1, \XMM2, \XMM2 | ||
1991 | vaesenc \T1, \XMM3, \XMM3 | ||
1992 | vaesenc \T1, \XMM4, \XMM4 | ||
1993 | vaesenc \T1, \XMM5, \XMM5 | ||
1994 | vaesenc \T1, \XMM6, \XMM6 | ||
1995 | vaesenc \T1, \XMM7, \XMM7 | ||
1996 | vaesenc \T1, \XMM8, \XMM8 | ||
1997 | |||
1998 | vmovdqa TMP2(%rsp), \T1 | ||
1999 | vmovdqa HashKey_7(arg1), \T5 | ||
2000 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2001 | vpxor \T3, \T4, \T4 | ||
2002 | |||
2003 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2004 | vpxor \T3, \T7, \T7 | ||
2005 | |||
2006 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2007 | vpxor \T3, \T6, \T6 | ||
2008 | |||
2009 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2010 | vpxor \T3, \T6, \T6 | ||
2011 | |||
2012 | vmovdqu 16*4(arg1), \T1 | ||
2013 | vaesenc \T1, \XMM1, \XMM1 | ||
2014 | vaesenc \T1, \XMM2, \XMM2 | ||
2015 | vaesenc \T1, \XMM3, \XMM3 | ||
2016 | vaesenc \T1, \XMM4, \XMM4 | ||
2017 | vaesenc \T1, \XMM5, \XMM5 | ||
2018 | vaesenc \T1, \XMM6, \XMM6 | ||
2019 | vaesenc \T1, \XMM7, \XMM7 | ||
2020 | vaesenc \T1, \XMM8, \XMM8 | ||
2021 | |||
2022 | ####################################################################### | ||
2023 | |||
2024 | vmovdqa TMP3(%rsp), \T1 | ||
2025 | vmovdqa HashKey_6(arg1), \T5 | ||
2026 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2027 | vpxor \T3, \T4, \T4 | ||
2028 | |||
2029 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2030 | vpxor \T3, \T7, \T7 | ||
2031 | |||
2032 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2033 | vpxor \T3, \T6, \T6 | ||
2034 | |||
2035 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2036 | vpxor \T3, \T6, \T6 | ||
2037 | |||
2038 | vmovdqu 16*5(arg1), \T1 | ||
2039 | vaesenc \T1, \XMM1, \XMM1 | ||
2040 | vaesenc \T1, \XMM2, \XMM2 | ||
2041 | vaesenc \T1, \XMM3, \XMM3 | ||
2042 | vaesenc \T1, \XMM4, \XMM4 | ||
2043 | vaesenc \T1, \XMM5, \XMM5 | ||
2044 | vaesenc \T1, \XMM6, \XMM6 | ||
2045 | vaesenc \T1, \XMM7, \XMM7 | ||
2046 | vaesenc \T1, \XMM8, \XMM8 | ||
2047 | |||
2048 | vmovdqa TMP4(%rsp), \T1 | ||
2049 | vmovdqa HashKey_5(arg1), \T5 | ||
2050 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2051 | vpxor \T3, \T4, \T4 | ||
2052 | |||
2053 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2054 | vpxor \T3, \T7, \T7 | ||
2055 | |||
2056 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2057 | vpxor \T3, \T6, \T6 | ||
2058 | |||
2059 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2060 | vpxor \T3, \T6, \T6 | ||
2061 | |||
2062 | vmovdqu 16*6(arg1), \T1 | ||
2063 | vaesenc \T1, \XMM1, \XMM1 | ||
2064 | vaesenc \T1, \XMM2, \XMM2 | ||
2065 | vaesenc \T1, \XMM3, \XMM3 | ||
2066 | vaesenc \T1, \XMM4, \XMM4 | ||
2067 | vaesenc \T1, \XMM5, \XMM5 | ||
2068 | vaesenc \T1, \XMM6, \XMM6 | ||
2069 | vaesenc \T1, \XMM7, \XMM7 | ||
2070 | vaesenc \T1, \XMM8, \XMM8 | ||
2071 | |||
2072 | |||
2073 | vmovdqa TMP5(%rsp), \T1 | ||
2074 | vmovdqa HashKey_4(arg1), \T5 | ||
2075 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2076 | vpxor \T3, \T4, \T4 | ||
2077 | |||
2078 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2079 | vpxor \T3, \T7, \T7 | ||
2080 | |||
2081 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2082 | vpxor \T3, \T6, \T6 | ||
2083 | |||
2084 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2085 | vpxor \T3, \T6, \T6 | ||
2086 | |||
2087 | vmovdqu 16*7(arg1), \T1 | ||
2088 | vaesenc \T1, \XMM1, \XMM1 | ||
2089 | vaesenc \T1, \XMM2, \XMM2 | ||
2090 | vaesenc \T1, \XMM3, \XMM3 | ||
2091 | vaesenc \T1, \XMM4, \XMM4 | ||
2092 | vaesenc \T1, \XMM5, \XMM5 | ||
2093 | vaesenc \T1, \XMM6, \XMM6 | ||
2094 | vaesenc \T1, \XMM7, \XMM7 | ||
2095 | vaesenc \T1, \XMM8, \XMM8 | ||
2096 | |||
2097 | vmovdqa TMP6(%rsp), \T1 | ||
2098 | vmovdqa HashKey_3(arg1), \T5 | ||
2099 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2100 | vpxor \T3, \T4, \T4 | ||
2101 | |||
2102 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2103 | vpxor \T3, \T7, \T7 | ||
2104 | |||
2105 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2106 | vpxor \T3, \T6, \T6 | ||
2107 | |||
2108 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2109 | vpxor \T3, \T6, \T6 | ||
2110 | |||
2111 | vmovdqu 16*8(arg1), \T1 | ||
2112 | vaesenc \T1, \XMM1, \XMM1 | ||
2113 | vaesenc \T1, \XMM2, \XMM2 | ||
2114 | vaesenc \T1, \XMM3, \XMM3 | ||
2115 | vaesenc \T1, \XMM4, \XMM4 | ||
2116 | vaesenc \T1, \XMM5, \XMM5 | ||
2117 | vaesenc \T1, \XMM6, \XMM6 | ||
2118 | vaesenc \T1, \XMM7, \XMM7 | ||
2119 | vaesenc \T1, \XMM8, \XMM8 | ||
2120 | |||
2121 | vmovdqa TMP7(%rsp), \T1 | ||
2122 | vmovdqa HashKey_2(arg1), \T5 | ||
2123 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2124 | vpxor \T3, \T4, \T4 | ||
2125 | |||
2126 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2127 | vpxor \T3, \T7, \T7 | ||
2128 | |||
2129 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2130 | vpxor \T3, \T6, \T6 | ||
2131 | |||
2132 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2133 | vpxor \T3, \T6, \T6 | ||
2134 | |||
2135 | |||
2136 | ####################################################################### | ||
2137 | |||
2138 | vmovdqu 16*9(arg1), \T5 | ||
2139 | vaesenc \T5, \XMM1, \XMM1 | ||
2140 | vaesenc \T5, \XMM2, \XMM2 | ||
2141 | vaesenc \T5, \XMM3, \XMM3 | ||
2142 | vaesenc \T5, \XMM4, \XMM4 | ||
2143 | vaesenc \T5, \XMM5, \XMM5 | ||
2144 | vaesenc \T5, \XMM6, \XMM6 | ||
2145 | vaesenc \T5, \XMM7, \XMM7 | ||
2146 | vaesenc \T5, \XMM8, \XMM8 | ||
2147 | |||
2148 | vmovdqa TMP8(%rsp), \T1 | ||
2149 | vmovdqa HashKey(arg1), \T5 | ||
2150 | |||
2151 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2152 | vpxor \T3, \T7, \T7 | ||
2153 | |||
2154 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2155 | vpxor \T3, \T6, \T6 | ||
2156 | |||
2157 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2158 | vpxor \T3, \T6, \T6 | ||
2159 | |||
2160 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2161 | vpxor \T3, \T4, \T1 | ||
2162 | |||
2163 | |||
2164 | vmovdqu 16*10(arg1), \T5 | ||
2165 | |||
2166 | i = 0 | ||
2167 | j = 1 | ||
2168 | setreg | ||
2169 | .rep 8 | ||
2170 | vpxor 16*i(arg3, %r11), \T5, \T2 | ||
2171 | .if \ENC_DEC == ENC | ||
2172 | vaesenclast \T2, reg_j, reg_j | ||
2173 | .else | ||
2174 | vaesenclast \T2, reg_j, \T3 | ||
2175 | vmovdqu 16*i(arg3, %r11), reg_j | ||
2176 | vmovdqu \T3, 16*i(arg2, %r11) | ||
2177 | .endif | ||
2178 | i = (i+1) | ||
2179 | j = (j+1) | ||
2180 | setreg | ||
2181 | .endr | ||
2182 | ####################################################################### | ||
2183 | |||
2184 | |||
2185 | vpslldq $8, \T6, \T3 # shift-L T3 2 DWs | ||
2186 | vpsrldq $8, \T6, \T6 # shift-R T6 2 DWs | ||
2187 | vpxor \T3, \T7, \T7 | ||
2188 | vpxor \T6, \T1, \T1 # accumulate the results in T1:T7 | ||
2189 | |||
2190 | |||
2191 | |||
2192 | ####################################################################### | ||
2193 | #first phase of the reduction | ||
2194 | vmovdqa POLY2(%rip), \T3 | ||
2195 | |||
2196 | vpclmulqdq $0x01, \T7, \T3, \T2 | ||
2197 | vpslldq $8, \T2, \T2 # shift-L xmm2 2 DWs | ||
2198 | |||
2199 | vpxor \T2, \T7, \T7 # first phase of the reduction complete | ||
2200 | ####################################################################### | ||
2201 | .if \ENC_DEC == ENC | ||
2202 | vmovdqu \XMM1, 16*0(arg2,%r11) # Write to the Ciphertext buffer | ||
2203 | vmovdqu \XMM2, 16*1(arg2,%r11) # Write to the Ciphertext buffer | ||
2204 | vmovdqu \XMM3, 16*2(arg2,%r11) # Write to the Ciphertext buffer | ||
2205 | vmovdqu \XMM4, 16*3(arg2,%r11) # Write to the Ciphertext buffer | ||
2206 | vmovdqu \XMM5, 16*4(arg2,%r11) # Write to the Ciphertext buffer | ||
2207 | vmovdqu \XMM6, 16*5(arg2,%r11) # Write to the Ciphertext buffer | ||
2208 | vmovdqu \XMM7, 16*6(arg2,%r11) # Write to the Ciphertext buffer | ||
2209 | vmovdqu \XMM8, 16*7(arg2,%r11) # Write to the Ciphertext buffer | ||
2210 | .endif | ||
2211 | |||
2212 | ####################################################################### | ||
2213 | #second phase of the reduction | ||
2214 | vpclmulqdq $0x00, \T7, \T3, \T2 | ||
2215 | vpsrldq $4, \T2, \T2 # shift-R xmm2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R) | ||
2216 | |||
2217 | vpclmulqdq $0x10, \T7, \T3, \T4 | ||
2218 | vpslldq $4, \T4, \T4 # shift-L xmm0 1 DW (Shift-L 1-DW to obtain result with no shifts) | ||
2219 | |||
2220 | vpxor \T2, \T4, \T4 # second phase of the reduction complete | ||
2221 | ####################################################################### | ||
2222 | vpxor \T4, \T1, \T1 # the result is in T1 | ||
2223 | |||
2224 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
2225 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
2226 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
2227 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
2228 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
2229 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
2230 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
2231 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
2232 | |||
2233 | |||
2234 | vpxor \T1, \XMM1, \XMM1 | ||
2235 | |||
2236 | |||
2237 | |||
2238 | .endm | ||
2239 | |||
2240 | |||
2241 | # GHASH the last 8 ciphertext blocks. | ||
2242 | .macro GHASH_LAST_8_AVX2 T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 | ||
2243 | |||
2244 | ## Karatsuba Method | ||
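## With A = a1*x^64 + a0 and B = b1*x^64 + b0 (carry-less), Karatsuba gives
## A*B = a1*b1*x^128 + [(a1^a0)*(b1^b0) ^ a1*b1 ^ a0*b0]*x^64 + a0*b0,
## i.e. three vpclmulqdq per block instead of four.  The vpshufd/vpxor pairs
## below form (a1^a0) and (b1^b0); the middle term is assembled by the final
## xors before the reduction.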
2245 | |||
2246 | vmovdqa HashKey_8(arg1), \T5 | ||
2247 | |||
2248 | vpshufd $0b01001110, \XMM1, \T2 | ||
2249 | vpshufd $0b01001110, \T5, \T3 | ||
2250 | vpxor \XMM1, \T2, \T2 | ||
2251 | vpxor \T5, \T3, \T3 | ||
2252 | |||
2253 | vpclmulqdq $0x11, \T5, \XMM1, \T6 | ||
2254 | vpclmulqdq $0x00, \T5, \XMM1, \T7 | ||
2255 | |||
2256 | vpclmulqdq $0x00, \T3, \T2, \XMM1 | ||
2257 | |||
2258 | ###################### | ||
2259 | |||
2260 | vmovdqa HashKey_7(arg1), \T5 | ||
2261 | vpshufd $0b01001110, \XMM2, \T2 | ||
2262 | vpshufd $0b01001110, \T5, \T3 | ||
2263 | vpxor \XMM2, \T2, \T2 | ||
2264 | vpxor \T5, \T3, \T3 | ||
2265 | |||
2266 | vpclmulqdq $0x11, \T5, \XMM2, \T4 | ||
2267 | vpxor \T4, \T6, \T6 | ||
2268 | |||
2269 | vpclmulqdq $0x00, \T5, \XMM2, \T4 | ||
2270 | vpxor \T4, \T7, \T7 | ||
2271 | |||
2272 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2273 | |||
2274 | vpxor \T2, \XMM1, \XMM1 | ||
2275 | |||
2276 | ###################### | ||
2277 | |||
2278 | vmovdqa HashKey_6(arg1), \T5 | ||
2279 | vpshufd $0b01001110, \XMM3, \T2 | ||
2280 | vpshufd $0b01001110, \T5, \T3 | ||
2281 | vpxor \XMM3, \T2, \T2 | ||
2282 | vpxor \T5, \T3, \T3 | ||
2283 | |||
2284 | vpclmulqdq $0x11, \T5, \XMM3, \T4 | ||
2285 | vpxor \T4, \T6, \T6 | ||
2286 | |||
2287 | vpclmulqdq $0x00, \T5, \XMM3, \T4 | ||
2288 | vpxor \T4, \T7, \T7 | ||
2289 | |||
2290 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2291 | |||
2292 | vpxor \T2, \XMM1, \XMM1 | ||
2293 | |||
2294 | ###################### | ||
2295 | |||
2296 | vmovdqa HashKey_5(arg1), \T5 | ||
2297 | vpshufd $0b01001110, \XMM4, \T2 | ||
2298 | vpshufd $0b01001110, \T5, \T3 | ||
2299 | vpxor \XMM4, \T2, \T2 | ||
2300 | vpxor \T5, \T3, \T3 | ||
2301 | |||
2302 | vpclmulqdq $0x11, \T5, \XMM4, \T4 | ||
2303 | vpxor \T4, \T6, \T6 | ||
2304 | |||
2305 | vpclmulqdq $0x00, \T5, \XMM4, \T4 | ||
2306 | vpxor \T4, \T7, \T7 | ||
2307 | |||
2308 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2309 | |||
2310 | vpxor \T2, \XMM1, \XMM1 | ||
2311 | |||
2312 | ###################### | ||
2313 | |||
2314 | vmovdqa HashKey_4(arg1), \T5 | ||
2315 | vpshufd $0b01001110, \XMM5, \T2 | ||
2316 | vpshufd $0b01001110, \T5, \T3 | ||
2317 | vpxor \XMM5, \T2, \T2 | ||
2318 | vpxor \T5, \T3, \T3 | ||
2319 | |||
2320 | vpclmulqdq $0x11, \T5, \XMM5, \T4 | ||
2321 | vpxor \T4, \T6, \T6 | ||
2322 | |||
2323 | vpclmulqdq $0x00, \T5, \XMM5, \T4 | ||
2324 | vpxor \T4, \T7, \T7 | ||
2325 | |||
2326 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2327 | |||
2328 | vpxor \T2, \XMM1, \XMM1 | ||
2329 | |||
2330 | ###################### | ||
2331 | |||
2332 | vmovdqa HashKey_3(arg1), \T5 | ||
2333 | vpshufd $0b01001110, \XMM6, \T2 | ||
2334 | vpshufd $0b01001110, \T5, \T3 | ||
2335 | vpxor \XMM6, \T2, \T2 | ||
2336 | vpxor \T5, \T3, \T3 | ||
2337 | |||
2338 | vpclmulqdq $0x11, \T5, \XMM6, \T4 | ||
2339 | vpxor \T4, \T6, \T6 | ||
2340 | |||
2341 | vpclmulqdq $0x00, \T5, \XMM6, \T4 | ||
2342 | vpxor \T4, \T7, \T7 | ||
2343 | |||
2344 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2345 | |||
2346 | vpxor \T2, \XMM1, \XMM1 | ||
2347 | |||
2348 | ###################### | ||
2349 | |||
2350 | vmovdqa HashKey_2(arg1), \T5 | ||
2351 | vpshufd $0b01001110, \XMM7, \T2 | ||
2352 | vpshufd $0b01001110, \T5, \T3 | ||
2353 | vpxor \XMM7, \T2, \T2 | ||
2354 | vpxor \T5, \T3, \T3 | ||
2355 | |||
2356 | vpclmulqdq $0x11, \T5, \XMM7, \T4 | ||
2357 | vpxor \T4, \T6, \T6 | ||
2358 | |||
2359 | vpclmulqdq $0x00, \T5, \XMM7, \T4 | ||
2360 | vpxor \T4, \T7, \T7 | ||
2361 | |||
2362 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2363 | |||
2364 | vpxor \T2, \XMM1, \XMM1 | ||
2365 | |||
2366 | ###################### | ||
2367 | |||
2368 | vmovdqa HashKey(arg1), \T5 | ||
2369 | vpshufd $0b01001110, \XMM8, \T2 | ||
2370 | vpshufd $0b01001110, \T5, \T3 | ||
2371 | vpxor \XMM8, \T2, \T2 | ||
2372 | vpxor \T5, \T3, \T3 | ||
2373 | |||
2374 | vpclmulqdq $0x11, \T5, \XMM8, \T4 | ||
2375 | vpxor \T4, \T6, \T6 | ||
2376 | |||
2377 | vpclmulqdq $0x00, \T5, \XMM8, \T4 | ||
2378 | vpxor \T4, \T7, \T7 | ||
2379 | |||
2380 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2381 | |||
2382 | vpxor \T2, \XMM1, \XMM1 | ||
2383 | vpxor \T6, \XMM1, \XMM1 | ||
2384 | vpxor \T7, \XMM1, \T2 | ||
2385 | |||
2386 | |||
2387 | |||
2388 | |||
2389 | vpslldq $8, \T2, \T4 | ||
2390 | vpsrldq $8, \T2, \T2 | ||
2391 | |||
2392 | vpxor \T4, \T7, \T7 | ||
2393 | vpxor \T2, \T6, \T6 # <T6:T7> holds the result of the | ||
2394 | # accumulated carry-less multiplications | ||
2395 | |||
2396 | ####################################################################### | ||
2397 | #first phase of the reduction | ||
2398 | vmovdqa POLY2(%rip), \T3 | ||
2399 | |||
2400 | vpclmulqdq $0x01, \T7, \T3, \T2 | ||
2401 | vpslldq $8, \T2, \T2 # shift-L xmm2 2 DWs | ||
2402 | |||
2403 | vpxor \T2, \T7, \T7 # first phase of the reduction complete | ||
2404 | ####################################################################### | ||
2405 | |||
2406 | |||
2407 | #second phase of the reduction | ||
2408 | vpclmulqdq $0x00, \T7, \T3, \T2 | ||
2409 | vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R) | ||
2410 | |||
2411 | vpclmulqdq $0x10, \T7, \T3, \T4 | ||
2412 | vpslldq $4, \T4, \T4 # shift-L T4 1 DW (Shift-L 1-DW to obtain result with no shifts) | ||
2413 | |||
2414 | vpxor \T2, \T4, \T4 # second phase of the reduction complete | ||
2415 | ####################################################################### | ||
2416 | vpxor \T4, \T6, \T6 # the result is in T6 | ||
2417 | .endm | ||
2418 | |||
2419 | |||
2420 | |||
2421 | # combined for GCM encrypt and decrypt functions | ||
2422 | # clobbering all xmm registers | ||
2423 | # clobbering r10, r11, r12, r13, r14, r15 | ||
2424 | .macro GCM_ENC_DEC_AVX2 ENC_DEC | ||
2425 | |||
2426 | #the number of pushes must equal STACK_OFFSET | ||
2427 | push %r12 | ||
2428 | push %r13 | ||
2429 | push %r14 | ||
2430 | push %r15 | ||
2431 | |||
2432 | mov %rsp, %r14 | ||
2433 | |||
2434 | |||
2435 | |||
2436 | |||
2437 | sub $VARIABLE_OFFSET, %rsp | ||
2438 | and $~63, %rsp # align rsp to 64 bytes | ||
2439 | |||
2440 | |||
2441 | vmovdqu HashKey(arg1), %xmm13 # xmm13 = HashKey | ||
2442 | |||
2443 | mov arg4, %r13 # save the number of bytes of plaintext/ciphertext | ||
2444 | and $-16, %r13 # r13 = r13 - (r13 mod 16) | ||
2445 | |||
2446 | mov %r13, %r12 | ||
2447 | shr $4, %r12 | ||
2448 | and $7, %r12 | ||
2449 | jz _initial_num_blocks_is_0\@ | ||
2450 | |||
2451 | cmp $7, %r12 | ||
2452 | je _initial_num_blocks_is_7\@ | ||
2453 | cmp $6, %r12 | ||
2454 | je _initial_num_blocks_is_6\@ | ||
2455 | cmp $5, %r12 | ||
2456 | je _initial_num_blocks_is_5\@ | ||
2457 | cmp $4, %r12 | ||
2458 | je _initial_num_blocks_is_4\@ | ||
2459 | cmp $3, %r12 | ||
2460 | je _initial_num_blocks_is_3\@ | ||
2461 | cmp $2, %r12 | ||
2462 | je _initial_num_blocks_is_2\@ | ||
2463 | |||
2464 | jmp _initial_num_blocks_is_1\@ | ||
2465 | |||
2466 | _initial_num_blocks_is_7\@: | ||
2467 | INITIAL_BLOCKS_AVX2 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2468 | sub $16*7, %r13 | ||
2469 | jmp _initial_blocks_encrypted\@ | ||
2470 | |||
2471 | _initial_num_blocks_is_6\@: | ||
2472 | INITIAL_BLOCKS_AVX2 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2473 | sub $16*6, %r13 | ||
2474 | jmp _initial_blocks_encrypted\@ | ||
2475 | |||
2476 | _initial_num_blocks_is_5\@: | ||
2477 | INITIAL_BLOCKS_AVX2 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2478 | sub $16*5, %r13 | ||
2479 | jmp _initial_blocks_encrypted\@ | ||
2480 | |||
2481 | _initial_num_blocks_is_4\@: | ||
2482 | INITIAL_BLOCKS_AVX2 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2483 | sub $16*4, %r13 | ||
2484 | jmp _initial_blocks_encrypted\@ | ||
2485 | |||
2486 | _initial_num_blocks_is_3\@: | ||
2487 | INITIAL_BLOCKS_AVX2 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2488 | sub $16*3, %r13 | ||
2489 | jmp _initial_blocks_encrypted\@ | ||
2490 | |||
2491 | _initial_num_blocks_is_2\@: | ||
2492 | INITIAL_BLOCKS_AVX2 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2493 | sub $16*2, %r13 | ||
2494 | jmp _initial_blocks_encrypted\@ | ||
2495 | |||
2496 | _initial_num_blocks_is_1\@: | ||
2497 | INITIAL_BLOCKS_AVX2 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2498 | sub $16*1, %r13 | ||
2499 | jmp _initial_blocks_encrypted\@ | ||
2500 | |||
2501 | _initial_num_blocks_is_0\@: | ||
2502 | INITIAL_BLOCKS_AVX2 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2503 | |||
2504 | |||
2505 | _initial_blocks_encrypted\@: | ||
2506 | cmp $0, %r13 | ||
2507 | je _zero_cipher_left\@ | ||
2508 | |||
2509 | sub $128, %r13 | ||
2510 | je _eight_cipher_left\@ | ||
2511 | |||
2512 | |||
2513 | |||
2514 | |||
2515 | vmovd %xmm9, %r15d | ||
2516 | and $255, %r15d | ||
2517 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2518 | |||
2519 | |||
2520 | _encrypt_by_8_new\@: | ||
2521 | cmp $(255-8), %r15d | ||
2522 | jg _encrypt_by_8\@ | ||
2523 | |||
2524 | |||
2525 | |||
2526 | add $8, %r15b | ||
2527 | GHASH_8_ENCRYPT_8_PARALLEL_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC | ||
2528 | add $128, %r11 | ||
2529 | sub $128, %r13 | ||
2530 | jne _encrypt_by_8_new\@ | ||
2531 | |||
2532 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2533 | jmp _eight_cipher_left\@ | ||
2534 | |||
2535 | _encrypt_by_8\@: | ||
2536 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2537 | add $8, %r15b | ||
2538 | GHASH_8_ENCRYPT_8_PARALLEL_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC | ||
2539 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2540 | add $128, %r11 | ||
2541 | sub $128, %r13 | ||
2542 | jne _encrypt_by_8_new\@ | ||
2543 | |||
2544 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2545 | |||
2546 | |||
2547 | |||
2548 | |||
2549 | _eight_cipher_left\@: | ||
2550 | GHASH_LAST_8_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8 | ||
2551 | |||
2552 | |||
2553 | _zero_cipher_left\@: | ||
2554 | cmp $16, arg4 | ||
2555 | jl _only_less_than_16\@ | ||
2556 | |||
2557 | mov arg4, %r13 | ||
2558 | and $15, %r13 # r13 = (arg4 mod 16) | ||
2559 | |||
2560 | je _multiple_of_16_bytes\@ | ||
2561 | |||
2562 | # handle the last <16 Byte block separately | ||
2563 | |||
2564 | |||
2565 | vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn | ||
2566 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2567 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) | ||
2568 | |||
2569 | sub $16, %r11 | ||
2570 | add %r13, %r11 | ||
2571 | vmovdqu (arg3, %r11), %xmm1 # receive the last <16 Byte block | ||
2572 | |||
2573 | lea SHIFT_MASK+16(%rip), %r12 | ||
2574 | sub %r13, %r12 # adjust the shuffle mask pointer | ||
2575 | # to be able to shift 16-r13 bytes | ||
2576 | # (r13 is the number of bytes in plaintext mod 16) | ||
2577 | vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask | ||
2578 | vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes | ||
2579 | jmp _final_ghash_mul\@ | ||
2580 | |||
2581 | _only_less_than_16\@: | ||
2582 | # check for 0 length | ||
2583 | mov arg4, %r13 | ||
2584 | and $15, %r13 # r13 = (arg4 mod 16) | ||
2585 | |||
2586 | je _multiple_of_16_bytes\@ | ||
2587 | |||
2588 | # handle the last <16 Byte block separately | ||
2589 | |||
2590 | |||
2591 | vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn | ||
2592 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2593 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) | ||
2594 | |||
2595 | |||
2596 | lea SHIFT_MASK+16(%rip), %r12 | ||
2597 | sub %r13, %r12 # adjust the shuffle mask pointer to be | ||
2598 | # able to shift 16-r13 bytes (r13 is the | ||
2599 | # number of bytes in plaintext mod 16) | ||
2600 | |||
2601 | _get_last_16_byte_loop\@: | ||
2602 | movb (arg3, %r11), %al | ||
2603 | movb %al, TMP1 (%rsp , %r11) | ||
2604 | add $1, %r11 | ||
2605 | cmp %r13, %r11 | ||
2606 | jne _get_last_16_byte_loop\@ | ||
2607 | |||
2608 | vmovdqu TMP1(%rsp), %xmm1 | ||
2609 | |||
2610 | sub $16, %r11 | ||
2611 | |||
2612 | _final_ghash_mul\@: | ||
2613 | .if \ENC_DEC == DEC | ||
2614 | vmovdqa %xmm1, %xmm2 | ||
2615 | vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) | ||
2616 | vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm9 | ||
2617 | vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 | ||
2618 | vpand %xmm1, %xmm2, %xmm2 | ||
2619 | vpshufb SHUF_MASK(%rip), %xmm2, %xmm2 | ||
2620 | vpxor %xmm2, %xmm14, %xmm14 | ||
2621 | #GHASH computation for the last <16 Byte block | ||
2622 | GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 | ||
2623 | sub %r13, %r11 | ||
2624 | add $16, %r11 | ||
2625 | .else | ||
2626 | vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) | ||
2627 | vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm9 | ||
2628 | vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 | ||
2629 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2630 | vpxor %xmm9, %xmm14, %xmm14 | ||
2631 | #GHASH computation for the last <16 Byte block | ||
2632 | GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 | ||
2633 | sub %r13, %r11 | ||
2634 | add $16, %r11 | ||
2635 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext | ||
2636 | .endif | ||
2637 | |||
2638 | |||
2639 | ############################# | ||
2640 | # output r13 Bytes | ||
2641 | vmovq %xmm9, %rax | ||
2642 | cmp $8, %r13 | ||
2643 | jle _less_than_8_bytes_left\@ | ||
2644 | |||
2645 | mov %rax, (arg2 , %r11) | ||
2646 | add $8, %r11 | ||
2647 | vpsrldq $8, %xmm9, %xmm9 | ||
2648 | vmovq %xmm9, %rax | ||
2649 | sub $8, %r13 | ||
2650 | |||
2651 | _less_than_8_bytes_left\@: | ||
2652 | movb %al, (arg2 , %r11) | ||
2653 | add $1, %r11 | ||
2654 | shr $8, %rax | ||
2655 | sub $1, %r13 | ||
2656 | jne _less_than_8_bytes_left\@ | ||
2657 | ############################# | ||
2658 | |||
2659 | _multiple_of_16_bytes\@: | ||
2660 | mov arg7, %r12 # r12 = aadLen (number of bytes) | ||
2661 | shl $3, %r12 # convert into number of bits | ||
2662 | vmovd %r12d, %xmm15 # len(A) in xmm15 | ||
2663 | |||
2664 | shl $3, arg4 # len(C) in bits (*8) | ||
2665 | vmovq arg4, %xmm1 | ||
2666 | vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000 | ||
2667 | vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C) | ||
2668 | |||
2669 | vpxor %xmm15, %xmm14, %xmm14 | ||
2670 | GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation | ||
2671 | vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap | ||
2672 | |||
2673 | mov arg5, %rax # rax = *Y0 | ||
2674 | vmovdqu (%rax), %xmm9 # xmm9 = Y0 | ||
2675 | |||
2676 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Y0) | ||
2677 | |||
2678 | vpxor %xmm14, %xmm9, %xmm9 | ||
2679 | |||
2680 | |||
2681 | |||
2682 | _return_T\@: | ||
2683 | mov arg8, %r10 # r10 = authTag | ||
2684 | mov arg9, %r11 # r11 = auth_tag_len | ||
2685 | |||
2686 | cmp $16, %r11 | ||
2687 | je _T_16\@ | ||
2688 | |||
2689 | cmp $12, %r11 | ||
2690 | je _T_12\@ | ||
2691 | |||
2692 | _T_8\@: | ||
2693 | vmovq %xmm9, %rax | ||
2694 | mov %rax, (%r10) | ||
2695 | jmp _return_T_done\@ | ||
2696 | _T_12\@: | ||
2697 | vmovq %xmm9, %rax | ||
2698 | mov %rax, (%r10) | ||
2699 | vpsrldq $8, %xmm9, %xmm9 | ||
2700 | vmovd %xmm9, %eax | ||
2701 | mov %eax, 8(%r10) | ||
2702 | jmp _return_T_done\@ | ||
2703 | |||
2704 | _T_16\@: | ||
2705 | vmovdqu %xmm9, (%r10) | ||
2706 | |||
2707 | _return_T_done\@: | ||
2708 | mov %r14, %rsp | ||
2709 | |||
2710 | pop %r15 | ||
2711 | pop %r14 | ||
2712 | pop %r13 | ||
2713 | pop %r12 | ||
2714 | .endm | ||
2715 | |||
2716 | |||
2717 | ############################################################# | ||
2718 | #void aesni_gcm_precomp_avx_gen4 | ||
2719 | # (gcm_data *my_ctx_data, | ||
2720 | # u8 *hash_subkey)# /* H, the Hash sub key input. | ||
2721 | # Data starts on a 16-byte boundary. */ | ||
2722 | ############################################################# | ||
2723 | ENTRY(aesni_gcm_precomp_avx_gen4) | ||
2724 | #the number of pushes must equal STACK_OFFSET | ||
2725 | push %r12 | ||
2726 | push %r13 | ||
2727 | push %r14 | ||
2728 | push %r15 | ||
2729 | |||
2730 | mov %rsp, %r14 | ||
2731 | |||
2732 | |||
2733 | |||
2734 | sub $VARIABLE_OFFSET, %rsp | ||
2735 | and $~63, %rsp # align rsp to 64 bytes | ||
2736 | |||
2737 | vmovdqu (arg2), %xmm6 # xmm6 = HashKey | ||
2738 | |||
2739 | vpshufb SHUF_MASK(%rip), %xmm6, %xmm6 | ||
2740 | ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey | ||
2741 | vmovdqa %xmm6, %xmm2 | ||
2742 | vpsllq $1, %xmm6, %xmm6 | ||
2743 | vpsrlq $63, %xmm2, %xmm2 | ||
2744 | vmovdqa %xmm2, %xmm1 | ||
2745 | vpslldq $8, %xmm2, %xmm2 | ||
2746 | vpsrldq $8, %xmm1, %xmm1 | ||
2747 | vpor %xmm2, %xmm6, %xmm6 | ||
2748 | #reduction | ||
2749 | vpshufd $0b00100100, %xmm1, %xmm2 | ||
2750 | vpcmpeqd TWOONE(%rip), %xmm2, %xmm2 | ||
2751 | vpand POLY(%rip), %xmm2, %xmm2 | ||
2752 | vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly | ||
2753 | ####################################################################### | ||
2754 | vmovdqa %xmm6, HashKey(arg1) # store HashKey<<1 mod poly | ||
2755 | |||
2756 | |||
2757 | PRECOMPUTE_AVX2 %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5 | ||
2758 | |||
2759 | mov %r14, %rsp | ||
2760 | |||
2761 | pop %r15 | ||
2762 | pop %r14 | ||
2763 | pop %r13 | ||
2764 | pop %r12 | ||
2765 | ret | ||
2766 | ENDPROC(aesni_gcm_precomp_avx_gen4) | ||
2767 | |||
2768 | |||
2769 | ############################################################################### | ||
2770 | #void aesni_gcm_enc_avx_gen4( | ||
2771 | # gcm_data *my_ctx_data, /* aligned to 16 Bytes */ | ||
2772 | # u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */ | ||
2773 | # const u8 *in, /* Plaintext input */ | ||
2774 | # u64 plaintext_len, /* Length of data in Bytes for encryption. */ | ||
2775 | # u8 *iv, /* Pre-counter block j0: 4 byte salt | ||
2776 | # (from Security Association) concatenated with 8 byte | ||
2777 | # Initialisation Vector (from IPSec ESP Payload) | ||
2778 | # concatenated with 0x00000001. 16-byte aligned pointer. */ | ||
2779 | # const u8 *aad, /* Additional Authentication Data (AAD)*/ | ||
2780 | # u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ | ||
2781 | # u8 *auth_tag, /* Authenticated Tag output. */ | ||
2782 | # u64 auth_tag_len)# /* Authenticated Tag Length in bytes. | ||
2783 | # Valid values are 16 (most likely), 12 or 8. */ | ||
2784 | ############################################################################### | ||
2785 | ENTRY(aesni_gcm_enc_avx_gen4) | ||
2786 | GCM_ENC_DEC_AVX2 ENC | ||
2787 | ret | ||
2788 | ENDPROC(aesni_gcm_enc_avx_gen4) | ||
2789 | |||
2790 | ############################################################################### | ||
2791 | #void aesni_gcm_dec_avx_gen4( | ||
2792 | # gcm_data *my_ctx_data, /* aligned to 16 Bytes */ | ||
2793 | # u8 *out, /* Plaintext output. Decrypt in-place is allowed. */ | ||
2794 | # const u8 *in, /* Ciphertext input */ | ||
2795 | # u64 plaintext_len, /* Length of data in Bytes for decryption. */ | ||
2796 | # u8 *iv, /* Pre-counter block j0: 4 byte salt | ||
2797 | # (from Security Association) concatenated with 8 byte | ||
2798 | # Initialisation Vector (from IPSec ESP Payload) | ||
2799 | # concatenated with 0x00000001. 16-byte aligned pointer. */ | ||
2800 | # const u8 *aad, /* Additional Authentication Data (AAD)*/ | ||
2801 | # u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ | ||
2802 | # u8 *auth_tag, /* Authenticated Tag output. */ | ||
2803 | # u64 auth_tag_len)# /* Authenticated Tag Length in bytes. | ||
2804 | # Valid values are 16 (most likely), 12 or 8. */ | ||
2805 | ############################################################################### | ||
2806 | ENTRY(aesni_gcm_dec_avx_gen4) | ||
2807 | GCM_ENC_DEC_AVX2 DEC | ||
2808 | ret | ||
2809 | ENDPROC(aesni_gcm_dec_avx_gen4) | ||
2810 | |||
2811 | #endif /* CONFIG_AS_AVX2 */ | ||
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 835488b745ee..948ad0e77741 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -101,6 +101,9 @@ asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, | |||
101 | int crypto_fpu_init(void); | 101 | int crypto_fpu_init(void); |
102 | void crypto_fpu_exit(void); | 102 | void crypto_fpu_exit(void); |
103 | 103 | ||
104 | #define AVX_GEN2_OPTSIZE 640 | ||
105 | #define AVX_GEN4_OPTSIZE 4096 | ||
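/* e.g. a 256 byte request stays on the SSE path, a 1500 byte request uses
 * the AVX (gen2) code and an 8 kB request the AVX2 (gen4) code, provided the
 * CPU supports the corresponding instruction set (selected in aesni_init()).
 */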
106 | |||
104 | #ifdef CONFIG_X86_64 | 107 | #ifdef CONFIG_X86_64 |
105 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, | 108 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, |
106 | const u8 *in, unsigned int len, u8 *iv); | 109 | const u8 *in, unsigned int len, u8 *iv); |
@@ -150,6 +153,123 @@ asmlinkage void aesni_gcm_dec(void *ctx, u8 *out, | |||
150 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | 153 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, |
151 | u8 *auth_tag, unsigned long auth_tag_len); | 154 | u8 *auth_tag, unsigned long auth_tag_len); |
152 | 155 | ||
156 | |||
157 | #ifdef CONFIG_AS_AVX | ||
158 | /* | ||
159 | * asmlinkage void aesni_gcm_precomp_avx_gen2() | ||
160 | * gcm_data *my_ctx_data, context data | ||
161 | * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. | ||
162 | */ | ||
163 | asmlinkage void aesni_gcm_precomp_avx_gen2(void *my_ctx_data, u8 *hash_subkey); | ||
164 | |||
165 | asmlinkage void aesni_gcm_enc_avx_gen2(void *ctx, u8 *out, | ||
166 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
167 | const u8 *aad, unsigned long aad_len, | ||
168 | u8 *auth_tag, unsigned long auth_tag_len); | ||
169 | |||
170 | asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx, u8 *out, | ||
171 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
172 | const u8 *aad, unsigned long aad_len, | ||
173 | u8 *auth_tag, unsigned long auth_tag_len); | ||
174 | |||
175 | static void aesni_gcm_enc_avx(void *ctx, u8 *out, | ||
176 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
177 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
178 | u8 *auth_tag, unsigned long auth_tag_len) | ||
179 | { | ||
180 | if (plaintext_len < AVX_GEN2_OPTSIZE) { | ||
181 | aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, | ||
182 | aad_len, auth_tag, auth_tag_len); | ||
183 | } else { | ||
184 | aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); | ||
185 | aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad, | ||
186 | aad_len, auth_tag, auth_tag_len); | ||
187 | } | ||
188 | } | ||
189 | |||
190 | static void aesni_gcm_dec_avx(void *ctx, u8 *out, | ||
191 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
192 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
193 | u8 *auth_tag, unsigned long auth_tag_len) | ||
194 | { | ||
195 | if (ciphertext_len < AVX_GEN2_OPTSIZE) { | ||
196 | aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad, | ||
197 | aad_len, auth_tag, auth_tag_len); | ||
198 | } else { | ||
199 | aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); | ||
200 | aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad, | ||
201 | aad_len, auth_tag, auth_tag_len); | ||
202 | } | ||
203 | } | ||
204 | #endif | ||
205 | |||
206 | #ifdef CONFIG_AS_AVX2 | ||
207 | /* | ||
208 | * asmlinkage void aesni_gcm_precomp_avx_gen4() | ||
209 | * gcm_data *my_ctx_data, context data | ||
210 | * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. | ||
211 | */ | ||
212 | asmlinkage void aesni_gcm_precomp_avx_gen4(void *my_ctx_data, u8 *hash_subkey); | ||
213 | |||
214 | asmlinkage void aesni_gcm_enc_avx_gen4(void *ctx, u8 *out, | ||
215 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
216 | const u8 *aad, unsigned long aad_len, | ||
217 | u8 *auth_tag, unsigned long auth_tag_len); | ||
218 | |||
219 | asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx, u8 *out, | ||
220 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
221 | const u8 *aad, unsigned long aad_len, | ||
222 | u8 *auth_tag, unsigned long auth_tag_len); | ||
223 | |||
224 | static void aesni_gcm_enc_avx2(void *ctx, u8 *out, | ||
225 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
226 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
227 | u8 *auth_tag, unsigned long auth_tag_len) | ||
228 | { | ||
229 | if (plaintext_len < AVX_GEN2_OPTSIZE) { | ||
230 | aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, | ||
231 | aad_len, auth_tag, auth_tag_len); | ||
232 | } else if (plaintext_len < AVX_GEN4_OPTSIZE) { | ||
233 | aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); | ||
234 | aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad, | ||
235 | aad_len, auth_tag, auth_tag_len); | ||
236 | } else { | ||
237 | aesni_gcm_precomp_avx_gen4(ctx, hash_subkey); | ||
238 | aesni_gcm_enc_avx_gen4(ctx, out, in, plaintext_len, iv, aad, | ||
239 | aad_len, auth_tag, auth_tag_len); | ||
240 | } | ||
241 | } | ||
242 | |||
243 | static void aesni_gcm_dec_avx2(void *ctx, u8 *out, | ||
244 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
245 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
246 | u8 *auth_tag, unsigned long auth_tag_len) | ||
247 | { | ||
248 | if (ciphertext_len < AVX_GEN2_OPTSIZE) { | ||
249 | aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, | ||
250 | aad, aad_len, auth_tag, auth_tag_len); | ||
251 | } else if (ciphertext_len < AVX_GEN4_OPTSIZE) { | ||
252 | aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); | ||
253 | aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad, | ||
254 | aad_len, auth_tag, auth_tag_len); | ||
255 | } else { | ||
256 | aesni_gcm_precomp_avx_gen4(ctx, hash_subkey); | ||
257 | aesni_gcm_dec_avx_gen4(ctx, out, in, ciphertext_len, iv, aad, | ||
258 | aad_len, auth_tag, auth_tag_len); | ||
259 | } | ||
260 | } | ||
261 | #endif | ||
262 | |||
263 | static void (*aesni_gcm_enc_tfm)(void *ctx, u8 *out, | ||
264 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
265 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
266 | u8 *auth_tag, unsigned long auth_tag_len); | ||
267 | |||
268 | static void (*aesni_gcm_dec_tfm)(void *ctx, u8 *out, | ||
269 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
270 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
271 | u8 *auth_tag, unsigned long auth_tag_len); | ||
272 | |||
153 | static inline struct | 273 | static inline struct |
154 | aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) | 274 | aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) |
155 | { | 275 | { |
@@ -915,7 +1035,7 @@ static int __driver_rfc4106_encrypt(struct aead_request *req) | |||
915 | dst = src; | 1035 | dst = src; |
916 | } | 1036 | } |
917 | 1037 | ||
918 | aesni_gcm_enc(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv, | 1038 | aesni_gcm_enc_tfm(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv, |
919 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst | 1039 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst |
920 | + ((unsigned long)req->cryptlen), auth_tag_len); | 1040 | + ((unsigned long)req->cryptlen), auth_tag_len); |
921 | 1041 | ||
@@ -996,12 +1116,12 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) | |||
996 | dst = src; | 1116 | dst = src; |
997 | } | 1117 | } |
998 | 1118 | ||
999 | aesni_gcm_dec(aes_ctx, dst, src, tempCipherLen, iv, | 1119 | aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv, |
1000 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, | 1120 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, |
1001 | authTag, auth_tag_len); | 1121 | authTag, auth_tag_len); |
1002 | 1122 | ||
1003 | /* Compare generated tag with passed in tag. */ | 1123 | /* Compare generated tag with passed in tag. */ |
1004 | retval = memcmp(src + tempCipherLen, authTag, auth_tag_len) ? | 1124 | retval = crypto_memneq(src + tempCipherLen, authTag, auth_tag_len) ? |
1005 | -EBADMSG : 0; | 1125 | -EBADMSG : 0; |
1006 | 1126 | ||
1007 | if (one_entry_in_sg) { | 1127 | if (one_entry_in_sg) { |
@@ -1353,6 +1473,27 @@ static int __init aesni_init(void) | |||
1353 | 1473 | ||
1354 | if (!x86_match_cpu(aesni_cpu_id)) | 1474 | if (!x86_match_cpu(aesni_cpu_id)) |
1355 | return -ENODEV; | 1475 | return -ENODEV; |
1476 | #ifdef CONFIG_X86_64 | ||
1477 | #ifdef CONFIG_AS_AVX2 | ||
1478 | if (boot_cpu_has(X86_FEATURE_AVX2)) { | ||
1479 | pr_info("AVX2 version of gcm_enc/dec engaged.\n"); | ||
1480 | aesni_gcm_enc_tfm = aesni_gcm_enc_avx2; | ||
1481 | aesni_gcm_dec_tfm = aesni_gcm_dec_avx2; | ||
1482 | } else | ||
1483 | #endif | ||
1484 | #ifdef CONFIG_AS_AVX | ||
1485 | if (boot_cpu_has(X86_FEATURE_AVX)) { | ||
1486 | pr_info("AVX version of gcm_enc/dec engaged.\n"); | ||
1487 | aesni_gcm_enc_tfm = aesni_gcm_enc_avx; | ||
1488 | aesni_gcm_dec_tfm = aesni_gcm_dec_avx; | ||
1489 | } else | ||
1490 | #endif | ||
1491 | { | ||
1492 | pr_info("SSE version of gcm_enc/dec engaged.\n"); | ||
1493 | aesni_gcm_enc_tfm = aesni_gcm_enc; | ||
1494 | aesni_gcm_dec_tfm = aesni_gcm_dec; | ||
1495 | } | ||
1496 | #endif | ||
1356 | 1497 | ||
1357 | err = crypto_fpu_init(); | 1498 | err = crypto_fpu_init(); |
1358 | if (err) | 1499 | if (err) |
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 0d9ec770f2f8..e6a92455740e 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h | |||
@@ -39,6 +39,20 @@ | |||
39 | 39 | ||
40 | #ifdef CONFIG_ARCH_RANDOM | 40 | #ifdef CONFIG_ARCH_RANDOM |
41 | 41 | ||
42 | /* Instead of arch_get_random_long() when alternatives haven't run. */ | ||
43 | static inline int rdrand_long(unsigned long *v) | ||
44 | { | ||
45 | int ok; | ||
46 | asm volatile("1: " RDRAND_LONG "\n\t" | ||
47 | "jc 2f\n\t" | ||
48 | "decl %0\n\t" | ||
49 | "jnz 1b\n\t" | ||
50 | "2:" | ||
51 | : "=r" (ok), "=a" (*v) | ||
52 | : "0" (RDRAND_RETRY_LOOPS)); | ||
53 | return ok; | ||
54 | } | ||
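A minimal usage sketch for the helper above (the variable name is illustrative): rdrand_long() returns non-zero once RDRAND delivers a value within RDRAND_RETRY_LOOPS attempts, and 0 if the retry loop is exhausted.

	unsigned long v;

	if (rdrand_long(&v))
		pr_info("RDRAND value: %lx\n", v);
	else
		pr_warn("RDRAND returned no data after %d retries\n",
			RDRAND_RETRY_LOOPS);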
55 | |||
42 | #define GET_RANDOM(name, type, rdrand, nop) \ | 56 | #define GET_RANDOM(name, type, rdrand, nop) \ |
43 | static inline int name(type *v) \ | 57 | static inline int name(type *v) \ |
44 | { \ | 58 | { \ |
@@ -68,6 +82,13 @@ GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3); | |||
68 | 82 | ||
69 | #endif /* CONFIG_X86_64 */ | 83 | #endif /* CONFIG_X86_64 */ |
70 | 84 | ||
85 | #else | ||
86 | |||
87 | static inline int rdrand_long(unsigned long *v) | ||
88 | { | ||
89 | return 0; | ||
90 | } | ||
91 | |||
71 | #endif /* CONFIG_ARCH_RANDOM */ | 92 | #endif /* CONFIG_ARCH_RANDOM */ |
72 | 93 | ||
73 | extern void x86_init_rdrand(struct cpuinfo_x86 *c); | 94 | extern void x86_init_rdrand(struct cpuinfo_x86 *c); |
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index c6cd358a1eec..04a48903b2eb 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h | |||
@@ -92,12 +92,53 @@ | |||
92 | #endif | 92 | #endif |
93 | #define smp_read_barrier_depends() read_barrier_depends() | 93 | #define smp_read_barrier_depends() read_barrier_depends() |
94 | #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) | 94 | #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) |
95 | #else | 95 | #else /* !SMP */ |
96 | #define smp_mb() barrier() | 96 | #define smp_mb() barrier() |
97 | #define smp_rmb() barrier() | 97 | #define smp_rmb() barrier() |
98 | #define smp_wmb() barrier() | 98 | #define smp_wmb() barrier() |
99 | #define smp_read_barrier_depends() do { } while (0) | 99 | #define smp_read_barrier_depends() do { } while (0) |
100 | #define set_mb(var, value) do { var = value; barrier(); } while (0) | 100 | #define set_mb(var, value) do { var = value; barrier(); } while (0) |
101 | #endif /* SMP */ | ||
102 | |||
103 | #if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) | ||
104 | |||
105 | /* | ||
106 | * For either of these options x86 doesn't have a strong TSO memory | ||
107 | * model and we should fall back to full barriers. | ||
108 | */ | ||
109 | |||
110 | #define smp_store_release(p, v) \ | ||
111 | do { \ | ||
112 | compiletime_assert_atomic_type(*p); \ | ||
113 | smp_mb(); \ | ||
114 | ACCESS_ONCE(*p) = (v); \ | ||
115 | } while (0) | ||
116 | |||
117 | #define smp_load_acquire(p) \ | ||
118 | ({ \ | ||
119 | typeof(*p) ___p1 = ACCESS_ONCE(*p); \ | ||
120 | compiletime_assert_atomic_type(*p); \ | ||
121 | smp_mb(); \ | ||
122 | ___p1; \ | ||
123 | }) | ||
124 | |||
125 | #else /* regular x86 TSO memory ordering */ | ||
126 | |||
127 | #define smp_store_release(p, v) \ | ||
128 | do { \ | ||
129 | compiletime_assert_atomic_type(*p); \ | ||
130 | barrier(); \ | ||
131 | ACCESS_ONCE(*p) = (v); \ | ||
132 | } while (0) | ||
133 | |||
134 | #define smp_load_acquire(p) \ | ||
135 | ({ \ | ||
136 | typeof(*p) ___p1 = ACCESS_ONCE(*p); \ | ||
137 | compiletime_assert_atomic_type(*p); \ | ||
138 | barrier(); \ | ||
139 | ___p1; \ | ||
140 | }) | ||
141 | |||
101 | #endif | 142 | #endif |
102 | 143 | ||
103 | /* | 144 | /* |
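The new smp_store_release()/smp_load_acquire() pair is intended for one-way message passing: on regular TSO x86 a compiler barrier() is enough, while the OOSTORE/PPRO_FENCE configurations fall back to a full smp_mb(). A minimal usage sketch (the message structure and function names are illustrative, not part of this patch):

struct message {
        int payload;
};

static struct message msg;
static struct message *published;       /* NULL until msg is ready */

/* producer */
static void publish(int value)
{
        msg.payload = value;                    /* A: fill in the data */
        smp_store_release(&published, &msg);    /* B: then expose it   */
}

/* consumer */
static int consume(void)
{
        struct message *m = smp_load_acquire(&published); /* pairs with B */

        if (!m)
                return -1;              /* nothing published yet   */
        return m->payload;              /* guaranteed to observe A */
}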
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 89270b4318db..e099f9502ace 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -216,6 +216,7 @@ | |||
216 | #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ | 216 | #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ |
217 | #define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */ | 217 | #define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */ |
218 | #define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ | 218 | #define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ |
219 | #define X86_FEATURE_MPX (9*32+14) /* Memory Protection Extension */ | ||
219 | #define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */ | 220 | #define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */ |
220 | #define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */ | 221 | #define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */ |
221 | #define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */ | 222 | #define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */ |
diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h index fd8f9e2ca35f..535192f6bfad 100644 --- a/arch/x86/include/asm/dmi.h +++ b/arch/x86/include/asm/dmi.h | |||
@@ -13,7 +13,9 @@ static __always_inline __init void *dmi_alloc(unsigned len) | |||
13 | } | 13 | } |
14 | 14 | ||
15 | /* Use early IO mappings for DMI because it's initialized early */ | 15 | /* Use early IO mappings for DMI because it's initialized early */ |
16 | #define dmi_ioremap early_ioremap | 16 | #define dmi_early_remap early_ioremap |
17 | #define dmi_iounmap early_iounmap | 17 | #define dmi_early_unmap early_iounmap |
18 | #define dmi_remap ioremap | ||
19 | #define dmi_unmap iounmap | ||
18 | 20 | ||
19 | #endif /* _ASM_X86_DMI_H */ | 21 | #endif /* _ASM_X86_DMI_H */ |
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 65c6e6e3a552..3b978c472d08 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h | |||
@@ -1,6 +1,24 @@ | |||
1 | #ifndef _ASM_X86_EFI_H | 1 | #ifndef _ASM_X86_EFI_H |
2 | #define _ASM_X86_EFI_H | 2 | #define _ASM_X86_EFI_H |
3 | 3 | ||
4 | /* | ||
5 | * We map the EFI regions needed for runtime services non-contiguously, | ||
6 | * with preserved alignment on virtual addresses starting from -4G down | ||
7 | * for a total max space of 64G. This way, we provide for stable runtime | ||
8 | * services addresses across kernels so that a kexec'd kernel can still | ||
9 | * use them. | ||
10 | * | ||
11 | * This is the main reason why we're doing stable VA mappings for RT | ||
12 | * services. | ||
13 | * | ||
14 | * This flag is used in conjunction with a chicken bit called | ||
15 | * "efi=old_map" which can be used as a fallback to the old runtime | ||
16 | * services mapping method in case there's some b0rkage with a | ||
17 | * particular EFI implementation (haha, it is hard to hold up the | ||
18 | * sarcasm here...). | ||
19 | */ | ||
20 | #define EFI_OLD_MEMMAP EFI_ARCH_1 | ||
21 | |||
4 | #ifdef CONFIG_X86_32 | 22 | #ifdef CONFIG_X86_32 |
5 | 23 | ||
6 | #define EFI_LOADER_SIGNATURE "EL32" | 24 | #define EFI_LOADER_SIGNATURE "EL32" |
@@ -69,24 +87,31 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, | |||
69 | efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \ | 87 | efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \ |
70 | (u64)(a4), (u64)(a5), (u64)(a6)) | 88 | (u64)(a4), (u64)(a5), (u64)(a6)) |
71 | 89 | ||
90 | #define _efi_call_virtX(x, f, ...) \ | ||
91 | ({ \ | ||
92 | efi_status_t __s; \ | ||
93 | \ | ||
94 | efi_sync_low_kernel_mappings(); \ | ||
95 | preempt_disable(); \ | ||
96 | __s = efi_call##x((void *)efi.systab->runtime->f, __VA_ARGS__); \ | ||
97 | preempt_enable(); \ | ||
98 | __s; \ | ||
99 | }) | ||
100 | |||
72 | #define efi_call_virt0(f) \ | 101 | #define efi_call_virt0(f) \ |
73 | efi_call0((efi.systab->runtime->f)) | 102 | _efi_call_virtX(0, f) |
74 | #define efi_call_virt1(f, a1) \ | 103 | #define efi_call_virt1(f, a1) \ |
75 | efi_call1((efi.systab->runtime->f), (u64)(a1)) | 104 | _efi_call_virtX(1, f, (u64)(a1)) |
76 | #define efi_call_virt2(f, a1, a2) \ | 105 | #define efi_call_virt2(f, a1, a2) \ |
77 | efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2)) | 106 | _efi_call_virtX(2, f, (u64)(a1), (u64)(a2)) |
78 | #define efi_call_virt3(f, a1, a2, a3) \ | 107 | #define efi_call_virt3(f, a1, a2, a3) \ |
79 | efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ | 108 | _efi_call_virtX(3, f, (u64)(a1), (u64)(a2), (u64)(a3)) |
80 | (u64)(a3)) | 109 | #define efi_call_virt4(f, a1, a2, a3, a4) \ |
81 | #define efi_call_virt4(f, a1, a2, a3, a4) \ | 110 | _efi_call_virtX(4, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4)) |
82 | efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ | 111 | #define efi_call_virt5(f, a1, a2, a3, a4, a5) \ |
83 | (u64)(a3), (u64)(a4)) | 112 | _efi_call_virtX(5, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5)) |
84 | #define efi_call_virt5(f, a1, a2, a3, a4, a5) \ | 113 | #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ |
85 | efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ | 114 | _efi_call_virtX(6, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) |
86 | (u64)(a3), (u64)(a4), (u64)(a5)) | ||
87 | #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ | ||
88 | efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ | ||
89 | (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) | ||
90 | 115 | ||
91 | extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, | 116 | extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, |
92 | u32 type, u64 attribute); | 117 | u32 type, u64 attribute); |
@@ -95,12 +120,28 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, | |||
95 | 120 | ||
96 | extern int add_efi_memmap; | 121 | extern int add_efi_memmap; |
97 | extern unsigned long x86_efi_facility; | 122 | extern unsigned long x86_efi_facility; |
123 | extern struct efi_scratch efi_scratch; | ||
98 | extern void efi_set_executable(efi_memory_desc_t *md, bool executable); | 124 | extern void efi_set_executable(efi_memory_desc_t *md, bool executable); |
99 | extern int efi_memblock_x86_reserve_range(void); | 125 | extern int efi_memblock_x86_reserve_range(void); |
100 | extern void efi_call_phys_prelog(void); | 126 | extern void efi_call_phys_prelog(void); |
101 | extern void efi_call_phys_epilog(void); | 127 | extern void efi_call_phys_epilog(void); |
102 | extern void efi_unmap_memmap(void); | 128 | extern void efi_unmap_memmap(void); |
103 | extern void efi_memory_uc(u64 addr, unsigned long size); | 129 | extern void efi_memory_uc(u64 addr, unsigned long size); |
130 | extern void __init efi_map_region(efi_memory_desc_t *md); | ||
131 | extern void __init efi_map_region_fixed(efi_memory_desc_t *md); | ||
132 | extern void efi_sync_low_kernel_mappings(void); | ||
133 | extern void efi_setup_page_tables(void); | ||
134 | extern void __init old_map_region(efi_memory_desc_t *md); | ||
135 | |||
136 | struct efi_setup_data { | ||
137 | u64 fw_vendor; | ||
138 | u64 runtime; | ||
139 | u64 tables; | ||
140 | u64 smbios; | ||
141 | u64 reserved[8]; | ||
142 | }; | ||
143 | |||
144 | extern u64 efi_setup; | ||
104 | 145 | ||
105 | #ifdef CONFIG_EFI | 146 | #ifdef CONFIG_EFI |
106 | 147 | ||
@@ -110,7 +151,7 @@ static inline bool efi_is_native(void) | |||
110 | } | 151 | } |
111 | 152 | ||
112 | extern struct console early_efi_console; | 153 | extern struct console early_efi_console; |
113 | 154 | extern void parse_efi_setup(u64 phys_addr, u32 data_len); | |
114 | #else | 155 | #else |
115 | /* | 156 | /* |
116 | * IF EFI is not configured, have the EFI calls return -ENOSYS. | 157 | * IF EFI is not configured, have the EFI calls return -ENOSYS. |
@@ -122,6 +163,7 @@ extern struct console early_efi_console; | |||
122 | #define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS) | 163 | #define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS) |
123 | #define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS) | 164 | #define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS) |
124 | #define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS) | 165 | #define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS) |
166 | static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {} | ||
125 | #endif /* CONFIG_EFI */ | 167 | #endif /* CONFIG_EFI */ |
126 | 168 | ||
127 | #endif /* _ASM_X86_EFI_H */ | 169 | #endif /* _ASM_X86_EFI_H */ |
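With the new _efi_call_virtX() wrapper, every virtual runtime call now syncs the low kernel mappings and runs with preemption disabled around the actual efi_callN(). As a rough illustration, a caller such as the get_time wrapper effectively expands to something like the following (the surrounding function is a sketch; only the three steps inside come from the macro above):

static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
{
        efi_status_t status;

        efi_sync_low_kernel_mappings();         /* keep swapper/EFI pgds coherent */
        preempt_disable();                      /* no task switch while the EFI
                                                 * page tables may be in use      */
        status = efi_call2((void *)efi.systab->runtime->get_time,
                           (u64)tm, (u64)tc);
        preempt_enable();

        return status;
}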
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index e846225265ed..7252cd339175 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h | |||
@@ -175,64 +175,7 @@ static inline void __set_fixmap(enum fixed_addresses idx, | |||
175 | } | 175 | } |
176 | #endif | 176 | #endif |
177 | 177 | ||
178 | #define set_fixmap(idx, phys) \ | 178 | #include <asm-generic/fixmap.h> |
179 | __set_fixmap(idx, phys, PAGE_KERNEL) | ||
180 | |||
181 | /* | ||
182 | * Some hardware wants to get fixmapped without caching. | ||
183 | */ | ||
184 | #define set_fixmap_nocache(idx, phys) \ | ||
185 | __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) | ||
186 | |||
187 | #define clear_fixmap(idx) \ | ||
188 | __set_fixmap(idx, 0, __pgprot(0)) | ||
189 | |||
190 | #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) | ||
191 | #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) | ||
192 | |||
193 | extern void __this_fixmap_does_not_exist(void); | ||
194 | |||
195 | /* | ||
196 | * 'index to address' translation. If anyone tries to use the idx | ||
197 | * directly without translation, we catch the bug with a NULL-deference | ||
198 | * kernel oops. Illegal ranges of incoming indices are caught too. | ||
199 | */ | ||
200 | static __always_inline unsigned long fix_to_virt(const unsigned int idx) | ||
201 | { | ||
202 | /* | ||
203 | * this branch gets completely eliminated after inlining, | ||
204 | * except when someone tries to use fixaddr indices in an | ||
205 | * illegal way. (such as mixing up address types or using | ||
206 | * out-of-range indices). | ||
207 | * | ||
208 | * If it doesn't get removed, the linker will complain | ||
209 | * loudly with a reasonably clear error message.. | ||
210 | */ | ||
211 | if (idx >= __end_of_fixed_addresses) | ||
212 | __this_fixmap_does_not_exist(); | ||
213 | |||
214 | return __fix_to_virt(idx); | ||
215 | } | ||
216 | |||
217 | static inline unsigned long virt_to_fix(const unsigned long vaddr) | ||
218 | { | ||
219 | BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); | ||
220 | return __virt_to_fix(vaddr); | ||
221 | } | ||
222 | |||
223 | /* Return an pointer with offset calculated */ | ||
224 | static __always_inline unsigned long | ||
225 | __set_fixmap_offset(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) | ||
226 | { | ||
227 | __set_fixmap(idx, phys, flags); | ||
228 | return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1)); | ||
229 | } | ||
230 | |||
231 | #define set_fixmap_offset(idx, phys) \ | ||
232 | __set_fixmap_offset(idx, phys, PAGE_KERNEL) | ||
233 | |||
234 | #define set_fixmap_offset_nocache(idx, phys) \ | ||
235 | __set_fixmap_offset(idx, phys, PAGE_KERNEL_NOCACHE) | ||
236 | 179 | ||
237 | #endif /* !__ASSEMBLY__ */ | 180 | #endif /* !__ASSEMBLY__ */ |
238 | #endif /* _ASM_X86_FIXMAP_H */ | 181 | #endif /* _ASM_X86_FIXMAP_H */ |
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index c49a613c6452..cea1c76d49bf 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h | |||
@@ -293,12 +293,13 @@ static inline int restore_fpu_checking(struct task_struct *tsk) | |||
293 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception | 293 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception |
294 | is pending. Clear the x87 state here by setting it to fixed | 294 | is pending. Clear the x87 state here by setting it to fixed |
295 | values. "m" is a random variable that should be in L1 */ | 295 | values. "m" is a random variable that should be in L1 */ |
296 | alternative_input( | 296 | if (unlikely(static_cpu_has(X86_FEATURE_FXSAVE_LEAK))) { |
297 | ASM_NOP8 ASM_NOP2, | 297 | asm volatile( |
298 | "emms\n\t" /* clear stack tags */ | 298 | "fnclex\n\t" |
299 | "fildl %P[addr]", /* set F?P to defined value */ | 299 | "emms\n\t" |
300 | X86_FEATURE_FXSAVE_LEAK, | 300 | "fildl %P[addr]" /* set F?P to defined value */ |
301 | [addr] "m" (tsk->thread.fpu.has_fpu)); | 301 | : : [addr] "m" (tsk->thread.fpu.has_fpu)); |
302 | } | ||
302 | 303 | ||
303 | return fpu_restore_checking(&tsk->thread.fpu); | 304 | return fpu_restore_checking(&tsk->thread.fpu); |
304 | } | 305 | } |
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index be27ba1e947a..b4c1f5453436 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h | |||
@@ -110,26 +110,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | |||
110 | static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, | 110 | static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, |
111 | u32 oldval, u32 newval) | 111 | u32 oldval, u32 newval) |
112 | { | 112 | { |
113 | int ret = 0; | 113 | return user_atomic_cmpxchg_inatomic(uval, uaddr, oldval, newval); |
114 | |||
115 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) | ||
116 | return -EFAULT; | ||
117 | |||
118 | asm volatile("\t" ASM_STAC "\n" | ||
119 | "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" | ||
120 | "2:\t" ASM_CLAC "\n" | ||
121 | "\t.section .fixup, \"ax\"\n" | ||
122 | "3:\tmov %3, %0\n" | ||
123 | "\tjmp 2b\n" | ||
124 | "\t.previous\n" | ||
125 | _ASM_EXTABLE(1b, 3b) | ||
126 | : "+r" (ret), "=a" (oldval), "+m" (*uaddr) | ||
127 | : "i" (-EFAULT), "r" (newval), "1" (oldval) | ||
128 | : "memory" | ||
129 | ); | ||
130 | |||
131 | *uval = oldval; | ||
132 | return ret; | ||
133 | } | 114 | } |
134 | 115 | ||
135 | #endif | 116 | #endif |
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index cba45d99ac1a..67d69b8e2d20 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -191,6 +191,9 @@ extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); | |||
191 | #define trace_interrupt interrupt | 191 | #define trace_interrupt interrupt |
192 | #endif | 192 | #endif |
193 | 193 | ||
194 | #define VECTOR_UNDEFINED -1 | ||
195 | #define VECTOR_RETRIGGERED -2 | ||
196 | |||
194 | typedef int vector_irq_t[NR_VECTORS]; | 197 | typedef int vector_irq_t[NR_VECTORS]; |
195 | DECLARE_PER_CPU(vector_irq_t, vector_irq); | 198 | DECLARE_PER_CPU(vector_irq_t, vector_irq); |
196 | extern void setup_vector_irq(int cpu); | 199 | extern void setup_vector_irq(int cpu); |
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 459769d39263..e34e097b6f9d 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h | |||
@@ -51,10 +51,41 @@ struct devs_id { | |||
51 | enum intel_mid_cpu_type { | 51 | enum intel_mid_cpu_type { |
52 | /* 1 was Moorestown */ | 52 | /* 1 was Moorestown */ |
53 | INTEL_MID_CPU_CHIP_PENWELL = 2, | 53 | INTEL_MID_CPU_CHIP_PENWELL = 2, |
54 | INTEL_MID_CPU_CHIP_CLOVERVIEW, | ||
55 | INTEL_MID_CPU_CHIP_TANGIER, | ||
54 | }; | 56 | }; |
55 | 57 | ||
56 | extern enum intel_mid_cpu_type __intel_mid_cpu_chip; | 58 | extern enum intel_mid_cpu_type __intel_mid_cpu_chip; |
57 | 59 | ||
60 | /** | ||
61 | * struct intel_mid_ops - Interface between intel-mid & sub archs | ||
62 | * @arch_setup: arch_setup function to re-initialize platform | ||
63 | * structures (x86_init, x86_platform_init) | ||
64 | * | ||
65 | * This structure can be extended if any new interface is required | ||
66 | * between intel-mid & its sub arch files. | ||
67 | */ | ||
68 | struct intel_mid_ops { | ||
69 | void (*arch_setup)(void); | ||
70 | }; | ||
71 | |||
72 | /* Helper API's for INTEL_MID_OPS_INIT */ | ||
73 | #define DECLARE_INTEL_MID_OPS_INIT(cpuname, cpuid) \ | ||
74 | [cpuid] = get_##cpuname##_ops | ||
75 | |||
76 | /* Maximum number of CPU ops */ | ||
77 | #define MAX_CPU_OPS(a) (sizeof(a)/sizeof(void *)) | ||
78 | |||
79 | /* | ||
80 | * For every new cpu addition, a weak get_<cpuname>_ops() function needs to be | ||
81 | * declared in arch/x86/platform/intel_mid/intel_mid_weak_decls.h. | ||
82 | */ | ||
83 | #define INTEL_MID_OPS_INIT {\ | ||
84 | DECLARE_INTEL_MID_OPS_INIT(penwell, INTEL_MID_CPU_CHIP_PENWELL), \ | ||
85 | DECLARE_INTEL_MID_OPS_INIT(cloverview, INTEL_MID_CPU_CHIP_CLOVERVIEW), \ | ||
86 | DECLARE_INTEL_MID_OPS_INIT(tangier, INTEL_MID_CPU_CHIP_TANGIER) \ | ||
87 | }; | ||
88 | |||
58 | #ifdef CONFIG_X86_INTEL_MID | 89 | #ifdef CONFIG_X86_INTEL_MID |
59 | 90 | ||
60 | static inline enum intel_mid_cpu_type intel_mid_identify_cpu(void) | 91 | static inline enum intel_mid_cpu_type intel_mid_identify_cpu(void) |
@@ -86,8 +117,21 @@ extern enum intel_mid_timer_options intel_mid_timer_options; | |||
86 | * Penwell uses spread spectrum clock, so the freq number is not exactly | 117 | * Penwell uses spread spectrum clock, so the freq number is not exactly |
87 | * the same as reported by MSR based on SDM. | 118 | * the same as reported by MSR based on SDM. |
88 | */ | 119 | */ |
89 | #define PENWELL_FSB_FREQ_83SKU 83200 | 120 | #define FSB_FREQ_83SKU 83200 |
90 | #define PENWELL_FSB_FREQ_100SKU 99840 | 121 | #define FSB_FREQ_100SKU 99840 |
122 | #define FSB_FREQ_133SKU 133000 | ||
123 | |||
124 | #define FSB_FREQ_167SKU 167000 | ||
125 | #define FSB_FREQ_200SKU 200000 | ||
126 | #define FSB_FREQ_267SKU 267000 | ||
127 | #define FSB_FREQ_333SKU 333000 | ||
128 | #define FSB_FREQ_400SKU 400000 | ||
129 | |||
130 | /* Bus Select SoC Fuse value */ | ||
131 | #define BSEL_SOC_FUSE_MASK 0x7 | ||
132 | #define BSEL_SOC_FUSE_001 0x1 /* FSB 133MHz */ | ||
133 | #define BSEL_SOC_FUSE_101 0x5 /* FSB 100MHz */ | ||
134 | #define BSEL_SOC_FUSE_111 0x7 /* FSB 83MHz */ | ||
91 | 135 | ||
92 | #define SFI_MTMR_MAX_NUM 8 | 136 | #define SFI_MTMR_MAX_NUM 8 |
93 | #define SFI_MRTC_MAX 8 | 137 | #define SFI_MRTC_MAX 8 |
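Each supported SoC provides a get_<cpuname>_ops() hook returning its struct intel_mid_ops, and intel-mid.c indexes those hooks by chip id via INTEL_MID_OPS_INIT. A hedged sketch of how a sub-arch would plug in (the Tangier setup body and the array variable are illustrative):

static void tangier_arch_setup(void)
{
        /* SoC-specific overrides, e.g. a different TSC calibration hook */
}

static struct intel_mid_ops tangier_ops = {
        .arch_setup = tangier_arch_setup,
};

void *get_tangier_ops(void)
{
        return &tangier_ops;
}

/* intel-mid.c side: note the macro already supplies the closing "};" */
static void *(*get_intel_mid_ops[])(void) = INTEL_MID_OPS_INIT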
diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h new file mode 100644 index 000000000000..8e71c7941767 --- /dev/null +++ b/arch/x86/include/asm/iosf_mbi.h | |||
@@ -0,0 +1,90 @@ | |||
1 | /* | ||
2 | * iosf_mbi.h: Intel OnChip System Fabric MailBox access support | ||
3 | */ | ||
4 | |||
5 | #ifndef IOSF_MBI_SYMS_H | ||
6 | #define IOSF_MBI_SYMS_H | ||
7 | |||
8 | #define MBI_MCR_OFFSET 0xD0 | ||
9 | #define MBI_MDR_OFFSET 0xD4 | ||
10 | #define MBI_MCRX_OFFSET 0xD8 | ||
11 | |||
12 | #define MBI_RD_MASK 0xFEFFFFFF | ||
13 | #define MBI_WR_MASK 0X01000000 | ||
14 | |||
15 | #define MBI_MASK_HI 0xFFFFFF00 | ||
16 | #define MBI_MASK_LO 0x000000FF | ||
17 | #define MBI_ENABLE 0xF0 | ||
18 | |||
19 | /* Baytrail available units */ | ||
20 | #define BT_MBI_UNIT_AUNIT 0x00 | ||
21 | #define BT_MBI_UNIT_SMC 0x01 | ||
22 | #define BT_MBI_UNIT_CPU 0x02 | ||
23 | #define BT_MBI_UNIT_BUNIT 0x03 | ||
24 | #define BT_MBI_UNIT_PMC 0x04 | ||
25 | #define BT_MBI_UNIT_GFX 0x06 | ||
26 | #define BT_MBI_UNIT_SMI 0x0C | ||
27 | #define BT_MBI_UNIT_USB 0x43 | ||
28 | #define BT_MBI_UNIT_SATA 0xA3 | ||
29 | #define BT_MBI_UNIT_PCIE 0xA6 | ||
30 | |||
31 | /* Baytrail read/write opcodes */ | ||
32 | #define BT_MBI_AUNIT_READ 0x10 | ||
33 | #define BT_MBI_AUNIT_WRITE 0x11 | ||
34 | #define BT_MBI_SMC_READ 0x10 | ||
35 | #define BT_MBI_SMC_WRITE 0x11 | ||
36 | #define BT_MBI_CPU_READ 0x10 | ||
37 | #define BT_MBI_CPU_WRITE 0x11 | ||
38 | #define BT_MBI_BUNIT_READ 0x10 | ||
39 | #define BT_MBI_BUNIT_WRITE 0x11 | ||
40 | #define BT_MBI_PMC_READ 0x06 | ||
41 | #define BT_MBI_PMC_WRITE 0x07 | ||
42 | #define BT_MBI_GFX_READ 0x00 | ||
43 | #define BT_MBI_GFX_WRITE 0x01 | ||
44 | #define BT_MBI_SMIO_READ 0x06 | ||
45 | #define BT_MBI_SMIO_WRITE 0x07 | ||
46 | #define BT_MBI_USB_READ 0x06 | ||
47 | #define BT_MBI_USB_WRITE 0x07 | ||
48 | #define BT_MBI_SATA_READ 0x00 | ||
49 | #define BT_MBI_SATA_WRITE 0x01 | ||
50 | #define BT_MBI_PCIE_READ 0x00 | ||
51 | #define BT_MBI_PCIE_WRITE 0x01 | ||
52 | |||
53 | /** | ||
54 | * iosf_mbi_read() - MailBox Interface read command | ||
55 | * @port: port indicating subunit being accessed | ||
56 | * @opcode: port specific read or write opcode | ||
57 | * @offset: register address offset | ||
58 | * @mdr: register data to be read | ||
59 | * | ||
60 | * Locking is handled by spinlock - cannot sleep. | ||
61 | * Return: Nonzero on error | ||
62 | */ | ||
63 | int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr); | ||
64 | |||
65 | /** | ||
66 | * iosf_mbi_write() - MailBox unmasked write command | ||
67 | * @port: port indicating subunit being accessed | ||
68 | * @opcode: port specific read or write opcode | ||
69 | * @offset: register address offset | ||
70 | * @mdr: register data to be written | ||
71 | * | ||
72 | * Locking is handled by spinlock - cannot sleep. | ||
73 | * Return: Nonzero on error | ||
74 | */ | ||
75 | int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr); | ||
76 | |||
77 | /** | ||
78 | * iosf_mbi_modify() - MailBox masked write command | ||
79 | * @port: port indicating subunit being accessed | ||
80 | * @opcode: port specific read or write opcode | ||
81 | * @offset: register address offset | ||
82 | * @mdr: register data being modified | ||
83 | * @mask: mask indicating bits in mdr to be modified | ||
84 | * | ||
85 | * Locking is handled by spinlock - cannot sleep. | ||
86 | * Return: Nonzero on error | ||
87 | */ | ||
88 | int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask); | ||
89 | |||
90 | #endif /* IOSF_MBI_SYMS_H */ | ||
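A hedged usage sketch of the new mailbox API: read a register in one sideband unit, then set a single bit with a masked write. The register offset (0x3c) and the bit are made-up example values, not real PMC registers:

#include <linux/bitops.h>
#include <asm/iosf_mbi.h>

static int pmc_set_example_bit(void)
{
        u32 val;
        int ret;

        ret = iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_PMC_READ, 0x3c, &val);
        if (ret)
                return ret;

        /* set bit 0 only; the mask keeps the other bits untouched */
        return iosf_mbi_modify(BT_MBI_UNIT_PMC, BT_MBI_PMC_WRITE,
                               0x3c, BIT(0), BIT(0));
}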
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 0ea10f27d613..cb6cfcd034cf 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h | |||
@@ -25,6 +25,7 @@ extern void irq_ctx_init(int cpu); | |||
25 | 25 | ||
26 | #ifdef CONFIG_HOTPLUG_CPU | 26 | #ifdef CONFIG_HOTPLUG_CPU |
27 | #include <linux/cpumask.h> | 27 | #include <linux/cpumask.h> |
28 | extern int check_irq_vectors_for_cpu_disable(void); | ||
28 | extern void fixup_irqs(void); | 29 | extern void fixup_irqs(void); |
29 | extern void irq_force_complete_move(int); | 30 | extern void irq_force_complete_move(int); |
30 | #endif | 31 | #endif |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ae5d7830855c..fdf83afbb7d9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -605,6 +605,7 @@ struct kvm_arch { | |||
605 | /* fields used by HYPER-V emulation */ | 605 | /* fields used by HYPER-V emulation */ |
606 | u64 hv_guest_os_id; | 606 | u64 hv_guest_os_id; |
607 | u64 hv_hypercall; | 607 | u64 hv_hypercall; |
608 | u64 hv_tsc_page; | ||
608 | 609 | ||
609 | #ifdef CONFIG_KVM_MMU_AUDIT | 610 | #ifdef CONFIG_KVM_MMU_AUDIT |
610 | int audit_point; | 611 | int audit_point; |
@@ -699,6 +700,8 @@ struct kvm_x86_ops { | |||
699 | void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | 700 | void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
700 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | 701 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
701 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | 702 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
703 | u64 (*get_dr6)(struct kvm_vcpu *vcpu); | ||
704 | void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); | ||
702 | void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); | 705 | void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); |
703 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); | 706 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); |
704 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); | 707 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index c696a8687567..6e4ce2df87cf 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -118,7 +118,6 @@ extern void mce_register_decode_chain(struct notifier_block *nb); | |||
118 | extern void mce_unregister_decode_chain(struct notifier_block *nb); | 118 | extern void mce_unregister_decode_chain(struct notifier_block *nb); |
119 | 119 | ||
120 | #include <linux/percpu.h> | 120 | #include <linux/percpu.h> |
121 | #include <linux/init.h> | ||
122 | #include <linux/atomic.h> | 121 | #include <linux/atomic.h> |
123 | 122 | ||
124 | extern int mce_p5_enabled; | 123 | extern int mce_p5_enabled; |
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index f98bd6625318..b59827e76529 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h | |||
@@ -1,6 +1,21 @@ | |||
1 | #ifndef _ASM_X86_MICROCODE_H | 1 | #ifndef _ASM_X86_MICROCODE_H |
2 | #define _ASM_X86_MICROCODE_H | 2 | #define _ASM_X86_MICROCODE_H |
3 | 3 | ||
4 | #define native_rdmsr(msr, val1, val2) \ | ||
5 | do { \ | ||
6 | u64 __val = native_read_msr((msr)); \ | ||
7 | (void)((val1) = (u32)__val); \ | ||
8 | (void)((val2) = (u32)(__val >> 32)); \ | ||
9 | } while (0) | ||
10 | |||
11 | #define native_wrmsr(msr, low, high) \ | ||
12 | native_write_msr(msr, low, high) | ||
13 | |||
14 | #define native_wrmsrl(msr, val) \ | ||
15 | native_write_msr((msr), \ | ||
16 | (u32)((u64)(val)), \ | ||
17 | (u32)((u64)(val) >> 32)) | ||
18 | |||
4 | struct cpu_signature { | 19 | struct cpu_signature { |
5 | unsigned int sig; | 20 | unsigned int sig; |
6 | unsigned int pf; | 21 | unsigned int pf; |
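The native_rdmsr()/native_wrmsr()/native_wrmsrl() helpers mirror the usual rdmsr/wrmsr macros but always use the native (non-paravirt) accessors, which is what the early microcode loader needs. A small usage sketch; MSR_IA32_UCODE_REV is used here only as a familiar example and the function is illustrative:

static void report_microcode_rev_example(void)
{
        u32 rev_lo, rev_hi;

        native_wrmsrl(MSR_IA32_UCODE_REV, 0);   /* clear the stale revision */
        sync_core();                            /* serialize before reading */
        native_rdmsr(MSR_IA32_UCODE_REV, rev_lo, rev_hi);

        pr_info("microcode revision: 0x%x\n", rev_hi);
}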
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index 4c019179a57d..b7b10b82d3e5 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h | |||
@@ -61,11 +61,10 @@ extern int __apply_microcode_amd(struct microcode_amd *mc_amd); | |||
61 | extern int apply_microcode_amd(int cpu); | 61 | extern int apply_microcode_amd(int cpu); |
62 | extern enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size); | 62 | extern enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size); |
63 | 63 | ||
64 | #define PATCH_MAX_SIZE PAGE_SIZE | ||
65 | extern u8 amd_ucode_patch[PATCH_MAX_SIZE]; | ||
66 | |||
64 | #ifdef CONFIG_MICROCODE_AMD_EARLY | 67 | #ifdef CONFIG_MICROCODE_AMD_EARLY |
65 | #ifdef CONFIG_X86_32 | ||
66 | #define MPB_MAX_SIZE PAGE_SIZE | ||
67 | extern u8 amd_bsp_mpb[MPB_MAX_SIZE]; | ||
68 | #endif | ||
69 | extern void __init load_ucode_amd_bsp(void); | 68 | extern void __init load_ucode_amd_bsp(void); |
70 | extern void load_ucode_amd_ap(void); | 69 | extern void load_ucode_amd_ap(void); |
71 | extern int __init save_microcode_in_initrd_amd(void); | 70 | extern int __init save_microcode_in_initrd_amd(void); |
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 3142a94c7b4b..3e6b4920ef5d 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h | |||
@@ -1,7 +1,6 @@ | |||
1 | #ifndef _ASM_X86_MPSPEC_H | 1 | #ifndef _ASM_X86_MPSPEC_H |
2 | #define _ASM_X86_MPSPEC_H | 2 | #define _ASM_X86_MPSPEC_H |
3 | 3 | ||
4 | #include <linux/init.h> | ||
5 | 4 | ||
6 | #include <asm/mpspec_def.h> | 5 | #include <asm/mpspec_def.h> |
7 | #include <asm/x86_init.h> | 6 | #include <asm/x86_init.h> |
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 2f366d0ac6b4..1da25a5f96f9 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _ASM_X86_MWAIT_H | 1 | #ifndef _ASM_X86_MWAIT_H |
2 | #define _ASM_X86_MWAIT_H | 2 | #define _ASM_X86_MWAIT_H |
3 | 3 | ||
4 | #include <linux/sched.h> | ||
5 | |||
4 | #define MWAIT_SUBSTATE_MASK 0xf | 6 | #define MWAIT_SUBSTATE_MASK 0xf |
5 | #define MWAIT_CSTATE_MASK 0xf | 7 | #define MWAIT_CSTATE_MASK 0xf |
6 | #define MWAIT_SUBSTATE_SIZE 4 | 8 | #define MWAIT_SUBSTATE_SIZE 4 |
@@ -13,4 +15,45 @@ | |||
13 | 15 | ||
14 | #define MWAIT_ECX_INTERRUPT_BREAK 0x1 | 16 | #define MWAIT_ECX_INTERRUPT_BREAK 0x1 |
15 | 17 | ||
18 | static inline void __monitor(const void *eax, unsigned long ecx, | ||
19 | unsigned long edx) | ||
20 | { | ||
21 | /* "monitor %eax, %ecx, %edx;" */ | ||
22 | asm volatile(".byte 0x0f, 0x01, 0xc8;" | ||
23 | :: "a" (eax), "c" (ecx), "d"(edx)); | ||
24 | } | ||
25 | |||
26 | static inline void __mwait(unsigned long eax, unsigned long ecx) | ||
27 | { | ||
28 | /* "mwait %eax, %ecx;" */ | ||
29 | asm volatile(".byte 0x0f, 0x01, 0xc9;" | ||
30 | :: "a" (eax), "c" (ecx)); | ||
31 | } | ||
32 | |||
33 | /* | ||
34 | * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, | ||
35 | * which can obviate IPI to trigger checking of need_resched. | ||
36 | * We execute MONITOR against need_resched and enter optimized wait state | ||
37 | * through MWAIT. Whenever someone changes need_resched, we would be woken | ||
38 | * up from MWAIT (without an IPI). | ||
39 | * | ||
40 | * New with Core Duo processors, MWAIT can take some hints based on CPU | ||
41 | * capability. | ||
42 | */ | ||
43 | static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) | ||
44 | { | ||
45 | if (!current_set_polling_and_test()) { | ||
46 | if (static_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) { | ||
47 | mb(); | ||
48 | clflush((void *)&current_thread_info()->flags); | ||
49 | mb(); | ||
50 | } | ||
51 | |||
52 | __monitor((void *)&current_thread_info()->flags, 0, 0); | ||
53 | if (!need_resched()) | ||
54 | __mwait(eax, ecx); | ||
55 | } | ||
56 | current_clr_polling(); | ||
57 | } | ||
58 | |||
16 | #endif /* _ASM_X86_MWAIT_H */ | 59 | #endif /* _ASM_X86_MWAIT_H */ |
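mwait_idle_with_hints() is now available to any user of <asm/mwait.h>, not only processor.h. A hedged sketch of how an idle-state driver might call it; the C1 hint value is illustrative, and the CLFLUSH erratum workaround and need_resched() handling are already done inside the helper:

static void example_enter_c1(void)
{
        unsigned long eax = 0x00;                       /* MWAIT hint: C1 */
        unsigned long ecx = MWAIT_ECX_INTERRUPT_BREAK;  /* wake on interrupt
                                                         * even if masked  */
        mwait_idle_with_hints(eax, ecx);
}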
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index c87892442e53..775873d3be55 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h | |||
@@ -71,6 +71,7 @@ extern bool __virt_addr_valid(unsigned long kaddr); | |||
71 | #include <asm-generic/getorder.h> | 71 | #include <asm-generic/getorder.h> |
72 | 72 | ||
73 | #define __HAVE_ARCH_GATE_AREA 1 | 73 | #define __HAVE_ARCH_GATE_AREA 1 |
74 | #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA | ||
74 | 75 | ||
75 | #endif /* __KERNEL__ */ | 76 | #endif /* __KERNEL__ */ |
76 | #endif /* _ASM_X86_PAGE_H */ | 77 | #endif /* _ASM_X86_PAGE_H */ |
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h index 4d550d04b609..904f528cc8e8 100644 --- a/arch/x86/include/asm/page_32.h +++ b/arch/x86/include/asm/page_32.h | |||
@@ -5,10 +5,6 @@ | |||
5 | 5 | ||
6 | #ifndef __ASSEMBLY__ | 6 | #ifndef __ASSEMBLY__ |
7 | 7 | ||
8 | #ifdef CONFIG_HUGETLB_PAGE | ||
9 | #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA | ||
10 | #endif | ||
11 | |||
12 | #define __phys_addr_nodebug(x) ((x) - PAGE_OFFSET) | 8 | #define __phys_addr_nodebug(x) ((x) - PAGE_OFFSET) |
13 | #ifdef CONFIG_DEBUG_VIRTUAL | 9 | #ifdef CONFIG_DEBUG_VIRTUAL |
14 | extern unsigned long __phys_addr(unsigned long); | 10 | extern unsigned long __phys_addr(unsigned long); |
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 43dcd804ebd5..8de6d9cf3b95 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h | |||
@@ -39,9 +39,18 @@ | |||
39 | #define __VIRTUAL_MASK_SHIFT 47 | 39 | #define __VIRTUAL_MASK_SHIFT 47 |
40 | 40 | ||
41 | /* | 41 | /* |
42 | * Kernel image size is limited to 512 MB (see level2_kernel_pgt in | 42 | * Kernel image size is limited to 1GiB due to the fixmap living in the |
43 | * arch/x86/kernel/head_64.S), and it is mapped here: | 43 | * next 1GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). Use |
44 | * 512MiB by default, leaving 1.5GiB for modules once the page tables | ||
45 | * are fully set up. If kernel ASLR is configured, it can extend the | ||
46 | * kernel page table mapping, reducing the size of the modules area. | ||
44 | */ | 47 | */ |
45 | #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) | 48 | #define KERNEL_IMAGE_SIZE_DEFAULT (512 * 1024 * 1024) |
49 | #if defined(CONFIG_RANDOMIZE_BASE) && \ | ||
50 | CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE_DEFAULT | ||
51 | #define KERNEL_IMAGE_SIZE CONFIG_RANDOMIZE_BASE_MAX_OFFSET | ||
52 | #else | ||
53 | #define KERNEL_IMAGE_SIZE KERNEL_IMAGE_SIZE_DEFAULT | ||
54 | #endif | ||
46 | 55 | ||
47 | #endif /* _ASM_X86_PAGE_64_DEFS_H */ | 56 | #endif /* _ASM_X86_PAGE_64_DEFS_H */ |
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index f97fbe3abb67..2f59cce3b38a 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h | |||
@@ -51,9 +51,9 @@ extern int devmem_is_allowed(unsigned long pagenr); | |||
51 | extern unsigned long max_low_pfn_mapped; | 51 | extern unsigned long max_low_pfn_mapped; |
52 | extern unsigned long max_pfn_mapped; | 52 | extern unsigned long max_pfn_mapped; |
53 | 53 | ||
54 | static inline phys_addr_t get_max_mapped(void) | 54 | static inline phys_addr_t get_max_low_mapped(void) |
55 | { | 55 | { |
56 | return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT; | 56 | return (phys_addr_t)max_low_pfn_mapped << PAGE_SHIFT; |
57 | } | 57 | } |
58 | 58 | ||
59 | bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn); | 59 | bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn); |
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 947b5c417e83..1ac6114c9ea5 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h | |||
@@ -104,7 +104,7 @@ extern void pci_iommu_alloc(void); | |||
104 | struct msi_desc; | 104 | struct msi_desc; |
105 | int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); | 105 | int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); |
106 | void native_teardown_msi_irq(unsigned int irq); | 106 | void native_teardown_msi_irq(unsigned int irq); |
107 | void native_restore_msi_irqs(struct pci_dev *dev, int irq); | 107 | void native_restore_msi_irqs(struct pci_dev *dev); |
108 | int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, | 108 | int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, |
109 | unsigned int irq_base, unsigned int irq_offset); | 109 | unsigned int irq_base, unsigned int irq_offset); |
110 | #else | 110 | #else |
@@ -125,7 +125,6 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, | |||
125 | 125 | ||
126 | /* generic pci stuff */ | 126 | /* generic pci stuff */ |
127 | #include <asm-generic/pci.h> | 127 | #include <asm-generic/pci.h> |
128 | #define PCIBIOS_MAX_MEM_32 0xffffffff | ||
129 | 128 | ||
130 | #ifdef CONFIG_NUMA | 129 | #ifdef CONFIG_NUMA |
131 | /* Returns the node based on pci bus */ | 130 | /* Returns the node based on pci bus */ |
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 3bf2dd0cf61f..0d193e234647 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h | |||
@@ -55,6 +55,13 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) | |||
55 | #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) | 55 | #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) |
56 | #endif | 56 | #endif |
57 | 57 | ||
58 | /* Bit manipulation helper on pte/pgoff entry */ | ||
59 | static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshift, | ||
60 | unsigned long mask, unsigned int leftshift) | ||
61 | { | ||
62 | return ((value >> rightshift) & mask) << leftshift; | ||
63 | } | ||
64 | |||
58 | #ifdef CONFIG_MEM_SOFT_DIRTY | 65 | #ifdef CONFIG_MEM_SOFT_DIRTY |
59 | 66 | ||
60 | /* | 67 | /* |
@@ -71,31 +78,34 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) | |||
71 | #define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) | 78 | #define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) |
72 | #define PTE_FILE_BITS3 (PTE_FILE_SHIFT4 - PTE_FILE_SHIFT3 - 1) | 79 | #define PTE_FILE_BITS3 (PTE_FILE_SHIFT4 - PTE_FILE_SHIFT3 - 1) |
73 | 80 | ||
74 | #define pte_to_pgoff(pte) \ | 81 | #define PTE_FILE_MASK1 ((1U << PTE_FILE_BITS1) - 1) |
75 | ((((pte).pte_low >> (PTE_FILE_SHIFT1)) \ | 82 | #define PTE_FILE_MASK2 ((1U << PTE_FILE_BITS2) - 1) |
76 | & ((1U << PTE_FILE_BITS1) - 1))) \ | 83 | #define PTE_FILE_MASK3 ((1U << PTE_FILE_BITS3) - 1) |
77 | + ((((pte).pte_low >> (PTE_FILE_SHIFT2)) \ | 84 | |
78 | & ((1U << PTE_FILE_BITS2) - 1)) \ | 85 | #define PTE_FILE_LSHIFT2 (PTE_FILE_BITS1) |
79 | << (PTE_FILE_BITS1)) \ | 86 | #define PTE_FILE_LSHIFT3 (PTE_FILE_BITS1 + PTE_FILE_BITS2) |
80 | + ((((pte).pte_low >> (PTE_FILE_SHIFT3)) \ | 87 | #define PTE_FILE_LSHIFT4 (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3) |
81 | & ((1U << PTE_FILE_BITS3) - 1)) \ | 88 | |
82 | << (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ | 89 | static __always_inline pgoff_t pte_to_pgoff(pte_t pte) |
83 | + ((((pte).pte_low >> (PTE_FILE_SHIFT4))) \ | 90 | { |
84 | << (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)) | 91 | return (pgoff_t) |
85 | 92 | (pte_bitop(pte.pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) + | |
86 | #define pgoff_to_pte(off) \ | 93 | pte_bitop(pte.pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) + |
87 | ((pte_t) { .pte_low = \ | 94 | pte_bitop(pte.pte_low, PTE_FILE_SHIFT3, PTE_FILE_MASK3, PTE_FILE_LSHIFT3) + |
88 | ((((off)) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \ | 95 | pte_bitop(pte.pte_low, PTE_FILE_SHIFT4, -1UL, PTE_FILE_LSHIFT4)); |
89 | + ((((off) >> PTE_FILE_BITS1) \ | 96 | } |
90 | & ((1U << PTE_FILE_BITS2) - 1)) \ | 97 | |
91 | << PTE_FILE_SHIFT2) \ | 98 | static __always_inline pte_t pgoff_to_pte(pgoff_t off) |
92 | + ((((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ | 99 | { |
93 | & ((1U << PTE_FILE_BITS3) - 1)) \ | 100 | return (pte_t){ |
94 | << PTE_FILE_SHIFT3) \ | 101 | .pte_low = |
95 | + ((((off) >> \ | 102 | pte_bitop(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) + |
96 | (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))) \ | 103 | pte_bitop(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) + |
97 | << PTE_FILE_SHIFT4) \ | 104 | pte_bitop(off, PTE_FILE_LSHIFT3, PTE_FILE_MASK3, PTE_FILE_SHIFT3) + |
98 | + _PAGE_FILE }) | 105 | pte_bitop(off, PTE_FILE_LSHIFT4, -1UL, PTE_FILE_SHIFT4) + |
106 | _PAGE_FILE, | ||
107 | }; | ||
108 | } | ||
99 | 109 | ||
100 | #else /* CONFIG_MEM_SOFT_DIRTY */ | 110 | #else /* CONFIG_MEM_SOFT_DIRTY */ |
101 | 111 | ||
@@ -115,22 +125,30 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) | |||
115 | #define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1) | 125 | #define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1) |
116 | #define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) | 126 | #define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) |
117 | 127 | ||
118 | #define pte_to_pgoff(pte) \ | 128 | #define PTE_FILE_MASK1 ((1U << PTE_FILE_BITS1) - 1) |
119 | ((((pte).pte_low >> PTE_FILE_SHIFT1) \ | 129 | #define PTE_FILE_MASK2 ((1U << PTE_FILE_BITS2) - 1) |
120 | & ((1U << PTE_FILE_BITS1) - 1)) \ | 130 | |
121 | + ((((pte).pte_low >> PTE_FILE_SHIFT2) \ | 131 | #define PTE_FILE_LSHIFT2 (PTE_FILE_BITS1) |
122 | & ((1U << PTE_FILE_BITS2) - 1)) << PTE_FILE_BITS1) \ | 132 | #define PTE_FILE_LSHIFT3 (PTE_FILE_BITS1 + PTE_FILE_BITS2) |
123 | + (((pte).pte_low >> PTE_FILE_SHIFT3) \ | 133 | |
124 | << (PTE_FILE_BITS1 + PTE_FILE_BITS2))) | 134 | static __always_inline pgoff_t pte_to_pgoff(pte_t pte) |
125 | 135 | { | |
126 | #define pgoff_to_pte(off) \ | 136 | return (pgoff_t) |
127 | ((pte_t) { .pte_low = \ | 137 | (pte_bitop(pte.pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) + |
128 | (((off) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \ | 138 | pte_bitop(pte.pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) + |
129 | + ((((off) >> PTE_FILE_BITS1) & ((1U << PTE_FILE_BITS2) - 1)) \ | 139 | pte_bitop(pte.pte_low, PTE_FILE_SHIFT3, -1UL, PTE_FILE_LSHIFT3)); |
130 | << PTE_FILE_SHIFT2) \ | 140 | } |
131 | + (((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ | 141 | |
132 | << PTE_FILE_SHIFT3) \ | 142 | static __always_inline pte_t pgoff_to_pte(pgoff_t off) |
133 | + _PAGE_FILE }) | 143 | { |
144 | return (pte_t){ | ||
145 | .pte_low = | ||
146 | pte_bitop(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) + | ||
147 | pte_bitop(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) + | ||
148 | pte_bitop(off, PTE_FILE_LSHIFT3, -1UL, PTE_FILE_SHIFT3) + | ||
149 | _PAGE_FILE, | ||
150 | }; | ||
151 | } | ||
134 | 152 | ||
135 | #endif /* CONFIG_MEM_SOFT_DIRTY */ | 153 | #endif /* CONFIG_MEM_SOFT_DIRTY */ |
136 | 154 | ||
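pte_bitop(value, rightshift, mask, leftshift) simply relocates a bit field: it extracts (value >> rightshift) & mask and re-places it at leftshift. A tiny worked example with made-up numbers:

static void pte_bitop_example(void)
{
        unsigned long v    = 0xe0;                      /* bits 5..7 set            */
        unsigned long fld  = pte_bitop(v, 5, 0x7, 0);   /* (0xe0 >> 5) & 0x7 == 0x7 */
        unsigned long back = pte_bitop(fld, 0, 0x7, 5); /* (0x7 & 0x7) << 5 == 0xe0 */

        BUG_ON(back != v);
}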
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 2d883440cb9a..c883bf726398 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h | |||
@@ -58,7 +58,7 @@ typedef struct { pteval_t pte; } pte_t; | |||
58 | #define VMALLOC_START _AC(0xffffc90000000000, UL) | 58 | #define VMALLOC_START _AC(0xffffc90000000000, UL) |
59 | #define VMALLOC_END _AC(0xffffe8ffffffffff, UL) | 59 | #define VMALLOC_END _AC(0xffffe8ffffffffff, UL) |
60 | #define VMEMMAP_START _AC(0xffffea0000000000, UL) | 60 | #define VMEMMAP_START _AC(0xffffea0000000000, UL) |
61 | #define MODULES_VADDR _AC(0xffffffffa0000000, UL) | 61 | #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) |
62 | #define MODULES_END _AC(0xffffffffff000000, UL) | 62 | #define MODULES_END _AC(0xffffffffff000000, UL) |
63 | #define MODULES_LEN (MODULES_END - MODULES_VADDR) | 63 | #define MODULES_LEN (MODULES_END - MODULES_VADDR) |
64 | 64 | ||
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 0ecac257fb26..a83aa44bb1fb 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -382,7 +382,8 @@ static inline void update_page_count(int level, unsigned long pages) { } | |||
382 | */ | 382 | */ |
383 | extern pte_t *lookup_address(unsigned long address, unsigned int *level); | 383 | extern pte_t *lookup_address(unsigned long address, unsigned int *level); |
384 | extern phys_addr_t slow_virt_to_phys(void *__address); | 384 | extern phys_addr_t slow_virt_to_phys(void *__address); |
385 | 385 | extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, | |
386 | unsigned numpages, unsigned long page_flags); | ||
386 | #endif /* !__ASSEMBLY__ */ | 387 | #endif /* !__ASSEMBLY__ */ |
387 | 388 | ||
388 | #endif /* _ASM_X86_PGTABLE_DEFS_H */ | 389 | #endif /* _ASM_X86_PGTABLE_DEFS_H */ |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 7b034a4057f9..fdedd38fd0fc 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -27,7 +27,6 @@ struct mm_struct; | |||
27 | #include <linux/cache.h> | 27 | #include <linux/cache.h> |
28 | #include <linux/threads.h> | 28 | #include <linux/threads.h> |
29 | #include <linux/math64.h> | 29 | #include <linux/math64.h> |
30 | #include <linux/init.h> | ||
31 | #include <linux/err.h> | 30 | #include <linux/err.h> |
32 | #include <linux/irqflags.h> | 31 | #include <linux/irqflags.h> |
33 | 32 | ||
@@ -72,6 +71,7 @@ extern u16 __read_mostly tlb_lli_4m[NR_INFO]; | |||
72 | extern u16 __read_mostly tlb_lld_4k[NR_INFO]; | 71 | extern u16 __read_mostly tlb_lld_4k[NR_INFO]; |
73 | extern u16 __read_mostly tlb_lld_2m[NR_INFO]; | 72 | extern u16 __read_mostly tlb_lld_2m[NR_INFO]; |
74 | extern u16 __read_mostly tlb_lld_4m[NR_INFO]; | 73 | extern u16 __read_mostly tlb_lld_4m[NR_INFO]; |
74 | extern u16 __read_mostly tlb_lld_1g[NR_INFO]; | ||
75 | extern s8 __read_mostly tlb_flushall_shift; | 75 | extern s8 __read_mostly tlb_flushall_shift; |
76 | 76 | ||
77 | /* | 77 | /* |
@@ -370,6 +370,20 @@ struct ymmh_struct { | |||
370 | u32 ymmh_space[64]; | 370 | u32 ymmh_space[64]; |
371 | }; | 371 | }; |
372 | 372 | ||
373 | /* We don't support LWP yet: */ | ||
374 | struct lwp_struct { | ||
375 | u8 reserved[128]; | ||
376 | }; | ||
377 | |||
378 | struct bndregs_struct { | ||
379 | u64 bndregs[8]; | ||
380 | } __packed; | ||
381 | |||
382 | struct bndcsr_struct { | ||
383 | u64 cfg_reg_u; | ||
384 | u64 status_reg; | ||
385 | } __packed; | ||
386 | |||
373 | struct xsave_hdr_struct { | 387 | struct xsave_hdr_struct { |
374 | u64 xstate_bv; | 388 | u64 xstate_bv; |
375 | u64 reserved1[2]; | 389 | u64 reserved1[2]; |
@@ -380,6 +394,9 @@ struct xsave_struct { | |||
380 | struct i387_fxsave_struct i387; | 394 | struct i387_fxsave_struct i387; |
381 | struct xsave_hdr_struct xsave_hdr; | 395 | struct xsave_hdr_struct xsave_hdr; |
382 | struct ymmh_struct ymmh; | 396 | struct ymmh_struct ymmh; |
397 | struct lwp_struct lwp; | ||
398 | struct bndregs_struct bndregs; | ||
399 | struct bndcsr_struct bndcsr; | ||
383 | /* new processor state extensions will go here */ | 400 | /* new processor state extensions will go here */ |
384 | } __attribute__ ((packed, aligned (64))); | 401 | } __attribute__ ((packed, aligned (64))); |
385 | 402 | ||
@@ -700,29 +717,6 @@ static inline void sync_core(void) | |||
700 | #endif | 717 | #endif |
701 | } | 718 | } |
702 | 719 | ||
703 | static inline void __monitor(const void *eax, unsigned long ecx, | ||
704 | unsigned long edx) | ||
705 | { | ||
706 | /* "monitor %eax, %ecx, %edx;" */ | ||
707 | asm volatile(".byte 0x0f, 0x01, 0xc8;" | ||
708 | :: "a" (eax), "c" (ecx), "d"(edx)); | ||
709 | } | ||
710 | |||
711 | static inline void __mwait(unsigned long eax, unsigned long ecx) | ||
712 | { | ||
713 | /* "mwait %eax, %ecx;" */ | ||
714 | asm volatile(".byte 0x0f, 0x01, 0xc9;" | ||
715 | :: "a" (eax), "c" (ecx)); | ||
716 | } | ||
717 | |||
718 | static inline void __sti_mwait(unsigned long eax, unsigned long ecx) | ||
719 | { | ||
720 | trace_hardirqs_on(); | ||
721 | /* "mwait %eax, %ecx;" */ | ||
722 | asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" | ||
723 | :: "a" (eax), "c" (ecx)); | ||
724 | } | ||
725 | |||
726 | extern void select_idle_routine(const struct cpuinfo_x86 *c); | 720 | extern void select_idle_routine(const struct cpuinfo_x86 *c); |
727 | extern void init_amd_e400_c1e_mask(void); | 721 | extern void init_amd_e400_c1e_mask(void); |
728 | 722 | ||
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 942a08623a1a..14fd6fd75a19 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -60,7 +60,6 @@ struct pt_regs { | |||
60 | 60 | ||
61 | #endif /* !__i386__ */ | 61 | #endif /* !__i386__ */ |
62 | 62 | ||
63 | #include <linux/init.h> | ||
64 | #ifdef CONFIG_PARAVIRT | 63 | #ifdef CONFIG_PARAVIRT |
65 | #include <asm/paravirt_types.h> | 64 | #include <asm/paravirt_types.h> |
66 | #endif | 65 | #endif |
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 59bcf4e22418..d62c9f809bc5 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h | |||
@@ -3,7 +3,6 @@ | |||
3 | 3 | ||
4 | #include <uapi/asm/setup.h> | 4 | #include <uapi/asm/setup.h> |
5 | 5 | ||
6 | |||
7 | #define COMMAND_LINE_SIZE 2048 | 6 | #define COMMAND_LINE_SIZE 2048 |
8 | 7 | ||
9 | #include <linux/linkage.h> | 8 | #include <linux/linkage.h> |
@@ -29,6 +28,8 @@ | |||
29 | #include <asm/bootparam.h> | 28 | #include <asm/bootparam.h> |
30 | #include <asm/x86_init.h> | 29 | #include <asm/x86_init.h> |
31 | 30 | ||
31 | extern u64 relocated_ramdisk; | ||
32 | |||
32 | /* Interrupt control for vSMPowered x86_64 systems */ | 33 | /* Interrupt control for vSMPowered x86_64 systems */ |
33 | #ifdef CONFIG_X86_64 | 34 | #ifdef CONFIG_X86_64 |
34 | void vsmp_init(void); | 35 | void vsmp_init(void); |
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 4137890e88e3..8cd27e08e23c 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h | |||
@@ -2,7 +2,6 @@ | |||
2 | #define _ASM_X86_SMP_H | 2 | #define _ASM_X86_SMP_H |
3 | #ifndef __ASSEMBLY__ | 3 | #ifndef __ASSEMBLY__ |
4 | #include <linux/cpumask.h> | 4 | #include <linux/cpumask.h> |
5 | #include <linux/init.h> | ||
6 | #include <asm/percpu.h> | 5 | #include <asm/percpu.h> |
7 | 6 | ||
8 | /* | 7 | /* |
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 34baa0eb5d0c..a04eabd43d06 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h | |||
@@ -1,9 +1,9 @@ | |||
1 | #ifndef _ASM_X86_TIMER_H | 1 | #ifndef _ASM_X86_TIMER_H |
2 | #define _ASM_X86_TIMER_H | 2 | #define _ASM_X86_TIMER_H |
3 | #include <linux/init.h> | ||
4 | #include <linux/pm.h> | 3 | #include <linux/pm.h> |
5 | #include <linux/percpu.h> | 4 | #include <linux/percpu.h> |
6 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
6 | #include <linux/math64.h> | ||
7 | 7 | ||
8 | #define TICK_SIZE (tick_nsec / 1000) | 8 | #define TICK_SIZE (tick_nsec / 1000) |
9 | 9 | ||
@@ -12,68 +12,26 @@ extern int recalibrate_cpu_khz(void); | |||
12 | 12 | ||
13 | extern int no_timer_check; | 13 | extern int no_timer_check; |
14 | 14 | ||
15 | /* Accelerators for sched_clock() | 15 | /* |
16 | * convert from cycles(64bits) => nanoseconds (64bits) | 16 | * We use the full linear equation: f(x) = a + b*x, in order to allow |
17 | * basic equation: | 17 | * a continuous function in the face of dynamic freq changes. |
18 | * ns = cycles / (freq / ns_per_sec) | ||
19 | * ns = cycles * (ns_per_sec / freq) | ||
20 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) | ||
21 | * ns = cycles * (10^6 / cpu_khz) | ||
22 | * | 18 | * |
23 | * Then we use scaling math (suggested by george@mvista.com) to get: | 19 | * Continuity means that when our frequency changes our slope (b); we want to |
24 | * ns = cycles * (10^6 * SC / cpu_khz) / SC | 20 | * ensure that: f(t) == f'(t), which gives: a + b*t == a' + b'*t. |
25 | * ns = cycles * cyc2ns_scale / SC | ||
26 | * | 21 | * |
27 | * And since SC is a constant power of two, we can convert the div | 22 | * Without an offset (a) the above would not be possible. |
28 | * into a shift. | ||
29 | * | 23 | * |
30 | * We can use khz divisor instead of mhz to keep a better precision, since | 24 | * See the comment near cycles_2_ns() for details on how we compute (b). |
31 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
32 | * (mathieu.desnoyers@polymtl.ca) | ||
33 | * | ||
34 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
35 | * | ||
36 | * In: | ||
37 | * | ||
38 | * ns = cycles * cyc2ns_scale / SC | ||
39 | * | ||
40 | * Although we may still have enough bits to store the value of ns, | ||
41 | * in some cases, we may not have enough bits to store cycles * cyc2ns_scale, | ||
42 | * leading to an incorrect result. | ||
43 | * | ||
44 | * To avoid this, we can decompose 'cycles' into quotient and remainder | ||
45 | * of division by SC. Then, | ||
46 | * | ||
47 | * ns = (quot * SC + rem) * cyc2ns_scale / SC | ||
48 | * = quot * cyc2ns_scale + (rem * cyc2ns_scale) / SC | ||
49 | * | ||
50 | * - sqazi@google.com | ||
51 | */ | 25 | */ |
52 | 26 | struct cyc2ns_data { | |
53 | DECLARE_PER_CPU(unsigned long, cyc2ns); | 27 | u32 cyc2ns_mul; |
54 | DECLARE_PER_CPU(unsigned long long, cyc2ns_offset); | 28 | u32 cyc2ns_shift; |
55 | 29 | u64 cyc2ns_offset; | |
56 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | 30 | u32 __count; |
57 | 31 | /* u32 hole */ | |
58 | static inline unsigned long long __cycles_2_ns(unsigned long long cyc) | 32 | }; /* 24 bytes -- do not grow */ |
59 | { | 33 | |
60 | int cpu = smp_processor_id(); | 34 | extern struct cyc2ns_data *cyc2ns_read_begin(void); |
61 | unsigned long long ns = per_cpu(cyc2ns_offset, cpu); | 35 | extern void cyc2ns_read_end(struct cyc2ns_data *); |
62 | ns += mult_frac(cyc, per_cpu(cyc2ns, cpu), | ||
63 | (1UL << CYC2NS_SCALE_FACTOR)); | ||
64 | return ns; | ||
65 | } | ||
66 | |||
67 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | ||
68 | { | ||
69 | unsigned long long ns; | ||
70 | unsigned long flags; | ||
71 | |||
72 | local_irq_save(flags); | ||
73 | ns = __cycles_2_ns(cyc); | ||
74 | local_irq_restore(flags); | ||
75 | |||
76 | return ns; | ||
77 | } | ||
78 | 36 | ||
79 | #endif /* _ASM_X86_TIMER_H */ | 37 | #endif /* _ASM_X86_TIMER_H */ |
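The per-cpu cyc2ns data is now handed out through a begin/end pair so readers get a consistent (mul, shift, offset) triple without disabling interrupts. A hedged sketch of what a sched_clock()-style reader looks like with the new API (mul_u64_u32_shr() comes from the newly included <linux/math64.h>; the function name is illustrative):

static unsigned long long cycles_to_ns_example(unsigned long long cyc)
{
        struct cyc2ns_data *data = cyc2ns_read_begin();
        unsigned long long ns;

        /* the f(x) = a + b*x from the comment: a = offset, b = mul >> shift */
        ns  = data->cyc2ns_offset;
        ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);

        cyc2ns_read_end(data);

        return ns;
}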
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 235be70d5bb4..57ae63cd6ee2 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h | |||
@@ -65,4 +65,7 @@ extern int notsc_setup(char *); | |||
65 | extern void tsc_save_sched_clock_state(void); | 65 | extern void tsc_save_sched_clock_state(void); |
66 | extern void tsc_restore_sched_clock_state(void); | 66 | extern void tsc_restore_sched_clock_state(void); |
67 | 67 | ||
68 | /* MSR based TSC calibration for Intel Atom SoC platforms */ | ||
69 | int try_msr_calibrate_tsc(unsigned long *fast_calibrate); | ||
70 | |||
68 | #endif /* _ASM_X86_TSC_H */ | 71 | #endif /* _ASM_X86_TSC_H */ |
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 8ec57c07b125..0d592e0a5b84 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -40,22 +40,30 @@ | |||
40 | /* | 40 | /* |
41 | * Test whether a block of memory is a valid user space address. | 41 | * Test whether a block of memory is a valid user space address. |
42 | * Returns 0 if the range is valid, nonzero otherwise. | 42 | * Returns 0 if the range is valid, nonzero otherwise. |
43 | * | ||
44 | * This is equivalent to the following test: | ||
45 | * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64) | ||
46 | * | ||
47 | * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... | ||
48 | */ | 43 | */ |
44 | static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, unsigned long limit) | ||
45 | { | ||
46 | /* | ||
47 | * If we have used "sizeof()" for the size, | ||
48 | * we know it won't overflow the limit (but | ||
49 | * it might overflow the 'addr', so it's | ||
50 | * important to subtract the size from the | ||
51 | * limit, not add it to the address). | ||
52 | */ | ||
53 | if (__builtin_constant_p(size)) | ||
54 | return addr > limit - size; | ||
55 | |||
56 | /* Arbitrary sizes? Be careful about overflow */ | ||
57 | addr += size; | ||
58 | if (addr < size) | ||
59 | return true; | ||
60 | return addr > limit; | ||
61 | } | ||
49 | 62 | ||
50 | #define __range_not_ok(addr, size, limit) \ | 63 | #define __range_not_ok(addr, size, limit) \ |
51 | ({ \ | 64 | ({ \ |
52 | unsigned long flag, roksum; \ | ||
53 | __chk_user_ptr(addr); \ | 65 | __chk_user_ptr(addr); \ |
54 | asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \ | 66 | __chk_range_not_ok((unsigned long __force)(addr), size, limit); \ |
55 | : "=&r" (flag), "=r" (roksum) \ | ||
56 | : "1" (addr), "g" ((long)(size)), \ | ||
57 | "rm" (limit)); \ | ||
58 | flag; \ | ||
59 | }) | 67 | }) |
60 | 68 | ||
61 | /** | 69 | /** |
@@ -78,7 +86,7 @@ | |||
78 | * this function, memory access functions may still return -EFAULT. | 86 | * this function, memory access functions may still return -EFAULT. |
79 | */ | 87 | */ |
80 | #define access_ok(type, addr, size) \ | 88 | #define access_ok(type, addr, size) \ |
81 | (likely(__range_not_ok(addr, size, user_addr_max()) == 0)) | 89 | likely(!__range_not_ok(addr, size, user_addr_max())) |
82 | 90 | ||
83 | /* | 91 | /* |
84 | * The exception table consists of pairs of addresses relative to the | 92 | * The exception table consists of pairs of addresses relative to the |
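The open-coded __chk_range_not_ok() replaces the old add/sbb asm; the subtle part is the wrap-around case, which the explicit "addr < size" test catches. A worked illustration with made-up constants, using 32-bit arithmetic for readability:

static bool naive_range_bad(unsigned long addr, unsigned long size,
                            unsigned long limit)
{
        return addr + size > limit;     /* wraps when addr is near ~0UL */
}

/* limit = 0xc0000000, addr = 0xfffff000, size = 0x2000:
 *   naive_range_bad()    -> false, because addr + size wrapped to 0x1000
 *   __chk_range_not_ok() -> true, the "addr < size" test notices the carry
 * Constant sizeof() sizes take the cheaper "addr > limit - size" path. */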
@@ -525,6 +533,98 @@ extern __must_check long strnlen_user(const char __user *str, long n); | |||
525 | unsigned long __must_check clear_user(void __user *mem, unsigned long len); | 533 | unsigned long __must_check clear_user(void __user *mem, unsigned long len); |
526 | unsigned long __must_check __clear_user(void __user *mem, unsigned long len); | 534 | unsigned long __must_check __clear_user(void __user *mem, unsigned long len); |
527 | 535 | ||
536 | extern void __cmpxchg_wrong_size(void) | ||
537 | __compiletime_error("Bad argument size for cmpxchg"); | ||
538 | |||
539 | #define __user_atomic_cmpxchg_inatomic(uval, ptr, old, new, size) \ | ||
540 | ({ \ | ||
541 | int __ret = 0; \ | ||
542 | __typeof__(ptr) __uval = (uval); \ | ||
543 | __typeof__(*(ptr)) __old = (old); \ | ||
544 | __typeof__(*(ptr)) __new = (new); \ | ||
545 | switch (size) { \ | ||
546 | case 1: \ | ||
547 | { \ | ||
548 | asm volatile("\t" ASM_STAC "\n" \ | ||
549 | "1:\t" LOCK_PREFIX "cmpxchgb %4, %2\n" \ | ||
550 | "2:\t" ASM_CLAC "\n" \ | ||
551 | "\t.section .fixup, \"ax\"\n" \ | ||
552 | "3:\tmov %3, %0\n" \ | ||
553 | "\tjmp 2b\n" \ | ||
554 | "\t.previous\n" \ | ||
555 | _ASM_EXTABLE(1b, 3b) \ | ||
556 | : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ | ||
557 | : "i" (-EFAULT), "q" (__new), "1" (__old) \ | ||
558 | : "memory" \ | ||
559 | ); \ | ||
560 | break; \ | ||
561 | } \ | ||
562 | case 2: \ | ||
563 | { \ | ||
564 | asm volatile("\t" ASM_STAC "\n" \ | ||
565 | "1:\t" LOCK_PREFIX "cmpxchgw %4, %2\n" \ | ||
566 | "2:\t" ASM_CLAC "\n" \ | ||
567 | "\t.section .fixup, \"ax\"\n" \ | ||
568 | "3:\tmov %3, %0\n" \ | ||
569 | "\tjmp 2b\n" \ | ||
570 | "\t.previous\n" \ | ||
571 | _ASM_EXTABLE(1b, 3b) \ | ||
572 | : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ | ||
573 | : "i" (-EFAULT), "r" (__new), "1" (__old) \ | ||
574 | : "memory" \ | ||
575 | ); \ | ||
576 | break; \ | ||
577 | } \ | ||
578 | case 4: \ | ||
579 | { \ | ||
580 | asm volatile("\t" ASM_STAC "\n" \ | ||
581 | "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" \ | ||
582 | "2:\t" ASM_CLAC "\n" \ | ||
583 | "\t.section .fixup, \"ax\"\n" \ | ||
584 | "3:\tmov %3, %0\n" \ | ||
585 | "\tjmp 2b\n" \ | ||
586 | "\t.previous\n" \ | ||
587 | _ASM_EXTABLE(1b, 3b) \ | ||
588 | : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ | ||
589 | : "i" (-EFAULT), "r" (__new), "1" (__old) \ | ||
590 | : "memory" \ | ||
591 | ); \ | ||
592 | break; \ | ||
593 | } \ | ||
594 | case 8: \ | ||
595 | { \ | ||
596 | if (!IS_ENABLED(CONFIG_X86_64)) \ | ||
597 | __cmpxchg_wrong_size(); \ | ||
598 | \ | ||
599 | asm volatile("\t" ASM_STAC "\n" \ | ||
600 | "1:\t" LOCK_PREFIX "cmpxchgq %4, %2\n" \ | ||
601 | "2:\t" ASM_CLAC "\n" \ | ||
602 | "\t.section .fixup, \"ax\"\n" \ | ||
603 | "3:\tmov %3, %0\n" \ | ||
604 | "\tjmp 2b\n" \ | ||
605 | "\t.previous\n" \ | ||
606 | _ASM_EXTABLE(1b, 3b) \ | ||
607 | : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ | ||
608 | : "i" (-EFAULT), "r" (__new), "1" (__old) \ | ||
609 | : "memory" \ | ||
610 | ); \ | ||
611 | break; \ | ||
612 | } \ | ||
613 | default: \ | ||
614 | __cmpxchg_wrong_size(); \ | ||
615 | } \ | ||
616 | *__uval = __old; \ | ||
617 | __ret; \ | ||
618 | }) | ||
619 | |||
620 | #define user_atomic_cmpxchg_inatomic(uval, ptr, old, new) \ | ||
621 | ({ \ | ||
622 | access_ok(VERIFY_WRITE, (ptr), sizeof(*(ptr))) ? \ | ||
623 | __user_atomic_cmpxchg_inatomic((uval), (ptr), \ | ||
624 | (old), (new), sizeof(*(ptr))) : \ | ||
625 | -EFAULT; \ | ||
626 | }) | ||
627 | |||
528 | /* | 628 | /* |
529 | * movsl can be slow when source and dest are not both 8-byte aligned | 629 | * movsl can be slow when source and dest are not both 8-byte aligned |
530 | */ | 630 | */ |
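The new user_atomic_cmpxchg_inatomic() helper performs a cmpxchg directly on a user-space word, returning -EFAULT when the access faults and storing the value actually found through its first argument. A hedged usage sketch (the caller and its retry policy are hypothetical, not part of this patch), assuming the usual rule that in-atomic user accesses are wrapped in pagefault_disable()/pagefault_enable():

static int example_cmpxchg_user_u32(u32 __user *uaddr, u32 old, u32 new)
{
        u32 cur;
        int ret;

        pagefault_disable();
        ret = user_atomic_cmpxchg_inatomic(&cur, uaddr, old, new);
        pagefault_enable();

        if (ret)
                return ret;             /* -EFAULT: bad or unmapped pointer */

        /* cur holds the value that was in *uaddr before the cmpxchg. */
        return cur == old ? 0 : -EAGAIN;
}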
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 190413d0de57..12a26b979bf1 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h | |||
@@ -204,13 +204,13 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) | |||
204 | static __must_check __always_inline int | 204 | static __must_check __always_inline int |
205 | __copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) | 205 | __copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) |
206 | { | 206 | { |
207 | return __copy_from_user_nocheck(dst, (__force const void *)src, size); | 207 | return __copy_from_user_nocheck(dst, src, size); |
208 | } | 208 | } |
209 | 209 | ||
210 | static __must_check __always_inline int | 210 | static __must_check __always_inline int |
211 | __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) | 211 | __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) |
212 | { | 212 | { |
213 | return __copy_to_user_nocheck((__force void *)dst, src, size); | 213 | return __copy_to_user_nocheck(dst, src, size); |
214 | } | 214 | } |
215 | 215 | ||
216 | extern long __copy_user_nocache(void *dst, const void __user *src, | 216 | extern long __copy_user_nocache(void *dst, const void __user *src, |
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 966502d4682e..2067264fb7f5 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -100,6 +100,7 @@ | |||
100 | 100 | ||
101 | #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f | 101 | #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f |
102 | #define VMX_MISC_SAVE_EFER_LMA 0x00000020 | 102 | #define VMX_MISC_SAVE_EFER_LMA 0x00000020 |
103 | #define VMX_MISC_ACTIVITY_HLT 0x00000040 | ||
103 | 104 | ||
104 | /* VMCS Encodings */ | 105 | /* VMCS Encodings */ |
105 | enum vmcs_field { | 106 | enum vmcs_field { |
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 0f1be11e43d2..e45e4da96bf1 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -181,7 +181,7 @@ struct x86_msi_ops { | |||
181 | u8 hpet_id); | 181 | u8 hpet_id); |
182 | void (*teardown_msi_irq)(unsigned int irq); | 182 | void (*teardown_msi_irq)(unsigned int irq); |
183 | void (*teardown_msi_irqs)(struct pci_dev *dev); | 183 | void (*teardown_msi_irqs)(struct pci_dev *dev); |
184 | void (*restore_msi_irqs)(struct pci_dev *dev, int irq); | 184 | void (*restore_msi_irqs)(struct pci_dev *dev); |
185 | int (*setup_hpet_msi)(unsigned int irq, unsigned int id); | 185 | int (*setup_hpet_msi)(unsigned int irq, unsigned int id); |
186 | u32 (*msi_mask_irq)(struct msi_desc *desc, u32 mask, u32 flag); | 186 | u32 (*msi_mask_irq)(struct msi_desc *desc, u32 mask, u32 flag); |
187 | u32 (*msix_mask_irq)(struct msi_desc *desc, u32 flag); | 187 | u32 (*msix_mask_irq)(struct msi_desc *desc, u32 flag); |
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index b913915e8e63..3e276eb23d1b 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h | |||
@@ -167,7 +167,12 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine) | |||
167 | */ | 167 | */ |
168 | static inline unsigned long mfn_to_local_pfn(unsigned long mfn) | 168 | static inline unsigned long mfn_to_local_pfn(unsigned long mfn) |
169 | { | 169 | { |
170 | unsigned long pfn = mfn_to_pfn(mfn); | 170 | unsigned long pfn; |
171 | |||
172 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
173 | return mfn; | ||
174 | |||
175 | pfn = mfn_to_pfn(mfn); | ||
171 | if (get_phys_to_machine(pfn) != mfn) | 176 | if (get_phys_to_machine(pfn) != mfn) |
172 | return -1; /* force !pfn_valid() */ | 177 | return -1; /* force !pfn_valid() */ |
173 | return pfn; | 178 | return pfn; |
@@ -222,5 +227,6 @@ void make_lowmem_page_readonly(void *vaddr); | |||
222 | void make_lowmem_page_readwrite(void *vaddr); | 227 | void make_lowmem_page_readwrite(void *vaddr); |
223 | 228 | ||
224 | #define xen_remap(cookie, size) ioremap((cookie), (size)); | 229 | #define xen_remap(cookie, size) ioremap((cookie), (size)); |
230 | #define xen_unmap(cookie) iounmap((cookie)) | ||
225 | 231 | ||
226 | #endif /* _ASM_X86_XEN_PAGE_H */ | 232 | #endif /* _ASM_X86_XEN_PAGE_H */ |
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 0415cdabb5a6..554738963b28 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h | |||
@@ -9,6 +9,8 @@ | |||
9 | #define XSTATE_FP 0x1 | 9 | #define XSTATE_FP 0x1 |
10 | #define XSTATE_SSE 0x2 | 10 | #define XSTATE_SSE 0x2 |
11 | #define XSTATE_YMM 0x4 | 11 | #define XSTATE_YMM 0x4 |
12 | #define XSTATE_BNDREGS 0x8 | ||
13 | #define XSTATE_BNDCSR 0x10 | ||
12 | 14 | ||
13 | #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) | 15 | #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) |
14 | 16 | ||
@@ -20,10 +22,14 @@ | |||
20 | #define XSAVE_YMM_SIZE 256 | 22 | #define XSAVE_YMM_SIZE 256 |
21 | #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) | 23 | #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) |
22 | 24 | ||
23 | /* | 25 | /* Supported features which support lazy state saving */ |
24 | * These are the features that the OS can handle currently. | 26 | #define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) |
25 | */ | 27 | |
26 | #define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) | 28 | /* Supported features which require eager state saving */ |
29 | #define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR) | ||
30 | |||
31 | /* All currently supported features */ | ||
32 | #define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER) | ||
27 | 33 | ||
28 | #ifdef CONFIG_X86_64 | 34 | #ifdef CONFIG_X86_64 |
29 | #define REX_PREFIX "0x48, " | 35 | #define REX_PREFIX "0x48, " |
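Splitting the mask into XSTATE_LAZY and XSTATE_EAGER prepares for states (the MPX bound registers and config/status) that cannot be restored lazily on first FPU use. A trivial sketch of the kind of check this split enables; the helper name is made up:

static inline bool example_xstate_needs_eagerfpu(u64 xfeatures_mask)
{
        /* Any eager-only feature forces eager FPU state switching. */
        return (xfeatures_mask & XSTATE_EAGER) != 0;
}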
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 9c3733c5f8f7..225b0988043a 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h | |||
@@ -6,6 +6,7 @@ | |||
6 | #define SETUP_E820_EXT 1 | 6 | #define SETUP_E820_EXT 1 |
7 | #define SETUP_DTB 2 | 7 | #define SETUP_DTB 2 |
8 | #define SETUP_PCI 3 | 8 | #define SETUP_PCI 3 |
9 | #define SETUP_EFI 4 | ||
9 | 10 | ||
10 | /* ram_size flags */ | 11 | /* ram_size flags */ |
11 | #define RAMDISK_IMAGE_START_MASK 0x07FF | 12 | #define RAMDISK_IMAGE_START_MASK 0x07FF |
@@ -23,6 +24,7 @@ | |||
23 | #define XLF_CAN_BE_LOADED_ABOVE_4G (1<<1) | 24 | #define XLF_CAN_BE_LOADED_ABOVE_4G (1<<1) |
24 | #define XLF_EFI_HANDOVER_32 (1<<2) | 25 | #define XLF_EFI_HANDOVER_32 (1<<2) |
25 | #define XLF_EFI_HANDOVER_64 (1<<3) | 26 | #define XLF_EFI_HANDOVER_64 (1<<3) |
27 | #define XLF_EFI_KEXEC (1<<4) | ||
26 | 28 | ||
27 | #ifndef __ASSEMBLY__ | 29 | #ifndef __ASSEMBLY__ |
28 | 30 | ||
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index b8f1c0176cbc..462efe746d77 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h | |||
@@ -28,6 +28,9 @@ | |||
28 | /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ | 28 | /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ |
29 | #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) | 29 | #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) |
30 | 30 | ||
31 | /* A partition's reference time stamp counter (TSC) page */ | ||
32 | #define HV_X64_MSR_REFERENCE_TSC 0x40000021 | ||
33 | |||
31 | /* | 34 | /* |
32 | * There is a single feature flag that signifies the presence of the MSR | 35 | * There is a single feature flag that signifies the presence of the MSR |
33 | * that can be used to retrieve both the local APIC Timer frequency as | 36 | * that can be used to retrieve both the local APIC Timer frequency as |
@@ -198,6 +201,9 @@ | |||
198 | #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ | 201 | #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ |
199 | (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) | 202 | (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) |
200 | 203 | ||
204 | #define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 | ||
205 | #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 | ||
206 | |||
201 | #define HV_PROCESSOR_POWER_STATE_C0 0 | 207 | #define HV_PROCESSOR_POWER_STATE_C0 0 |
202 | #define HV_PROCESSOR_POWER_STATE_C1 1 | 208 | #define HV_PROCESSOR_POWER_STATE_C1 1 |
203 | #define HV_PROCESSOR_POWER_STATE_C2 2 | 209 | #define HV_PROCESSOR_POWER_STATE_C2 2 |
@@ -210,4 +216,11 @@ | |||
210 | #define HV_STATUS_INVALID_ALIGNMENT 4 | 216 | #define HV_STATUS_INVALID_ALIGNMENT 4 |
211 | #define HV_STATUS_INSUFFICIENT_BUFFERS 19 | 217 | #define HV_STATUS_INSUFFICIENT_BUFFERS 19 |
212 | 218 | ||
219 | typedef struct _HV_REFERENCE_TSC_PAGE { | ||
220 | __u32 tsc_sequence; | ||
221 | __u32 res1; | ||
222 | __u64 tsc_scale; | ||
223 | __s64 tsc_offset; | ||
224 | } HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; | ||
225 | |||
213 | #endif | 226 | #endif |
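The reference TSC page gives the guest a way to compute the partition reference time from its own TSC without a hypercall: per the Hyper-V spec the reference time is roughly ((tsc * tsc_scale) >> 64) + tsc_offset, and tsc_sequence is re-read to detect a concurrent update by the hypervisor. A hedged reader sketch; the TSC-read and 128-bit-multiply helpers are placeholders, memory barriers are omitted, and the exact fallback condition should be taken from the spec:

static __u64 example_read_reference_time(volatile HV_REFERENCE_TSC_PAGE *tsc_pg)
{
        __u32 seq;
        __u64 tsc, scale;
        __s64 offset;

        do {
                seq = tsc_pg->tsc_sequence;
                if (seq == 0)
                        return 0;       /* invalid: fall back to HV_X64_MSR_TIME_REF_COUNT */

                scale  = tsc_pg->tsc_scale;
                offset = tsc_pg->tsc_offset;
                tsc    = read_tsc();    /* placeholder for an rdtsc wrapper */
        } while (tsc_pg->tsc_sequence != seq);

        /* High 64 bits of the 128-bit product, plus the signed offset. */
        return mul_u64_u64_shr(tsc, scale, 64) + offset;
}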
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 37813b5ddc37..c19fc60ff062 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h | |||
@@ -184,6 +184,7 @@ | |||
184 | #define MSR_AMD64_PATCH_LOADER 0xc0010020 | 184 | #define MSR_AMD64_PATCH_LOADER 0xc0010020 |
185 | #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 | 185 | #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 |
186 | #define MSR_AMD64_OSVW_STATUS 0xc0010141 | 186 | #define MSR_AMD64_OSVW_STATUS 0xc0010141 |
187 | #define MSR_AMD64_LS_CFG 0xc0011020 | ||
187 | #define MSR_AMD64_DC_CFG 0xc0011022 | 188 | #define MSR_AMD64_DC_CFG 0xc0011022 |
188 | #define MSR_AMD64_BU_CFG2 0xc001102a | 189 | #define MSR_AMD64_BU_CFG2 0xc001102a |
189 | #define MSR_AMD64_IBSFETCHCTL 0xc0011030 | 190 | #define MSR_AMD64_IBSFETCHCTL 0xc0011030 |
@@ -527,6 +528,7 @@ | |||
527 | #define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e | 528 | #define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e |
528 | #define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f | 529 | #define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f |
529 | #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 | 530 | #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 |
531 | #define MSR_IA32_VMX_VMFUNC 0x00000491 | ||
530 | 532 | ||
531 | /* VMX_BASIC bits and bitmasks */ | 533 | /* VMX_BASIC bits and bitmasks */ |
532 | #define VMX_BASIC_VMCS_SIZE_SHIFT 32 | 534 | #define VMX_BASIC_VMCS_SIZE_SHIFT 32 |
diff --git a/arch/x86/include/uapi/asm/stat.h b/arch/x86/include/uapi/asm/stat.h index 7b3ddc348585..bc03eb5d6360 100644 --- a/arch/x86/include/uapi/asm/stat.h +++ b/arch/x86/include/uapi/asm/stat.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _ASM_X86_STAT_H | 1 | #ifndef _ASM_X86_STAT_H |
2 | #define _ASM_X86_STAT_H | 2 | #define _ASM_X86_STAT_H |
3 | 3 | ||
4 | #include <asm/posix_types.h> | ||
5 | |||
4 | #define STAT_HAVE_NSEC 1 | 6 | #define STAT_HAVE_NSEC 1 |
5 | 7 | ||
6 | #ifdef __i386__ | 8 | #ifdef __i386__ |
@@ -78,26 +80,26 @@ struct stat64 { | |||
78 | #else /* __i386__ */ | 80 | #else /* __i386__ */ |
79 | 81 | ||
80 | struct stat { | 82 | struct stat { |
81 | unsigned long st_dev; | 83 | __kernel_ulong_t st_dev; |
82 | unsigned long st_ino; | 84 | __kernel_ulong_t st_ino; |
83 | unsigned long st_nlink; | 85 | __kernel_ulong_t st_nlink; |
84 | 86 | ||
85 | unsigned int st_mode; | 87 | unsigned int st_mode; |
86 | unsigned int st_uid; | 88 | unsigned int st_uid; |
87 | unsigned int st_gid; | 89 | unsigned int st_gid; |
88 | unsigned int __pad0; | 90 | unsigned int __pad0; |
89 | unsigned long st_rdev; | 91 | __kernel_ulong_t st_rdev; |
90 | long st_size; | 92 | __kernel_long_t st_size; |
91 | long st_blksize; | 93 | __kernel_long_t st_blksize; |
92 | long st_blocks; /* Number 512-byte blocks allocated. */ | 94 | __kernel_long_t st_blocks; /* Number 512-byte blocks allocated. */ |
93 | 95 | ||
94 | unsigned long st_atime; | 96 | __kernel_ulong_t st_atime; |
95 | unsigned long st_atime_nsec; | 97 | __kernel_ulong_t st_atime_nsec; |
96 | unsigned long st_mtime; | 98 | __kernel_ulong_t st_mtime; |
97 | unsigned long st_mtime_nsec; | 99 | __kernel_ulong_t st_mtime_nsec; |
98 | unsigned long st_ctime; | 100 | __kernel_ulong_t st_ctime; |
99 | unsigned long st_ctime_nsec; | 101 | __kernel_ulong_t st_ctime_nsec; |
100 | long __unused[3]; | 102 | __kernel_long_t __unused[3]; |
101 | }; | 103 | }; |
102 | 104 | ||
103 | /* We don't need to memset the whole thing just to initialize the padding */ | 105 | /* We don't need to memset the whole thing just to initialize the padding */ |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9b0a34e2cd79..cb648c84b327 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -29,10 +29,11 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | |||
29 | obj-y += syscall_$(BITS).o | 29 | obj-y += syscall_$(BITS).o |
30 | obj-$(CONFIG_X86_64) += vsyscall_64.o | 30 | obj-$(CONFIG_X86_64) += vsyscall_64.o |
31 | obj-$(CONFIG_X86_64) += vsyscall_emu_64.o | 31 | obj-$(CONFIG_X86_64) += vsyscall_emu_64.o |
32 | obj-$(CONFIG_SYSFS) += ksysfs.o | ||
32 | obj-y += bootflag.o e820.o | 33 | obj-y += bootflag.o e820.o |
33 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o | 34 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o |
34 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o | 35 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o |
35 | obj-y += tsc.o io_delay.o rtc.o | 36 | obj-y += tsc.o tsc_msr.o io_delay.o rtc.o |
36 | obj-y += pci-iommu_table.o | 37 | obj-y += pci-iommu_table.o |
37 | obj-y += resource.o | 38 | obj-y += resource.o |
38 | 39 | ||
@@ -91,15 +92,6 @@ obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o | |||
91 | 92 | ||
92 | obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o | 93 | obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o |
93 | 94 | ||
94 | obj-$(CONFIG_MICROCODE_EARLY) += microcode_core_early.o | ||
95 | obj-$(CONFIG_MICROCODE_INTEL_EARLY) += microcode_intel_early.o | ||
96 | obj-$(CONFIG_MICROCODE_INTEL_LIB) += microcode_intel_lib.o | ||
97 | microcode-y := microcode_core.o | ||
98 | microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o | ||
99 | microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o | ||
100 | obj-$(CONFIG_MICROCODE_AMD_EARLY) += microcode_amd_early.o | ||
101 | obj-$(CONFIG_MICROCODE) += microcode.o | ||
102 | |||
103 | obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o | 95 | obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o |
104 | 96 | ||
105 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o | 97 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o |
@@ -111,6 +103,7 @@ obj-$(CONFIG_EFI) += sysfb_efi.o | |||
111 | 103 | ||
112 | obj-$(CONFIG_PERF_EVENTS) += perf_regs.o | 104 | obj-$(CONFIG_PERF_EVENTS) += perf_regs.o |
113 | obj-$(CONFIG_TRACING) += tracepoint.o | 105 | obj-$(CONFIG_TRACING) += tracepoint.o |
106 | obj-$(CONFIG_IOSF_MBI) += iosf_mbi.o | ||
114 | 107 | ||
115 | ### | 108 | ### |
116 | # 64 bit specific files | 109 | # 64 bit specific files |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 0b0b91b83d51..1dac94265b59 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -1033,9 +1033,7 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, | |||
1033 | 1033 | ||
1034 | if (!acpi_ioapic) | 1034 | if (!acpi_ioapic) |
1035 | return 0; | 1035 | return 0; |
1036 | if (!dev) | 1036 | if (!dev || !dev_is_pci(dev)) |
1037 | return 0; | ||
1038 | if (dev->bus != &pci_bus_type) | ||
1039 | return 0; | 1037 | return 0; |
1040 | 1038 | ||
1041 | pdev = to_pci_dev(dev); | 1039 | pdev = to_pci_dev(dev); |
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index d2b7f27781bc..e69182fd01cf 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c | |||
@@ -150,29 +150,6 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, | |||
150 | } | 150 | } |
151 | EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); | 151 | EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); |
152 | 152 | ||
153 | /* | ||
154 | * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, | ||
155 | * which can obviate IPI to trigger checking of need_resched. | ||
156 | * We execute MONITOR against need_resched and enter optimized wait state | ||
157 | * through MWAIT. Whenever someone changes need_resched, we would be woken | ||
158 | * up from MWAIT (without an IPI). | ||
159 | * | ||
160 | * New with Core Duo processors, MWAIT can take some hints based on CPU | ||
161 | * capability. | ||
162 | */ | ||
163 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) | ||
164 | { | ||
165 | if (!need_resched()) { | ||
166 | if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) | ||
167 | clflush((void *)¤t_thread_info()->flags); | ||
168 | |||
169 | __monitor((void *)¤t_thread_info()->flags, 0, 0); | ||
170 | smp_mb(); | ||
171 | if (!need_resched()) | ||
172 | __mwait(ax, cx); | ||
173 | } | ||
174 | } | ||
175 | |||
176 | void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) | 153 | void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) |
177 | { | 154 | { |
178 | unsigned int cpu = smp_processor_id(); | 155 | unsigned int cpu = smp_processor_id(); |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index d278736bf774..7f26c9a70a9e 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -75,6 +75,13 @@ unsigned int max_physical_apicid; | |||
75 | physid_mask_t phys_cpu_present_map; | 75 | physid_mask_t phys_cpu_present_map; |
76 | 76 | ||
77 | /* | 77 | /* |
78 | * Processor to be disabled specified by kernel parameter | ||
79 | * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to | ||
80 | * avoid undefined behaviour caused by sending INIT from AP to BSP. | ||
81 | */ | ||
82 | static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID; | ||
83 | |||
84 | /* | ||
78 | * Map cpu index to physical APIC ID | 85 | * Map cpu index to physical APIC ID |
79 | */ | 86 | */ |
80 | DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); | 87 | DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); |
@@ -1968,7 +1975,7 @@ __visible void smp_trace_spurious_interrupt(struct pt_regs *regs) | |||
1968 | */ | 1975 | */ |
1969 | static inline void __smp_error_interrupt(struct pt_regs *regs) | 1976 | static inline void __smp_error_interrupt(struct pt_regs *regs) |
1970 | { | 1977 | { |
1971 | u32 v0, v1; | 1978 | u32 v; |
1972 | u32 i = 0; | 1979 | u32 i = 0; |
1973 | static const char * const error_interrupt_reason[] = { | 1980 | static const char * const error_interrupt_reason[] = { |
1974 | "Send CS error", /* APIC Error Bit 0 */ | 1981 | "Send CS error", /* APIC Error Bit 0 */ |
@@ -1982,21 +1989,20 @@ static inline void __smp_error_interrupt(struct pt_regs *regs) | |||
1982 | }; | 1989 | }; |
1983 | 1990 | ||
1984 | /* First tickle the hardware, only then report what went on. -- REW */ | 1991 | /* First tickle the hardware, only then report what went on. -- REW */ |
1985 | v0 = apic_read(APIC_ESR); | ||
1986 | apic_write(APIC_ESR, 0); | 1992 | apic_write(APIC_ESR, 0); |
1987 | v1 = apic_read(APIC_ESR); | 1993 | v = apic_read(APIC_ESR); |
1988 | ack_APIC_irq(); | 1994 | ack_APIC_irq(); |
1989 | atomic_inc(&irq_err_count); | 1995 | atomic_inc(&irq_err_count); |
1990 | 1996 | ||
1991 | apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x(%02x)", | 1997 | apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x", |
1992 | smp_processor_id(), v0 , v1); | 1998 | smp_processor_id(), v); |
1993 | 1999 | ||
1994 | v1 = v1 & 0xff; | 2000 | v &= 0xff; |
1995 | while (v1) { | 2001 | while (v) { |
1996 | if (v1 & 0x1) | 2002 | if (v & 0x1) |
1997 | apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); | 2003 | apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); |
1998 | i++; | 2004 | i++; |
1999 | v1 >>= 1; | 2005 | v >>= 1; |
2000 | } | 2006 | } |
2001 | 2007 | ||
2002 | apic_printk(APIC_DEBUG, KERN_CONT "\n"); | 2008 | apic_printk(APIC_DEBUG, KERN_CONT "\n"); |
@@ -2115,6 +2121,39 @@ int generic_processor_info(int apicid, int version) | |||
2115 | phys_cpu_present_map); | 2121 | phys_cpu_present_map); |
2116 | 2122 | ||
2117 | /* | 2123 | /* |
2124 | * boot_cpu_physical_apicid is designed to have the apicid | ||
2125 | * returned by read_apic_id(), i.e., the apicid of the | ||

2126 | * currently booting-up processor. However, on some platforms, | ||
2127 | * it is temporarily modified by the apicid reported as BSP | ||
2128 | * through MP table. Concretely: | ||
2129 | * | ||
2130 | * - arch/x86/kernel/mpparse.c: MP_processor_info() | ||
2131 | * - arch/x86/mm/amdtopology.c: amd_numa_init() | ||
2132 | * - arch/x86/platform/visws/visws_quirks.c: MP_processor_info() | ||
2133 | * | ||
2134 | * This function is executed with the modified | ||
2135 | * boot_cpu_physical_apicid. So, disabled_cpu_apicid kernel | ||
2136 | * parameter doesn't work to disable APs on kdump 2nd kernel. | ||
2137 | * | ||
2138 | * Since fixing handling of boot_cpu_physical_apicid requires | ||
2139 | * another discussion and tests on each platform, we leave it | ||
2140 | * for now and here we use read_apic_id() directly in this | ||
2141 | * function, generic_processor_info(). | ||
2142 | */ | ||
2143 | if (disabled_cpu_apicid != BAD_APICID && | ||
2144 | disabled_cpu_apicid != read_apic_id() && | ||
2145 | disabled_cpu_apicid == apicid) { | ||
2146 | int thiscpu = num_processors + disabled_cpus; | ||
2147 | |||
2148 | pr_warning("APIC: Disabling requested cpu." | ||
2149 | " Processor %d/0x%x ignored.\n", | ||
2150 | thiscpu, apicid); | ||
2151 | |||
2152 | disabled_cpus++; | ||
2153 | return -ENODEV; | ||
2154 | } | ||
2155 | |||
2156 | /* | ||
2118 | * If boot cpu has not been detected yet, then only allow upto | 2157 | * If boot cpu has not been detected yet, then only allow upto |
2119 | * nr_cpu_ids - 1 processors and keep one slot free for boot cpu | 2158 | * nr_cpu_ids - 1 processors and keep one slot free for boot cpu |
2120 | */ | 2159 | */ |
@@ -2592,3 +2631,12 @@ static int __init lapic_insert_resource(void) | |||
2592 | * that is using request_resource | 2631 | * that is using request_resource |
2593 | */ | 2632 | */ |
2594 | late_initcall(lapic_insert_resource); | 2633 | late_initcall(lapic_insert_resource); |
2634 | |||
2635 | static int __init apic_set_disabled_cpu_apicid(char *arg) | ||
2636 | { | ||
2637 | if (!arg || !get_option(&arg, &disabled_cpu_apicid)) | ||
2638 | return -EINVAL; | ||
2639 | |||
2640 | return 0; | ||
2641 | } | ||
2642 | early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid); | ||
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index ccbf857d1d55..2c621a6b901a 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c | |||
@@ -14,7 +14,6 @@ | |||
14 | #include <linux/string.h> | 14 | #include <linux/string.h> |
15 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
16 | #include <linux/ctype.h> | 16 | #include <linux/ctype.h> |
17 | #include <linux/init.h> | ||
18 | #include <linux/hardirq.h> | 17 | #include <linux/hardirq.h> |
19 | #include <linux/module.h> | 18 | #include <linux/module.h> |
20 | #include <asm/smp.h> | 19 | #include <asm/smp.h> |
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index e145f28b4099..191ce75c0e54 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c | |||
@@ -15,7 +15,6 @@ | |||
15 | #include <linux/string.h> | 15 | #include <linux/string.h> |
16 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
17 | #include <linux/ctype.h> | 17 | #include <linux/ctype.h> |
18 | #include <linux/init.h> | ||
19 | #include <linux/errno.h> | 18 | #include <linux/errno.h> |
20 | #include <asm/fixmap.h> | 19 | #include <asm/fixmap.h> |
21 | #include <asm/mpspec.h> | 20 | #include <asm/mpspec.h> |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 4d67a7531d45..6ad4658de705 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -1139,9 +1139,10 @@ next: | |||
1139 | if (test_bit(vector, used_vectors)) | 1139 | if (test_bit(vector, used_vectors)) |
1140 | goto next; | 1140 | goto next; |
1141 | 1141 | ||
1142 | for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) | 1142 | for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) { |
1143 | if (per_cpu(vector_irq, new_cpu)[vector] != -1) | 1143 | if (per_cpu(vector_irq, new_cpu)[vector] > VECTOR_UNDEFINED) |
1144 | goto next; | 1144 | goto next; |
1145 | } | ||
1145 | /* Found one! */ | 1146 | /* Found one! */ |
1146 | current_vector = vector; | 1147 | current_vector = vector; |
1147 | current_offset = offset; | 1148 | current_offset = offset; |
@@ -1180,7 +1181,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) | |||
1180 | 1181 | ||
1181 | vector = cfg->vector; | 1182 | vector = cfg->vector; |
1182 | for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) | 1183 | for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) |
1183 | per_cpu(vector_irq, cpu)[vector] = -1; | 1184 | per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; |
1184 | 1185 | ||
1185 | cfg->vector = 0; | 1186 | cfg->vector = 0; |
1186 | cpumask_clear(cfg->domain); | 1187 | cpumask_clear(cfg->domain); |
@@ -1188,11 +1189,10 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) | |||
1188 | if (likely(!cfg->move_in_progress)) | 1189 | if (likely(!cfg->move_in_progress)) |
1189 | return; | 1190 | return; |
1190 | for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { | 1191 | for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { |
1191 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; | 1192 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { |
1192 | vector++) { | ||
1193 | if (per_cpu(vector_irq, cpu)[vector] != irq) | 1193 | if (per_cpu(vector_irq, cpu)[vector] != irq) |
1194 | continue; | 1194 | continue; |
1195 | per_cpu(vector_irq, cpu)[vector] = -1; | 1195 | per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; |
1196 | break; | 1196 | break; |
1197 | } | 1197 | } |
1198 | } | 1198 | } |
@@ -1225,12 +1225,12 @@ void __setup_vector_irq(int cpu) | |||
1225 | /* Mark the free vectors */ | 1225 | /* Mark the free vectors */ |
1226 | for (vector = 0; vector < NR_VECTORS; ++vector) { | 1226 | for (vector = 0; vector < NR_VECTORS; ++vector) { |
1227 | irq = per_cpu(vector_irq, cpu)[vector]; | 1227 | irq = per_cpu(vector_irq, cpu)[vector]; |
1228 | if (irq < 0) | 1228 | if (irq <= VECTOR_UNDEFINED) |
1229 | continue; | 1229 | continue; |
1230 | 1230 | ||
1231 | cfg = irq_cfg(irq); | 1231 | cfg = irq_cfg(irq); |
1232 | if (!cpumask_test_cpu(cpu, cfg->domain)) | 1232 | if (!cpumask_test_cpu(cpu, cfg->domain)) |
1233 | per_cpu(vector_irq, cpu)[vector] = -1; | 1233 | per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; |
1234 | } | 1234 | } |
1235 | raw_spin_unlock(&vector_lock); | 1235 | raw_spin_unlock(&vector_lock); |
1236 | } | 1236 | } |
@@ -2199,13 +2199,13 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
2199 | 2199 | ||
2200 | me = smp_processor_id(); | 2200 | me = smp_processor_id(); |
2201 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { | 2201 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { |
2202 | unsigned int irq; | 2202 | int irq; |
2203 | unsigned int irr; | 2203 | unsigned int irr; |
2204 | struct irq_desc *desc; | 2204 | struct irq_desc *desc; |
2205 | struct irq_cfg *cfg; | 2205 | struct irq_cfg *cfg; |
2206 | irq = __this_cpu_read(vector_irq[vector]); | 2206 | irq = __this_cpu_read(vector_irq[vector]); |
2207 | 2207 | ||
2208 | if (irq == -1) | 2208 | if (irq <= VECTOR_UNDEFINED) |
2209 | continue; | 2209 | continue; |
2210 | 2210 | ||
2211 | desc = irq_to_desc(irq); | 2211 | desc = irq_to_desc(irq); |
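The io_apic.c changes above stop using a bare -1 for "no IRQ assigned to this vector" and compare with <= instead of ==. That only makes sense if more than one negative sentinel exists; presumably the series defines something along these lines elsewhere (not visible in this hunk), so the assumed layout is:

/* Assumed sentinel values for vector_irq[] slots (defined outside this hunk). */
#define VECTOR_UNDEFINED        (-1)    /* never had an IRQ assigned        */
#define VECTOR_RETRIGGERED      (-2)    /* freed, still pending a retrigger */

/* "irq <= VECTOR_UNDEFINED" then skips every slot without a real IRQ. */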
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 7434d8556d09..62071569bd50 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c | |||
@@ -1,6 +1,5 @@ | |||
1 | #include <linux/cpumask.h> | 1 | #include <linux/cpumask.h> |
2 | #include <linux/interrupt.h> | 2 | #include <linux/interrupt.h> |
3 | #include <linux/init.h> | ||
4 | 3 | ||
5 | #include <linux/mm.h> | 4 | #include <linux/mm.h> |
6 | #include <linux/delay.h> | 5 | #include <linux/delay.h> |
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 77c95c0e1bf7..00146f9b0254 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c | |||
@@ -29,7 +29,6 @@ | |||
29 | #define pr_fmt(fmt) "summit: %s: " fmt, __func__ | 29 | #define pr_fmt(fmt) "summit: %s: " fmt, __func__ |
30 | 30 | ||
31 | #include <linux/mm.h> | 31 | #include <linux/mm.h> |
32 | #include <linux/init.h> | ||
33 | #include <asm/io.h> | 32 | #include <asm/io.h> |
34 | #include <asm/bios_ebda.h> | 33 | #include <asm/bios_ebda.h> |
35 | 34 | ||
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 140e29db478d..cac85ee6913f 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c | |||
@@ -3,7 +3,6 @@ | |||
3 | #include <linux/string.h> | 3 | #include <linux/string.h> |
4 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
5 | #include <linux/ctype.h> | 5 | #include <linux/ctype.h> |
6 | #include <linux/init.h> | ||
7 | #include <linux/dmar.h> | 6 | #include <linux/dmar.h> |
8 | #include <linux/cpu.h> | 7 | #include <linux/cpu.h> |
9 | 8 | ||
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 562a76d433c8..de231e328cae 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c | |||
@@ -3,7 +3,6 @@ | |||
3 | #include <linux/string.h> | 3 | #include <linux/string.h> |
4 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
5 | #include <linux/ctype.h> | 5 | #include <linux/ctype.h> |
6 | #include <linux/init.h> | ||
7 | #include <linux/dmar.h> | 6 | #include <linux/dmar.h> |
8 | 7 | ||
9 | #include <asm/smp.h> | 8 | #include <asm/smp.h> |
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index e2dbcb7dabdd..83a7995625a6 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c | |||
@@ -91,7 +91,7 @@ void __init setup_bios_corruption_check(void) | |||
91 | 91 | ||
92 | corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); | 92 | corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); |
93 | 93 | ||
94 | for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) { | 94 | for_each_free_mem_range(i, NUMA_NO_NODE, &start, &end, NULL) { |
95 | start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE), | 95 | start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE), |
96 | PAGE_SIZE, corruption_check_size); | 96 | PAGE_SIZE, corruption_check_size); |
97 | end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE), | 97 | end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE), |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 47b56a7e99cb..7fd54f09b011 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -36,12 +36,13 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o | |||
36 | endif | 36 | endif |
37 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o | 37 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o |
38 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o | 38 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o |
39 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o | 39 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o perf_event_intel_rapl.o |
40 | endif | 40 | endif |
41 | 41 | ||
42 | 42 | ||
43 | obj-$(CONFIG_X86_MCE) += mcheck/ | 43 | obj-$(CONFIG_X86_MCE) += mcheck/ |
44 | obj-$(CONFIG_MTRR) += mtrr/ | 44 | obj-$(CONFIG_MTRR) += mtrr/ |
45 | obj-$(CONFIG_MICROCODE) += microcode/ | ||
45 | 46 | ||
46 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o | 47 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o |
47 | 48 | ||
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bca023bdd6b2..d3153e281d72 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -1,5 +1,4 @@ | |||
1 | #include <linux/export.h> | 1 | #include <linux/export.h> |
2 | #include <linux/init.h> | ||
3 | #include <linux/bitops.h> | 2 | #include <linux/bitops.h> |
4 | #include <linux/elf.h> | 3 | #include <linux/elf.h> |
5 | #include <linux/mm.h> | 4 | #include <linux/mm.h> |
@@ -487,7 +486,7 @@ static void early_init_amd(struct cpuinfo_x86 *c) | |||
487 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 486 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
488 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | 487 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); |
489 | if (!check_tsc_unstable()) | 488 | if (!check_tsc_unstable()) |
490 | sched_clock_stable = 1; | 489 | set_sched_clock_stable(); |
491 | } | 490 | } |
492 | 491 | ||
493 | #ifdef CONFIG_X86_64 | 492 | #ifdef CONFIG_X86_64 |
@@ -508,6 +507,16 @@ static void early_init_amd(struct cpuinfo_x86 *c) | |||
508 | set_cpu_cap(c, X86_FEATURE_EXTD_APICID); | 507 | set_cpu_cap(c, X86_FEATURE_EXTD_APICID); |
509 | } | 508 | } |
510 | #endif | 509 | #endif |
510 | |||
511 | /* F16h erratum 793, CVE-2013-6885 */ | ||
512 | if (c->x86 == 0x16 && c->x86_model <= 0xf) { | ||
513 | u64 val; | ||
514 | |||
515 | rdmsrl(MSR_AMD64_LS_CFG, val); | ||
516 | if (!(val & BIT(15))) | ||
517 | wrmsrl(MSR_AMD64_LS_CFG, val | BIT(15)); | ||
518 | } | ||
519 | |||
511 | } | 520 | } |
512 | 521 | ||
513 | static const int amd_erratum_383[]; | 522 | static const int amd_erratum_383[]; |
@@ -790,14 +799,10 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) | |||
790 | } | 799 | } |
791 | 800 | ||
792 | /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ | 801 | /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ |
793 | if (!((eax >> 16) & mask)) { | 802 | if (!((eax >> 16) & mask)) |
794 | u32 a, b, c, d; | 803 | tlb_lld_2m[ENTRIES] = (cpuid_eax(0x80000005) >> 16) & 0xff; |
795 | 804 | else | |
796 | cpuid(0x80000005, &a, &b, &c, &d); | ||
797 | tlb_lld_2m[ENTRIES] = (a >> 16) & 0xff; | ||
798 | } else { | ||
799 | tlb_lld_2m[ENTRIES] = (eax >> 16) & mask; | 805 | tlb_lld_2m[ENTRIES] = (eax >> 16) & mask; |
800 | } | ||
801 | 806 | ||
802 | /* a 4M entry uses two 2M entries */ | 807 | /* a 4M entry uses two 2M entries */ |
803 | tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1; | 808 | tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1; |
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 8d5652dc99dd..8779edab684e 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c | |||
@@ -1,6 +1,5 @@ | |||
1 | #include <linux/bitops.h> | 1 | #include <linux/bitops.h> |
2 | #include <linux/kernel.h> | 2 | #include <linux/kernel.h> |
3 | #include <linux/init.h> | ||
4 | 3 | ||
5 | #include <asm/processor.h> | 4 | #include <asm/processor.h> |
6 | #include <asm/e820.h> | 5 | #include <asm/e820.h> |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6abc172b8258..24b6fd10625a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -472,6 +472,7 @@ u16 __read_mostly tlb_lli_4m[NR_INFO]; | |||
472 | u16 __read_mostly tlb_lld_4k[NR_INFO]; | 472 | u16 __read_mostly tlb_lld_4k[NR_INFO]; |
473 | u16 __read_mostly tlb_lld_2m[NR_INFO]; | 473 | u16 __read_mostly tlb_lld_2m[NR_INFO]; |
474 | u16 __read_mostly tlb_lld_4m[NR_INFO]; | 474 | u16 __read_mostly tlb_lld_4m[NR_INFO]; |
475 | u16 __read_mostly tlb_lld_1g[NR_INFO]; | ||
475 | 476 | ||
476 | /* | 477 | /* |
477 | * tlb_flushall_shift shows the balance point in replacing cr3 write | 478 | * tlb_flushall_shift shows the balance point in replacing cr3 write |
@@ -486,13 +487,13 @@ void cpu_detect_tlb(struct cpuinfo_x86 *c) | |||
486 | if (this_cpu->c_detect_tlb) | 487 | if (this_cpu->c_detect_tlb) |
487 | this_cpu->c_detect_tlb(c); | 488 | this_cpu->c_detect_tlb(c); |
488 | 489 | ||
489 | printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ | 490 | printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" |
490 | "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ | 491 | "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n" |
491 | "tlb_flushall_shift: %d\n", | 492 | "tlb_flushall_shift: %d\n", |
492 | tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], | 493 | tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], |
493 | tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], | 494 | tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], |
494 | tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], | 495 | tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], |
495 | tlb_flushall_shift); | 496 | tlb_lld_1g[ENTRIES], tlb_flushall_shift); |
496 | } | 497 | } |
497 | 498 | ||
498 | void detect_ht(struct cpuinfo_x86 *c) | 499 | void detect_ht(struct cpuinfo_x86 *c) |
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index d0969c75ab54..aaf152e79637 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c | |||
@@ -1,4 +1,3 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/bitops.h> | 1 | #include <linux/bitops.h> |
3 | #include <linux/delay.h> | 2 | #include <linux/delay.h> |
4 | #include <linux/pci.h> | 3 | #include <linux/pci.h> |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index ea04b342c026..3db61c644e44 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -1,4 +1,3 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/kernel.h> | 1 | #include <linux/kernel.h> |
3 | 2 | ||
4 | #include <linux/string.h> | 3 | #include <linux/string.h> |
@@ -93,7 +92,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) | |||
93 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 92 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
94 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | 93 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); |
95 | if (!check_tsc_unstable()) | 94 | if (!check_tsc_unstable()) |
96 | sched_clock_stable = 1; | 95 | set_sched_clock_stable(); |
97 | } | 96 | } |
98 | 97 | ||
99 | /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ | 98 | /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ |
@@ -506,6 +505,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) | |||
506 | #define TLB_DATA0_2M_4M 0x23 | 505 | #define TLB_DATA0_2M_4M 0x23 |
507 | 506 | ||
508 | #define STLB_4K 0x41 | 507 | #define STLB_4K 0x41 |
508 | #define STLB_4K_2M 0x42 | ||
509 | 509 | ||
510 | static const struct _tlb_table intel_tlb_table[] = { | 510 | static const struct _tlb_table intel_tlb_table[] = { |
511 | { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" }, | 511 | { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" }, |
@@ -526,13 +526,20 @@ static const struct _tlb_table intel_tlb_table[] = { | |||
526 | { 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" }, | 526 | { 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" }, |
527 | { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" }, | 527 | { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" }, |
528 | { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" }, | 528 | { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" }, |
529 | { 0x61, TLB_INST_4K, 48, " TLB_INST 4 KByte pages, full associative" }, | ||
530 | { 0x63, TLB_DATA_1G, 4, " TLB_DATA 1 GByte pages, 4-way set associative" }, | ||
531 | { 0x76, TLB_INST_2M_4M, 8, " TLB_INST 2-MByte or 4-MByte pages, fully associative" }, | ||
529 | { 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" }, | 532 | { 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" }, |
530 | { 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" }, | 533 | { 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" }, |
531 | { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" }, | 534 | { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" }, |
532 | { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" }, | 535 | { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" }, |
533 | { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" }, | 536 | { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" }, |
537 | { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set associative" }, | ||
538 | { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set associative" }, | ||
534 | { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" }, | 539 | { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" }, |
535 | { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" }, | 540 | { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" }, |
541 | { 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" }, | ||
542 | { 0xc2, TLB_DATA_2M_4M, 16, " DTLB 2 MByte/4MByte pages, 4-way associative" }, | ||
536 | { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" }, | 543 | { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" }, |
537 | { 0x00, 0, 0 } | 544 | { 0x00, 0, 0 } |
538 | }; | 545 | }; |
@@ -558,6 +565,20 @@ static void intel_tlb_lookup(const unsigned char desc) | |||
558 | if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) | 565 | if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) |
559 | tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; | 566 | tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; |
560 | break; | 567 | break; |
568 | case STLB_4K_2M: | ||
569 | if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
570 | tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
571 | if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
572 | tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
573 | if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) | ||
574 | tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; | ||
575 | if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries) | ||
576 | tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries; | ||
577 | if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
578 | tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
579 | if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
580 | tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
581 | break; | ||
561 | case TLB_INST_ALL: | 582 | case TLB_INST_ALL: |
562 | if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) | 583 | if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) |
563 | tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; | 584 | tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; |
@@ -603,6 +624,10 @@ static void intel_tlb_lookup(const unsigned char desc) | |||
603 | if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) | 624 | if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) |
604 | tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; | 625 | tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; |
605 | break; | 626 | break; |
627 | case TLB_DATA_1G: | ||
628 | if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries) | ||
629 | tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries; | ||
630 | break; | ||
606 | } | 631 | } |
607 | } | 632 | } |
608 | 633 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index de8b60a53f69..a1aef9533154 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c | |||
@@ -33,22 +33,28 @@ | |||
33 | #include <linux/acpi.h> | 33 | #include <linux/acpi.h> |
34 | #include <linux/cper.h> | 34 | #include <linux/cper.h> |
35 | #include <acpi/apei.h> | 35 | #include <acpi/apei.h> |
36 | #include <acpi/ghes.h> | ||
36 | #include <asm/mce.h> | 37 | #include <asm/mce.h> |
37 | 38 | ||
38 | #include "mce-internal.h" | 39 | #include "mce-internal.h" |
39 | 40 | ||
40 | void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err) | 41 | void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) |
41 | { | 42 | { |
42 | struct mce m; | 43 | struct mce m; |
43 | 44 | ||
44 | /* Only corrected MC is reported */ | 45 | if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) |
45 | if (!corrected || !(mem_err->validation_bits & CPER_MEM_VALID_PA)) | ||
46 | return; | 46 | return; |
47 | 47 | ||
48 | mce_setup(&m); | 48 | mce_setup(&m); |
49 | m.bank = 1; | 49 | m.bank = 1; |
50 | /* Fake a memory read corrected error with unknown channel */ | 50 | /* Fake a memory read error with unknown channel */ |
51 | m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; | 51 | m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; |
52 | |||
53 | if (severity >= GHES_SEV_RECOVERABLE) | ||
54 | m.status |= MCI_STATUS_UC; | ||
55 | if (severity >= GHES_SEV_PANIC) | ||
56 | m.status |= MCI_STATUS_PCC; | ||
57 | |||
52 | m.addr = mem_err->physical_addr; | 58 | m.addr = mem_err->physical_addr; |
53 | mce_log(&m); | 59 | mce_log(&m); |
54 | mce_notify_irq(); | 60 | mce_notify_irq(); |
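With the corrected-only filter gone, apei_mce_report_mem_error() now synthesizes an MCE record for any severity and escalates the status bits: recoverable errors are flagged uncorrected (UC) and panic-level ones additionally as processor-context-corrupt (PCC). A hedged sketch of the kind of GHES-side caller this enables (the function name here is illustrative, not taken from this patch):

static void example_ghes_report_mem_error(int ghes_sev,
                                          struct cper_sec_mem_err *mem_err)
{
        /*
         * Forward the firmware-reported severity; the function above maps
         * GHES_SEV_RECOVERABLE and higher to UC, GHES_SEV_PANIC to UC|PCC.
         */
        apei_mce_report_mem_error(ghes_sev, mem_err);
}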
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b3218cdee95f..4d5419b249da 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -1638,15 +1638,15 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) | |||
1638 | 1638 | ||
1639 | static void mce_start_timer(unsigned int cpu, struct timer_list *t) | 1639 | static void mce_start_timer(unsigned int cpu, struct timer_list *t) |
1640 | { | 1640 | { |
1641 | unsigned long iv = mce_adjust_timer(check_interval * HZ); | 1641 | unsigned long iv = check_interval * HZ; |
1642 | |||
1643 | __this_cpu_write(mce_next_interval, iv); | ||
1644 | 1642 | ||
1645 | if (mca_cfg.ignore_ce || !iv) | 1643 | if (mca_cfg.ignore_ce || !iv) |
1646 | return; | 1644 | return; |
1647 | 1645 | ||
1646 | per_cpu(mce_next_interval, cpu) = iv; | ||
1647 | |||
1648 | t->expires = round_jiffies(jiffies + iv); | 1648 | t->expires = round_jiffies(jiffies + iv); |
1649 | add_timer_on(t, smp_processor_id()); | 1649 | add_timer_on(t, cpu); |
1650 | } | 1650 | } |
1651 | 1651 | ||
1652 | static void __mcheck_cpu_init_timer(void) | 1652 | static void __mcheck_cpu_init_timer(void) |
@@ -2272,8 +2272,10 @@ static int mce_device_create(unsigned int cpu) | |||
2272 | dev->release = &mce_device_release; | 2272 | dev->release = &mce_device_release; |
2273 | 2273 | ||
2274 | err = device_register(dev); | 2274 | err = device_register(dev); |
2275 | if (err) | 2275 | if (err) { |
2276 | put_device(dev); | ||
2276 | return err; | 2277 | return err; |
2278 | } | ||
2277 | 2279 | ||
2278 | for (i = 0; mce_device_attrs[i]; i++) { | 2280 | for (i = 0; mce_device_attrs[i]; i++) { |
2279 | err = device_create_file(dev, mce_device_attrs[i]); | 2281 | err = device_create_file(dev, mce_device_attrs[i]); |
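The mce_device_create() fix above follows the driver-core rule that once device_register() has been called, the embedded kobject owns a reference, so a failure must be unwound with put_device(), which ends up invoking the release callback, rather than with kfree(). A generic illustration of the idiom (the wrapper is hypothetical):

static int example_register_dev(struct device *dev)
{
        int err;

        err = device_register(dev);
        if (err) {
                put_device(dev);        /* drops the ref, calls ->release() */
                return err;
        }
        return 0;
}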
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 4cfe0458ca66..fb6156fee6f7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -6,7 +6,6 @@ | |||
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/gfp.h> | 8 | #include <linux/gfp.h> |
9 | #include <linux/init.h> | ||
10 | #include <linux/interrupt.h> | 9 | #include <linux/interrupt.h> |
11 | #include <linux/percpu.h> | 10 | #include <linux/percpu.h> |
12 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 1c044b1ccc59..a3042989398c 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c | |||
@@ -5,7 +5,6 @@ | |||
5 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
6 | #include <linux/kernel.h> | 6 | #include <linux/kernel.h> |
7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
8 | #include <linux/init.h> | ||
9 | #include <linux/smp.h> | 8 | #include <linux/smp.h> |
10 | 9 | ||
11 | #include <asm/processor.h> | 10 | #include <asm/processor.h> |
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index e9a701aecaa1..7dc5564d0cdf 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c | |||
@@ -5,7 +5,6 @@ | |||
5 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
6 | #include <linux/kernel.h> | 6 | #include <linux/kernel.h> |
7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
8 | #include <linux/init.h> | ||
9 | 8 | ||
10 | #include <asm/processor.h> | 9 | #include <asm/processor.h> |
11 | #include <asm/mce.h> | 10 | #include <asm/mce.h> |
diff --git a/arch/x86/kernel/cpu/microcode/Makefile b/arch/x86/kernel/cpu/microcode/Makefile new file mode 100644 index 000000000000..285c85427c32 --- /dev/null +++ b/arch/x86/kernel/cpu/microcode/Makefile | |||
@@ -0,0 +1,7 @@ | |||
1 | microcode-y := core.o | ||
2 | obj-$(CONFIG_MICROCODE) += microcode.o | ||
3 | microcode-$(CONFIG_MICROCODE_INTEL) += intel.o intel_lib.o | ||
4 | microcode-$(CONFIG_MICROCODE_AMD) += amd.o | ||
5 | obj-$(CONFIG_MICROCODE_EARLY) += core_early.o | ||
6 | obj-$(CONFIG_MICROCODE_INTEL_EARLY) += intel_early.o | ||
7 | obj-$(CONFIG_MICROCODE_AMD_EARLY) += amd_early.o | ||
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/cpu/microcode/amd.c index c3d4cc972eca..8fffd845e22b 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c | |||
@@ -182,10 +182,10 @@ int __apply_microcode_amd(struct microcode_amd *mc_amd) | |||
182 | { | 182 | { |
183 | u32 rev, dummy; | 183 | u32 rev, dummy; |
184 | 184 | ||
185 | wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); | 185 | native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); |
186 | 186 | ||
187 | /* verify patch application was successful */ | 187 | /* verify patch application was successful */ |
188 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); | 188 | native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); |
189 | if (rev != mc_amd->hdr.patch_id) | 189 | if (rev != mc_amd->hdr.patch_id) |
190 | return -1; | 190 | return -1; |
191 | 191 | ||
@@ -332,6 +332,9 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover) | |||
332 | patch->patch_id = mc_hdr->patch_id; | 332 | patch->patch_id = mc_hdr->patch_id; |
333 | patch->equiv_cpu = proc_id; | 333 | patch->equiv_cpu = proc_id; |
334 | 334 | ||
335 | pr_debug("%s: Added patch_id: 0x%08x, proc_id: 0x%04x\n", | ||
336 | __func__, patch->patch_id, proc_id); | ||
337 | |||
335 | /* ... and add to cache. */ | 338 | /* ... and add to cache. */ |
336 | update_cache(patch); | 339 | update_cache(patch); |
337 | 340 | ||
@@ -390,9 +393,9 @@ enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size) | |||
390 | if (cpu_data(smp_processor_id()).cpu_index == boot_cpu_data.cpu_index) { | 393 | if (cpu_data(smp_processor_id()).cpu_index == boot_cpu_data.cpu_index) { |
391 | struct ucode_patch *p = find_patch(smp_processor_id()); | 394 | struct ucode_patch *p = find_patch(smp_processor_id()); |
392 | if (p) { | 395 | if (p) { |
393 | memset(amd_bsp_mpb, 0, MPB_MAX_SIZE); | 396 | memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); |
394 | memcpy(amd_bsp_mpb, p->data, min_t(u32, ksize(p->data), | 397 | memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), |
395 | MPB_MAX_SIZE)); | 398 | PATCH_MAX_SIZE)); |
396 | } | 399 | } |
397 | } | 400 | } |
398 | #endif | 401 | #endif |
@@ -430,7 +433,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, | |||
430 | if (c->x86 >= 0x15) | 433 | if (c->x86 >= 0x15) |
431 | snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); | 434 | snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); |
432 | 435 | ||
433 | if (request_firmware(&fw, (const char *)fw_name, device)) { | 436 | if (request_firmware_direct(&fw, (const char *)fw_name, device)) { |
434 | pr_debug("failed to load file %s\n", fw_name); | 437 | pr_debug("failed to load file %s\n", fw_name); |
435 | goto out; | 438 | goto out; |
436 | } | 439 | } |
diff --git a/arch/x86/kernel/microcode_amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c index 6073104ccaa3..8384c0fa206f 100644 --- a/arch/x86/kernel/microcode_amd_early.c +++ b/arch/x86/kernel/cpu/microcode/amd_early.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * Copyright (C) 2013 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2013 Advanced Micro Devices, Inc. |
3 | * | 3 | * |
4 | * Author: Jacob Shin <jacob.shin@amd.com> | 4 | * Author: Jacob Shin <jacob.shin@amd.com> |
5 | * Fixes: Borislav Petkov <bp@suse.de> | ||
5 | * | 6 | * |
6 | * This program is free software; you can redistribute it and/or modify | 7 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License version 2 as | 8 | * it under the terms of the GNU General Public License version 2 as |
@@ -15,10 +16,18 @@ | |||
15 | #include <asm/setup.h> | 16 | #include <asm/setup.h> |
16 | #include <asm/microcode_amd.h> | 17 | #include <asm/microcode_amd.h> |
17 | 18 | ||
18 | static bool ucode_loaded; | 19 | /* |
20 | * This points to the current valid container of microcode patches which we will | ||
21 | * save from the initrd before jettisoning its contents. | ||
22 | */ | ||
23 | static u8 *container; | ||
24 | static size_t container_size; | ||
25 | |||
19 | static u32 ucode_new_rev; | 26 | static u32 ucode_new_rev; |
20 | static unsigned long ucode_offset; | 27 | u8 amd_ucode_patch[PATCH_MAX_SIZE]; |
21 | static size_t ucode_size; | 28 | static u16 this_equiv_id; |
29 | |||
30 | struct cpio_data ucode_cpio; | ||
22 | 31 | ||
23 | /* | 32 | /* |
24 | * Microcode patch container file is prepended to the initrd in cpio format. | 33 | * Microcode patch container file is prepended to the initrd in cpio format. |
@@ -32,9 +41,6 @@ static struct cpio_data __init find_ucode_in_initrd(void) | |||
32 | char *path; | 41 | char *path; |
33 | void *start; | 42 | void *start; |
34 | size_t size; | 43 | size_t size; |
35 | unsigned long *uoffset; | ||
36 | size_t *usize; | ||
37 | struct cpio_data cd; | ||
38 | 44 | ||
39 | #ifdef CONFIG_X86_32 | 45 | #ifdef CONFIG_X86_32 |
40 | struct boot_params *p; | 46 | struct boot_params *p; |
@@ -47,30 +53,50 @@ static struct cpio_data __init find_ucode_in_initrd(void) | |||
47 | path = (char *)__pa_nodebug(ucode_path); | 53 | path = (char *)__pa_nodebug(ucode_path); |
48 | start = (void *)p->hdr.ramdisk_image; | 54 | start = (void *)p->hdr.ramdisk_image; |
49 | size = p->hdr.ramdisk_size; | 55 | size = p->hdr.ramdisk_size; |
50 | uoffset = (unsigned long *)__pa_nodebug(&ucode_offset); | ||
51 | usize = (size_t *)__pa_nodebug(&ucode_size); | ||
52 | #else | 56 | #else |
53 | path = ucode_path; | 57 | path = ucode_path; |
54 | start = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET); | 58 | start = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET); |
55 | size = boot_params.hdr.ramdisk_size; | 59 | size = boot_params.hdr.ramdisk_size; |
56 | uoffset = &ucode_offset; | ||
57 | usize = &ucode_size; | ||
58 | #endif | 60 | #endif |
59 | 61 | ||
60 | cd = find_cpio_data(path, start, size, &offset); | 62 | return find_cpio_data(path, start, size, &offset); |
61 | if (!cd.data) | 63 | } |
62 | return cd; | ||
63 | 64 | ||
64 | if (*(u32 *)cd.data != UCODE_MAGIC) { | 65 | static size_t compute_container_size(u8 *data, u32 total_size) |
65 | cd.data = NULL; | 66 | { |
66 | cd.size = 0; | 67 | size_t size = 0; |
67 | return cd; | 68 | u32 *header = (u32 *)data; |
68 | } | ||
69 | 69 | ||
70 | *uoffset = (u8 *)cd.data - (u8 *)start; | 70 | if (header[0] != UCODE_MAGIC || |
71 | *usize = cd.size; | 71 | header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ |
72 | header[2] == 0) /* size */ | ||
73 | return size; | ||
72 | 74 | ||
73 | return cd; | 75 | size = header[2] + CONTAINER_HDR_SZ; |
76 | total_size -= size; | ||
77 | data += size; | ||
78 | |||
79 | while (total_size) { | ||
80 | u16 patch_size; | ||
81 | |||
82 | header = (u32 *)data; | ||
83 | |||
84 | if (header[0] != UCODE_UCODE_TYPE) | ||
85 | break; | ||
86 | |||
87 | /* | ||
88 | * Sanity-check patch size. | ||
89 | */ | ||
90 | patch_size = header[1]; | ||
91 | if (patch_size > PATCH_MAX_SIZE) | ||
92 | break; | ||
93 | |||
94 | size += patch_size + SECTION_HDR_SIZE; | ||
95 | data += patch_size + SECTION_HDR_SIZE; | ||
96 | total_size -= patch_size + SECTION_HDR_SIZE; | ||
97 | } | ||
98 | |||
99 | return size; | ||
74 | } | 100 | } |
75 | 101 | ||
76 | /* | 102 | /* |
@@ -85,23 +111,22 @@ static struct cpio_data __init find_ucode_in_initrd(void) | |||
85 | static void apply_ucode_in_initrd(void *ucode, size_t size) | 111 | static void apply_ucode_in_initrd(void *ucode, size_t size) |
86 | { | 112 | { |
87 | struct equiv_cpu_entry *eq; | 113 | struct equiv_cpu_entry *eq; |
114 | size_t *cont_sz; | ||
88 | u32 *header; | 115 | u32 *header; |
89 | u8 *data; | 116 | u8 *data, **cont; |
90 | u16 eq_id = 0; | 117 | u16 eq_id = 0; |
91 | int offset, left; | 118 | int offset, left; |
92 | u32 rev, eax; | 119 | u32 rev, eax, ebx, ecx, edx; |
93 | u32 *new_rev; | 120 | u32 *new_rev; |
94 | unsigned long *uoffset; | ||
95 | size_t *usize; | ||
96 | 121 | ||
97 | #ifdef CONFIG_X86_32 | 122 | #ifdef CONFIG_X86_32 |
98 | new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); | 123 | new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); |
99 | uoffset = (unsigned long *)__pa_nodebug(&ucode_offset); | 124 | cont_sz = (size_t *)__pa_nodebug(&container_size); |
100 | usize = (size_t *)__pa_nodebug(&ucode_size); | 125 | cont = (u8 **)__pa_nodebug(&container); |
101 | #else | 126 | #else |
102 | new_rev = &ucode_new_rev; | 127 | new_rev = &ucode_new_rev; |
103 | uoffset = &ucode_offset; | 128 | cont_sz = &container_size; |
104 | usize = &ucode_size; | 129 | cont = &container; |
105 | #endif | 130 | #endif |
106 | 131 | ||
107 | data = ucode; | 132 | data = ucode; |
@@ -109,23 +134,37 @@ static void apply_ucode_in_initrd(void *ucode, size_t size) | |||
109 | header = (u32 *)data; | 134 | header = (u32 *)data; |
110 | 135 | ||
111 | /* find equiv cpu table */ | 136 | /* find equiv cpu table */ |
112 | 137 | if (header[0] != UCODE_MAGIC || | |
113 | if (header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ | 138 | header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ |
114 | header[2] == 0) /* size */ | 139 | header[2] == 0) /* size */ |
115 | return; | 140 | return; |
116 | 141 | ||
117 | eax = cpuid_eax(0x00000001); | 142 | eax = 0x00000001; |
143 | ecx = 0; | ||
144 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
118 | 145 | ||
119 | while (left > 0) { | 146 | while (left > 0) { |
120 | eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ); | 147 | eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ); |
121 | 148 | ||
149 | *cont = data; | ||
150 | |||
151 | /* Advance past the container header */ | ||
122 | offset = header[2] + CONTAINER_HDR_SZ; | 152 | offset = header[2] + CONTAINER_HDR_SZ; |
123 | data += offset; | 153 | data += offset; |
124 | left -= offset; | 154 | left -= offset; |
125 | 155 | ||
126 | eq_id = find_equiv_id(eq, eax); | 156 | eq_id = find_equiv_id(eq, eax); |
127 | if (eq_id) | 157 | if (eq_id) { |
158 | this_equiv_id = eq_id; | ||
159 | *cont_sz = compute_container_size(*cont, left + offset); | ||
160 | |||
161 | /* | ||
162 | * truncate how much we need to iterate over in the | ||
163 | * ucode update loop below | ||
164 | */ | ||
165 | left = *cont_sz - offset; | ||
128 | break; | 166 | break; |
167 | } | ||
129 | 168 | ||
130 | /* | 169 | /* |
131 | * support multiple container files appended together. if this | 170 | * support multiple container files appended together. if this |
@@ -145,19 +184,18 @@ static void apply_ucode_in_initrd(void *ucode, size_t size) | |||
145 | 184 | ||
146 | /* mark where the next microcode container file starts */ | 185 | /* mark where the next microcode container file starts */ |
147 | offset = data - (u8 *)ucode; | 186 | offset = data - (u8 *)ucode; |
148 | *uoffset += offset; | ||
149 | *usize -= offset; | ||
150 | ucode = data; | 187 | ucode = data; |
151 | } | 188 | } |
152 | 189 | ||
153 | if (!eq_id) { | 190 | if (!eq_id) { |
154 | *usize = 0; | 191 | *cont = NULL; |
192 | *cont_sz = 0; | ||
155 | return; | 193 | return; |
156 | } | 194 | } |
157 | 195 | ||
158 | /* find ucode and update if needed */ | 196 | /* find ucode and update if needed */ |
159 | 197 | ||
160 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); | 198 | native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); |
161 | 199 | ||
162 | while (left > 0) { | 200 | while (left > 0) { |
163 | struct microcode_amd *mc; | 201 | struct microcode_amd *mc; |
@@ -168,73 +206,83 @@ static void apply_ucode_in_initrd(void *ucode, size_t size) | |||
168 | break; | 206 | break; |
169 | 207 | ||
170 | mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE); | 208 | mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE); |
171 | if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) | 209 | |
172 | if (__apply_microcode_amd(mc) == 0) { | 210 | if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) { |
211 | |||
212 | if (!__apply_microcode_amd(mc)) { | ||
173 | rev = mc->hdr.patch_id; | 213 | rev = mc->hdr.patch_id; |
174 | *new_rev = rev; | 214 | *new_rev = rev; |
215 | |||
216 | /* save ucode patch */ | ||
217 | memcpy(amd_ucode_patch, mc, | ||
218 | min_t(u32, header[1], PATCH_MAX_SIZE)); | ||
175 | } | 219 | } |
220 | } | ||
176 | 221 | ||
177 | offset = header[1] + SECTION_HDR_SIZE; | 222 | offset = header[1] + SECTION_HDR_SIZE; |
178 | data += offset; | 223 | data += offset; |
179 | left -= offset; | 224 | left -= offset; |
180 | } | 225 | } |
181 | |||
182 | /* mark where this microcode container file ends */ | ||
183 | offset = *usize - (data - (u8 *)ucode); | ||
184 | *usize -= offset; | ||
185 | |||
186 | if (!(*new_rev)) | ||
187 | *usize = 0; | ||
188 | } | 226 | } |
189 | 227 | ||
190 | void __init load_ucode_amd_bsp(void) | 228 | void __init load_ucode_amd_bsp(void) |
191 | { | 229 | { |
192 | struct cpio_data cd = find_ucode_in_initrd(); | 230 | struct cpio_data cp; |
193 | if (!cd.data) | 231 | void **data; |
232 | size_t *size; | ||
233 | |||
234 | #ifdef CONFIG_X86_32 | ||
235 | data = (void **)__pa_nodebug(&ucode_cpio.data); | ||
236 | size = (size_t *)__pa_nodebug(&ucode_cpio.size); | ||
237 | #else | ||
238 | data = &ucode_cpio.data; | ||
239 | size = &ucode_cpio.size; | ||
240 | #endif | ||
241 | |||
242 | cp = find_ucode_in_initrd(); | ||
243 | if (!cp.data) | ||
194 | return; | 244 | return; |
195 | 245 | ||
196 | apply_ucode_in_initrd(cd.data, cd.size); | 246 | *data = cp.data; |
247 | *size = cp.size; | ||
248 | |||
249 | apply_ucode_in_initrd(cp.data, cp.size); | ||
197 | } | 250 | } |
198 | 251 | ||
199 | #ifdef CONFIG_X86_32 | 252 | #ifdef CONFIG_X86_32 |
200 | u8 amd_bsp_mpb[MPB_MAX_SIZE]; | ||
201 | |||
202 | /* | 253 | /* |
203 | * On 32-bit, since AP's early load occurs before paging is turned on, we | 254 | * On 32-bit, since AP's early load occurs before paging is turned on, we |
204 | * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during | 255 | * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during |
205 | * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During | 256 | * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During |
206 | * save_microcode_in_initrd_amd() BSP's patch is copied to amd_bsp_mpb, which | 257 | * save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch, |
207 | * is used upon resume from suspend. | 258 | * which is used upon resume from suspend. |
208 | */ | 259 | */ |
209 | void load_ucode_amd_ap(void) | 260 | void load_ucode_amd_ap(void) |
210 | { | 261 | { |
211 | struct microcode_amd *mc; | 262 | struct microcode_amd *mc; |
212 | unsigned long *initrd; | ||
213 | unsigned long *uoffset; | ||
214 | size_t *usize; | 263 | size_t *usize; |
215 | void *ucode; | 264 | void **ucode; |
216 | 265 | ||
217 | mc = (struct microcode_amd *)__pa(amd_bsp_mpb); | 266 | mc = (struct microcode_amd *)__pa(amd_ucode_patch); |
218 | if (mc->hdr.patch_id && mc->hdr.processor_rev_id) { | 267 | if (mc->hdr.patch_id && mc->hdr.processor_rev_id) { |
219 | __apply_microcode_amd(mc); | 268 | __apply_microcode_amd(mc); |
220 | return; | 269 | return; |
221 | } | 270 | } |
222 | 271 | ||
223 | initrd = (unsigned long *)__pa(&initrd_start); | 272 | ucode = (void *)__pa_nodebug(&container); |
224 | uoffset = (unsigned long *)__pa(&ucode_offset); | 273 | usize = (size_t *)__pa_nodebug(&container_size); |
225 | usize = (size_t *)__pa(&ucode_size); | ||
226 | 274 | ||
227 | if (!*usize || !*initrd) | 275 | if (!*ucode || !*usize) |
228 | return; | 276 | return; |
229 | 277 | ||
230 | ucode = (void *)((unsigned long)__pa(*initrd) + *uoffset); | 278 | apply_ucode_in_initrd(*ucode, *usize); |
231 | apply_ucode_in_initrd(ucode, *usize); | ||
232 | } | 279 | } |
233 | 280 | ||
234 | static void __init collect_cpu_sig_on_bsp(void *arg) | 281 | static void __init collect_cpu_sig_on_bsp(void *arg) |
235 | { | 282 | { |
236 | unsigned int cpu = smp_processor_id(); | 283 | unsigned int cpu = smp_processor_id(); |
237 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 284 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
285 | |||
238 | uci->cpu_sig.sig = cpuid_eax(0x00000001); | 286 | uci->cpu_sig.sig = cpuid_eax(0x00000001); |
239 | } | 287 | } |
240 | #else | 288 | #else |
@@ -242,36 +290,54 @@ void load_ucode_amd_ap(void) | |||
242 | { | 290 | { |
243 | unsigned int cpu = smp_processor_id(); | 291 | unsigned int cpu = smp_processor_id(); |
244 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 292 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
293 | struct equiv_cpu_entry *eq; | ||
294 | struct microcode_amd *mc; | ||
245 | u32 rev, eax; | 295 | u32 rev, eax; |
296 | u16 eq_id; | ||
297 | |||
298 | /* Exit if called on the BSP. */ | ||
299 | if (!cpu) | ||
300 | return; | ||
301 | |||
302 | if (!container) | ||
303 | return; | ||
246 | 304 | ||
247 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); | 305 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); |
248 | eax = cpuid_eax(0x00000001); | ||
249 | 306 | ||
250 | uci->cpu_sig.rev = rev; | 307 | uci->cpu_sig.rev = rev; |
251 | uci->cpu_sig.sig = eax; | 308 | uci->cpu_sig.sig = eax; |
252 | 309 | ||
253 | if (cpu && !ucode_loaded) { | 310 | eax = cpuid_eax(0x00000001); |
254 | void *ucode; | 311 | eq = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ); |
255 | 312 | ||
256 | if (!ucode_size || !initrd_start) | 313 | eq_id = find_equiv_id(eq, eax); |
257 | return; | 314 | if (!eq_id) |
315 | return; | ||
316 | |||
317 | if (eq_id == this_equiv_id) { | ||
318 | mc = (struct microcode_amd *)amd_ucode_patch; | ||
258 | 319 | ||
259 | ucode = (void *)(initrd_start + ucode_offset); | 320 | if (mc && rev < mc->hdr.patch_id) { |
260 | eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); | 321 | if (!__apply_microcode_amd(mc)) |
261 | if (load_microcode_amd(eax, ucode, ucode_size) != UCODE_OK) | 322 | ucode_new_rev = mc->hdr.patch_id; |
323 | } | ||
324 | |||
325 | } else { | ||
326 | if (!ucode_cpio.data) | ||
262 | return; | 327 | return; |
263 | 328 | ||
264 | ucode_loaded = true; | 329 | /* |
330 | * The AP has a different equivalence ID than the BSP, which looks | ||
331 | * like mixed-stepping silicon, so go through the ucode blob anew. | ||
332 | */ | ||
333 | apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size); | ||
265 | } | 334 | } |
266 | |||
267 | apply_microcode_amd(cpu); | ||
268 | } | 335 | } |
269 | #endif | 336 | #endif |
270 | 337 | ||
271 | int __init save_microcode_in_initrd_amd(void) | 338 | int __init save_microcode_in_initrd_amd(void) |
272 | { | 339 | { |
273 | enum ucode_state ret; | 340 | enum ucode_state ret; |
274 | void *ucode; | ||
275 | u32 eax; | 341 | u32 eax; |
276 | 342 | ||
277 | #ifdef CONFIG_X86_32 | 343 | #ifdef CONFIG_X86_32 |
@@ -280,22 +346,35 @@ int __init save_microcode_in_initrd_amd(void) | |||
280 | 346 | ||
281 | if (!uci->cpu_sig.sig) | 347 | if (!uci->cpu_sig.sig) |
282 | smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1); | 348 | smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1); |
349 | |||
350 | /* | ||
351 | * Take into account the fact that the ramdisk might get relocated | ||
352 | * and therefore we need to recompute the container's position in | ||
353 | * virtual memory space. | ||
354 | */ | ||
355 | container = (u8 *)(__va((u32)relocated_ramdisk) + | ||
356 | ((u32)container - boot_params.hdr.ramdisk_image)); | ||
283 | #endif | 357 | #endif |
284 | if (ucode_new_rev) | 358 | if (ucode_new_rev) |
285 | pr_info("microcode: updated early to new patch_level=0x%08x\n", | 359 | pr_info("microcode: updated early to new patch_level=0x%08x\n", |
286 | ucode_new_rev); | 360 | ucode_new_rev); |
287 | 361 | ||
288 | if (ucode_loaded || !ucode_size || !initrd_start) | 362 | if (!container) |
289 | return 0; | 363 | return -EINVAL; |
290 | 364 | ||
291 | ucode = (void *)(initrd_start + ucode_offset); | ||
292 | eax = cpuid_eax(0x00000001); | 365 | eax = cpuid_eax(0x00000001); |
293 | eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); | 366 | eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); |
294 | 367 | ||
295 | ret = load_microcode_amd(eax, ucode, ucode_size); | 368 | ret = load_microcode_amd(eax, container, container_size); |
296 | if (ret != UCODE_OK) | 369 | if (ret != UCODE_OK) |
297 | return -EINVAL; | 370 | return -EINVAL; |
298 | 371 | ||
299 | ucode_loaded = true; | 372 | /* |
373 | * The initrd will be freed any moment now. Stash patches for the | ||
374 | * current family and switch to the patch cache for cpu hotplug etc. later. | ||
375 | */ | ||
376 | container = NULL; | ||
377 | container_size = 0; | ||
378 | |||
300 | return 0; | 379 | return 0; |
301 | } | 380 | } |
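The saving/loading flow above hinges on the on-disk container format: a 12-byte header (magic, equivalence-table type, equivalence-table size), the equivalence table itself, then a sequence of patch sections, each with an 8-byte section header (type, patch size) followed by the patch body. Below is a rough standalone sketch of the walk compute_container_size() performs; the constant values are assumptions mirroring asm/microcode_amd.h and should be checked there rather than taken from this page.

/* Hedged sketch: measure the first container in an AMD microcode blob. */
#include <stddef.h>
#include <stdint.h>

#define UCODE_MAGIC			0x00414d44	/* assumed value */
#define UCODE_EQUIV_CPU_TABLE_TYPE	0x00000000	/* assumed value */
#define UCODE_UCODE_TYPE		0x00000001	/* assumed value */
#define CONTAINER_HDR_SZ		12		/* assumed value */
#define SECTION_HDR_SIZE		8		/* assumed value */
#define PATCH_MAX_SIZE			4096		/* assumed value */

static size_t container_size(const uint8_t *data, size_t total_size)
{
	const uint32_t *header = (const uint32_t *)data;
	size_t size;

	if (total_size < CONTAINER_HDR_SZ)
		return 0;

	if (header[0] != UCODE_MAGIC ||
	    header[1] != UCODE_EQUIV_CPU_TABLE_TYPE ||	/* type */
	    header[2] == 0)				/* equiv table size */
		return 0;

	size = header[2] + CONTAINER_HDR_SZ;		/* skip equiv table */
	if (size > total_size)
		return 0;

	data += size;
	total_size -= size;

	while (total_size > SECTION_HDR_SIZE) {
		uint32_t patch_size;

		header = (const uint32_t *)data;
		if (header[0] != UCODE_UCODE_TYPE)	/* next container? */
			break;

		patch_size = header[1];
		if (patch_size > PATCH_MAX_SIZE ||
		    patch_size + SECTION_HDR_SIZE > total_size)
			break;

		size       += patch_size + SECTION_HDR_SIZE;
		data       += patch_size + SECTION_HDR_SIZE;
		total_size -= patch_size + SECTION_HDR_SIZE;
	}

	return size;
}

The extra bounds checks against total_size are not in the kernel version; they are only there to keep the standalone sketch safe on truncated input.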
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/cpu/microcode/core.c index 15c987698b0f..15c987698b0f 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/cpu/microcode/core.c | |||
diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/cpu/microcode/core_early.c index be7f8514f577..be7f8514f577 100644 --- a/arch/x86/kernel/microcode_core_early.c +++ b/arch/x86/kernel/cpu/microcode/core_early.c | |||
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 5fb2cebf556b..a276fa75d9b5 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c | |||
@@ -278,7 +278,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device, | |||
278 | sprintf(name, "intel-ucode/%02x-%02x-%02x", | 278 | sprintf(name, "intel-ucode/%02x-%02x-%02x", |
279 | c->x86, c->x86_model, c->x86_mask); | 279 | c->x86, c->x86_model, c->x86_mask); |
280 | 280 | ||
281 | if (request_firmware(&firmware, name, device)) { | 281 | if (request_firmware_direct(&firmware, name, device)) { |
282 | pr_debug("data file %s load failed\n", name); | 282 | pr_debug("data file %s load failed\n", name); |
283 | return UCODE_NFOUND; | 283 | return UCODE_NFOUND; |
284 | } | 284 | } |
diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c index 1575deb2e636..18f739129e72 100644 --- a/arch/x86/kernel/microcode_intel_early.c +++ b/arch/x86/kernel/cpu/microcode/intel_early.c | |||
@@ -365,16 +365,6 @@ out: | |||
365 | return state; | 365 | return state; |
366 | } | 366 | } |
367 | 367 | ||
368 | #define native_rdmsr(msr, val1, val2) \ | ||
369 | do { \ | ||
370 | u64 __val = native_read_msr((msr)); \ | ||
371 | (void)((val1) = (u32)__val); \ | ||
372 | (void)((val2) = (u32)(__val >> 32)); \ | ||
373 | } while (0) | ||
374 | |||
375 | #define native_wrmsr(msr, low, high) \ | ||
376 | native_write_msr(msr, low, high); | ||
377 | |||
378 | static int collect_cpu_info_early(struct ucode_cpu_info *uci) | 368 | static int collect_cpu_info_early(struct ucode_cpu_info *uci) |
379 | { | 369 | { |
380 | unsigned int val[2]; | 370 | unsigned int val[2]; |
diff --git a/arch/x86/kernel/microcode_intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c index ce69320d0179..ce69320d0179 100644 --- a/arch/x86/kernel/microcode_intel_lib.c +++ b/arch/x86/kernel/cpu/microcode/intel_lib.c | |||
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8e132931614d..b88645191fe5 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -1883,21 +1883,27 @@ static struct pmu pmu = { | |||
1883 | 1883 | ||
1884 | void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) | 1884 | void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) |
1885 | { | 1885 | { |
1886 | struct cyc2ns_data *data; | ||
1887 | |||
1886 | userpg->cap_user_time = 0; | 1888 | userpg->cap_user_time = 0; |
1887 | userpg->cap_user_time_zero = 0; | 1889 | userpg->cap_user_time_zero = 0; |
1888 | userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc; | 1890 | userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc; |
1889 | userpg->pmc_width = x86_pmu.cntval_bits; | 1891 | userpg->pmc_width = x86_pmu.cntval_bits; |
1890 | 1892 | ||
1891 | if (!sched_clock_stable) | 1893 | if (!sched_clock_stable()) |
1892 | return; | 1894 | return; |
1893 | 1895 | ||
1896 | data = cyc2ns_read_begin(); | ||
1897 | |||
1894 | userpg->cap_user_time = 1; | 1898 | userpg->cap_user_time = 1; |
1895 | userpg->time_mult = this_cpu_read(cyc2ns); | 1899 | userpg->time_mult = data->cyc2ns_mul; |
1896 | userpg->time_shift = CYC2NS_SCALE_FACTOR; | 1900 | userpg->time_shift = data->cyc2ns_shift; |
1897 | userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; | 1901 | userpg->time_offset = data->cyc2ns_offset - now; |
1898 | 1902 | ||
1899 | userpg->cap_user_time_zero = 1; | 1903 | userpg->cap_user_time_zero = 1; |
1900 | userpg->time_zero = this_cpu_read(cyc2ns_offset); | 1904 | userpg->time_zero = data->cyc2ns_offset; |
1905 | |||
1906 | cyc2ns_read_end(data); | ||
1901 | } | 1907 | } |
1902 | 1908 | ||
1903 | /* | 1909 | /* |
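The mult/shift/offset values exported here feed the self-monitoring time conversion documented in the perf_event_mmap_page UAPI comments. A hedged user-space sketch of that conversion, assuming only those documented fields (a real reader must also retry around the page's ->lock seqcount, omitted here for brevity):

#include <stdint.h>
#include <linux/perf_event.h>

/*
 * Sketch: turn a raw TSC value into a perf clock timestamp (ns) using the
 * fields arch_perf_update_userpage() publishes. The formula follows the
 * perf_event_mmap_page documentation; it is not usable when cap_user_time
 * is 0 (i.e. when sched_clock_stable() was false above).
 */
static uint64_t tsc_to_ns(const struct perf_event_mmap_page *pc, uint64_t cyc)
{
	uint64_t quot, rem;

	if (!pc->cap_user_time)
		return 0;

	quot = cyc >> pc->time_shift;
	rem  = cyc & (((uint64_t)1 << pc->time_shift) - 1);

	return pc->time_offset + quot * pc->time_mult +
	       ((rem * pc->time_mult) >> pc->time_shift);
}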
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index e09f0bfb7b8f..4b8e4d3cd6ea 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/pci.h> | 11 | #include <linux/pci.h> |
12 | #include <linux/ptrace.h> | 12 | #include <linux/ptrace.h> |
13 | #include <linux/syscore_ops.h> | ||
13 | 14 | ||
14 | #include <asm/apic.h> | 15 | #include <asm/apic.h> |
15 | 16 | ||
@@ -816,6 +817,18 @@ out: | |||
816 | return ret; | 817 | return ret; |
817 | } | 818 | } |
818 | 819 | ||
820 | static void ibs_eilvt_setup(void) | ||
821 | { | ||
822 | /* | ||
823 | * Force LVT offset assignment for family 10h: The offsets are | ||
824 | * not assigned by the BIOS for this family, so the OS is | ||
825 | * responsible for doing it. If the OS assignment fails, fall | ||
826 | * back to BIOS settings and try to setup this. | ||
827 | */ | ||
828 | if (boot_cpu_data.x86 == 0x10) | ||
829 | force_ibs_eilvt_setup(); | ||
830 | } | ||
831 | |||
819 | static inline int get_ibs_lvt_offset(void) | 832 | static inline int get_ibs_lvt_offset(void) |
820 | { | 833 | { |
821 | u64 val; | 834 | u64 val; |
@@ -851,6 +864,36 @@ static void clear_APIC_ibs(void *dummy) | |||
851 | setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); | 864 | setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); |
852 | } | 865 | } |
853 | 866 | ||
867 | #ifdef CONFIG_PM | ||
868 | |||
869 | static int perf_ibs_suspend(void) | ||
870 | { | ||
871 | clear_APIC_ibs(NULL); | ||
872 | return 0; | ||
873 | } | ||
874 | |||
875 | static void perf_ibs_resume(void) | ||
876 | { | ||
877 | ibs_eilvt_setup(); | ||
878 | setup_APIC_ibs(NULL); | ||
879 | } | ||
880 | |||
881 | static struct syscore_ops perf_ibs_syscore_ops = { | ||
882 | .resume = perf_ibs_resume, | ||
883 | .suspend = perf_ibs_suspend, | ||
884 | }; | ||
885 | |||
886 | static void perf_ibs_pm_init(void) | ||
887 | { | ||
888 | register_syscore_ops(&perf_ibs_syscore_ops); | ||
889 | } | ||
890 | |||
891 | #else | ||
892 | |||
893 | static inline void perf_ibs_pm_init(void) { } | ||
894 | |||
895 | #endif | ||
896 | |||
854 | static int | 897 | static int |
855 | perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | 898 | perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) |
856 | { | 899 | { |
@@ -877,18 +920,12 @@ static __init int amd_ibs_init(void) | |||
877 | if (!caps) | 920 | if (!caps) |
878 | return -ENODEV; /* ibs not supported by the cpu */ | 921 | return -ENODEV; /* ibs not supported by the cpu */ |
879 | 922 | ||
880 | /* | 923 | ibs_eilvt_setup(); |
881 | * Force LVT offset assignment for family 10h: The offsets are | ||
882 | * not assigned by the BIOS for this family, so the OS is | ||
883 | * responsible for doing it. If the OS assignment fails, fall | ||
884 | * back to BIOS settings and try to setup this. | ||
885 | */ | ||
886 | if (boot_cpu_data.x86 == 0x10) | ||
887 | force_ibs_eilvt_setup(); | ||
888 | 924 | ||
889 | if (!ibs_eilvt_valid()) | 925 | if (!ibs_eilvt_valid()) |
890 | goto out; | 926 | goto out; |
891 | 927 | ||
928 | perf_ibs_pm_init(); | ||
892 | get_online_cpus(); | 929 | get_online_cpus(); |
893 | ibs_caps = caps; | 930 | ibs_caps = caps; |
894 | /* make ibs_caps visible to other cpus: */ | 931 | /* make ibs_caps visible to other cpus: */ |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c new file mode 100644 index 000000000000..5ad35ad94d0f --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c | |||
@@ -0,0 +1,679 @@ | |||
1 | /* | ||
2 | * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters | ||
3 | * Copyright (C) 2013 Google, Inc., Stephane Eranian | ||
4 | * | ||
5 | * Intel RAPL interface is specified in the IA-32 Manual Vol3b | ||
6 | * section 14.7.1 (September 2013) | ||
7 | * | ||
8 | * RAPL provides more controls than just reporting energy consumption | ||
9 | * however here we only expose the 3 energy consumption free running | ||
10 | * counters (pp0, pkg, dram). | ||
11 | * | ||
12 | * Each of those counters increments in a power unit defined by the | ||
13 | * RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules | ||
14 | * but it can vary. | ||
15 | * | ||
16 | * Counter to rapl events mappings: | ||
17 | * | ||
18 | * pp0 counter: consumption of all physical cores (power plane 0) | ||
19 | * event: rapl_energy_cores | ||
20 | * perf code: 0x1 | ||
21 | * | ||
22 | * pkg counter: consumption of the whole processor package | ||
23 | * event: rapl_energy_pkg | ||
24 | * perf code: 0x2 | ||
25 | * | ||
26 | * dram counter: consumption of the dram domain (servers only) | ||
27 | * event: rapl_energy_dram | ||
28 | * perf code: 0x3 | ||
29 | * | ||
30 | * gpu counter: consumption of the builtin-gpu domain (client only) | ||
31 | * event: rapl_energy_gpu | ||
32 | * perf code: 0x4 | ||
33 | * | ||
34 | * We manage those counters as free running (read-only). They may be | ||
35 | * used simultaneously by other tools, such as turbostat. | ||
36 | * | ||
37 | * The events only support system-wide mode counting. There is no | ||
38 | * sampling support because it does not make sense and is not | ||
39 | * supported by the RAPL hardware. | ||
40 | * | ||
41 | * Because we want to avoid floating-point operations in the kernel, | ||
42 | * the events are all reported in fixed point arithmetic (32.32). | ||
43 | * Tools must adjust the counts to convert them to Watts using | ||
44 | * the duration of the measurement. Tools may use a function such as | ||
45 | * ldexp(raw_count, -32); | ||
46 | */ | ||
47 | #include <linux/module.h> | ||
48 | #include <linux/slab.h> | ||
49 | #include <linux/perf_event.h> | ||
50 | #include <asm/cpu_device_id.h> | ||
51 | #include "perf_event.h" | ||
52 | |||
53 | /* | ||
54 | * RAPL energy status counters | ||
55 | */ | ||
56 | #define RAPL_IDX_PP0_NRG_STAT 0 /* all cores */ | ||
57 | #define INTEL_RAPL_PP0 0x1 /* pseudo-encoding */ | ||
58 | #define RAPL_IDX_PKG_NRG_STAT 1 /* entire package */ | ||
59 | #define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */ | ||
60 | #define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */ | ||
61 | #define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */ | ||
62 | #define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */ | ||
63 | #define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */ | ||
64 | |||
65 | /* Clients have PP0, PKG */ | ||
66 | #define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\ | ||
67 | 1<<RAPL_IDX_PKG_NRG_STAT|\ | ||
68 | 1<<RAPL_IDX_PP1_NRG_STAT) | ||
69 | |||
70 | /* Servers have PP0, PKG, RAM */ | ||
71 | #define RAPL_IDX_SRV (1<<RAPL_IDX_PP0_NRG_STAT|\ | ||
72 | 1<<RAPL_IDX_PKG_NRG_STAT|\ | ||
73 | 1<<RAPL_IDX_RAM_NRG_STAT) | ||
74 | |||
75 | /* | ||
76 | * event code: LSB 8 bits, passed in attr->config | ||
77 | * any other bit is reserved | ||
78 | */ | ||
79 | #define RAPL_EVENT_MASK 0xFFULL | ||
80 | |||
81 | #define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format) \ | ||
82 | static ssize_t __rapl_##_var##_show(struct kobject *kobj, \ | ||
83 | struct kobj_attribute *attr, \ | ||
84 | char *page) \ | ||
85 | { \ | ||
86 | BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ | ||
87 | return sprintf(page, _format "\n"); \ | ||
88 | } \ | ||
89 | static struct kobj_attribute format_attr_##_var = \ | ||
90 | __ATTR(_name, 0444, __rapl_##_var##_show, NULL) | ||
91 | |||
92 | #define RAPL_EVENT_DESC(_name, _config) \ | ||
93 | { \ | ||
94 | .attr = __ATTR(_name, 0444, rapl_event_show, NULL), \ | ||
95 | .config = _config, \ | ||
96 | } | ||
97 | |||
98 | #define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */ | ||
99 | |||
100 | struct rapl_pmu { | ||
101 | spinlock_t lock; | ||
102 | int hw_unit; /* 1/2^hw_unit Joule */ | ||
103 | int n_active; /* number of active events */ | ||
104 | struct list_head active_list; | ||
105 | struct pmu *pmu; /* pointer to rapl_pmu_class */ | ||
106 | ktime_t timer_interval; /* in ktime_t unit */ | ||
107 | struct hrtimer hrtimer; | ||
108 | }; | ||
109 | |||
110 | static struct pmu rapl_pmu_class; | ||
111 | static cpumask_t rapl_cpu_mask; | ||
112 | static int rapl_cntr_mask; | ||
113 | |||
114 | static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu); | ||
115 | static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free); | ||
116 | |||
117 | static inline u64 rapl_read_counter(struct perf_event *event) | ||
118 | { | ||
119 | u64 raw; | ||
120 | rdmsrl(event->hw.event_base, raw); | ||
121 | return raw; | ||
122 | } | ||
123 | |||
124 | static inline u64 rapl_scale(u64 v) | ||
125 | { | ||
126 | /* | ||
127 | * scale delta to smallest unit (1/2^32) | ||
128 | * users must then scale back: count * 1/(2^32) to get Joules | ||
129 | * or use ldexp(count, -32). | ||
130 | * Watts = Joules/Time delta | ||
131 | */ | ||
132 | return v << (32 - __get_cpu_var(rapl_pmu)->hw_unit); | ||
133 | } | ||
134 | |||
135 | static u64 rapl_event_update(struct perf_event *event) | ||
136 | { | ||
137 | struct hw_perf_event *hwc = &event->hw; | ||
138 | u64 prev_raw_count, new_raw_count; | ||
139 | s64 delta, sdelta; | ||
140 | int shift = RAPL_CNTR_WIDTH; | ||
141 | |||
142 | again: | ||
143 | prev_raw_count = local64_read(&hwc->prev_count); | ||
144 | rdmsrl(event->hw.event_base, new_raw_count); | ||
145 | |||
146 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, | ||
147 | new_raw_count) != prev_raw_count) { | ||
148 | cpu_relax(); | ||
149 | goto again; | ||
150 | } | ||
151 | |||
152 | /* | ||
153 | * Now we have the new raw value and have updated the prev | ||
154 | * timestamp already. We can now calculate the elapsed delta | ||
155 | * (event-)time and add that to the generic event. | ||
156 | * | ||
157 | * Careful, not all hw sign-extends above the physical width | ||
158 | * of the count. | ||
159 | */ | ||
160 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | ||
161 | delta >>= shift; | ||
162 | |||
163 | sdelta = rapl_scale(delta); | ||
164 | |||
165 | local64_add(sdelta, &event->count); | ||
166 | |||
167 | return new_raw_count; | ||
168 | } | ||
169 | |||
170 | static void rapl_start_hrtimer(struct rapl_pmu *pmu) | ||
171 | { | ||
172 | __hrtimer_start_range_ns(&pmu->hrtimer, | ||
173 | pmu->timer_interval, 0, | ||
174 | HRTIMER_MODE_REL_PINNED, 0); | ||
175 | } | ||
176 | |||
177 | static void rapl_stop_hrtimer(struct rapl_pmu *pmu) | ||
178 | { | ||
179 | hrtimer_cancel(&pmu->hrtimer); | ||
180 | } | ||
181 | |||
182 | static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) | ||
183 | { | ||
184 | struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); | ||
185 | struct perf_event *event; | ||
186 | unsigned long flags; | ||
187 | |||
188 | if (!pmu->n_active) | ||
189 | return HRTIMER_NORESTART; | ||
190 | |||
191 | spin_lock_irqsave(&pmu->lock, flags); | ||
192 | |||
193 | list_for_each_entry(event, &pmu->active_list, active_entry) { | ||
194 | rapl_event_update(event); | ||
195 | } | ||
196 | |||
197 | spin_unlock_irqrestore(&pmu->lock, flags); | ||
198 | |||
199 | hrtimer_forward_now(hrtimer, pmu->timer_interval); | ||
200 | |||
201 | return HRTIMER_RESTART; | ||
202 | } | ||
203 | |||
204 | static void rapl_hrtimer_init(struct rapl_pmu *pmu) | ||
205 | { | ||
206 | struct hrtimer *hr = &pmu->hrtimer; | ||
207 | |||
208 | hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
209 | hr->function = rapl_hrtimer_handle; | ||
210 | } | ||
211 | |||
212 | static void __rapl_pmu_event_start(struct rapl_pmu *pmu, | ||
213 | struct perf_event *event) | ||
214 | { | ||
215 | if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) | ||
216 | return; | ||
217 | |||
218 | event->hw.state = 0; | ||
219 | |||
220 | list_add_tail(&event->active_entry, &pmu->active_list); | ||
221 | |||
222 | local64_set(&event->hw.prev_count, rapl_read_counter(event)); | ||
223 | |||
224 | pmu->n_active++; | ||
225 | if (pmu->n_active == 1) | ||
226 | rapl_start_hrtimer(pmu); | ||
227 | } | ||
228 | |||
229 | static void rapl_pmu_event_start(struct perf_event *event, int mode) | ||
230 | { | ||
231 | struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); | ||
232 | unsigned long flags; | ||
233 | |||
234 | spin_lock_irqsave(&pmu->lock, flags); | ||
235 | __rapl_pmu_event_start(pmu, event); | ||
236 | spin_unlock_irqrestore(&pmu->lock, flags); | ||
237 | } | ||
238 | |||
239 | static void rapl_pmu_event_stop(struct perf_event *event, int mode) | ||
240 | { | ||
241 | struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); | ||
242 | struct hw_perf_event *hwc = &event->hw; | ||
243 | unsigned long flags; | ||
244 | |||
245 | spin_lock_irqsave(&pmu->lock, flags); | ||
246 | |||
247 | /* mark event as deactivated and stopped */ | ||
248 | if (!(hwc->state & PERF_HES_STOPPED)) { | ||
249 | WARN_ON_ONCE(pmu->n_active <= 0); | ||
250 | pmu->n_active--; | ||
251 | if (pmu->n_active == 0) | ||
252 | rapl_stop_hrtimer(pmu); | ||
253 | |||
254 | list_del(&event->active_entry); | ||
255 | |||
256 | WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); | ||
257 | hwc->state |= PERF_HES_STOPPED; | ||
258 | } | ||
259 | |||
260 | /* check if update of sw counter is necessary */ | ||
261 | if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { | ||
262 | /* | ||
263 | * Drain the remaining delta count out of an event | ||
264 | * that we are disabling: | ||
265 | */ | ||
266 | rapl_event_update(event); | ||
267 | hwc->state |= PERF_HES_UPTODATE; | ||
268 | } | ||
269 | |||
270 | spin_unlock_irqrestore(&pmu->lock, flags); | ||
271 | } | ||
272 | |||
273 | static int rapl_pmu_event_add(struct perf_event *event, int mode) | ||
274 | { | ||
275 | struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); | ||
276 | struct hw_perf_event *hwc = &event->hw; | ||
277 | unsigned long flags; | ||
278 | |||
279 | spin_lock_irqsave(&pmu->lock, flags); | ||
280 | |||
281 | hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | ||
282 | |||
283 | if (mode & PERF_EF_START) | ||
284 | __rapl_pmu_event_start(pmu, event); | ||
285 | |||
286 | spin_unlock_irqrestore(&pmu->lock, flags); | ||
287 | |||
288 | return 0; | ||
289 | } | ||
290 | |||
291 | static void rapl_pmu_event_del(struct perf_event *event, int flags) | ||
292 | { | ||
293 | rapl_pmu_event_stop(event, PERF_EF_UPDATE); | ||
294 | } | ||
295 | |||
296 | static int rapl_pmu_event_init(struct perf_event *event) | ||
297 | { | ||
298 | u64 cfg = event->attr.config & RAPL_EVENT_MASK; | ||
299 | int bit, msr, ret = 0; | ||
300 | |||
301 | /* only look at RAPL events */ | ||
302 | if (event->attr.type != rapl_pmu_class.type) | ||
303 | return -ENOENT; | ||
304 | |||
305 | /* check only supported bits are set */ | ||
306 | if (event->attr.config & ~RAPL_EVENT_MASK) | ||
307 | return -EINVAL; | ||
308 | |||
309 | /* | ||
310 | * check event is known (determines counter) | ||
311 | */ | ||
312 | switch (cfg) { | ||
313 | case INTEL_RAPL_PP0: | ||
314 | bit = RAPL_IDX_PP0_NRG_STAT; | ||
315 | msr = MSR_PP0_ENERGY_STATUS; | ||
316 | break; | ||
317 | case INTEL_RAPL_PKG: | ||
318 | bit = RAPL_IDX_PKG_NRG_STAT; | ||
319 | msr = MSR_PKG_ENERGY_STATUS; | ||
320 | break; | ||
321 | case INTEL_RAPL_RAM: | ||
322 | bit = RAPL_IDX_RAM_NRG_STAT; | ||
323 | msr = MSR_DRAM_ENERGY_STATUS; | ||
324 | break; | ||
325 | case INTEL_RAPL_PP1: | ||
326 | bit = RAPL_IDX_PP1_NRG_STAT; | ||
327 | msr = MSR_PP1_ENERGY_STATUS; | ||
328 | break; | ||
329 | default: | ||
330 | return -EINVAL; | ||
331 | } | ||
332 | /* check event supported */ | ||
333 | if (!(rapl_cntr_mask & (1 << bit))) | ||
334 | return -EINVAL; | ||
335 | |||
336 | /* unsupported modes and filters */ | ||
337 | if (event->attr.exclude_user || | ||
338 | event->attr.exclude_kernel || | ||
339 | event->attr.exclude_hv || | ||
340 | event->attr.exclude_idle || | ||
341 | event->attr.exclude_host || | ||
342 | event->attr.exclude_guest || | ||
343 | event->attr.sample_period) /* no sampling */ | ||
344 | return -EINVAL; | ||
345 | |||
346 | /* must be done before validate_group */ | ||
347 | event->hw.event_base = msr; | ||
348 | event->hw.config = cfg; | ||
349 | event->hw.idx = bit; | ||
350 | |||
351 | return ret; | ||
352 | } | ||
353 | |||
354 | static void rapl_pmu_event_read(struct perf_event *event) | ||
355 | { | ||
356 | rapl_event_update(event); | ||
357 | } | ||
358 | |||
359 | static ssize_t rapl_get_attr_cpumask(struct device *dev, | ||
360 | struct device_attribute *attr, char *buf) | ||
361 | { | ||
362 | int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &rapl_cpu_mask); | ||
363 | |||
364 | buf[n++] = '\n'; | ||
365 | buf[n] = '\0'; | ||
366 | return n; | ||
367 | } | ||
368 | |||
369 | static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL); | ||
370 | |||
371 | static struct attribute *rapl_pmu_attrs[] = { | ||
372 | &dev_attr_cpumask.attr, | ||
373 | NULL, | ||
374 | }; | ||
375 | |||
376 | static struct attribute_group rapl_pmu_attr_group = { | ||
377 | .attrs = rapl_pmu_attrs, | ||
378 | }; | ||
379 | |||
380 | EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); | ||
381 | EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); | ||
382 | EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03"); | ||
383 | EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04"); | ||
384 | |||
385 | EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules"); | ||
386 | EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules"); | ||
387 | EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules"); | ||
388 | EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules"); | ||
389 | |||
390 | /* | ||
391 | * we compute in 0.23 nJ increments regardless of MSR | ||
392 | */ | ||
393 | EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10"); | ||
394 | EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10"); | ||
395 | EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10"); | ||
396 | EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10"); | ||
397 | |||
398 | static struct attribute *rapl_events_srv_attr[] = { | ||
399 | EVENT_PTR(rapl_cores), | ||
400 | EVENT_PTR(rapl_pkg), | ||
401 | EVENT_PTR(rapl_ram), | ||
402 | |||
403 | EVENT_PTR(rapl_cores_unit), | ||
404 | EVENT_PTR(rapl_pkg_unit), | ||
405 | EVENT_PTR(rapl_ram_unit), | ||
406 | |||
407 | EVENT_PTR(rapl_cores_scale), | ||
408 | EVENT_PTR(rapl_pkg_scale), | ||
409 | EVENT_PTR(rapl_ram_scale), | ||
410 | NULL, | ||
411 | }; | ||
412 | |||
413 | static struct attribute *rapl_events_cln_attr[] = { | ||
414 | EVENT_PTR(rapl_cores), | ||
415 | EVENT_PTR(rapl_pkg), | ||
416 | EVENT_PTR(rapl_gpu), | ||
417 | |||
418 | EVENT_PTR(rapl_cores_unit), | ||
419 | EVENT_PTR(rapl_pkg_unit), | ||
420 | EVENT_PTR(rapl_gpu_unit), | ||
421 | |||
422 | EVENT_PTR(rapl_cores_scale), | ||
423 | EVENT_PTR(rapl_pkg_scale), | ||
424 | EVENT_PTR(rapl_gpu_scale), | ||
425 | NULL, | ||
426 | }; | ||
427 | |||
428 | static struct attribute_group rapl_pmu_events_group = { | ||
429 | .name = "events", | ||
430 | .attrs = NULL, /* patched at runtime */ | ||
431 | }; | ||
432 | |||
433 | DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); | ||
434 | static struct attribute *rapl_formats_attr[] = { | ||
435 | &format_attr_event.attr, | ||
436 | NULL, | ||
437 | }; | ||
438 | |||
439 | static struct attribute_group rapl_pmu_format_group = { | ||
440 | .name = "format", | ||
441 | .attrs = rapl_formats_attr, | ||
442 | }; | ||
443 | |||
444 | const struct attribute_group *rapl_attr_groups[] = { | ||
445 | &rapl_pmu_attr_group, | ||
446 | &rapl_pmu_format_group, | ||
447 | &rapl_pmu_events_group, | ||
448 | NULL, | ||
449 | }; | ||
450 | |||
451 | static struct pmu rapl_pmu_class = { | ||
452 | .attr_groups = rapl_attr_groups, | ||
453 | .task_ctx_nr = perf_invalid_context, /* system-wide only */ | ||
454 | .event_init = rapl_pmu_event_init, | ||
455 | .add = rapl_pmu_event_add, /* must have */ | ||
456 | .del = rapl_pmu_event_del, /* must have */ | ||
457 | .start = rapl_pmu_event_start, | ||
458 | .stop = rapl_pmu_event_stop, | ||
459 | .read = rapl_pmu_event_read, | ||
460 | }; | ||
461 | |||
462 | static void rapl_cpu_exit(int cpu) | ||
463 | { | ||
464 | struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); | ||
465 | int i, phys_id = topology_physical_package_id(cpu); | ||
466 | int target = -1; | ||
467 | |||
468 | /* find a new cpu on same package */ | ||
469 | for_each_online_cpu(i) { | ||
470 | if (i == cpu) | ||
471 | continue; | ||
472 | if (phys_id == topology_physical_package_id(i)) { | ||
473 | target = i; | ||
474 | break; | ||
475 | } | ||
476 | } | ||
477 | /* | ||
478 | * clear cpu from cpumask | ||
479 | * if it was set in the cpumask and some other cpu on the package is | ||
480 | * still online, then move the events to that cpu | ||
481 | */ | ||
482 | if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0) | ||
483 | cpumask_set_cpu(target, &rapl_cpu_mask); | ||
484 | |||
485 | WARN_ON(cpumask_empty(&rapl_cpu_mask)); | ||
486 | /* | ||
487 | * migrate events and context to new cpu | ||
488 | */ | ||
489 | if (target >= 0) | ||
490 | perf_pmu_migrate_context(pmu->pmu, cpu, target); | ||
491 | |||
492 | /* cancel overflow polling timer for CPU */ | ||
493 | rapl_stop_hrtimer(pmu); | ||
494 | } | ||
495 | |||
496 | static void rapl_cpu_init(int cpu) | ||
497 | { | ||
498 | int i, phys_id = topology_physical_package_id(cpu); | ||
499 | |||
500 | /* check if phys_id is already covered */ | ||
501 | for_each_cpu(i, &rapl_cpu_mask) { | ||
502 | if (phys_id == topology_physical_package_id(i)) | ||
503 | return; | ||
504 | } | ||
505 | /* was not found, so add it */ | ||
506 | cpumask_set_cpu(cpu, &rapl_cpu_mask); | ||
507 | } | ||
508 | |||
509 | static int rapl_cpu_prepare(int cpu) | ||
510 | { | ||
511 | struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); | ||
512 | int phys_id = topology_physical_package_id(cpu); | ||
513 | u64 ms; | ||
514 | |||
515 | if (pmu) | ||
516 | return 0; | ||
517 | |||
518 | if (phys_id < 0) | ||
519 | return -1; | ||
520 | |||
521 | pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); | ||
522 | if (!pmu) | ||
523 | return -1; | ||
524 | |||
525 | spin_lock_init(&pmu->lock); | ||
526 | |||
527 | INIT_LIST_HEAD(&pmu->active_list); | ||
528 | |||
529 | /* | ||
530 | * grab power unit as: 1/2^unit Joules | ||
531 | * | ||
532 | * we cache in local PMU instance | ||
533 | */ | ||
534 | rdmsrl(MSR_RAPL_POWER_UNIT, pmu->hw_unit); | ||
535 | pmu->hw_unit = (pmu->hw_unit >> 8) & 0x1FULL; | ||
536 | pmu->pmu = &rapl_pmu_class; | ||
537 | |||
538 | /* | ||
539 | * use reference of 200W for scaling the timeout | ||
540 | * to avoid missing counter overflows. | ||
541 | * 200W = 200 Joules/sec | ||
542 | * divide interval by 2 to avoid lockstep (2 * 100) | ||
543 | * if hw unit is 32, then we use 2 ms 1/200/2 | ||
544 | */ | ||
545 | if (pmu->hw_unit < 32) | ||
546 | ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1)); | ||
547 | else | ||
548 | ms = 2; | ||
549 | |||
550 | pmu->timer_interval = ms_to_ktime(ms); | ||
551 | |||
552 | rapl_hrtimer_init(pmu); | ||
553 | |||
554 | /* set RAPL pmu for this cpu for now */ | ||
555 | per_cpu(rapl_pmu, cpu) = pmu; | ||
556 | per_cpu(rapl_pmu_to_free, cpu) = NULL; | ||
557 | |||
558 | return 0; | ||
559 | } | ||
560 | |||
561 | static void rapl_cpu_kfree(int cpu) | ||
562 | { | ||
563 | struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu); | ||
564 | |||
565 | kfree(pmu); | ||
566 | |||
567 | per_cpu(rapl_pmu_to_free, cpu) = NULL; | ||
568 | } | ||
569 | |||
570 | static int rapl_cpu_dying(int cpu) | ||
571 | { | ||
572 | struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); | ||
573 | |||
574 | if (!pmu) | ||
575 | return 0; | ||
576 | |||
577 | per_cpu(rapl_pmu, cpu) = NULL; | ||
578 | |||
579 | per_cpu(rapl_pmu_to_free, cpu) = pmu; | ||
580 | |||
581 | return 0; | ||
582 | } | ||
583 | |||
584 | static int rapl_cpu_notifier(struct notifier_block *self, | ||
585 | unsigned long action, void *hcpu) | ||
586 | { | ||
587 | unsigned int cpu = (long)hcpu; | ||
588 | |||
589 | switch (action & ~CPU_TASKS_FROZEN) { | ||
590 | case CPU_UP_PREPARE: | ||
591 | rapl_cpu_prepare(cpu); | ||
592 | break; | ||
593 | case CPU_STARTING: | ||
594 | rapl_cpu_init(cpu); | ||
595 | break; | ||
596 | case CPU_UP_CANCELED: | ||
597 | case CPU_DYING: | ||
598 | rapl_cpu_dying(cpu); | ||
599 | break; | ||
600 | case CPU_ONLINE: | ||
601 | case CPU_DEAD: | ||
602 | rapl_cpu_kfree(cpu); | ||
603 | break; | ||
604 | case CPU_DOWN_PREPARE: | ||
605 | rapl_cpu_exit(cpu); | ||
606 | break; | ||
607 | default: | ||
608 | break; | ||
609 | } | ||
610 | |||
611 | return NOTIFY_OK; | ||
612 | } | ||
613 | |||
614 | static const struct x86_cpu_id rapl_cpu_match[] = { | ||
615 | [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 }, | ||
616 | [1] = {}, | ||
617 | }; | ||
618 | |||
619 | static int __init rapl_pmu_init(void) | ||
620 | { | ||
621 | struct rapl_pmu *pmu; | ||
622 | int cpu, ret; | ||
623 | |||
624 | /* | ||
625 | * check for Intel processor family 6 | ||
626 | */ | ||
627 | if (!x86_match_cpu(rapl_cpu_match)) | ||
628 | return 0; | ||
629 | |||
630 | /* check supported CPU */ | ||
631 | switch (boot_cpu_data.x86_model) { | ||
632 | case 42: /* Sandy Bridge */ | ||
633 | case 58: /* Ivy Bridge */ | ||
634 | case 60: /* Haswell */ | ||
635 | case 69: /* Haswell-Celeron */ | ||
636 | rapl_cntr_mask = RAPL_IDX_CLN; | ||
637 | rapl_pmu_events_group.attrs = rapl_events_cln_attr; | ||
638 | break; | ||
639 | case 45: /* Sandy Bridge-EP */ | ||
640 | case 62: /* IvyTown */ | ||
641 | rapl_cntr_mask = RAPL_IDX_SRV; | ||
642 | rapl_pmu_events_group.attrs = rapl_events_srv_attr; | ||
643 | break; | ||
644 | |||
645 | default: | ||
646 | /* unsupported */ | ||
647 | return 0; | ||
648 | } | ||
649 | get_online_cpus(); | ||
650 | |||
651 | for_each_online_cpu(cpu) { | ||
652 | rapl_cpu_prepare(cpu); | ||
653 | rapl_cpu_init(cpu); | ||
654 | } | ||
655 | |||
656 | perf_cpu_notifier(rapl_cpu_notifier); | ||
657 | |||
658 | ret = perf_pmu_register(&rapl_pmu_class, "power", -1); | ||
659 | if (WARN_ON(ret)) { | ||
660 | pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret); | ||
661 | put_online_cpus(); | ||
662 | return -1; | ||
663 | } | ||
664 | |||
665 | pmu = __get_cpu_var(rapl_pmu); | ||
666 | |||
667 | pr_info("RAPL PMU detected, hw unit 2^-%d Joules," | ||
668 | " API unit is 2^-32 Joules," | ||
669 | " %d fixed counters" | ||
670 | " %llu ms ovfl timer\n", | ||
671 | pmu->hw_unit, | ||
672 | hweight32(rapl_cntr_mask), | ||
673 | ktime_to_ms(pmu->timer_interval)); | ||
674 | |||
675 | put_online_cpus(); | ||
676 | |||
677 | return 0; | ||
678 | } | ||
679 | device_initcall(rapl_pmu_init); | ||
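Since the driver pre-scales every count to 2^-32 Joules (see rapl_scale() above), post-processing on the tool side is a single ldexp(). As a worked example, with the common client energy unit of 2^-16 J the 32-bit hardware counter wraps after 2^32 * 2^-16 = 65536 J, i.e. roughly 328 s at the 200 W reference, which is where the ~164 s polling interval computed in rapl_cpu_prepare() comes from. A minimal sketch of the conversion a consumer of the "power" PMU would do, assuming only the advertised 2^-32 J unit:

#include <math.h>
#include <stdint.h>
#include <stdio.h>

/* Sketch: convert a RAPL PMU count (2^-32 J units) over a window to Watts. */
static void report_energy(uint64_t count, double seconds)
{
	double joules = ldexp((double)count, -32);	/* count * 2^-32 */

	printf("%.6f J over %.3f s = %.3f W\n",
	       joules, seconds, seconds > 0.0 ? joules / seconds : 0.0);
}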
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index 88db010845cb..384df5105fbc 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c | |||
@@ -31,20 +31,6 @@ static int __init x86_rdrand_setup(char *s) | |||
31 | } | 31 | } |
32 | __setup("nordrand", x86_rdrand_setup); | 32 | __setup("nordrand", x86_rdrand_setup); |
33 | 33 | ||
34 | /* We can't use arch_get_random_long() here since alternatives haven't run */ | ||
35 | static inline int rdrand_long(unsigned long *v) | ||
36 | { | ||
37 | int ok; | ||
38 | asm volatile("1: " RDRAND_LONG "\n\t" | ||
39 | "jc 2f\n\t" | ||
40 | "decl %0\n\t" | ||
41 | "jnz 1b\n\t" | ||
42 | "2:" | ||
43 | : "=r" (ok), "=a" (*v) | ||
44 | : "0" (RDRAND_RETRY_LOOPS)); | ||
45 | return ok; | ||
46 | } | ||
47 | |||
48 | /* | 34 | /* |
49 | * Force a reseed cycle; we are architecturally guaranteed a reseed | 35 | * Force a reseed cycle; we are architecturally guaranteed a reseed |
50 | * after no more than 512 128-bit chunks of random data. This also | 36 | * after no more than 512 128-bit chunks of random data. This also |
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index aa0430d69b90..3fa0e5ad86b4 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c | |||
@@ -1,6 +1,5 @@ | |||
1 | #include <linux/kernel.h> | 1 | #include <linux/kernel.h> |
2 | #include <linux/mm.h> | 2 | #include <linux/mm.h> |
3 | #include <linux/init.h> | ||
4 | #include <asm/processor.h> | 3 | #include <asm/processor.h> |
5 | #include <asm/msr.h> | 4 | #include <asm/msr.h> |
6 | #include "cpu.h" | 5 | #include "cpu.h" |
diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c index 75c5ad5d35cc..ef9c2a0078bd 100644 --- a/arch/x86/kernel/cpu/umc.c +++ b/arch/x86/kernel/cpu/umc.c | |||
@@ -1,5 +1,4 @@ | |||
1 | #include <linux/kernel.h> | 1 | #include <linux/kernel.h> |
2 | #include <linux/init.h> | ||
3 | #include <asm/processor.h> | 2 | #include <asm/processor.h> |
4 | #include "cpu.h" | 3 | #include "cpu.h" |
5 | 4 | ||
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 18677a90d6a3..a57902efe2d5 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
@@ -7,7 +7,6 @@ | |||
7 | * | 7 | * |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/types.h> | 10 | #include <linux/types.h> |
12 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
13 | #include <linux/smp.h> | 12 | #include <linux/smp.h> |
diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c index 5d3fe8d36e4a..f6dfd9334b67 100644 --- a/arch/x86/kernel/doublefault.c +++ b/arch/x86/kernel/doublefault.c | |||
@@ -1,6 +1,5 @@ | |||
1 | #include <linux/mm.h> | 1 | #include <linux/mm.h> |
2 | #include <linux/sched.h> | 2 | #include <linux/sched.h> |
3 | #include <linux/init.h> | ||
4 | #include <linux/init_task.h> | 3 | #include <linux/init_task.h> |
5 | #include <linux/fs.h> | 4 | #include <linux/fs.h> |
6 | 5 | ||
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 174da5fc5a7b..988c00a1f60d 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -1120,7 +1120,7 @@ void __init memblock_find_dma_reserve(void) | |||
1120 | nr_pages += end_pfn - start_pfn; | 1120 | nr_pages += end_pfn - start_pfn; |
1121 | } | 1121 | } |
1122 | 1122 | ||
1123 | for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) { | 1123 | for_each_free_mem_range(u, NUMA_NO_NODE, &start, &end, NULL) { |
1124 | start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN); | 1124 | start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN); |
1125 | end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN); | 1125 | end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN); |
1126 | if (start_pfn < end_pfn) | 1126 | if (start_pfn < end_pfn) |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 51e2988c5728..a2a4f4697889 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -1082,7 +1082,7 @@ ENTRY(ftrace_caller) | |||
1082 | pushl $0 /* Pass NULL as regs pointer */ | 1082 | pushl $0 /* Pass NULL as regs pointer */ |
1083 | movl 4*4(%esp), %eax | 1083 | movl 4*4(%esp), %eax |
1084 | movl 0x4(%ebp), %edx | 1084 | movl 0x4(%ebp), %edx |
1085 | leal function_trace_op, %ecx | 1085 | movl function_trace_op, %ecx |
1086 | subl $MCOUNT_INSN_SIZE, %eax | 1086 | subl $MCOUNT_INSN_SIZE, %eax |
1087 | 1087 | ||
1088 | .globl ftrace_call | 1088 | .globl ftrace_call |
@@ -1140,7 +1140,7 @@ ENTRY(ftrace_regs_caller) | |||
1140 | movl 12*4(%esp), %eax /* Load ip (1st parameter) */ | 1140 | movl 12*4(%esp), %eax /* Load ip (1st parameter) */ |
1141 | subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ | 1141 | subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ |
1142 | movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ | 1142 | movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ |
1143 | leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ | 1143 | movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ |
1144 | pushl %esp /* Save pt_regs as 4th parameter */ | 1144 | pushl %esp /* Save pt_regs as 4th parameter */ |
1145 | 1145 | ||
1146 | GLOBAL(ftrace_regs_call) | 1146 | GLOBAL(ftrace_regs_call) |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e21b0785a85b..1e96c3628bf2 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -88,7 +88,7 @@ END(function_hook) | |||
88 | MCOUNT_SAVE_FRAME \skip | 88 | MCOUNT_SAVE_FRAME \skip |
89 | 89 | ||
90 | /* Load the ftrace_ops into the 3rd parameter */ | 90 | /* Load the ftrace_ops into the 3rd parameter */ |
91 | leaq function_trace_op, %rdx | 91 | movq function_trace_op(%rip), %rdx |
92 | 92 | ||
93 | /* Load ip into the first parameter */ | 93 | /* Load ip into the first parameter */ |
94 | movq RIP(%rsp), %rdi | 94 | movq RIP(%rsp), %rdi |
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index f66ff162dce8..a67b47c31314 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c | |||
@@ -38,7 +38,6 @@ | |||
38 | #include <linux/kernel.h> | 38 | #include <linux/kernel.h> |
39 | #include <linux/module.h> | 39 | #include <linux/module.h> |
40 | #include <linux/sched.h> | 40 | #include <linux/sched.h> |
41 | #include <linux/init.h> | ||
42 | #include <linux/smp.h> | 41 | #include <linux/smp.h> |
43 | 42 | ||
44 | #include <asm/hw_breakpoint.h> | 43 | #include <asm/hw_breakpoint.h> |
diff --git a/arch/x86/kernel/iosf_mbi.c b/arch/x86/kernel/iosf_mbi.c new file mode 100644 index 000000000000..c3aae6672843 --- /dev/null +++ b/arch/x86/kernel/iosf_mbi.c | |||
@@ -0,0 +1,226 @@ | |||
1 | /* | ||
2 | * IOSF-SB MailBox Interface Driver | ||
3 | * Copyright (c) 2013, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * | ||
15 | * The IOSF-SB is a fabric bus available on Atom based SOC's that uses a | ||
16 | * mailbox interface (MBI) to communicate with multiple devices. This | ||
17 | * driver implements access to this interface for those platforms that can | ||
18 | * enumerate the device using PCI. | ||
19 | */ | ||
20 | |||
21 | #include <linux/module.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/spinlock.h> | ||
24 | #include <linux/pci.h> | ||
25 | |||
26 | #include <asm/iosf_mbi.h> | ||
27 | |||
28 | static DEFINE_SPINLOCK(iosf_mbi_lock); | ||
29 | |||
30 | static inline u32 iosf_mbi_form_mcr(u8 op, u8 port, u8 offset) | ||
31 | { | ||
32 | return (op << 24) | (port << 16) | (offset << 8) | MBI_ENABLE; | ||
33 | } | ||
34 | |||
35 | static struct pci_dev *mbi_pdev; /* one mbi device */ | ||
36 | |||
37 | static int iosf_mbi_pci_read_mdr(u32 mcrx, u32 mcr, u32 *mdr) | ||
38 | { | ||
39 | int result; | ||
40 | |||
41 | if (!mbi_pdev) | ||
42 | return -ENODEV; | ||
43 | |||
44 | if (mcrx) { | ||
45 | result = pci_write_config_dword(mbi_pdev, MBI_MCRX_OFFSET, | ||
46 | mcrx); | ||
47 | if (result < 0) | ||
48 | goto fail_read; | ||
49 | } | ||
50 | |||
51 | result = pci_write_config_dword(mbi_pdev, MBI_MCR_OFFSET, mcr); | ||
52 | if (result < 0) | ||
53 | goto fail_read; | ||
54 | |||
55 | result = pci_read_config_dword(mbi_pdev, MBI_MDR_OFFSET, mdr); | ||
56 | if (result < 0) | ||
57 | goto fail_read; | ||
58 | |||
59 | return 0; | ||
60 | |||
61 | fail_read: | ||
62 | dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result); | ||
63 | return result; | ||
64 | } | ||
65 | |||
66 | static int iosf_mbi_pci_write_mdr(u32 mcrx, u32 mcr, u32 mdr) | ||
67 | { | ||
68 | int result; | ||
69 | |||
70 | if (!mbi_pdev) | ||
71 | return -ENODEV; | ||
72 | |||
73 | result = pci_write_config_dword(mbi_pdev, MBI_MDR_OFFSET, mdr); | ||
74 | if (result < 0) | ||
75 | goto fail_write; | ||
76 | |||
77 | if (mcrx) { | ||
78 | result = pci_write_config_dword(mbi_pdev, MBI_MCRX_OFFSET, | ||
79 | mcrx); | ||
80 | if (result < 0) | ||
81 | goto fail_write; | ||
82 | } | ||
83 | |||
84 | result = pci_write_config_dword(mbi_pdev, MBI_MCR_OFFSET, mcr); | ||
85 | if (result < 0) | ||
86 | goto fail_write; | ||
87 | |||
88 | return 0; | ||
89 | |||
90 | fail_write: | ||
91 | dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result); | ||
92 | return result; | ||
93 | } | ||
94 | |||
95 | int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr) | ||
96 | { | ||
97 | u32 mcr, mcrx; | ||
98 | unsigned long flags; | ||
99 | int ret; | ||
100 | |||
101 | /* Access to the GFX unit is handled by GPU code */ | ||
102 | if (port == BT_MBI_UNIT_GFX) { | ||
103 | WARN_ON(1); | ||
104 | return -EPERM; | ||
105 | } | ||
106 | |||
107 | mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO); | ||
108 | mcrx = offset & MBI_MASK_HI; | ||
109 | |||
110 | spin_lock_irqsave(&iosf_mbi_lock, flags); | ||
111 | ret = iosf_mbi_pci_read_mdr(mcrx, mcr, mdr); | ||
112 | spin_unlock_irqrestore(&iosf_mbi_lock, flags); | ||
113 | |||
114 | return ret; | ||
115 | } | ||
116 | EXPORT_SYMBOL(iosf_mbi_read); | ||
117 | |||
118 | int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr) | ||
119 | { | ||
120 | u32 mcr, mcrx; | ||
121 | unsigned long flags; | ||
122 | int ret; | ||
123 | |||
124 | /* Access to the GFX unit is handled by GPU code */ | ||
125 | if (port == BT_MBI_UNIT_GFX) { | ||
126 | WARN_ON(1); | ||
127 | return -EPERM; | ||
128 | } | ||
129 | |||
130 | mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO); | ||
131 | mcrx = offset & MBI_MASK_HI; | ||
132 | |||
133 | spin_lock_irqsave(&iosf_mbi_lock, flags); | ||
134 | ret = iosf_mbi_pci_write_mdr(mcrx, mcr, mdr); | ||
135 | spin_unlock_irqrestore(&iosf_mbi_lock, flags); | ||
136 | |||
137 | return ret; | ||
138 | } | ||
139 | EXPORT_SYMBOL(iosf_mbi_write); | ||
140 | |||
141 | int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask) | ||
142 | { | ||
143 | u32 mcr, mcrx; | ||
144 | u32 value; | ||
145 | unsigned long flags; | ||
146 | int ret; | ||
147 | |||
148 | /* Access to the GFX unit is handled by GPU code */ | ||
149 | if (port == BT_MBI_UNIT_GFX) { | ||
150 | WARN_ON(1); | ||
151 | return -EPERM; | ||
152 | } | ||
153 | |||
154 | mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO); | ||
155 | mcrx = offset & MBI_MASK_HI; | ||
156 | |||
157 | spin_lock_irqsave(&iosf_mbi_lock, flags); | ||
158 | |||
159 | /* Read current mdr value */ | ||
160 | ret = iosf_mbi_pci_read_mdr(mcrx, mcr & MBI_RD_MASK, &value); | ||
161 | if (ret < 0) { | ||
162 | spin_unlock_irqrestore(&iosf_mbi_lock, flags); | ||
163 | return ret; | ||
164 | } | ||
165 | |||
166 | /* Apply mask */ | ||
167 | value &= ~mask; | ||
168 | mdr &= mask; | ||
169 | value |= mdr; | ||
170 | |||
171 | /* Write back */ | ||
172 | ret = iosf_mbi_pci_write_mdr(mcrx, mcr | MBI_WR_MASK, value); | ||
173 | |||
174 | spin_unlock_irqrestore(&iosf_mbi_lock, flags); | ||
175 | |||
176 | return ret; | ||
177 | } | ||
178 | EXPORT_SYMBOL(iosf_mbi_modify); | ||
179 | |||
180 | static int iosf_mbi_probe(struct pci_dev *pdev, | ||
181 | const struct pci_device_id *unused) | ||
182 | { | ||
183 | int ret; | ||
184 | |||
185 | ret = pci_enable_device(pdev); | ||
186 | if (ret < 0) { | ||
187 | dev_err(&pdev->dev, "error: could not enable device\n"); | ||
188 | return ret; | ||
189 | } | ||
190 | |||
191 | mbi_pdev = pci_dev_get(pdev); | ||
192 | return 0; | ||
193 | } | ||
194 | |||
195 | static DEFINE_PCI_DEVICE_TABLE(iosf_mbi_pci_ids) = { | ||
196 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0F00) }, | ||
197 | { 0, }, | ||
198 | }; | ||
199 | MODULE_DEVICE_TABLE(pci, iosf_mbi_pci_ids); | ||
200 | |||
201 | static struct pci_driver iosf_mbi_pci_driver = { | ||
202 | .name = "iosf_mbi_pci", | ||
203 | .probe = iosf_mbi_probe, | ||
204 | .id_table = iosf_mbi_pci_ids, | ||
205 | }; | ||
206 | |||
207 | static int __init iosf_mbi_init(void) | ||
208 | { | ||
209 | return pci_register_driver(&iosf_mbi_pci_driver); | ||
210 | } | ||
211 | |||
212 | static void __exit iosf_mbi_exit(void) | ||
213 | { | ||
214 | pci_unregister_driver(&iosf_mbi_pci_driver); | ||
215 | if (mbi_pdev) { | ||
216 | pci_dev_put(mbi_pdev); | ||
217 | mbi_pdev = NULL; | ||
218 | } | ||
219 | } | ||
220 | |||
221 | module_init(iosf_mbi_init); | ||
222 | module_exit(iosf_mbi_exit); | ||
223 | |||
224 | MODULE_AUTHOR("David E. Box <david.e.box@linux.intel.com>"); | ||
225 | MODULE_DESCRIPTION("IOSF Mailbox Interface accessor"); | ||
226 | MODULE_LICENSE("GPL v2"); | ||
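The new file above exports iosf_mbi_read(), iosf_mbi_write() and iosf_mbi_modify() for other kernel code to reach units behind the IOSF sideband fabric. Below is a minimal, hedged sketch of a consumer; the port, opcodes and register offset are placeholder values, since the real unit and opcode constants live in <asm/iosf_mbi.h> and are not shown in this hunk.

#include <linux/types.h>
#include <linux/errno.h>
#include <asm/iosf_mbi.h>

/* Hypothetical consumer of the exported MBI helpers; every numeric ID
 * below is a placeholder, not a real sideband unit definition. */
static int example_set_unit_bit(void)
{
        u32 val;
        int ret;

        /* Read a 32-bit register from a sideband unit. */
        ret = iosf_mbi_read(0x04 /* port */, 0x10 /* read opcode */,
                            0x30 /* offset */, &val);
        if (ret)
                return ret;

        if (val & 0x1)          /* bit already set, nothing to do */
                return 0;

        /* Set bit 0 without disturbing the rest of the register. */
        return iosf_mbi_modify(0x04 /* port */, 0x11 /* write opcode */,
                               0x30 /* offset */, 0x1 /* mdr */, 0x1 /* mask */);
}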
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 22d0687e7fda..dbb60878b744 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -193,9 +193,13 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) | |||
193 | if (!handle_irq(irq, regs)) { | 193 | if (!handle_irq(irq, regs)) { |
194 | ack_APIC_irq(); | 194 | ack_APIC_irq(); |
195 | 195 | ||
196 | if (printk_ratelimit()) | 196 | if (irq != VECTOR_RETRIGGERED) { |
197 | pr_emerg("%s: %d.%d No irq handler for vector (irq %d)\n", | 197 | pr_emerg_ratelimited("%s: %d.%d No irq handler for vector (irq %d)\n", |
198 | __func__, smp_processor_id(), vector, irq); | 198 | __func__, smp_processor_id(), |
199 | vector, irq); | ||
200 | } else { | ||
201 | __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED); | ||
202 | } | ||
199 | } | 203 | } |
200 | 204 | ||
201 | irq_exit(); | 205 | irq_exit(); |
@@ -262,6 +266,76 @@ __visible void smp_trace_x86_platform_ipi(struct pt_regs *regs) | |||
262 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); | 266 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); |
263 | 267 | ||
264 | #ifdef CONFIG_HOTPLUG_CPU | 268 | #ifdef CONFIG_HOTPLUG_CPU |
269 | /* | ||
270 | * This cpu is going to be removed and its vectors migrated to the remaining | ||
271 | * online cpus. Check to see if there are enough vectors in the remaining cpus. | ||
272 | * This function is protected by stop_machine(). | ||
273 | */ | ||
274 | int check_irq_vectors_for_cpu_disable(void) | ||
275 | { | ||
276 | int irq, cpu; | ||
277 | unsigned int this_cpu, vector, this_count, count; | ||
278 | struct irq_desc *desc; | ||
279 | struct irq_data *data; | ||
280 | struct cpumask affinity_new, online_new; | ||
281 | |||
282 | this_cpu = smp_processor_id(); | ||
283 | cpumask_copy(&online_new, cpu_online_mask); | ||
284 | cpu_clear(this_cpu, online_new); | ||
285 | |||
286 | this_count = 0; | ||
287 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { | ||
288 | irq = __this_cpu_read(vector_irq[vector]); | ||
289 | if (irq >= 0) { | ||
290 | desc = irq_to_desc(irq); | ||
291 | data = irq_desc_get_irq_data(desc); | ||
292 | cpumask_copy(&affinity_new, data->affinity); | ||
293 | cpu_clear(this_cpu, affinity_new); | ||
294 | |||
295 | /* Do not count inactive or per-cpu irqs. */ | ||
296 | if (!irq_has_action(irq) || irqd_is_per_cpu(data)) | ||
297 | continue; | ||
298 | |||
299 | /* | ||
300 | * A single irq may be mapped to multiple | ||
301 | * cpus' vector_irq[] (for example IOAPIC cluster | ||
302 | * mode). In this case we have two | ||
303 | * possibilities: | ||
304 | * | ||
305 | * 1) the resulting affinity mask is empty; that is, | ||
306 | * the down'd cpu is the last cpu in the irq's | ||
307 | * affinity mask, or | ||
308 | * | ||
309 | * 2) the resulting affinity mask is no longer | ||
310 | * a subset of the online cpus but the affinity | ||
311 | * mask is not zero; that is, the down'd cpu is the | ||
312 | * last online cpu in a user-set affinity mask. | ||
313 | */ | ||
314 | if (cpumask_empty(&affinity_new) || | ||
315 | !cpumask_subset(&affinity_new, &online_new)) | ||
316 | this_count++; | ||
317 | } | ||
318 | } | ||
319 | |||
320 | count = 0; | ||
321 | for_each_online_cpu(cpu) { | ||
322 | if (cpu == this_cpu) | ||
323 | continue; | ||
324 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; | ||
325 | vector++) { | ||
326 | if (per_cpu(vector_irq, cpu)[vector] < 0) | ||
327 | count++; | ||
328 | } | ||
329 | } | ||
330 | |||
331 | if (count < this_count) { | ||
332 | pr_warn("CPU %d disable failed: CPU has %u vectors assigned and there are only %u available.\n", | ||
333 | this_cpu, this_count, count); | ||
334 | return -ERANGE; | ||
335 | } | ||
336 | return 0; | ||
337 | } | ||
338 | |||
265 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ | 339 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ |
266 | void fixup_irqs(void) | 340 | void fixup_irqs(void) |
267 | { | 341 | { |
@@ -344,7 +418,7 @@ void fixup_irqs(void) | |||
344 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { | 418 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { |
345 | unsigned int irr; | 419 | unsigned int irr; |
346 | 420 | ||
347 | if (__this_cpu_read(vector_irq[vector]) < 0) | 421 | if (__this_cpu_read(vector_irq[vector]) <= VECTOR_UNDEFINED) |
348 | continue; | 422 | continue; |
349 | 423 | ||
350 | irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); | 424 | irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); |
@@ -355,11 +429,14 @@ void fixup_irqs(void) | |||
355 | data = irq_desc_get_irq_data(desc); | 429 | data = irq_desc_get_irq_data(desc); |
356 | chip = irq_data_get_irq_chip(data); | 430 | chip = irq_data_get_irq_chip(data); |
357 | raw_spin_lock(&desc->lock); | 431 | raw_spin_lock(&desc->lock); |
358 | if (chip->irq_retrigger) | 432 | if (chip->irq_retrigger) { |
359 | chip->irq_retrigger(data); | 433 | chip->irq_retrigger(data); |
434 | __this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED); | ||
435 | } | ||
360 | raw_spin_unlock(&desc->lock); | 436 | raw_spin_unlock(&desc->lock); |
361 | } | 437 | } |
362 | __this_cpu_write(vector_irq[vector], -1); | 438 | if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED) |
439 | __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED); | ||
363 | } | 440 | } |
364 | } | 441 | } |
365 | #endif | 442 | #endif |
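The comment block in check_irq_vectors_for_cpu_disable() above enumerates two situations in which an irq must be counted against the vectors left on the remaining CPUs. A hedged userspace sketch, with plain bitmasks standing in for cpumasks and all CPU numbers invented for illustration:

#include <stdio.h>

int main(void)
{
        unsigned int online = 0x0f;              /* CPUs 0-3 online (assumed) */
        unsigned int down = 1u << 2;             /* CPU 2 is being offlined */
        unsigned int online_new = online & ~down;

        /* Case 1: the irq's affinity contained only the down'd CPU,
         * so the mask is empty once that CPU is cleared. */
        unsigned int aff1_new = (1u << 2) & ~down;

        /* Case 2: the mask stays non-empty but no remaining bit is online
         * (CPU 5 was set by the user yet is not in the online mask). */
        unsigned int aff2_new = ((1u << 2) | (1u << 5)) & ~down;

        printf("case 1 needs a new vector: %d\n", aff1_new == 0);
        printf("case 2 needs a new vector: %d\n", (aff2_new & ~online_new) != 0);
        return 0;
}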
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index a2a1fbc594ff..7f50156542fb 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -52,7 +52,7 @@ static struct irqaction irq2 = { | |||
52 | }; | 52 | }; |
53 | 53 | ||
54 | DEFINE_PER_CPU(vector_irq_t, vector_irq) = { | 54 | DEFINE_PER_CPU(vector_irq_t, vector_irq) = { |
55 | [0 ... NR_VECTORS - 1] = -1, | 55 | [0 ... NR_VECTORS - 1] = VECTOR_UNDEFINED, |
56 | }; | 56 | }; |
57 | 57 | ||
58 | int vector_used_by_percpu_irq(unsigned int vector) | 58 | int vector_used_by_percpu_irq(unsigned int vector) |
@@ -60,7 +60,7 @@ int vector_used_by_percpu_irq(unsigned int vector) | |||
60 | int cpu; | 60 | int cpu; |
61 | 61 | ||
62 | for_each_online_cpu(cpu) { | 62 | for_each_online_cpu(cpu) { |
63 | if (per_cpu(vector_irq, cpu)[vector] != -1) | 63 | if (per_cpu(vector_irq, cpu)[vector] > VECTOR_UNDEFINED) |
64 | return 1; | 64 | return 1; |
65 | } | 65 | } |
66 | 66 | ||
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 836f8322960e..7ec1d5f8d283 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -39,7 +39,6 @@ | |||
39 | #include <linux/sched.h> | 39 | #include <linux/sched.h> |
40 | #include <linux/delay.h> | 40 | #include <linux/delay.h> |
41 | #include <linux/kgdb.h> | 41 | #include <linux/kgdb.h> |
42 | #include <linux/init.h> | ||
43 | #include <linux/smp.h> | 42 | #include <linux/smp.h> |
44 | #include <linux/nmi.h> | 43 | #include <linux/nmi.h> |
45 | #include <linux/hw_breakpoint.h> | 44 | #include <linux/hw_breakpoint.h> |
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c new file mode 100644 index 000000000000..c2bedaea11f7 --- /dev/null +++ b/arch/x86/kernel/ksysfs.c | |||
@@ -0,0 +1,340 @@ | |||
1 | /* | ||
2 | * Architecture specific sysfs attributes in /sys/kernel | ||
3 | * | ||
4 | * Copyright (C) 2007, Intel Corp. | ||
5 | * Huang Ying <ying.huang@intel.com> | ||
6 | * Copyright (C) 2013 Red Hat, Inc. | ||
7 | * Dave Young <dyoung@redhat.com> | ||
8 | * | ||
9 | * This file is released under the GPLv2 | ||
10 | */ | ||
11 | |||
12 | #include <linux/kobject.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <linux/sysfs.h> | ||
15 | #include <linux/init.h> | ||
16 | #include <linux/stat.h> | ||
17 | #include <linux/slab.h> | ||
18 | #include <linux/mm.h> | ||
19 | |||
20 | #include <asm/io.h> | ||
21 | #include <asm/setup.h> | ||
22 | |||
23 | static ssize_t version_show(struct kobject *kobj, | ||
24 | struct kobj_attribute *attr, char *buf) | ||
25 | { | ||
26 | return sprintf(buf, "0x%04x\n", boot_params.hdr.version); | ||
27 | } | ||
28 | |||
29 | static struct kobj_attribute boot_params_version_attr = __ATTR_RO(version); | ||
30 | |||
31 | static ssize_t boot_params_data_read(struct file *fp, struct kobject *kobj, | ||
32 | struct bin_attribute *bin_attr, | ||
33 | char *buf, loff_t off, size_t count) | ||
34 | { | ||
35 | memcpy(buf, (void *)&boot_params + off, count); | ||
36 | return count; | ||
37 | } | ||
38 | |||
39 | static struct bin_attribute boot_params_data_attr = { | ||
40 | .attr = { | ||
41 | .name = "data", | ||
42 | .mode = S_IRUGO, | ||
43 | }, | ||
44 | .read = boot_params_data_read, | ||
45 | .size = sizeof(boot_params), | ||
46 | }; | ||
47 | |||
48 | static struct attribute *boot_params_version_attrs[] = { | ||
49 | &boot_params_version_attr.attr, | ||
50 | NULL, | ||
51 | }; | ||
52 | |||
53 | static struct bin_attribute *boot_params_data_attrs[] = { | ||
54 | &boot_params_data_attr, | ||
55 | NULL, | ||
56 | }; | ||
57 | |||
58 | static struct attribute_group boot_params_attr_group = { | ||
59 | .attrs = boot_params_version_attrs, | ||
60 | .bin_attrs = boot_params_data_attrs, | ||
61 | }; | ||
62 | |||
63 | static int kobj_to_setup_data_nr(struct kobject *kobj, int *nr) | ||
64 | { | ||
65 | const char *name; | ||
66 | |||
67 | name = kobject_name(kobj); | ||
68 | return kstrtoint(name, 10, nr); | ||
69 | } | ||
70 | |||
71 | static int get_setup_data_paddr(int nr, u64 *paddr) | ||
72 | { | ||
73 | int i = 0; | ||
74 | struct setup_data *data; | ||
75 | u64 pa_data = boot_params.hdr.setup_data; | ||
76 | |||
77 | while (pa_data) { | ||
78 | if (nr == i) { | ||
79 | *paddr = pa_data; | ||
80 | return 0; | ||
81 | } | ||
82 | data = ioremap_cache(pa_data, sizeof(*data)); | ||
83 | if (!data) | ||
84 | return -ENOMEM; | ||
85 | |||
86 | pa_data = data->next; | ||
87 | iounmap(data); | ||
88 | i++; | ||
89 | } | ||
90 | return -EINVAL; | ||
91 | } | ||
92 | |||
93 | static int __init get_setup_data_size(int nr, size_t *size) | ||
94 | { | ||
95 | int i = 0; | ||
96 | struct setup_data *data; | ||
97 | u64 pa_data = boot_params.hdr.setup_data; | ||
98 | |||
99 | while (pa_data) { | ||
100 | data = ioremap_cache(pa_data, sizeof(*data)); | ||
101 | if (!data) | ||
102 | return -ENOMEM; | ||
103 | if (nr == i) { | ||
104 | *size = data->len; | ||
105 | iounmap(data); | ||
106 | return 0; | ||
107 | } | ||
108 | |||
109 | pa_data = data->next; | ||
110 | iounmap(data); | ||
111 | i++; | ||
112 | } | ||
113 | return -EINVAL; | ||
114 | } | ||
115 | |||
116 | static ssize_t type_show(struct kobject *kobj, | ||
117 | struct kobj_attribute *attr, char *buf) | ||
118 | { | ||
119 | int nr, ret; | ||
120 | u64 paddr; | ||
121 | struct setup_data *data; | ||
122 | |||
123 | ret = kobj_to_setup_data_nr(kobj, &nr); | ||
124 | if (ret) | ||
125 | return ret; | ||
126 | |||
127 | ret = get_setup_data_paddr(nr, &paddr); | ||
128 | if (ret) | ||
129 | return ret; | ||
130 | data = ioremap_cache(paddr, sizeof(*data)); | ||
131 | if (!data) | ||
132 | return -ENOMEM; | ||
133 | |||
134 | ret = sprintf(buf, "0x%x\n", data->type); | ||
135 | iounmap(data); | ||
136 | return ret; | ||
137 | } | ||
138 | |||
139 | static ssize_t setup_data_data_read(struct file *fp, | ||
140 | struct kobject *kobj, | ||
141 | struct bin_attribute *bin_attr, | ||
142 | char *buf, | ||
143 | loff_t off, size_t count) | ||
144 | { | ||
145 | int nr, ret = 0; | ||
146 | u64 paddr; | ||
147 | struct setup_data *data; | ||
148 | void *p; | ||
149 | |||
150 | ret = kobj_to_setup_data_nr(kobj, &nr); | ||
151 | if (ret) | ||
152 | return ret; | ||
153 | |||
154 | ret = get_setup_data_paddr(nr, &paddr); | ||
155 | if (ret) | ||
156 | return ret; | ||
157 | data = ioremap_cache(paddr, sizeof(*data)); | ||
158 | if (!data) | ||
159 | return -ENOMEM; | ||
160 | |||
161 | if (off > data->len) { | ||
162 | ret = -EINVAL; | ||
163 | goto out; | ||
164 | } | ||
165 | |||
166 | if (count > data->len - off) | ||
167 | count = data->len - off; | ||
168 | |||
169 | if (!count) | ||
170 | goto out; | ||
171 | |||
172 | ret = count; | ||
173 | p = ioremap_cache(paddr + sizeof(*data), data->len); | ||
174 | if (!p) { | ||
175 | ret = -ENOMEM; | ||
176 | goto out; | ||
177 | } | ||
178 | memcpy(buf, p + off, count); | ||
179 | iounmap(p); | ||
180 | out: | ||
181 | iounmap(data); | ||
182 | return ret; | ||
183 | } | ||
184 | |||
185 | static struct kobj_attribute type_attr = __ATTR_RO(type); | ||
186 | |||
187 | static struct bin_attribute data_attr = { | ||
188 | .attr = { | ||
189 | .name = "data", | ||
190 | .mode = S_IRUGO, | ||
191 | }, | ||
192 | .read = setup_data_data_read, | ||
193 | }; | ||
194 | |||
195 | static struct attribute *setup_data_type_attrs[] = { | ||
196 | &type_attr.attr, | ||
197 | NULL, | ||
198 | }; | ||
199 | |||
200 | static struct bin_attribute *setup_data_data_attrs[] = { | ||
201 | &data_attr, | ||
202 | NULL, | ||
203 | }; | ||
204 | |||
205 | static struct attribute_group setup_data_attr_group = { | ||
206 | .attrs = setup_data_type_attrs, | ||
207 | .bin_attrs = setup_data_data_attrs, | ||
208 | }; | ||
209 | |||
210 | static int __init create_setup_data_node(struct kobject *parent, | ||
211 | struct kobject **kobjp, int nr) | ||
212 | { | ||
213 | int ret = 0; | ||
214 | size_t size; | ||
215 | struct kobject *kobj; | ||
216 | char name[16]; /* should be enough for setup_data node numbers */ | ||
217 | snprintf(name, 16, "%d", nr); | ||
218 | |||
219 | kobj = kobject_create_and_add(name, parent); | ||
220 | if (!kobj) | ||
221 | return -ENOMEM; | ||
222 | |||
223 | ret = get_setup_data_size(nr, &size); | ||
224 | if (ret) | ||
225 | goto out_kobj; | ||
226 | |||
227 | data_attr.size = size; | ||
228 | ret = sysfs_create_group(kobj, &setup_data_attr_group); | ||
229 | if (ret) | ||
230 | goto out_kobj; | ||
231 | *kobjp = kobj; | ||
232 | |||
233 | return 0; | ||
234 | out_kobj: | ||
235 | kobject_put(kobj); | ||
236 | return ret; | ||
237 | } | ||
238 | |||
239 | static void __init cleanup_setup_data_node(struct kobject *kobj) | ||
240 | { | ||
241 | sysfs_remove_group(kobj, &setup_data_attr_group); | ||
242 | kobject_put(kobj); | ||
243 | } | ||
244 | |||
245 | static int __init get_setup_data_total_num(u64 pa_data, int *nr) | ||
246 | { | ||
247 | int ret = 0; | ||
248 | struct setup_data *data; | ||
249 | |||
250 | *nr = 0; | ||
251 | while (pa_data) { | ||
252 | *nr += 1; | ||
253 | data = ioremap_cache(pa_data, sizeof(*data)); | ||
254 | if (!data) { | ||
255 | ret = -ENOMEM; | ||
256 | goto out; | ||
257 | } | ||
258 | pa_data = data->next; | ||
259 | iounmap(data); | ||
260 | } | ||
261 | |||
262 | out: | ||
263 | return ret; | ||
264 | } | ||
265 | |||
266 | static int __init create_setup_data_nodes(struct kobject *parent) | ||
267 | { | ||
268 | struct kobject *setup_data_kobj, **kobjp; | ||
269 | u64 pa_data; | ||
270 | int i, j, nr, ret = 0; | ||
271 | |||
272 | pa_data = boot_params.hdr.setup_data; | ||
273 | if (!pa_data) | ||
274 | return 0; | ||
275 | |||
276 | setup_data_kobj = kobject_create_and_add("setup_data", parent); | ||
277 | if (!setup_data_kobj) { | ||
278 | ret = -ENOMEM; | ||
279 | goto out; | ||
280 | } | ||
281 | |||
282 | ret = get_setup_data_total_num(pa_data, &nr); | ||
283 | if (ret) | ||
284 | goto out_setup_data_kobj; | ||
285 | |||
286 | kobjp = kmalloc(sizeof(*kobjp) * nr, GFP_KERNEL); | ||
287 | if (!kobjp) { | ||
288 | ret = -ENOMEM; | ||
289 | goto out_setup_data_kobj; | ||
290 | } | ||
291 | |||
292 | for (i = 0; i < nr; i++) { | ||
293 | ret = create_setup_data_node(setup_data_kobj, kobjp + i, i); | ||
294 | if (ret) | ||
295 | goto out_clean_nodes; | ||
296 | } | ||
297 | |||
298 | kfree(kobjp); | ||
299 | return 0; | ||
300 | |||
301 | out_clean_nodes: | ||
302 | for (j = i - 1; j > 0; j--) | ||
303 | cleanup_setup_data_node(*(kobjp + j)); | ||
304 | kfree(kobjp); | ||
305 | out_setup_data_kobj: | ||
306 | kobject_put(setup_data_kobj); | ||
307 | out: | ||
308 | return ret; | ||
309 | } | ||
310 | |||
311 | static int __init boot_params_ksysfs_init(void) | ||
312 | { | ||
313 | int ret; | ||
314 | struct kobject *boot_params_kobj; | ||
315 | |||
316 | boot_params_kobj = kobject_create_and_add("boot_params", | ||
317 | kernel_kobj); | ||
318 | if (!boot_params_kobj) { | ||
319 | ret = -ENOMEM; | ||
320 | goto out; | ||
321 | } | ||
322 | |||
323 | ret = sysfs_create_group(boot_params_kobj, &boot_params_attr_group); | ||
324 | if (ret) | ||
325 | goto out_boot_params_kobj; | ||
326 | |||
327 | ret = create_setup_data_nodes(boot_params_kobj); | ||
328 | if (ret) | ||
329 | goto out_create_group; | ||
330 | |||
331 | return 0; | ||
332 | out_create_group: | ||
333 | sysfs_remove_group(boot_params_kobj, &boot_params_attr_group); | ||
334 | out_boot_params_kobj: | ||
335 | kobject_put(boot_params_kobj); | ||
336 | out: | ||
337 | return ret; | ||
338 | } | ||
339 | |||
340 | arch_initcall(boot_params_ksysfs_init); | ||
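The file above publishes the boot parameters under /sys/kernel/boot_params: a text "version" attribute, a raw "data" blob, and one numbered subdirectory per setup_data entry carrying its own "type" and "data" nodes. A small userspace sketch that reads two of those paths; it is illustrative only and assumes at least one setup_data entry exists on the running machine.

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        char buf[64];
        FILE *f = fopen("/sys/kernel/boot_params/version", "r");

        if (!f)
                return EXIT_FAILURE;
        if (fgets(buf, sizeof(buf), f))
                printf("boot protocol version: %s", buf);   /* e.g. "0x020c" */
        fclose(f);

        /* Each setup_data entry N exposes its type the same way
         * (assumes entry 0 exists). */
        f = fopen("/sys/kernel/boot_params/setup_data/0/type", "r");
        if (f) {
                if (fgets(buf, sizeof(buf), f))
                        printf("setup_data[0] type: %s", buf);
                fclose(f);
        }
        return 0;
}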
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 5b19e4d78b00..1667b1de8d5d 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -9,7 +9,6 @@ | |||
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/kexec.h> | 10 | #include <linux/kexec.h> |
11 | #include <linux/delay.h> | 11 | #include <linux/delay.h> |
12 | #include <linux/init.h> | ||
13 | #include <linux/numa.h> | 12 | #include <linux/numa.h> |
14 | #include <linux/ftrace.h> | 13 | #include <linux/ftrace.h> |
15 | #include <linux/suspend.h> | 14 | #include <linux/suspend.h> |
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 871be4a84c7d..da15918d1c81 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
@@ -3,7 +3,6 @@ | |||
3 | #include <linux/dma-mapping.h> | 3 | #include <linux/dma-mapping.h> |
4 | #include <linux/scatterlist.h> | 4 | #include <linux/scatterlist.h> |
5 | #include <linux/string.h> | 5 | #include <linux/string.h> |
6 | #include <linux/init.h> | ||
7 | #include <linux/gfp.h> | 6 | #include <linux/gfp.h> |
8 | #include <linux/pci.h> | 7 | #include <linux/pci.h> |
9 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 6f1236c29c4b..0de43e98ce08 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -24,7 +24,6 @@ | |||
24 | #include <linux/interrupt.h> | 24 | #include <linux/interrupt.h> |
25 | #include <linux/delay.h> | 25 | #include <linux/delay.h> |
26 | #include <linux/reboot.h> | 26 | #include <linux/reboot.h> |
27 | #include <linux/init.h> | ||
28 | #include <linux/mc146818rtc.h> | 27 | #include <linux/mc146818rtc.h> |
29 | #include <linux/module.h> | 28 | #include <linux/module.h> |
30 | #include <linux/kallsyms.h> | 29 | #include <linux/kallsyms.h> |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index cb233bc9dee3..c9675594d7ca 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -295,6 +295,8 @@ static void __init reserve_brk(void) | |||
295 | _brk_start = 0; | 295 | _brk_start = 0; |
296 | } | 296 | } |
297 | 297 | ||
298 | u64 relocated_ramdisk; | ||
299 | |||
298 | #ifdef CONFIG_BLK_DEV_INITRD | 300 | #ifdef CONFIG_BLK_DEV_INITRD |
299 | 301 | ||
300 | static u64 __init get_ramdisk_image(void) | 302 | static u64 __init get_ramdisk_image(void) |
@@ -321,25 +323,24 @@ static void __init relocate_initrd(void) | |||
321 | u64 ramdisk_image = get_ramdisk_image(); | 323 | u64 ramdisk_image = get_ramdisk_image(); |
322 | u64 ramdisk_size = get_ramdisk_size(); | 324 | u64 ramdisk_size = get_ramdisk_size(); |
323 | u64 area_size = PAGE_ALIGN(ramdisk_size); | 325 | u64 area_size = PAGE_ALIGN(ramdisk_size); |
324 | u64 ramdisk_here; | ||
325 | unsigned long slop, clen, mapaddr; | 326 | unsigned long slop, clen, mapaddr; |
326 | char *p, *q; | 327 | char *p, *q; |
327 | 328 | ||
328 | /* We need to move the initrd down into directly mapped mem */ | 329 | /* We need to move the initrd down into directly mapped mem */ |
329 | ramdisk_here = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), | 330 | relocated_ramdisk = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), |
330 | area_size, PAGE_SIZE); | 331 | area_size, PAGE_SIZE); |
331 | 332 | ||
332 | if (!ramdisk_here) | 333 | if (!relocated_ramdisk) |
333 | panic("Cannot find place for new RAMDISK of size %lld\n", | 334 | panic("Cannot find place for new RAMDISK of size %lld\n", |
334 | ramdisk_size); | 335 | ramdisk_size); |
335 | 336 | ||
336 | /* Note: this includes all the mem currently occupied by | 337 | /* Note: this includes all the mem currently occupied by |
337 | the initrd, we rely on that fact to keep the data intact. */ | 338 | the initrd, we rely on that fact to keep the data intact. */ |
338 | memblock_reserve(ramdisk_here, area_size); | 339 | memblock_reserve(relocated_ramdisk, area_size); |
339 | initrd_start = ramdisk_here + PAGE_OFFSET; | 340 | initrd_start = relocated_ramdisk + PAGE_OFFSET; |
340 | initrd_end = initrd_start + ramdisk_size; | 341 | initrd_end = initrd_start + ramdisk_size; |
341 | printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n", | 342 | printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n", |
342 | ramdisk_here, ramdisk_here + ramdisk_size - 1); | 343 | relocated_ramdisk, relocated_ramdisk + ramdisk_size - 1); |
343 | 344 | ||
344 | q = (char *)initrd_start; | 345 | q = (char *)initrd_start; |
345 | 346 | ||
@@ -363,7 +364,7 @@ static void __init relocate_initrd(void) | |||
363 | printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to" | 364 | printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to" |
364 | " [mem %#010llx-%#010llx]\n", | 365 | " [mem %#010llx-%#010llx]\n", |
365 | ramdisk_image, ramdisk_image + ramdisk_size - 1, | 366 | ramdisk_image, ramdisk_image + ramdisk_size - 1, |
366 | ramdisk_here, ramdisk_here + ramdisk_size - 1); | 367 | relocated_ramdisk, relocated_ramdisk + ramdisk_size - 1); |
367 | } | 368 | } |
368 | 369 | ||
369 | static void __init early_reserve_initrd(void) | 370 | static void __init early_reserve_initrd(void) |
@@ -447,6 +448,9 @@ static void __init parse_setup_data(void) | |||
447 | case SETUP_DTB: | 448 | case SETUP_DTB: |
448 | add_dtb(pa_data); | 449 | add_dtb(pa_data); |
449 | break; | 450 | break; |
451 | case SETUP_EFI: | ||
452 | parse_efi_setup(pa_data, data_len); | ||
453 | break; | ||
450 | default: | 454 | default: |
451 | break; | 455 | break; |
452 | } | 456 | } |
@@ -824,6 +828,20 @@ static void __init trim_low_memory_range(void) | |||
824 | } | 828 | } |
825 | 829 | ||
826 | /* | 830 | /* |
831 | * Dump out kernel offset information on panic. | ||
832 | */ | ||
833 | static int | ||
834 | dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) | ||
835 | { | ||
836 | pr_emerg("Kernel Offset: 0x%lx from 0x%lx " | ||
837 | "(relocation range: 0x%lx-0x%lx)\n", | ||
838 | (unsigned long)&_text - __START_KERNEL, __START_KERNEL, | ||
839 | __START_KERNEL_map, MODULES_VADDR-1); | ||
840 | |||
841 | return 0; | ||
842 | } | ||
843 | |||
844 | /* | ||
827 | * Determine if we were loaded by an EFI loader. If so, then we have also been | 845 | * Determine if we were loaded by an EFI loader. If so, then we have also been |
828 | * passed the efi memmap, systab, etc., so we should use these data structures | 846 | * passed the efi memmap, systab, etc., so we should use these data structures |
829 | * for initialization. Note, the efi init code path is determined by the | 847 | * for initialization. Note, the efi init code path is determined by the |
@@ -924,8 +942,6 @@ void __init setup_arch(char **cmdline_p) | |||
924 | iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; | 942 | iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; |
925 | setup_memory_map(); | 943 | setup_memory_map(); |
926 | parse_setup_data(); | 944 | parse_setup_data(); |
927 | /* update the e820_saved too */ | ||
928 | e820_reserve_setup_data(); | ||
929 | 945 | ||
930 | copy_edd(); | 946 | copy_edd(); |
931 | 947 | ||
@@ -987,6 +1003,8 @@ void __init setup_arch(char **cmdline_p) | |||
987 | early_dump_pci_devices(); | 1003 | early_dump_pci_devices(); |
988 | #endif | 1004 | #endif |
989 | 1005 | ||
1006 | /* update the e820_saved too */ | ||
1007 | e820_reserve_setup_data(); | ||
990 | finish_e820_parsing(); | 1008 | finish_e820_parsing(); |
991 | 1009 | ||
992 | if (efi_enabled(EFI_BOOT)) | 1010 | if (efi_enabled(EFI_BOOT)) |
@@ -1101,7 +1119,7 @@ void __init setup_arch(char **cmdline_p) | |||
1101 | 1119 | ||
1102 | setup_real_mode(); | 1120 | setup_real_mode(); |
1103 | 1121 | ||
1104 | memblock_set_current_limit(get_max_mapped()); | 1122 | memblock_set_current_limit(get_max_low_mapped()); |
1105 | dma_contiguous_reserve(0); | 1123 | dma_contiguous_reserve(0); |
1106 | 1124 | ||
1107 | /* | 1125 | /* |
@@ -1248,3 +1266,15 @@ void __init i386_reserve_resources(void) | |||
1248 | } | 1266 | } |
1249 | 1267 | ||
1250 | #endif /* CONFIG_X86_32 */ | 1268 | #endif /* CONFIG_X86_32 */ |
1269 | |||
1270 | static struct notifier_block kernel_offset_notifier = { | ||
1271 | .notifier_call = dump_kernel_offset | ||
1272 | }; | ||
1273 | |||
1274 | static int __init register_kernel_offset_dumper(void) | ||
1275 | { | ||
1276 | atomic_notifier_chain_register(&panic_notifier_list, | ||
1277 | &kernel_offset_notifier); | ||
1278 | return 0; | ||
1279 | } | ||
1280 | __initcall(register_kernel_offset_dumper); | ||
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 85dc05a3aa02..a32da804252e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -1312,6 +1312,12 @@ void cpu_disable_common(void) | |||
1312 | 1312 | ||
1313 | int native_cpu_disable(void) | 1313 | int native_cpu_disable(void) |
1314 | { | 1314 | { |
1315 | int ret; | ||
1316 | |||
1317 | ret = check_irq_vectors_for_cpu_disable(); | ||
1318 | if (ret) | ||
1319 | return ret; | ||
1320 | |||
1315 | clear_local_APIC(); | 1321 | clear_local_APIC(); |
1316 | 1322 | ||
1317 | cpu_disable_common(); | 1323 | cpu_disable_common(); |
@@ -1417,7 +1423,9 @@ static inline void mwait_play_dead(void) | |||
1417 | * The WBINVD is insufficient due to the spurious-wakeup | 1423 | * The WBINVD is insufficient due to the spurious-wakeup |
1418 | * case where we return around the loop. | 1424 | * case where we return around the loop. |
1419 | */ | 1425 | */ |
1426 | mb(); | ||
1420 | clflush(mwait_ptr); | 1427 | clflush(mwait_ptr); |
1428 | mb(); | ||
1421 | __monitor(mwait_ptr, 0, 0); | 1429 | __monitor(mwait_ptr, 0, 0); |
1422 | mb(); | 1430 | mb(); |
1423 | __mwait(eax, 0); | 1431 | __mwait(eax, 0); |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b857ed890b4c..57409f6b8c62 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -211,21 +211,17 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ | |||
211 | exception_exit(prev_state); \ | 211 | exception_exit(prev_state); \ |
212 | } | 212 | } |
213 | 213 | ||
214 | DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, | 214 | DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip ) |
215 | regs->ip) | 215 | DO_ERROR (X86_TRAP_OF, SIGSEGV, "overflow", overflow ) |
216 | DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow) | 216 | DO_ERROR (X86_TRAP_BR, SIGSEGV, "bounds", bounds ) |
217 | DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds) | 217 | DO_ERROR_INFO(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip ) |
218 | DO_ERROR_INFO(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, | 218 | DO_ERROR (X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun ) |
219 | regs->ip) | 219 | DO_ERROR (X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS ) |
220 | DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", | 220 | DO_ERROR (X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present ) |
221 | coprocessor_segment_overrun) | ||
222 | DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) | ||
223 | DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) | ||
224 | #ifdef CONFIG_X86_32 | 221 | #ifdef CONFIG_X86_32 |
225 | DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) | 222 | DO_ERROR (X86_TRAP_SS, SIGBUS, "stack segment", stack_segment ) |
226 | #endif | 223 | #endif |
227 | DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, | 224 | DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0 ) |
228 | BUS_ADRALN, 0) | ||
229 | 225 | ||
230 | #ifdef CONFIG_X86_64 | 226 | #ifdef CONFIG_X86_64 |
231 | /* Runs on IST stack */ | 227 | /* Runs on IST stack */ |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 930e5d48f560..a3acbac2ee72 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/clocksource.h> | 11 | #include <linux/clocksource.h> |
12 | #include <linux/percpu.h> | 12 | #include <linux/percpu.h> |
13 | #include <linux/timex.h> | 13 | #include <linux/timex.h> |
14 | #include <linux/static_key.h> | ||
14 | 15 | ||
15 | #include <asm/hpet.h> | 16 | #include <asm/hpet.h> |
16 | #include <asm/timer.h> | 17 | #include <asm/timer.h> |
@@ -37,13 +38,244 @@ static int __read_mostly tsc_unstable; | |||
37 | erroneous rdtsc usage on !cpu_has_tsc processors */ | 38 | erroneous rdtsc usage on !cpu_has_tsc processors */ |
38 | static int __read_mostly tsc_disabled = -1; | 39 | static int __read_mostly tsc_disabled = -1; |
39 | 40 | ||
41 | static struct static_key __use_tsc = STATIC_KEY_INIT; | ||
42 | |||
40 | int tsc_clocksource_reliable; | 43 | int tsc_clocksource_reliable; |
44 | |||
45 | /* | ||
46 | * Use a ring-buffer like data structure, where a writer advances the head by | ||
47 | * writing a new data entry and a reader advances the tail when it observes a | ||
48 | * new entry. | ||
49 | * | ||
50 | * Writers are made to wait on readers until there's space to write a new | ||
51 | * entry. | ||
52 | * | ||
53 | * This means that we can always use an {offset, mul} pair to compute a ns | ||
54 | * value that is 'roughly' in the right direction, even if we're writing a new | ||
55 | * {offset, mul} pair during the clock read. | ||
56 | * | ||
57 | * The down-side is that we can no longer guarantee strict monotonicity anymore | ||
58 | * (assuming the TSC was that to begin with), because while we compute the | ||
59 | * intersection point of the two clock slopes and make sure the time is | ||
60 | * continuous at the point of switching; we can no longer guarantee a reader is | ||
61 | * strictly before or after the switch point. | ||
62 | * | ||
63 | * It does mean a reader no longer needs to disable IRQs in order to avoid | ||
64 | * CPU-Freq updates messing with its readings, and similarly an NMI reader will | ||
65 | * no longer run the risk of hitting half-written state. | ||
66 | */ | ||
67 | |||
68 | struct cyc2ns { | ||
69 | struct cyc2ns_data data[2]; /* 0 + 2*24 = 48 */ | ||
70 | struct cyc2ns_data *head; /* 48 + 8 = 56 */ | ||
71 | struct cyc2ns_data *tail; /* 56 + 8 = 64 */ | ||
72 | }; /* exactly fits one cacheline */ | ||
73 | |||
74 | static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); | ||
75 | |||
76 | struct cyc2ns_data *cyc2ns_read_begin(void) | ||
77 | { | ||
78 | struct cyc2ns_data *head; | ||
79 | |||
80 | preempt_disable(); | ||
81 | |||
82 | head = this_cpu_read(cyc2ns.head); | ||
83 | /* | ||
84 | * Ensure we observe the entry when we observe the pointer to it. | ||
85 | * matches the wmb from cyc2ns_write_end(). | ||
86 | */ | ||
87 | smp_read_barrier_depends(); | ||
88 | head->__count++; | ||
89 | barrier(); | ||
90 | |||
91 | return head; | ||
92 | } | ||
93 | |||
94 | void cyc2ns_read_end(struct cyc2ns_data *head) | ||
95 | { | ||
96 | barrier(); | ||
97 | /* | ||
98 | * If we're the outermost nested read, update the tail pointer | ||
99 | * when we're done. This notifies possible pending writers | ||
100 | * that we've observed the head pointer and that the other | ||
101 | * entry is now free. | ||
102 | */ | ||
103 | if (!--head->__count) { | ||
104 | /* | ||
105 | * x86-TSO does not reorder writes with older reads; | ||
106 | * therefore once this write becomes visible to another | ||
107 | * cpu, we must be finished reading the cyc2ns_data. | ||
108 | * | ||
109 | * matches with cyc2ns_write_begin(). | ||
110 | */ | ||
111 | this_cpu_write(cyc2ns.tail, head); | ||
112 | } | ||
113 | preempt_enable(); | ||
114 | } | ||
115 | |||
116 | /* | ||
117 | * Begin writing a new @data entry for @cpu. | ||
118 | * | ||
119 | * Assumes some sort of write side lock; currently 'provided' by the assumption | ||
120 | * that cpufreq will call its notifiers sequentially. | ||
121 | */ | ||
122 | static struct cyc2ns_data *cyc2ns_write_begin(int cpu) | ||
123 | { | ||
124 | struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); | ||
125 | struct cyc2ns_data *data = c2n->data; | ||
126 | |||
127 | if (data == c2n->head) | ||
128 | data++; | ||
129 | |||
130 | /* XXX send an IPI to @cpu in order to guarantee a read? */ | ||
131 | |||
132 | /* | ||
133 | * When we observe the tail write from cyc2ns_read_end(), | ||
134 | * the cpu must be done with that entry and it's safe | ||
135 | * to start writing to it. | ||
136 | */ | ||
137 | while (c2n->tail == data) | ||
138 | cpu_relax(); | ||
139 | |||
140 | return data; | ||
141 | } | ||
142 | |||
143 | static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data) | ||
144 | { | ||
145 | struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); | ||
146 | |||
147 | /* | ||
148 | * Ensure the @data writes are visible before we publish the | ||
149 | * entry. Matches the data-dependency in cyc2ns_read_begin(). | ||
150 | */ | ||
151 | smp_wmb(); | ||
152 | |||
153 | ACCESS_ONCE(c2n->head) = data; | ||
154 | } | ||
155 | |||
156 | /* | ||
157 | * Accelerators for sched_clock() | ||
158 | * convert from cycles(64bits) => nanoseconds (64bits) | ||
159 | * basic equation: | ||
160 | * ns = cycles / (freq / ns_per_sec) | ||
161 | * ns = cycles * (ns_per_sec / freq) | ||
162 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) | ||
163 | * ns = cycles * (10^6 / cpu_khz) | ||
164 | * | ||
165 | * Then we use scaling math (suggested by george@mvista.com) to get: | ||
166 | * ns = cycles * (10^6 * SC / cpu_khz) / SC | ||
167 | * ns = cycles * cyc2ns_scale / SC | ||
168 | * | ||
169 | * And since SC is a constant power of two, we can convert the div | ||
170 | * into a shift. | ||
171 | * | ||
172 | * We can use khz divisor instead of mhz to keep a better precision, since | ||
173 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
174 | * (mathieu.desnoyers@polymtl.ca) | ||
175 | * | ||
176 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
177 | */ | ||
178 | |||
179 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | ||
180 | |||
181 | static void cyc2ns_data_init(struct cyc2ns_data *data) | ||
182 | { | ||
183 | data->cyc2ns_mul = 1U << CYC2NS_SCALE_FACTOR; | ||
184 | data->cyc2ns_shift = CYC2NS_SCALE_FACTOR; | ||
185 | data->cyc2ns_offset = 0; | ||
186 | data->__count = 0; | ||
187 | } | ||
188 | |||
189 | static void cyc2ns_init(int cpu) | ||
190 | { | ||
191 | struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); | ||
192 | |||
193 | cyc2ns_data_init(&c2n->data[0]); | ||
194 | cyc2ns_data_init(&c2n->data[1]); | ||
195 | |||
196 | c2n->head = c2n->data; | ||
197 | c2n->tail = c2n->data; | ||
198 | } | ||
199 | |||
200 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | ||
201 | { | ||
202 | struct cyc2ns_data *data, *tail; | ||
203 | unsigned long long ns; | ||
204 | |||
205 | /* | ||
206 | * See cyc2ns_read_*() for details; replicated in order to avoid | ||
207 | * an extra few instructions that came with the abstraction. | ||
208 | * Notable, it allows us to only do the __count and tail update | ||
209 | * dance when its actually needed. | ||
210 | */ | ||
211 | |||
212 | preempt_disable(); | ||
213 | data = this_cpu_read(cyc2ns.head); | ||
214 | tail = this_cpu_read(cyc2ns.tail); | ||
215 | |||
216 | if (likely(data == tail)) { | ||
217 | ns = data->cyc2ns_offset; | ||
218 | ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); | ||
219 | } else { | ||
220 | data->__count++; | ||
221 | |||
222 | barrier(); | ||
223 | |||
224 | ns = data->cyc2ns_offset; | ||
225 | ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); | ||
226 | |||
227 | barrier(); | ||
228 | |||
229 | if (!--data->__count) | ||
230 | this_cpu_write(cyc2ns.tail, data); | ||
231 | } | ||
232 | preempt_enable(); | ||
233 | |||
234 | return ns; | ||
235 | } | ||
236 | |||
237 | /* XXX surely we already have this someplace in the kernel?! */ | ||
238 | #define DIV_ROUND(n, d) (((n) + ((d) / 2)) / (d)) | ||
239 | |||
240 | static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) | ||
241 | { | ||
242 | unsigned long long tsc_now, ns_now; | ||
243 | struct cyc2ns_data *data; | ||
244 | unsigned long flags; | ||
245 | |||
246 | local_irq_save(flags); | ||
247 | sched_clock_idle_sleep_event(); | ||
248 | |||
249 | if (!cpu_khz) | ||
250 | goto done; | ||
251 | |||
252 | data = cyc2ns_write_begin(cpu); | ||
253 | |||
254 | rdtscll(tsc_now); | ||
255 | ns_now = cycles_2_ns(tsc_now); | ||
256 | |||
257 | /* | ||
258 | * Compute a new multiplier as per the above comment and ensure our | ||
259 | * time function is continuous; see the comment near struct | ||
260 | * cyc2ns_data. | ||
261 | */ | ||
262 | data->cyc2ns_mul = DIV_ROUND(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR, cpu_khz); | ||
263 | data->cyc2ns_shift = CYC2NS_SCALE_FACTOR; | ||
264 | data->cyc2ns_offset = ns_now - | ||
265 | mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); | ||
266 | |||
267 | cyc2ns_write_end(cpu, data); | ||
268 | |||
269 | done: | ||
270 | sched_clock_idle_wakeup_event(0); | ||
271 | local_irq_restore(flags); | ||
272 | } | ||
41 | /* | 273 | /* |
42 | * Scheduler clock - returns current time in nanosec units. | 274 | * Scheduler clock - returns current time in nanosec units. |
43 | */ | 275 | */ |
44 | u64 native_sched_clock(void) | 276 | u64 native_sched_clock(void) |
45 | { | 277 | { |
46 | u64 this_offset; | 278 | u64 tsc_now; |
47 | 279 | ||
48 | /* | 280 | /* |
49 | * Fall back to jiffies if there's no TSC available: | 281 | * Fall back to jiffies if there's no TSC available: |
@@ -53,16 +285,16 @@ u64 native_sched_clock(void) | |||
53 | * very important for it to be as fast as the platform | 285 | * very important for it to be as fast as the platform |
54 | * can achieve it. ) | 286 | * can achieve it. ) |
55 | */ | 287 | */ |
56 | if (unlikely(tsc_disabled)) { | 288 | if (!static_key_false(&__use_tsc)) { |
57 | /* No locking but a rare wrong value is not a big deal: */ | 289 | /* No locking but a rare wrong value is not a big deal: */ |
58 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); | 290 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); |
59 | } | 291 | } |
60 | 292 | ||
61 | /* read the Time Stamp Counter: */ | 293 | /* read the Time Stamp Counter: */ |
62 | rdtscll(this_offset); | 294 | rdtscll(tsc_now); |
63 | 295 | ||
64 | /* return the value in ns */ | 296 | /* return the value in ns */ |
65 | return __cycles_2_ns(this_offset); | 297 | return cycles_2_ns(tsc_now); |
66 | } | 298 | } |
67 | 299 | ||
68 | /* We need to define a real function for sched_clock, to override the | 300 | /* We need to define a real function for sched_clock, to override the |
@@ -419,6 +651,16 @@ unsigned long native_calibrate_tsc(void) | |||
419 | unsigned long flags, latch, ms, fast_calibrate; | 651 | unsigned long flags, latch, ms, fast_calibrate; |
420 | int hpet = is_hpet_enabled(), i, loopmin; | 652 | int hpet = is_hpet_enabled(), i, loopmin; |
421 | 653 | ||
654 | /* Calibrate TSC using MSR for Intel Atom SoCs */ | ||
655 | local_irq_save(flags); | ||
656 | i = try_msr_calibrate_tsc(&fast_calibrate); | ||
657 | local_irq_restore(flags); | ||
658 | if (i >= 0) { | ||
659 | if (i == 0) | ||
660 | pr_warn("Fast TSC calibration using MSR failed\n"); | ||
661 | return fast_calibrate; | ||
662 | } | ||
663 | |||
422 | local_irq_save(flags); | 664 | local_irq_save(flags); |
423 | fast_calibrate = quick_pit_calibrate(); | 665 | fast_calibrate = quick_pit_calibrate(); |
424 | local_irq_restore(flags); | 666 | local_irq_restore(flags); |
@@ -589,61 +831,11 @@ int recalibrate_cpu_khz(void) | |||
589 | EXPORT_SYMBOL(recalibrate_cpu_khz); | 831 | EXPORT_SYMBOL(recalibrate_cpu_khz); |
590 | 832 | ||
591 | 833 | ||
592 | /* Accelerators for sched_clock() | ||
593 | * convert from cycles(64bits) => nanoseconds (64bits) | ||
594 | * basic equation: | ||
595 | * ns = cycles / (freq / ns_per_sec) | ||
596 | * ns = cycles * (ns_per_sec / freq) | ||
597 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) | ||
598 | * ns = cycles * (10^6 / cpu_khz) | ||
599 | * | ||
600 | * Then we use scaling math (suggested by george@mvista.com) to get: | ||
601 | * ns = cycles * (10^6 * SC / cpu_khz) / SC | ||
602 | * ns = cycles * cyc2ns_scale / SC | ||
603 | * | ||
604 | * And since SC is a constant power of two, we can convert the div | ||
605 | * into a shift. | ||
606 | * | ||
607 | * We can use khz divisor instead of mhz to keep a better precision, since | ||
608 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
609 | * (mathieu.desnoyers@polymtl.ca) | ||
610 | * | ||
611 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
612 | */ | ||
613 | |||
614 | DEFINE_PER_CPU(unsigned long, cyc2ns); | ||
615 | DEFINE_PER_CPU(unsigned long long, cyc2ns_offset); | ||
616 | |||
617 | static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) | ||
618 | { | ||
619 | unsigned long long tsc_now, ns_now, *offset; | ||
620 | unsigned long flags, *scale; | ||
621 | |||
622 | local_irq_save(flags); | ||
623 | sched_clock_idle_sleep_event(); | ||
624 | |||
625 | scale = &per_cpu(cyc2ns, cpu); | ||
626 | offset = &per_cpu(cyc2ns_offset, cpu); | ||
627 | |||
628 | rdtscll(tsc_now); | ||
629 | ns_now = __cycles_2_ns(tsc_now); | ||
630 | |||
631 | if (cpu_khz) { | ||
632 | *scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) + | ||
633 | cpu_khz / 2) / cpu_khz; | ||
634 | *offset = ns_now - mult_frac(tsc_now, *scale, | ||
635 | (1UL << CYC2NS_SCALE_FACTOR)); | ||
636 | } | ||
637 | |||
638 | sched_clock_idle_wakeup_event(0); | ||
639 | local_irq_restore(flags); | ||
640 | } | ||
641 | |||
642 | static unsigned long long cyc2ns_suspend; | 834 | static unsigned long long cyc2ns_suspend; |
643 | 835 | ||
644 | void tsc_save_sched_clock_state(void) | 836 | void tsc_save_sched_clock_state(void) |
645 | { | 837 | { |
646 | if (!sched_clock_stable) | 838 | if (!sched_clock_stable()) |
647 | return; | 839 | return; |
648 | 840 | ||
649 | cyc2ns_suspend = sched_clock(); | 841 | cyc2ns_suspend = sched_clock(); |
@@ -663,16 +855,26 @@ void tsc_restore_sched_clock_state(void) | |||
663 | unsigned long flags; | 855 | unsigned long flags; |
664 | int cpu; | 856 | int cpu; |
665 | 857 | ||
666 | if (!sched_clock_stable) | 858 | if (!sched_clock_stable()) |
667 | return; | 859 | return; |
668 | 860 | ||
669 | local_irq_save(flags); | 861 | local_irq_save(flags); |
670 | 862 | ||
671 | __this_cpu_write(cyc2ns_offset, 0); | 863 | /* |
864 | * We're coming out of suspend, there's no concurrency yet; don't | ||
865 | * bother being nice about the RCU stuff, just write to both | ||
866 | * data fields. | ||
867 | */ | ||
868 | |||
869 | this_cpu_write(cyc2ns.data[0].cyc2ns_offset, 0); | ||
870 | this_cpu_write(cyc2ns.data[1].cyc2ns_offset, 0); | ||
871 | |||
672 | offset = cyc2ns_suspend - sched_clock(); | 872 | offset = cyc2ns_suspend - sched_clock(); |
673 | 873 | ||
674 | for_each_possible_cpu(cpu) | 874 | for_each_possible_cpu(cpu) { |
675 | per_cpu(cyc2ns_offset, cpu) = offset; | 875 | per_cpu(cyc2ns.data[0].cyc2ns_offset, cpu) = offset; |
876 | per_cpu(cyc2ns.data[1].cyc2ns_offset, cpu) = offset; | ||
877 | } | ||
676 | 878 | ||
677 | local_irq_restore(flags); | 879 | local_irq_restore(flags); |
678 | } | 880 | } |
@@ -795,7 +997,7 @@ void mark_tsc_unstable(char *reason) | |||
795 | { | 997 | { |
796 | if (!tsc_unstable) { | 998 | if (!tsc_unstable) { |
797 | tsc_unstable = 1; | 999 | tsc_unstable = 1; |
798 | sched_clock_stable = 0; | 1000 | clear_sched_clock_stable(); |
799 | disable_sched_clock_irqtime(); | 1001 | disable_sched_clock_irqtime(); |
800 | pr_info("Marking TSC unstable due to %s\n", reason); | 1002 | pr_info("Marking TSC unstable due to %s\n", reason); |
801 | /* Change only the rating, when not registered */ | 1003 | /* Change only the rating, when not registered */ |
@@ -995,14 +1197,18 @@ void __init tsc_init(void) | |||
995 | * speed as the bootup CPU. (cpufreq notifiers will fix this | 1197 | * speed as the bootup CPU. (cpufreq notifiers will fix this |
996 | * up if their speed diverges) | 1198 | * up if their speed diverges) |
997 | */ | 1199 | */ |
998 | for_each_possible_cpu(cpu) | 1200 | for_each_possible_cpu(cpu) { |
1201 | cyc2ns_init(cpu); | ||
999 | set_cyc2ns_scale(cpu_khz, cpu); | 1202 | set_cyc2ns_scale(cpu_khz, cpu); |
1203 | } | ||
1000 | 1204 | ||
1001 | if (tsc_disabled > 0) | 1205 | if (tsc_disabled > 0) |
1002 | return; | 1206 | return; |
1003 | 1207 | ||
1004 | /* now allow native_sched_clock() to use rdtsc */ | 1208 | /* now allow native_sched_clock() to use rdtsc */ |
1209 | |||
1005 | tsc_disabled = 0; | 1210 | tsc_disabled = 0; |
1211 | static_key_slow_inc(&__use_tsc); | ||
1006 | 1212 | ||
1007 | if (!no_sched_irq_time) | 1213 | if (!no_sched_irq_time) |
1008 | enable_sched_clock_irqtime(); | 1214 | enable_sched_clock_irqtime(); |
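The cyc2ns rework keeps the existing scaling scheme described in the comment above: ns = cycles * cyc2ns_mul >> CYC2NS_SCALE_FACTOR, where cyc2ns_mul = (NSEC_PER_MSEC << 10) / cpu_khz, rounded. A worked userspace sketch of that arithmetic follows; the 2.4 GHz clock rate is an assumed value, and the kernel itself uses mul_u64_u32_shr() rather than a plain multiply to avoid 64-bit overflow.

#include <stdio.h>
#include <stdint.h>

#define CYC2NS_SCALE_FACTOR 10                          /* as in tsc.c */
#define DIV_ROUND(n, d) (((n) + ((d) / 2)) / (d))       /* as in tsc.c */

int main(void)
{
        uint64_t cpu_khz = 2400000;                     /* assumed 2.4 GHz CPU */
        uint32_t mul = DIV_ROUND(1000000ULL << CYC2NS_SCALE_FACTOR, cpu_khz);
        uint64_t cycles = 2400000000ULL;                /* one second of cycles */
        uint64_t ns = (cycles * mul) >> CYC2NS_SCALE_FACTOR;

        /* mul = (1024000000 + 1200000) / 2400000 = 427, i.e. ~0.417 ns/cycle;
         * the result is ~1.0008 s, the cost of truncating the multiplier. */
        printf("mul=%u, %llu cycles -> %llu ns\n", mul,
               (unsigned long long)cycles, (unsigned long long)ns);
        return 0;
}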
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c new file mode 100644 index 000000000000..8b5434f4389f --- /dev/null +++ b/arch/x86/kernel/tsc_msr.c | |||
@@ -0,0 +1,127 @@ | |||
1 | /* | ||
2 | * tsc_msr.c - MSR based TSC calibration on Intel Atom SoC platforms. | ||
3 | * | ||
4 | * The TSC on Intel Atom SoCs runs at a constant rate which can be determined | ||
5 | * by this formula: | ||
6 | * <maximum core-clock to bus-clock ratio> * <maximum resolved frequency> | ||
7 | * See Intel 64 and IA-32 System Programming Guide sections 16.12 and 30.11.5 | ||
8 | * for details. | ||
9 | * In particular, some Intel Atom SoCs don't have a PIT (i8254) or HPET, so | ||
10 | * MSR based calibration is the only option. | ||
11 | * | ||
12 | * | ||
13 | * Copyright (C) 2013 Intel Corporation | ||
14 | * Author: Bin Gao <bin.gao@intel.com> | ||
15 | * | ||
16 | * This file is released under the GPLv2. | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <asm/processor.h> | ||
21 | #include <asm/setup.h> | ||
22 | #include <asm/apic.h> | ||
23 | #include <asm/param.h> | ||
24 | |||
25 | /* CPU reference clock frequency: in KHz */ | ||
26 | #define FREQ_83 83200 | ||
27 | #define FREQ_100 99840 | ||
28 | #define FREQ_133 133200 | ||
29 | #define FREQ_166 166400 | ||
30 | |||
31 | #define MAX_NUM_FREQS 8 | ||
32 | |||
33 | /* | ||
34 | * According to Intel 64 and IA-32 System Programming Guide, | ||
35 | * if MSR_PERF_STAT[31] is set, the maximum resolved bus ratio can be | ||
36 | * read in MSR_PLATFORM_ID[12:8], otherwise in MSR_PERF_STAT[44:40]. | ||
37 | * Unfortunately some Intel Atom SoCs aren't quite compliant with this, | ||
38 | * so we need to manually differentiate SoC families. This is what the | ||
39 | * field msr_plat does. | ||
40 | */ | ||
41 | struct freq_desc { | ||
42 | u8 x86_family; /* CPU family */ | ||
43 | u8 x86_model; /* model */ | ||
44 | u8 msr_plat; /* 1: use MSR_PLATFORM_INFO, 0: MSR_IA32_PERF_STATUS */ | ||
45 | u32 freqs[MAX_NUM_FREQS]; | ||
46 | }; | ||
47 | |||
48 | static struct freq_desc freq_desc_tables[] = { | ||
49 | /* PNW */ | ||
50 | { 6, 0x27, 0, { 0, 0, 0, 0, 0, FREQ_100, 0, FREQ_83 } }, | ||
51 | /* CLV+ */ | ||
52 | { 6, 0x35, 0, { 0, FREQ_133, 0, 0, 0, FREQ_100, 0, FREQ_83 } }, | ||
53 | /* TNG */ | ||
54 | { 6, 0x4a, 1, { 0, FREQ_100, FREQ_133, 0, 0, 0, 0, 0 } }, | ||
55 | /* VLV2 */ | ||
56 | { 6, 0x37, 1, { 0, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } }, | ||
57 | /* ANN */ | ||
58 | { 6, 0x5a, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_100, 0, 0, 0, 0 } }, | ||
59 | }; | ||
60 | |||
61 | static int match_cpu(u8 family, u8 model) | ||
62 | { | ||
63 | int i; | ||
64 | |||
65 | for (i = 0; i < ARRAY_SIZE(freq_desc_tables); i++) { | ||
66 | if ((family == freq_desc_tables[i].x86_family) && | ||
67 | (model == freq_desc_tables[i].x86_model)) | ||
68 | return i; | ||
69 | } | ||
70 | |||
71 | return -1; | ||
72 | } | ||
73 | |||
74 | /* Map CPU reference clock freq ID(0-7) to CPU reference clock freq(KHz) */ | ||
75 | #define id_to_freq(cpu_index, freq_id) \ | ||
76 | (freq_desc_tables[cpu_index].freqs[freq_id]) | ||
77 | |||
78 | /* | ||
79 | * Do MSR calibration only for known/supported CPUs. | ||
80 | * Return values: | ||
81 | * -1: CPU is unknown/unsupported for MSR based calibration | ||
82 | * 0: CPU is known/supported, but calibration failed | ||
83 | * 1: CPU is known/supported, and calibration succeeded | ||
84 | */ | ||
85 | int try_msr_calibrate_tsc(unsigned long *fast_calibrate) | ||
86 | { | ||
87 | int cpu_index; | ||
88 | u32 lo, hi, ratio, freq_id, freq; | ||
89 | |||
90 | cpu_index = match_cpu(boot_cpu_data.x86, boot_cpu_data.x86_model); | ||
91 | if (cpu_index < 0) | ||
92 | return -1; | ||
93 | |||
94 | *fast_calibrate = 0; | ||
95 | |||
96 | if (freq_desc_tables[cpu_index].msr_plat) { | ||
97 | rdmsr(MSR_PLATFORM_INFO, lo, hi); | ||
98 | ratio = (lo >> 8) & 0x1f; | ||
99 | } else { | ||
100 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
101 | ratio = (hi >> 8) & 0x1f; | ||
102 | } | ||
103 | pr_info("Maximum core-clock to bus-clock ratio: 0x%x\n", ratio); | ||
104 | |||
105 | if (!ratio) | ||
106 | return 0; | ||
107 | |||
108 | /* Get FSB FREQ ID */ | ||
109 | rdmsr(MSR_FSB_FREQ, lo, hi); | ||
110 | freq_id = lo & 0x7; | ||
111 | freq = id_to_freq(cpu_index, freq_id); | ||
112 | pr_info("Resolved frequency ID: %u, frequency: %u KHz\n", | ||
113 | freq_id, freq); | ||
114 | if (!freq) | ||
115 | return 0; | ||
116 | |||
117 | /* TSC frequency = maximum resolved freq * maximum resolved bus ratio */ | ||
118 | *fast_calibrate = freq * ratio; | ||
119 | pr_info("TSC runs at %lu KHz\n", *fast_calibrate); | ||
120 | |||
121 | #ifdef CONFIG_X86_LOCAL_APIC | ||
122 | lapic_timer_frequency = (freq * 1000) / HZ; | ||
123 | pr_info("lapic_timer_frequency = %d\n", lapic_timer_frequency); | ||
124 | #endif | ||
125 | |||
126 | return 1; | ||
127 | } | ||
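try_msr_calibrate_tsc() above derives the TSC rate as the maximum resolved reference frequency times the maximum bus ratio. A worked sketch with assumed register fields for a VLV2-class part; the ratio and FSB frequency ID are invented, and only the frequency table values come from freq_desc_tables above.

#include <stdio.h>

/* Frequency table for the VLV2 entry of freq_desc_tables, in kHz. */
static const unsigned int vlv2_freqs[8] = {
        0, 99840, 133200, 166400, 0, 0, 0, 0
};

int main(void)
{
        unsigned int ratio = 16;        /* assumed MSR_PLATFORM_INFO[12:8] */
        unsigned int freq_id = 1;       /* assumed MSR_FSB_FREQ[2:0] */
        unsigned long tsc_khz = (unsigned long)vlv2_freqs[freq_id] * ratio;

        /* 99840 kHz * 16 = 1597440 kHz, i.e. a ~1.6 GHz TSC */
        printf("TSC runs at %lu KHz\n", tsc_khz);
        return 0;
}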
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index adfdf56a3714..26488487bc61 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c | |||
@@ -16,7 +16,6 @@ | |||
16 | */ | 16 | */ |
17 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
18 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
19 | #include <linux/init.h> | ||
20 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
21 | #include <linux/nmi.h> | 20 | #include <linux/nmi.h> |
22 | #include <asm/tsc.h> | 21 | #include <asm/tsc.h> |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 021783b1f46a..e48b674639cc 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -136,9 +136,9 @@ void arch_teardown_msi_irq(unsigned int irq) | |||
136 | x86_msi.teardown_msi_irq(irq); | 136 | x86_msi.teardown_msi_irq(irq); |
137 | } | 137 | } |
138 | 138 | ||
139 | void arch_restore_msi_irqs(struct pci_dev *dev, int irq) | 139 | void arch_restore_msi_irqs(struct pci_dev *dev) |
140 | { | 140 | { |
141 | x86_msi.restore_msi_irqs(dev, irq); | 141 | x86_msi.restore_msi_irqs(dev); |
142 | } | 142 | } |
143 | u32 arch_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) | 143 | u32 arch_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) |
144 | { | 144 | { |
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 422fd8223470..a4b451c6addf 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -562,6 +562,16 @@ static void __init xstate_enable_boot_cpu(void) | |||
562 | if (cpu_has_xsaveopt && eagerfpu != DISABLE) | 562 | if (cpu_has_xsaveopt && eagerfpu != DISABLE) |
563 | eagerfpu = ENABLE; | 563 | eagerfpu = ENABLE; |
564 | 564 | ||
565 | if (pcntxt_mask & XSTATE_EAGER) { | ||
566 | if (eagerfpu == DISABLE) { | ||
567 | pr_err("eagerfpu not present, disabling some xstate features: 0x%llx\n", | ||
568 | pcntxt_mask & XSTATE_EAGER); | ||
569 | pcntxt_mask &= ~XSTATE_EAGER; | ||
570 | } else { | ||
571 | eagerfpu = ENABLE; | ||
572 | } | ||
573 | } | ||
574 | |||
565 | pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", | 575 | pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", |
566 | pcntxt_mask, xstate_size); | 576 | pcntxt_mask, xstate_size); |
567 | } | 577 | } |
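The xsave.c hunk above gates eager-only xstate features on the eagerfpu setting. A stand-alone sketch of the masking, with an illustrative bit position rather than the kernel's real XSTATE_EAGER definition:

#include <stdio.h>

#define XSTATE_EAGER_SKETCH (1ULL << 3)	/* stands in for the eager-only xstate bits */

int main(void)
{
	unsigned long long pcntxt_mask = 0x7ULL | XSTATE_EAGER_SKETCH;	/* features the CPU offers */
	int eagerfpu_enabled = 0;					/* booted with eagerfpu=off */

	if ((pcntxt_mask & XSTATE_EAGER_SKETCH) && !eagerfpu_enabled)
		pcntxt_mask &= ~XSTATE_EAGER_SKETCH;	/* drop features that need eager restore */

	printf("enabled xstate mask: %#llx\n", pcntxt_mask);
	return 0;
}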
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index b89c5db2b832..287e4c85fff9 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -80,7 +80,7 @@ config KVM_MMU_AUDIT | |||
80 | depends on KVM && TRACEPOINTS | 80 | depends on KVM && TRACEPOINTS |
81 | ---help--- | 81 | ---help--- |
82 | This option adds a R/W kVM module parameter 'mmu_audit', which allows | 82 | This option adds a R/W kVM module parameter 'mmu_audit', which allows |
83 | audit KVM MMU at runtime. | 83 | auditing of KVM MMU events at runtime. |
84 | 84 | ||
85 | config KVM_DEVICE_ASSIGNMENT | 85 | config KVM_DEVICE_ASSIGNMENT |
86 | bool "KVM legacy PCI device assignment support" | 86 | bool "KVM legacy PCI device assignment support" |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 412a5aa0ef94..518d86471b76 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -37,6 +37,7 @@ | |||
37 | 37 | ||
38 | #include "irq.h" | 38 | #include "irq.h" |
39 | #include "i8254.h" | 39 | #include "i8254.h" |
40 | #include "x86.h" | ||
40 | 41 | ||
41 | #ifndef CONFIG_X86_64 | 42 | #ifndef CONFIG_X86_64 |
42 | #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) | 43 | #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) |
@@ -349,6 +350,23 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) | |||
349 | atomic_set(&ps->pending, 0); | 350 | atomic_set(&ps->pending, 0); |
350 | ps->irq_ack = 1; | 351 | ps->irq_ack = 1; |
351 | 352 | ||
353 | /* | ||
354 | * Do not allow the guest to program periodic timers with small | ||
355 | * interval, since the hrtimers are not throttled by the host | ||
356 | * scheduler. | ||
357 | */ | ||
358 | if (ps->is_periodic) { | ||
359 | s64 min_period = min_timer_period_us * 1000LL; | ||
360 | |||
361 | if (ps->period < min_period) { | ||
362 | pr_info_ratelimited( | ||
363 | "kvm: requested %lld ns " | ||
364 | "i8254 timer period limited to %lld ns\n", | ||
365 | ps->period, min_period); | ||
366 | ps->period = min_period; | ||
367 | } | ||
368 | } | ||
369 | |||
352 | hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval), | 370 | hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval), |
353 | HRTIMER_MODE_ABS); | 371 | HRTIMER_MODE_ABS); |
354 | } | 372 | } |
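The clamp added to create_pit_timer() keeps a guest from arming the PIT hrtimer with an arbitrarily small period. A stand-alone sketch of the rule, using the module parameter's default of 500 us (a default, not a fixed value):

#include <stdio.h>

int main(void)
{
	long long min_timer_period_us = 500;			/* module parameter default */
	long long min_period = min_timer_period_us * 1000LL;	/* floor, in ns */
	long long requested = 100000;				/* guest asks for 100 us */
	long long period = requested < min_period ? min_period : requested;

	printf("requested %lld ns, i8254 period limited to %lld ns\n", requested, period);
	return 0;
}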
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 1673940cf9c3..9736529ade08 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -71,9 +71,6 @@ | |||
71 | #define VEC_POS(v) ((v) & (32 - 1)) | 71 | #define VEC_POS(v) ((v) & (32 - 1)) |
72 | #define REG_POS(v) (((v) >> 5) << 4) | 72 | #define REG_POS(v) (((v) >> 5) << 4) |
73 | 73 | ||
74 | static unsigned int min_timer_period_us = 500; | ||
75 | module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); | ||
76 | |||
77 | static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) | 74 | static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) |
78 | { | 75 | { |
79 | *((u32 *) (apic->regs + reg_off)) = val; | 76 | *((u32 *) (apic->regs + reg_off)) = val; |
@@ -435,7 +432,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) | |||
435 | u8 val; | 432 | u8 val; |
436 | if (pv_eoi_get_user(vcpu, &val) < 0) | 433 | if (pv_eoi_get_user(vcpu, &val) < 0) |
437 | apic_debug("Can't read EOI MSR value: 0x%llx\n", | 434 | apic_debug("Can't read EOI MSR value: 0x%llx\n", |
438 | (unsigned long long)vcpi->arch.pv_eoi.msr_val); | 435 | (unsigned long long)vcpu->arch.pv_eoi.msr_val); |
439 | return val & 0x1; | 436 | return val & 0x1; |
440 | } | 437 | } |
441 | 438 | ||
@@ -443,7 +440,7 @@ static void pv_eoi_set_pending(struct kvm_vcpu *vcpu) | |||
443 | { | 440 | { |
444 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { | 441 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { |
445 | apic_debug("Can't set EOI MSR value: 0x%llx\n", | 442 | apic_debug("Can't set EOI MSR value: 0x%llx\n", |
446 | (unsigned long long)vcpi->arch.pv_eoi.msr_val); | 443 | (unsigned long long)vcpu->arch.pv_eoi.msr_val); |
447 | return; | 444 | return; |
448 | } | 445 | } |
449 | __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); | 446 | __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); |
@@ -453,7 +450,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) | |||
453 | { | 450 | { |
454 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { | 451 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { |
455 | apic_debug("Can't clear EOI MSR value: 0x%llx\n", | 452 | apic_debug("Can't clear EOI MSR value: 0x%llx\n", |
456 | (unsigned long long)vcpi->arch.pv_eoi.msr_val); | 453 | (unsigned long long)vcpu->arch.pv_eoi.msr_val); |
457 | return; | 454 | return; |
458 | } | 455 | } |
459 | __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); | 456 | __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); |
@@ -1355,7 +1352,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) | |||
1355 | vcpu->arch.apic_base = value; | 1352 | vcpu->arch.apic_base = value; |
1356 | 1353 | ||
1357 | /* update jump label if enable bit changes */ | 1354 | /* update jump label if enable bit changes */ |
1358 | if ((vcpu->arch.apic_base ^ value) & MSR_IA32_APICBASE_ENABLE) { | 1355 | if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) { |
1359 | if (value & MSR_IA32_APICBASE_ENABLE) | 1356 | if (value & MSR_IA32_APICBASE_ENABLE) |
1360 | static_key_slow_dec_deferred(&apic_hw_disabled); | 1357 | static_key_slow_dec_deferred(&apic_hw_disabled); |
1361 | else | 1358 | else |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 40772ef0f2b1..e50425d0f5f7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -2659,6 +2659,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
2659 | int emulate = 0; | 2659 | int emulate = 0; |
2660 | gfn_t pseudo_gfn; | 2660 | gfn_t pseudo_gfn; |
2661 | 2661 | ||
2662 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
2663 | return 0; | ||
2664 | |||
2662 | for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { | 2665 | for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { |
2663 | if (iterator.level == level) { | 2666 | if (iterator.level == level) { |
2664 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, | 2667 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, |
@@ -2829,6 +2832,9 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, | |||
2829 | bool ret = false; | 2832 | bool ret = false; |
2830 | u64 spte = 0ull; | 2833 | u64 spte = 0ull; |
2831 | 2834 | ||
2835 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
2836 | return false; | ||
2837 | |||
2832 | if (!page_fault_can_be_fast(error_code)) | 2838 | if (!page_fault_can_be_fast(error_code)) |
2833 | return false; | 2839 | return false; |
2834 | 2840 | ||
@@ -3224,6 +3230,9 @@ static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr) | |||
3224 | struct kvm_shadow_walk_iterator iterator; | 3230 | struct kvm_shadow_walk_iterator iterator; |
3225 | u64 spte = 0ull; | 3231 | u64 spte = 0ull; |
3226 | 3232 | ||
3233 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
3234 | return spte; | ||
3235 | |||
3227 | walk_shadow_page_lockless_begin(vcpu); | 3236 | walk_shadow_page_lockless_begin(vcpu); |
3228 | for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) | 3237 | for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) |
3229 | if (!is_shadow_present_pte(spte)) | 3238 | if (!is_shadow_present_pte(spte)) |
@@ -4510,6 +4519,9 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) | |||
4510 | u64 spte; | 4519 | u64 spte; |
4511 | int nr_sptes = 0; | 4520 | int nr_sptes = 0; |
4512 | 4521 | ||
4522 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
4523 | return nr_sptes; | ||
4524 | |||
4513 | walk_shadow_page_lockless_begin(vcpu); | 4525 | walk_shadow_page_lockless_begin(vcpu); |
4514 | for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { | 4526 | for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { |
4515 | sptes[iterator.level-1] = spte; | 4527 | sptes[iterator.level-1] = spte; |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index ad75d77999d0..cba218a2f08d 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -569,6 +569,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
569 | if (FNAME(gpte_changed)(vcpu, gw, top_level)) | 569 | if (FNAME(gpte_changed)(vcpu, gw, top_level)) |
570 | goto out_gpte_changed; | 570 | goto out_gpte_changed; |
571 | 571 | ||
572 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
573 | goto out_gpte_changed; | ||
574 | |||
572 | for (shadow_walk_init(&it, vcpu, addr); | 575 | for (shadow_walk_init(&it, vcpu, addr); |
573 | shadow_walk_okay(&it) && it.level > gw->level; | 576 | shadow_walk_okay(&it) && it.level > gw->level; |
574 | shadow_walk_next(&it)) { | 577 | shadow_walk_next(&it)) { |
@@ -820,6 +823,11 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
820 | */ | 823 | */ |
821 | mmu_topup_memory_caches(vcpu); | 824 | mmu_topup_memory_caches(vcpu); |
822 | 825 | ||
826 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) { | ||
827 | WARN_ON(1); | ||
828 | return; | ||
829 | } | ||
830 | |||
823 | spin_lock(&vcpu->kvm->mmu_lock); | 831 | spin_lock(&vcpu->kvm->mmu_lock); |
824 | for_each_shadow_entry(vcpu, gva, iterator) { | 832 | for_each_shadow_entry(vcpu, gva, iterator) { |
825 | level = iterator.level; | 833 | level = iterator.level; |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c7168a5cff1b..e81df8fce027 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -1671,6 +1671,19 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) | |||
1671 | mark_dirty(svm->vmcb, VMCB_ASID); | 1671 | mark_dirty(svm->vmcb, VMCB_ASID); |
1672 | } | 1672 | } |
1673 | 1673 | ||
1674 | static u64 svm_get_dr6(struct kvm_vcpu *vcpu) | ||
1675 | { | ||
1676 | return to_svm(vcpu)->vmcb->save.dr6; | ||
1677 | } | ||
1678 | |||
1679 | static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value) | ||
1680 | { | ||
1681 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1682 | |||
1683 | svm->vmcb->save.dr6 = value; | ||
1684 | mark_dirty(svm->vmcb, VMCB_DR); | ||
1685 | } | ||
1686 | |||
1674 | static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) | 1687 | static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) |
1675 | { | 1688 | { |
1676 | struct vcpu_svm *svm = to_svm(vcpu); | 1689 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -4286,6 +4299,8 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4286 | .set_idt = svm_set_idt, | 4299 | .set_idt = svm_set_idt, |
4287 | .get_gdt = svm_get_gdt, | 4300 | .get_gdt = svm_get_gdt, |
4288 | .set_gdt = svm_set_gdt, | 4301 | .set_gdt = svm_set_gdt, |
4302 | .get_dr6 = svm_get_dr6, | ||
4303 | .set_dr6 = svm_set_dr6, | ||
4289 | .set_dr7 = svm_set_dr7, | 4304 | .set_dr7 = svm_set_dr7, |
4290 | .cache_reg = svm_cache_reg, | 4305 | .cache_reg = svm_cache_reg, |
4291 | .get_rflags = svm_get_rflags, | 4306 | .get_rflags = svm_get_rflags, |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index da7837e1349d..5c8879127cfa 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -418,6 +418,8 @@ struct vcpu_vmx { | |||
418 | u64 msr_host_kernel_gs_base; | 418 | u64 msr_host_kernel_gs_base; |
419 | u64 msr_guest_kernel_gs_base; | 419 | u64 msr_guest_kernel_gs_base; |
420 | #endif | 420 | #endif |
421 | u32 vm_entry_controls_shadow; | ||
422 | u32 vm_exit_controls_shadow; | ||
421 | /* | 423 | /* |
422 | * loaded_vmcs points to the VMCS currently used in this vcpu. For a | 424 | * loaded_vmcs points to the VMCS currently used in this vcpu. For a |
423 | * non-nested (L1) guest, it always points to vmcs01. For a nested | 425 | * non-nested (L1) guest, it always points to vmcs01. For a nested |
@@ -1056,7 +1058,9 @@ static inline bool is_exception(u32 intr_info) | |||
1056 | == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK); | 1058 | == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK); |
1057 | } | 1059 | } |
1058 | 1060 | ||
1059 | static void nested_vmx_vmexit(struct kvm_vcpu *vcpu); | 1061 | static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, |
1062 | u32 exit_intr_info, | ||
1063 | unsigned long exit_qualification); | ||
1060 | static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, | 1064 | static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, |
1061 | struct vmcs12 *vmcs12, | 1065 | struct vmcs12 *vmcs12, |
1062 | u32 reason, unsigned long qualification); | 1066 | u32 reason, unsigned long qualification); |
@@ -1326,6 +1330,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask) | |||
1326 | vmcs_writel(field, vmcs_readl(field) | mask); | 1330 | vmcs_writel(field, vmcs_readl(field) | mask); |
1327 | } | 1331 | } |
1328 | 1332 | ||
1333 | static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val) | ||
1334 | { | ||
1335 | vmcs_write32(VM_ENTRY_CONTROLS, val); | ||
1336 | vmx->vm_entry_controls_shadow = val; | ||
1337 | } | ||
1338 | |||
1339 | static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val) | ||
1340 | { | ||
1341 | if (vmx->vm_entry_controls_shadow != val) | ||
1342 | vm_entry_controls_init(vmx, val); | ||
1343 | } | ||
1344 | |||
1345 | static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx) | ||
1346 | { | ||
1347 | return vmx->vm_entry_controls_shadow; | ||
1348 | } | ||
1349 | |||
1350 | |||
1351 | static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val) | ||
1352 | { | ||
1353 | vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val); | ||
1354 | } | ||
1355 | |||
1356 | static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val) | ||
1357 | { | ||
1358 | vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val); | ||
1359 | } | ||
1360 | |||
1361 | static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val) | ||
1362 | { | ||
1363 | vmcs_write32(VM_EXIT_CONTROLS, val); | ||
1364 | vmx->vm_exit_controls_shadow = val; | ||
1365 | } | ||
1366 | |||
1367 | static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val) | ||
1368 | { | ||
1369 | if (vmx->vm_exit_controls_shadow != val) | ||
1370 | vm_exit_controls_init(vmx, val); | ||
1371 | } | ||
1372 | |||
1373 | static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx) | ||
1374 | { | ||
1375 | return vmx->vm_exit_controls_shadow; | ||
1376 | } | ||
1377 | |||
1378 | |||
1379 | static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val) | ||
1380 | { | ||
1381 | vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val); | ||
1382 | } | ||
1383 | |||
1384 | static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val) | ||
1385 | { | ||
1386 | vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val); | ||
1387 | } | ||
1388 | |||
1329 | static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) | 1389 | static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) |
1330 | { | 1390 | { |
1331 | vmx->segment_cache.bitmask = 0; | 1391 | vmx->segment_cache.bitmask = 0; |
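The vm_entry_controls_*/vm_exit_controls_* helpers introduced above cache the last value written to the VM-entry/VM-exit control fields, so repeated updates can skip the VMWRITE when nothing changed. A small sketch of that write-through-cache idea, with vmcs_write32() stubbed out and the field encoding given only for illustration:

#include <stdio.h>

static void vmcs_write32(unsigned long field, unsigned int val)
{
	printf("VMWRITE field %#lx <- %#x\n", field, val);	/* stands in for the real VMWRITE */
}

struct vmx_sketch {
	unsigned int vm_entry_controls_shadow;
};

static void vm_entry_controls_set(struct vmx_sketch *vmx, unsigned int val)
{
	if (vmx->vm_entry_controls_shadow != val) {
		vmcs_write32(0x4012, val);		/* VM_ENTRY_CONTROLS encoding, illustrative */
		vmx->vm_entry_controls_shadow = val;
	}
}

int main(void)
{
	struct vmx_sketch vmx = { 0 };

	vm_entry_controls_set(&vmx, 0x11ff);	/* first call issues one VMWRITE */
	vm_entry_controls_set(&vmx, 0x11ff);	/* second call hits the shadow, no VMWRITE */
	return 0;
}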
@@ -1410,11 +1470,11 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
1410 | vmcs_write32(EXCEPTION_BITMAP, eb); | 1470 | vmcs_write32(EXCEPTION_BITMAP, eb); |
1411 | } | 1471 | } |
1412 | 1472 | ||
1413 | static void clear_atomic_switch_msr_special(unsigned long entry, | 1473 | static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, |
1414 | unsigned long exit) | 1474 | unsigned long entry, unsigned long exit) |
1415 | { | 1475 | { |
1416 | vmcs_clear_bits(VM_ENTRY_CONTROLS, entry); | 1476 | vm_entry_controls_clearbit(vmx, entry); |
1417 | vmcs_clear_bits(VM_EXIT_CONTROLS, exit); | 1477 | vm_exit_controls_clearbit(vmx, exit); |
1418 | } | 1478 | } |
1419 | 1479 | ||
1420 | static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) | 1480 | static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) |
@@ -1425,14 +1485,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) | |||
1425 | switch (msr) { | 1485 | switch (msr) { |
1426 | case MSR_EFER: | 1486 | case MSR_EFER: |
1427 | if (cpu_has_load_ia32_efer) { | 1487 | if (cpu_has_load_ia32_efer) { |
1428 | clear_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, | 1488 | clear_atomic_switch_msr_special(vmx, |
1489 | VM_ENTRY_LOAD_IA32_EFER, | ||
1429 | VM_EXIT_LOAD_IA32_EFER); | 1490 | VM_EXIT_LOAD_IA32_EFER); |
1430 | return; | 1491 | return; |
1431 | } | 1492 | } |
1432 | break; | 1493 | break; |
1433 | case MSR_CORE_PERF_GLOBAL_CTRL: | 1494 | case MSR_CORE_PERF_GLOBAL_CTRL: |
1434 | if (cpu_has_load_perf_global_ctrl) { | 1495 | if (cpu_has_load_perf_global_ctrl) { |
1435 | clear_atomic_switch_msr_special( | 1496 | clear_atomic_switch_msr_special(vmx, |
1436 | VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, | 1497 | VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, |
1437 | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); | 1498 | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); |
1438 | return; | 1499 | return; |
@@ -1453,14 +1514,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) | |||
1453 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); | 1514 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); |
1454 | } | 1515 | } |
1455 | 1516 | ||
1456 | static void add_atomic_switch_msr_special(unsigned long entry, | 1517 | static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx, |
1457 | unsigned long exit, unsigned long guest_val_vmcs, | 1518 | unsigned long entry, unsigned long exit, |
1458 | unsigned long host_val_vmcs, u64 guest_val, u64 host_val) | 1519 | unsigned long guest_val_vmcs, unsigned long host_val_vmcs, |
1520 | u64 guest_val, u64 host_val) | ||
1459 | { | 1521 | { |
1460 | vmcs_write64(guest_val_vmcs, guest_val); | 1522 | vmcs_write64(guest_val_vmcs, guest_val); |
1461 | vmcs_write64(host_val_vmcs, host_val); | 1523 | vmcs_write64(host_val_vmcs, host_val); |
1462 | vmcs_set_bits(VM_ENTRY_CONTROLS, entry); | 1524 | vm_entry_controls_setbit(vmx, entry); |
1463 | vmcs_set_bits(VM_EXIT_CONTROLS, exit); | 1525 | vm_exit_controls_setbit(vmx, exit); |
1464 | } | 1526 | } |
1465 | 1527 | ||
1466 | static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | 1528 | static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, |
@@ -1472,7 +1534,8 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | |||
1472 | switch (msr) { | 1534 | switch (msr) { |
1473 | case MSR_EFER: | 1535 | case MSR_EFER: |
1474 | if (cpu_has_load_ia32_efer) { | 1536 | if (cpu_has_load_ia32_efer) { |
1475 | add_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, | 1537 | add_atomic_switch_msr_special(vmx, |
1538 | VM_ENTRY_LOAD_IA32_EFER, | ||
1476 | VM_EXIT_LOAD_IA32_EFER, | 1539 | VM_EXIT_LOAD_IA32_EFER, |
1477 | GUEST_IA32_EFER, | 1540 | GUEST_IA32_EFER, |
1478 | HOST_IA32_EFER, | 1541 | HOST_IA32_EFER, |
@@ -1482,7 +1545,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | |||
1482 | break; | 1545 | break; |
1483 | case MSR_CORE_PERF_GLOBAL_CTRL: | 1546 | case MSR_CORE_PERF_GLOBAL_CTRL: |
1484 | if (cpu_has_load_perf_global_ctrl) { | 1547 | if (cpu_has_load_perf_global_ctrl) { |
1485 | add_atomic_switch_msr_special( | 1548 | add_atomic_switch_msr_special(vmx, |
1486 | VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, | 1549 | VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, |
1487 | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, | 1550 | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, |
1488 | GUEST_IA32_PERF_GLOBAL_CTRL, | 1551 | GUEST_IA32_PERF_GLOBAL_CTRL, |
@@ -1906,7 +1969,9 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr) | |||
1906 | if (!(vmcs12->exception_bitmap & (1u << nr))) | 1969 | if (!(vmcs12->exception_bitmap & (1u << nr))) |
1907 | return 0; | 1970 | return 0; |
1908 | 1971 | ||
1909 | nested_vmx_vmexit(vcpu); | 1972 | nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason, |
1973 | vmcs_read32(VM_EXIT_INTR_INFO), | ||
1974 | vmcs_readl(EXIT_QUALIFICATION)); | ||
1910 | return 1; | 1975 | return 1; |
1911 | } | 1976 | } |
1912 | 1977 | ||
@@ -2279,6 +2344,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2279 | rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); | 2344 | rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); |
2280 | nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | | 2345 | nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | |
2281 | VMX_MISC_SAVE_EFER_LMA; | 2346 | VMX_MISC_SAVE_EFER_LMA; |
2347 | nested_vmx_misc_low |= VMX_MISC_ACTIVITY_HLT; | ||
2282 | nested_vmx_misc_high = 0; | 2348 | nested_vmx_misc_high = 0; |
2283 | } | 2349 | } |
2284 | 2350 | ||
@@ -2295,32 +2361,10 @@ static inline u64 vmx_control_msr(u32 low, u32 high) | |||
2295 | return low | ((u64)high << 32); | 2361 | return low | ((u64)high << 32); |
2296 | } | 2362 | } |
2297 | 2363 | ||
2298 | /* | 2364 | /* Returns 0 on success, non-0 otherwise. */ |
2299 | * If we allow our guest to use VMX instructions (i.e., nested VMX), we should | ||
2300 | * also let it use VMX-specific MSRs. | ||
2301 | * vmx_get_vmx_msr() and vmx_set_vmx_msr() return 1 when we handled a | ||
2302 | * VMX-specific MSR, or 0 when we haven't (and the caller should handle it | ||
2303 | * like all other MSRs). | ||
2304 | */ | ||
2305 | static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | 2365 | static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) |
2306 | { | 2366 | { |
2307 | if (!nested_vmx_allowed(vcpu) && msr_index >= MSR_IA32_VMX_BASIC && | ||
2308 | msr_index <= MSR_IA32_VMX_TRUE_ENTRY_CTLS) { | ||
2309 | /* | ||
2310 | * According to the spec, processors which do not support VMX | ||
2311 | * should throw a #GP(0) when VMX capability MSRs are read. | ||
2312 | */ | ||
2313 | kvm_queue_exception_e(vcpu, GP_VECTOR, 0); | ||
2314 | return 1; | ||
2315 | } | ||
2316 | |||
2317 | switch (msr_index) { | 2367 | switch (msr_index) { |
2318 | case MSR_IA32_FEATURE_CONTROL: | ||
2319 | if (nested_vmx_allowed(vcpu)) { | ||
2320 | *pdata = to_vmx(vcpu)->nested.msr_ia32_feature_control; | ||
2321 | break; | ||
2322 | } | ||
2323 | return 0; | ||
2324 | case MSR_IA32_VMX_BASIC: | 2368 | case MSR_IA32_VMX_BASIC: |
2325 | /* | 2369 | /* |
2326 | * This MSR reports some information about VMX support. We | 2370 | * This MSR reports some information about VMX support. We |
@@ -2387,34 +2431,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2387 | *pdata = nested_vmx_ept_caps; | 2431 | *pdata = nested_vmx_ept_caps; |
2388 | break; | 2432 | break; |
2389 | default: | 2433 | default: |
2390 | return 0; | ||
2391 | } | ||
2392 | |||
2393 | return 1; | ||
2394 | } | ||
2395 | |||
2396 | static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | ||
2397 | { | ||
2398 | u32 msr_index = msr_info->index; | ||
2399 | u64 data = msr_info->data; | ||
2400 | bool host_initialized = msr_info->host_initiated; | ||
2401 | |||
2402 | if (!nested_vmx_allowed(vcpu)) | ||
2403 | return 0; | ||
2404 | |||
2405 | if (msr_index == MSR_IA32_FEATURE_CONTROL) { | ||
2406 | if (!host_initialized && | ||
2407 | to_vmx(vcpu)->nested.msr_ia32_feature_control | ||
2408 | & FEATURE_CONTROL_LOCKED) | ||
2409 | return 0; | ||
2410 | to_vmx(vcpu)->nested.msr_ia32_feature_control = data; | ||
2411 | return 1; | 2434 | return 1; |
2412 | } | 2435 | } |
2413 | 2436 | ||
2414 | /* | ||
2415 | * No need to treat VMX capability MSRs specially: If we don't handle | ||
2416 | * them, handle_wrmsr will #GP(0), which is correct (they are readonly) | ||
2417 | */ | ||
2418 | return 0; | 2437 | return 0; |
2419 | } | 2438 | } |
2420 | 2439 | ||
@@ -2460,13 +2479,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2460 | case MSR_IA32_SYSENTER_ESP: | 2479 | case MSR_IA32_SYSENTER_ESP: |
2461 | data = vmcs_readl(GUEST_SYSENTER_ESP); | 2480 | data = vmcs_readl(GUEST_SYSENTER_ESP); |
2462 | break; | 2481 | break; |
2482 | case MSR_IA32_FEATURE_CONTROL: | ||
2483 | if (!nested_vmx_allowed(vcpu)) | ||
2484 | return 1; | ||
2485 | data = to_vmx(vcpu)->nested.msr_ia32_feature_control; | ||
2486 | break; | ||
2487 | case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: | ||
2488 | if (!nested_vmx_allowed(vcpu)) | ||
2489 | return 1; | ||
2490 | return vmx_get_vmx_msr(vcpu, msr_index, pdata); | ||
2463 | case MSR_TSC_AUX: | 2491 | case MSR_TSC_AUX: |
2464 | if (!to_vmx(vcpu)->rdtscp_enabled) | 2492 | if (!to_vmx(vcpu)->rdtscp_enabled) |
2465 | return 1; | 2493 | return 1; |
2466 | /* Otherwise falls through */ | 2494 | /* Otherwise falls through */ |
2467 | default: | 2495 | default: |
2468 | if (vmx_get_vmx_msr(vcpu, msr_index, pdata)) | ||
2469 | return 0; | ||
2470 | msr = find_msr_entry(to_vmx(vcpu), msr_index); | 2496 | msr = find_msr_entry(to_vmx(vcpu), msr_index); |
2471 | if (msr) { | 2497 | if (msr) { |
2472 | data = msr->data; | 2498 | data = msr->data; |
@@ -2479,6 +2505,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2479 | return 0; | 2505 | return 0; |
2480 | } | 2506 | } |
2481 | 2507 | ||
2508 | static void vmx_leave_nested(struct kvm_vcpu *vcpu); | ||
2509 | |||
2482 | /* | 2510 | /* |
2483 | * Writes msr value into into the appropriate "register". | 2511 | * Writes msr value into into the appropriate "register". |
2484 | * Returns 0 on success, non-0 otherwise. | 2512 | * Returns 0 on success, non-0 otherwise. |
@@ -2533,6 +2561,17 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2533 | case MSR_IA32_TSC_ADJUST: | 2561 | case MSR_IA32_TSC_ADJUST: |
2534 | ret = kvm_set_msr_common(vcpu, msr_info); | 2562 | ret = kvm_set_msr_common(vcpu, msr_info); |
2535 | break; | 2563 | break; |
2564 | case MSR_IA32_FEATURE_CONTROL: | ||
2565 | if (!nested_vmx_allowed(vcpu) || | ||
2566 | (to_vmx(vcpu)->nested.msr_ia32_feature_control & | ||
2567 | FEATURE_CONTROL_LOCKED && !msr_info->host_initiated)) | ||
2568 | return 1; | ||
2569 | vmx->nested.msr_ia32_feature_control = data; | ||
2570 | if (msr_info->host_initiated && data == 0) | ||
2571 | vmx_leave_nested(vcpu); | ||
2572 | break; | ||
2573 | case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: | ||
2574 | return 1; /* they are read-only */ | ||
2536 | case MSR_TSC_AUX: | 2575 | case MSR_TSC_AUX: |
2537 | if (!vmx->rdtscp_enabled) | 2576 | if (!vmx->rdtscp_enabled) |
2538 | return 1; | 2577 | return 1; |
@@ -2541,8 +2580,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2541 | return 1; | 2580 | return 1; |
2542 | /* Otherwise falls through */ | 2581 | /* Otherwise falls through */ |
2543 | default: | 2582 | default: |
2544 | if (vmx_set_vmx_msr(vcpu, msr_info)) | ||
2545 | break; | ||
2546 | msr = find_msr_entry(vmx, msr_index); | 2583 | msr = find_msr_entry(vmx, msr_index); |
2547 | if (msr) { | 2584 | if (msr) { |
2548 | msr->data = data; | 2585 | msr->data = data; |
@@ -3182,14 +3219,10 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
3182 | vmx_load_host_state(to_vmx(vcpu)); | 3219 | vmx_load_host_state(to_vmx(vcpu)); |
3183 | vcpu->arch.efer = efer; | 3220 | vcpu->arch.efer = efer; |
3184 | if (efer & EFER_LMA) { | 3221 | if (efer & EFER_LMA) { |
3185 | vmcs_write32(VM_ENTRY_CONTROLS, | 3222 | vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); |
3186 | vmcs_read32(VM_ENTRY_CONTROLS) | | ||
3187 | VM_ENTRY_IA32E_MODE); | ||
3188 | msr->data = efer; | 3223 | msr->data = efer; |
3189 | } else { | 3224 | } else { |
3190 | vmcs_write32(VM_ENTRY_CONTROLS, | 3225 | vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); |
3191 | vmcs_read32(VM_ENTRY_CONTROLS) & | ||
3192 | ~VM_ENTRY_IA32E_MODE); | ||
3193 | 3226 | ||
3194 | msr->data = efer & ~EFER_LME; | 3227 | msr->data = efer & ~EFER_LME; |
3195 | } | 3228 | } |
@@ -3217,9 +3250,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu) | |||
3217 | 3250 | ||
3218 | static void exit_lmode(struct kvm_vcpu *vcpu) | 3251 | static void exit_lmode(struct kvm_vcpu *vcpu) |
3219 | { | 3252 | { |
3220 | vmcs_write32(VM_ENTRY_CONTROLS, | 3253 | vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); |
3221 | vmcs_read32(VM_ENTRY_CONTROLS) | ||
3222 | & ~VM_ENTRY_IA32E_MODE); | ||
3223 | vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); | 3254 | vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); |
3224 | } | 3255 | } |
3225 | 3256 | ||
@@ -4346,10 +4377,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
4346 | ++vmx->nmsrs; | 4377 | ++vmx->nmsrs; |
4347 | } | 4378 | } |
4348 | 4379 | ||
4349 | vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); | 4380 | |
4381 | vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); | ||
4350 | 4382 | ||
4351 | /* 22.2.1, 20.8.1 */ | 4383 | /* 22.2.1, 20.8.1 */ |
4352 | vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); | 4384 | vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl); |
4353 | 4385 | ||
4354 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); | 4386 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); |
4355 | set_cr4_guest_host_mask(vmx); | 4387 | set_cr4_guest_host_mask(vmx); |
@@ -4588,15 +4620,12 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | |||
4588 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | 4620 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) |
4589 | { | 4621 | { |
4590 | if (is_guest_mode(vcpu)) { | 4622 | if (is_guest_mode(vcpu)) { |
4591 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
4592 | |||
4593 | if (to_vmx(vcpu)->nested.nested_run_pending) | 4623 | if (to_vmx(vcpu)->nested.nested_run_pending) |
4594 | return 0; | 4624 | return 0; |
4595 | if (nested_exit_on_nmi(vcpu)) { | 4625 | if (nested_exit_on_nmi(vcpu)) { |
4596 | nested_vmx_vmexit(vcpu); | 4626 | nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, |
4597 | vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; | 4627 | NMI_VECTOR | INTR_TYPE_NMI_INTR | |
4598 | vmcs12->vm_exit_intr_info = NMI_VECTOR | | 4628 | INTR_INFO_VALID_MASK, 0); |
4599 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK; | ||
4600 | /* | 4629 | /* |
4601 | * The NMI-triggered VM exit counts as injection: | 4630 | * The NMI-triggered VM exit counts as injection: |
4602 | * clear this one and block further NMIs. | 4631 | * clear this one and block further NMIs. |
@@ -4618,15 +4647,11 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | |||
4618 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) | 4647 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) |
4619 | { | 4648 | { |
4620 | if (is_guest_mode(vcpu)) { | 4649 | if (is_guest_mode(vcpu)) { |
4621 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
4622 | |||
4623 | if (to_vmx(vcpu)->nested.nested_run_pending) | 4650 | if (to_vmx(vcpu)->nested.nested_run_pending) |
4624 | return 0; | 4651 | return 0; |
4625 | if (nested_exit_on_intr(vcpu)) { | 4652 | if (nested_exit_on_intr(vcpu)) { |
4626 | nested_vmx_vmexit(vcpu); | 4653 | nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, |
4627 | vmcs12->vm_exit_reason = | 4654 | 0, 0); |
4628 | EXIT_REASON_EXTERNAL_INTERRUPT; | ||
4629 | vmcs12->vm_exit_intr_info = 0; | ||
4630 | /* | 4655 | /* |
4631 | * fall through to normal code, but now in L1, not L2 | 4656 | * fall through to normal code, but now in L1, not L2 |
4632 | */ | 4657 | */ |
@@ -4812,7 +4837,8 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
4812 | dr6 = vmcs_readl(EXIT_QUALIFICATION); | 4837 | dr6 = vmcs_readl(EXIT_QUALIFICATION); |
4813 | if (!(vcpu->guest_debug & | 4838 | if (!(vcpu->guest_debug & |
4814 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { | 4839 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { |
4815 | vcpu->arch.dr6 = dr6 | DR6_FIXED_1; | 4840 | vcpu->arch.dr6 &= ~15; |
4841 | vcpu->arch.dr6 |= dr6; | ||
4816 | kvm_queue_exception(vcpu, DB_VECTOR); | 4842 | kvm_queue_exception(vcpu, DB_VECTOR); |
4817 | return 1; | 4843 | return 1; |
4818 | } | 4844 | } |
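The handle_exception() change above merges only the low status bits of the exit qualification into the guest's DR6 instead of overwriting the whole register. A stand-alone sketch of that merge, with illustrative values:

#include <stdio.h>

int main(void)
{
	unsigned long dr6 = 0xffff0ff0UL;	/* guest DR6 with the architectural fixed bits set */
	unsigned long exit_qual = 0x2UL;	/* debug exit reporting breakpoint 1 */

	dr6 &= ~15UL;		/* clear B0-B3 */
	dr6 |= exit_qual;	/* fold in the bits from this #DB */

	printf("guest DR6 = %#lx\n", dr6);
	return 0;
}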
@@ -5080,14 +5106,27 @@ static int handle_dr(struct kvm_vcpu *vcpu) | |||
5080 | reg = DEBUG_REG_ACCESS_REG(exit_qualification); | 5106 | reg = DEBUG_REG_ACCESS_REG(exit_qualification); |
5081 | if (exit_qualification & TYPE_MOV_FROM_DR) { | 5107 | if (exit_qualification & TYPE_MOV_FROM_DR) { |
5082 | unsigned long val; | 5108 | unsigned long val; |
5083 | if (!kvm_get_dr(vcpu, dr, &val)) | 5109 | |
5084 | kvm_register_write(vcpu, reg, val); | 5110 | if (kvm_get_dr(vcpu, dr, &val)) |
5111 | return 1; | ||
5112 | kvm_register_write(vcpu, reg, val); | ||
5085 | } else | 5113 | } else |
5086 | kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]); | 5114 | if (kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg])) |
5115 | return 1; | ||
5116 | |||
5087 | skip_emulated_instruction(vcpu); | 5117 | skip_emulated_instruction(vcpu); |
5088 | return 1; | 5118 | return 1; |
5089 | } | 5119 | } |
5090 | 5120 | ||
5121 | static u64 vmx_get_dr6(struct kvm_vcpu *vcpu) | ||
5122 | { | ||
5123 | return vcpu->arch.dr6; | ||
5124 | } | ||
5125 | |||
5126 | static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) | ||
5127 | { | ||
5128 | } | ||
5129 | |||
5091 | static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) | 5130 | static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) |
5092 | { | 5131 | { |
5093 | vmcs_writel(GUEST_DR7, val); | 5132 | vmcs_writel(GUEST_DR7, val); |
@@ -6460,11 +6499,8 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, | |||
6460 | int size; | 6499 | int size; |
6461 | u8 b; | 6500 | u8 b; |
6462 | 6501 | ||
6463 | if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING)) | ||
6464 | return 1; | ||
6465 | |||
6466 | if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) | 6502 | if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) |
6467 | return 0; | 6503 | return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); |
6468 | 6504 | ||
6469 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 6505 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
6470 | 6506 | ||
@@ -6628,6 +6664,13 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6628 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 6664 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
6629 | u32 exit_reason = vmx->exit_reason; | 6665 | u32 exit_reason = vmx->exit_reason; |
6630 | 6666 | ||
6667 | trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, | ||
6668 | vmcs_readl(EXIT_QUALIFICATION), | ||
6669 | vmx->idt_vectoring_info, | ||
6670 | intr_info, | ||
6671 | vmcs_read32(VM_EXIT_INTR_ERROR_CODE), | ||
6672 | KVM_ISA_VMX); | ||
6673 | |||
6631 | if (vmx->nested.nested_run_pending) | 6674 | if (vmx->nested.nested_run_pending) |
6632 | return 0; | 6675 | return 0; |
6633 | 6676 | ||
@@ -6777,7 +6820,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
6777 | return handle_invalid_guest_state(vcpu); | 6820 | return handle_invalid_guest_state(vcpu); |
6778 | 6821 | ||
6779 | if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { | 6822 | if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { |
6780 | nested_vmx_vmexit(vcpu); | 6823 | nested_vmx_vmexit(vcpu, exit_reason, |
6824 | vmcs_read32(VM_EXIT_INTR_INFO), | ||
6825 | vmcs_readl(EXIT_QUALIFICATION)); | ||
6781 | return 1; | 6826 | return 1; |
6782 | } | 6827 | } |
6783 | 6828 | ||
@@ -7332,8 +7377,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
7332 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 7377 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
7333 | 7378 | ||
7334 | free_vpid(vmx); | 7379 | free_vpid(vmx); |
7335 | free_nested(vmx); | ||
7336 | free_loaded_vmcs(vmx->loaded_vmcs); | 7380 | free_loaded_vmcs(vmx->loaded_vmcs); |
7381 | free_nested(vmx); | ||
7337 | kfree(vmx->guest_msrs); | 7382 | kfree(vmx->guest_msrs); |
7338 | kvm_vcpu_uninit(vcpu); | 7383 | kvm_vcpu_uninit(vcpu); |
7339 | kmem_cache_free(kvm_vcpu_cache, vmx); | 7384 | kmem_cache_free(kvm_vcpu_cache, vmx); |
@@ -7518,15 +7563,14 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | |||
7518 | static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, | 7563 | static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, |
7519 | struct x86_exception *fault) | 7564 | struct x86_exception *fault) |
7520 | { | 7565 | { |
7521 | struct vmcs12 *vmcs12; | 7566 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
7522 | nested_vmx_vmexit(vcpu); | 7567 | u32 exit_reason; |
7523 | vmcs12 = get_vmcs12(vcpu); | ||
7524 | 7568 | ||
7525 | if (fault->error_code & PFERR_RSVD_MASK) | 7569 | if (fault->error_code & PFERR_RSVD_MASK) |
7526 | vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; | 7570 | exit_reason = EXIT_REASON_EPT_MISCONFIG; |
7527 | else | 7571 | else |
7528 | vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION; | 7572 | exit_reason = EXIT_REASON_EPT_VIOLATION; |
7529 | vmcs12->exit_qualification = vcpu->arch.exit_qualification; | 7573 | nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification); |
7530 | vmcs12->guest_physical_address = fault->address; | 7574 | vmcs12->guest_physical_address = fault->address; |
7531 | } | 7575 | } |
7532 | 7576 | ||
@@ -7564,7 +7608,9 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, | |||
7564 | 7608 | ||
7565 | /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ | 7609 | /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ |
7566 | if (vmcs12->exception_bitmap & (1u << PF_VECTOR)) | 7610 | if (vmcs12->exception_bitmap & (1u << PF_VECTOR)) |
7567 | nested_vmx_vmexit(vcpu); | 7611 | nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason, |
7612 | vmcs_read32(VM_EXIT_INTR_INFO), | ||
7613 | vmcs_readl(EXIT_QUALIFICATION)); | ||
7568 | else | 7614 | else |
7569 | kvm_inject_page_fault(vcpu, fault); | 7615 | kvm_inject_page_fault(vcpu, fault); |
7570 | } | 7616 | } |
@@ -7706,6 +7752,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7706 | else | 7752 | else |
7707 | vmcs_write64(APIC_ACCESS_ADDR, | 7753 | vmcs_write64(APIC_ACCESS_ADDR, |
7708 | page_to_phys(vmx->nested.apic_access_page)); | 7754 | page_to_phys(vmx->nested.apic_access_page)); |
7755 | } else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) { | ||
7756 | exec_control |= | ||
7757 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
7758 | vmcs_write64(APIC_ACCESS_ADDR, | ||
7759 | page_to_phys(vcpu->kvm->arch.apic_access_page)); | ||
7709 | } | 7760 | } |
7710 | 7761 | ||
7711 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 7762 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
@@ -7759,12 +7810,12 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7759 | exit_control = vmcs_config.vmexit_ctrl; | 7810 | exit_control = vmcs_config.vmexit_ctrl; |
7760 | if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) | 7811 | if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) |
7761 | exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; | 7812 | exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; |
7762 | vmcs_write32(VM_EXIT_CONTROLS, exit_control); | 7813 | vm_exit_controls_init(vmx, exit_control); |
7763 | 7814 | ||
7764 | /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are | 7815 | /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are |
7765 | * emulated by vmx_set_efer(), below. | 7816 | * emulated by vmx_set_efer(), below. |
7766 | */ | 7817 | */ |
7767 | vmcs_write32(VM_ENTRY_CONTROLS, | 7818 | vm_entry_controls_init(vmx, |
7768 | (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & | 7819 | (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & |
7769 | ~VM_ENTRY_IA32E_MODE) | | 7820 | ~VM_ENTRY_IA32E_MODE) | |
7770 | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); | 7821 | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); |
@@ -7882,7 +7933,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
7882 | return 1; | 7933 | return 1; |
7883 | } | 7934 | } |
7884 | 7935 | ||
7885 | if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) { | 7936 | if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && |
7937 | vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) { | ||
7886 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 7938 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); |
7887 | return 1; | 7939 | return 1; |
7888 | } | 7940 | } |
@@ -7994,8 +8046,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
7994 | 8046 | ||
7995 | enter_guest_mode(vcpu); | 8047 | enter_guest_mode(vcpu); |
7996 | 8048 | ||
7997 | vmx->nested.nested_run_pending = 1; | ||
7998 | |||
7999 | vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); | 8049 | vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); |
8000 | 8050 | ||
8001 | cpu = get_cpu(); | 8051 | cpu = get_cpu(); |
@@ -8011,6 +8061,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
8011 | 8061 | ||
8012 | prepare_vmcs02(vcpu, vmcs12); | 8062 | prepare_vmcs02(vcpu, vmcs12); |
8013 | 8063 | ||
8064 | if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) | ||
8065 | return kvm_emulate_halt(vcpu); | ||
8066 | |||
8067 | vmx->nested.nested_run_pending = 1; | ||
8068 | |||
8014 | /* | 8069 | /* |
8015 | * Note no nested_vmx_succeed or nested_vmx_fail here. At this point | 8070 | * Note no nested_vmx_succeed or nested_vmx_fail here. At this point |
8016 | * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet | 8071 | * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet |
@@ -8110,7 +8165,9 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, | |||
8110 | * exit-information fields only. Other fields are modified by L1 with VMWRITE, | 8165 | * exit-information fields only. Other fields are modified by L1 with VMWRITE, |
8111 | * which already writes to vmcs12 directly. | 8166 | * which already writes to vmcs12 directly. |
8112 | */ | 8167 | */ |
8113 | static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | 8168 | static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, |
8169 | u32 exit_reason, u32 exit_intr_info, | ||
8170 | unsigned long exit_qualification) | ||
8114 | { | 8171 | { |
8115 | /* update guest state fields: */ | 8172 | /* update guest state fields: */ |
8116 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); | 8173 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); |
@@ -8162,6 +8219,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
8162 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | 8219 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); |
8163 | vmcs12->guest_pending_dbg_exceptions = | 8220 | vmcs12->guest_pending_dbg_exceptions = |
8164 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); | 8221 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); |
8222 | if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) | ||
8223 | vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT; | ||
8224 | else | ||
8225 | vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; | ||
8165 | 8226 | ||
8166 | if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && | 8227 | if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && |
8167 | (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) | 8228 | (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) |
@@ -8186,7 +8247,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
8186 | 8247 | ||
8187 | vmcs12->vm_entry_controls = | 8248 | vmcs12->vm_entry_controls = |
8188 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | | 8249 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | |
8189 | (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); | 8250 | (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); |
8190 | 8251 | ||
8191 | /* TODO: These cannot have changed unless we have MSR bitmaps and | 8252 | /* TODO: These cannot have changed unless we have MSR bitmaps and |
8192 | * the relevant bit asks not to trap the change */ | 8253 | * the relevant bit asks not to trap the change */ |
@@ -8201,10 +8262,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
8201 | 8262 | ||
8202 | /* update exit information fields: */ | 8263 | /* update exit information fields: */ |
8203 | 8264 | ||
8204 | vmcs12->vm_exit_reason = to_vmx(vcpu)->exit_reason; | 8265 | vmcs12->vm_exit_reason = exit_reason; |
8205 | vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 8266 | vmcs12->exit_qualification = exit_qualification; |
8206 | 8267 | ||
8207 | vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 8268 | vmcs12->vm_exit_intr_info = exit_intr_info; |
8208 | if ((vmcs12->vm_exit_intr_info & | 8269 | if ((vmcs12->vm_exit_intr_info & |
8209 | (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == | 8270 | (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == |
8210 | (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) | 8271 | (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) |
@@ -8370,7 +8431,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
8370 | * and modify vmcs12 to make it see what it would expect to see there if | 8431 | * and modify vmcs12 to make it see what it would expect to see there if |
8371 | * L2 was its real guest. Must only be called when in L2 (is_guest_mode()) | 8432 | * L2 was its real guest. Must only be called when in L2 (is_guest_mode()) |
8372 | */ | 8433 | */ |
8373 | static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | 8434 | static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, |
8435 | u32 exit_intr_info, | ||
8436 | unsigned long exit_qualification) | ||
8374 | { | 8437 | { |
8375 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 8438 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
8376 | int cpu; | 8439 | int cpu; |
@@ -8380,7 +8443,15 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
8380 | WARN_ON_ONCE(vmx->nested.nested_run_pending); | 8443 | WARN_ON_ONCE(vmx->nested.nested_run_pending); |
8381 | 8444 | ||
8382 | leave_guest_mode(vcpu); | 8445 | leave_guest_mode(vcpu); |
8383 | prepare_vmcs12(vcpu, vmcs12); | 8446 | prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, |
8447 | exit_qualification); | ||
8448 | |||
8449 | trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason, | ||
8450 | vmcs12->exit_qualification, | ||
8451 | vmcs12->idt_vectoring_info_field, | ||
8452 | vmcs12->vm_exit_intr_info, | ||
8453 | vmcs12->vm_exit_intr_error_code, | ||
8454 | KVM_ISA_VMX); | ||
8384 | 8455 | ||
8385 | cpu = get_cpu(); | 8456 | cpu = get_cpu(); |
8386 | vmx->loaded_vmcs = &vmx->vmcs01; | 8457 | vmx->loaded_vmcs = &vmx->vmcs01; |
@@ -8389,6 +8460,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
8389 | vcpu->cpu = cpu; | 8460 | vcpu->cpu = cpu; |
8390 | put_cpu(); | 8461 | put_cpu(); |
8391 | 8462 | ||
8463 | vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS)); | ||
8464 | vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS)); | ||
8392 | vmx_segment_cache_clear(vmx); | 8465 | vmx_segment_cache_clear(vmx); |
8393 | 8466 | ||
8394 | /* if no vmcs02 cache requested, remove the one we used */ | 8467 | /* if no vmcs02 cache requested, remove the one we used */ |
@@ -8424,6 +8497,16 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
8424 | } | 8497 | } |
8425 | 8498 | ||
8426 | /* | 8499 | /* |
8500 | * Forcibly leave nested mode in order to be able to reset the VCPU later on. | ||
8501 | */ | ||
8502 | static void vmx_leave_nested(struct kvm_vcpu *vcpu) | ||
8503 | { | ||
8504 | if (is_guest_mode(vcpu)) | ||
8505 | nested_vmx_vmexit(vcpu, -1, 0, 0); | ||
8506 | free_nested(to_vmx(vcpu)); | ||
8507 | } | ||
8508 | |||
8509 | /* | ||
8427 | * L1's failure to enter L2 is a subset of a normal exit, as explained in | 8510 | * L1's failure to enter L2 is a subset of a normal exit, as explained in |
8428 | * 23.7 "VM-entry failures during or after loading guest state" (this also | 8511 | * 23.7 "VM-entry failures during or after loading guest state" (this also |
8429 | * lists the acceptable exit-reason and exit-qualification parameters). | 8512 | * lists the acceptable exit-reason and exit-qualification parameters). |
@@ -8486,6 +8569,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
8486 | .set_idt = vmx_set_idt, | 8569 | .set_idt = vmx_set_idt, |
8487 | .get_gdt = vmx_get_gdt, | 8570 | .get_gdt = vmx_get_gdt, |
8488 | .set_gdt = vmx_set_gdt, | 8571 | .set_gdt = vmx_set_gdt, |
8572 | .get_dr6 = vmx_get_dr6, | ||
8573 | .set_dr6 = vmx_set_dr6, | ||
8489 | .set_dr7 = vmx_set_dr7, | 8574 | .set_dr7 = vmx_set_dr7, |
8490 | .cache_reg = vmx_cache_reg, | 8575 | .cache_reg = vmx_cache_reg, |
8491 | .get_rflags = vmx_get_rflags, | 8576 | .get_rflags = vmx_get_rflags, |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5d004da1e35d..0c76f7cfdb32 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -94,6 +94,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); | |||
94 | static bool ignore_msrs = 0; | 94 | static bool ignore_msrs = 0; |
95 | module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); | 95 | module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); |
96 | 96 | ||
97 | unsigned int min_timer_period_us = 500; | ||
98 | module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); | ||
99 | |||
97 | bool kvm_has_tsc_control; | 100 | bool kvm_has_tsc_control; |
98 | EXPORT_SYMBOL_GPL(kvm_has_tsc_control); | 101 | EXPORT_SYMBOL_GPL(kvm_has_tsc_control); |
99 | u32 kvm_max_guest_tsc_khz; | 102 | u32 kvm_max_guest_tsc_khz; |
@@ -719,6 +722,12 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | |||
719 | } | 722 | } |
720 | EXPORT_SYMBOL_GPL(kvm_get_cr8); | 723 | EXPORT_SYMBOL_GPL(kvm_get_cr8); |
721 | 724 | ||
725 | static void kvm_update_dr6(struct kvm_vcpu *vcpu) | ||
726 | { | ||
727 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
728 | kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6); | ||
729 | } | ||
730 | |||
722 | static void kvm_update_dr7(struct kvm_vcpu *vcpu) | 731 | static void kvm_update_dr7(struct kvm_vcpu *vcpu) |
723 | { | 732 | { |
724 | unsigned long dr7; | 733 | unsigned long dr7; |
@@ -747,6 +756,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | |||
747 | if (val & 0xffffffff00000000ULL) | 756 | if (val & 0xffffffff00000000ULL) |
748 | return -1; /* #GP */ | 757 | return -1; /* #GP */ |
749 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | 758 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; |
759 | kvm_update_dr6(vcpu); | ||
750 | break; | 760 | break; |
751 | case 5: | 761 | case 5: |
752 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | 762 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
@@ -788,7 +798,10 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) | |||
788 | return 1; | 798 | return 1; |
789 | /* fall through */ | 799 | /* fall through */ |
790 | case 6: | 800 | case 6: |
791 | *val = vcpu->arch.dr6; | 801 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
802 | *val = vcpu->arch.dr6; | ||
803 | else | ||
804 | *val = kvm_x86_ops->get_dr6(vcpu); | ||
792 | break; | 805 | break; |
793 | case 5: | 806 | case 5: |
794 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | 807 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
@@ -836,11 +849,12 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); | |||
836 | * kvm-specific. Those are put in the beginning of the list. | 849 | * kvm-specific. Those are put in the beginning of the list. |
837 | */ | 850 | */ |
838 | 851 | ||
839 | #define KVM_SAVE_MSRS_BEGIN 10 | 852 | #define KVM_SAVE_MSRS_BEGIN 12 |
840 | static u32 msrs_to_save[] = { | 853 | static u32 msrs_to_save[] = { |
841 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 854 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
842 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | 855 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, |
843 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 856 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
857 | HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, | ||
844 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, | 858 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, |
845 | MSR_KVM_PV_EOI_EN, | 859 | MSR_KVM_PV_EOI_EN, |
846 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 860 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
@@ -1275,8 +1289,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
1275 | kvm->arch.last_tsc_write = data; | 1289 | kvm->arch.last_tsc_write = data; |
1276 | kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; | 1290 | kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; |
1277 | 1291 | ||
1278 | /* Reset of TSC must disable overshoot protection below */ | ||
1279 | vcpu->arch.hv_clock.tsc_timestamp = 0; | ||
1280 | vcpu->arch.last_guest_tsc = data; | 1292 | vcpu->arch.last_guest_tsc = data; |
1281 | 1293 | ||
1282 | /* Keep track of which generation this VCPU has synchronized to */ | 1294 | /* Keep track of which generation this VCPU has synchronized to */ |
@@ -1484,7 +1496,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1484 | unsigned long flags, this_tsc_khz; | 1496 | unsigned long flags, this_tsc_khz; |
1485 | struct kvm_vcpu_arch *vcpu = &v->arch; | 1497 | struct kvm_vcpu_arch *vcpu = &v->arch; |
1486 | struct kvm_arch *ka = &v->kvm->arch; | 1498 | struct kvm_arch *ka = &v->kvm->arch; |
1487 | s64 kernel_ns, max_kernel_ns; | 1499 | s64 kernel_ns; |
1488 | u64 tsc_timestamp, host_tsc; | 1500 | u64 tsc_timestamp, host_tsc; |
1489 | struct pvclock_vcpu_time_info guest_hv_clock; | 1501 | struct pvclock_vcpu_time_info guest_hv_clock; |
1490 | u8 pvclock_flags; | 1502 | u8 pvclock_flags; |
@@ -1543,37 +1555,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1543 | if (!vcpu->pv_time_enabled) | 1555 | if (!vcpu->pv_time_enabled) |
1544 | return 0; | 1556 | return 0; |
1545 | 1557 | ||
1546 | /* | ||
1547 | * Time as measured by the TSC may go backwards when resetting the base | ||
1548 | * tsc_timestamp. The reason for this is that the TSC resolution is | ||
1549 | * higher than the resolution of the other clock scales. Thus, many | ||
1550 | * possible measurments of the TSC correspond to one measurement of any | ||
1551 | * other clock, and so a spread of values is possible. This is not a | ||
1552 | * problem for the computation of the nanosecond clock; with TSC rates | ||
1553 | * around 1GHZ, there can only be a few cycles which correspond to one | ||
1554 | * nanosecond value, and any path through this code will inevitably | ||
1555 | * take longer than that. However, with the kernel_ns value itself, | ||
1556 | * the precision may be much lower, down to HZ granularity. If the | ||
1557 | * first sampling of TSC against kernel_ns ends in the low part of the | ||
1558 | * range, and the second in the high end of the range, we can get: | ||
1559 | * | ||
1560 | * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new | ||
1561 | * | ||
1562 | * As the sampling errors potentially range in the thousands of cycles, | ||
1563 | * it is possible such a time value has already been observed by the | ||
1564 | * guest. To protect against this, we must compute the system time as | ||
1565 | * observed by the guest and ensure the new system time is greater. | ||
1566 | */ | ||
1567 | max_kernel_ns = 0; | ||
1568 | if (vcpu->hv_clock.tsc_timestamp) { | ||
1569 | max_kernel_ns = vcpu->last_guest_tsc - | ||
1570 | vcpu->hv_clock.tsc_timestamp; | ||
1571 | max_kernel_ns = pvclock_scale_delta(max_kernel_ns, | ||
1572 | vcpu->hv_clock.tsc_to_system_mul, | ||
1573 | vcpu->hv_clock.tsc_shift); | ||
1574 | max_kernel_ns += vcpu->last_kernel_ns; | ||
1575 | } | ||
1576 | |||
1577 | if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { | 1558 | if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { |
1578 | kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, | 1559 | kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, |
1579 | &vcpu->hv_clock.tsc_shift, | 1560 | &vcpu->hv_clock.tsc_shift, |
@@ -1581,14 +1562,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1581 | vcpu->hw_tsc_khz = this_tsc_khz; | 1562 | vcpu->hw_tsc_khz = this_tsc_khz; |
1582 | } | 1563 | } |
1583 | 1564 | ||
1584 | /* with a master <monotonic time, tsc value> tuple, | ||
1585 | * pvclock clock reads always increase at the (scaled) rate | ||
1586 | * of guest TSC - no need to deal with sampling errors. | ||
1587 | */ | ||
1588 | if (!use_master_clock) { | ||
1589 | if (max_kernel_ns > kernel_ns) | ||
1590 | kernel_ns = max_kernel_ns; | ||
1591 | } | ||
1592 | /* With all the info we got, fill in the values */ | 1565 | /* With all the info we got, fill in the values */ |
1593 | vcpu->hv_clock.tsc_timestamp = tsc_timestamp; | 1566 | vcpu->hv_clock.tsc_timestamp = tsc_timestamp; |
1594 | vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; | 1567 | vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; |
@@ -1826,6 +1799,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) | |||
1826 | switch (msr) { | 1799 | switch (msr) { |
1827 | case HV_X64_MSR_GUEST_OS_ID: | 1800 | case HV_X64_MSR_GUEST_OS_ID: |
1828 | case HV_X64_MSR_HYPERCALL: | 1801 | case HV_X64_MSR_HYPERCALL: |
1802 | case HV_X64_MSR_REFERENCE_TSC: | ||
1803 | case HV_X64_MSR_TIME_REF_COUNT: | ||
1829 | r = true; | 1804 | r = true; |
1830 | break; | 1805 | break; |
1831 | } | 1806 | } |
@@ -1867,6 +1842,20 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1867 | kvm->arch.hv_hypercall = data; | 1842 | kvm->arch.hv_hypercall = data; |
1868 | break; | 1843 | break; |
1869 | } | 1844 | } |
1845 | case HV_X64_MSR_REFERENCE_TSC: { | ||
1846 | u64 gfn; | ||
1847 | HV_REFERENCE_TSC_PAGE tsc_ref; | ||
1848 | memset(&tsc_ref, 0, sizeof(tsc_ref)); | ||
1849 | kvm->arch.hv_tsc_page = data; | ||
1850 | if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) | ||
1851 | break; | ||
1852 | gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; | ||
1853 | if (kvm_write_guest(kvm, data, | ||
1854 | &tsc_ref, sizeof(tsc_ref))) | ||
1855 | return 1; | ||
1856 | mark_page_dirty(kvm, gfn); | ||
1857 | break; | ||
1858 | } | ||
1870 | default: | 1859 | default: |
1871 | vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " | 1860 | vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " |
1872 | "data 0x%llx\n", msr, data); | 1861 | "data 0x%llx\n", msr, data); |
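The HV_X64_MSR_REFERENCE_TSC write handler above records the MSR value and, only when the enable bit is set, derives the guest frame number and zero-fills the page (a zeroed page leaves the sequence field at 0, which guests interpret as the page not yet being valid). A rough, hypothetical guest-side sketch, assuming the conventional layout of the MSR value with bit 0 as the enable bit and the frame number starting at HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; this is illustrative, not code from the patch:

	/* Hypothetical guest-side enable of the TSC reference page.
	 * tsc_page_gfn is a placeholder for the guest frame number of a
	 * page the guest has set aside for this purpose. */
	u64 val = (tsc_page_gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT) |
		  HV_X64_MSR_TSC_REFERENCE_ENABLE;
	wrmsrl(HV_X64_MSR_REFERENCE_TSC, val);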
@@ -2291,6 +2280,14 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
2291 | case HV_X64_MSR_HYPERCALL: | 2280 | case HV_X64_MSR_HYPERCALL: |
2292 | data = kvm->arch.hv_hypercall; | 2281 | data = kvm->arch.hv_hypercall; |
2293 | break; | 2282 | break; |
2283 | case HV_X64_MSR_TIME_REF_COUNT: { | ||
2284 | data = | ||
2285 | div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); | ||
2286 | break; | ||
2287 | } | ||
2288 | case HV_X64_MSR_REFERENCE_TSC: | ||
2289 | data = kvm->arch.hv_tsc_page; | ||
2290 | break; | ||
2294 | default: | 2291 | default: |
2295 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | 2292 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); |
2296 | return 1; | 2293 | return 1; |
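The read side above reports the Hyper-V reference counter in 100 ns units, hence the div_u64(..., 100) applied to the kernel-ns clock plus the kvmclock offset. A trivial sketch of the unit conversion, for illustration only:

	/* Illustration only: the value returned for HV_X64_MSR_TIME_REF_COUNT
	 * counts 100 ns intervals since the guest's clock epoch. */
	static inline u64 hv_ref_count_to_ns(u64 ref_count)
	{
		return ref_count * 100;
	}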
@@ -2604,6 +2601,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
2604 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT | 2601 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT |
2605 | case KVM_CAP_ASSIGN_DEV_IRQ: | 2602 | case KVM_CAP_ASSIGN_DEV_IRQ: |
2606 | case KVM_CAP_PCI_2_3: | 2603 | case KVM_CAP_PCI_2_3: |
2604 | case KVM_CAP_HYPERV_TIME: | ||
2607 | #endif | 2605 | #endif |
2608 | r = 1; | 2606 | r = 1; |
2609 | break; | 2607 | break; |
@@ -2972,8 +2970,11 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2972 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, | 2970 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, |
2973 | struct kvm_debugregs *dbgregs) | 2971 | struct kvm_debugregs *dbgregs) |
2974 | { | 2972 | { |
2973 | unsigned long val; | ||
2974 | |||
2975 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); | 2975 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); |
2976 | dbgregs->dr6 = vcpu->arch.dr6; | 2976 | _kvm_get_dr(vcpu, 6, &val); |
2977 | dbgregs->dr6 = val; | ||
2977 | dbgregs->dr7 = vcpu->arch.dr7; | 2978 | dbgregs->dr7 = vcpu->arch.dr7; |
2978 | dbgregs->flags = 0; | 2979 | dbgregs->flags = 0; |
2979 | memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); | 2980 | memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); |
@@ -2987,7 +2988,9 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | |||
2987 | 2988 | ||
2988 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); | 2989 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); |
2989 | vcpu->arch.dr6 = dbgregs->dr6; | 2990 | vcpu->arch.dr6 = dbgregs->dr6; |
2991 | kvm_update_dr6(vcpu); | ||
2990 | vcpu->arch.dr7 = dbgregs->dr7; | 2992 | vcpu->arch.dr7 = dbgregs->dr7; |
2993 | kvm_update_dr7(vcpu); | ||
2991 | 2994 | ||
2992 | return 0; | 2995 | return 0; |
2993 | } | 2996 | } |
@@ -5834,6 +5837,11 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) | |||
5834 | kvm_apic_update_tmr(vcpu, tmr); | 5837 | kvm_apic_update_tmr(vcpu, tmr); |
5835 | } | 5838 | } |
5836 | 5839 | ||
5840 | /* | ||
5841 | * Returns 1 to let __vcpu_run() continue the guest execution loop without | ||
5842 | * exiting to userspace. Otherwise, the value will be returned to | ||
5843 | * userspace. | ||
5844 | */ | ||
5837 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 5845 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
5838 | { | 5846 | { |
5839 | int r; | 5847 | int r; |
@@ -6089,7 +6097,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
6089 | } | 6097 | } |
6090 | if (need_resched()) { | 6098 | if (need_resched()) { |
6091 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 6099 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
6092 | kvm_resched(vcpu); | 6100 | cond_resched(); |
6093 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | 6101 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
6094 | } | 6102 | } |
6095 | } | 6103 | } |
@@ -6717,6 +6725,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) | |||
6717 | 6725 | ||
6718 | memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); | 6726 | memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); |
6719 | vcpu->arch.dr6 = DR6_FIXED_1; | 6727 | vcpu->arch.dr6 = DR6_FIXED_1; |
6728 | kvm_update_dr6(vcpu); | ||
6720 | vcpu->arch.dr7 = DR7_FIXED_1; | 6729 | vcpu->arch.dr7 = DR7_FIXED_1; |
6721 | kvm_update_dr7(vcpu); | 6730 | kvm_update_dr7(vcpu); |
6722 | 6731 | ||
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 587fb9ede436..8da5823bcde6 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -125,5 +125,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, | |||
125 | #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) | 125 | #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) |
126 | extern u64 host_xcr0; | 126 | extern u64 host_xcr0; |
127 | 127 | ||
128 | extern unsigned int min_timer_period_us; | ||
129 | |||
128 | extern struct static_key kvm_no_apic_vcpu; | 130 | extern struct static_key kvm_no_apic_vcpu; |
129 | #endif | 131 | #endif |
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index a30ca15be21c..dee945d55594 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
@@ -186,7 +186,7 @@ ENTRY(copy_user_generic_unrolled) | |||
186 | 30: shll $6,%ecx | 186 | 30: shll $6,%ecx |
187 | addl %ecx,%edx | 187 | addl %ecx,%edx |
188 | jmp 60f | 188 | jmp 60f |
189 | 40: lea (%rdx,%rcx,8),%rdx | 189 | 40: leal (%rdx,%rcx,8),%edx |
190 | jmp 60f | 190 | jmp 60f |
191 | 50: movl %ecx,%edx | 191 | 50: movl %ecx,%edx |
192 | 60: jmp copy_user_handle_tail /* ecx is zerorest also */ | 192 | 60: jmp copy_user_handle_tail /* ecx is zerorest also */ |
@@ -236,8 +236,6 @@ ENDPROC(copy_user_generic_unrolled) | |||
236 | ENTRY(copy_user_generic_string) | 236 | ENTRY(copy_user_generic_string) |
237 | CFI_STARTPROC | 237 | CFI_STARTPROC |
238 | ASM_STAC | 238 | ASM_STAC |
239 | andl %edx,%edx | ||
240 | jz 4f | ||
241 | cmpl $8,%edx | 239 | cmpl $8,%edx |
242 | jb 2f /* less than 8 bytes, go to byte copy loop */ | 240 | jb 2f /* less than 8 bytes, go to byte copy loop */ |
243 | ALIGN_DESTINATION | 241 | ALIGN_DESTINATION |
@@ -249,12 +247,12 @@ ENTRY(copy_user_generic_string) | |||
249 | 2: movl %edx,%ecx | 247 | 2: movl %edx,%ecx |
250 | 3: rep | 248 | 3: rep |
251 | movsb | 249 | movsb |
252 | 4: xorl %eax,%eax | 250 | xorl %eax,%eax |
253 | ASM_CLAC | 251 | ASM_CLAC |
254 | ret | 252 | ret |
255 | 253 | ||
256 | .section .fixup,"ax" | 254 | .section .fixup,"ax" |
257 | 11: lea (%rdx,%rcx,8),%rcx | 255 | 11: leal (%rdx,%rcx,8),%ecx |
258 | 12: movl %ecx,%edx /* ecx is zerorest also */ | 256 | 12: movl %ecx,%edx /* ecx is zerorest also */ |
259 | jmp copy_user_handle_tail | 257 | jmp copy_user_handle_tail |
260 | .previous | 258 | .previous |
@@ -279,12 +277,10 @@ ENDPROC(copy_user_generic_string) | |||
279 | ENTRY(copy_user_enhanced_fast_string) | 277 | ENTRY(copy_user_enhanced_fast_string) |
280 | CFI_STARTPROC | 278 | CFI_STARTPROC |
281 | ASM_STAC | 279 | ASM_STAC |
282 | andl %edx,%edx | ||
283 | jz 2f | ||
284 | movl %edx,%ecx | 280 | movl %edx,%ecx |
285 | 1: rep | 281 | 1: rep |
286 | movsb | 282 | movsb |
287 | 2: xorl %eax,%eax | 283 | xorl %eax,%eax |
288 | ASM_CLAC | 284 | ASM_CLAC |
289 | ret | 285 | ret |
290 | 286 | ||
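In both copy_user fixup paths the lea computes how many bytes remain from the quadword count left in the count register and the byte count already in %edx; switching lea to leal keeps that sum in 32 bits, consistent with the 32-bit lengths these routines use throughout. A rough C equivalent of the address arithmetic, purely for illustration:

	/* C sketch of "leal (%rdx,%rcx,8),%ecx": bytes still to copy equal the
	 * byte remainder plus eight bytes per remaining quadword. */
	static unsigned int bytes_remaining(unsigned int byte_rem,
					    unsigned int qwords_left)
	{
		return byte_rem + qwords_left * 8;
	}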
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 7c3bee636e2f..39d6a3db0b96 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c | |||
@@ -16,7 +16,6 @@ | |||
16 | #include <linux/timex.h> | 16 | #include <linux/timex.h> |
17 | #include <linux/preempt.h> | 17 | #include <linux/preempt.h> |
18 | #include <linux/delay.h> | 18 | #include <linux/delay.h> |
19 | #include <linux/init.h> | ||
20 | 19 | ||
21 | #include <asm/processor.h> | 20 | #include <asm/processor.h> |
22 | #include <asm/delay.h> | 21 | #include <asm/delay.h> |
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 533a85e3a07e..1a2be7c6895d 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt | |||
@@ -346,8 +346,8 @@ AVXcode: 1 | |||
346 | 17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) | 346 | 17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) |
347 | 18: Grp16 (1A) | 347 | 18: Grp16 (1A) |
348 | 19: | 348 | 19: |
349 | 1a: | 349 | 1a: BNDCL Ev,Gv | BNDCU Ev,Gv | BNDMOV Gv,Ev | BNDLDX Gv,Ev,Gv |
350 | 1b: | 350 | 1b: BNDCN Ev,Gv | BNDMOV Ev,Gv | BNDMK Gv,Ev | BNDSTX Ev,GV,Gv |
351 | 1c: | 351 | 1c: |
352 | 1d: | 352 | 1d: |
353 | 1e: | 353 | 1e: |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9ff85bb8dd69..9d591c895803 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -641,6 +641,20 @@ no_context(struct pt_regs *regs, unsigned long error_code, | |||
641 | 641 | ||
642 | /* Are we prepared to handle this kernel fault? */ | 642 | /* Are we prepared to handle this kernel fault? */ |
643 | if (fixup_exception(regs)) { | 643 | if (fixup_exception(regs)) { |
644 | /* | ||
645 | * Any interrupt that takes a fault gets the fixup. This makes | ||
646 | * the below recursive fault logic only apply to faults from | ||
647 | * task context. | ||
648 | */ | ||
649 | if (in_interrupt()) | ||
650 | return; | ||
651 | |||
652 | /* | ||
653 | * Per the above we're !in_interrupt(), aka. task context. | ||
654 | * | ||
655 | * In this case we need to make sure we're not recursively | ||
656 | * faulting through the emulate_vsyscall() logic. | ||
657 | */ | ||
644 | if (current_thread_info()->sig_on_uaccess_error && signal) { | 658 | if (current_thread_info()->sig_on_uaccess_error && signal) { |
645 | tsk->thread.trap_nr = X86_TRAP_PF; | 659 | tsk->thread.trap_nr = X86_TRAP_PF; |
646 | tsk->thread.error_code = error_code | PF_USER; | 660 | tsk->thread.error_code = error_code | PF_USER; |
@@ -649,6 +663,10 @@ no_context(struct pt_regs *regs, unsigned long error_code, | |||
649 | /* XXX: hwpoison faults will set the wrong code. */ | 663 | /* XXX: hwpoison faults will set the wrong code. */ |
650 | force_sig_info_fault(signal, si_code, address, tsk, 0); | 664 | force_sig_info_fault(signal, si_code, address, tsk, 0); |
651 | } | 665 | } |
666 | |||
667 | /* | ||
668 | * Barring that, we can do the fixup and be happy. | ||
669 | */ | ||
652 | return; | 670 | return; |
653 | } | 671 | } |
654 | 672 | ||
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 0596e8e0cc19..207d9aef662d 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c | |||
@@ -108,8 +108,8 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, | |||
108 | 108 | ||
109 | static inline void get_head_page_multiple(struct page *page, int nr) | 109 | static inline void get_head_page_multiple(struct page *page, int nr) |
110 | { | 110 | { |
111 | VM_BUG_ON(page != compound_head(page)); | 111 | VM_BUG_ON_PAGE(page != compound_head(page), page); |
112 | VM_BUG_ON(page_count(page) == 0); | 112 | VM_BUG_ON_PAGE(page_count(page) == 0, page); |
113 | atomic_add(nr, &page->_count); | 113 | atomic_add(nr, &page->_count); |
114 | SetPageReferenced(page); | 114 | SetPageReferenced(page); |
115 | } | 115 | } |
@@ -135,7 +135,7 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, | |||
135 | head = pte_page(pte); | 135 | head = pte_page(pte); |
136 | page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); | 136 | page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); |
137 | do { | 137 | do { |
138 | VM_BUG_ON(compound_head(page) != head); | 138 | VM_BUG_ON_PAGE(compound_head(page) != head, page); |
139 | pages[*nr] = page; | 139 | pages[*nr] = page; |
140 | if (PageTail(page)) | 140 | if (PageTail(page)) |
141 | get_huge_page_tail(page); | 141 | get_huge_page_tail(page); |
@@ -212,7 +212,7 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr, | |||
212 | head = pte_page(pte); | 212 | head = pte_page(pte); |
213 | page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); | 213 | page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); |
214 | do { | 214 | do { |
215 | VM_BUG_ON(compound_head(page) != head); | 215 | VM_BUG_ON_PAGE(compound_head(page) != head, page); |
216 | pages[*nr] = page; | 216 | pages[*nr] = page; |
217 | if (PageTail(page)) | 217 | if (PageTail(page)) |
218 | get_huge_page_tail(page); | 218 | get_huge_page_tail(page); |
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 9d980d88b747..8c9f647ff9e1 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c | |||
@@ -87,9 +87,7 @@ int pmd_huge_support(void) | |||
87 | } | 87 | } |
88 | #endif | 88 | #endif |
89 | 89 | ||
90 | /* x86_64 also uses this file */ | 90 | #ifdef CONFIG_HUGETLB_PAGE |
91 | |||
92 | #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA | ||
93 | static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, | 91 | static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, |
94 | unsigned long addr, unsigned long len, | 92 | unsigned long addr, unsigned long len, |
95 | unsigned long pgoff, unsigned long flags) | 93 | unsigned long pgoff, unsigned long flags) |
@@ -99,7 +97,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, | |||
99 | 97 | ||
100 | info.flags = 0; | 98 | info.flags = 0; |
101 | info.length = len; | 99 | info.length = len; |
102 | info.low_limit = TASK_UNMAPPED_BASE; | 100 | info.low_limit = current->mm->mmap_legacy_base; |
103 | info.high_limit = TASK_SIZE; | 101 | info.high_limit = TASK_SIZE; |
104 | info.align_mask = PAGE_MASK & ~huge_page_mask(h); | 102 | info.align_mask = PAGE_MASK & ~huge_page_mask(h); |
105 | info.align_offset = 0; | 103 | info.align_offset = 0; |
@@ -172,8 +170,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |||
172 | return hugetlb_get_unmapped_area_topdown(file, addr, len, | 170 | return hugetlb_get_unmapped_area_topdown(file, addr, len, |
173 | pgoff, flags); | 171 | pgoff, flags); |
174 | } | 172 | } |
175 | 173 | #endif /* CONFIG_HUGETLB_PAGE */ | |
176 | #endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ | ||
177 | 174 | ||
178 | #ifdef CONFIG_X86_64 | 175 | #ifdef CONFIG_X86_64 |
179 | static __init int setup_hugepagesz(char *opt) | 176 | static __init int setup_hugepagesz(char *opt) |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 4287f1ffba7e..e39504878aec 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -665,7 +665,7 @@ void __init initmem_init(void) | |||
665 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; | 665 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; |
666 | #endif | 666 | #endif |
667 | 667 | ||
668 | memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); | 668 | memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); |
669 | sparse_memory_present_with_active_regions(0); | 669 | sparse_memory_present_with_active_regions(0); |
670 | 670 | ||
671 | #ifdef CONFIG_FLATMEM | 671 | #ifdef CONFIG_FLATMEM |
@@ -806,6 +806,9 @@ void __init mem_init(void) | |||
806 | BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END); | 806 | BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END); |
807 | #undef high_memory | 807 | #undef high_memory |
808 | #undef __FIXADDR_TOP | 808 | #undef __FIXADDR_TOP |
809 | #ifdef CONFIG_RANDOMIZE_BASE | ||
810 | BUILD_BUG_ON(CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE); | ||
811 | #endif | ||
809 | 812 | ||
810 | #ifdef CONFIG_HIGHMEM | 813 | #ifdef CONFIG_HIGHMEM |
811 | BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); | 814 | BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 104d56a9245f..f35c66c5959a 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -643,7 +643,7 @@ kernel_physical_mapping_init(unsigned long start, | |||
643 | #ifndef CONFIG_NUMA | 643 | #ifndef CONFIG_NUMA |
644 | void __init initmem_init(void) | 644 | void __init initmem_init(void) |
645 | { | 645 | { |
646 | memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); | 646 | memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); |
647 | } | 647 | } |
648 | #endif | 648 | #endif |
649 | 649 | ||
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index e5d5e2ce9f77..637ab34ed632 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c | |||
@@ -11,7 +11,6 @@ | |||
11 | #include <linux/rculist.h> | 11 | #include <linux/rculist.h> |
12 | #include <linux/spinlock.h> | 12 | #include <linux/spinlock.h> |
13 | #include <linux/hash.h> | 13 | #include <linux/hash.h> |
14 | #include <linux/init.h> | ||
15 | #include <linux/module.h> | 14 | #include <linux/module.h> |
16 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
17 | #include <linux/uaccess.h> | 16 | #include <linux/uaccess.h> |
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 8dabbed409ee..1e9da795767a 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c | |||
@@ -74,7 +74,7 @@ static void __init do_one_pass(u64 pattern, u64 start, u64 end) | |||
74 | u64 i; | 74 | u64 i; |
75 | phys_addr_t this_start, this_end; | 75 | phys_addr_t this_start, this_end; |
76 | 76 | ||
77 | for_each_free_mem_range(i, MAX_NUMNODES, &this_start, &this_end, NULL) { | 77 | for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) { |
78 | this_start = clamp_t(phys_addr_t, this_start, start, end); | 78 | this_start = clamp_t(phys_addr_t, this_start, start, end); |
79 | this_end = clamp_t(phys_addr_t, this_end, start, end); | 79 | this_end = clamp_t(phys_addr_t, this_end, start, end); |
80 | if (this_start < this_end) { | 80 | if (this_start < this_end) { |
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 24aec58d6afd..81b2750f3666 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c | |||
@@ -211,9 +211,13 @@ static void __init setup_node_data(int nid, u64 start, u64 end) | |||
211 | */ | 211 | */ |
212 | nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); | 212 | nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); |
213 | if (!nd_pa) { | 213 | if (!nd_pa) { |
214 | pr_err("Cannot find %zu bytes in node %d\n", | 214 | nd_pa = __memblock_alloc_base(nd_size, SMP_CACHE_BYTES, |
215 | nd_size, nid); | 215 | MEMBLOCK_ALLOC_ACCESSIBLE); |
216 | return; | 216 | if (!nd_pa) { |
217 | pr_err("Cannot find %zu bytes in node %d\n", | ||
218 | nd_size, nid); | ||
219 | return; | ||
220 | } | ||
217 | } | 221 | } |
218 | nd = __va(nd_pa); | 222 | nd = __va(nd_pa); |
219 | 223 | ||
@@ -487,7 +491,16 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) | |||
487 | 491 | ||
488 | for (i = 0; i < mi->nr_blks; i++) { | 492 | for (i = 0; i < mi->nr_blks; i++) { |
489 | struct numa_memblk *mb = &mi->blk[i]; | 493 | struct numa_memblk *mb = &mi->blk[i]; |
490 | memblock_set_node(mb->start, mb->end - mb->start, mb->nid); | 494 | memblock_set_node(mb->start, mb->end - mb->start, |
495 | &memblock.memory, mb->nid); | ||
496 | |||
497 | /* | ||
498 | * At this time, all memory regions reserved by memblock are | ||
499 | * used by the kernel. Setting the nid in memblock.reserved will | ||
500 | * mark all the nodes the kernel resides in. | ||
501 | */ | ||
502 | memblock_set_node(mb->start, mb->end - mb->start, | ||
503 | &memblock.reserved, mb->nid); | ||
491 | } | 504 | } |
492 | 505 | ||
493 | /* | 506 | /* |
@@ -549,6 +562,30 @@ static void __init numa_init_array(void) | |||
549 | } | 562 | } |
550 | } | 563 | } |
551 | 564 | ||
565 | static void __init numa_clear_kernel_node_hotplug(void) | ||
566 | { | ||
567 | int i, nid; | ||
568 | nodemask_t numa_kernel_nodes; | ||
569 | unsigned long start, end; | ||
570 | struct memblock_type *type = &memblock.reserved; | ||
571 | |||
572 | /* Mark all kernel nodes. */ | ||
573 | for (i = 0; i < type->cnt; i++) | ||
574 | node_set(type->regions[i].nid, numa_kernel_nodes); | ||
575 | |||
576 | /* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */ | ||
577 | for (i = 0; i < numa_meminfo.nr_blks; i++) { | ||
578 | nid = numa_meminfo.blk[i].nid; | ||
579 | if (!node_isset(nid, numa_kernel_nodes)) | ||
580 | continue; | ||
581 | |||
582 | start = numa_meminfo.blk[i].start; | ||
583 | end = numa_meminfo.blk[i].end; | ||
584 | |||
585 | memblock_clear_hotplug(start, end - start); | ||
586 | } | ||
587 | } | ||
588 | |||
552 | static int __init numa_init(int (*init_func)(void)) | 589 | static int __init numa_init(int (*init_func)(void)) |
553 | { | 590 | { |
554 | int i; | 591 | int i; |
@@ -561,7 +598,12 @@ static int __init numa_init(int (*init_func)(void)) | |||
561 | nodes_clear(node_possible_map); | 598 | nodes_clear(node_possible_map); |
562 | nodes_clear(node_online_map); | 599 | nodes_clear(node_online_map); |
563 | memset(&numa_meminfo, 0, sizeof(numa_meminfo)); | 600 | memset(&numa_meminfo, 0, sizeof(numa_meminfo)); |
564 | WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES)); | 601 | WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.memory, |
602 | MAX_NUMNODES)); | ||
603 | WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.reserved, | ||
604 | MAX_NUMNODES)); | ||
605 | /* In case that parsing SRAT failed. */ | ||
606 | WARN_ON(memblock_clear_hotplug(0, ULLONG_MAX)); | ||
565 | numa_reset_distance(); | 607 | numa_reset_distance(); |
566 | 608 | ||
567 | ret = init_func(); | 609 | ret = init_func(); |
@@ -597,6 +639,16 @@ static int __init numa_init(int (*init_func)(void)) | |||
597 | numa_clear_node(i); | 639 | numa_clear_node(i); |
598 | } | 640 | } |
599 | numa_init_array(); | 641 | numa_init_array(); |
642 | |||
643 | /* | ||
644 | * Very early during boot, the kernel has to use some memory, e.g. for | ||
645 | * loading the kernel image. We cannot prevent this anyway. So any | ||
646 | * node the kernel resides in should be un-hotpluggable. | ||
647 | * | ||
648 | * And when we come here, numa_init() won't fail. | ||
649 | */ | ||
650 | numa_clear_kernel_node_hotplug(); | ||
651 | |||
600 | return 0; | 652 | return 0; |
601 | } | 653 | } |
602 | 654 | ||
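memblock_set_node() now takes the memblock_type to tag as an explicit argument, which is what allows this hunk to record node ids on memblock.reserved as well as memblock.memory. A minimal sketch of the new calling convention, with placeholder values:

	/* Hedged sketch: tag both the usable-memory and reserved views of a
	 * range with the same node id under the new interface. */
	static void __init example_tag_node(u64 start, u64 size, int nid)
	{
		memblock_set_node(start, size, &memblock.memory, nid);
		memblock_set_node(start, size, &memblock.reserved, nid);
	}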
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index d0b1773d9d2e..461bc8289024 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c | |||
@@ -8,7 +8,6 @@ | |||
8 | #include <linux/kthread.h> | 8 | #include <linux/kthread.h> |
9 | #include <linux/random.h> | 9 | #include <linux/random.h> |
10 | #include <linux/kernel.h> | 10 | #include <linux/kernel.h> |
11 | #include <linux/init.h> | ||
12 | #include <linux/mm.h> | 11 | #include <linux/mm.h> |
13 | 12 | ||
14 | #include <asm/cacheflush.h> | 13 | #include <asm/cacheflush.h> |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index bb32480c2d71..b3b19f46c016 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -30,6 +30,7 @@ | |||
30 | */ | 30 | */ |
31 | struct cpa_data { | 31 | struct cpa_data { |
32 | unsigned long *vaddr; | 32 | unsigned long *vaddr; |
33 | pgd_t *pgd; | ||
33 | pgprot_t mask_set; | 34 | pgprot_t mask_set; |
34 | pgprot_t mask_clr; | 35 | pgprot_t mask_clr; |
35 | int numpages; | 36 | int numpages; |
@@ -322,17 +323,9 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
322 | return prot; | 323 | return prot; |
323 | } | 324 | } |
324 | 325 | ||
325 | /* | 326 | static pte_t *__lookup_address_in_pgd(pgd_t *pgd, unsigned long address, |
326 | * Lookup the page table entry for a virtual address. Return a pointer | 327 | unsigned int *level) |
327 | * to the entry and the level of the mapping. | ||
328 | * | ||
329 | * Note: We return pud and pmd either when the entry is marked large | ||
330 | * or when the present bit is not set. Otherwise we would return a | ||
331 | * pointer to a nonexisting mapping. | ||
332 | */ | ||
333 | pte_t *lookup_address(unsigned long address, unsigned int *level) | ||
334 | { | 328 | { |
335 | pgd_t *pgd = pgd_offset_k(address); | ||
336 | pud_t *pud; | 329 | pud_t *pud; |
337 | pmd_t *pmd; | 330 | pmd_t *pmd; |
338 | 331 | ||
@@ -361,8 +354,31 @@ pte_t *lookup_address(unsigned long address, unsigned int *level) | |||
361 | 354 | ||
362 | return pte_offset_kernel(pmd, address); | 355 | return pte_offset_kernel(pmd, address); |
363 | } | 356 | } |
357 | |||
358 | /* | ||
359 | * Lookup the page table entry for a virtual address. Return a pointer | ||
360 | * to the entry and the level of the mapping. | ||
361 | * | ||
362 | * Note: We return pud and pmd either when the entry is marked large | ||
363 | * or when the present bit is not set. Otherwise we would return a | ||
364 | * pointer to a nonexisting mapping. | ||
365 | */ | ||
366 | pte_t *lookup_address(unsigned long address, unsigned int *level) | ||
367 | { | ||
368 | return __lookup_address_in_pgd(pgd_offset_k(address), address, level); | ||
369 | } | ||
364 | EXPORT_SYMBOL_GPL(lookup_address); | 370 | EXPORT_SYMBOL_GPL(lookup_address); |
365 | 371 | ||
372 | static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address, | ||
373 | unsigned int *level) | ||
374 | { | ||
375 | if (cpa->pgd) | ||
376 | return __lookup_address_in_pgd(cpa->pgd + pgd_index(address), | ||
377 | address, level); | ||
378 | |||
379 | return lookup_address(address, level); | ||
380 | } | ||
381 | |||
366 | /* | 382 | /* |
367 | * This is necessary because __pa() does not work on some | 383 | * This is necessary because __pa() does not work on some |
368 | * kinds of memory, like vmalloc() or the alloc_remap() | 384 | * kinds of memory, like vmalloc() or the alloc_remap() |
@@ -437,7 +453,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
437 | * Check for races, another CPU might have split this page | 453 | * Check for races, another CPU might have split this page |
438 | * up already: | 454 | * up already: |
439 | */ | 455 | */ |
440 | tmp = lookup_address(address, &level); | 456 | tmp = _lookup_address_cpa(cpa, address, &level); |
441 | if (tmp != kpte) | 457 | if (tmp != kpte) |
442 | goto out_unlock; | 458 | goto out_unlock; |
443 | 459 | ||
@@ -543,7 +559,8 @@ out_unlock: | |||
543 | } | 559 | } |
544 | 560 | ||
545 | static int | 561 | static int |
546 | __split_large_page(pte_t *kpte, unsigned long address, struct page *base) | 562 | __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, |
563 | struct page *base) | ||
547 | { | 564 | { |
548 | pte_t *pbase = (pte_t *)page_address(base); | 565 | pte_t *pbase = (pte_t *)page_address(base); |
549 | unsigned long pfn, pfninc = 1; | 566 | unsigned long pfn, pfninc = 1; |
@@ -556,7 +573,7 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base) | |||
556 | * Check for races, another CPU might have split this page | 573 | * Check for races, another CPU might have split this page |
557 | * up for us already: | 574 | * up for us already: |
558 | */ | 575 | */ |
559 | tmp = lookup_address(address, &level); | 576 | tmp = _lookup_address_cpa(cpa, address, &level); |
560 | if (tmp != kpte) { | 577 | if (tmp != kpte) { |
561 | spin_unlock(&pgd_lock); | 578 | spin_unlock(&pgd_lock); |
562 | return 1; | 579 | return 1; |
@@ -632,7 +649,8 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base) | |||
632 | return 0; | 649 | return 0; |
633 | } | 650 | } |
634 | 651 | ||
635 | static int split_large_page(pte_t *kpte, unsigned long address) | 652 | static int split_large_page(struct cpa_data *cpa, pte_t *kpte, |
653 | unsigned long address) | ||
636 | { | 654 | { |
637 | struct page *base; | 655 | struct page *base; |
638 | 656 | ||
@@ -644,15 +662,390 @@ static int split_large_page(pte_t *kpte, unsigned long address) | |||
644 | if (!base) | 662 | if (!base) |
645 | return -ENOMEM; | 663 | return -ENOMEM; |
646 | 664 | ||
647 | if (__split_large_page(kpte, address, base)) | 665 | if (__split_large_page(cpa, kpte, address, base)) |
648 | __free_page(base); | 666 | __free_page(base); |
649 | 667 | ||
650 | return 0; | 668 | return 0; |
651 | } | 669 | } |
652 | 670 | ||
671 | static bool try_to_free_pte_page(pte_t *pte) | ||
672 | { | ||
673 | int i; | ||
674 | |||
675 | for (i = 0; i < PTRS_PER_PTE; i++) | ||
676 | if (!pte_none(pte[i])) | ||
677 | return false; | ||
678 | |||
679 | free_page((unsigned long)pte); | ||
680 | return true; | ||
681 | } | ||
682 | |||
683 | static bool try_to_free_pmd_page(pmd_t *pmd) | ||
684 | { | ||
685 | int i; | ||
686 | |||
687 | for (i = 0; i < PTRS_PER_PMD; i++) | ||
688 | if (!pmd_none(pmd[i])) | ||
689 | return false; | ||
690 | |||
691 | free_page((unsigned long)pmd); | ||
692 | return true; | ||
693 | } | ||
694 | |||
695 | static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) | ||
696 | { | ||
697 | pte_t *pte = pte_offset_kernel(pmd, start); | ||
698 | |||
699 | while (start < end) { | ||
700 | set_pte(pte, __pte(0)); | ||
701 | |||
702 | start += PAGE_SIZE; | ||
703 | pte++; | ||
704 | } | ||
705 | |||
706 | if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) { | ||
707 | pmd_clear(pmd); | ||
708 | return true; | ||
709 | } | ||
710 | return false; | ||
711 | } | ||
712 | |||
713 | static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd, | ||
714 | unsigned long start, unsigned long end) | ||
715 | { | ||
716 | if (unmap_pte_range(pmd, start, end)) | ||
717 | if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) | ||
718 | pud_clear(pud); | ||
719 | } | ||
720 | |||
721 | static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) | ||
722 | { | ||
723 | pmd_t *pmd = pmd_offset(pud, start); | ||
724 | |||
725 | /* | ||
726 | * Not on a 2MB page boundary? | ||
727 | */ | ||
728 | if (start & (PMD_SIZE - 1)) { | ||
729 | unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; | ||
730 | unsigned long pre_end = min_t(unsigned long, end, next_page); | ||
731 | |||
732 | __unmap_pmd_range(pud, pmd, start, pre_end); | ||
733 | |||
734 | start = pre_end; | ||
735 | pmd++; | ||
736 | } | ||
737 | |||
738 | /* | ||
739 | * Try to unmap in 2M chunks. | ||
740 | */ | ||
741 | while (end - start >= PMD_SIZE) { | ||
742 | if (pmd_large(*pmd)) | ||
743 | pmd_clear(pmd); | ||
744 | else | ||
745 | __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE); | ||
746 | |||
747 | start += PMD_SIZE; | ||
748 | pmd++; | ||
749 | } | ||
750 | |||
751 | /* | ||
752 | * 4K leftovers? | ||
753 | */ | ||
754 | if (start < end) | ||
755 | return __unmap_pmd_range(pud, pmd, start, end); | ||
756 | |||
757 | /* | ||
758 | * Try again to free the PMD page if we haven't succeeded above. | ||
759 | */ | ||
760 | if (!pud_none(*pud)) | ||
761 | if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) | ||
762 | pud_clear(pud); | ||
763 | } | ||
764 | |||
765 | static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) | ||
766 | { | ||
767 | pud_t *pud = pud_offset(pgd, start); | ||
768 | |||
769 | /* | ||
770 | * Not on a GB page boundary? | ||
771 | */ | ||
772 | if (start & (PUD_SIZE - 1)) { | ||
773 | unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; | ||
774 | unsigned long pre_end = min_t(unsigned long, end, next_page); | ||
775 | |||
776 | unmap_pmd_range(pud, start, pre_end); | ||
777 | |||
778 | start = pre_end; | ||
779 | pud++; | ||
780 | } | ||
781 | |||
782 | /* | ||
783 | * Try to unmap in 1G chunks? | ||
784 | */ | ||
785 | while (end - start >= PUD_SIZE) { | ||
786 | |||
787 | if (pud_large(*pud)) | ||
788 | pud_clear(pud); | ||
789 | else | ||
790 | unmap_pmd_range(pud, start, start + PUD_SIZE); | ||
791 | |||
792 | start += PUD_SIZE; | ||
793 | pud++; | ||
794 | } | ||
795 | |||
796 | /* | ||
797 | * 2M leftovers? | ||
798 | */ | ||
799 | if (start < end) | ||
800 | unmap_pmd_range(pud, start, end); | ||
801 | |||
802 | /* | ||
803 | * No need to try to free the PUD page because we'll free it in | ||
804 | * populate_pgd's error path | ||
805 | */ | ||
806 | } | ||
807 | |||
808 | static int alloc_pte_page(pmd_t *pmd) | ||
809 | { | ||
810 | pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); | ||
811 | if (!pte) | ||
812 | return -1; | ||
813 | |||
814 | set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); | ||
815 | return 0; | ||
816 | } | ||
817 | |||
818 | static int alloc_pmd_page(pud_t *pud) | ||
819 | { | ||
820 | pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); | ||
821 | if (!pmd) | ||
822 | return -1; | ||
823 | |||
824 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | ||
825 | return 0; | ||
826 | } | ||
827 | |||
828 | static void populate_pte(struct cpa_data *cpa, | ||
829 | unsigned long start, unsigned long end, | ||
830 | unsigned num_pages, pmd_t *pmd, pgprot_t pgprot) | ||
831 | { | ||
832 | pte_t *pte; | ||
833 | |||
834 | pte = pte_offset_kernel(pmd, start); | ||
835 | |||
836 | while (num_pages-- && start < end) { | ||
837 | |||
838 | /* deal with the NX bit */ | ||
839 | if (!(pgprot_val(pgprot) & _PAGE_NX)) | ||
840 | cpa->pfn &= ~_PAGE_NX; | ||
841 | |||
842 | set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot)); | ||
843 | |||
844 | start += PAGE_SIZE; | ||
845 | cpa->pfn += PAGE_SIZE; | ||
846 | pte++; | ||
847 | } | ||
848 | } | ||
849 | |||
850 | static int populate_pmd(struct cpa_data *cpa, | ||
851 | unsigned long start, unsigned long end, | ||
852 | unsigned num_pages, pud_t *pud, pgprot_t pgprot) | ||
853 | { | ||
854 | unsigned int cur_pages = 0; | ||
855 | pmd_t *pmd; | ||
856 | |||
857 | /* | ||
858 | * Not on a 2M boundary? | ||
859 | */ | ||
860 | if (start & (PMD_SIZE - 1)) { | ||
861 | unsigned long pre_end = start + (num_pages << PAGE_SHIFT); | ||
862 | unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; | ||
863 | |||
864 | pre_end = min_t(unsigned long, pre_end, next_page); | ||
865 | cur_pages = (pre_end - start) >> PAGE_SHIFT; | ||
866 | cur_pages = min_t(unsigned int, num_pages, cur_pages); | ||
867 | |||
868 | /* | ||
869 | * Need a PTE page? | ||
870 | */ | ||
871 | pmd = pmd_offset(pud, start); | ||
872 | if (pmd_none(*pmd)) | ||
873 | if (alloc_pte_page(pmd)) | ||
874 | return -1; | ||
875 | |||
876 | populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot); | ||
877 | |||
878 | start = pre_end; | ||
879 | } | ||
880 | |||
881 | /* | ||
882 | * We mapped them all? | ||
883 | */ | ||
884 | if (num_pages == cur_pages) | ||
885 | return cur_pages; | ||
886 | |||
887 | while (end - start >= PMD_SIZE) { | ||
888 | |||
889 | /* | ||
890 | * We cannot use a 1G page so allocate a PMD page if needed. | ||
891 | */ | ||
892 | if (pud_none(*pud)) | ||
893 | if (alloc_pmd_page(pud)) | ||
894 | return -1; | ||
895 | |||
896 | pmd = pmd_offset(pud, start); | ||
897 | |||
898 | set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot))); | ||
899 | |||
900 | start += PMD_SIZE; | ||
901 | cpa->pfn += PMD_SIZE; | ||
902 | cur_pages += PMD_SIZE >> PAGE_SHIFT; | ||
903 | } | ||
904 | |||
905 | /* | ||
906 | * Map trailing 4K pages. | ||
907 | */ | ||
908 | if (start < end) { | ||
909 | pmd = pmd_offset(pud, start); | ||
910 | if (pmd_none(*pmd)) | ||
911 | if (alloc_pte_page(pmd)) | ||
912 | return -1; | ||
913 | |||
914 | populate_pte(cpa, start, end, num_pages - cur_pages, | ||
915 | pmd, pgprot); | ||
916 | } | ||
917 | return num_pages; | ||
918 | } | ||
919 | |||
920 | static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, | ||
921 | pgprot_t pgprot) | ||
922 | { | ||
923 | pud_t *pud; | ||
924 | unsigned long end; | ||
925 | int cur_pages = 0; | ||
926 | |||
927 | end = start + (cpa->numpages << PAGE_SHIFT); | ||
928 | |||
929 | /* | ||
930 | * Not on a Gb page boundary? => map everything up to it with | ||
931 | * smaller pages. | ||
932 | */ | ||
933 | if (start & (PUD_SIZE - 1)) { | ||
934 | unsigned long pre_end; | ||
935 | unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; | ||
936 | |||
937 | pre_end = min_t(unsigned long, end, next_page); | ||
938 | cur_pages = (pre_end - start) >> PAGE_SHIFT; | ||
939 | cur_pages = min_t(int, (int)cpa->numpages, cur_pages); | ||
940 | |||
941 | pud = pud_offset(pgd, start); | ||
942 | |||
943 | /* | ||
944 | * Need a PMD page? | ||
945 | */ | ||
946 | if (pud_none(*pud)) | ||
947 | if (alloc_pmd_page(pud)) | ||
948 | return -1; | ||
949 | |||
950 | cur_pages = populate_pmd(cpa, start, pre_end, cur_pages, | ||
951 | pud, pgprot); | ||
952 | if (cur_pages < 0) | ||
953 | return cur_pages; | ||
954 | |||
955 | start = pre_end; | ||
956 | } | ||
957 | |||
958 | /* We mapped them all? */ | ||
959 | if (cpa->numpages == cur_pages) | ||
960 | return cur_pages; | ||
961 | |||
962 | pud = pud_offset(pgd, start); | ||
963 | |||
964 | /* | ||
965 | * Map everything starting from the Gb boundary, possibly with 1G pages | ||
966 | */ | ||
967 | while (end - start >= PUD_SIZE) { | ||
968 | set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot))); | ||
969 | |||
970 | start += PUD_SIZE; | ||
971 | cpa->pfn += PUD_SIZE; | ||
972 | cur_pages += PUD_SIZE >> PAGE_SHIFT; | ||
973 | pud++; | ||
974 | } | ||
975 | |||
976 | /* Map trailing leftover */ | ||
977 | if (start < end) { | ||
978 | int tmp; | ||
979 | |||
980 | pud = pud_offset(pgd, start); | ||
981 | if (pud_none(*pud)) | ||
982 | if (alloc_pmd_page(pud)) | ||
983 | return -1; | ||
984 | |||
985 | tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages, | ||
986 | pud, pgprot); | ||
987 | if (tmp < 0) | ||
988 | return cur_pages; | ||
989 | |||
990 | cur_pages += tmp; | ||
991 | } | ||
992 | return cur_pages; | ||
993 | } | ||
994 | |||
995 | /* | ||
996 | * Restrictions for the kernel page table do not necessarily apply when mapping in | ||
997 | * an alternate PGD. | ||
998 | */ | ||
999 | static int populate_pgd(struct cpa_data *cpa, unsigned long addr) | ||
1000 | { | ||
1001 | pgprot_t pgprot = __pgprot(_KERNPG_TABLE); | ||
1002 | bool allocd_pgd = false; | ||
1003 | pgd_t *pgd_entry; | ||
1004 | pud_t *pud = NULL; /* shut up gcc */ | ||
1005 | int ret; | ||
1006 | |||
1007 | pgd_entry = cpa->pgd + pgd_index(addr); | ||
1008 | |||
1009 | /* | ||
1010 | * Allocate a PUD page and hand it down for mapping. | ||
1011 | */ | ||
1012 | if (pgd_none(*pgd_entry)) { | ||
1013 | pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); | ||
1014 | if (!pud) | ||
1015 | return -1; | ||
1016 | |||
1017 | set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE)); | ||
1018 | allocd_pgd = true; | ||
1019 | } | ||
1020 | |||
1021 | pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr); | ||
1022 | pgprot_val(pgprot) |= pgprot_val(cpa->mask_set); | ||
1023 | |||
1024 | ret = populate_pud(cpa, addr, pgd_entry, pgprot); | ||
1025 | if (ret < 0) { | ||
1026 | unmap_pud_range(pgd_entry, addr, | ||
1027 | addr + (cpa->numpages << PAGE_SHIFT)); | ||
1028 | |||
1029 | if (allocd_pgd) { | ||
1030 | /* | ||
1031 | * If I allocated this PUD page, I can just as well | ||
1032 | * free it in this error path. | ||
1033 | */ | ||
1034 | pgd_clear(pgd_entry); | ||
1035 | free_page((unsigned long)pud); | ||
1036 | } | ||
1037 | return ret; | ||
1038 | } | ||
1039 | cpa->numpages = ret; | ||
1040 | return 0; | ||
1041 | } | ||
1042 | |||
653 | static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, | 1043 | static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, |
654 | int primary) | 1044 | int primary) |
655 | { | 1045 | { |
1046 | if (cpa->pgd) | ||
1047 | return populate_pgd(cpa, vaddr); | ||
1048 | |||
656 | /* | 1049 | /* |
657 | * Ignore all non primary paths. | 1050 | * Ignore all non primary paths. |
658 | */ | 1051 | */ |
@@ -697,7 +1090,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) | |||
697 | else | 1090 | else |
698 | address = *cpa->vaddr; | 1091 | address = *cpa->vaddr; |
699 | repeat: | 1092 | repeat: |
700 | kpte = lookup_address(address, &level); | 1093 | kpte = _lookup_address_cpa(cpa, address, &level); |
701 | if (!kpte) | 1094 | if (!kpte) |
702 | return __cpa_process_fault(cpa, address, primary); | 1095 | return __cpa_process_fault(cpa, address, primary); |
703 | 1096 | ||
@@ -761,7 +1154,7 @@ repeat: | |||
761 | /* | 1154 | /* |
762 | * We have to split the large page: | 1155 | * We have to split the large page: |
763 | */ | 1156 | */ |
764 | err = split_large_page(kpte, address); | 1157 | err = split_large_page(cpa, kpte, address); |
765 | if (!err) { | 1158 | if (!err) { |
766 | /* | 1159 | /* |
767 | * Do a global flush tlb after splitting the large page | 1160 | * Do a global flush tlb after splitting the large page |
@@ -910,6 +1303,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
910 | int ret, cache, checkalias; | 1303 | int ret, cache, checkalias; |
911 | unsigned long baddr = 0; | 1304 | unsigned long baddr = 0; |
912 | 1305 | ||
1306 | memset(&cpa, 0, sizeof(cpa)); | ||
1307 | |||
913 | /* | 1308 | /* |
914 | * Check, if we are requested to change a not supported | 1309 | * Check, if we are requested to change a not supported |
915 | * feature: | 1310 | * feature: |
@@ -1356,6 +1751,7 @@ static int __set_pages_p(struct page *page, int numpages) | |||
1356 | { | 1751 | { |
1357 | unsigned long tempaddr = (unsigned long) page_address(page); | 1752 | unsigned long tempaddr = (unsigned long) page_address(page); |
1358 | struct cpa_data cpa = { .vaddr = &tempaddr, | 1753 | struct cpa_data cpa = { .vaddr = &tempaddr, |
1754 | .pgd = NULL, | ||
1359 | .numpages = numpages, | 1755 | .numpages = numpages, |
1360 | .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), | 1756 | .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), |
1361 | .mask_clr = __pgprot(0), | 1757 | .mask_clr = __pgprot(0), |
@@ -1374,6 +1770,7 @@ static int __set_pages_np(struct page *page, int numpages) | |||
1374 | { | 1770 | { |
1375 | unsigned long tempaddr = (unsigned long) page_address(page); | 1771 | unsigned long tempaddr = (unsigned long) page_address(page); |
1376 | struct cpa_data cpa = { .vaddr = &tempaddr, | 1772 | struct cpa_data cpa = { .vaddr = &tempaddr, |
1773 | .pgd = NULL, | ||
1377 | .numpages = numpages, | 1774 | .numpages = numpages, |
1378 | .mask_set = __pgprot(0), | 1775 | .mask_set = __pgprot(0), |
1379 | .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), | 1776 | .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), |
@@ -1434,6 +1831,36 @@ bool kernel_page_present(struct page *page) | |||
1434 | 1831 | ||
1435 | #endif /* CONFIG_DEBUG_PAGEALLOC */ | 1832 | #endif /* CONFIG_DEBUG_PAGEALLOC */ |
1436 | 1833 | ||
1834 | int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, | ||
1835 | unsigned numpages, unsigned long page_flags) | ||
1836 | { | ||
1837 | int retval = -EINVAL; | ||
1838 | |||
1839 | struct cpa_data cpa = { | ||
1840 | .vaddr = &address, | ||
1841 | .pfn = pfn, | ||
1842 | .pgd = pgd, | ||
1843 | .numpages = numpages, | ||
1844 | .mask_set = __pgprot(0), | ||
1845 | .mask_clr = __pgprot(0), | ||
1846 | .flags = 0, | ||
1847 | }; | ||
1848 | |||
1849 | if (!(__supported_pte_mask & _PAGE_NX)) | ||
1850 | goto out; | ||
1851 | |||
1852 | if (!(page_flags & _PAGE_NX)) | ||
1853 | cpa.mask_clr = __pgprot(_PAGE_NX); | ||
1854 | |||
1855 | cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags); | ||
1856 | |||
1857 | retval = __change_page_attr_set_clr(&cpa, 0); | ||
1858 | __flush_tlb_all(); | ||
1859 | |||
1860 | out: | ||
1861 | return retval; | ||
1862 | } | ||
1863 | |||
1437 | /* | 1864 | /* |
1438 | * The testcases use internal knowledge of the implementation that shouldn't | 1865 | * The testcases use internal knowledge of the implementation that shouldn't |
1439 | * be exposed to the rest of the kernel. Include these directly here. | 1866 | * be exposed to the rest of the kernel. Include these directly here. |
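kernel_map_pages_in_pgd() is the new entry point built on top of populate_pgd()/populate_pud(); it lets a caller populate an alternate page table instead of the kernel's own. A hedged usage sketch with purely hypothetical names and values (the real caller is the EFI runtime mapping code elsewhere in this series):

	/* Hypothetical helper, not part of the patch: map a physically
	 * contiguous region read-write into a private PGD. */
	static int __init example_map_region(pgd_t *pgd, u64 phys,
					     unsigned long va, unsigned npages)
	{
		return kernel_map_pages_in_pgd(pgd, phys >> PAGE_SHIFT, va,
					       npages, _PAGE_RW);
	}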
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 266ca912f62e..1a25187e151e 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c | |||
@@ -181,6 +181,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
181 | (unsigned long long) start, (unsigned long long) end - 1, | 181 | (unsigned long long) start, (unsigned long long) end - 1, |
182 | hotpluggable ? " hotplug" : ""); | 182 | hotpluggable ? " hotplug" : ""); |
183 | 183 | ||
184 | /* Mark hotplug range in memblock. */ | ||
185 | if (hotpluggable && memblock_mark_hotplug(start, ma->length)) | ||
186 | pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n", | ||
187 | (unsigned long long)start, (unsigned long long)end - 1); | ||
188 | |||
184 | return 0; | 189 | return 0; |
185 | out_err_bad_srat: | 190 | out_err_bad_srat: |
186 | bad_srat(); | 191 | bad_srat(); |
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 26328e800869..4ed75dd81d05 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c | |||
@@ -359,15 +359,21 @@ void bpf_jit_compile(struct sk_filter *fp) | |||
359 | EMIT2(0x89, 0xd0); /* mov %edx,%eax */ | 359 | EMIT2(0x89, 0xd0); /* mov %edx,%eax */ |
360 | break; | 360 | break; |
361 | case BPF_S_ALU_MOD_K: /* A %= K; */ | 361 | case BPF_S_ALU_MOD_K: /* A %= K; */ |
362 | if (K == 1) { | ||
363 | CLEAR_A(); | ||
364 | break; | ||
365 | } | ||
362 | EMIT2(0x31, 0xd2); /* xor %edx,%edx */ | 366 | EMIT2(0x31, 0xd2); /* xor %edx,%edx */ |
363 | EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */ | 367 | EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */ |
364 | EMIT2(0xf7, 0xf1); /* div %ecx */ | 368 | EMIT2(0xf7, 0xf1); /* div %ecx */ |
365 | EMIT2(0x89, 0xd0); /* mov %edx,%eax */ | 369 | EMIT2(0x89, 0xd0); /* mov %edx,%eax */ |
366 | break; | 370 | break; |
367 | case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */ | 371 | case BPF_S_ALU_DIV_K: /* A /= K */ |
368 | EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */ | 372 | if (K == 1) |
369 | EMIT(K, 4); | 373 | break; |
370 | EMIT4(0x48, 0xc1, 0xe8, 0x20); /* shr $0x20,%rax */ | 374 | EMIT2(0x31, 0xd2); /* xor %edx,%edx */ |
375 | EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */ | ||
376 | EMIT2(0xf7, 0xf1); /* div %ecx */ | ||
371 | break; | 377 | break; |
372 | case BPF_S_ALU_AND_X: | 378 | case BPF_S_ALU_AND_X: |
373 | seen |= SEEN_XREG; | 379 | seen |= SEEN_XREG; |
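The JIT change drops the reciprocal-multiply sequence for constant division and special-cases K == 1 for both operations (dividing by one is a no-op, modulo one always yields zero), emitting a plain div otherwise. The semantics the generated code now implements are roughly, in C:

	/* Sketch of the emitted semantics (A = accumulator, K = immediate);
	 * this is not the JIT emitter itself. */
	switch (code) {
	case BPF_S_ALU_MOD_K:
		A = (K == 1) ? 0 : A % K;
		break;
	case BPF_S_ALU_DIV_K:
		if (K != 1)
			A = A / K;
		break;
	}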
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index b046e070e088..bca9e85daaa5 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c | |||
@@ -5,7 +5,6 @@ | |||
5 | #include <linux/delay.h> | 5 | #include <linux/delay.h> |
6 | #include <linux/dmi.h> | 6 | #include <linux/dmi.h> |
7 | #include <linux/pci.h> | 7 | #include <linux/pci.h> |
8 | #include <linux/init.h> | ||
9 | #include <linux/vgaarb.h> | 8 | #include <linux/vgaarb.h> |
10 | #include <asm/pci_x86.h> | 9 | #include <asm/pci_x86.h> |
11 | 10 | ||
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 51384ca727ad..84b9d672843d 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <asm/pci_x86.h> | 31 | #include <asm/pci_x86.h> |
32 | #include <asm/hw_irq.h> | 32 | #include <asm/hw_irq.h> |
33 | #include <asm/io_apic.h> | 33 | #include <asm/io_apic.h> |
34 | #include <asm/intel-mid.h> | ||
34 | 35 | ||
35 | #define PCIE_CAP_OFFSET 0x100 | 36 | #define PCIE_CAP_OFFSET 0x100 |
36 | 37 | ||
@@ -219,7 +220,10 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) | |||
219 | irq_attr.ioapic = mp_find_ioapic(dev->irq); | 220 | irq_attr.ioapic = mp_find_ioapic(dev->irq); |
220 | irq_attr.ioapic_pin = dev->irq; | 221 | irq_attr.ioapic_pin = dev->irq; |
221 | irq_attr.trigger = 1; /* level */ | 222 | irq_attr.trigger = 1; /* level */ |
222 | irq_attr.polarity = 1; /* active low */ | 223 | if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) |
224 | irq_attr.polarity = 0; /* active high */ | ||
225 | else | ||
226 | irq_attr.polarity = 1; /* active low */ | ||
223 | io_apic_set_pci_routing(&dev->dev, dev->irq, &irq_attr); | 227 | io_apic_set_pci_routing(&dev->dev, dev->irq, &irq_attr); |
224 | 228 | ||
225 | return 0; | 229 | return 0; |
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 5eee4959785d..103e702ec5a7 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c | |||
@@ -337,7 +337,7 @@ out: | |||
337 | return ret; | 337 | return ret; |
338 | } | 338 | } |
339 | 339 | ||
340 | static void xen_initdom_restore_msi_irqs(struct pci_dev *dev, int irq) | 340 | static void xen_initdom_restore_msi_irqs(struct pci_dev *dev) |
341 | { | 341 | { |
342 | int ret = 0; | 342 | int ret = 0; |
343 | 343 | ||
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index cceb813044ef..d62ec87a2b26 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
@@ -12,6 +12,8 @@ | |||
12 | * Bibo Mao <bibo.mao@intel.com> | 12 | * Bibo Mao <bibo.mao@intel.com> |
13 | * Chandramouli Narayanan <mouli@linux.intel.com> | 13 | * Chandramouli Narayanan <mouli@linux.intel.com> |
14 | * Huang Ying <ying.huang@intel.com> | 14 | * Huang Ying <ying.huang@intel.com> |
15 | * Copyright (C) 2013 SuSE Labs | ||
16 | * Borislav Petkov <bp@suse.de> - runtime services VA mapping | ||
15 | * | 17 | * |
16 | * Copied from efi_32.c to eliminate the duplicated code between EFI | 18 | * Copied from efi_32.c to eliminate the duplicated code between EFI |
17 | * 32/64 support code. --ying 2007-10-26 | 19 | * 32/64 support code. --ying 2007-10-26 |
@@ -51,7 +53,7 @@ | |||
51 | #include <asm/x86_init.h> | 53 | #include <asm/x86_init.h> |
52 | #include <asm/rtc.h> | 54 | #include <asm/rtc.h> |
53 | 55 | ||
54 | #define EFI_DEBUG 1 | 56 | #define EFI_DEBUG |
55 | 57 | ||
56 | #define EFI_MIN_RESERVE 5120 | 58 | #define EFI_MIN_RESERVE 5120 |
57 | 59 | ||
@@ -74,6 +76,8 @@ static __initdata efi_config_table_type_t arch_tables[] = { | |||
74 | {NULL_GUID, NULL, NULL}, | 76 | {NULL_GUID, NULL, NULL}, |
75 | }; | 77 | }; |
76 | 78 | ||
79 | u64 efi_setup; /* efi setup_data physical address */ | ||
80 | |||
77 | /* | 81 | /* |
78 | * Returns 1 if 'facility' is enabled, 0 otherwise. | 82 | * Returns 1 if 'facility' is enabled, 0 otherwise. |
79 | */ | 83 | */ |
@@ -110,7 +114,6 @@ static int __init setup_storage_paranoia(char *arg) | |||
110 | } | 114 | } |
111 | early_param("efi_no_storage_paranoia", setup_storage_paranoia); | 115 | early_param("efi_no_storage_paranoia", setup_storage_paranoia); |
112 | 116 | ||
113 | |||
114 | static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) | 117 | static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) |
115 | { | 118 | { |
116 | unsigned long flags; | 119 | unsigned long flags; |
@@ -398,9 +401,9 @@ int __init efi_memblock_x86_reserve_range(void) | |||
398 | return 0; | 401 | return 0; |
399 | } | 402 | } |
400 | 403 | ||
401 | #if EFI_DEBUG | ||
402 | static void __init print_efi_memmap(void) | 404 | static void __init print_efi_memmap(void) |
403 | { | 405 | { |
406 | #ifdef EFI_DEBUG | ||
404 | efi_memory_desc_t *md; | 407 | efi_memory_desc_t *md; |
405 | void *p; | 408 | void *p; |
406 | int i; | 409 | int i; |
@@ -415,8 +418,8 @@ static void __init print_efi_memmap(void) | |||
415 | md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), | 418 | md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), |
416 | (md->num_pages >> (20 - EFI_PAGE_SHIFT))); | 419 | (md->num_pages >> (20 - EFI_PAGE_SHIFT))); |
417 | } | 420 | } |
418 | } | ||
419 | #endif /* EFI_DEBUG */ | 421 | #endif /* EFI_DEBUG */ |
422 | } | ||
420 | 423 | ||
421 | void __init efi_reserve_boot_services(void) | 424 | void __init efi_reserve_boot_services(void) |
422 | { | 425 | { |
@@ -436,7 +439,7 @@ void __init efi_reserve_boot_services(void) | |||
436 | * - Not within any part of the kernel | 439 | * - Not within any part of the kernel |
437 | * - Not the bios reserved area | 440 | * - Not the bios reserved area |
438 | */ | 441 | */ |
439 | if ((start+size >= __pa_symbol(_text) | 442 | if ((start + size > __pa_symbol(_text) |
440 | && start <= __pa_symbol(_end)) || | 443 | && start <= __pa_symbol(_end)) || |
441 | !e820_all_mapped(start, start+size, E820_RAM) || | 444 | !e820_all_mapped(start, start+size, E820_RAM) || |
442 | memblock_is_region_reserved(start, size)) { | 445 | memblock_is_region_reserved(start, size)) { |
@@ -489,18 +492,27 @@ static int __init efi_systab_init(void *phys) | |||
489 | { | 492 | { |
490 | if (efi_enabled(EFI_64BIT)) { | 493 | if (efi_enabled(EFI_64BIT)) { |
491 | efi_system_table_64_t *systab64; | 494 | efi_system_table_64_t *systab64; |
495 | struct efi_setup_data *data = NULL; | ||
492 | u64 tmp = 0; | 496 | u64 tmp = 0; |
493 | 497 | ||
498 | if (efi_setup) { | ||
499 | data = early_memremap(efi_setup, sizeof(*data)); | ||
500 | if (!data) | ||
501 | return -ENOMEM; | ||
502 | } | ||
494 | systab64 = early_ioremap((unsigned long)phys, | 503 | systab64 = early_ioremap((unsigned long)phys, |
495 | sizeof(*systab64)); | 504 | sizeof(*systab64)); |
496 | if (systab64 == NULL) { | 505 | if (systab64 == NULL) { |
497 | pr_err("Couldn't map the system table!\n"); | 506 | pr_err("Couldn't map the system table!\n"); |
507 | if (data) | ||
508 | early_iounmap(data, sizeof(*data)); | ||
498 | return -ENOMEM; | 509 | return -ENOMEM; |
499 | } | 510 | } |
500 | 511 | ||
501 | efi_systab.hdr = systab64->hdr; | 512 | efi_systab.hdr = systab64->hdr; |
502 | efi_systab.fw_vendor = systab64->fw_vendor; | 513 | efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor : |
503 | tmp |= systab64->fw_vendor; | 514 | systab64->fw_vendor; |
515 | tmp |= data ? data->fw_vendor : systab64->fw_vendor; | ||
504 | efi_systab.fw_revision = systab64->fw_revision; | 516 | efi_systab.fw_revision = systab64->fw_revision; |
505 | efi_systab.con_in_handle = systab64->con_in_handle; | 517 | efi_systab.con_in_handle = systab64->con_in_handle; |
506 | tmp |= systab64->con_in_handle; | 518 | tmp |= systab64->con_in_handle; |
@@ -514,15 +526,20 @@ static int __init efi_systab_init(void *phys) | |||
514 | tmp |= systab64->stderr_handle; | 526 | tmp |= systab64->stderr_handle; |
515 | efi_systab.stderr = systab64->stderr; | 527 | efi_systab.stderr = systab64->stderr; |
516 | tmp |= systab64->stderr; | 528 | tmp |= systab64->stderr; |
517 | efi_systab.runtime = (void *)(unsigned long)systab64->runtime; | 529 | efi_systab.runtime = data ? |
518 | tmp |= systab64->runtime; | 530 | (void *)(unsigned long)data->runtime : |
531 | (void *)(unsigned long)systab64->runtime; | ||
532 | tmp |= data ? data->runtime : systab64->runtime; | ||
519 | efi_systab.boottime = (void *)(unsigned long)systab64->boottime; | 533 | efi_systab.boottime = (void *)(unsigned long)systab64->boottime; |
520 | tmp |= systab64->boottime; | 534 | tmp |= systab64->boottime; |
521 | efi_systab.nr_tables = systab64->nr_tables; | 535 | efi_systab.nr_tables = systab64->nr_tables; |
522 | efi_systab.tables = systab64->tables; | 536 | efi_systab.tables = data ? (unsigned long)data->tables : |
523 | tmp |= systab64->tables; | 537 | systab64->tables; |
538 | tmp |= data ? data->tables : systab64->tables; | ||
524 | 539 | ||
525 | early_iounmap(systab64, sizeof(*systab64)); | 540 | early_iounmap(systab64, sizeof(*systab64)); |
541 | if (data) | ||
542 | early_iounmap(data, sizeof(*data)); | ||
526 | #ifdef CONFIG_X86_32 | 543 | #ifdef CONFIG_X86_32 |
527 | if (tmp >> 32) { | 544 | if (tmp >> 32) { |
528 | pr_err("EFI data located above 4GB, disabling EFI.\n"); | 545 | pr_err("EFI data located above 4GB, disabling EFI.\n"); |
@@ -626,6 +643,62 @@ static int __init efi_memmap_init(void) | |||
626 | return 0; | 643 | return 0; |
627 | } | 644 | } |
628 | 645 | ||
646 | /* | ||
647 | * A number of config table entries get remapped to virtual addresses | ||
648 | * after entering EFI virtual mode. However, the kexec kernel requires | ||
649 | * their physical addresses, therefore we pass them via setup_data and | ||
650 | * correct those entries to their respective physical addresses here. | ||
651 | * | ||
652 | * Currently this only handles smbios, which is necessary for some firmware | ||
653 | * implementations. | ||
654 | */ | ||
655 | static int __init efi_reuse_config(u64 tables, int nr_tables) | ||
656 | { | ||
657 | int i, sz, ret = 0; | ||
658 | void *p, *tablep; | ||
659 | struct efi_setup_data *data; | ||
660 | |||
661 | if (!efi_setup) | ||
662 | return 0; | ||
663 | |||
664 | if (!efi_enabled(EFI_64BIT)) | ||
665 | return 0; | ||
666 | |||
667 | data = early_memremap(efi_setup, sizeof(*data)); | ||
668 | if (!data) { | ||
669 | ret = -ENOMEM; | ||
670 | goto out; | ||
671 | } | ||
672 | |||
673 | if (!data->smbios) | ||
674 | goto out_memremap; | ||
675 | |||
676 | sz = sizeof(efi_config_table_64_t); | ||
677 | |||
678 | p = tablep = early_memremap(tables, nr_tables * sz); | ||
679 | if (!p) { | ||
680 | pr_err("Could not map Configuration table!\n"); | ||
681 | ret = -ENOMEM; | ||
682 | goto out_memremap; | ||
683 | } | ||
684 | |||
685 | for (i = 0; i < efi.systab->nr_tables; i++) { | ||
686 | efi_guid_t guid; | ||
687 | |||
688 | guid = ((efi_config_table_64_t *)p)->guid; | ||
689 | |||
690 | if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) | ||
691 | ((efi_config_table_64_t *)p)->table = data->smbios; | ||
692 | p += sz; | ||
693 | } | ||
694 | early_iounmap(tablep, nr_tables * sz); | ||
695 | |||
696 | out_memremap: | ||
697 | early_iounmap(data, sizeof(*data)); | ||
698 | out: | ||
699 | return ret; | ||
700 | } | ||
701 | |||
629 | void __init efi_init(void) | 702 | void __init efi_init(void) |
630 | { | 703 | { |
631 | efi_char16_t *c16; | 704 | efi_char16_t *c16; |
@@ -651,6 +724,10 @@ void __init efi_init(void) | |||
651 | 724 | ||
652 | set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); | 725 | set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); |
653 | 726 | ||
727 | efi.config_table = (unsigned long)efi.systab->tables; | ||
728 | efi.fw_vendor = (unsigned long)efi.systab->fw_vendor; | ||
729 | efi.runtime = (unsigned long)efi.systab->runtime; | ||
730 | |||
654 | /* | 731 | /* |
655 | * Show what we know for posterity | 732 | * Show what we know for posterity |
656 | */ | 733 | */ |
@@ -667,6 +744,9 @@ void __init efi_init(void) | |||
667 | efi.systab->hdr.revision >> 16, | 744 | efi.systab->hdr.revision >> 16, |
668 | efi.systab->hdr.revision & 0xffff, vendor); | 745 | efi.systab->hdr.revision & 0xffff, vendor); |
669 | 746 | ||
747 | if (efi_reuse_config(efi.systab->tables, efi.systab->nr_tables)) | ||
748 | return; | ||
749 | |||
670 | if (efi_config_init(arch_tables)) | 750 | if (efi_config_init(arch_tables)) |
671 | return; | 751 | return; |
672 | 752 | ||
@@ -684,15 +764,12 @@ void __init efi_init(void) | |||
684 | return; | 764 | return; |
685 | set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility); | 765 | set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility); |
686 | } | 766 | } |
687 | |||
688 | if (efi_memmap_init()) | 767 | if (efi_memmap_init()) |
689 | return; | 768 | return; |
690 | 769 | ||
691 | set_bit(EFI_MEMMAP, &x86_efi_facility); | 770 | set_bit(EFI_MEMMAP, &x86_efi_facility); |
692 | 771 | ||
693 | #if EFI_DEBUG | ||
694 | print_efi_memmap(); | 772 | print_efi_memmap(); |
695 | #endif | ||
696 | } | 773 | } |
697 | 774 | ||
698 | void __init efi_late_init(void) | 775 | void __init efi_late_init(void) |
@@ -741,36 +818,38 @@ void efi_memory_uc(u64 addr, unsigned long size) | |||
741 | set_memory_uc(addr, npages); | 818 | set_memory_uc(addr, npages); |
742 | } | 819 | } |
743 | 820 | ||
744 | /* | 821 | void __init old_map_region(efi_memory_desc_t *md) |
745 | * This function will switch the EFI runtime services to virtual mode. | ||
746 | * Essentially, look through the EFI memmap and map every region that | ||
747 | * has the runtime attribute bit set in its memory descriptor and update | ||
748 | * that memory descriptor with the virtual address obtained from ioremap(). | ||
749 | * This enables the runtime services to be called without having to | ||
750 | * thunk back into physical mode for every invocation. | ||
751 | */ | ||
752 | void __init efi_enter_virtual_mode(void) | ||
753 | { | 822 | { |
754 | efi_memory_desc_t *md, *prev_md = NULL; | 823 | u64 start_pfn, end_pfn, end; |
755 | efi_status_t status; | ||
756 | unsigned long size; | 824 | unsigned long size; |
757 | u64 end, systab, start_pfn, end_pfn; | 825 | void *va; |
758 | void *p, *va, *new_memmap = NULL; | ||
759 | int count = 0; | ||
760 | 826 | ||
761 | efi.systab = NULL; | 827 | start_pfn = PFN_DOWN(md->phys_addr); |
828 | size = md->num_pages << PAGE_SHIFT; | ||
829 | end = md->phys_addr + size; | ||
830 | end_pfn = PFN_UP(end); | ||
762 | 831 | ||
763 | /* | 832 | if (pfn_range_is_mapped(start_pfn, end_pfn)) { |
764 | * We don't do virtual mode, since we don't do runtime services, on | 833 | va = __va(md->phys_addr); |
765 | * non-native EFI | ||
766 | */ | ||
767 | 834 | ||
768 | if (!efi_is_native()) { | 835 | if (!(md->attribute & EFI_MEMORY_WB)) |
769 | efi_unmap_memmap(); | 836 | efi_memory_uc((u64)(unsigned long)va, size); |
770 | return; | 837 | } else |
771 | } | 838 | va = efi_ioremap(md->phys_addr, size, |
839 | md->type, md->attribute); | ||
840 | |||
841 | md->virt_addr = (u64) (unsigned long) va; | ||
842 | if (!va) | ||
843 | pr_err("ioremap of 0x%llX failed!\n", | ||
844 | (unsigned long long)md->phys_addr); | ||
845 | } | ||
846 | |||
847 | /* Merge contiguous regions of the same type and attribute */ | ||
848 | static void __init efi_merge_regions(void) | ||
849 | { | ||
850 | void *p; | ||
851 | efi_memory_desc_t *md, *prev_md = NULL; | ||
772 | 852 | ||
773 | /* Merge contiguous regions of the same type and attribute */ | ||
774 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | 853 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { |
775 | u64 prev_size; | 854 | u64 prev_size; |
776 | md = p; | 855 | md = p; |
@@ -796,6 +875,77 @@ void __init efi_enter_virtual_mode(void) | |||
796 | } | 875 | } |
797 | prev_md = md; | 876 | prev_md = md; |
798 | } | 877 | } |
878 | } | ||
879 | |||
880 | static void __init get_systab_virt_addr(efi_memory_desc_t *md) | ||
881 | { | ||
882 | unsigned long size; | ||
883 | u64 end, systab; | ||
884 | |||
885 | size = md->num_pages << EFI_PAGE_SHIFT; | ||
886 | end = md->phys_addr + size; | ||
887 | systab = (u64)(unsigned long)efi_phys.systab; | ||
888 | if (md->phys_addr <= systab && systab < end) { | ||
889 | systab += md->virt_addr - md->phys_addr; | ||
890 | efi.systab = (efi_system_table_t *)(unsigned long)systab; | ||
891 | } | ||
892 | } | ||
893 | |||
894 | static int __init save_runtime_map(void) | ||
895 | { | ||
896 | efi_memory_desc_t *md; | ||
897 | void *tmp, *p, *q = NULL; | ||
898 | int count = 0; | ||
899 | |||
900 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
901 | md = p; | ||
902 | |||
903 | if (!(md->attribute & EFI_MEMORY_RUNTIME) || | ||
904 | (md->type == EFI_BOOT_SERVICES_CODE) || | ||
905 | (md->type == EFI_BOOT_SERVICES_DATA)) | ||
906 | continue; | ||
907 | tmp = krealloc(q, (count + 1) * memmap.desc_size, GFP_KERNEL); | ||
908 | if (!tmp) | ||
909 | goto out; | ||
910 | q = tmp; | ||
911 | |||
912 | memcpy(q + count * memmap.desc_size, md, memmap.desc_size); | ||
913 | count++; | ||
914 | } | ||
915 | |||
916 | efi_runtime_map_setup(q, count, memmap.desc_size); | ||
917 | |||
918 | return 0; | ||
919 | out: | ||
920 | kfree(q); | ||
921 | return -ENOMEM; | ||
922 | } | ||
923 | |||
924 | /* | ||
925 | * Map the efi regions which were passed via setup_data. The virt_addr is a | ||
926 | * fixed address which was used in the first kernel of a kexec boot. | ||
927 | */ | ||
928 | static void __init efi_map_regions_fixed(void) | ||
929 | { | ||
930 | void *p; | ||
931 | efi_memory_desc_t *md; | ||
932 | |||
933 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
934 | md = p; | ||
935 | efi_map_region_fixed(md); /* FIXME: add error handling */ | ||
936 | get_systab_virt_addr(md); | ||
937 | } | ||
938 | |||
939 | } | ||
940 | |||
941 | /* | ||
942 | * Map efi memory ranges for runtime service and update new_memmap with virtual | ||
943 | * addresses. | ||
944 | */ | ||
945 | static void * __init efi_map_regions(int *count) | ||
946 | { | ||
947 | efi_memory_desc_t *md; | ||
948 | void *p, *tmp, *new_memmap = NULL; | ||
799 | 949 | ||
800 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | 950 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { |
801 | md = p; | 951 | md = p; |
@@ -807,53 +957,95 @@ void __init efi_enter_virtual_mode(void) | |||
807 | continue; | 957 | continue; |
808 | } | 958 | } |
809 | 959 | ||
810 | size = md->num_pages << EFI_PAGE_SHIFT; | 960 | efi_map_region(md); |
811 | end = md->phys_addr + size; | 961 | get_systab_virt_addr(md); |
812 | 962 | ||
813 | start_pfn = PFN_DOWN(md->phys_addr); | 963 | tmp = krealloc(new_memmap, (*count + 1) * memmap.desc_size, |
814 | end_pfn = PFN_UP(end); | 964 | GFP_KERNEL); |
815 | if (pfn_range_is_mapped(start_pfn, end_pfn)) { | 965 | if (!tmp) |
816 | va = __va(md->phys_addr); | 966 | goto out; |
967 | new_memmap = tmp; | ||
968 | memcpy(new_memmap + (*count * memmap.desc_size), md, | ||
969 | memmap.desc_size); | ||
970 | (*count)++; | ||
971 | } | ||
817 | 972 | ||
818 | if (!(md->attribute & EFI_MEMORY_WB)) | 973 | return new_memmap; |
819 | efi_memory_uc((u64)(unsigned long)va, size); | 974 | out: |
820 | } else | 975 | kfree(new_memmap); |
821 | va = efi_ioremap(md->phys_addr, size, | 976 | return NULL; |
822 | md->type, md->attribute); | 977 | } |
978 | |||
979 | /* | ||
980 | * This function will switch the EFI runtime services to virtual mode. | ||
981 | * Essentially, we look through the EFI memmap and map every region that | ||
982 | * has the runtime attribute bit set in its memory descriptor into the | ||
983 | * ->trampoline_pgd page table using a top-down VA allocation scheme. | ||
984 | * | ||
985 | * The old method which used to update that memory descriptor with the | ||
986 | * virtual address obtained from ioremap() is still supported when the | ||
987 | * kernel is booted with efi=old_map on its command line. That old | ||
988 | * method likewise enables the runtime services to be called without having to | ||
989 | * thunk back into physical mode for every invocation. | ||
990 | * | ||
991 | * The new method does a pagetable switch in a preemption-safe manner | ||
992 | * so that we're in a different address space when calling a runtime | ||
993 | * function. For function arguments passing we do copy the PGDs of the | ||
994 | * kernel page table into ->trampoline_pgd prior to each call. | ||
995 | * | ||
996 | * Specifically for a kexec boot, the efi runtime maps from the previous kernel | ||
997 | * should be passed in via setup_data. In that case the runtime ranges will be | ||
998 | * mapped to the same virtual addresses as in the first kernel. | ||
999 | */ | ||
1000 | void __init efi_enter_virtual_mode(void) | ||
1001 | { | ||
1002 | efi_status_t status; | ||
1003 | void *new_memmap = NULL; | ||
1004 | int err, count = 0; | ||
823 | 1005 | ||
824 | md->virt_addr = (u64) (unsigned long) va; | 1006 | efi.systab = NULL; |
825 | 1007 | ||
826 | if (!va) { | 1008 | /* |
827 | pr_err("ioremap of 0x%llX failed!\n", | 1009 | * We don't do virtual mode, since we don't do runtime services, on |
828 | (unsigned long long)md->phys_addr); | 1010 | * non-native EFI |
829 | continue; | 1011 | */ |
830 | } | 1012 | if (!efi_is_native()) { |
1013 | efi_unmap_memmap(); | ||
1014 | return; | ||
1015 | } | ||
831 | 1016 | ||
832 | systab = (u64) (unsigned long) efi_phys.systab; | 1017 | if (efi_setup) { |
833 | if (md->phys_addr <= systab && systab < end) { | 1018 | efi_map_regions_fixed(); |
834 | systab += md->virt_addr - md->phys_addr; | 1019 | } else { |
835 | efi.systab = (efi_system_table_t *) (unsigned long) systab; | 1020 | efi_merge_regions(); |
1021 | new_memmap = efi_map_regions(&count); | ||
1022 | if (!new_memmap) { | ||
1023 | pr_err("Error reallocating memory, EFI runtime non-functional!\n"); | ||
1024 | return; | ||
836 | } | 1025 | } |
837 | new_memmap = krealloc(new_memmap, | ||
838 | (count + 1) * memmap.desc_size, | ||
839 | GFP_KERNEL); | ||
840 | memcpy(new_memmap + (count * memmap.desc_size), md, | ||
841 | memmap.desc_size); | ||
842 | count++; | ||
843 | } | 1026 | } |
844 | 1027 | ||
1028 | err = save_runtime_map(); | ||
1029 | if (err) | ||
1030 | pr_err("Error saving runtime map, efi runtime on kexec non-functional!\n"); | ||
1031 | |||
845 | BUG_ON(!efi.systab); | 1032 | BUG_ON(!efi.systab); |
846 | 1033 | ||
847 | status = phys_efi_set_virtual_address_map( | 1034 | efi_setup_page_tables(); |
848 | memmap.desc_size * count, | 1035 | efi_sync_low_kernel_mappings(); |
849 | memmap.desc_size, | ||
850 | memmap.desc_version, | ||
851 | (efi_memory_desc_t *)__pa(new_memmap)); | ||
852 | 1036 | ||
853 | if (status != EFI_SUCCESS) { | 1037 | if (!efi_setup) { |
854 | pr_alert("Unable to switch EFI into virtual mode " | 1038 | status = phys_efi_set_virtual_address_map( |
855 | "(status=%lx)!\n", status); | 1039 | memmap.desc_size * count, |
856 | panic("EFI call to SetVirtualAddressMap() failed!"); | 1040 | memmap.desc_size, |
1041 | memmap.desc_version, | ||
1042 | (efi_memory_desc_t *)__pa(new_memmap)); | ||
1043 | |||
1044 | if (status != EFI_SUCCESS) { | ||
1045 | pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", | ||
1046 | status); | ||
1047 | panic("EFI call to SetVirtualAddressMap() failed!"); | ||
1048 | } | ||
857 | } | 1049 | } |
858 | 1050 | ||
859 | /* | 1051 | /* |
@@ -876,7 +1068,8 @@ void __init efi_enter_virtual_mode(void) | |||
876 | efi.query_variable_info = virt_efi_query_variable_info; | 1068 | efi.query_variable_info = virt_efi_query_variable_info; |
877 | efi.update_capsule = virt_efi_update_capsule; | 1069 | efi.update_capsule = virt_efi_update_capsule; |
878 | efi.query_capsule_caps = virt_efi_query_capsule_caps; | 1070 | efi.query_capsule_caps = virt_efi_query_capsule_caps; |
879 | if (__supported_pte_mask & _PAGE_NX) | 1071 | |
1072 | if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) | ||
880 | runtime_code_page_mkexec(); | 1073 | runtime_code_page_mkexec(); |
881 | 1074 | ||
882 | kfree(new_memmap); | 1075 | kfree(new_memmap); |
@@ -1006,3 +1199,15 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) | |||
1006 | return EFI_SUCCESS; | 1199 | return EFI_SUCCESS; |
1007 | } | 1200 | } |
1008 | EXPORT_SYMBOL_GPL(efi_query_variable_store); | 1201 | EXPORT_SYMBOL_GPL(efi_query_variable_store); |
1202 | |||
1203 | static int __init parse_efi_cmdline(char *str) | ||
1204 | { | ||
1205 | if (*str == '=') | ||
1206 | str++; | ||
1207 | |||
1208 | if (!strncmp(str, "old_map", 7)) | ||
1209 | set_bit(EFI_OLD_MEMMAP, &x86_efi_facility); | ||
1210 | |||
1211 | return 0; | ||
1212 | } | ||
1213 | early_param("efi", parse_efi_cmdline); | ||
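In practice the switch is flipped from the kernel command line; the boot entry below is purely illustrative (paths and version numbers are made up) and only shows where efi=old_map would go:

    # illustrative GRUB stanza selecting the legacy ioremap-based EFI mapping
    linux /boot/vmlinuz-3.14 root=/dev/sda2 ro quiet efi=old_map

Because this is an early_param(), the option is parsed during early setup, before efi_enter_virtual_mode() decides which mapping scheme to use.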
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 40e446941dd7..249b183cf417 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c | |||
@@ -37,9 +37,19 @@ | |||
37 | * claim EFI runtime service handler exclusively and to duplicate a memory in | 37 | * claim EFI runtime service handler exclusively and to duplicate a memory in |
38 | * low memory space say 0 - 3G. | 38 | * low memory space say 0 - 3G. |
39 | */ | 39 | */ |
40 | |||
41 | static unsigned long efi_rt_eflags; | 40 | static unsigned long efi_rt_eflags; |
42 | 41 | ||
42 | void efi_sync_low_kernel_mappings(void) {} | ||
43 | void efi_setup_page_tables(void) {} | ||
44 | |||
45 | void __init efi_map_region(efi_memory_desc_t *md) | ||
46 | { | ||
47 | old_map_region(md); | ||
48 | } | ||
49 | |||
50 | void __init efi_map_region_fixed(efi_memory_desc_t *md) {} | ||
51 | void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} | ||
52 | |||
43 | void efi_call_phys_prelog(void) | 53 | void efi_call_phys_prelog(void) |
44 | { | 54 | { |
45 | struct desc_ptr gdt_descr; | 55 | struct desc_ptr gdt_descr; |
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 39a0e7f1f0a3..6284f158a47d 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c | |||
@@ -38,10 +38,28 @@ | |||
38 | #include <asm/efi.h> | 38 | #include <asm/efi.h> |
39 | #include <asm/cacheflush.h> | 39 | #include <asm/cacheflush.h> |
40 | #include <asm/fixmap.h> | 40 | #include <asm/fixmap.h> |
41 | #include <asm/realmode.h> | ||
41 | 42 | ||
42 | static pgd_t *save_pgd __initdata; | 43 | static pgd_t *save_pgd __initdata; |
43 | static unsigned long efi_flags __initdata; | 44 | static unsigned long efi_flags __initdata; |
44 | 45 | ||
46 | /* | ||
47 | * We allocate runtime services regions bottom-up, starting from -4G, i.e. | ||
48 | * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G. | ||
49 | */ | ||
50 | static u64 efi_va = -4 * (1UL << 30); | ||
51 | #define EFI_VA_END (-68 * (1UL << 30)) | ||
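A quick arithmetic check of that window, with the constants written out in hex:

    -4 GiB  as a u64  = 0xffffffff00000000   (first address handed out, counting down)
    -68 GiB as a u64  = 0xffffffef00000000   (EFI_VA_END)
    0xffffffff00000000 - 0xffffffef00000000 = 0x1000000000 = 64 GiB of VA space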
52 | |||
53 | /* | ||
54 | * Scratch space used for switching the pagetable in the EFI stub | ||
55 | */ | ||
56 | struct efi_scratch { | ||
57 | u64 r15; | ||
58 | u64 prev_cr3; | ||
59 | pgd_t *efi_pgt; | ||
60 | bool use_pgd; | ||
61 | }; | ||
62 | |||
45 | static void __init early_code_mapping_set_exec(int executable) | 63 | static void __init early_code_mapping_set_exec(int executable) |
46 | { | 64 | { |
47 | efi_memory_desc_t *md; | 65 | efi_memory_desc_t *md; |
@@ -65,6 +83,9 @@ void __init efi_call_phys_prelog(void) | |||
65 | int pgd; | 83 | int pgd; |
66 | int n_pgds; | 84 | int n_pgds; |
67 | 85 | ||
86 | if (!efi_enabled(EFI_OLD_MEMMAP)) | ||
87 | return; | ||
88 | |||
68 | early_code_mapping_set_exec(1); | 89 | early_code_mapping_set_exec(1); |
69 | local_irq_save(efi_flags); | 90 | local_irq_save(efi_flags); |
70 | 91 | ||
@@ -86,6 +107,10 @@ void __init efi_call_phys_epilog(void) | |||
86 | */ | 107 | */ |
87 | int pgd; | 108 | int pgd; |
88 | int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); | 109 | int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); |
110 | |||
111 | if (!efi_enabled(EFI_OLD_MEMMAP)) | ||
112 | return; | ||
113 | |||
89 | for (pgd = 0; pgd < n_pgds; pgd++) | 114 | for (pgd = 0; pgd < n_pgds; pgd++) |
90 | set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); | 115 | set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); |
91 | kfree(save_pgd); | 116 | kfree(save_pgd); |
@@ -94,6 +119,96 @@ void __init efi_call_phys_epilog(void) | |||
94 | early_code_mapping_set_exec(0); | 119 | early_code_mapping_set_exec(0); |
95 | } | 120 | } |
96 | 121 | ||
122 | /* | ||
123 | * Add low kernel mappings for passing arguments to EFI functions. | ||
124 | */ | ||
125 | void efi_sync_low_kernel_mappings(void) | ||
126 | { | ||
127 | unsigned num_pgds; | ||
128 | pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); | ||
129 | |||
130 | if (efi_enabled(EFI_OLD_MEMMAP)) | ||
131 | return; | ||
132 | |||
133 | num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET); | ||
134 | |||
135 | memcpy(pgd + pgd_index(PAGE_OFFSET), | ||
136 | init_mm.pgd + pgd_index(PAGE_OFFSET), | ||
137 | sizeof(pgd_t) * num_pgds); | ||
138 | } | ||
139 | |||
140 | void efi_setup_page_tables(void) | ||
141 | { | ||
142 | efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd; | ||
143 | |||
144 | if (!efi_enabled(EFI_OLD_MEMMAP)) | ||
145 | efi_scratch.use_pgd = true; | ||
146 | } | ||
147 | |||
148 | static void __init __map_region(efi_memory_desc_t *md, u64 va) | ||
149 | { | ||
150 | pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); | ||
151 | unsigned long pf = 0; | ||
152 | |||
153 | if (!(md->attribute & EFI_MEMORY_WB)) | ||
154 | pf |= _PAGE_PCD; | ||
155 | |||
156 | if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf)) | ||
157 | pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", | ||
158 | md->phys_addr, va); | ||
159 | } | ||
160 | |||
161 | void __init efi_map_region(efi_memory_desc_t *md) | ||
162 | { | ||
163 | unsigned long size = md->num_pages << PAGE_SHIFT; | ||
164 | u64 pa = md->phys_addr; | ||
165 | |||
166 | if (efi_enabled(EFI_OLD_MEMMAP)) | ||
167 | return old_map_region(md); | ||
168 | |||
169 | /* | ||
170 | * Make sure the 1:1 mappings are present as a catch-all for b0rked | ||
171 | * firmware which doesn't update all internal pointers after switching | ||
172 | * to virtual mode and would otherwise crap on us. | ||
173 | */ | ||
174 | __map_region(md, md->phys_addr); | ||
175 | |||
176 | efi_va -= size; | ||
177 | |||
178 | /* Is PA 2M-aligned? */ | ||
179 | if (!(pa & (PMD_SIZE - 1))) { | ||
180 | efi_va &= PMD_MASK; | ||
181 | } else { | ||
182 | u64 pa_offset = pa & (PMD_SIZE - 1); | ||
183 | u64 prev_va = efi_va; | ||
184 | |||
185 | /* get us the same offset within this 2M page */ | ||
186 | efi_va = (efi_va & PMD_MASK) + pa_offset; | ||
187 | |||
188 | if (efi_va > prev_va) | ||
189 | efi_va -= PMD_SIZE; | ||
190 | } | ||
191 | |||
192 | if (efi_va < EFI_VA_END) { | ||
193 | pr_warn(FW_WARN "VA address range overflow!\n"); | ||
194 | return; | ||
195 | } | ||
196 | |||
197 | /* Do the VA map */ | ||
198 | __map_region(md, efi_va); | ||
199 | md->virt_addr = efi_va; | ||
200 | } | ||
201 | |||
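As a worked example of the alignment handling above, using made-up numbers: assume efi_va is still 0xffffffff00000000 and a descriptor has phys_addr = 0x7b012000 with num_pages = 2 (size 0x2000). The physical address is not 2 MiB aligned, so:

    efi_va -= 0x2000                          ->  0xfffffffeffffe000  (prev_va)
    pa_offset = 0x7b012000 & (PMD_SIZE - 1)   ->  0x12000
    efi_va = (efi_va & PMD_MASK) + pa_offset  ->  0xfffffffeffe12000
    (not above prev_va, so no extra 2 MiB step back is needed)

The region ends up with the same offset inside its 2 MiB page in both the physical and the virtual address, which keeps large-page mappings possible.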
202 | /* | ||
203 | * kexec kernel will use efi_map_region_fixed to map efi runtime memory ranges. | ||
204 | * md->virt_addr is the original virtual address which had been mapped in kexec | ||
205 | * 1st kernel. | ||
206 | */ | ||
207 | void __init efi_map_region_fixed(efi_memory_desc_t *md) | ||
208 | { | ||
209 | __map_region(md, md->virt_addr); | ||
210 | } | ||
211 | |||
97 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, | 212 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, |
98 | u32 type, u64 attribute) | 213 | u32 type, u64 attribute) |
99 | { | 214 | { |
@@ -113,3 +228,8 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, | |||
113 | 228 | ||
114 | return (void __iomem *)__va(phys_addr); | 229 | return (void __iomem *)__va(phys_addr); |
115 | } | 230 | } |
231 | |||
232 | void __init parse_efi_setup(u64 phys_addr, u32 data_len) | ||
233 | { | ||
234 | efi_setup = phys_addr + sizeof(struct setup_data); | ||
235 | } | ||
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 4c07ccab8146..88073b140298 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S | |||
@@ -34,10 +34,47 @@ | |||
34 | mov %rsi, %cr0; \ | 34 | mov %rsi, %cr0; \ |
35 | mov (%rsp), %rsp | 35 | mov (%rsp), %rsp |
36 | 36 | ||
37 | /* Flush the TLB by toggling CR4.PGE (stolen from gcc) */ | ||
38 | .macro FLUSH_TLB_ALL | ||
39 | movq %r15, efi_scratch(%rip) | ||
40 | movq %r14, efi_scratch+8(%rip) | ||
41 | movq %cr4, %r15 | ||
42 | movq %r15, %r14 | ||
43 | andb $0x7f, %r14b | ||
44 | movq %r14, %cr4 | ||
45 | movq %r15, %cr4 | ||
46 | movq efi_scratch+8(%rip), %r14 | ||
47 | movq efi_scratch(%rip), %r15 | ||
48 | .endm | ||
49 | |||
50 | .macro SWITCH_PGT | ||
51 | cmpb $0, efi_scratch+24(%rip) | ||
52 | je 1f | ||
53 | movq %r15, efi_scratch(%rip) # r15 | ||
54 | # save previous CR3 | ||
55 | movq %cr3, %r15 | ||
56 | movq %r15, efi_scratch+8(%rip) # prev_cr3 | ||
57 | movq efi_scratch+16(%rip), %r15 # EFI pgt | ||
58 | movq %r15, %cr3 | ||
59 | 1: | ||
60 | .endm | ||
61 | |||
62 | .macro RESTORE_PGT | ||
63 | cmpb $0, efi_scratch+24(%rip) | ||
64 | je 2f | ||
65 | movq efi_scratch+8(%rip), %r15 | ||
66 | movq %r15, %cr3 | ||
67 | movq efi_scratch(%rip), %r15 | ||
68 | FLUSH_TLB_ALL | ||
69 | 2: | ||
70 | .endm | ||
71 | |||
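The hard-coded byte offsets in SWITCH_PGT and RESTORE_PGT line up with struct efi_scratch defined in efi_64.c; restated here for reference:

    efi_scratch+0    r15        scratch slot for the caller's %r15
    efi_scratch+8    prev_cr3   CR3 saved before the page-table switch
    efi_scratch+16   efi_pgt    trampoline PGD that SWITCH_PGT loads into %cr3
    efi_scratch+24   use_pgd    flag tested by "cmpb $0, efi_scratch+24(%rip)"

FLUSH_TLB_ALL clears and then restores CR4.PGE (bit 7, hence the "andb $0x7f, %r14b"), which flushes global TLB entries once the previous CR3 has been put back.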
37 | ENTRY(efi_call0) | 72 | ENTRY(efi_call0) |
38 | SAVE_XMM | 73 | SAVE_XMM |
39 | subq $32, %rsp | 74 | subq $32, %rsp |
75 | SWITCH_PGT | ||
40 | call *%rdi | 76 | call *%rdi |
77 | RESTORE_PGT | ||
41 | addq $32, %rsp | 78 | addq $32, %rsp |
42 | RESTORE_XMM | 79 | RESTORE_XMM |
43 | ret | 80 | ret |
@@ -47,7 +84,9 @@ ENTRY(efi_call1) | |||
47 | SAVE_XMM | 84 | SAVE_XMM |
48 | subq $32, %rsp | 85 | subq $32, %rsp |
49 | mov %rsi, %rcx | 86 | mov %rsi, %rcx |
87 | SWITCH_PGT | ||
50 | call *%rdi | 88 | call *%rdi |
89 | RESTORE_PGT | ||
51 | addq $32, %rsp | 90 | addq $32, %rsp |
52 | RESTORE_XMM | 91 | RESTORE_XMM |
53 | ret | 92 | ret |
@@ -57,7 +96,9 @@ ENTRY(efi_call2) | |||
57 | SAVE_XMM | 96 | SAVE_XMM |
58 | subq $32, %rsp | 97 | subq $32, %rsp |
59 | mov %rsi, %rcx | 98 | mov %rsi, %rcx |
99 | SWITCH_PGT | ||
60 | call *%rdi | 100 | call *%rdi |
101 | RESTORE_PGT | ||
61 | addq $32, %rsp | 102 | addq $32, %rsp |
62 | RESTORE_XMM | 103 | RESTORE_XMM |
63 | ret | 104 | ret |
@@ -68,7 +109,9 @@ ENTRY(efi_call3) | |||
68 | subq $32, %rsp | 109 | subq $32, %rsp |
69 | mov %rcx, %r8 | 110 | mov %rcx, %r8 |
70 | mov %rsi, %rcx | 111 | mov %rsi, %rcx |
112 | SWITCH_PGT | ||
71 | call *%rdi | 113 | call *%rdi |
114 | RESTORE_PGT | ||
72 | addq $32, %rsp | 115 | addq $32, %rsp |
73 | RESTORE_XMM | 116 | RESTORE_XMM |
74 | ret | 117 | ret |
@@ -80,7 +123,9 @@ ENTRY(efi_call4) | |||
80 | mov %r8, %r9 | 123 | mov %r8, %r9 |
81 | mov %rcx, %r8 | 124 | mov %rcx, %r8 |
82 | mov %rsi, %rcx | 125 | mov %rsi, %rcx |
126 | SWITCH_PGT | ||
83 | call *%rdi | 127 | call *%rdi |
128 | RESTORE_PGT | ||
84 | addq $32, %rsp | 129 | addq $32, %rsp |
85 | RESTORE_XMM | 130 | RESTORE_XMM |
86 | ret | 131 | ret |
@@ -93,7 +138,9 @@ ENTRY(efi_call5) | |||
93 | mov %r8, %r9 | 138 | mov %r8, %r9 |
94 | mov %rcx, %r8 | 139 | mov %rcx, %r8 |
95 | mov %rsi, %rcx | 140 | mov %rsi, %rcx |
141 | SWITCH_PGT | ||
96 | call *%rdi | 142 | call *%rdi |
143 | RESTORE_PGT | ||
97 | addq $48, %rsp | 144 | addq $48, %rsp |
98 | RESTORE_XMM | 145 | RESTORE_XMM |
99 | ret | 146 | ret |
@@ -109,8 +156,15 @@ ENTRY(efi_call6) | |||
109 | mov %r8, %r9 | 156 | mov %r8, %r9 |
110 | mov %rcx, %r8 | 157 | mov %rcx, %r8 |
111 | mov %rsi, %rcx | 158 | mov %rsi, %rcx |
159 | SWITCH_PGT | ||
112 | call *%rdi | 160 | call *%rdi |
161 | RESTORE_PGT | ||
113 | addq $48, %rsp | 162 | addq $48, %rsp |
114 | RESTORE_XMM | 163 | RESTORE_XMM |
115 | ret | 164 | ret |
116 | ENDPROC(efi_call6) | 165 | ENDPROC(efi_call6) |
166 | |||
167 | .data | ||
168 | ENTRY(efi_scratch) | ||
169 | .fill 3,8,0 | ||
170 | .byte 0 | ||
diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile index 01cc29ea5ff7..0a8ee703b9fa 100644 --- a/arch/x86/platform/intel-mid/Makefile +++ b/arch/x86/platform/intel-mid/Makefile | |||
@@ -1,6 +1,6 @@ | |||
1 | obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o | 1 | obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfl.o |
2 | obj-$(CONFIG_X86_INTEL_MID) += intel_mid_vrtc.o | ||
3 | obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_intel_mid.o | 2 | obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_intel_mid.o |
3 | |||
4 | # SFI specific code | 4 | # SFI specific code |
5 | ifdef CONFIG_X86_INTEL_MID | 5 | ifdef CONFIG_X86_INTEL_MID |
6 | obj-$(CONFIG_SFI) += sfi.o device_libs/ | 6 | obj-$(CONFIG_SFI) += sfi.o device_libs/ |
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c b/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c index 0d942c1d26d5..69a783689d21 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c | |||
@@ -22,7 +22,9 @@ static void __init *emc1403_platform_data(void *info) | |||
22 | int intr = get_gpio_by_name("thermal_int"); | 22 | int intr = get_gpio_by_name("thermal_int"); |
23 | int intr2nd = get_gpio_by_name("thermal_alert"); | 23 | int intr2nd = get_gpio_by_name("thermal_alert"); |
24 | 24 | ||
25 | if (intr == -1 || intr2nd == -1) | 25 | if (intr < 0) |
26 | return NULL; | ||
27 | if (intr2nd < 0) | ||
26 | return NULL; | 28 | return NULL; |
27 | 29 | ||
28 | i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; | 30 | i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; |
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c index a013a4834bbe..dccae6b0413f 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c | |||
@@ -66,7 +66,7 @@ static int __init pb_keys_init(void) | |||
66 | gb[i].gpio = get_gpio_by_name(gb[i].desc); | 66 | gb[i].gpio = get_gpio_by_name(gb[i].desc); |
67 | pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc, | 67 | pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc, |
68 | gb[i].gpio); | 68 | gb[i].gpio); |
69 | if (gb[i].gpio == -1) | 69 | if (gb[i].gpio < 0) |
70 | continue; | 70 | continue; |
71 | 71 | ||
72 | if (i != good) | 72 | if (i != good) |
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_lis331.c b/arch/x86/platform/intel-mid/device_libs/platform_lis331.c index 15278c11f714..54226de7541a 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_lis331.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_lis331.c | |||
@@ -21,7 +21,9 @@ static void __init *lis331dl_platform_data(void *info) | |||
21 | int intr = get_gpio_by_name("accel_int"); | 21 | int intr = get_gpio_by_name("accel_int"); |
22 | int intr2nd = get_gpio_by_name("accel_2"); | 22 | int intr2nd = get_gpio_by_name("accel_2"); |
23 | 23 | ||
24 | if (intr == -1 || intr2nd == -1) | 24 | if (intr < 0) |
25 | return NULL; | ||
26 | if (intr2nd < 0) | ||
25 | return NULL; | 27 | return NULL; |
26 | 28 | ||
27 | i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; | 29 | i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; |
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c index 94ade10024ae..2c8acbc1e9ad 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c | |||
@@ -48,7 +48,7 @@ static void __init *max7315_platform_data(void *info) | |||
48 | gpio_base = get_gpio_by_name(base_pin_name); | 48 | gpio_base = get_gpio_by_name(base_pin_name); |
49 | intr = get_gpio_by_name(intr_pin_name); | 49 | intr = get_gpio_by_name(intr_pin_name); |
50 | 50 | ||
51 | if (gpio_base == -1) | 51 | if (gpio_base < 0) |
52 | return NULL; | 52 | return NULL; |
53 | max7315->gpio_base = gpio_base; | 53 | max7315->gpio_base = gpio_base; |
54 | if (intr != -1) { | 54 | if (intr != -1) { |
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c b/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c index dd28d63c84fb..cfe9a47a1e87 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c | |||
@@ -19,7 +19,7 @@ static void *mpu3050_platform_data(void *info) | |||
19 | struct i2c_board_info *i2c_info = info; | 19 | struct i2c_board_info *i2c_info = info; |
20 | int intr = get_gpio_by_name("mpu3050_int"); | 20 | int intr = get_gpio_by_name("mpu3050_int"); |
21 | 21 | ||
22 | if (intr == -1) | 22 | if (intr < 0) |
23 | return NULL; | 23 | return NULL; |
24 | 24 | ||
25 | i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; | 25 | i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; |
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c index d87182a09263..65c2a9a19db4 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c | |||
@@ -26,7 +26,7 @@ static void __init *pmic_gpio_platform_data(void *info) | |||
26 | static struct intel_pmic_gpio_platform_data pmic_gpio_pdata; | 26 | static struct intel_pmic_gpio_platform_data pmic_gpio_pdata; |
27 | int gpio_base = get_gpio_by_name("pmic_gpio_base"); | 27 | int gpio_base = get_gpio_by_name("pmic_gpio_base"); |
28 | 28 | ||
29 | if (gpio_base == -1) | 29 | if (gpio_base < 0) |
30 | gpio_base = 64; | 30 | gpio_base = 64; |
31 | pmic_gpio_pdata.gpio_base = gpio_base; | 31 | pmic_gpio_pdata.gpio_base = gpio_base; |
32 | pmic_gpio_pdata.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET; | 32 | pmic_gpio_pdata.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET; |
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c b/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c index 22881c9a6737..33be0b3be6e1 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c | |||
@@ -34,10 +34,10 @@ static void *tca6416_platform_data(void *info) | |||
34 | gpio_base = get_gpio_by_name(base_pin_name); | 34 | gpio_base = get_gpio_by_name(base_pin_name); |
35 | intr = get_gpio_by_name(intr_pin_name); | 35 | intr = get_gpio_by_name(intr_pin_name); |
36 | 36 | ||
37 | if (gpio_base == -1) | 37 | if (gpio_base < 0) |
38 | return NULL; | 38 | return NULL; |
39 | tca6416.gpio_base = gpio_base; | 39 | tca6416.gpio_base = gpio_base; |
40 | if (intr != -1) { | 40 | if (intr >= 0) { |
41 | i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; | 41 | i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; |
42 | tca6416.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET; | 42 | tca6416.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET; |
43 | } else { | 43 | } else { |
diff --git a/arch/x86/platform/intel-mid/early_printk_intel_mid.c b/arch/x86/platform/intel-mid/early_printk_intel_mid.c index 4f702f554f6e..e0bd082a80e0 100644 --- a/arch/x86/platform/intel-mid/early_printk_intel_mid.c +++ b/arch/x86/platform/intel-mid/early_printk_intel_mid.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <linux/console.h> | 22 | #include <linux/console.h> |
23 | #include <linux/kernel.h> | 23 | #include <linux/kernel.h> |
24 | #include <linux/delay.h> | 24 | #include <linux/delay.h> |
25 | #include <linux/init.h> | ||
26 | #include <linux/io.h> | 25 | #include <linux/io.h> |
27 | 26 | ||
28 | #include <asm/fixmap.h> | 27 | #include <asm/fixmap.h> |
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index f90e290f689f..1bbedc4b0f88 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c | |||
@@ -35,6 +35,8 @@ | |||
35 | #include <asm/apb_timer.h> | 35 | #include <asm/apb_timer.h> |
36 | #include <asm/reboot.h> | 36 | #include <asm/reboot.h> |
37 | 37 | ||
38 | #include "intel_mid_weak_decls.h" | ||
39 | |||
38 | /* | 40 | /* |
39 | * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, | 41 | * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, |
40 | * cmdline option x86_intel_mid_timer can be used to override the configuration | 42 | * cmdline option x86_intel_mid_timer can be used to override the configuration |
@@ -58,12 +60,16 @@ | |||
58 | 60 | ||
59 | enum intel_mid_timer_options intel_mid_timer_options; | 61 | enum intel_mid_timer_options intel_mid_timer_options; |
60 | 62 | ||
63 | /* intel_mid_ops to store sub arch ops */ | ||
64 | struct intel_mid_ops *intel_mid_ops; | ||
65 | /* getter function for sub arch ops */ | ||
66 | static void *(*get_intel_mid_ops[])(void) = INTEL_MID_OPS_INIT; | ||
61 | enum intel_mid_cpu_type __intel_mid_cpu_chip; | 67 | enum intel_mid_cpu_type __intel_mid_cpu_chip; |
62 | EXPORT_SYMBOL_GPL(__intel_mid_cpu_chip); | 68 | EXPORT_SYMBOL_GPL(__intel_mid_cpu_chip); |
63 | 69 | ||
64 | static void intel_mid_power_off(void) | 70 | static void intel_mid_power_off(void) |
65 | { | 71 | { |
66 | } | 72 | }; |
67 | 73 | ||
68 | static void intel_mid_reboot(void) | 74 | static void intel_mid_reboot(void) |
69 | { | 75 | { |
@@ -72,32 +78,6 @@ static void intel_mid_reboot(void) | |||
72 | 78 | ||
73 | static unsigned long __init intel_mid_calibrate_tsc(void) | 79 | static unsigned long __init intel_mid_calibrate_tsc(void) |
74 | { | 80 | { |
75 | unsigned long fast_calibrate; | ||
76 | u32 lo, hi, ratio, fsb; | ||
77 | |||
78 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
79 | pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi); | ||
80 | ratio = (hi >> 8) & 0x1f; | ||
81 | pr_debug("ratio is %d\n", ratio); | ||
82 | if (!ratio) { | ||
83 | pr_err("read a zero ratio, should be incorrect!\n"); | ||
84 | pr_err("force tsc ratio to 16 ...\n"); | ||
85 | ratio = 16; | ||
86 | } | ||
87 | rdmsr(MSR_FSB_FREQ, lo, hi); | ||
88 | if ((lo & 0x7) == 0x7) | ||
89 | fsb = PENWELL_FSB_FREQ_83SKU; | ||
90 | else | ||
91 | fsb = PENWELL_FSB_FREQ_100SKU; | ||
92 | fast_calibrate = ratio * fsb; | ||
93 | pr_debug("read penwell tsc %lu khz\n", fast_calibrate); | ||
94 | lapic_timer_frequency = fsb * 1000 / HZ; | ||
95 | /* mark tsc clocksource as reliable */ | ||
96 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); | ||
97 | |||
98 | if (fast_calibrate) | ||
99 | return fast_calibrate; | ||
100 | |||
101 | return 0; | 81 | return 0; |
102 | } | 82 | } |
103 | 83 | ||
@@ -125,13 +105,37 @@ static void __init intel_mid_time_init(void) | |||
125 | 105 | ||
126 | static void intel_mid_arch_setup(void) | 106 | static void intel_mid_arch_setup(void) |
127 | { | 107 | { |
128 | if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) | 108 | if (boot_cpu_data.x86 != 6) { |
129 | __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_PENWELL; | ||
130 | else { | ||
131 | pr_err("Unknown Intel MID CPU (%d:%d), default to Penwell\n", | 109 | pr_err("Unknown Intel MID CPU (%d:%d), default to Penwell\n", |
132 | boot_cpu_data.x86, boot_cpu_data.x86_model); | 110 | boot_cpu_data.x86, boot_cpu_data.x86_model); |
133 | __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_PENWELL; | 111 | __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_PENWELL; |
112 | goto out; | ||
134 | } | 113 | } |
114 | |||
115 | switch (boot_cpu_data.x86_model) { | ||
116 | case 0x35: | ||
117 | __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_CLOVERVIEW; | ||
118 | break; | ||
119 | case 0x3C: | ||
120 | case 0x4A: | ||
121 | __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_TANGIER; | ||
122 | break; | ||
123 | case 0x27: | ||
124 | default: | ||
125 | __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_PENWELL; | ||
126 | break; | ||
127 | } | ||
128 | |||
129 | if (__intel_mid_cpu_chip < MAX_CPU_OPS(get_intel_mid_ops)) | ||
130 | intel_mid_ops = get_intel_mid_ops[__intel_mid_cpu_chip](); | ||
131 | else { | ||
132 | intel_mid_ops = get_intel_mid_ops[INTEL_MID_CPU_CHIP_PENWELL](); | ||
133 | pr_info("ARCH: Unknown SoC, assuming PENWELL!\n"); | ||
134 | } | ||
135 | |||
136 | out: | ||
137 | if (intel_mid_ops->arch_setup) | ||
138 | intel_mid_ops->arch_setup(); | ||
135 | } | 139 | } |
136 | 140 | ||
137 | /* MID systems don't have i8042 controller */ | 141 | /* MID systems don't have i8042 controller */ |
diff --git a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h new file mode 100644 index 000000000000..a537ffc16299 --- /dev/null +++ b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h | |||
@@ -0,0 +1,19 @@ | |||
1 | /* | ||
2 | * intel_mid_weak_decls.h: Weak declarations of intel-mid.c | ||
3 | * | ||
4 | * (C) Copyright 2013 Intel Corporation | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; version 2 | ||
9 | * of the License. | ||
10 | */ | ||
11 | |||
12 | |||
13 | /* __attribute__((weak)) makes these declarations overridable */ | ||
14 | /* For every CPU addition a new get_<cpuname>_ops interface needs | ||
15 | * to be added. | ||
16 | */ | ||
17 | extern void * __cpuinit get_penwell_ops(void) __attribute__((weak)); | ||
18 | extern void * __cpuinit get_cloverview_ops(void) __attribute__((weak)); | ||
19 | extern void * __init get_tangier_ops(void) __attribute__((weak)); | ||
diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c new file mode 100644 index 000000000000..4f7884eebc14 --- /dev/null +++ b/arch/x86/platform/intel-mid/mfld.c | |||
@@ -0,0 +1,75 @@ | |||
1 | /* | ||
2 | * mfld.c: Intel Medfield platform setup code | ||
3 | * | ||
4 | * (C) Copyright 2013 Intel Corporation | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; version 2 | ||
9 | * of the License. | ||
10 | */ | ||
11 | |||
12 | #include <linux/init.h> | ||
13 | |||
14 | #include <asm/apic.h> | ||
15 | #include <asm/intel-mid.h> | ||
16 | #include <asm/intel_mid_vrtc.h> | ||
17 | |||
18 | #include "intel_mid_weak_decls.h" | ||
19 | |||
20 | static void penwell_arch_setup(void); | ||
21 | /* penwell arch ops */ | ||
22 | static struct intel_mid_ops penwell_ops = { | ||
23 | .arch_setup = penwell_arch_setup, | ||
24 | }; | ||
25 | |||
26 | static void mfld_power_off(void) | ||
27 | { | ||
28 | } | ||
29 | |||
30 | static unsigned long __init mfld_calibrate_tsc(void) | ||
31 | { | ||
32 | unsigned long fast_calibrate; | ||
33 | u32 lo, hi, ratio, fsb; | ||
34 | |||
35 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
36 | pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi); | ||
37 | ratio = (hi >> 8) & 0x1f; | ||
38 | pr_debug("ratio is %d\n", ratio); | ||
39 | if (!ratio) { | ||
40 | pr_err("read a zero ratio, should be incorrect!\n"); | ||
41 | pr_err("force tsc ratio to 16 ...\n"); | ||
42 | ratio = 16; | ||
43 | } | ||
44 | rdmsr(MSR_FSB_FREQ, lo, hi); | ||
45 | if ((lo & 0x7) == 0x7) | ||
46 | fsb = FSB_FREQ_83SKU; | ||
47 | else | ||
48 | fsb = FSB_FREQ_100SKU; | ||
49 | fast_calibrate = ratio * fsb; | ||
50 | pr_debug("read penwell tsc %lu khz\n", fast_calibrate); | ||
51 | lapic_timer_frequency = fsb * 1000 / HZ; | ||
52 | /* mark tsc clocksource as reliable */ | ||
53 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); | ||
54 | |||
55 | if (fast_calibrate) | ||
56 | return fast_calibrate; | ||
57 | |||
58 | return 0; | ||
59 | } | ||
60 | |||
61 | static void __init penwell_arch_setup() | ||
62 | { | ||
63 | x86_platform.calibrate_tsc = mfld_calibrate_tsc; | ||
64 | pm_power_off = mfld_power_off; | ||
65 | } | ||
66 | |||
67 | void * __cpuinit get_penwell_ops() | ||
68 | { | ||
69 | return &penwell_ops; | ||
70 | } | ||
71 | |||
72 | void * __cpuinit get_cloverview_ops() | ||
73 | { | ||
74 | return &penwell_ops; | ||
75 | } | ||
diff --git a/arch/x86/platform/intel-mid/mrfl.c b/arch/x86/platform/intel-mid/mrfl.c new file mode 100644 index 000000000000..09d10159e7b7 --- /dev/null +++ b/arch/x86/platform/intel-mid/mrfl.c | |||
@@ -0,0 +1,103 @@ | |||
1 | /* | ||
2 | * mrfl.c: Intel Merrifield platform specific setup code | ||
3 | * | ||
4 | * (C) Copyright 2013 Intel Corporation | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; version 2 | ||
9 | * of the License. | ||
10 | */ | ||
11 | |||
12 | #include <linux/init.h> | ||
13 | |||
14 | #include <asm/apic.h> | ||
15 | #include <asm/intel-mid.h> | ||
16 | |||
17 | #include "intel_mid_weak_decls.h" | ||
18 | |||
19 | static unsigned long __init tangier_calibrate_tsc(void) | ||
20 | { | ||
21 | unsigned long fast_calibrate; | ||
22 | u32 lo, hi, ratio, fsb, bus_freq; | ||
23 | |||
24 | /* *********************** */ | ||
25 | /* Compute TSC:Ratio * FSB */ | ||
26 | /* *********************** */ | ||
27 | |||
28 | /* Compute Ratio */ | ||
29 | rdmsr(MSR_PLATFORM_INFO, lo, hi); | ||
30 | pr_debug("IA32 PLATFORM_INFO is 0x%x : %x\n", hi, lo); | ||
31 | |||
32 | ratio = (lo >> 8) & 0xFF; | ||
33 | pr_debug("ratio is %d\n", ratio); | ||
34 | if (!ratio) { | ||
35 | pr_err("Read a zero ratio, force tsc ratio to 4 ...\n"); | ||
36 | ratio = 4; | ||
37 | } | ||
38 | |||
39 | /* Compute FSB */ | ||
40 | rdmsr(MSR_FSB_FREQ, lo, hi); | ||
41 | pr_debug("Actual FSB frequency detected by SOC 0x%x : %x\n", | ||
42 | hi, lo); | ||
43 | |||
44 | bus_freq = lo & 0x7; | ||
45 | pr_debug("bus_freq = 0x%x\n", bus_freq); | ||
46 | |||
47 | if (bus_freq == 0) | ||
48 | fsb = FSB_FREQ_100SKU; | ||
49 | else if (bus_freq == 1) | ||
50 | fsb = FSB_FREQ_100SKU; | ||
51 | else if (bus_freq == 2) | ||
52 | fsb = FSB_FREQ_133SKU; | ||
53 | else if (bus_freq == 3) | ||
54 | fsb = FSB_FREQ_167SKU; | ||
55 | else if (bus_freq == 4) | ||
56 | fsb = FSB_FREQ_83SKU; | ||
57 | else if (bus_freq == 5) | ||
58 | fsb = FSB_FREQ_400SKU; | ||
59 | else if (bus_freq == 6) | ||
60 | fsb = FSB_FREQ_267SKU; | ||
61 | else if (bus_freq == 7) | ||
62 | fsb = FSB_FREQ_333SKU; | ||
63 | else { | ||
64 | BUG(); | ||
65 | pr_err("Invalid bus_freq! Setting to minimal value!\n"); | ||
66 | fsb = FSB_FREQ_100SKU; | ||
67 | } | ||
68 | |||
69 | /* TSC = FSB Freq * Resolved HFM Ratio */ | ||
70 | fast_calibrate = ratio * fsb; | ||
71 | pr_debug("calculate tangier tsc %lu KHz\n", fast_calibrate); | ||
72 | |||
73 | /* ************************************ */ | ||
74 | /* Calculate Local APIC Timer Frequency */ | ||
75 | /* ************************************ */ | ||
76 | lapic_timer_frequency = (fsb * 1000) / HZ; | ||
77 | |||
78 | pr_debug("Setting lapic_timer_frequency = %d\n", | ||
79 | lapic_timer_frequency); | ||
80 | |||
81 | /* mark tsc clocksource as reliable */ | ||
82 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); | ||
83 | |||
84 | if (fast_calibrate) | ||
85 | return fast_calibrate; | ||
86 | |||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | static void __init tangier_arch_setup(void) | ||
91 | { | ||
92 | x86_platform.calibrate_tsc = tangier_calibrate_tsc; | ||
93 | } | ||
94 | |||
95 | /* tangier arch ops */ | ||
96 | static struct intel_mid_ops tangier_ops = { | ||
97 | .arch_setup = tangier_arch_setup, | ||
98 | }; | ||
99 | |||
100 | void * __cpuinit get_tangier_ops() | ||
101 | { | ||
102 | return &tangier_ops; | ||
103 | } | ||
diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index c84c1ca396bf..994c40bd7cb7 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c | |||
@@ -224,7 +224,7 @@ int get_gpio_by_name(const char *name) | |||
224 | if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN)) | 224 | if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN)) |
225 | return pentry->pin_no; | 225 | return pentry->pin_no; |
226 | } | 226 | } |
227 | return -1; | 227 | return -EINVAL; |
228 | } | 228 | } |
229 | 229 | ||
230 | void __init intel_scu_device_register(struct platform_device *pdev) | 230 | void __init intel_scu_device_register(struct platform_device *pdev) |
@@ -250,7 +250,7 @@ static void __init intel_scu_spi_device_register(struct spi_board_info *sdev) | |||
250 | sdev->modalias); | 250 | sdev->modalias); |
251 | return; | 251 | return; |
252 | } | 252 | } |
253 | memcpy(new_dev, sdev, sizeof(*sdev)); | 253 | *new_dev = *sdev; |
254 | 254 | ||
255 | spi_devs[spi_next_dev++] = new_dev; | 255 | spi_devs[spi_next_dev++] = new_dev; |
256 | } | 256 | } |
@@ -271,7 +271,7 @@ static void __init intel_scu_i2c_device_register(int bus, | |||
271 | idev->type); | 271 | idev->type); |
272 | return; | 272 | return; |
273 | } | 273 | } |
274 | memcpy(new_dev, idev, sizeof(*idev)); | 274 | *new_dev = *idev; |
275 | 275 | ||
276 | i2c_bus[i2c_next_dev] = bus; | 276 | i2c_bus[i2c_next_dev] = bus; |
277 | i2c_devs[i2c_next_dev++] = new_dev; | 277 | i2c_devs[i2c_next_dev++] = new_dev; |
@@ -337,6 +337,8 @@ static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *pentry, | |||
337 | pr_debug("IPC bus, name = %16.16s, irq = 0x%2x\n", | 337 | pr_debug("IPC bus, name = %16.16s, irq = 0x%2x\n", |
338 | pentry->name, pentry->irq); | 338 | pentry->name, pentry->irq); |
339 | pdata = intel_mid_sfi_get_pdata(dev, pentry); | 339 | pdata = intel_mid_sfi_get_pdata(dev, pentry); |
340 | if (IS_ERR(pdata)) | ||
341 | return; | ||
340 | 342 | ||
341 | pdev = platform_device_alloc(pentry->name, 0); | 343 | pdev = platform_device_alloc(pentry->name, 0); |
342 | if (pdev == NULL) { | 344 | if (pdev == NULL) { |
@@ -370,6 +372,8 @@ static void __init sfi_handle_spi_dev(struct sfi_device_table_entry *pentry, | |||
370 | spi_info.chip_select); | 372 | spi_info.chip_select); |
371 | 373 | ||
372 | pdata = intel_mid_sfi_get_pdata(dev, &spi_info); | 374 | pdata = intel_mid_sfi_get_pdata(dev, &spi_info); |
375 | if (IS_ERR(pdata)) | ||
376 | return; | ||
373 | 377 | ||
374 | spi_info.platform_data = pdata; | 378 | spi_info.platform_data = pdata; |
375 | if (dev->delay) | 379 | if (dev->delay) |
@@ -395,6 +399,8 @@ static void __init sfi_handle_i2c_dev(struct sfi_device_table_entry *pentry, | |||
395 | i2c_info.addr); | 399 | i2c_info.addr); |
396 | pdata = intel_mid_sfi_get_pdata(dev, &i2c_info); | 400 | pdata = intel_mid_sfi_get_pdata(dev, &i2c_info); |
397 | i2c_info.platform_data = pdata; | 401 | i2c_info.platform_data = pdata; |
402 | if (IS_ERR(pdata)) | ||
403 | return; | ||
398 | 404 | ||
399 | if (dev->delay) | 405 | if (dev->delay) |
400 | intel_scu_i2c_device_register(pentry->host_num, &i2c_info); | 406 | intel_scu_i2c_device_register(pentry->host_num, &i2c_info); |
@@ -443,13 +449,35 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) | |||
443 | * so we have to enable them one by one here | 449 | * so we have to enable them one by one here |
444 | */ | 450 | */ |
445 | ioapic = mp_find_ioapic(irq); | 451 | ioapic = mp_find_ioapic(irq); |
446 | irq_attr.ioapic = ioapic; | 452 | if (ioapic >= 0) { |
447 | irq_attr.ioapic_pin = irq; | 453 | irq_attr.ioapic = ioapic; |
448 | irq_attr.trigger = 1; | 454 | irq_attr.ioapic_pin = irq; |
449 | irq_attr.polarity = 1; | 455 | irq_attr.trigger = 1; |
450 | io_apic_set_pci_routing(NULL, irq, &irq_attr); | 456 | if (intel_mid_identify_cpu() == |
451 | } else | 457 | INTEL_MID_CPU_CHIP_TANGIER) { |
458 | if (!strncmp(pentry->name, | ||
459 | "r69001-ts-i2c", 13)) | ||
460 | /* active low */ | ||
461 | irq_attr.polarity = 1; | ||
462 | else if (!strncmp(pentry->name, | ||
463 | "synaptics_3202", 14)) | ||
464 | /* active low */ | ||
465 | irq_attr.polarity = 1; | ||
466 | else if (irq == 41) | ||
467 | /* fast_int_1 */ | ||
468 | irq_attr.polarity = 1; | ||
469 | else | ||
470 | /* active high */ | ||
471 | irq_attr.polarity = 0; | ||
472 | } else { | ||
473 | /* PNW and CLV go with active low */ | ||
474 | irq_attr.polarity = 1; | ||
475 | } | ||
476 | io_apic_set_pci_routing(NULL, irq, &irq_attr); | ||
477 | } | ||
478 | } else { | ||
452 | irq = 0; /* No irq */ | 479 | irq = 0; /* No irq */ |
480 | } | ||
453 | 481 | ||
454 | dev = get_device_id(pentry->type, pentry->name); | 482 | dev = get_device_id(pentry->type, pentry->name); |
455 | 483 | ||
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c index e6cb80f620af..4d171e8640ef 100644 --- a/arch/x86/platform/iris/iris.c +++ b/arch/x86/platform/iris/iris.c | |||
@@ -27,7 +27,6 @@ | |||
27 | #include <linux/kernel.h> | 27 | #include <linux/kernel.h> |
28 | #include <linux/errno.h> | 28 | #include <linux/errno.h> |
29 | #include <linux/delay.h> | 29 | #include <linux/delay.h> |
30 | #include <linux/init.h> | ||
31 | #include <linux/pm.h> | 30 | #include <linux/pm.h> |
32 | #include <asm/io.h> | 31 | #include <asm/io.h> |
33 | 32 | ||
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index efe4d7220397..dfe605ac1bcd 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c | |||
@@ -433,15 +433,49 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp) | |||
433 | return; | 433 | return; |
434 | } | 434 | } |
435 | 435 | ||
436 | static inline unsigned long cycles_2_us(unsigned long long cyc) | 436 | /* |
437 | * Not to be confused with cycles_2_ns() from tsc.c; this gives a relative | ||
438 | * number, not an absolute. It converts a duration in cycles to a duration in | ||
439 | * ns. | ||
440 | */ | ||
441 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | ||
437 | { | 442 | { |
443 | struct cyc2ns_data *data = cyc2ns_read_begin(); | ||
438 | unsigned long long ns; | 444 | unsigned long long ns; |
439 | unsigned long us; | ||
440 | int cpu = smp_processor_id(); | ||
441 | 445 | ||
442 | ns = (cyc * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR; | 446 | ns = mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift); |
443 | us = ns / 1000; | 447 | |
444 | return us; | 448 | cyc2ns_read_end(data); |
449 | return ns; | ||
450 | } | ||
451 | |||
452 | /* | ||
453 | * The reverse of the above; converts a duration in ns to a duration in cycles. | ||
454 | */ | ||
455 | static inline unsigned long long ns_2_cycles(unsigned long long ns) | ||
456 | { | ||
457 | struct cyc2ns_data *data = cyc2ns_read_begin(); | ||
458 | unsigned long long cyc; | ||
459 | |||
460 | cyc = (ns << data->cyc2ns_shift) / data->cyc2ns_mul; | ||
461 | |||
462 | cyc2ns_read_end(data); | ||
463 | return cyc; | ||
464 | } | ||
465 | |||
466 | static inline unsigned long cycles_2_us(unsigned long long cyc) | ||
467 | { | ||
468 | return cycles_2_ns(cyc) / NSEC_PER_USEC; | ||
469 | } | ||
470 | |||
471 | static inline cycles_t sec_2_cycles(unsigned long sec) | ||
472 | { | ||
473 | return ns_2_cycles(sec * NSEC_PER_SEC); | ||
474 | } | ||
475 | |||
476 | static inline unsigned long long usec_2_cycles(unsigned long usec) | ||
477 | { | ||
478 | return ns_2_cycles(usec * NSEC_PER_USEC); | ||
445 | } | 479 | } |
446 | 480 | ||
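The two helpers above are plain fixed-point arithmetic around the per-CPU cyc2ns data; with illustrative numbers for a roughly 2.5 GHz TSC (the real mul/shift pair is computed in tsc.c):

    ns  = (cyc * cyc2ns_mul) >> cyc2ns_shift
    cyc = (ns << cyc2ns_shift) / cyc2ns_mul

    e.g. cyc2ns_mul = 410, cyc2ns_shift = 10:
         1000 cycles -> (1000 * 410) >> 10 = 400 ns, i.e. about 0.4 ns per cycle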
447 | /* | 481 | /* |
@@ -668,16 +702,6 @@ static int wait_completion(struct bau_desc *bau_desc, | |||
668 | bcp, try); | 702 | bcp, try); |
669 | } | 703 | } |
670 | 704 | ||
671 | static inline cycles_t sec_2_cycles(unsigned long sec) | ||
672 | { | ||
673 | unsigned long ns; | ||
674 | cycles_t cyc; | ||
675 | |||
676 | ns = sec * 1000000000; | ||
677 | cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); | ||
678 | return cyc; | ||
679 | } | ||
680 | |||
681 | /* | 705 | /* |
682 | * Our retries are blocked by all destination sw ack resources being | 706 | * Our retries are blocked by all destination sw ack resources being |
683 | * in use, and a timeout is pending. In that case hardware immediately | 707 | * in use, and a timeout is pending. In that case hardware immediately |
@@ -1327,16 +1351,6 @@ static void ptc_seq_stop(struct seq_file *file, void *data) | |||
1327 | { | 1351 | { |
1328 | } | 1352 | } |
1329 | 1353 | ||
1330 | static inline unsigned long long usec_2_cycles(unsigned long microsec) | ||
1331 | { | ||
1332 | unsigned long ns; | ||
1333 | unsigned long long cyc; | ||
1334 | |||
1335 | ns = microsec * 1000; | ||
1336 | cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); | ||
1337 | return cyc; | ||
1338 | } | ||
1339 | |||
1340 | /* | 1354 | /* |
1341 | * Display the statistics thru /proc/sgi_uv/ptc_statistics | 1355 | * Display the statistics thru /proc/sgi_uv/ptc_statistics |
1342 | * 'data' points to the cpu number | 1356 | * 'data' points to the cpu number |
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index a44f457e70a1..bad628a620c4 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c | |||
@@ -29,12 +29,10 @@ void __init reserve_real_mode(void) | |||
29 | void __init setup_real_mode(void) | 29 | void __init setup_real_mode(void) |
30 | { | 30 | { |
31 | u16 real_mode_seg; | 31 | u16 real_mode_seg; |
32 | u32 *rel; | 32 | const u32 *rel; |
33 | u32 count; | 33 | u32 count; |
34 | u32 *ptr; | ||
35 | u16 *seg; | ||
36 | int i; | ||
37 | unsigned char *base; | 34 | unsigned char *base; |
35 | unsigned long phys_base; | ||
38 | struct trampoline_header *trampoline_header; | 36 | struct trampoline_header *trampoline_header; |
39 | size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob); | 37 | size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob); |
40 | #ifdef CONFIG_X86_64 | 38 | #ifdef CONFIG_X86_64 |
@@ -46,23 +44,23 @@ void __init setup_real_mode(void) | |||
46 | 44 | ||
47 | memcpy(base, real_mode_blob, size); | 45 | memcpy(base, real_mode_blob, size); |
48 | 46 | ||
49 | real_mode_seg = __pa(base) >> 4; | 47 | phys_base = __pa(base); |
48 | real_mode_seg = phys_base >> 4; | ||
49 | |||
50 | rel = (u32 *) real_mode_relocs; | 50 | rel = (u32 *) real_mode_relocs; |
51 | 51 | ||
52 | /* 16-bit segment relocations. */ | 52 | /* 16-bit segment relocations. */ |
53 | count = rel[0]; | 53 | count = *rel++; |
54 | rel = &rel[1]; | 54 | while (count--) { |
55 | for (i = 0; i < count; i++) { | 55 | u16 *seg = (u16 *) (base + *rel++); |
56 | seg = (u16 *) (base + rel[i]); | ||
57 | *seg = real_mode_seg; | 56 | *seg = real_mode_seg; |
58 | } | 57 | } |
59 | 58 | ||
60 | /* 32-bit linear relocations. */ | 59 | /* 32-bit linear relocations. */ |
61 | count = rel[i]; | 60 | count = *rel++; |
62 | rel = &rel[i + 1]; | 61 | while (count--) { |
63 | for (i = 0; i < count; i++) { | 62 | u32 *ptr = (u32 *) (base + *rel++); |
64 | ptr = (u32 *) (base + rel[i]); | 63 | *ptr += phys_base; |
65 | *ptr += __pa(base); | ||
66 | } | 64 | } |
67 | 65 | ||
68 | /* Must be performed *after* relocation. */ | 66 | /* Must be performed *after* relocation. */ 
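The setup_real_mode() hunk above rewrites the relocation walk to consume the table through a single moving pointer instead of two indexed loops. Here is a small userspace sketch of the same table layout ([count][offsets...][count][offsets...]); the buffer contents and the 0x9000 base are made up purely for illustration.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned char blob[64] = {0};
        /* Hypothetical relocation table: two 16-bit segment fixups at
         * offsets 8 and 12, then one 32-bit linear fixup at offset 16. */
        uint32_t relocs[] = { 2, 8, 12, 1, 16 };
        unsigned long phys_base = 0x9000;
        uint16_t real_mode_seg = phys_base >> 4;
        const uint32_t *rel = relocs;
        uint32_t count;

        count = *rel++;                 /* 16-bit segment relocations */
        while (count--) {
            uint16_t *seg = (uint16_t *)(blob + *rel++);
            *seg = real_mode_seg;
        }

        count = *rel++;                 /* 32-bit linear relocations */
        while (count--) {
            uint32_t *ptr = (uint32_t *)(blob + *rel++);
            *ptr += phys_base;
        }

        printf("segment written at offset 8: 0x%x\n", blob[8] | (blob[9] << 8));
        return 0;
    }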
diff --git a/arch/x86/realmode/rm/reboot.S b/arch/x86/realmode/rm/reboot.S index f932ea61d1c8..d66c607bdc58 100644 --- a/arch/x86/realmode/rm/reboot.S +++ b/arch/x86/realmode/rm/reboot.S | |||
@@ -1,5 +1,4 @@ | |||
1 | #include <linux/linkage.h> | 1 | #include <linux/linkage.h> |
2 | #include <linux/init.h> | ||
3 | #include <asm/segment.h> | 2 | #include <asm/segment.h> |
4 | #include <asm/page_types.h> | 3 | #include <asm/page_types.h> |
5 | #include <asm/processor-flags.h> | 4 | #include <asm/processor-flags.h> |
diff --git a/arch/x86/realmode/rm/trampoline_32.S b/arch/x86/realmode/rm/trampoline_32.S index c1b2791183e7..48ddd76bc4c3 100644 --- a/arch/x86/realmode/rm/trampoline_32.S +++ b/arch/x86/realmode/rm/trampoline_32.S | |||
@@ -20,7 +20,6 @@ | |||
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/linkage.h> | 22 | #include <linux/linkage.h> |
23 | #include <linux/init.h> | ||
24 | #include <asm/segment.h> | 23 | #include <asm/segment.h> |
25 | #include <asm/page_types.h> | 24 | #include <asm/page_types.h> |
26 | #include "realmode.h" | 25 | #include "realmode.h" |
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index bb360dc39d21..dac7b20d2f9d 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S | |||
@@ -25,7 +25,6 @@ | |||
25 | */ | 25 | */ |
26 | 26 | ||
27 | #include <linux/linkage.h> | 27 | #include <linux/linkage.h> |
28 | #include <linux/init.h> | ||
29 | #include <asm/pgtable_types.h> | 28 | #include <asm/pgtable_types.h> |
30 | #include <asm/page_types.h> | 29 | #include <asm/page_types.h> |
31 | #include <asm/msr.h> | 30 | #include <asm/msr.h> |
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index aabfb8380a1c..96bc506ac6de 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl | |||
@@ -357,3 +357,5 @@ | |||
357 | 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev | 357 | 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev |
358 | 349 i386 kcmp sys_kcmp | 358 | 349 i386 kcmp sys_kcmp |
359 | 350 i386 finit_module sys_finit_module | 359 | 350 i386 finit_module sys_finit_module |
360 | 351 i386 sched_setattr sys_sched_setattr | ||
361 | 352 i386 sched_getattr sys_sched_getattr | ||
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 38ae65dfd14f..a12bddc7ccea 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl | |||
@@ -320,6 +320,8 @@ | |||
320 | 311 64 process_vm_writev sys_process_vm_writev | 320 | 311 64 process_vm_writev sys_process_vm_writev |
321 | 312 common kcmp sys_kcmp | 321 | 312 common kcmp sys_kcmp |
322 | 313 common finit_module sys_finit_module | 322 | 313 common finit_module sys_finit_module |
323 | 314 common sched_setattr sys_sched_setattr | ||
324 | 315 common sched_getattr sys_sched_getattr | ||
323 | 325 | ||
324 | # | 326 | # |
325 | # x32-specific system call numbers start at 512 to avoid cache impact | 327 | # x32-specific system call numbers start at 512 to avoid cache impact |
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index f7bab68a4b83..11f9285a2ff6 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c | |||
@@ -722,15 +722,25 @@ static void percpu_init(void) | |||
722 | 722 | ||
723 | /* | 723 | /* |
724 | * Check to see if a symbol lies in the .data..percpu section. | 724 | * Check to see if a symbol lies in the .data..percpu section. |
725 | * For some as yet not understood reason the "__init_begin" | 725 | * |
726 | * symbol which immediately preceeds the .data..percpu section | 726 | * The linker incorrectly associates some symbols with the |
727 | * also shows up as it it were part of it so we do an explict | 727 | * .data..percpu section so we also need to check the symbol |
728 | * check for that symbol name and ignore it. | 728 | * name to make sure that we classify the symbol correctly. |
729 | * | ||
730 | * The GNU linker incorrectly associates: | ||
731 | * __init_begin | ||
732 | * __per_cpu_load | ||
733 | * | ||
734 | * The "gold" linker incorrectly associates: | ||
735 | * init_per_cpu__irq_stack_union | ||
736 | * init_per_cpu__gdt_page | ||
729 | */ | 737 | */ |
730 | static int is_percpu_sym(ElfW(Sym) *sym, const char *symname) | 738 | static int is_percpu_sym(ElfW(Sym) *sym, const char *symname) |
731 | { | 739 | { |
732 | return (sym->st_shndx == per_cpu_shndx) && | 740 | return (sym->st_shndx == per_cpu_shndx) && |
733 | strcmp(symname, "__init_begin"); | 741 | strcmp(symname, "__init_begin") && |
742 | strcmp(symname, "__per_cpu_load") && | ||
743 | strncmp(symname, "init_per_cpu_", 13); | ||
734 | } | 744 | } |
735 | 745 | ||
736 | 746 | ||
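The is_percpu_sym() change above whitelists the symbols that the GNU and gold linkers wrongly attribute to .data..percpu. The check itself is plain string filtering; a hedged sketch of the same filter follows, using a fake symbol list rather than real ELF data.

    #include <stdio.h>
    #include <string.h>

    /* Mirrors the name-based part of is_percpu_sym(): reject the symbols
     * that only appear to live in .data..percpu. */
    static int looks_like_percpu_sym(const char *symname)
    {
        return strcmp(symname, "__init_begin") &&
               strcmp(symname, "__per_cpu_load") &&
               strncmp(symname, "init_per_cpu_", 13);
    }

    int main(void)
    {
        /* Made-up sample names for illustration. */
        const char *names[] = {
            "__init_begin", "__per_cpu_load",
            "init_per_cpu__gdt_page", "some_percpu_variable",
        };
        for (unsigned i = 0; i < sizeof(names) / sizeof(names[0]); i++)
            printf("%-24s -> %s\n", names[i],
                   looks_like_percpu_sym(names[i]) ? "percpu" : "ignored");
        return 0;
    }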
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 2ada505067cc..eb5d7a56f8d4 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c | |||
@@ -178,7 +178,7 @@ notrace static int __always_inline do_realtime(struct timespec *ts) | |||
178 | 178 | ||
179 | ts->tv_nsec = 0; | 179 | ts->tv_nsec = 0; |
180 | do { | 180 | do { |
181 | seq = read_seqcount_begin_no_lockdep(&gtod->seq); | 181 | seq = raw_read_seqcount_begin(&gtod->seq); 
182 | mode = gtod->clock.vclock_mode; | 182 | mode = gtod->clock.vclock_mode; |
183 | ts->tv_sec = gtod->wall_time_sec; | 183 | ts->tv_sec = gtod->wall_time_sec; |
184 | ns = gtod->wall_time_snsec; | 184 | ns = gtod->wall_time_snsec; |
@@ -198,7 +198,7 @@ notrace static int do_monotonic(struct timespec *ts) | |||
198 | 198 | ||
199 | ts->tv_nsec = 0; | 199 | ts->tv_nsec = 0; |
200 | do { | 200 | do { |
201 | seq = read_seqcount_begin_no_lockdep(&gtod->seq); | 201 | seq = raw_read_seqcount_begin(&gtod->seq); 
202 | mode = gtod->clock.vclock_mode; | 202 | mode = gtod->clock.vclock_mode; |
203 | ts->tv_sec = gtod->monotonic_time_sec; | 203 | ts->tv_sec = gtod->monotonic_time_sec; |
204 | ns = gtod->monotonic_time_snsec; | 204 | ns = gtod->monotonic_time_snsec; |
@@ -214,7 +214,7 @@ notrace static int do_realtime_coarse(struct timespec *ts) | |||
214 | { | 214 | { |
215 | unsigned long seq; | 215 | unsigned long seq; |
216 | do { | 216 | do { |
217 | seq = read_seqcount_begin_no_lockdep(&gtod->seq); | 217 | seq = raw_read_seqcount_begin(&gtod->seq); 
218 | ts->tv_sec = gtod->wall_time_coarse.tv_sec; | 218 | ts->tv_sec = gtod->wall_time_coarse.tv_sec; |
219 | ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; | 219 | ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; |
220 | } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); | 220 | } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); 
@@ -225,7 +225,7 @@ notrace static int do_monotonic_coarse(struct timespec *ts) | |||
225 | { | 225 | { |
226 | unsigned long seq; | 226 | unsigned long seq; |
227 | do { | 227 | do { |
228 | seq = read_seqcount_begin_no_lockdep(&gtod->seq); | 228 | seq = raw_read_seqcount_begin(&gtod->seq); 
229 | ts->tv_sec = gtod->monotonic_time_coarse.tv_sec; | 229 | ts->tv_sec = gtod->monotonic_time_coarse.tv_sec; |
230 | ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec; | 230 | ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec; |
231 | } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); | 231 | } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); 
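The vclock_gettime.c hunks above only rename the seqcount entry point (read_seqcount_begin_no_lockdep() becomes raw_read_seqcount_begin()); the retry loop itself is unchanged. For readers unfamiliar with the pattern, here is a minimal userspace imitation of a seqcount reader/writer pair built on C11 atomics. It is not the kernel's seqlock implementation, just the same begin/retry shape, and the fencing is simplified.

    #include <stdatomic.h>
    #include <stdio.h>

    struct snapshot { _Atomic unsigned seq; long sec; long nsec; };

    static void write_time(struct snapshot *s, long sec, long nsec)
    {
        atomic_fetch_add_explicit(&s->seq, 1, memory_order_release); /* odd: update in progress */
        s->sec = sec;
        s->nsec = nsec;
        atomic_fetch_add_explicit(&s->seq, 1, memory_order_release); /* even: stable again */
    }

    static void read_time(struct snapshot *s, long *sec, long *nsec)
    {
        unsigned seq;
        do {
            seq = atomic_load_explicit(&s->seq, memory_order_acquire);
            *sec = s->sec;
            *nsec = s->nsec;
            /* NB: simplified; a production seqlock needs stronger
             * fencing around the data accesses. */
        } while ((seq & 1) ||
                 seq != atomic_load_explicit(&s->seq, memory_order_acquire));
    }

    int main(void)
    {
        struct snapshot s = { .seq = 0 };
        long sec, nsec;

        write_time(&s, 1234, 567);
        read_time(&s, &sec, &nsec);
        printf("%ld.%09ld\n", sec, nsec);
        return 0;
    }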
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S index 01f5e3b4613c..1e13eb8c9656 100644 --- a/arch/x86/vdso/vdso.S +++ b/arch/x86/vdso/vdso.S | |||
@@ -1,6 +1,5 @@ | |||
1 | #include <asm/page_types.h> | 1 | #include <asm/page_types.h> |
2 | #include <linux/linkage.h> | 2 | #include <linux/linkage.h> |
3 | #include <linux/init.h> | ||
4 | 3 | ||
5 | __PAGE_ALIGNED_DATA | 4 | __PAGE_ALIGNED_DATA |
6 | 5 | ||
diff --git a/arch/x86/vdso/vdsox32.S b/arch/x86/vdso/vdsox32.S index d6b9a7f42a8a..295f1c7543d8 100644 --- a/arch/x86/vdso/vdsox32.S +++ b/arch/x86/vdso/vdsox32.S | |||
@@ -1,6 +1,5 @@ | |||
1 | #include <asm/page_types.h> | 1 | #include <asm/page_types.h> |
2 | #include <linux/linkage.h> | 2 | #include <linux/linkage.h> |
3 | #include <linux/init.h> | ||
4 | 3 | ||
5 | __PAGE_ALIGNED_DATA | 4 | __PAGE_ALIGNED_DATA |
6 | 5 | ||
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 1a3c76505649..01b90261fa38 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -51,3 +51,7 @@ config XEN_DEBUG_FS | |||
51 | Enable statistics output and various tuning options in debugfs. | 51 | Enable statistics output and various tuning options in debugfs. |
52 | Enabling this option may incur a significant performance overhead. | 52 | Enabling this option may incur a significant performance overhead. |
53 | 53 | ||
54 | config XEN_PVH | ||
55 | bool "Support for running as a PVH guest" | ||
56 | depends on X86_64 && XEN && XEN_PVHVM | ||
57 | def_bool n | ||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index fa6ade76ef3f..a4d7b647867f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -262,8 +262,9 @@ static void __init xen_banner(void) | |||
262 | struct xen_extraversion extra; | 262 | struct xen_extraversion extra; |
263 | HYPERVISOR_xen_version(XENVER_extraversion, &extra); | 263 | HYPERVISOR_xen_version(XENVER_extraversion, &extra); |
264 | 264 | ||
265 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", | 265 | pr_info("Booting paravirtualized kernel %son %s\n", |
266 | pv_info.name); | 266 | xen_feature(XENFEAT_auto_translated_physmap) ? |
267 | "with PVH extensions " : "", pv_info.name); | ||
267 | printk(KERN_INFO "Xen version: %d.%d%s%s\n", | 268 | printk(KERN_INFO "Xen version: %d.%d%s%s\n", |
268 | version >> 16, version & 0xffff, extra.extraversion, | 269 | version >> 16, version & 0xffff, extra.extraversion, |
269 | xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); | 270 | xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); |
@@ -433,7 +434,7 @@ static void __init xen_init_cpuid_mask(void) | |||
433 | 434 | ||
434 | ax = 1; | 435 | ax = 1; |
435 | cx = 0; | 436 | cx = 0; |
436 | xen_cpuid(&ax, &bx, &cx, &dx); | 437 | cpuid(1, &ax, &bx, &cx, &dx); |
437 | 438 | ||
438 | xsave_mask = | 439 | xsave_mask = |
439 | (1 << (X86_FEATURE_XSAVE % 32)) | | 440 | (1 << (X86_FEATURE_XSAVE % 32)) | |
@@ -1142,8 +1143,9 @@ void xen_setup_vcpu_info_placement(void) | |||
1142 | xen_vcpu_setup(cpu); | 1143 | xen_vcpu_setup(cpu); |
1143 | 1144 | ||
1144 | /* xen_vcpu_setup managed to place the vcpu_info within the | 1145 | /* xen_vcpu_setup managed to place the vcpu_info within the |
1145 | percpu area for all cpus, so make use of it */ | 1146 | * percpu area for all cpus, so make use of it. Note that for |
1146 | if (have_vcpu_info_placement) { | 1147 | * PVH we want to use native IRQ mechanism. */ |
1148 | if (have_vcpu_info_placement && !xen_pvh_domain()) { | ||
1147 | pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); | 1149 | pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); |
1148 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); | 1150 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); |
1149 | pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); | 1151 | pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); |
@@ -1407,9 +1409,49 @@ static void __init xen_boot_params_init_edd(void) | |||
1407 | * Set up the GDT and segment registers for -fstack-protector. Until | 1409 | * Set up the GDT and segment registers for -fstack-protector. Until |
1408 | * we do this, we have to be careful not to call any stack-protected | 1410 | * we do this, we have to be careful not to call any stack-protected |
1409 | * function, which is most of the kernel. | 1411 | * function, which is most of the kernel. |
1412 | * | ||
1413 | * Note, that it is __ref because the only caller of this after init | ||
1414 | * is PVH which is not going to use xen_load_gdt_boot or other | ||
1415 | * __init functions. | ||
1410 | */ | 1416 | */ |
1411 | static void __init xen_setup_stackprotector(void) | 1417 | static void __ref xen_setup_gdt(int cpu) |
1412 | { | 1418 | { |
1419 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | ||
1420 | #ifdef CONFIG_X86_64 | ||
1421 | unsigned long dummy; | ||
1422 | |||
1423 | load_percpu_segment(cpu); /* We need to access per-cpu area */ | ||
1424 | switch_to_new_gdt(cpu); /* GDT and GS set */ | ||
1425 | |||
1426 | /* We are switching of the Xen provided GDT to our HVM mode | ||
1427 | * GDT. The new GDT has __KERNEL_CS with CS.L = 1 | ||
1428 | * and we are jumping to reload it. | ||
1429 | */ | ||
1430 | asm volatile ("pushq %0\n" | ||
1431 | "leaq 1f(%%rip),%0\n" | ||
1432 | "pushq %0\n" | ||
1433 | "lretq\n" | ||
1434 | "1:\n" | ||
1435 | : "=&r" (dummy) : "0" (__KERNEL_CS)); | ||
1436 | |||
1437 | /* | ||
1438 | * While not needed, we also set the %es, %ds, and %fs | ||
1439 | * to zero. We don't care about %ss as it is NULL. | ||
1440 | * Strictly speaking this is not needed as Xen zeros those | ||
1441 | * out (and also MSR_FS_BASE, MSR_GS_BASE, MSR_KERNEL_GS_BASE) | ||
1442 | * | ||
1443 | * Linux zeros them in cpu_init() and in secondary_startup_64 | ||
1444 | * (for BSP). | ||
1445 | */ | ||
1446 | loadsegment(es, 0); | ||
1447 | loadsegment(ds, 0); | ||
1448 | loadsegment(fs, 0); | ||
1449 | #else | ||
1450 | /* PVH: TODO Implement. */ | ||
1451 | BUG(); | ||
1452 | #endif | ||
1453 | return; /* PVH does not need any PV GDT ops. */ | ||
1454 | } | ||
1413 | pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; | 1455 | pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; |
1414 | pv_cpu_ops.load_gdt = xen_load_gdt_boot; | 1456 | pv_cpu_ops.load_gdt = xen_load_gdt_boot; |
1415 | 1457 | ||
@@ -1420,6 +1462,46 @@ static void __init xen_setup_stackprotector(void) | |||
1420 | pv_cpu_ops.load_gdt = xen_load_gdt; | 1462 | pv_cpu_ops.load_gdt = xen_load_gdt; |
1421 | } | 1463 | } |
1422 | 1464 | ||
1465 | /* | ||
1466 | * A PV guest starts with default flags that are not set for PVH, set them | ||
1467 | * here asap. | ||
1468 | */ | ||
1469 | static void xen_pvh_set_cr_flags(int cpu) | ||
1470 | { | ||
1471 | |||
1472 | /* Some of these are setup in 'secondary_startup_64'. The others: | ||
1473 | * X86_CR0_TS, X86_CR0_PE, X86_CR0_ET are set by Xen for HVM guests | ||
1474 | * (which PVH shared codepaths), while X86_CR0_PG is for PVH. */ | ||
1475 | write_cr0(read_cr0() | X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM); | ||
1476 | } | ||
1477 | |||
1478 | /* | ||
1479 | * Note, that it is ref - because the only caller of this after init | ||
1480 | * is PVH which is not going to use xen_load_gdt_boot or other | ||
1481 | * __init functions. | ||
1482 | */ | ||
1483 | void __ref xen_pvh_secondary_vcpu_init(int cpu) | ||
1484 | { | ||
1485 | xen_setup_gdt(cpu); | ||
1486 | xen_pvh_set_cr_flags(cpu); | ||
1487 | } | ||
1488 | |||
1489 | static void __init xen_pvh_early_guest_init(void) | ||
1490 | { | ||
1491 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | ||
1492 | return; | ||
1493 | |||
1494 | if (!xen_feature(XENFEAT_hvm_callback_vector)) | ||
1495 | return; | ||
1496 | |||
1497 | xen_have_vector_callback = 1; | ||
1498 | xen_pvh_set_cr_flags(0); | ||
1499 | |||
1500 | #ifdef CONFIG_X86_32 | ||
1501 | BUG(); /* PVH: Implement proper support. */ | ||
1502 | #endif | ||
1503 | } | ||
1504 | |||
1423 | /* First C function to be called on Xen boot */ | 1505 | /* First C function to be called on Xen boot */ |
1424 | asmlinkage void __init xen_start_kernel(void) | 1506 | asmlinkage void __init xen_start_kernel(void) |
1425 | { | 1507 | { |
@@ -1431,13 +1513,16 @@ asmlinkage void __init xen_start_kernel(void) | |||
1431 | 1513 | ||
1432 | xen_domain_type = XEN_PV_DOMAIN; | 1514 | xen_domain_type = XEN_PV_DOMAIN; |
1433 | 1515 | ||
1516 | xen_setup_features(); | ||
1517 | xen_pvh_early_guest_init(); | ||
1434 | xen_setup_machphys_mapping(); | 1518 | xen_setup_machphys_mapping(); |
1435 | 1519 | ||
1436 | /* Install Xen paravirt ops */ | 1520 | /* Install Xen paravirt ops */ |
1437 | pv_info = xen_info; | 1521 | pv_info = xen_info; |
1438 | pv_init_ops = xen_init_ops; | 1522 | pv_init_ops = xen_init_ops; |
1439 | pv_cpu_ops = xen_cpu_ops; | ||
1440 | pv_apic_ops = xen_apic_ops; | 1523 | pv_apic_ops = xen_apic_ops; |
1524 | if (!xen_pvh_domain()) | ||
1525 | pv_cpu_ops = xen_cpu_ops; | ||
1441 | 1526 | ||
1442 | x86_init.resources.memory_setup = xen_memory_setup; | 1527 | x86_init.resources.memory_setup = xen_memory_setup; |
1443 | x86_init.oem.arch_setup = xen_arch_setup; | 1528 | x86_init.oem.arch_setup = xen_arch_setup; |
@@ -1469,17 +1554,14 @@ asmlinkage void __init xen_start_kernel(void) | |||
1469 | /* Work out if we support NX */ | 1554 | /* Work out if we support NX */ |
1470 | x86_configure_nx(); | 1555 | x86_configure_nx(); |
1471 | 1556 | ||
1472 | xen_setup_features(); | ||
1473 | |||
1474 | /* Get mfn list */ | 1557 | /* Get mfn list */ |
1475 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | 1558 | xen_build_dynamic_phys_to_machine(); |
1476 | xen_build_dynamic_phys_to_machine(); | ||
1477 | 1559 | ||
1478 | /* | 1560 | /* |
1479 | * Set up kernel GDT and segment registers, mainly so that | 1561 | * Set up kernel GDT and segment registers, mainly so that |
1480 | * -fstack-protector code can be executed. | 1562 | * -fstack-protector code can be executed. |
1481 | */ | 1563 | */ |
1482 | xen_setup_stackprotector(); | 1564 | xen_setup_gdt(0); |
1483 | 1565 | ||
1484 | xen_init_irq_ops(); | 1566 | xen_init_irq_ops(); |
1485 | xen_init_cpuid_mask(); | 1567 | xen_init_cpuid_mask(); |
@@ -1548,14 +1630,18 @@ asmlinkage void __init xen_start_kernel(void) | |||
1548 | /* set the limit of our address space */ | 1630 | /* set the limit of our address space */ |
1549 | xen_reserve_top(); | 1631 | xen_reserve_top(); |
1550 | 1632 | ||
1551 | /* We used to do this in xen_arch_setup, but that is too late on AMD | 1633 | /* PVH: runs at default kernel iopl of 0 */ |
1552 | * were early_cpu_init (run before ->arch_setup()) calls early_amd_init | 1634 | if (!xen_pvh_domain()) { |
1553 | * which pokes 0xcf8 port. | 1635 | /* |
1554 | */ | 1636 | * We used to do this in xen_arch_setup, but that is too late |
1555 | set_iopl.iopl = 1; | 1637 | * on AMD were early_cpu_init (run before ->arch_setup()) calls |
1556 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); | 1638 | * early_amd_init which pokes 0xcf8 port. |
1557 | if (rc != 0) | 1639 | */ |
1558 | xen_raw_printk("physdev_op failed %d\n", rc); | 1640 | set_iopl.iopl = 1; |
1641 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); | ||
1642 | if (rc != 0) | ||
1643 | xen_raw_printk("physdev_op failed %d\n", rc); | ||
1644 | } | ||
1559 | 1645 | ||
1560 | #ifdef CONFIG_X86_32 | 1646 | #ifdef CONFIG_X86_32 |
1561 | /* set up basic CPUID stuff */ | 1647 | /* set up basic CPUID stuff */ |
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index 3a5f55d51907..103c93f874b2 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c | |||
@@ -125,3 +125,66 @@ void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) | |||
125 | apply_to_page_range(&init_mm, (unsigned long)shared, | 125 | apply_to_page_range(&init_mm, (unsigned long)shared, |
126 | PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); | 126 | PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); |
127 | } | 127 | } |
128 | #ifdef CONFIG_XEN_PVH | ||
129 | #include <xen/balloon.h> | ||
130 | #include <xen/events.h> | ||
131 | #include <xen/xen.h> | ||
132 | #include <linux/slab.h> | ||
133 | static int __init xlated_setup_gnttab_pages(void) | ||
134 | { | ||
135 | struct page **pages; | ||
136 | xen_pfn_t *pfns; | ||
137 | int rc; | ||
138 | unsigned int i; | ||
139 | unsigned long nr_grant_frames = gnttab_max_grant_frames(); | ||
140 | |||
141 | BUG_ON(nr_grant_frames == 0); | ||
142 | pages = kcalloc(nr_grant_frames, sizeof(pages[0]), GFP_KERNEL); | ||
143 | if (!pages) | ||
144 | return -ENOMEM; | ||
145 | |||
146 | pfns = kcalloc(nr_grant_frames, sizeof(pfns[0]), GFP_KERNEL); | ||
147 | if (!pfns) { | ||
148 | kfree(pages); | ||
149 | return -ENOMEM; | ||
150 | } | ||
151 | rc = alloc_xenballooned_pages(nr_grant_frames, pages, 0 /* lowmem */); | ||
152 | if (rc) { | ||
153 | pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__, | ||
154 | nr_grant_frames, rc); | ||
155 | kfree(pages); | ||
156 | kfree(pfns); | ||
157 | return rc; | ||
158 | } | ||
159 | for (i = 0; i < nr_grant_frames; i++) | ||
160 | pfns[i] = page_to_pfn(pages[i]); | ||
161 | |||
162 | rc = arch_gnttab_map_shared(pfns, nr_grant_frames, nr_grant_frames, | ||
163 | &xen_auto_xlat_grant_frames.vaddr); | ||
164 | |||
165 | kfree(pages); | ||
166 | if (rc) { | ||
167 | pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__, | ||
168 | nr_grant_frames, rc); | ||
169 | free_xenballooned_pages(nr_grant_frames, pages); | ||
170 | kfree(pfns); | ||
171 | return rc; | ||
172 | } | ||
173 | |||
174 | xen_auto_xlat_grant_frames.pfn = pfns; | ||
175 | xen_auto_xlat_grant_frames.count = nr_grant_frames; | ||
176 | |||
177 | return 0; | ||
178 | } | ||
179 | |||
180 | static int __init xen_pvh_gnttab_setup(void) | ||
181 | { | ||
182 | if (!xen_pvh_domain()) | ||
183 | return -ENODEV; | ||
184 | |||
185 | return xlated_setup_gnttab_pages(); | ||
186 | } | ||
187 | /* Call it _before_ __gnttab_init as we need to initialize the | ||
188 | * xen_auto_xlat_grant_frames first. */ | ||
189 | core_initcall(xen_pvh_gnttab_setup); | ||
190 | #endif | ||
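xlated_setup_gnttab_pages() above follows the usual allocate-two-buffers, unwind-on-failure pattern before handing the pfn array to arch_gnttab_map_shared(). A stripped-down userspace sketch of just that error-handling shape is below, with calloc standing in for kcalloc, a dummy mapping step, and made-up frame numbers.

    #include <stdio.h>
    #include <stdlib.h>

    /* Stand-in for the real mapping call; pretend it can fail. */
    static int map_frames_sketch(const unsigned long *pfns, unsigned long n)
    {
        return n ? 0 : -1;
    }

    static int setup_frames_sketch(unsigned long nr_frames)
    {
        unsigned long *pfns;
        void **pages;
        unsigned long i;

        pages = calloc(nr_frames, sizeof(pages[0]));
        if (!pages)
            return -1;

        pfns = calloc(nr_frames, sizeof(pfns[0]));
        if (!pfns) {
            free(pages);
            return -1;
        }

        for (i = 0; i < nr_frames; i++)
            pfns[i] = 0x1000 + i;           /* made-up frame numbers */

        if (map_frames_sketch(pfns, nr_frames)) {
            free(pfns);
            free(pages);
            return -1;
        }

        free(pages);    /* the pfn array stays live, as in the kernel code */
        printf("mapped %lu frames\n", nr_frames);
        return 0;
    }

    int main(void)
    {
        return setup_frames_sketch(4) ? 1 : 0;
    }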
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 0da7f863056f..76ca326105f7 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <xen/interface/xen.h> | 5 | #include <xen/interface/xen.h> |
6 | #include <xen/interface/sched.h> | 6 | #include <xen/interface/sched.h> |
7 | #include <xen/interface/vcpu.h> | 7 | #include <xen/interface/vcpu.h> |
8 | #include <xen/features.h> | ||
8 | #include <xen/events.h> | 9 | #include <xen/events.h> |
9 | 10 | ||
10 | #include <asm/xen/hypercall.h> | 11 | #include <asm/xen/hypercall.h> |
@@ -128,6 +129,8 @@ static const struct pv_irq_ops xen_irq_ops __initconst = { | |||
128 | 129 | ||
129 | void __init xen_init_irq_ops(void) | 130 | void __init xen_init_irq_ops(void) |
130 | { | 131 | { |
131 | pv_irq_ops = xen_irq_ops; | 132 | /* For PVH we use default pv_irq_ops settings. */ |
133 | if (!xen_feature(XENFEAT_hvm_callback_vector)) | ||
134 | pv_irq_ops = xen_irq_ops; | ||
132 | x86_init.irqs.intr_init = xen_init_IRQ; | 135 | x86_init.irqs.intr_init = xen_init_IRQ; |
133 | } | 136 | } |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index ce563be09cc1..c1d406f35523 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -1198,44 +1198,40 @@ static void __init xen_cleanhighmap(unsigned long vaddr, | |||
1198 | * instead of somewhere later and be confusing. */ | 1198 | * instead of somewhere later and be confusing. */ |
1199 | xen_mc_flush(); | 1199 | xen_mc_flush(); |
1200 | } | 1200 | } |
1201 | #endif | 1201 | static void __init xen_pagetable_p2m_copy(void) |
1202 | static void __init xen_pagetable_init(void) | ||
1203 | { | 1202 | { |
1204 | #ifdef CONFIG_X86_64 | ||
1205 | unsigned long size; | 1203 | unsigned long size; |
1206 | unsigned long addr; | 1204 | unsigned long addr; |
1207 | #endif | 1205 | unsigned long new_mfn_list; |
1208 | paging_init(); | 1206 | |
1209 | xen_setup_shared_info(); | 1207 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
1210 | #ifdef CONFIG_X86_64 | 1208 | return; |
1211 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { | 1209 | |
1212 | unsigned long new_mfn_list; | 1210 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); |
1213 | 1211 | ||
1214 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); | 1212 | new_mfn_list = xen_revector_p2m_tree(); |
1215 | 1213 | /* No memory or already called. */ | |
1216 | /* On 32-bit, we get zero so this never gets executed. */ | 1214 | if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list) |
1217 | new_mfn_list = xen_revector_p2m_tree(); | 1215 | return; |
1218 | if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) { | 1216 | |
1219 | /* using __ka address and sticking INVALID_P2M_ENTRY! */ | 1217 | /* using __ka address and sticking INVALID_P2M_ENTRY! */ |
1220 | memset((void *)xen_start_info->mfn_list, 0xff, size); | 1218 | memset((void *)xen_start_info->mfn_list, 0xff, size); |
1221 | 1219 | ||
1222 | /* We should be in __ka space. */ | 1220 | /* We should be in __ka space. */ |
1223 | BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map); | 1221 | BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map); |
1224 | addr = xen_start_info->mfn_list; | 1222 | addr = xen_start_info->mfn_list; |
1225 | /* We roundup to the PMD, which means that if anybody at this stage is | 1223 | /* We roundup to the PMD, which means that if anybody at this stage is |
1226 | * using the __ka address of xen_start_info or xen_start_info->shared_info | 1224 | * using the __ka address of xen_start_info or xen_start_info->shared_info |
1227 | * they are in going to crash. Fortunatly we have already revectored | 1225 | * they are in going to crash. Fortunatly we have already revectored |
1228 | * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */ | 1226 | * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */ |
1229 | size = roundup(size, PMD_SIZE); | 1227 | size = roundup(size, PMD_SIZE); |
1230 | xen_cleanhighmap(addr, addr + size); | 1228 | xen_cleanhighmap(addr, addr + size); |
1231 | 1229 | ||
1232 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); | 1230 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); |
1233 | memblock_free(__pa(xen_start_info->mfn_list), size); | 1231 | memblock_free(__pa(xen_start_info->mfn_list), size); |
1234 | /* And revector! Bye bye old array */ | 1232 | /* And revector! Bye bye old array */ |
1235 | xen_start_info->mfn_list = new_mfn_list; | 1233 | xen_start_info->mfn_list = new_mfn_list; |
1236 | } else | 1234 | |
1237 | goto skip; | ||
1238 | } | ||
1239 | /* At this stage, cleanup_highmap has already cleaned __ka space | 1235 | /* At this stage, cleanup_highmap has already cleaned __ka space |
1240 | * from _brk_limit way up to the max_pfn_mapped (which is the end of | 1236 | * from _brk_limit way up to the max_pfn_mapped (which is the end of |
1241 | * the ramdisk). We continue on, erasing PMD entries that point to page | 1237 | * the ramdisk). We continue on, erasing PMD entries that point to page |
@@ -1255,7 +1251,15 @@ static void __init xen_pagetable_init(void) | |||
1255 | * anything at this stage. */ | 1251 | * anything at this stage. */ |
1256 | xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); | 1252 | xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); |
1257 | #endif | 1253 | #endif |
1258 | skip: | 1254 | } |
1255 | #endif | ||
1256 | |||
1257 | static void __init xen_pagetable_init(void) | ||
1258 | { | ||
1259 | paging_init(); | ||
1260 | xen_setup_shared_info(); | ||
1261 | #ifdef CONFIG_X86_64 | ||
1262 | xen_pagetable_p2m_copy(); | ||
1259 | #endif | 1263 | #endif |
1260 | xen_post_allocator_init(); | 1264 | xen_post_allocator_init(); |
1261 | } | 1265 | } |
@@ -1753,6 +1757,10 @@ static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags) | |||
1753 | unsigned long pfn = __pa(addr) >> PAGE_SHIFT; | 1757 | unsigned long pfn = __pa(addr) >> PAGE_SHIFT; |
1754 | pte_t pte = pfn_pte(pfn, prot); | 1758 | pte_t pte = pfn_pte(pfn, prot); |
1755 | 1759 | ||
1760 | /* For PVH no need to set R/O or R/W to pin them or unpin them. */ | ||
1761 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
1762 | return; | ||
1763 | |||
1756 | if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) | 1764 | if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) |
1757 | BUG(); | 1765 | BUG(); |
1758 | } | 1766 | } |
@@ -1863,6 +1871,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, | |||
1863 | * but that's enough to get __va working. We need to fill in the rest | 1871 | * but that's enough to get __va working. We need to fill in the rest |
1864 | * of the physical mapping once some sort of allocator has been set | 1872 | * of the physical mapping once some sort of allocator has been set |
1865 | * up. | 1873 | * up. |
1874 | * NOTE: for PVH, the page tables are native. | ||
1866 | */ | 1875 | */ |
1867 | void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | 1876 | void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) |
1868 | { | 1877 | { |
@@ -1884,17 +1893,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | |||
1884 | /* Zap identity mapping */ | 1893 | /* Zap identity mapping */ |
1885 | init_level4_pgt[0] = __pgd(0); | 1894 | init_level4_pgt[0] = __pgd(0); |
1886 | 1895 | ||
1887 | /* Pre-constructed entries are in pfn, so convert to mfn */ | 1896 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { |
1888 | /* L4[272] -> level3_ident_pgt | 1897 | /* Pre-constructed entries are in pfn, so convert to mfn */ |
1889 | * L4[511] -> level3_kernel_pgt */ | 1898 | /* L4[272] -> level3_ident_pgt |
1890 | convert_pfn_mfn(init_level4_pgt); | 1899 | * L4[511] -> level3_kernel_pgt */ |
1891 | 1900 | convert_pfn_mfn(init_level4_pgt); | |
1892 | /* L3_i[0] -> level2_ident_pgt */ | 1901 | |
1893 | convert_pfn_mfn(level3_ident_pgt); | 1902 | /* L3_i[0] -> level2_ident_pgt */ |
1894 | /* L3_k[510] -> level2_kernel_pgt | 1903 | convert_pfn_mfn(level3_ident_pgt); |
1895 | * L3_i[511] -> level2_fixmap_pgt */ | 1904 | /* L3_k[510] -> level2_kernel_pgt |
1896 | convert_pfn_mfn(level3_kernel_pgt); | 1905 | * L3_i[511] -> level2_fixmap_pgt */ |
1897 | 1906 | convert_pfn_mfn(level3_kernel_pgt); | |
1907 | } | ||
1898 | /* We get [511][511] and have Xen's version of level2_kernel_pgt */ | 1908 | /* We get [511][511] and have Xen's version of level2_kernel_pgt */ |
1899 | l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); | 1909 | l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); |
1900 | l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); | 1910 | l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); |
@@ -1918,31 +1928,33 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | |||
1918 | copy_page(level2_fixmap_pgt, l2); | 1928 | copy_page(level2_fixmap_pgt, l2); |
1919 | /* Note that we don't do anything with level1_fixmap_pgt which | 1929 | /* Note that we don't do anything with level1_fixmap_pgt which |
1920 | * we don't need. */ | 1930 | * we don't need. */ |
1931 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { | ||
1932 | /* Make pagetable pieces RO */ | ||
1933 | set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); | ||
1934 | set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); | ||
1935 | set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); | ||
1936 | set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); | ||
1937 | set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); | ||
1938 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); | ||
1939 | set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); | ||
1940 | |||
1941 | /* Pin down new L4 */ | ||
1942 | pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, | ||
1943 | PFN_DOWN(__pa_symbol(init_level4_pgt))); | ||
1944 | |||
1945 | /* Unpin Xen-provided one */ | ||
1946 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); | ||
1921 | 1947 | ||
1922 | /* Make pagetable pieces RO */ | 1948 | /* |
1923 | set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); | 1949 | * At this stage there can be no user pgd, and no page |
1924 | set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); | 1950 | * structure to attach it to, so make sure we just set kernel |
1925 | set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); | 1951 | * pgd. |
1926 | set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); | 1952 | */ |
1927 | set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); | 1953 | xen_mc_batch(); |
1928 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); | 1954 | __xen_write_cr3(true, __pa(init_level4_pgt)); |
1929 | set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); | 1955 | xen_mc_issue(PARAVIRT_LAZY_CPU); |
1930 | 1956 | } else | |
1931 | /* Pin down new L4 */ | 1957 | native_write_cr3(__pa(init_level4_pgt)); |
1932 | pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, | ||
1933 | PFN_DOWN(__pa_symbol(init_level4_pgt))); | ||
1934 | |||
1935 | /* Unpin Xen-provided one */ | ||
1936 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); | ||
1937 | |||
1938 | /* | ||
1939 | * At this stage there can be no user pgd, and no page | ||
1940 | * structure to attach it to, so make sure we just set kernel | ||
1941 | * pgd. | ||
1942 | */ | ||
1943 | xen_mc_batch(); | ||
1944 | __xen_write_cr3(true, __pa(init_level4_pgt)); | ||
1945 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
1946 | 1958 | ||
1947 | /* We can't that easily rip out L3 and L2, as the Xen pagetables are | 1959 | /* We can't that easily rip out L3 and L2, as the Xen pagetables are |
1948 | * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for | 1960 | * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for |
@@ -2103,6 +2115,9 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | |||
2103 | 2115 | ||
2104 | static void __init xen_post_allocator_init(void) | 2116 | static void __init xen_post_allocator_init(void) |
2105 | { | 2117 | { |
2118 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
2119 | return; | ||
2120 | |||
2106 | pv_mmu_ops.set_pte = xen_set_pte; | 2121 | pv_mmu_ops.set_pte = xen_set_pte; |
2107 | pv_mmu_ops.set_pmd = xen_set_pmd; | 2122 | pv_mmu_ops.set_pmd = xen_set_pmd; |
2108 | pv_mmu_ops.set_pud = xen_set_pud; | 2123 | pv_mmu_ops.set_pud = xen_set_pud; |
@@ -2207,6 +2222,15 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { | |||
2207 | void __init xen_init_mmu_ops(void) | 2222 | void __init xen_init_mmu_ops(void) |
2208 | { | 2223 | { |
2209 | x86_init.paging.pagetable_init = xen_pagetable_init; | 2224 | x86_init.paging.pagetable_init = xen_pagetable_init; |
2225 | |||
2226 | /* Optimization - we can use the HVM one but it has no idea which | ||
2227 | * VCPUs are descheduled - which means that it will needlessly IPI | ||
2228 | * them. Xen knows so let it do the job. | ||
2229 | */ | ||
2230 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | ||
2231 | pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others; | ||
2232 | return; | ||
2233 | } | ||
2210 | pv_mmu_ops = xen_mmu_ops; | 2234 | pv_mmu_ops = xen_mmu_ops; |
2211 | 2235 | ||
2212 | memset(dummy_mapping, 0xff, PAGE_SIZE); | 2236 | memset(dummy_mapping, 0xff, PAGE_SIZE); |
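xen_init_mmu_ops() above now either installs the full PV MMU ops or, for auto-translated (PVH) guests, overrides only flush_tlb_others and keeps the native handling. The general "ops table with a selective override" pattern looks like this in plain C; the structure and functions here are invented for illustration.

    #include <stdio.h>
    #include <stdbool.h>

    struct mmu_ops_sketch {
        void (*flush_tlb_others)(void);
        void (*set_pte)(void);
    };

    static void native_flush(void)   { puts("native flush (IPIs)"); }
    static void native_set_pte(void) { puts("native set_pte"); }
    static void xen_flush(void)      { puts("hypervisor-assisted flush"); }
    static void xen_set_pte(void)    { puts("hypercall set_pte"); }

    static struct mmu_ops_sketch ops = { native_flush, native_set_pte };

    static void init_mmu_ops_sketch(bool auto_translated)
    {
        if (auto_translated) {
            /* PVH-style: keep native page table handling, but let the
             * hypervisor do remote TLB flushes. */
            ops.flush_tlb_others = xen_flush;
            return;
        }
        ops.flush_tlb_others = xen_flush;
        ops.set_pte = xen_set_pte;
    }

    int main(void)
    {
        init_mmu_ops_sketch(true);
        ops.flush_tlb_others();
        ops.set_pte();
        return 0;
    }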
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 2ae8699e8767..696c694986d0 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
@@ -280,6 +280,9 @@ void __ref xen_build_mfn_list_list(void) | |||
280 | { | 280 | { |
281 | unsigned long pfn; | 281 | unsigned long pfn; |
282 | 282 | ||
283 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
284 | return; | ||
285 | |||
283 | /* Pre-initialize p2m_top_mfn to be completely missing */ | 286 | /* Pre-initialize p2m_top_mfn to be completely missing */ |
284 | if (p2m_top_mfn == NULL) { | 287 | if (p2m_top_mfn == NULL) { |
285 | p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | 288 | p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); |
@@ -336,6 +339,9 @@ void __ref xen_build_mfn_list_list(void) | |||
336 | 339 | ||
337 | void xen_setup_mfn_list_list(void) | 340 | void xen_setup_mfn_list_list(void) |
338 | { | 341 | { |
342 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
343 | return; | ||
344 | |||
339 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | 345 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); |
340 | 346 | ||
341 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | 347 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = |
@@ -346,10 +352,15 @@ void xen_setup_mfn_list_list(void) | |||
346 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | 352 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ |
347 | void __init xen_build_dynamic_phys_to_machine(void) | 353 | void __init xen_build_dynamic_phys_to_machine(void) |
348 | { | 354 | { |
349 | unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; | 355 | unsigned long *mfn_list; |
350 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | 356 | unsigned long max_pfn; |
351 | unsigned long pfn; | 357 | unsigned long pfn; |
352 | 358 | ||
359 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
360 | return; | ||
361 | |||
362 | mfn_list = (unsigned long *)xen_start_info->mfn_list; | ||
363 | max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | ||
353 | xen_max_p2m_pfn = max_pfn; | 364 | xen_max_p2m_pfn = max_pfn; |
354 | 365 | ||
355 | p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | 366 | p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); |
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 0a7852483ffe..a8261716d58d 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c | |||
@@ -30,10 +30,9 @@ | |||
30 | #define XEN_PLATFORM_ERR_PROTOCOL -2 | 30 | #define XEN_PLATFORM_ERR_PROTOCOL -2 |
31 | #define XEN_PLATFORM_ERR_BLACKLIST -3 | 31 | #define XEN_PLATFORM_ERR_BLACKLIST -3 |
32 | 32 | ||
33 | /* store the value of xen_emul_unplug after the unplug is done */ | ||
34 | int xen_platform_pci_unplug; | ||
35 | EXPORT_SYMBOL_GPL(xen_platform_pci_unplug); | ||
36 | #ifdef CONFIG_XEN_PVHVM | 33 | #ifdef CONFIG_XEN_PVHVM |
34 | /* store the value of xen_emul_unplug after the unplug is done */ | ||
35 | static int xen_platform_pci_unplug; | ||
37 | static int xen_emul_unplug; | 36 | static int xen_emul_unplug; |
38 | 37 | ||
39 | static int check_platform_magic(void) | 38 | static int check_platform_magic(void) |
@@ -69,6 +68,80 @@ static int check_platform_magic(void) | |||
69 | return 0; | 68 | return 0; |
70 | } | 69 | } |
71 | 70 | ||
71 | bool xen_has_pv_devices() | ||
72 | { | ||
73 | if (!xen_domain()) | ||
74 | return false; | ||
75 | |||
76 | /* PV domains always have them. */ | ||
77 | if (xen_pv_domain()) | ||
78 | return true; | ||
79 | |||
80 | /* And user has xen_platform_pci=0 set in guest config as | ||
81 | * driver did not modify the value. */ | ||
82 | if (xen_platform_pci_unplug == 0) | ||
83 | return false; | ||
84 | |||
85 | if (xen_platform_pci_unplug & XEN_UNPLUG_NEVER) | ||
86 | return false; | ||
87 | |||
88 | if (xen_platform_pci_unplug & XEN_UNPLUG_ALL) | ||
89 | return true; | ||
90 | |||
91 | /* This is an odd one - we are going to run legacy | ||
92 | * and PV drivers at the same time. */ | ||
93 | if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) | ||
94 | return true; | ||
95 | |||
96 | /* And the caller has to follow with xen_pv_{disk,nic}_devices | ||
97 | * to be certain which driver can load. */ | ||
98 | return false; | ||
99 | } | ||
100 | EXPORT_SYMBOL_GPL(xen_has_pv_devices); | ||
101 | |||
102 | static bool __xen_has_pv_device(int state) | ||
103 | { | ||
104 | /* HVM domains might or might not */ | ||
105 | if (xen_hvm_domain() && (xen_platform_pci_unplug & state)) | ||
106 | return true; | ||
107 | |||
108 | return xen_has_pv_devices(); | ||
109 | } | ||
110 | |||
111 | bool xen_has_pv_nic_devices(void) | ||
112 | { | ||
113 | return __xen_has_pv_device(XEN_UNPLUG_ALL_NICS | XEN_UNPLUG_ALL); | ||
114 | } | ||
115 | EXPORT_SYMBOL_GPL(xen_has_pv_nic_devices); | ||
116 | |||
117 | bool xen_has_pv_disk_devices(void) | ||
118 | { | ||
119 | return __xen_has_pv_device(XEN_UNPLUG_ALL_IDE_DISKS | | ||
120 | XEN_UNPLUG_AUX_IDE_DISKS | XEN_UNPLUG_ALL); | ||
121 | } | ||
122 | EXPORT_SYMBOL_GPL(xen_has_pv_disk_devices); | ||
123 | |||
124 | /* | ||
125 | * This one is odd - it determines whether you want to run PV _and_ | ||
126 | * legacy (IDE) drivers together. This combination is only possible | ||
127 | * under HVM. | ||
128 | */ | ||
129 | bool xen_has_pv_and_legacy_disk_devices(void) | ||
130 | { | ||
131 | if (!xen_domain()) | ||
132 | return false; | ||
133 | |||
134 | /* N.B. This is only ever used in HVM mode */ | ||
135 | if (xen_pv_domain()) | ||
136 | return false; | ||
137 | |||
138 | if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) | ||
139 | return true; | ||
140 | |||
141 | return false; | ||
142 | } | ||
143 | EXPORT_SYMBOL_GPL(xen_has_pv_and_legacy_disk_devices); | ||
144 | |||
72 | void xen_unplug_emulated_devices(void) | 145 | void xen_unplug_emulated_devices(void) |
73 | { | 146 | { |
74 | int r; | 147 | int r; |
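The new xen_has_pv_devices()/__xen_has_pv_device() helpers above turn the xen_platform_pci_unplug bitmask into yes/no answers per device class. The decision table is easy to lose in the diff, so here is a compact userspace rendering of the same checks; the flag values are illustrative, not the real XEN_UNPLUG_* constants, and the xen_domain()/xen_hvm_domain() tests are collapsed into one boolean.

    #include <stdio.h>
    #include <stdbool.h>

    /* Illustrative flag values, not the kernel's XEN_UNPLUG_* constants. */
    #define UNPLUG_ALL_IDE_DISKS  (1 << 0)
    #define UNPLUG_ALL_NICS       (1 << 1)
    #define UNPLUG_AUX_IDE_DISKS  (1 << 2)
    #define UNPLUG_ALL            (1 << 3)
    #define UNPLUG_UNNECESSARY    (1 << 4)
    #define UNPLUG_NEVER          (1 << 5)

    static bool pv_domain = false;          /* pretend we are an HVM guest */
    static int  unplug = UNPLUG_ALL_NICS;   /* pretend only NICs were unplugged */

    static bool has_pv_devices(void)
    {
        if (pv_domain)
            return true;                    /* PV domains always have them */
        if (unplug == 0 || (unplug & UNPLUG_NEVER))
            return false;
        if (unplug & (UNPLUG_ALL | UNPLUG_UNNECESSARY))
            return true;
        return false;
    }

    static bool has_pv_device(int state)
    {
        if (!pv_domain && (unplug & state)) /* HVM: check the per-class bits */
            return true;
        return has_pv_devices();
    }

    int main(void)
    {
        printf("pv nic : %d\n", has_pv_device(UNPLUG_ALL_NICS | UNPLUG_ALL));
        printf("pv disk: %d\n", has_pv_device(UNPLUG_ALL_IDE_DISKS |
                                              UNPLUG_AUX_IDE_DISKS | UNPLUG_ALL));
        return 0;
    }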
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 68c054f59de6..dd5f905e33d5 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <xen/interface/memory.h> | 27 | #include <xen/interface/memory.h> |
28 | #include <xen/interface/physdev.h> | 28 | #include <xen/interface/physdev.h> |
29 | #include <xen/features.h> | 29 | #include <xen/features.h> |
30 | #include "mmu.h" | ||
30 | #include "xen-ops.h" | 31 | #include "xen-ops.h" |
31 | #include "vdso.h" | 32 | #include "vdso.h" |
32 | 33 | ||
@@ -81,6 +82,9 @@ static void __init xen_add_extra_mem(u64 start, u64 size) | |||
81 | 82 | ||
82 | memblock_reserve(start, size); | 83 | memblock_reserve(start, size); |
83 | 84 | ||
85 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
86 | return; | ||
87 | |||
84 | xen_max_p2m_pfn = PFN_DOWN(start + size); | 88 | xen_max_p2m_pfn = PFN_DOWN(start + size); |
85 | for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { | 89 | for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { |
86 | unsigned long mfn = pfn_to_mfn(pfn); | 90 | unsigned long mfn = pfn_to_mfn(pfn); |
@@ -103,6 +107,7 @@ static unsigned long __init xen_do_chunk(unsigned long start, | |||
103 | .domid = DOMID_SELF | 107 | .domid = DOMID_SELF |
104 | }; | 108 | }; |
105 | unsigned long len = 0; | 109 | unsigned long len = 0; |
110 | int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap); | ||
106 | unsigned long pfn; | 111 | unsigned long pfn; |
107 | int ret; | 112 | int ret; |
108 | 113 | ||
@@ -116,7 +121,7 @@ static unsigned long __init xen_do_chunk(unsigned long start, | |||
116 | continue; | 121 | continue; |
117 | frame = mfn; | 122 | frame = mfn; |
118 | } else { | 123 | } else { |
119 | if (mfn != INVALID_P2M_ENTRY) | 124 | if (!xlated_phys && mfn != INVALID_P2M_ENTRY) |
120 | continue; | 125 | continue; |
121 | frame = pfn; | 126 | frame = pfn; |
122 | } | 127 | } |
@@ -154,6 +159,13 @@ static unsigned long __init xen_do_chunk(unsigned long start, | |||
154 | static unsigned long __init xen_release_chunk(unsigned long start, | 159 | static unsigned long __init xen_release_chunk(unsigned long start, |
155 | unsigned long end) | 160 | unsigned long end) |
156 | { | 161 | { |
162 | /* | ||
163 | * Xen already ballooned out the E820 non RAM regions for us | ||
164 | * and set them up properly in EPT. | ||
165 | */ | ||
166 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
167 | return end - start; | ||
168 | |||
157 | return xen_do_chunk(start, end, true); | 169 | return xen_do_chunk(start, end, true); |
158 | } | 170 | } |
159 | 171 | ||
@@ -222,7 +234,13 @@ static void __init xen_set_identity_and_release_chunk( | |||
222 | * (except for the ISA region which must be 1:1 mapped) to | 234 | * (except for the ISA region which must be 1:1 mapped) to |
223 | * release the refcounts (in Xen) on the original frames. | 235 | * release the refcounts (in Xen) on the original frames. |
224 | */ | 236 | */ |
225 | for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) { | 237 | |
238 | /* | ||
239 | * PVH E820 matches the hypervisor's P2M which means we need to | ||
240 | * account for the proper values of *release and *identity. | ||
241 | */ | ||
242 | for (pfn = start_pfn; !xen_feature(XENFEAT_auto_translated_physmap) && | ||
243 | pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) { | ||
226 | pte_t pte = __pte_ma(0); | 244 | pte_t pte = __pte_ma(0); |
227 | 245 | ||
228 | if (pfn < PFN_UP(ISA_END_ADDRESS)) | 246 | if (pfn < PFN_UP(ISA_END_ADDRESS)) |
@@ -563,16 +581,13 @@ void xen_enable_nmi(void) | |||
563 | BUG(); | 581 | BUG(); |
564 | #endif | 582 | #endif |
565 | } | 583 | } |
566 | void __init xen_arch_setup(void) | 584 | void __init xen_pvmmu_arch_setup(void) |
567 | { | 585 | { |
568 | xen_panic_handler_init(); | ||
569 | |||
570 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); | 586 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); |
571 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); | 587 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); |
572 | 588 | ||
573 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | 589 | HYPERVISOR_vm_assist(VMASST_CMD_enable, |
574 | HYPERVISOR_vm_assist(VMASST_CMD_enable, | 590 | VMASST_TYPE_pae_extended_cr3); |
575 | VMASST_TYPE_pae_extended_cr3); | ||
576 | 591 | ||
577 | if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || | 592 | if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || |
578 | register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) | 593 | register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) |
@@ -581,6 +596,15 @@ void __init xen_arch_setup(void) | |||
581 | xen_enable_sysenter(); | 596 | xen_enable_sysenter(); |
582 | xen_enable_syscall(); | 597 | xen_enable_syscall(); |
583 | xen_enable_nmi(); | 598 | xen_enable_nmi(); |
599 | } | ||
600 | |||
601 | /* This function is not called for HVM domains */ | ||
602 | void __init xen_arch_setup(void) | ||
603 | { | ||
604 | xen_panic_handler_init(); | ||
605 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | ||
606 | xen_pvmmu_arch_setup(); | ||
607 | |||
584 | #ifdef CONFIG_ACPI | 608 | #ifdef CONFIG_ACPI |
585 | if (!(xen_start_info->flags & SIF_INITDOMAIN)) { | 609 | if (!(xen_start_info->flags & SIF_INITDOMAIN)) { |
586 | printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); | 610 | printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index c36b325abd83..a18eadd8bb40 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -73,9 +73,11 @@ static void cpu_bringup(void) | |||
73 | touch_softlockup_watchdog(); | 73 | touch_softlockup_watchdog(); |
74 | preempt_disable(); | 74 | preempt_disable(); |
75 | 75 | ||
76 | xen_enable_sysenter(); | 76 | /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */ |
77 | xen_enable_syscall(); | 77 | if (!xen_feature(XENFEAT_supervisor_mode_kernel)) { |
78 | 78 | xen_enable_sysenter(); | |
79 | xen_enable_syscall(); | ||
80 | } | ||
79 | cpu = smp_processor_id(); | 81 | cpu = smp_processor_id(); |
80 | smp_store_cpu_info(cpu); | 82 | smp_store_cpu_info(cpu); |
81 | cpu_data(cpu).x86_max_cores = 1; | 83 | cpu_data(cpu).x86_max_cores = 1; |
@@ -97,8 +99,14 @@ static void cpu_bringup(void) | |||
97 | wmb(); /* make sure everything is out */ | 99 | wmb(); /* make sure everything is out */ |
98 | } | 100 | } |
99 | 101 | ||
100 | static void cpu_bringup_and_idle(void) | 102 | /* Note: cpu parameter is only relevant for PVH */ |
103 | static void cpu_bringup_and_idle(int cpu) | ||
101 | { | 104 | { |
105 | #ifdef CONFIG_X86_64 | ||
106 | if (xen_feature(XENFEAT_auto_translated_physmap) && | ||
107 | xen_feature(XENFEAT_supervisor_mode_kernel)) | ||
108 | xen_pvh_secondary_vcpu_init(cpu); | ||
109 | #endif | ||
102 | cpu_bringup(); | 110 | cpu_bringup(); |
103 | cpu_startup_entry(CPUHP_ONLINE); | 111 | cpu_startup_entry(CPUHP_ONLINE); |
104 | } | 112 | } |
@@ -274,9 +282,10 @@ static void __init xen_smp_prepare_boot_cpu(void) | |||
274 | native_smp_prepare_boot_cpu(); | 282 | native_smp_prepare_boot_cpu(); |
275 | 283 | ||
276 | if (xen_pv_domain()) { | 284 | if (xen_pv_domain()) { |
277 | /* We've switched to the "real" per-cpu gdt, so make sure the | 285 | if (!xen_feature(XENFEAT_writable_page_tables)) |
278 | old memory can be recycled */ | 286 | /* We've switched to the "real" per-cpu gdt, so make |
279 | make_lowmem_page_readwrite(xen_initial_gdt); | 287 | * sure the old memory can be recycled. */ |
288 | make_lowmem_page_readwrite(xen_initial_gdt); | ||
280 | 289 | ||
281 | #ifdef CONFIG_X86_32 | 290 | #ifdef CONFIG_X86_32 |
282 | /* | 291 | /* |
@@ -360,22 +369,21 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
360 | 369 | ||
361 | gdt = get_cpu_gdt_table(cpu); | 370 | gdt = get_cpu_gdt_table(cpu); |
362 | 371 | ||
363 | ctxt->flags = VGCF_IN_KERNEL; | ||
364 | ctxt->user_regs.ss = __KERNEL_DS; | ||
365 | #ifdef CONFIG_X86_32 | 372 | #ifdef CONFIG_X86_32 |
373 | /* Note: PVH is not yet supported on x86_32. */ | ||
366 | ctxt->user_regs.fs = __KERNEL_PERCPU; | 374 | ctxt->user_regs.fs = __KERNEL_PERCPU; |
367 | ctxt->user_regs.gs = __KERNEL_STACK_CANARY; | 375 | ctxt->user_regs.gs = __KERNEL_STACK_CANARY; |
368 | #else | ||
369 | ctxt->gs_base_kernel = per_cpu_offset(cpu); | ||
370 | #endif | 376 | #endif |
371 | ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; | 377 | ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; |
372 | 378 | ||
373 | memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); | 379 | memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); |
374 | 380 | ||
375 | { | 381 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { |
382 | ctxt->flags = VGCF_IN_KERNEL; | ||
376 | ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ | 383 | ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ |
377 | ctxt->user_regs.ds = __USER_DS; | 384 | ctxt->user_regs.ds = __USER_DS; |
378 | ctxt->user_regs.es = __USER_DS; | 385 | ctxt->user_regs.es = __USER_DS; |
386 | ctxt->user_regs.ss = __KERNEL_DS; | ||
379 | 387 | ||
380 | xen_copy_trap_info(ctxt->trap_ctxt); | 388 | xen_copy_trap_info(ctxt->trap_ctxt); |
381 | 389 | ||
@@ -396,18 +404,27 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
396 | #ifdef CONFIG_X86_32 | 404 | #ifdef CONFIG_X86_32 |
397 | ctxt->event_callback_cs = __KERNEL_CS; | 405 | ctxt->event_callback_cs = __KERNEL_CS; |
398 | ctxt->failsafe_callback_cs = __KERNEL_CS; | 406 | ctxt->failsafe_callback_cs = __KERNEL_CS; |
407 | #else | ||
408 | ctxt->gs_base_kernel = per_cpu_offset(cpu); | ||
399 | #endif | 409 | #endif |
400 | ctxt->event_callback_eip = | 410 | ctxt->event_callback_eip = |
401 | (unsigned long)xen_hypervisor_callback; | 411 | (unsigned long)xen_hypervisor_callback; |
402 | ctxt->failsafe_callback_eip = | 412 | ctxt->failsafe_callback_eip = |
403 | (unsigned long)xen_failsafe_callback; | 413 | (unsigned long)xen_failsafe_callback; |
414 | ctxt->user_regs.cs = __KERNEL_CS; | ||
415 | per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); | ||
416 | #ifdef CONFIG_X86_32 | ||
404 | } | 417 | } |
405 | ctxt->user_regs.cs = __KERNEL_CS; | 418 | #else |
419 | } else | ||
420 | /* N.B. The user_regs.eip (cpu_bringup_and_idle) is called with | ||
421 | * %rdi having the cpu number - which means are passing in | ||
422 | * as the first parameter the cpu. Subtle! | ||
423 | */ | ||
424 | ctxt->user_regs.rdi = cpu; | ||
425 | #endif | ||
406 | ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); | 426 | ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); |
407 | |||
408 | per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); | ||
409 | ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); | 427 | ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); |
410 | |||
411 | if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) | 428 | if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) |
412 | BUG(); | 429 | BUG(); |
413 | 430 | ||
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 12a1ca707b94..7b78f88c1707 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -446,6 +446,7 @@ void xen_setup_timer(int cpu) | |||
446 | IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER| | 446 | IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER| |
447 | IRQF_FORCE_RESUME, | 447 | IRQF_FORCE_RESUME, |
448 | name, NULL); | 448 | name, NULL); |
449 | (void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX); | ||
449 | 450 | ||
450 | memcpy(evt, xen_clockevent, sizeof(*evt)); | 451 | memcpy(evt, xen_clockevent, sizeof(*evt)); |
451 | 452 | ||
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 7faed5869e5b..485b69585540 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S | |||
@@ -11,8 +11,28 @@ | |||
11 | #include <asm/page_types.h> | 11 | #include <asm/page_types.h> |
12 | 12 | ||
13 | #include <xen/interface/elfnote.h> | 13 | #include <xen/interface/elfnote.h> |
14 | #include <xen/interface/features.h> | ||
14 | #include <asm/xen/interface.h> | 15 | #include <asm/xen/interface.h> |
15 | 16 | ||
17 | #ifdef CONFIG_XEN_PVH | ||
18 | #define PVH_FEATURES_STR "|writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel" | ||
19 | /* Note the lack of 'hvm_callback_vector'. Older hypervisor will | ||
20 | * balk at this being part of XEN_ELFNOTE_FEATURES, so we put it in | ||
21 | * XEN_ELFNOTE_SUPPORTED_FEATURES which older hypervisors will ignore. | ||
22 | */ | ||
23 | #define PVH_FEATURES ((1 << XENFEAT_writable_page_tables) | \ | ||
24 | (1 << XENFEAT_auto_translated_physmap) | \ | ||
25 | (1 << XENFEAT_supervisor_mode_kernel) | \ | ||
26 | (1 << XENFEAT_hvm_callback_vector)) | ||
27 | /* The XENFEAT_writable_page_tables is not strictly necessary as we set that | || 
28 | * up regardless of whether this CONFIG option is enabled or not, but it | || 
29 | * clarifies what the right flags need to be. | ||
30 | */ | ||
31 | #else | ||
32 | #define PVH_FEATURES_STR "" | ||
33 | #define PVH_FEATURES (0) | ||
34 | #endif | ||
35 | |||
16 | __INIT | 36 | __INIT |
17 | ENTRY(startup_xen) | 37 | ENTRY(startup_xen) |
18 | cld | 38 | cld |
@@ -95,7 +115,10 @@ NEXT_HYPERCALL(arch_6) | |||
95 | #endif | 115 | #endif |
96 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) | 116 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) |
97 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) | 117 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) |
98 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") | 118 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables|pae_pgdir_above_4gb"; .asciz PVH_FEATURES_STR) |
119 | ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, .long (PVH_FEATURES) | | ||
120 | (1 << XENFEAT_writable_page_tables) | | ||
121 | (1 << XENFEAT_dom0)) | ||
99 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") | 122 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") |
100 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") | 123 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") |
101 | ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, | 124 | ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, |
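The xen-head.S hunk advertises PVH capabilities both as a feature string and as a XENFEAT_* bitmask in the new XEN_ELFNOTE_SUPPORTED_FEATURES note. Composing and testing such a mask is ordinary bit arithmetic; a short sketch with made-up feature bit numbers (the real XENFEAT_* values come from xen/interface/features.h, included above) is:

    #include <stdio.h>

    /* Made-up feature bit positions, for illustration only. */
    enum {
        FEAT_writable_page_tables    = 0,
        FEAT_auto_translated_physmap = 1,
        FEAT_supervisor_mode_kernel  = 2,
        FEAT_hvm_callback_vector     = 3,
    };

    #define PVH_FEATURES_SKETCH ((1u << FEAT_writable_page_tables)    | \
                                 (1u << FEAT_auto_translated_physmap) | \
                                 (1u << FEAT_supervisor_mode_kernel)  | \
                                 (1u << FEAT_hvm_callback_vector))

    int main(void)
    {
        unsigned mask = PVH_FEATURES_SKETCH;

        printf("mask = 0x%x\n", mask);
        printf("auto-translated? %s\n",
               (mask & (1u << FEAT_auto_translated_physmap)) ? "yes" : "no");
        return 0;
    }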
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 95f8c6142328..1cb6f4c37300 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -123,4 +123,5 @@ __visible void xen_adjust_exception_frame(void); | |||
123 | 123 | ||
124 | extern int xen_panic_handler_init(void); | 124 | extern int xen_panic_handler_init(void); |
125 | 125 | ||
126 | void xen_pvh_secondary_vcpu_init(int cpu); | ||
126 | #endif /* XEN_OPS_H */ | 127 | #endif /* XEN_OPS_H */ |