120 files changed, 3134 insertions, 470 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3a99cc96b6b1..dad6fa01af95 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt | |||
@@ -2233,6 +2233,17 @@ | |||
2233 | memory contents and reserves bad memory | 2233 | memory contents and reserves bad memory |
2234 | regions that are detected. | 2234 | regions that are detected. |
2235 | 2235 | ||
2236 | mem_encrypt= [X86-64] AMD Secure Memory Encryption (SME) control | ||
2237 | Valid arguments: on, off | ||
2238 | Default (depends on kernel configuration option): | ||
2239 | on (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y) | ||
2240 | off (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=n) | ||
2241 | mem_encrypt=on: Activate SME | ||
2242 | mem_encrypt=off: Do not activate SME | ||
2243 | |||
2244 | Refer to Documentation/x86/amd-memory-encryption.txt | ||
2245 | for details on when memory encryption can be activated. | ||
2246 | |||
2236 | mem_sleep_default= [SUSPEND] Default system suspend mode: | 2247 | mem_sleep_default= [SUSPEND] Default system suspend mode: |
2237 | s2idle - Suspend-To-Idle | 2248 | s2idle - Suspend-To-Idle |
2238 | shallow - Power-On Suspend or equivalent (if supported) | 2249 | shallow - Power-On Suspend or equivalent (if supported) |
@@ -2697,6 +2708,8 @@ | |||
2697 | nopat [X86] Disable PAT (page attribute table extension of | 2708 | nopat [X86] Disable PAT (page attribute table extension of |
2698 | pagetables) support. | 2709 | pagetables) support. |
2699 | 2710 | ||
2711 | nopcid [X86-64] Disable the PCID cpu feature. | ||
2712 | |||
2700 | norandmaps Don't use address space randomization. Equivalent to | 2713 | norandmaps Don't use address space randomization. Equivalent to |
2701 | echo 0 > /proc/sys/kernel/randomize_va_space | 2714 | echo 0 > /proc/sys/kernel/randomize_va_space |
2702 | 2715 | ||
diff --git a/Documentation/x86/amd-memory-encryption.txt b/Documentation/x86/amd-memory-encryption.txt new file mode 100644 index 000000000000..f512ab718541 --- /dev/null +++ b/Documentation/x86/amd-memory-encryption.txt | |||
@@ -0,0 +1,68 @@ | |||
1 | Secure Memory Encryption (SME) is a feature found on AMD processors. | ||
2 | |||
3 | SME provides the ability to mark individual pages of memory as encrypted using | ||
4 | the standard x86 page tables. A page that is marked encrypted will be | ||
5 | automatically decrypted when read from DRAM and encrypted when written to | ||
6 | DRAM. SME can therefore be used to protect the contents of DRAM from physical | ||
7 | attacks on the system. | ||
8 | |||
9 | A page is encrypted when a page table entry has the encryption bit set (see | ||
10 | below on how to determine its position). The encryption bit can also be | ||
11 | specified in the cr3 register, allowing the PGD table to be encrypted. Each | ||
12 | successive level of page tables can also be encrypted by setting the encryption | ||
13 | bit in the page table entry that points to the next table. This allows the full | ||
14 | page table hierarchy to be encrypted. Note that setting the encryption bit | ||
15 | in cr3 does not, by itself, imply that the full hierarchy is encrypted; | ||
16 | each page table entry in the hierarchy needs to have the encryption bit set | ||
17 | to achieve that. So, theoretically, you could have the encryption bit set in | ||
18 | cr3 so that the PGD is encrypted, but not set the encryption bit in the PGD | ||
19 | entry for a PUD, which would result in the PUD pointed to by that entry not | ||
20 | being encrypted. | ||
21 | |||
22 | Support for SME can be determined through the CPUID instruction. The CPUID | ||
23 | function 0x8000001f reports information related to SME: | ||
24 | |||
25 | 0x8000001f[eax]: | ||
26 | Bit[0] indicates support for SME | ||
27 | 0x8000001f[ebx]: | ||
28 | Bits[5:0] pagetable bit number used to activate memory | ||
29 | encryption | ||
30 | Bits[11:6] reduction in physical address space, in bits, when | ||
31 | memory encryption is enabled (this only affects | ||
32 | system physical addresses, not guest physical | ||
33 | addresses) | ||
34 | |||
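As a quick illustration of the CPUID interface described above, a userspace check might look like the following (a minimal sketch using the GCC/Clang cpuid.h helper; this is illustrative and not part of the patch):

	#include <stdio.h>
	#include <cpuid.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		/* Leaf 0x8000001f: AMD memory encryption capabilities */
		if (!__get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx))
			return 1;

		if (eax & 1) {	/* Bit[0]: SME supported */
			printf("SME supported, C-bit position: %u\n", ebx & 0x3f);
			printf("Physical address reduction: %u bits\n",
			       (ebx >> 6) & 0x3f);
		}
		return 0;
	}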
35 | If support for SME is present, MSR 0xc0010010 (MSR_K8_SYSCFG) can be used to | ||
36 | determine if SME is enabled and/or to enable memory encryption: | ||
37 | |||
38 | 0xc0010010: | ||
39 | Bit[23] 0 = memory encryption features are disabled | ||
40 | 1 = memory encryption features are enabled | ||
41 | |||
42 | Linux relies on the BIOS to set this bit if it has determined that the reduction | ||
43 | in the physical address space as a result of enabling memory encryption (see | ||
44 | CPUID information above) will not conflict with the address space resource | ||
45 | requirements for the system. If this bit is not set upon Linux startup then | ||
46 | Linux itself will not set it and memory encryption will not be possible. | ||
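A kernel-side check for this bit, sketched with the standard MSR accessors and the MSR_K8_SYSCFG definitions added elsewhere in this series (illustrative, not the exact patch code):

	u64 syscfg;

	rdmsrl(MSR_K8_SYSCFG, syscfg);
	if (!(syscfg & MSR_K8_SYSCFG_MEM_ENCRYPT)) {
		/* BIOS did not enable SME: encryption cannot be activated */
	}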
47 | |||
48 | The state of SME in the Linux kernel can be described as follows: | ||
49 | - Supported: | ||
50 | The CPU supports SME (determined through CPUID instruction). | ||
51 | |||
52 | - Enabled: | ||
53 | Supported and bit 23 of MSR_K8_SYSCFG is set. | ||
54 | |||
55 | - Active: | ||
56 | Supported, Enabled and the Linux kernel is actively applying | ||
57 | the encryption bit to page table entries (the SME mask in the | ||
58 | kernel is non-zero). | ||
59 | |||
60 | SME can also be enabled and activated in the BIOS. If SME is enabled and | ||
61 | activated in the BIOS, then all memory accesses will be encrypted and it will | ||
62 | not be necessary to activate the Linux memory encryption support. If the BIOS | ||
63 | merely enables SME (sets bit 23 of MSR_K8_SYSCFG), then Linux can activate | ||
64 | memory encryption by default (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y) or | ||
65 | by supplying mem_encrypt=on on the kernel command line. However, if the BIOS | ||
66 | does not enable SME, then Linux will not be able to activate memory | ||
67 | encryption, even if it is configured to do so by default or the | ||
68 | mem_encrypt=on command line parameter is specified. | ||
diff --git a/Documentation/x86/protection-keys.txt b/Documentation/x86/protection-keys.txt index b64304540821..fa46dcb347bc 100644 --- a/Documentation/x86/protection-keys.txt +++ b/Documentation/x86/protection-keys.txt | |||
@@ -34,7 +34,7 @@ with a key. In this example WRPKRU is wrapped by a C function | |||
34 | called pkey_set(). | 34 | called pkey_set(). |
35 | 35 | ||
36 | int real_prot = PROT_READ|PROT_WRITE; | 36 | int real_prot = PROT_READ|PROT_WRITE; |
37 | pkey = pkey_alloc(0, PKEY_DENY_WRITE); | 37 | pkey = pkey_alloc(0, PKEY_DISABLE_WRITE); |
38 | ptr = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); | 38 | ptr = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); |
39 | ret = pkey_mprotect(ptr, PAGE_SIZE, real_prot, pkey); | 39 | ret = pkey_mprotect(ptr, PAGE_SIZE, real_prot, pkey); |
40 | ... application runs here | 40 | ... application runs here |
@@ -42,9 +42,9 @@ called pkey_set(). | |||
42 | Now, if the application needs to update the data at 'ptr', it can | 42 | Now, if the application needs to update the data at 'ptr', it can |
43 | gain access, do the update, then remove its write access: | 43 | gain access, do the update, then remove its write access: |
44 | 44 | ||
45 | pkey_set(pkey, 0); // clear PKEY_DENY_WRITE | 45 | pkey_set(pkey, 0); // clear PKEY_DISABLE_WRITE |
46 | *ptr = foo; // assign something | 46 | *ptr = foo; // assign something |
47 | pkey_set(pkey, PKEY_DENY_WRITE); // set PKEY_DENY_WRITE again | 47 | pkey_set(pkey, PKEY_DISABLE_WRITE); // set PKEY_DISABLE_WRITE again |
48 | 48 | ||
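The example wraps WRPKRU in pkey_set() without showing its body; a plausible sketch using the compiler's PKU intrinsics (an assumption on my part; compile with -mpku, not part of this patch) is:

	#include <immintrin.h>

	/* Each pkey owns two PKRU bits: Access-Disable and Write-Disable. */
	static inline void pkey_set(int pkey, unsigned int rights)
	{
		unsigned int pkru = _rdpkru_u32();

		pkru &= ~(3U << (pkey * 2));	/* clear this key's bits */
		pkru |= rights << (pkey * 2);	/* e.g. PKEY_DISABLE_WRITE */
		_wrpkru(pkru);
	}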
49 | Now when it frees the memory, it will also free the pkey since it | 49 | Now when it frees the memory, it will also free the pkey since it |
50 | is no longer in use: | 50 | is no longer in use: |
diff --git a/Documentation/x86/x86_64/5level-paging.txt b/Documentation/x86/x86_64/5level-paging.txt new file mode 100644 index 000000000000..087251a0d99c --- /dev/null +++ b/Documentation/x86/x86_64/5level-paging.txt | |||
@@ -0,0 +1,64 @@ | |||
1 | == Overview == | ||
2 | |||
3 | The original x86-64 architecture was limited by 4-level paging to 256 TiB of | ||
4 | virtual address space and 64 TiB of physical address space. We are already | ||
5 | bumping into this limit: some vendors offer servers with 64 TiB of memory today. | ||
6 | |||
7 | To overcome the limitation, upcoming hardware will introduce support for | ||
8 | 5-level paging. It is a straightforward extension of the current page | ||
9 | table structure, adding one more layer of translation. | ||
10 | |||
11 | It bumps the limits to 128 PiB of virtual address space and 4 PiB of | ||
12 | physical address space. This "ought to be enough for anybody" ©. | ||
13 | |||
14 | QEMU 2.9 and later support 5-level paging. | ||
15 | |||
16 | Virtual memory layout for 5-level paging is described in | ||
17 | Documentation/x86/x86_64/mm.txt | ||
18 | |||
19 | == Enabling 5-level paging == | ||
20 | |||
21 | CONFIG_X86_5LEVEL=y enables the feature. | ||
22 | |||
23 | So far, a kernel compiled with the option enabled will be able to boot | ||
24 | only on machines that support the feature -- look for the 'la57' flag in | ||
25 | /proc/cpuinfo. | ||
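An alternative to grepping /proc/cpuinfo is to query CPUID directly; LA57 is reported in leaf 7, ECX bit 16. A small userspace sketch (not part of this patch):

	#include <cpuid.h>

	static int cpu_has_la57(void)
	{
		unsigned int eax, ebx, ecx, edx;

		if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
			return 0;
		return !!(ecx & (1U << 16));	/* CPUID.(7,0):ECX[16] = LA57 */
	}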
26 | |||
27 | The plan is to implement boot-time switching between 4- and 5-level paging | ||
28 | in the future. | ||
29 | |||
30 | == User-space and large virtual address space == | ||
31 | |||
32 | On x86, 5-level paging enables a 56-bit userspace virtual address space. | ||
33 | Not all user space is ready to handle such wide addresses. It's known that | ||
34 | at least some JIT compilers use the higher bits in pointers to encode their | ||
35 | own information. This collides with valid pointers under 5-level paging and | ||
36 | leads to crashes. | ||
37 | |||
38 | To mitigate this, we are not going to allocate virtual address space | ||
39 | above 47-bit by default. | ||
40 | |||
41 | But userspace can ask for allocation from the full address space by | ||
42 | specifying a hint address (with or without MAP_FIXED) above 47 bits. | ||
43 | |||
44 | If the hint address is set above 47-bit, but MAP_FIXED is not specified, we | ||
45 | try to look for an unmapped area at the specified address. If it's already | ||
46 | occupied, we look for an unmapped area in the *full* address space, rather | ||
47 | than the 47-bit window. | ||
48 | |||
49 | A high hint address would only affect the allocation in question, but not | ||
50 | any future mmap()s. | ||
51 | |||
52 | Specifying a high hint address on an older kernel or on a machine without | ||
53 | 5-level paging support is safe. The hint will be ignored and the kernel will | ||
54 | fall back to allocating from the 47-bit address space. | ||
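Putting the above together, an application opts into the wide address space roughly like this (a hedged sketch; the hint value is illustrative):

	#include <sys/mman.h>

	size_t length = 4096;
	void *hint = (void *)(1UL << 48);	/* any address above 47 bits */
	void *ptr = mmap(hint, length, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	/* On a 5-level kernel, ptr may land above 47 bits; older kernels
	 * ignore the high hint and allocate from the 47-bit window. */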
55 | |||
56 | This approach makes it easy for an application's memory allocator to become | ||
57 | aware of the large address space without manually tracking its allocated | ||
58 | virtual address space. | ||
59 | |||
60 | One important case we need to handle here is the interaction with MPX. | ||
61 | MPX (without the MAWA extension) cannot handle addresses above 47-bit, so we | ||
62 | need to make sure that MPX cannot be enabled if we already have a VMA above | ||
63 | the boundary, and forbid creating such VMAs once MPX is enabled. | ||
64 | |||
diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h index a3d0211970e9..c86a947f5368 100644 --- a/arch/ia64/include/asm/acpi.h +++ b/arch/ia64/include/asm/acpi.h | |||
@@ -112,8 +112,6 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf) | |||
112 | buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; | 112 | buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; |
113 | } | 113 | } |
114 | 114 | ||
115 | #define acpi_unlazy_tlb(x) | ||
116 | |||
117 | #ifdef CONFIG_ACPI_NUMA | 115 | #ifdef CONFIG_ACPI_NUMA |
118 | extern cpumask_t early_cpu_possible_map; | 116 | extern cpumask_t early_cpu_possible_map; |
119 | #define for_each_possible_early_cpu(cpu) \ | 117 | #define for_each_possible_early_cpu(cpu) \ |
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 121295637d0d..81416000c5e0 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c | |||
@@ -757,14 +757,14 @@ efi_memmap_intersects (unsigned long phys_addr, unsigned long size) | |||
757 | return 0; | 757 | return 0; |
758 | } | 758 | } |
759 | 759 | ||
760 | u32 | 760 | int |
761 | efi_mem_type (unsigned long phys_addr) | 761 | efi_mem_type (unsigned long phys_addr) |
762 | { | 762 | { |
763 | efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); | 763 | efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); |
764 | 764 | ||
765 | if (md) | 765 | if (md) |
766 | return md->type; | 766 | return md->type; |
767 | return 0; | 767 | return -EINVAL; |
768 | } | 768 | } |
769 | 769 | ||
770 | u64 | 770 | u64 |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cce15191e9e9..b4b27ab016f6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -169,6 +169,7 @@ config X86 | |||
169 | select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI | 169 | select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI |
170 | select HAVE_PERF_REGS | 170 | select HAVE_PERF_REGS |
171 | select HAVE_PERF_USER_STACK_DUMP | 171 | select HAVE_PERF_USER_STACK_DUMP |
172 | select HAVE_RCU_TABLE_FREE | ||
172 | select HAVE_REGS_AND_STACK_ACCESS_API | 173 | select HAVE_REGS_AND_STACK_ACCESS_API |
173 | select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION | 174 | select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION |
174 | select HAVE_STACK_VALIDATION if X86_64 | 175 | select HAVE_STACK_VALIDATION if X86_64 |
@@ -329,6 +330,7 @@ config FIX_EARLYCON_MEM | |||
329 | 330 | ||
330 | config PGTABLE_LEVELS | 331 | config PGTABLE_LEVELS |
331 | int | 332 | int |
333 | default 5 if X86_5LEVEL | ||
332 | default 4 if X86_64 | 334 | default 4 if X86_64 |
333 | default 3 if X86_PAE | 335 | default 3 if X86_PAE |
334 | default 2 | 336 | default 2 |
@@ -1399,6 +1401,24 @@ config X86_PAE | |||
1399 | has the cost of more pagetable lookup overhead, and also | 1401 | has the cost of more pagetable lookup overhead, and also |
1400 | consumes more pagetable space per process. | 1402 | consumes more pagetable space per process. |
1401 | 1403 | ||
1404 | config X86_5LEVEL | ||
1405 | bool "Enable 5-level page tables support" | ||
1406 | depends on X86_64 | ||
1407 | ---help--- | ||
1408 | 5-level paging enables access to a larger address space: | ||
1409 | up to 128 PiB of virtual address space and 4 PiB of | ||
1410 | physical address space. | ||
1411 | |||
1412 | It will be supported by future Intel CPUs. | ||
1413 | |||
1414 | Note: a kernel with this option enabled can only be booted | ||
1415 | on machines that support the feature. | ||
1416 | |||
1417 | See Documentation/x86/x86_64/5level-paging.txt for more | ||
1418 | information. | ||
1419 | |||
1420 | Say N if unsure. | ||
1421 | |||
1402 | config ARCH_PHYS_ADDR_T_64BIT | 1422 | config ARCH_PHYS_ADDR_T_64BIT |
1403 | def_bool y | 1423 | def_bool y |
1404 | depends on X86_64 || X86_PAE | 1424 | depends on X86_64 || X86_PAE |
@@ -1416,6 +1436,35 @@ config X86_DIRECT_GBPAGES | |||
1416 | supports them), so don't confuse the user by printing | 1436 | supports them), so don't confuse the user by printing |
1417 | that we have them enabled. | 1437 | that we have them enabled. |
1418 | 1438 | ||
1439 | config ARCH_HAS_MEM_ENCRYPT | ||
1440 | def_bool y | ||
1441 | |||
1442 | config AMD_MEM_ENCRYPT | ||
1443 | bool "AMD Secure Memory Encryption (SME) support" | ||
1444 | depends on X86_64 && CPU_SUP_AMD | ||
1445 | ---help--- | ||
1446 | Say yes to enable support for the encryption of system memory. | ||
1447 | This requires an AMD processor that supports Secure Memory | ||
1448 | Encryption (SME). | ||
1449 | |||
1450 | config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT | ||
1451 | bool "Activate AMD Secure Memory Encryption (SME) by default" | ||
1452 | default y | ||
1453 | depends on AMD_MEM_ENCRYPT | ||
1454 | ---help--- | ||
1455 | Say yes to have system memory encrypted by default if running on | ||
1456 | an AMD processor that supports Secure Memory Encryption (SME). | ||
1457 | |||
1458 | If set to Y, then the encryption of system memory can be | ||
1459 | deactivated with the mem_encrypt=off command line option. | ||
1460 | |||
1461 | If set to N, then the encryption of system memory can be | ||
1462 | activated with the mem_encrypt=on command line option. | ||
1463 | |||
1464 | config ARCH_USE_MEMREMAP_PROT | ||
1465 | def_bool y | ||
1466 | depends on AMD_MEM_ENCRYPT | ||
1467 | |||
1419 | # Common NUMA Features | 1468 | # Common NUMA Features |
1420 | config NUMA | 1469 | config NUMA |
1421 | bool "Numa Memory Allocation and Scheduler Support" | 1470 | bool "Numa Memory Allocation and Scheduler Support" |
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index 28029be47fbb..f1aa43854bed 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c | |||
@@ -15,6 +15,13 @@ | |||
15 | #define __pa(x) ((unsigned long)(x)) | 15 | #define __pa(x) ((unsigned long)(x)) |
16 | #define __va(x) ((void *)((unsigned long)(x))) | 16 | #define __va(x) ((void *)((unsigned long)(x))) |
17 | 17 | ||
18 | /* | ||
19 | * The pgtable.h and mm/ident_map.c includes make use of the SME related | ||
20 | * information which is not used in the compressed image support. Un-define | ||
21 | * the SME support to avoid any compile and link errors. | ||
22 | */ | ||
23 | #undef CONFIG_AMD_MEM_ENCRYPT | ||
24 | |||
18 | #include "misc.h" | 25 | #include "misc.h" |
19 | 26 | ||
20 | /* These actually do the work of building the kernel identity maps. */ | 27 | /* These actually do the work of building the kernel identity maps. */ |
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 2efc768e4362..72d867f6b518 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h | |||
@@ -150,8 +150,6 @@ static inline void disable_acpi(void) { } | |||
150 | extern int x86_acpi_numa_init(void); | 150 | extern int x86_acpi_numa_init(void); |
151 | #endif /* CONFIG_ACPI_NUMA */ | 151 | #endif /* CONFIG_ACPI_NUMA */ |
152 | 152 | ||
153 | #define acpi_unlazy_tlb(x) leave_mm(x) | ||
154 | |||
155 | #ifdef CONFIG_ACPI_APEI | 153 | #ifdef CONFIG_ACPI_APEI |
156 | static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) | 154 | static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) |
157 | { | 155 | { |
@@ -162,12 +160,13 @@ static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) | |||
162 | * you call efi_mem_attributes() during boot and at runtime, | 160 | * you call efi_mem_attributes() during boot and at runtime, |
163 | * you could theoretically see different attributes. | 161 | * you could theoretically see different attributes. |
164 | * | 162 | * |
165 | * Since we are yet to see any x86 platforms that require | 163 | * We are yet to see any x86 platforms that require anything |
166 | * anything other than PAGE_KERNEL (some arm64 platforms | 164 | * other than PAGE_KERNEL (some ARM64 platforms require the |
167 | * require the equivalent of PAGE_KERNEL_NOCACHE), return that | 165 | * equivalent of PAGE_KERNEL_NOCACHE). Additionally, if SME |
168 | * until we know differently. | 166 | * is active, the ACPI information will not be encrypted, |
167 | * so return PAGE_KERNEL_NOENC until we know differently. | ||
169 | */ | 168 | */ |
170 | return PAGE_KERNEL; | 169 | return PAGE_KERNEL_NOENC; |
171 | } | 170 | } |
172 | #endif | 171 | #endif |
173 | 172 | ||
diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h index e01f7f7ccb0c..84ae170bc3d0 100644 --- a/arch/x86/include/asm/cmdline.h +++ b/arch/x86/include/asm/cmdline.h | |||
@@ -2,5 +2,7 @@ | |||
2 | #define _ASM_X86_CMDLINE_H | 2 | #define _ASM_X86_CMDLINE_H |
3 | 3 | ||
4 | int cmdline_find_option_bool(const char *cmdline_ptr, const char *option); | 4 | int cmdline_find_option_bool(const char *cmdline_ptr, const char *option); |
5 | int cmdline_find_option(const char *cmdline_ptr, const char *option, | ||
6 | char *buffer, int bufsize); | ||
5 | 7 | ||
6 | #endif /* _ASM_X86_CMDLINE_H */ | 8 | #endif /* _ASM_X86_CMDLINE_H */ |
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 8ea315a11fe0..42bbbf0f173d 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h | |||
@@ -196,6 +196,7 @@ | |||
196 | 196 | ||
197 | #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ | 197 | #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ |
198 | #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ | 198 | #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ |
199 | #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ | ||
199 | 200 | ||
200 | #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ | 201 | #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ |
201 | #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ | 202 | #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ |
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 5dff775af7cd..c10c9128f54e 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h | |||
@@ -21,11 +21,13 @@ | |||
21 | # define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) | 21 | # define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) |
22 | # define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31)) | 22 | # define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31)) |
23 | # define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31)) | 23 | # define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31)) |
24 | # define DISABLE_PCID 0 | ||
24 | #else | 25 | #else |
25 | # define DISABLE_VME 0 | 26 | # define DISABLE_VME 0 |
26 | # define DISABLE_K6_MTRR 0 | 27 | # define DISABLE_K6_MTRR 0 |
27 | # define DISABLE_CYRIX_ARR 0 | 28 | # define DISABLE_CYRIX_ARR 0 |
28 | # define DISABLE_CENTAUR_MCR 0 | 29 | # define DISABLE_CENTAUR_MCR 0 |
30 | # define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31)) | ||
29 | #endif /* CONFIG_X86_64 */ | 31 | #endif /* CONFIG_X86_64 */ |
30 | 32 | ||
31 | #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS | 33 | #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS |
@@ -49,7 +51,7 @@ | |||
49 | #define DISABLED_MASK1 0 | 51 | #define DISABLED_MASK1 0 |
50 | #define DISABLED_MASK2 0 | 52 | #define DISABLED_MASK2 0 |
51 | #define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR) | 53 | #define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR) |
52 | #define DISABLED_MASK4 0 | 54 | #define DISABLED_MASK4 (DISABLE_PCID) |
53 | #define DISABLED_MASK5 0 | 55 | #define DISABLED_MASK5 0 |
54 | #define DISABLED_MASK6 0 | 56 | #define DISABLED_MASK6 0 |
55 | #define DISABLED_MASK7 0 | 57 | #define DISABLED_MASK7 0 |
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 398c79889f5c..1387dafdba2d 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <asm/io.h> | 12 | #include <asm/io.h> |
13 | #include <asm/swiotlb.h> | 13 | #include <asm/swiotlb.h> |
14 | #include <linux/dma-contiguous.h> | 14 | #include <linux/dma-contiguous.h> |
15 | #include <linux/mem_encrypt.h> | ||
15 | 16 | ||
16 | #ifdef CONFIG_ISA | 17 | #ifdef CONFIG_ISA |
17 | # define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) | 18 | # define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) |
@@ -57,12 +58,12 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) | |||
57 | 58 | ||
58 | static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) | 59 | static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) |
59 | { | 60 | { |
60 | return paddr; | 61 | return __sme_set(paddr); |
61 | } | 62 | } |
62 | 63 | ||
63 | static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) | 64 | static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) |
64 | { | 65 | { |
65 | return daddr; | 66 | return __sme_clr(daddr); |
66 | } | 67 | } |
67 | #endif /* CONFIG_X86_DMA_REMAP */ | 68 | #endif /* CONFIG_X86_DMA_REMAP */ |
68 | 69 | ||
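The __sme_set()/__sme_clr() helpers used in this hunk come from the generic linux/mem_encrypt.h header (not shown here); conceptually they reduce to something like the following sketch:

	/* Conceptual definitions; see linux/mem_encrypt.h for the real ones */
	#define __sme_set(x)	((x) | sme_me_mask)	/* apply encryption mask */
	#define __sme_clr(x)	((x) & ~sme_me_mask)	/* strip encryption mask */

With SME inactive, sme_me_mask is zero, so both collapse to no-ops and the DMA helpers behave exactly as before.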
diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h index 3c69fed215c5..a8e15b04565b 100644 --- a/arch/x86/include/asm/dmi.h +++ b/arch/x86/include/asm/dmi.h | |||
@@ -13,9 +13,9 @@ static __always_inline __init void *dmi_alloc(unsigned len) | |||
13 | } | 13 | } |
14 | 14 | ||
15 | /* Use early IO mappings for DMI because it's initialized early */ | 15 | /* Use early IO mappings for DMI because it's initialized early */ |
16 | #define dmi_early_remap early_ioremap | 16 | #define dmi_early_remap early_memremap |
17 | #define dmi_early_unmap early_iounmap | 17 | #define dmi_early_unmap early_memunmap |
18 | #define dmi_remap ioremap_cache | 18 | #define dmi_remap(_x, _l) memremap(_x, _l, MEMREMAP_WB) |
19 | #define dmi_unmap iounmap | 19 | #define dmi_unmap(_x) memunmap(_x) |
20 | 20 | ||
21 | #endif /* _ASM_X86_DMI_H */ | 21 | #endif /* _ASM_X86_DMI_H */ |
diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h index a504adc661a4..cd266d830e49 100644 --- a/arch/x86/include/asm/e820/api.h +++ b/arch/x86/include/asm/e820/api.h | |||
@@ -39,6 +39,8 @@ extern void e820__setup_pci_gap(void); | |||
39 | extern void e820__reallocate_tables(void); | 39 | extern void e820__reallocate_tables(void); |
40 | extern void e820__register_nosave_regions(unsigned long limit_pfn); | 40 | extern void e820__register_nosave_regions(unsigned long limit_pfn); |
41 | 41 | ||
42 | extern int e820__get_entry_type(u64 start, u64 end); | ||
43 | |||
42 | /* | 44 | /* |
43 | * Returns true iff the specified range [start,end) is completely contained inside | 45 | * Returns true iff the specified range [start,end) is completely contained inside |
44 | * the ISA region. | 46 | * the ISA region. |
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index bda9f94bcb10..04330c8d9af9 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h | |||
@@ -305,8 +305,8 @@ static inline int mmap_is_ia32(void) | |||
305 | test_thread_flag(TIF_ADDR32)); | 305 | test_thread_flag(TIF_ADDR32)); |
306 | } | 306 | } |
307 | 307 | ||
308 | extern unsigned long tasksize_32bit(void); | 308 | extern unsigned long task_size_32bit(void); |
309 | extern unsigned long tasksize_64bit(void); | 309 | extern unsigned long task_size_64bit(int full_addr_space); |
310 | extern unsigned long get_mmap_base(int is_legacy); | 310 | extern unsigned long get_mmap_base(int is_legacy); |
311 | 311 | ||
312 | #ifdef CONFIG_X86_32 | 312 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index b65155cc3760..dcd9fb55e679 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h | |||
@@ -157,6 +157,26 @@ static inline void __set_fixmap(enum fixed_addresses idx, | |||
157 | } | 157 | } |
158 | #endif | 158 | #endif |
159 | 159 | ||
160 | /* | ||
161 | * FIXMAP_PAGE_NOCACHE is used for MMIO. Memory encryption is not | ||
162 | * supported for MMIO addresses, so make sure that the memory encryption | ||
163 | * mask is not part of the page attributes. | ||
164 | */ | ||
165 | #define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_IO_NOCACHE | ||
166 | |||
167 | /* | ||
168 | * Early memremap routines used for in-place encryption. The mappings created | ||
169 | * by these routines are intended to be used as temporary mappings. | ||
170 | */ | ||
171 | void __init *early_memremap_encrypted(resource_size_t phys_addr, | ||
172 | unsigned long size); | ||
173 | void __init *early_memremap_encrypted_wp(resource_size_t phys_addr, | ||
174 | unsigned long size); | ||
175 | void __init *early_memremap_decrypted(resource_size_t phys_addr, | ||
176 | unsigned long size); | ||
177 | void __init *early_memremap_decrypted_wp(resource_size_t phys_addr, | ||
178 | unsigned long size); | ||
179 | |||
160 | #include <asm-generic/fixmap.h> | 180 | #include <asm-generic/fixmap.h> |
161 | 181 | ||
162 | #define __late_set_fixmap(idx, phys, flags) __set_fixmap(idx, phys, flags) | 182 | #define __late_set_fixmap(idx, phys, flags) __set_fixmap(idx, phys, flags) |
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index 474eb8c66fee..05c4aa00cc86 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h | |||
@@ -7,6 +7,7 @@ struct x86_mapping_info { | |||
7 | unsigned long page_flag; /* page flag for PMD or PUD entry */ | 7 | unsigned long page_flag; /* page flag for PMD or PUD entry */ |
8 | unsigned long offset; /* ident mapping offset */ | 8 | unsigned long offset; /* ident mapping offset */ |
9 | bool direct_gbpages; /* PUD level 1GB page support */ | 9 | bool direct_gbpages; /* PUD level 1GB page support */ |
10 | unsigned long kernpg_flag; /* kernel pagetable flag override */ | ||
10 | }; | 11 | }; |
11 | 12 | ||
12 | int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, | 13 | int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, |
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 1310e1f1cd65..c40a95c33bb8 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h | |||
@@ -377,4 +377,12 @@ extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size) | |||
377 | #define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc | 377 | #define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc |
378 | #endif | 378 | #endif |
379 | 379 | ||
380 | extern bool arch_memremap_can_ram_remap(resource_size_t offset, | ||
381 | unsigned long size, | ||
382 | unsigned long flags); | ||
383 | #define arch_memremap_can_ram_remap arch_memremap_can_ram_remap | ||
384 | |||
385 | extern bool phys_mem_access_encrypted(unsigned long phys_addr, | ||
386 | unsigned long size); | ||
387 | |||
380 | #endif /* _ASM_X86_IO_H */ | 388 | #endif /* _ASM_X86_IO_H */ |
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 70ef205489f0..942c1f444da8 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h | |||
@@ -147,7 +147,8 @@ unsigned long | |||
147 | relocate_kernel(unsigned long indirection_page, | 147 | relocate_kernel(unsigned long indirection_page, |
148 | unsigned long page_list, | 148 | unsigned long page_list, |
149 | unsigned long start_address, | 149 | unsigned long start_address, |
150 | unsigned int preserve_context); | 150 | unsigned int preserve_context, |
151 | unsigned int sme_active); | ||
151 | #endif | 152 | #endif |
152 | 153 | ||
153 | #define ARCH_HAS_KIMAGE_ARCH | 154 | #define ARCH_HAS_KIMAGE_ARCH |
@@ -207,6 +208,14 @@ struct kexec_entry64_regs { | |||
207 | uint64_t r15; | 208 | uint64_t r15; |
208 | uint64_t rip; | 209 | uint64_t rip; |
209 | }; | 210 | }; |
211 | |||
212 | extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, | ||
213 | gfp_t gfp); | ||
214 | #define arch_kexec_post_alloc_pages arch_kexec_post_alloc_pages | ||
215 | |||
216 | extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages); | ||
217 | #define arch_kexec_pre_free_pages arch_kexec_pre_free_pages | ||
218 | |||
210 | #endif | 219 | #endif |
211 | 220 | ||
212 | typedef void crash_vmclear_fn(void); | 221 | typedef void crash_vmclear_fn(void); |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 92c9032502d8..369e41c23f07 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -1079,7 +1079,7 @@ void kvm_mmu_init_vm(struct kvm *kvm); | |||
1079 | void kvm_mmu_uninit_vm(struct kvm *kvm); | 1079 | void kvm_mmu_uninit_vm(struct kvm *kvm); |
1080 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 1080 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
1081 | u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask, | 1081 | u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask, |
1082 | u64 acc_track_mask); | 1082 | u64 acc_track_mask, u64 me_mask); |
1083 | 1083 | ||
1084 | void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); | 1084 | void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); |
1085 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, | 1085 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, |
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h new file mode 100644 index 000000000000..8e618fcf1f7c --- /dev/null +++ b/arch/x86/include/asm/mem_encrypt.h | |||
@@ -0,0 +1,80 @@ | |||
1 | /* | ||
2 | * AMD Memory Encryption Support | ||
3 | * | ||
4 | * Copyright (C) 2016 Advanced Micro Devices, Inc. | ||
5 | * | ||
6 | * Author: Tom Lendacky <thomas.lendacky@amd.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #ifndef __X86_MEM_ENCRYPT_H__ | ||
14 | #define __X86_MEM_ENCRYPT_H__ | ||
15 | |||
16 | #ifndef __ASSEMBLY__ | ||
17 | |||
18 | #include <linux/init.h> | ||
19 | |||
20 | #include <asm/bootparam.h> | ||
21 | |||
22 | #ifdef CONFIG_AMD_MEM_ENCRYPT | ||
23 | |||
24 | extern unsigned long sme_me_mask; | ||
25 | |||
26 | void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr, | ||
27 | unsigned long decrypted_kernel_vaddr, | ||
28 | unsigned long kernel_len, | ||
29 | unsigned long encryption_wa, | ||
30 | unsigned long encryption_pgd); | ||
31 | |||
32 | void __init sme_early_encrypt(resource_size_t paddr, | ||
33 | unsigned long size); | ||
34 | void __init sme_early_decrypt(resource_size_t paddr, | ||
35 | unsigned long size); | ||
36 | |||
37 | void __init sme_map_bootdata(char *real_mode_data); | ||
38 | void __init sme_unmap_bootdata(char *real_mode_data); | ||
39 | |||
40 | void __init sme_early_init(void); | ||
41 | |||
42 | void __init sme_encrypt_kernel(void); | ||
43 | void __init sme_enable(struct boot_params *bp); | ||
44 | |||
45 | /* Architecture __weak replacement functions */ | ||
46 | void __init mem_encrypt_init(void); | ||
47 | |||
48 | void swiotlb_set_mem_attributes(void *vaddr, unsigned long size); | ||
49 | |||
50 | #else /* !CONFIG_AMD_MEM_ENCRYPT */ | ||
51 | |||
52 | #define sme_me_mask 0UL | ||
53 | |||
54 | static inline void __init sme_early_encrypt(resource_size_t paddr, | ||
55 | unsigned long size) { } | ||
56 | static inline void __init sme_early_decrypt(resource_size_t paddr, | ||
57 | unsigned long size) { } | ||
58 | |||
59 | static inline void __init sme_map_bootdata(char *real_mode_data) { } | ||
60 | static inline void __init sme_unmap_bootdata(char *real_mode_data) { } | ||
61 | |||
62 | static inline void __init sme_early_init(void) { } | ||
63 | |||
64 | static inline void __init sme_encrypt_kernel(void) { } | ||
65 | static inline void __init sme_enable(struct boot_params *bp) { } | ||
66 | |||
67 | #endif /* CONFIG_AMD_MEM_ENCRYPT */ | ||
68 | |||
69 | /* | ||
70 | * The __sme_pa() and __sme_pa_nodebug() macros are meant for use when | ||
71 | * writing to or comparing values from the cr3 register. Having the | ||
72 | * encryption mask set in cr3 enables the PGD entry to be encrypted and | ||
73 | * avoid special case handling of PGD allocations. | ||
74 | */ | ||
75 | #define __sme_pa(x) (__pa(x) | sme_me_mask) | ||
76 | #define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask) | ||
77 | |||
78 | #endif /* __ASSEMBLY__ */ | ||
79 | |||
80 | #endif /* __X86_MEM_ENCRYPT_H__ */ | ||
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 79b647a7ebd0..bb8c597c2248 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h | |||
@@ -3,12 +3,28 @@ | |||
3 | 3 | ||
4 | #include <linux/spinlock.h> | 4 | #include <linux/spinlock.h> |
5 | #include <linux/mutex.h> | 5 | #include <linux/mutex.h> |
6 | #include <linux/atomic.h> | ||
6 | 7 | ||
7 | /* | 8 | /* |
8 | * The x86 doesn't have a mmu context, but | 9 | * x86 has arch-specific MMU state beyond what lives in mm_struct. |
9 | * we put the segment information here. | ||
10 | */ | 10 | */ |
11 | typedef struct { | 11 | typedef struct { |
12 | /* | ||
13 | * ctx_id uniquely identifies this mm_struct. A ctx_id will never | ||
14 | * be reused, and zero is not a valid ctx_id. | ||
15 | */ | ||
16 | u64 ctx_id; | ||
17 | |||
18 | /* | ||
19 | * Any code that needs to do any sort of TLB flushing for this | ||
20 | * mm will first make its changes to the page tables, then | ||
21 | * increment tlb_gen, then flush. This lets the low-level | ||
22 | * flushing code keep track of what needs flushing. | ||
23 | * | ||
24 | * This is not used on Xen PV. | ||
25 | */ | ||
26 | atomic64_t tlb_gen; | ||
27 | |||
12 | #ifdef CONFIG_MODIFY_LDT_SYSCALL | 28 | #ifdef CONFIG_MODIFY_LDT_SYSCALL |
13 | struct ldt_struct *ldt; | 29 | struct ldt_struct *ldt; |
14 | #endif | 30 | #endif |
@@ -37,6 +53,11 @@ typedef struct { | |||
37 | #endif | 53 | #endif |
38 | } mm_context_t; | 54 | } mm_context_t; |
39 | 55 | ||
56 | #define INIT_MM_CONTEXT(mm) \ | ||
57 | .context = { \ | ||
58 | .ctx_id = 1, \ | ||
59 | } | ||
60 | |||
40 | void leave_mm(int cpu); | 61 | void leave_mm(int cpu); |
41 | 62 | ||
42 | #endif /* _ASM_X86_MMU_H */ | 63 | #endif /* _ASM_X86_MMU_H */ |
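The tlb_gen comment above describes an ordering contract for flushers; the pattern a page-table modifier follows looks roughly like this (a sketch; the helper name is illustrative, not shown in this hunk):

	/* 1. Make the page-table changes visible. */
	/* 2. Advertise a new generation so flushing code notices. */
	static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
	{
		return atomic64_inc_return(&mm->context.tlb_gen);
	}
	/* 3. Flush, comparing each CPU's last-seen generation to tlb_gen. */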
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 7a234be7e298..7ae318c340d9 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h | |||
@@ -12,6 +12,9 @@ | |||
12 | #include <asm/tlbflush.h> | 12 | #include <asm/tlbflush.h> |
13 | #include <asm/paravirt.h> | 13 | #include <asm/paravirt.h> |
14 | #include <asm/mpx.h> | 14 | #include <asm/mpx.h> |
15 | |||
16 | extern atomic64_t last_mm_ctx_id; | ||
17 | |||
15 | #ifndef CONFIG_PARAVIRT | 18 | #ifndef CONFIG_PARAVIRT |
16 | static inline void paravirt_activate_mm(struct mm_struct *prev, | 19 | static inline void paravirt_activate_mm(struct mm_struct *prev, |
17 | struct mm_struct *next) | 20 | struct mm_struct *next) |
@@ -125,13 +128,18 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) | |||
125 | 128 | ||
126 | static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) | 129 | static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) |
127 | { | 130 | { |
128 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) | 131 | int cpu = smp_processor_id(); |
129 | this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); | 132 | |
133 | if (cpumask_test_cpu(cpu, mm_cpumask(mm))) | ||
134 | cpumask_clear_cpu(cpu, mm_cpumask(mm)); | ||
130 | } | 135 | } |
131 | 136 | ||
132 | static inline int init_new_context(struct task_struct *tsk, | 137 | static inline int init_new_context(struct task_struct *tsk, |
133 | struct mm_struct *mm) | 138 | struct mm_struct *mm) |
134 | { | 139 | { |
140 | mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); | ||
141 | atomic64_set(&mm->context.tlb_gen, 0); | ||
142 | |||
135 | #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS | 143 | #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS |
136 | if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { | 144 | if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { |
137 | /* pkey 0 is the default and always allocated */ | 145 | /* pkey 0 is the default and always allocated */ |
@@ -290,6 +298,9 @@ static inline unsigned long __get_current_cr3_fast(void) | |||
290 | { | 298 | { |
291 | unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd); | 299 | unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd); |
292 | 300 | ||
301 | if (static_cpu_has(X86_FEATURE_PCID)) | ||
302 | cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid); | ||
303 | |||
293 | /* For now, be very restrictive about when this can be called. */ | 304 | /* For now, be very restrictive about when this can be called. */ |
294 | VM_WARN_ON(in_nmi() || preemptible()); | 305 | VM_WARN_ON(in_nmi() || preemptible()); |
295 | 306 | ||
diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index a0d662be4c5b..7d7404756bb4 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h | |||
@@ -73,6 +73,9 @@ static inline void mpx_mm_init(struct mm_struct *mm) | |||
73 | } | 73 | } |
74 | void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, | 74 | void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, |
75 | unsigned long start, unsigned long end); | 75 | unsigned long start, unsigned long end); |
76 | |||
77 | unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len, | ||
78 | unsigned long flags); | ||
76 | #else | 79 | #else |
77 | static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) | 80 | static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) |
78 | { | 81 | { |
@@ -94,6 +97,12 @@ static inline void mpx_notify_unmap(struct mm_struct *mm, | |||
94 | unsigned long start, unsigned long end) | 97 | unsigned long start, unsigned long end) |
95 | { | 98 | { |
96 | } | 99 | } |
100 | |||
101 | static inline unsigned long mpx_unmapped_area_check(unsigned long addr, | ||
102 | unsigned long len, unsigned long flags) | ||
103 | { | ||
104 | return addr; | ||
105 | } | ||
97 | #endif /* CONFIG_X86_INTEL_MPX */ | 106 | #endif /* CONFIG_X86_INTEL_MPX */ |
98 | 107 | ||
99 | #endif /* _ASM_X86_MPX_H */ | 108 | #endif /* _ASM_X86_MPX_H */ |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 5573c75f8e4c..17f5c12e1afd 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -356,6 +356,8 @@ | |||
356 | #define MSR_K8_TOP_MEM1 0xc001001a | 356 | #define MSR_K8_TOP_MEM1 0xc001001a |
357 | #define MSR_K8_TOP_MEM2 0xc001001d | 357 | #define MSR_K8_TOP_MEM2 0xc001001d |
358 | #define MSR_K8_SYSCFG 0xc0010010 | 358 | #define MSR_K8_SYSCFG 0xc0010010 |
359 | #define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23 | ||
360 | #define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT) | ||
359 | #define MSR_K8_INT_PENDING_MSG 0xc0010055 | 361 | #define MSR_K8_INT_PENDING_MSG 0xc0010055 |
360 | /* C1E active bits in int pending message */ | 362 | /* C1E active bits in int pending message */ |
361 | #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 | 363 | #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 |
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index b4a0d43248cf..b50df06ad251 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h | |||
@@ -51,6 +51,10 @@ static inline void clear_page(void *page) | |||
51 | 51 | ||
52 | void copy_page(void *to, void *from); | 52 | void copy_page(void *to, void *from); |
53 | 53 | ||
54 | #ifdef CONFIG_X86_MCE | ||
55 | #define arch_unmap_kpfn arch_unmap_kpfn | ||
56 | #endif | ||
57 | |||
54 | #endif /* !__ASSEMBLY__ */ | 58 | #endif /* !__ASSEMBLY__ */ |
55 | 59 | ||
56 | #ifdef CONFIG_X86_VSYSCALL_EMULATION | 60 | #ifdef CONFIG_X86_VSYSCALL_EMULATION |
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 7bd0099384ca..b98ed9d14630 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h | |||
@@ -3,6 +3,7 @@ | |||
3 | 3 | ||
4 | #include <linux/const.h> | 4 | #include <linux/const.h> |
5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
6 | #include <linux/mem_encrypt.h> | ||
6 | 7 | ||
7 | /* PAGE_SHIFT determines the page size */ | 8 | /* PAGE_SHIFT determines the page size */ |
8 | #define PAGE_SHIFT 12 | 9 | #define PAGE_SHIFT 12 |
@@ -15,7 +16,7 @@ | |||
15 | #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) | 16 | #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) |
16 | #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) | 17 | #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) |
17 | 18 | ||
18 | #define __PHYSICAL_MASK ((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1)) | 19 | #define __PHYSICAL_MASK ((phys_addr_t)(__sme_clr((1ULL << __PHYSICAL_MASK_SHIFT) - 1))) |
19 | #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) | 20 | #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) |
20 | 21 | ||
21 | /* Cast *PAGE_MASK to a signed type so that it is sign-extended if | 22 | /* Cast *PAGE_MASK to a signed type so that it is sign-extended if |
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 77037b6f1caa..bbeae4a2bd01 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -1,6 +1,7 @@ | |||
1 | #ifndef _ASM_X86_PGTABLE_H | 1 | #ifndef _ASM_X86_PGTABLE_H |
2 | #define _ASM_X86_PGTABLE_H | 2 | #define _ASM_X86_PGTABLE_H |
3 | 3 | ||
4 | #include <linux/mem_encrypt.h> | ||
4 | #include <asm/page.h> | 5 | #include <asm/page.h> |
5 | #include <asm/pgtable_types.h> | 6 | #include <asm/pgtable_types.h> |
6 | 7 | ||
@@ -13,9 +14,18 @@ | |||
13 | cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \ | 14 | cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \ |
14 | : (prot)) | 15 | : (prot)) |
15 | 16 | ||
17 | /* | ||
18 | * Macros to add or remove encryption attribute | ||
19 | */ | ||
20 | #define pgprot_encrypted(prot) __pgprot(__sme_set(pgprot_val(prot))) | ||
21 | #define pgprot_decrypted(prot) __pgprot(__sme_clr(pgprot_val(prot))) | ||
22 | |||
16 | #ifndef __ASSEMBLY__ | 23 | #ifndef __ASSEMBLY__ |
17 | #include <asm/x86_init.h> | 24 | #include <asm/x86_init.h> |
18 | 25 | ||
26 | extern pgd_t early_top_pgt[PTRS_PER_PGD]; | ||
27 | int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); | ||
28 | |||
19 | void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); | 29 | void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); |
20 | void ptdump_walk_pgd_level_checkwx(void); | 30 | void ptdump_walk_pgd_level_checkwx(void); |
21 | 31 | ||
@@ -38,6 +48,8 @@ extern struct list_head pgd_list; | |||
38 | 48 | ||
39 | extern struct mm_struct *pgd_page_get_mm(struct page *page); | 49 | extern struct mm_struct *pgd_page_get_mm(struct page *page); |
40 | 50 | ||
51 | extern pmdval_t early_pmd_flags; | ||
52 | |||
41 | #ifdef CONFIG_PARAVIRT | 53 | #ifdef CONFIG_PARAVIRT |
42 | #include <asm/paravirt.h> | 54 | #include <asm/paravirt.h> |
43 | #else /* !CONFIG_PARAVIRT */ | 55 | #else /* !CONFIG_PARAVIRT */ |
@@ -195,6 +207,11 @@ static inline unsigned long p4d_pfn(p4d_t p4d) | |||
195 | return (p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT; | 207 | return (p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT; |
196 | } | 208 | } |
197 | 209 | ||
210 | static inline unsigned long pgd_pfn(pgd_t pgd) | ||
211 | { | ||
212 | return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT; | ||
213 | } | ||
214 | |||
198 | static inline int p4d_large(p4d_t p4d) | 215 | static inline int p4d_large(p4d_t p4d) |
199 | { | 216 | { |
200 | /* No 512 GiB pages yet */ | 217 | /* No 512 GiB pages yet */ |
@@ -704,8 +721,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) | |||
704 | * Currently stuck as a macro due to indirect forward reference to | 721 | * Currently stuck as a macro due to indirect forward reference to |
705 | * linux/mmzone.h's __section_mem_map_addr() definition: | 722 | * linux/mmzone.h's __section_mem_map_addr() definition: |
706 | */ | 723 | */ |
707 | #define pmd_page(pmd) \ | 724 | #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) |
708 | pfn_to_page((pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT) | ||
709 | 725 | ||
710 | /* | 726 | /* |
711 | * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] | 727 | * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] |
@@ -773,8 +789,7 @@ static inline unsigned long pud_page_vaddr(pud_t pud) | |||
773 | * Currently stuck as a macro due to indirect forward reference to | 789 | * Currently stuck as a macro due to indirect forward reference to |
774 | * linux/mmzone.h's __section_mem_map_addr() definition: | 790 | * linux/mmzone.h's __section_mem_map_addr() definition: |
775 | */ | 791 | */ |
776 | #define pud_page(pud) \ | 792 | #define pud_page(pud) pfn_to_page(pud_pfn(pud)) |
777 | pfn_to_page((pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT) | ||
778 | 793 | ||
779 | /* Find an entry in the second-level page table.. */ | 794 | /* Find an entry in the second-level page table.. */ |
780 | static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) | 795 | static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) |
@@ -824,8 +839,7 @@ static inline unsigned long p4d_page_vaddr(p4d_t p4d) | |||
824 | * Currently stuck as a macro due to indirect forward reference to | 839 | * Currently stuck as a macro due to indirect forward reference to |
825 | * linux/mmzone.h's __section_mem_map_addr() definition: | 840 | * linux/mmzone.h's __section_mem_map_addr() definition: |
826 | */ | 841 | */ |
827 | #define p4d_page(p4d) \ | 842 | #define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d)) |
828 | pfn_to_page((p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT) | ||
829 | 843 | ||
830 | /* Find an entry in the third-level page table.. */ | 844 | /* Find an entry in the third-level page table.. */ |
831 | static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) | 845 | static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) |
@@ -859,7 +873,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) | |||
859 | * Currently stuck as a macro due to indirect forward reference to | 873 | * Currently stuck as a macro due to indirect forward reference to |
860 | * linux/mmzone.h's __section_mem_map_addr() definition: | 874 | * linux/mmzone.h's __section_mem_map_addr() definition: |
861 | */ | 875 | */ |
862 | #define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) | 876 | #define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd)) |
863 | 877 | ||
864 | /* to find an entry in a page-table-directory. */ | 878 | /* to find an entry in a page-table-directory. */ |
865 | static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) | 879 | static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index bf9638e1ee42..399261ce904c 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -2,6 +2,8 @@ | |||
2 | #define _ASM_X86_PGTABLE_DEFS_H | 2 | #define _ASM_X86_PGTABLE_DEFS_H |
3 | 3 | ||
4 | #include <linux/const.h> | 4 | #include <linux/const.h> |
5 | #include <linux/mem_encrypt.h> | ||
6 | |||
5 | #include <asm/page_types.h> | 7 | #include <asm/page_types.h> |
6 | 8 | ||
7 | #define FIRST_USER_ADDRESS 0UL | 9 | #define FIRST_USER_ADDRESS 0UL |
@@ -121,10 +123,10 @@ | |||
121 | 123 | ||
122 | #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) | 124 | #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) |
123 | 125 | ||
124 | #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ | 126 | #define _PAGE_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\ |
125 | _PAGE_ACCESSED | _PAGE_DIRTY) | 127 | _PAGE_ACCESSED | _PAGE_DIRTY) |
126 | #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ | 128 | #define _KERNPG_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | \ |
127 | _PAGE_DIRTY) | 129 | _PAGE_ACCESSED | _PAGE_DIRTY) |
128 | 130 | ||
129 | /* | 131 | /* |
130 | * Set of bits not changed in pte_modify. The pte's | 132 | * Set of bits not changed in pte_modify. The pte's |
@@ -159,6 +161,7 @@ enum page_cache_mode { | |||
159 | 161 | ||
160 | #define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT) | 162 | #define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT) |
161 | #define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) | 163 | #define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) |
164 | #define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP)) | ||
162 | 165 | ||
163 | #define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) | 166 | #define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) |
164 | #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ | 167 | #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ |
@@ -187,22 +190,42 @@ enum page_cache_mode { | |||
187 | #define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) | 190 | #define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) |
188 | #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) | 191 | #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) |
189 | #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) | 192 | #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) |
193 | #define __PAGE_KERNEL_WP (__PAGE_KERNEL | _PAGE_CACHE_WP) | ||
190 | 194 | ||
191 | #define __PAGE_KERNEL_IO (__PAGE_KERNEL) | 195 | #define __PAGE_KERNEL_IO (__PAGE_KERNEL) |
192 | #define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE) | 196 | #define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE) |
193 | 197 | ||
194 | #define PAGE_KERNEL __pgprot(__PAGE_KERNEL) | 198 | #ifndef __ASSEMBLY__ |
195 | #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) | 199 | |
196 | #define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) | 200 | #define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) |
197 | #define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) | 201 | |
198 | #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) | 202 | #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ |
199 | #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) | 203 | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_ENC) |
200 | #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) | 204 | #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ |
201 | #define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL) | 205 | _PAGE_DIRTY | _PAGE_ENC) |
202 | #define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR) | 206 | |
207 | #define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC) | ||
208 | #define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC) | ||
209 | |||
210 | #define __PAGE_KERNEL_NOENC (__PAGE_KERNEL) | ||
211 | #define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP) | ||
212 | |||
213 | #define PAGE_KERNEL __pgprot(__PAGE_KERNEL | _PAGE_ENC) | ||
214 | #define PAGE_KERNEL_NOENC __pgprot(__PAGE_KERNEL) | ||
215 | #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC) | ||
216 | #define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC) | ||
217 | #define PAGE_KERNEL_EXEC_NOENC __pgprot(__PAGE_KERNEL_EXEC) | ||
218 | #define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX | _PAGE_ENC) | ||
219 | #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC) | ||
220 | #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC) | ||
221 | #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC) | ||
222 | #define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL | _PAGE_ENC) | ||
223 | #define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC) | ||
224 | |||
225 | #define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) | ||
226 | #define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE) | ||
203 | 227 | ||
204 | #define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) | 228 | #endif /* __ASSEMBLY__ */ |
205 | #define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE) | ||
206 | 229 | ||
207 | /* xwr */ | 230 | /* xwr */ |
208 | #define __P000 PAGE_NONE | 231 | #define __P000 PAGE_NONE |
@@ -287,6 +310,11 @@ static inline p4dval_t native_p4d_val(p4d_t p4d) | |||
287 | #else | 310 | #else |
288 | #include <asm-generic/pgtable-nop4d.h> | 311 | #include <asm-generic/pgtable-nop4d.h> |
289 | 312 | ||
313 | static inline p4d_t native_make_p4d(pudval_t val) | ||
314 | { | ||
315 | return (p4d_t) { .pgd = native_make_pgd((pgdval_t)val) }; | ||
316 | } | ||
317 | |||
290 | static inline p4dval_t native_p4d_val(p4d_t p4d) | 318 | static inline p4dval_t native_p4d_val(p4d_t p4d) |
291 | { | 319 | { |
292 | return native_pgd_val(p4d.pgd); | 320 | return native_pgd_val(p4d.pgd); |
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index 79aa2f98398d..dc723b64acf0 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _ASM_X86_PROCESSOR_FLAGS_H | 2 | #define _ASM_X86_PROCESSOR_FLAGS_H |
3 | 3 | ||
4 | #include <uapi/asm/processor-flags.h> | 4 | #include <uapi/asm/processor-flags.h> |
5 | #include <linux/mem_encrypt.h> | ||
5 | 6 | ||
6 | #ifdef CONFIG_VM86 | 7 | #ifdef CONFIG_VM86 |
7 | #define X86_VM_MASK X86_EFLAGS_VM | 8 | #define X86_VM_MASK X86_EFLAGS_VM |
@@ -32,16 +33,18 @@ | |||
32 | * CR3_ADDR_MASK is the mask used by read_cr3_pa(). | 33 | * CR3_ADDR_MASK is the mask used by read_cr3_pa(). |
33 | */ | 34 | */ |
34 | #ifdef CONFIG_X86_64 | 35 | #ifdef CONFIG_X86_64 |
35 | /* Mask off the address space ID bits. */ | 36 | /* Mask off the address space ID and SME encryption bits. */ |
36 | #define CR3_ADDR_MASK 0x7FFFFFFFFFFFF000ull | 37 | #define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull) |
37 | #define CR3_PCID_MASK 0xFFFull | 38 | #define CR3_PCID_MASK 0xFFFull |
39 | #define CR3_NOFLUSH BIT_ULL(63) | ||
38 | #else | 40 | #else |
39 | /* | 41 | /* |
40 | * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save | 42 | * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save |
41 | * a tiny bit of code size by setting all the bits. | 43 | * a tiny bit of code size by setting all the bits. |
42 | */ | 44 | */ |
43 | #define CR3_ADDR_MASK 0xFFFFFFFFull | 45 | #define CR3_ADDR_MASK 0xFFFFFFFFull |
44 | #define CR3_PCID_MASK 0ull | 46 | #define CR3_PCID_MASK 0ull |
47 | #define CR3_NOFLUSH 0 | ||
45 | #endif | 48 | #endif |
46 | 49 | ||
47 | #endif /* _ASM_X86_PROCESSOR_FLAGS_H */ | 50 | #endif /* _ASM_X86_PROCESSOR_FLAGS_H */ |
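Why CR3_ADDR_MASK must clear the C-bit: once CR3 carries the encryption bit, a plain address mask would leak it into the value read_cr3_pa() reports as a physical address. A user-space sketch of the masking, with bit 47 again assumed as the C-bit:

	#include <stdint.h>
	#include <stdio.h>

	#define SME_MASK  (1ULL << 47)  /* assumed C-bit position */
	#define CR3_ADDR  (0x7FFFFFFFFFFFF000ULL & ~SME_MASK)
	#define CR3_PCID  0xFFFULL

	int main(void)
	{
		/* pgd at 16 MB, C-bit set, PCID 5 */
		uint64_t cr3 = 0x1000000ULL | SME_MASK | 0x5;

		printf("pa   = %#llx\n", (unsigned long long)(cr3 & CR3_ADDR));
		printf("pcid = %#llx\n", (unsigned long long)(cr3 & CR3_PCID));
		return 0;
	}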
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index abc99b9c7ffd..3fa26a61eabc 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -30,6 +30,7 @@ struct vm86; | |||
30 | #include <linux/math64.h> | 30 | #include <linux/math64.h> |
31 | #include <linux/err.h> | 31 | #include <linux/err.h> |
32 | #include <linux/irqflags.h> | 32 | #include <linux/irqflags.h> |
33 | #include <linux/mem_encrypt.h> | ||
33 | 34 | ||
34 | /* | 35 | /* |
35 | * We handle most unaligned accesses in hardware. On the other hand | 36 | * We handle most unaligned accesses in hardware. On the other hand |
@@ -240,9 +241,14 @@ static inline unsigned long read_cr3_pa(void) | |||
240 | return __read_cr3() & CR3_ADDR_MASK; | 241 | return __read_cr3() & CR3_ADDR_MASK; |
241 | } | 242 | } |
242 | 243 | ||
244 | static inline unsigned long native_read_cr3_pa(void) | ||
245 | { | ||
246 | return __native_read_cr3() & CR3_ADDR_MASK; | ||
247 | } | ||
248 | |||
243 | static inline void load_cr3(pgd_t *pgdir) | 249 | static inline void load_cr3(pgd_t *pgdir) |
244 | { | 250 | { |
245 | write_cr3(__pa(pgdir)); | 251 | write_cr3(__sme_pa(pgdir)); |
246 | } | 252 | } |
247 | 253 | ||
248 | #ifdef CONFIG_X86_32 | 254 | #ifdef CONFIG_X86_32 |
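load_cr3() now goes through __sme_pa(), which is nothing more than the physical address with the mask ORed in. A sketch of the helper family this series relies on (the authoritative definitions live in include/linux/mem_encrypt.h; sme_me_mask is 0 when SME is inactive, so all of these collapse to no-ops):

	/* Sketch only; see include/linux/mem_encrypt.h. */
	#define __sme_set(x)  ((x) | sme_me_mask)   /* add the C-bit    */
	#define __sme_clr(x)  ((x) & ~sme_me_mask)  /* remove the C-bit */
	#define __sme_pa(p)   (__pa(p) | sme_me_mask)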
@@ -805,7 +811,9 @@ static inline void spin_lock_prefetch(const void *x) | |||
805 | */ | 811 | */ |
806 | #define IA32_PAGE_OFFSET PAGE_OFFSET | 812 | #define IA32_PAGE_OFFSET PAGE_OFFSET |
807 | #define TASK_SIZE PAGE_OFFSET | 813 | #define TASK_SIZE PAGE_OFFSET |
814 | #define TASK_SIZE_LOW TASK_SIZE | ||
808 | #define TASK_SIZE_MAX TASK_SIZE | 815 | #define TASK_SIZE_MAX TASK_SIZE |
816 | #define DEFAULT_MAP_WINDOW TASK_SIZE | ||
809 | #define STACK_TOP TASK_SIZE | 817 | #define STACK_TOP TASK_SIZE |
810 | #define STACK_TOP_MAX STACK_TOP | 818 | #define STACK_TOP_MAX STACK_TOP |
811 | 819 | ||
@@ -845,7 +853,9 @@ static inline void spin_lock_prefetch(const void *x) | |||
845 | * particular problem by preventing anything from being mapped | 853 | * particular problem by preventing anything from being mapped |
846 | * at the maximum canonical address. | 854 | * at the maximum canonical address. |
847 | */ | 855 | */ |
848 | #define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE) | 856 | #define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE) |
857 | |||
858 | #define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE) | ||
849 | 859 | ||
850 | /* This decides where the kernel will search for a free chunk of vm | 860 | /* This decides where the kernel will search for a free chunk of vm |
851 | * space during mmap's. | 861 | * space during mmap's. |
@@ -853,12 +863,14 @@ static inline void spin_lock_prefetch(const void *x) | |||
853 | #define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ | 863 | #define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ |
854 | 0xc0000000 : 0xFFFFe000) | 864 | 0xc0000000 : 0xFFFFe000) |
855 | 865 | ||
866 | #define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \ | ||
867 | IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW) | ||
856 | #define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \ | 868 | #define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \ |
857 | IA32_PAGE_OFFSET : TASK_SIZE_MAX) | 869 | IA32_PAGE_OFFSET : TASK_SIZE_MAX) |
858 | #define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \ | 870 | #define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \ |
859 | IA32_PAGE_OFFSET : TASK_SIZE_MAX) | 871 | IA32_PAGE_OFFSET : TASK_SIZE_MAX) |
860 | 872 | ||
861 | #define STACK_TOP TASK_SIZE | 873 | #define STACK_TOP TASK_SIZE_LOW |
862 | #define STACK_TOP_MAX TASK_SIZE_MAX | 874 | #define STACK_TOP_MAX TASK_SIZE_MAX |
863 | 875 | ||
864 | #define INIT_THREAD { \ | 876 | #define INIT_THREAD { \ |
@@ -879,7 +891,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, | |||
879 | * space during mmap's. | 891 | * space during mmap's. |
880 | */ | 892 | */ |
881 | #define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3)) | 893 | #define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3)) |
882 | #define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE) | 894 | #define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW) |
883 | 895 | ||
884 | #define KSTK_EIP(task) (task_pt_regs(task)->ip) | 896 | #define KSTK_EIP(task) (task_pt_regs(task)->ip) |
885 | 897 | ||
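The TASK_SIZE_LOW/DEFAULT_MAP_WINDOW split keeps ordinary allocations below the 47-bit boundary even when TASK_SIZE_MAX grows with 5-level paging; only an explicit address hint above the window opens up the larger space. A runnable illustration of the resulting constants, assuming __VIRTUAL_MASK_SHIFT is 56 with 5-level paging:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t page = 4096;
		uint64_t low  = (1ULL << 47) - page;  /* DEFAULT_MAP_WINDOW */
		uint64_t max  = (1ULL << 56) - page;  /* TASK_SIZE_MAX      */

		/* __TASK_UNMAPPED_BASE(x) = PAGE_ALIGN(x / 3) */
		uint64_t base = ((low / 3) + page - 1) & ~(page - 1);

		printf("mmap base:     %#llx\n", (unsigned long long)base);
		printf("TASK_SIZE_MAX: %#llx\n", (unsigned long long)max);
		return 0;
	}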
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index 230e1903acf0..90d91520c13a 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h | |||
@@ -1,6 +1,15 @@ | |||
1 | #ifndef _ARCH_X86_REALMODE_H | 1 | #ifndef _ARCH_X86_REALMODE_H |
2 | #define _ARCH_X86_REALMODE_H | 2 | #define _ARCH_X86_REALMODE_H |
3 | 3 | ||
4 | /* | ||
5 | * Flag bit definitions for use with the flags field of the trampoline header | ||
6 | * in the CONFIG_X86_64 variant. | ||
7 | */ | ||
8 | #define TH_FLAGS_SME_ACTIVE_BIT 0 | ||
9 | #define TH_FLAGS_SME_ACTIVE BIT(TH_FLAGS_SME_ACTIVE_BIT) | ||
10 | |||
11 | #ifndef __ASSEMBLY__ | ||
12 | |||
4 | #include <linux/types.h> | 13 | #include <linux/types.h> |
5 | #include <asm/io.h> | 14 | #include <asm/io.h> |
6 | 15 | ||
@@ -38,6 +47,7 @@ struct trampoline_header { | |||
38 | u64 start; | 47 | u64 start; |
39 | u64 efer; | 48 | u64 efer; |
40 | u32 cr4; | 49 | u32 cr4; |
50 | u32 flags; | ||
41 | #endif | 51 | #endif |
42 | }; | 52 | }; |
43 | 53 | ||
@@ -69,4 +79,6 @@ static inline size_t real_mode_size_needed(void) | |||
69 | void set_real_mode_mem(phys_addr_t mem, size_t size); | 79 | void set_real_mode_mem(phys_addr_t mem, size_t size); |
70 | void reserve_real_mode(void); | 80 | void reserve_real_mode(void); |
71 | 81 | ||
82 | #endif /* __ASSEMBLY__ */ | ||
83 | |||
72 | #endif /* _ARCH_X86_REALMODE_H */ | 84 | #endif /* _ARCH_X86_REALMODE_H */ |
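The new flags word gives the boot CPU a channel to tell the real-mode trampoline, before paging is up, whether it must run with SME active. A hedged sketch of the producer side, modeled on what setup_real_mode() does with the rest of the trampoline header:

	/* Sketch: fill the trampoline flags (kernel-style). */
	trampoline_header->flags = 0;
	if (sme_active())
		trampoline_header->flags |= TH_FLAGS_SME_ACTIVE;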
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index eaec6c364e42..cd71273ec49d 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h | |||
@@ -11,6 +11,7 @@ | |||
11 | * Executability : eXecutable, NoteXecutable | 11 | * Executability : eXecutable, NoteXecutable |
12 | * Read/Write : ReadOnly, ReadWrite | 12 | * Read/Write : ReadOnly, ReadWrite |
13 | * Presence : NotPresent | 13 | * Presence : NotPresent |
14 | * Encryption : Encrypted, Decrypted | ||
14 | * | 15 | * |
15 | * Within a category, the attributes are mutually exclusive. | 16 | * Within a category, the attributes are mutually exclusive. |
16 | * | 17 | * |
@@ -42,6 +43,8 @@ int set_memory_wt(unsigned long addr, int numpages); | |||
42 | int set_memory_wb(unsigned long addr, int numpages); | 43 | int set_memory_wb(unsigned long addr, int numpages); |
43 | int set_memory_np(unsigned long addr, int numpages); | 44 | int set_memory_np(unsigned long addr, int numpages); |
44 | int set_memory_4k(unsigned long addr, int numpages); | 45 | int set_memory_4k(unsigned long addr, int numpages); |
46 | int set_memory_encrypted(unsigned long addr, int numpages); | ||
47 | int set_memory_decrypted(unsigned long addr, int numpages); | ||
45 | 48 | ||
46 | int set_memory_array_uc(unsigned long *addr, int addrinarray); | 49 | int set_memory_array_uc(unsigned long *addr, int addrinarray); |
47 | int set_memory_array_wc(unsigned long *addr, int addrinarray); | 50 | int set_memory_array_wc(unsigned long *addr, int addrinarray); |
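A typical consumer of the new pair is code that must share pages with an agent that cannot handle the C-bit, such as a device or the BIOS. A hedged kernel-style sketch (alloc_shared_buffer() is a hypothetical helper, not part of this series):

	/* Sketch: allocate pages and strip their encryption attribute. */
	static void *alloc_shared_buffer(unsigned int order)
	{
		unsigned long vaddr = __get_free_pages(GFP_KERNEL, order);

		if (!vaddr)
			return NULL;
		if (set_memory_decrypted(vaddr, 1 << order)) {
			free_pages(vaddr, order);
			return NULL;
		}
		return (void *)vaddr;
	}

The inverse call, set_memory_encrypted(), must restore the mapping before such pages go back to the page allocator.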
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index c7797307fc2b..79a4ca6a9606 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h | |||
@@ -15,4 +15,18 @@ | |||
15 | 15 | ||
16 | #include <asm-generic/tlb.h> | 16 | #include <asm-generic/tlb.h> |
17 | 17 | ||
18 | /* | ||
19 | * While the x86 architecture in general requires an IPI to perform TLB | ||
20 | * shootdown, enablement code for several hypervisors overrides | ||
21 | * .flush_tlb_others hook in pv_mmu_ops and implements it by issuing | ||
22 | * a hypercall. To keep software pagetable walkers safe in this case we | ||
23 | * switch to RCU based table free (HAVE_RCU_TABLE_FREE). See the comment | ||
24 | * below 'ifdef CONFIG_HAVE_RCU_TABLE_FREE' in include/asm-generic/tlb.h | ||
25 | * for more details. | ||
26 | */ | ||
27 | static inline void __tlb_remove_table(void *table) | ||
28 | { | ||
29 | free_page_and_swap_cache(table); | ||
30 | } | ||
31 | |||
18 | #endif /* _ASM_X86_TLB_H */ | 32 | #endif /* _ASM_X86_TLB_H */ |
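With HAVE_RCU_TABLE_FREE, page-table pages are queued through tlb_remove_table() and only reach the __tlb_remove_table() callback above after a grace period, so a lockless walker (e.g. fast GUP) never sees a table page recycled under it. A sketch of the call site, assuming a pte page being torn down:

	/* Sketch: defer the free past an RCU grace period. */
	tlb_remove_table(tlb, virt_to_page(pte));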
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 50ea3482e1d1..d23e61dc0640 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h | |||
@@ -57,6 +57,23 @@ static inline void invpcid_flush_all_nonglobals(void) | |||
57 | __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL); | 57 | __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL); |
58 | } | 58 | } |
59 | 59 | ||
60 | static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) | ||
61 | { | ||
62 | u64 new_tlb_gen; | ||
63 | |||
64 | /* | ||
65 | * Bump the generation count. This also serves as a full barrier | ||
66 | * that synchronizes with switch_mm(): callers are required to order | ||
67 | * their read of mm_cpumask after their writes to the paging | ||
68 | * structures. | ||
69 | */ | ||
70 | smp_mb__before_atomic(); | ||
71 | new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen); | ||
72 | smp_mb__after_atomic(); | ||
73 | |||
74 | return new_tlb_gen; | ||
75 | } | ||
76 | |||
60 | #ifdef CONFIG_PARAVIRT | 77 | #ifdef CONFIG_PARAVIRT |
61 | #include <asm/paravirt.h> | 78 | #include <asm/paravirt.h> |
62 | #else | 79 | #else |
@@ -65,6 +82,17 @@ static inline void invpcid_flush_all_nonglobals(void) | |||
65 | #define __flush_tlb_single(addr) __native_flush_tlb_single(addr) | 82 | #define __flush_tlb_single(addr) __native_flush_tlb_single(addr) |
66 | #endif | 83 | #endif |
67 | 84 | ||
85 | /* | ||
86 | * 6 because 6 should be plenty and struct tlb_state will fit in | ||
87 | * two cache lines. | ||
88 | */ | ||
89 | #define TLB_NR_DYN_ASIDS 6 | ||
90 | |||
91 | struct tlb_context { | ||
92 | u64 ctx_id; | ||
93 | u64 tlb_gen; | ||
94 | }; | ||
95 | |||
68 | struct tlb_state { | 96 | struct tlb_state { |
69 | /* | 97 | /* |
70 | * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts | 98 | * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts |
@@ -73,13 +101,35 @@ struct tlb_state { | |||
73 | * mode even if we've already switched back to swapper_pg_dir. | 101 | * mode even if we've already switched back to swapper_pg_dir. |
74 | */ | 102 | */ |
75 | struct mm_struct *loaded_mm; | 103 | struct mm_struct *loaded_mm; |
76 | int state; | 104 | u16 loaded_mm_asid; |
105 | u16 next_asid; | ||
77 | 106 | ||
78 | /* | 107 | /* |
79 | * Access to this CR4 shadow and to H/W CR4 is protected by | 108 | * Access to this CR4 shadow and to H/W CR4 is protected by |
80 | * disabling interrupts when modifying either one. | 109 | * disabling interrupts when modifying either one. |
81 | */ | 110 | */ |
82 | unsigned long cr4; | 111 | unsigned long cr4; |
112 | |||
113 | /* | ||
114 | * This is a list of all contexts that might exist in the TLB. | ||
115 | * There is one per ASID that we use, and the ASID (what the | ||
116 | * CPU calls PCID) is the index into ctxs. | ||
117 | * | ||
118 | * For each context, ctx_id indicates which mm the TLB's user | ||
119 | * entries came from. As an invariant, the TLB will never | ||
120 | * contain entries that are out-of-date with respect to the | ||
121 | * tlb_gen recorded in the list. | ||
122 | * | ||
123 | * To be clear, this means that it's legal for the TLB code to | ||
124 | * flush the TLB without updating tlb_gen. This can happen | ||
125 | * (for now, at least) due to paravirt remote flushes. | ||
126 | * | ||
127 | * NB: context 0 is a bit special, since it's also used by | ||
128 | * various bits of init code. This is fine -- code that | ||
129 | * isn't aware of PCID will end up harmlessly flushing | ||
130 | * context 0. | ||
131 | */ | ||
132 | struct tlb_context ctxs[TLB_NR_DYN_ASIDS]; | ||
83 | }; | 133 | }; |
84 | DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); | 134 | DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); |
85 | 135 | ||
@@ -207,6 +257,14 @@ static inline void __flush_tlb_all(void) | |||
207 | __flush_tlb_global(); | 257 | __flush_tlb_global(); |
208 | else | 258 | else |
209 | __flush_tlb(); | 259 | __flush_tlb(); |
260 | |||
261 | /* | ||
262 | * Note: if we somehow had PCID but not PGE, then this wouldn't work -- | ||
263 | * we'd end up flushing kernel translations for the current ASID but | ||
264 | * we might fail to flush kernel translations for other cached ASIDs. | ||
265 | * | ||
266 | * To avoid this issue, we force PCID off if PGE is off. | ||
267 | */ | ||
210 | } | 268 | } |
211 | 269 | ||
212 | static inline void __flush_tlb_one(unsigned long addr) | 270 | static inline void __flush_tlb_one(unsigned long addr) |
@@ -231,9 +289,26 @@ static inline void __flush_tlb_one(unsigned long addr) | |||
231 | * and page-granular flushes are available only on i486 and up. | 289 | * and page-granular flushes are available only on i486 and up. |
232 | */ | 290 | */ |
233 | struct flush_tlb_info { | 291 | struct flush_tlb_info { |
234 | struct mm_struct *mm; | 292 | /* |
235 | unsigned long start; | 293 | * We support several kinds of flushes. |
236 | unsigned long end; | 294 | * |
295 | * - Fully flush a single mm. .mm will be set, .end will be | ||
296 | * TLB_FLUSH_ALL, and .new_tlb_gen will be the tlb_gen to | ||
297 | * which the IPI sender is trying to catch us up. | ||
298 | * | ||
299 | * - Partially flush a single mm. .mm will be set, .start and | ||
300 | * .end will indicate the range, and .new_tlb_gen will be set | ||
301 | * such that the changes between generation .new_tlb_gen-1 and | ||
302 | * .new_tlb_gen are entirely contained in the indicated range. | ||
303 | * | ||
304 | * - Fully flush all mms whose tlb_gens have been updated. .mm | ||
305 | * will be NULL, .end will be TLB_FLUSH_ALL, and .new_tlb_gen | ||
306 | * will be zero. | ||
307 | */ | ||
308 | struct mm_struct *mm; | ||
309 | unsigned long start; | ||
310 | unsigned long end; | ||
311 | u64 new_tlb_gen; | ||
237 | }; | 312 | }; |
238 | 313 | ||
239 | #define local_flush_tlb() __flush_tlb() | 314 | #define local_flush_tlb() __flush_tlb() |
@@ -256,12 +331,10 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) | |||
256 | void native_flush_tlb_others(const struct cpumask *cpumask, | 331 | void native_flush_tlb_others(const struct cpumask *cpumask, |
257 | const struct flush_tlb_info *info); | 332 | const struct flush_tlb_info *info); |
258 | 333 | ||
259 | #define TLBSTATE_OK 1 | ||
260 | #define TLBSTATE_LAZY 2 | ||
261 | |||
262 | static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, | 334 | static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, |
263 | struct mm_struct *mm) | 335 | struct mm_struct *mm) |
264 | { | 336 | { |
337 | inc_mm_tlb_gen(mm); | ||
265 | cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); | 338 | cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); |
266 | } | 339 | } |
267 | 340 | ||
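For the mm-targeted flavors, the receiving CPU reduces the whole scheme to one comparison: if the local tlb_gen for the mm's ASID already covers info->new_tlb_gen, the IPI is a no-op. A simplified kernel-style sketch of that check (error paths and the mm == NULL case omitted):

	/* Sketch: mm-targeted flush on the receiving CPU (simplified). */
	static void flush_tlb_func_sketch(const struct flush_tlb_info *info,
					  u16 asid)
	{
		u64 local_gen = this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen);
		unsigned long addr;

		if (local_gen >= info->new_tlb_gen)
			return;			/* already caught up */

		if (info->end == TLB_FLUSH_ALL) {
			local_flush_tlb();
		} else {
			for (addr = info->start; addr < info->end; addr += PAGE_SIZE)
				__flush_tlb_single(addr);
		}

		this_cpu_write(cpu_tlbstate.ctxs[asid].tlb_gen, info->new_tlb_gen);
	}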
diff --git a/arch/x86/include/asm/vga.h b/arch/x86/include/asm/vga.h index c4b9dc2f67c5..9f42beefc67a 100644 --- a/arch/x86/include/asm/vga.h +++ b/arch/x86/include/asm/vga.h | |||
@@ -7,12 +7,24 @@ | |||
7 | #ifndef _ASM_X86_VGA_H | 7 | #ifndef _ASM_X86_VGA_H |
8 | #define _ASM_X86_VGA_H | 8 | #define _ASM_X86_VGA_H |
9 | 9 | ||
10 | #include <asm/set_memory.h> | ||
11 | |||
10 | /* | 12 | /* |
11 | * On the PC, we can just recalculate addresses and then | 13 | * On the PC, we can just recalculate addresses and then |
12 | * access the videoram directly without any black magic. | 14 | * access the videoram directly without any black magic. |
15 | * To support memory encryption, however, we need to access | ||
16 | * the videoram as decrypted memory. | ||
13 | */ | 17 | */ |
14 | 18 | ||
15 | #define VGA_MAP_MEM(x, s) (unsigned long)phys_to_virt(x) | 19 | #define VGA_MAP_MEM(x, s) \ |
20 | ({ \ | ||
21 | unsigned long start = (unsigned long)phys_to_virt(x); \ | ||
22 | \ | ||
23 | if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) \ | ||
24 | set_memory_decrypted(start, (s) >> PAGE_SHIFT); \ | ||
25 | \ | ||
26 | start; \ | ||
27 | }) | ||
16 | 28 | ||
17 | #define vga_readb(x) (*(x)) | 29 | #define vga_readb(x) (*(x)) |
18 | #define vga_writeb(x, y) (*(y) = (x)) | 30 | #define vga_writeb(x, y) (*(y) = (x)) |
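Callers of VGA_MAP_MEM are unchanged; the decryption happens behind the macro. A usage sketch:

	/* Sketch: map 32 KB of legacy VGA memory (decrypted under SME). */
	unsigned long vram = VGA_MAP_MEM(0xa0000, 0x8000);

	vga_writeb(0x41, (u8 *)vram);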
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 7491e73d9253..97bb2caf3428 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -115,7 +115,7 @@ static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = { | |||
115 | #define ACPI_INVALID_GSI INT_MIN | 115 | #define ACPI_INVALID_GSI INT_MIN |
116 | 116 | ||
117 | /* | 117 | /* |
118 | * This is just a simple wrapper around early_ioremap(), | 118 | * This is just a simple wrapper around early_memremap(), |
119 | * with sanity checks for phys == 0 and size == 0. | 119 | * with sanity checks for phys == 0 and size == 0. |
120 | */ | 120 | */ |
121 | char *__init __acpi_map_table(unsigned long phys, unsigned long size) | 121 | char *__init __acpi_map_table(unsigned long phys, unsigned long size) |
@@ -124,7 +124,7 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) | |||
124 | if (!phys || !size) | 124 | if (!phys || !size) |
125 | return NULL; | 125 | return NULL; |
126 | 126 | ||
127 | return early_ioremap(phys, size); | 127 | return early_memremap(phys, size); |
128 | } | 128 | } |
129 | 129 | ||
130 | void __init __acpi_unmap_table(char *map, unsigned long size) | 130 | void __init __acpi_unmap_table(char *map, unsigned long size) |
@@ -132,7 +132,7 @@ void __init __acpi_unmap_table(char *map, unsigned long size) | |||
132 | if (!map || !size) | 132 | if (!map || !size) |
133 | return; | 133 | return; |
134 | 134 | ||
135 | early_iounmap(map, size); | 135 | early_memunmap(map, size); |
136 | } | 136 | } |
137 | 137 | ||
138 | #ifdef CONFIG_X86_LOCAL_APIC | 138 | #ifdef CONFIG_X86_LOCAL_APIC |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e44338dd62dd..9862e2cd6d93 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -558,8 +558,12 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) | |||
558 | 558 | ||
559 | static void early_init_amd(struct cpuinfo_x86 *c) | 559 | static void early_init_amd(struct cpuinfo_x86 *c) |
560 | { | 560 | { |
561 | u32 dummy; | ||
562 | |||
561 | early_init_amd_mc(c); | 563 | early_init_amd_mc(c); |
562 | 564 | ||
565 | rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); | ||
566 | |||
563 | /* | 567 | /* |
564 | * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate | 568 | * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate |
565 | * with P/T states and does not stop in deep C-states | 569 | * with P/T states and does not stop in deep C-states |
@@ -622,6 +626,27 @@ static void early_init_amd(struct cpuinfo_x86 *c) | |||
622 | */ | 626 | */ |
623 | if (cpu_has_amd_erratum(c, amd_erratum_400)) | 627 | if (cpu_has_amd_erratum(c, amd_erratum_400)) |
624 | set_cpu_bug(c, X86_BUG_AMD_E400); | 628 | set_cpu_bug(c, X86_BUG_AMD_E400); |
629 | |||
630 | /* | ||
631 | * BIOS support is required for SME. If BIOS has enabled SME then | ||
632 | * adjust x86_phys_bits by the SME physical address space reduction | ||
633 | * value. If BIOS has not enabled SME then don't advertise the | ||
634 | * feature (set in scattered.c). Also, since the SME support requires | ||
635 | * long mode, don't advertise the feature under CONFIG_X86_32. | ||
636 | */ | ||
637 | if (cpu_has(c, X86_FEATURE_SME)) { | ||
638 | u64 msr; | ||
639 | |||
640 | /* Check if SME is enabled */ | ||
641 | rdmsrl(MSR_K8_SYSCFG, msr); | ||
642 | if (msr & MSR_K8_SYSCFG_MEM_ENCRYPT) { | ||
643 | c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f; | ||
644 | if (IS_ENABLED(CONFIG_X86_32)) | ||
645 | clear_cpu_cap(c, X86_FEATURE_SME); | ||
646 | } else { | ||
647 | clear_cpu_cap(c, X86_FEATURE_SME); | ||
648 | } | ||
649 | } | ||
625 | } | 650 | } |
626 | 651 | ||
627 | static void init_amd_k8(struct cpuinfo_x86 *c) | 652 | static void init_amd_k8(struct cpuinfo_x86 *c) |
@@ -740,8 +765,6 @@ static void init_amd_bd(struct cpuinfo_x86 *c) | |||
740 | 765 | ||
741 | static void init_amd(struct cpuinfo_x86 *c) | 766 | static void init_amd(struct cpuinfo_x86 *c) |
742 | { | 767 | { |
743 | u32 dummy; | ||
744 | |||
745 | early_init_amd(c); | 768 | early_init_amd(c); |
746 | 769 | ||
747 | /* | 770 | /* |
@@ -803,8 +826,6 @@ static void init_amd(struct cpuinfo_x86 *c) | |||
803 | if (c->x86 > 0x11) | 826 | if (c->x86 > 0x11) |
804 | set_cpu_cap(c, X86_FEATURE_ARAT); | 827 | set_cpu_cap(c, X86_FEATURE_ARAT); |
805 | 828 | ||
806 | rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); | ||
807 | |||
808 | /* 3DNow or LM implies PREFETCHW */ | 829 | /* 3DNow or LM implies PREFETCHW */ |
809 | if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH)) | 830 | if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH)) |
810 | if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM)) | 831 | if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM)) |
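CPUID 0x8000001f EBX packs the two values the early_init_amd() hunk above consumes: bits 5:0 give the C-bit position and bits 11:6 the physical-address-space reduction. A runnable decoding sketch with an assumed EBX value:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t ebx = 0x16f;  /* assumed: C-bit 47, reduction 5 */

		printf("C-bit position: %u\n", (unsigned)(ebx & 0x3f));
		printf("x86_phys_bits -= %u\n", (unsigned)((ebx >> 6) & 0x3f));
		return 0;
	}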
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 0af86d9242da..db684880d74a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
@@ -21,6 +21,14 @@ | |||
21 | 21 | ||
22 | void __init check_bugs(void) | 22 | void __init check_bugs(void) |
23 | { | 23 | { |
24 | #ifdef CONFIG_X86_32 | ||
25 | /* | ||
26 | * Regardless of whether PCID is enumerated, the SDM says | ||
27 | * that it can't be enabled in 32-bit mode. | ||
28 | */ | ||
29 | setup_clear_cpu_cap(X86_FEATURE_PCID); | ||
30 | #endif | ||
31 | |||
24 | identify_boot_cpu(); | 32 | identify_boot_cpu(); |
25 | 33 | ||
26 | if (!IS_ENABLED(CONFIG_SMP)) { | 34 | if (!IS_ENABLED(CONFIG_SMP)) { |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c8b39870f33e..b95cd94ca97b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -168,6 +168,24 @@ static int __init x86_mpx_setup(char *s) | |||
168 | } | 168 | } |
169 | __setup("nompx", x86_mpx_setup); | 169 | __setup("nompx", x86_mpx_setup); |
170 | 170 | ||
171 | #ifdef CONFIG_X86_64 | ||
172 | static int __init x86_pcid_setup(char *s) | ||
173 | { | ||
174 | /* require an exact match without trailing characters */ | ||
175 | if (strlen(s)) | ||
176 | return 0; | ||
177 | |||
178 | /* do not emit a message if the feature is not present */ | ||
179 | if (!boot_cpu_has(X86_FEATURE_PCID)) | ||
180 | return 1; | ||
181 | |||
182 | setup_clear_cpu_cap(X86_FEATURE_PCID); | ||
183 | pr_info("nopcid: PCID feature disabled\n"); | ||
184 | return 1; | ||
185 | } | ||
186 | __setup("nopcid", x86_pcid_setup); | ||
187 | #endif | ||
188 | |||
171 | static int __init x86_noinvpcid_setup(char *s) | 189 | static int __init x86_noinvpcid_setup(char *s) |
172 | { | 190 | { |
173 | /* noinvpcid doesn't accept parameters */ | 191 | /* noinvpcid doesn't accept parameters */ |
@@ -311,6 +329,25 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) | |||
311 | } | 329 | } |
312 | } | 330 | } |
313 | 331 | ||
332 | static void setup_pcid(struct cpuinfo_x86 *c) | ||
333 | { | ||
334 | if (cpu_has(c, X86_FEATURE_PCID)) { | ||
335 | if (cpu_has(c, X86_FEATURE_PGE)) { | ||
336 | cr4_set_bits(X86_CR4_PCIDE); | ||
337 | } else { | ||
338 | /* | ||
339 | * flush_tlb_all(), as currently implemented, won't | ||
340 | * work if PCID is on but PGE is not. Since that | ||
341 | * combination doesn't exist on real hardware, there's | ||
342 | * no reason to try to fully support it, but it's | ||
343 | * polite to avoid corrupting data if we're on | ||
344 | * an improperly configured VM. | ||
345 | */ | ||
346 | clear_cpu_cap(c, X86_FEATURE_PCID); | ||
347 | } | ||
348 | } | ||
349 | } | ||
350 | |||
314 | /* | 351 | /* |
315 | * Protection Keys are not available in 32-bit mode. | 352 | * Protection Keys are not available in 32-bit mode. |
316 | */ | 353 | */ |
@@ -1125,6 +1162,9 @@ static void identify_cpu(struct cpuinfo_x86 *c) | |||
1125 | setup_smep(c); | 1162 | setup_smep(c); |
1126 | setup_smap(c); | 1163 | setup_smap(c); |
1127 | 1164 | ||
1165 | /* Set up PCID */ | ||
1166 | setup_pcid(c); | ||
1167 | |||
1128 | /* | 1168 | /* |
1129 | * The vendor-specific functions might have changed features. | 1169 | * The vendor-specific functions might have changed features. |
1130 | * Now we do "generic changes." | 1170 | * Now we do "generic changes." |
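With CR4.PCIDE set by setup_pcid(), the low twelve bits of a CR3 write select the PCID and bit 63 (CR3_NOFLUSH) requests a switch that preserves cached translations. A hedged sketch of building such a value; the kernel grew a helper of this shape later in the PCID work, so this is illustrative only (the +1 keeps PCID 0 for init code):

	/* Sketch: CR3 for a given pgd and dynamic ASID. */
	static inline unsigned long build_cr3(pgd_t *pgd, u16 asid, bool noflush)
	{
		unsigned long cr3 = __sme_pa(pgd) | (asid + 1);

		if (noflush)
			cr3 |= CR3_NOFLUSH;
		return cr3;
	}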
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 6dde0497efc7..3b413065c613 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <asm/mce.h> | 51 | #include <asm/mce.h> |
52 | #include <asm/msr.h> | 52 | #include <asm/msr.h> |
53 | #include <asm/reboot.h> | 53 | #include <asm/reboot.h> |
54 | #include <asm/set_memory.h> | ||
54 | 55 | ||
55 | #include "mce-internal.h" | 56 | #include "mce-internal.h" |
56 | 57 | ||
@@ -1051,6 +1052,48 @@ static int do_memory_failure(struct mce *m) | |||
1051 | return ret; | 1052 | return ret; |
1052 | } | 1053 | } |
1053 | 1054 | ||
1055 | #if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE) | ||
1056 | |||
1057 | void arch_unmap_kpfn(unsigned long pfn) | ||
1058 | { | ||
1059 | unsigned long decoy_addr; | ||
1060 | |||
1061 | /* | ||
1062 | * Unmap this page from the kernel 1:1 mappings to make sure | ||
1063 | * we don't log more errors because of speculative access to | ||
1064 | * the page. | ||
1065 | * We would like to just call: | ||
1066 | * set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1); | ||
1067 | * but doing that would radically increase the odds of a | ||
1068 | * speculative access to the poison page because we'd have | ||
1069 | * the virtual address of the kernel 1:1 mapping sitting | ||
1070 | * around in registers. | ||
1071 | * Instead we get tricky. We create a non-canonical address | ||
1072 | * that looks just like the one we want, but has bit 63 flipped. | ||
1073 | * This relies on set_memory_np() not checking whether we passed | ||
1074 | * a legal address. | ||
1075 | */ | ||
1076 | |||
1077 | /* | ||
1078 | * Build time check to see if we have a spare virtual bit. Don't want | ||
1079 | * to leave this until run time because most developers don't have a | ||
1080 | * system that can exercise this code path. This will only become a | ||
1081 | * problem if/when we move beyond 5-level page tables. | ||
1082 | * | ||
1083 | * Hard code "9" here because cpp doesn't grok ilog2(PTRS_PER_PGD) | ||
1084 | */ | ||
1085 | #if PGDIR_SHIFT + 9 < 63 | ||
1086 | decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); | ||
1087 | #else | ||
1088 | #error "no unused virtual bit available" | ||
1089 | #endif | ||
1090 | |||
1091 | if (set_memory_np(decoy_addr, 1)) | ||
1092 | pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); | ||
1093 | |||
1094 | } | ||
1095 | #endif | ||
1096 | |||
1054 | /* | 1097 | /* |
1055 | * The actual machine check handler. This only handles real | 1098 | * The actual machine check handler. This only handles real |
1056 | * exceptions when something got corrupted coming in through int 18. | 1099 | * exceptions when something got corrupted coming in through int 18. |
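The decoy construction is plain bit math and can be verified in user space: flip bit 63 of the 1:1-map virtual address so that no canonical form of it lingers in registers. The PAGE_OFFSET below is an assumed example value:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t page_offset = 0xffff880000000000ULL; /* assumed */
		uint64_t pfn = 0x12345;

		uint64_t kaddr = (pfn << 12) + page_offset;
		uint64_t decoy = (pfn << 12) + (page_offset ^ (1ULL << 63));

		printf("1:1 kaddr: %#llx\n", (unsigned long long)kaddr);
		printf("decoy:     %#llx\n", (unsigned long long)decoy);
		return 0;
	}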
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 23c23508c012..05459ad3db46 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c | |||
@@ -31,6 +31,7 @@ static const struct cpuid_bit cpuid_bits[] = { | |||
31 | { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, | 31 | { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, |
32 | { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, | 32 | { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, |
33 | { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, | 33 | { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, |
34 | { X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 }, | ||
34 | { 0, 0, 0, 0, 0 } | 35 | { 0, 0, 0, 0, 0 } |
35 | }; | 36 | }; |
36 | 37 | ||
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 532da61d605c..71c11ad5643e 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -96,7 +96,8 @@ EXPORT_SYMBOL_GPL(e820__mapped_any); | |||
96 | * Note: this function only works correctly once the E820 table is sorted and | 96 | * Note: this function only works correctly once the E820 table is sorted and |
97 | * not-overlapping (at least for the range specified), which is the case normally. | 97 | * not-overlapping (at least for the range specified), which is the case normally. |
98 | */ | 98 | */ |
99 | bool __init e820__mapped_all(u64 start, u64 end, enum e820_type type) | 99 | static struct e820_entry *__e820__mapped_all(u64 start, u64 end, |
100 | enum e820_type type) | ||
100 | { | 101 | { |
101 | int i; | 102 | int i; |
102 | 103 | ||
@@ -122,9 +123,28 @@ bool __init e820__mapped_all(u64 start, u64 end, enum e820_type type) | |||
122 | * coverage of the desired range exists: | 123 | * coverage of the desired range exists: |
123 | */ | 124 | */ |
124 | if (start >= end) | 125 | if (start >= end) |
125 | return 1; | 126 | return entry; |
126 | } | 127 | } |
127 | return 0; | 128 | |
129 | return NULL; | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * This function checks if the entire range <start,end> is mapped with type. | ||
134 | */ | ||
135 | bool __init e820__mapped_all(u64 start, u64 end, enum e820_type type) | ||
136 | { | ||
137 | return __e820__mapped_all(start, end, type); | ||
138 | } | ||
139 | |||
140 | /* | ||
141 | * This function returns the type associated with the range <start,end>. | ||
142 | */ | ||
143 | int e820__get_entry_type(u64 start, u64 end) | ||
144 | { | ||
145 | struct e820_entry *entry = __e820__mapped_all(start, end, 0); | ||
146 | |||
147 | return entry ? entry->type : -EINVAL; | ||
128 | } | 148 | } |
129 | 149 | ||
130 | /* | 150 | /* |
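e820__get_entry_type() is what later SME code uses to decide whether a remapped range was populated by the BIOS and therefore must not be mapped encrypted. A hedged sketch of such a policy check (the exact set of types is illustrative, not lifted from this series):

	/* Sketch: should this BIOS-provided range be mapped decrypted? */
	static bool range_needs_decrypted_mapping(u64 phys_addr, u64 size)
	{
		switch (e820__get_entry_type(phys_addr, phys_addr + size)) {
		case E820_TYPE_RESERVED:
		case E820_TYPE_ACPI:
		case E820_TYPE_NVS:
			return true;	/* written by the BIOS, not encrypted */
		default:
			return false;
		}
	}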
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 6b91e2eb8d3f..9c4e7ba6870c 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c | |||
@@ -195,7 +195,7 @@ void init_espfix_ap(int cpu) | |||
195 | 195 | ||
196 | pte_p = pte_offset_kernel(&pmd, addr); | 196 | pte_p = pte_offset_kernel(&pmd, addr); |
197 | stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0)); | 197 | stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0)); |
198 | pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask)); | 198 | pte = __pte(__pa(stack_page) | ((__PAGE_KERNEL_RO | _PAGE_ENC) & ptemask)); |
199 | for (n = 0; n < ESPFIX_PTE_CLONES; n++) | 199 | for (n = 0; n < ESPFIX_PTE_CLONES; n++) |
200 | set_pte(&pte_p[n*PTE_STRIDE], pte); | 200 | set_pte(&pte_p[n*PTE_STRIDE], pte); |
201 | 201 | ||
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 9ba79543d9ee..6a193b93fd95 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/start_kernel.h> | 14 | #include <linux/start_kernel.h> |
15 | #include <linux/io.h> | 15 | #include <linux/io.h> |
16 | #include <linux/memblock.h> | 16 | #include <linux/memblock.h> |
17 | #include <linux/mem_encrypt.h> | ||
17 | 18 | ||
18 | #include <asm/processor.h> | 19 | #include <asm/processor.h> |
19 | #include <asm/proto.h> | 20 | #include <asm/proto.h> |
@@ -33,7 +34,6 @@ | |||
33 | /* | 34 | /* |
34 | * Manage page tables very early on. | 35 | * Manage page tables very early on. |
35 | */ | 36 | */ |
36 | extern pgd_t early_top_pgt[PTRS_PER_PGD]; | ||
37 | extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD]; | 37 | extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD]; |
38 | static unsigned int __initdata next_early_pgt; | 38 | static unsigned int __initdata next_early_pgt; |
39 | pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); | 39 | pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); |
@@ -45,9 +45,11 @@ static void __head *fixup_pointer(void *ptr, unsigned long physaddr) | |||
45 | return ptr - (void *)_text + (void *)physaddr; | 45 | return ptr - (void *)_text + (void *)physaddr; |
46 | } | 46 | } |
47 | 47 | ||
48 | void __head __startup_64(unsigned long physaddr) | 48 | unsigned long __head __startup_64(unsigned long physaddr, |
49 | struct boot_params *bp) | ||
49 | { | 50 | { |
50 | unsigned long load_delta, *p; | 51 | unsigned long load_delta, *p; |
52 | unsigned long pgtable_flags; | ||
51 | pgdval_t *pgd; | 53 | pgdval_t *pgd; |
52 | p4dval_t *p4d; | 54 | p4dval_t *p4d; |
53 | pudval_t *pud; | 55 | pudval_t *pud; |
@@ -69,6 +71,12 @@ void __head __startup_64(unsigned long physaddr) | |||
69 | if (load_delta & ~PMD_PAGE_MASK) | 71 | if (load_delta & ~PMD_PAGE_MASK) |
70 | for (;;); | 72 | for (;;); |
71 | 73 | ||
74 | /* Activate Secure Memory Encryption (SME) if supported and enabled */ | ||
75 | sme_enable(bp); | ||
76 | |||
77 | /* Include the SME encryption mask in the fixup value */ | ||
78 | load_delta += sme_get_me_mask(); | ||
79 | |||
72 | /* Fixup the physical addresses in the page table */ | 80 | /* Fixup the physical addresses in the page table */ |
73 | 81 | ||
74 | pgd = fixup_pointer(&early_top_pgt, physaddr); | 82 | pgd = fixup_pointer(&early_top_pgt, physaddr); |
@@ -92,31 +100,35 @@ void __head __startup_64(unsigned long physaddr) | |||
92 | * creates a bunch of nonsense entries but that is fine -- | 100 | * creates a bunch of nonsense entries but that is fine -- |
93 | * it avoids problems around wraparound. | 101 | * it avoids problems around wraparound. |
94 | */ | 102 | */ |
103 | |||
95 | next_pgt_ptr = fixup_pointer(&next_early_pgt, physaddr); | 104 | next_pgt_ptr = fixup_pointer(&next_early_pgt, physaddr); |
96 | pud = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr); | 105 | pud = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr); |
97 | pmd = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr); | 106 | pmd = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr); |
98 | 107 | ||
108 | pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask(); | ||
109 | |||
99 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | 110 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { |
100 | p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); | 111 | p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); |
101 | 112 | ||
102 | i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; | 113 | i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; |
103 | pgd[i + 0] = (pgdval_t)p4d + _KERNPG_TABLE; | 114 | pgd[i + 0] = (pgdval_t)p4d + pgtable_flags; |
104 | pgd[i + 1] = (pgdval_t)p4d + _KERNPG_TABLE; | 115 | pgd[i + 1] = (pgdval_t)p4d + pgtable_flags; |
105 | 116 | ||
106 | i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D; | 117 | i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D; |
107 | p4d[i + 0] = (pgdval_t)pud + _KERNPG_TABLE; | 118 | p4d[i + 0] = (pgdval_t)pud + pgtable_flags; |
108 | p4d[i + 1] = (pgdval_t)pud + _KERNPG_TABLE; | 119 | p4d[i + 1] = (pgdval_t)pud + pgtable_flags; |
109 | } else { | 120 | } else { |
110 | i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; | 121 | i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; |
111 | pgd[i + 0] = (pgdval_t)pud + _KERNPG_TABLE; | 122 | pgd[i + 0] = (pgdval_t)pud + pgtable_flags; |
112 | pgd[i + 1] = (pgdval_t)pud + _KERNPG_TABLE; | 123 | pgd[i + 1] = (pgdval_t)pud + pgtable_flags; |
113 | } | 124 | } |
114 | 125 | ||
115 | i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD; | 126 | i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD; |
116 | pud[i + 0] = (pudval_t)pmd + _KERNPG_TABLE; | 127 | pud[i + 0] = (pudval_t)pmd + pgtable_flags; |
117 | pud[i + 1] = (pudval_t)pmd + _KERNPG_TABLE; | 128 | pud[i + 1] = (pudval_t)pmd + pgtable_flags; |
118 | 129 | ||
119 | pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL; | 130 | pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL; |
131 | pmd_entry += sme_get_me_mask(); | ||
120 | pmd_entry += physaddr; | 132 | pmd_entry += physaddr; |
121 | 133 | ||
122 | for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) { | 134 | for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) { |
@@ -137,9 +149,30 @@ void __head __startup_64(unsigned long physaddr) | |||
137 | pmd[i] += load_delta; | 149 | pmd[i] += load_delta; |
138 | } | 150 | } |
139 | 151 | ||
140 | /* Fixup phys_base */ | 152 | /* |
153 | * Fixup phys_base - remove the memory encryption mask to obtain | ||
154 | * the true physical address. | ||
155 | */ | ||
141 | p = fixup_pointer(&phys_base, physaddr); | 156 | p = fixup_pointer(&phys_base, physaddr); |
142 | *p += load_delta; | 157 | *p += load_delta - sme_get_me_mask(); |
158 | |||
159 | /* Encrypt the kernel (if SME is active) */ | ||
160 | sme_encrypt_kernel(); | ||
161 | |||
162 | /* | ||
163 | * Return the SME encryption mask (if SME is active) to be used as a | ||
164 | * modifier for the initial pgdir entry programmed into CR3. | ||
165 | */ | ||
166 | return sme_get_me_mask(); | ||
167 | } | ||
168 | |||
169 | unsigned long __startup_secondary_64(void) | ||
170 | { | ||
171 | /* | ||
172 | * Return the SME encryption mask (if SME is active) to be used as a | ||
173 | * modifier for the initial pgdir entry programmed into CR3. | ||
174 | */ | ||
175 | return sme_get_me_mask(); | ||
143 | } | 176 | } |
144 | 177 | ||
145 | /* Wipe all early page tables except for the kernel symbol map */ | 178 | /* Wipe all early page tables except for the kernel symbol map */ |
@@ -147,17 +180,17 @@ static void __init reset_early_page_tables(void) | |||
147 | { | 180 | { |
148 | memset(early_top_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1)); | 181 | memset(early_top_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1)); |
149 | next_early_pgt = 0; | 182 | next_early_pgt = 0; |
150 | write_cr3(__pa_nodebug(early_top_pgt)); | 183 | write_cr3(__sme_pa_nodebug(early_top_pgt)); |
151 | } | 184 | } |
152 | 185 | ||
153 | /* Create a new PMD entry */ | 186 | /* Create a new PMD entry */ |
154 | int __init early_make_pgtable(unsigned long address) | 187 | int __init __early_make_pgtable(unsigned long address, pmdval_t pmd) |
155 | { | 188 | { |
156 | unsigned long physaddr = address - __PAGE_OFFSET; | 189 | unsigned long physaddr = address - __PAGE_OFFSET; |
157 | pgdval_t pgd, *pgd_p; | 190 | pgdval_t pgd, *pgd_p; |
158 | p4dval_t p4d, *p4d_p; | 191 | p4dval_t p4d, *p4d_p; |
159 | pudval_t pud, *pud_p; | 192 | pudval_t pud, *pud_p; |
160 | pmdval_t pmd, *pmd_p; | 193 | pmdval_t *pmd_p; |
161 | 194 | ||
162 | /* Invalid address or early pgt is done ? */ | 195 | /* Invalid address or early pgt is done ? */ |
163 | if (physaddr >= MAXMEM || read_cr3_pa() != __pa_nodebug(early_top_pgt)) | 196 | if (physaddr >= MAXMEM || read_cr3_pa() != __pa_nodebug(early_top_pgt)) |
@@ -216,12 +249,21 @@ again: | |||
216 | memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD); | 249 | memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD); |
217 | *pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; | 250 | *pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; |
218 | } | 251 | } |
219 | pmd = (physaddr & PMD_MASK) + early_pmd_flags; | ||
220 | pmd_p[pmd_index(address)] = pmd; | 252 | pmd_p[pmd_index(address)] = pmd; |
221 | 253 | ||
222 | return 0; | 254 | return 0; |
223 | } | 255 | } |
224 | 256 | ||
257 | int __init early_make_pgtable(unsigned long address) | ||
258 | { | ||
259 | unsigned long physaddr = address - __PAGE_OFFSET; | ||
260 | pmdval_t pmd; | ||
261 | |||
262 | pmd = (physaddr & PMD_MASK) + early_pmd_flags; | ||
263 | |||
264 | return __early_make_pgtable(address, pmd); | ||
265 | } | ||
266 | |||
225 | /* Don't add a printk in there. printk relies on the PDA which is not initialized | 267 | /* Don't add a printk in there. printk relies on the PDA which is not initialized |
226 | yet. */ | 268 | yet. */ |
227 | static void __init clear_bss(void) | 269 | static void __init clear_bss(void) |
@@ -244,6 +286,12 @@ static void __init copy_bootdata(char *real_mode_data) | |||
244 | char * command_line; | 286 | char * command_line; |
245 | unsigned long cmd_line_ptr; | 287 | unsigned long cmd_line_ptr; |
246 | 288 | ||
289 | /* | ||
290 | * If SME is active, this will create decrypted mappings of the | ||
291 | * boot data in advance of the copy operations. | ||
292 | */ | ||
293 | sme_map_bootdata(real_mode_data); | ||
294 | |||
247 | memcpy(&boot_params, real_mode_data, sizeof boot_params); | 295 | memcpy(&boot_params, real_mode_data, sizeof boot_params); |
248 | sanitize_boot_params(&boot_params); | 296 | sanitize_boot_params(&boot_params); |
249 | cmd_line_ptr = get_cmd_line_ptr(); | 297 | cmd_line_ptr = get_cmd_line_ptr(); |
@@ -251,6 +299,14 @@ static void __init copy_bootdata(char *real_mode_data) | |||
251 | command_line = __va(cmd_line_ptr); | 299 | command_line = __va(cmd_line_ptr); |
252 | memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); | 300 | memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); |
253 | } | 301 | } |
302 | |||
303 | /* | ||
304 | * The old boot data is no longer needed and won't be reserved, | ||
305 | * freeing up that memory for use by the system. If SME is active, | ||
306 | * we need to remove the mappings that were created so that the | ||
307 | * memory doesn't remain mapped as decrypted. | ||
308 | */ | ||
309 | sme_unmap_bootdata(real_mode_data); | ||
254 | } | 310 | } |
255 | 311 | ||
256 | asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) | 312 | asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) |
@@ -280,6 +336,13 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) | |||
280 | 336 | ||
281 | clear_page(init_top_pgt); | 337 | clear_page(init_top_pgt); |
282 | 338 | ||
339 | /* | ||
340 | * SME support may update early_pmd_flags to include the memory | ||
341 | * encryption mask, so it needs to be called before anything | ||
342 | * that may generate a page fault. | ||
343 | */ | ||
344 | sme_early_init(); | ||
345 | |||
283 | kasan_early_init(); | 346 | kasan_early_init(); |
284 | 347 | ||
285 | for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) | 348 | for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) |
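The phys_base fixup deserves a second look: load_delta was deliberately biased by the mask so that every page-table fixup picks up the C-bit, but phys_base must remain a true physical offset, so the mask is subtracted back out. The arithmetic as a runnable check (mask position assumed):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t sme_mask  = 1ULL << 47;  /* assumed C-bit     */
		uint64_t link_addr = 0x1000000;   /* linked address    */
		uint64_t load_addr = 0x2000000;   /* actual load point */

		uint64_t load_delta = load_addr - link_addr + sme_mask;

		printf("pte fixup:       +%#llx\n", (unsigned long long)load_delta);
		printf("phys_base fixup: +%#llx\n",
		       (unsigned long long)(load_delta - sme_mask));
		return 0;
	}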
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 6225550883df..513cbb012ecc 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -73,12 +73,19 @@ startup_64: | |||
73 | /* Sanitize CPU configuration */ | 73 | /* Sanitize CPU configuration */ |
74 | call verify_cpu | 74 | call verify_cpu |
75 | 75 | ||
76 | /* | ||
77 | * Perform pagetable fixups. Additionally, if SME is active, encrypt | ||
78 | * the kernel and retrieve the modifier (SME encryption mask if SME | ||
79 | * is active) to be added to the initial pgdir entry that will be | ||
80 | * programmed into CR3. | ||
81 | */ | ||
76 | leaq _text(%rip), %rdi | 82 | leaq _text(%rip), %rdi |
77 | pushq %rsi | 83 | pushq %rsi |
78 | call __startup_64 | 84 | call __startup_64 |
79 | popq %rsi | 85 | popq %rsi |
80 | 86 | ||
81 | movq $(early_top_pgt - __START_KERNEL_map), %rax | 87 | /* Form the CR3 value being sure to include the CR3 modifier */ |
88 | addq $(early_top_pgt - __START_KERNEL_map), %rax | ||
82 | jmp 1f | 89 | jmp 1f |
83 | ENTRY(secondary_startup_64) | 90 | ENTRY(secondary_startup_64) |
84 | /* | 91 | /* |
@@ -98,7 +105,16 @@ ENTRY(secondary_startup_64) | |||
98 | /* Sanitize CPU configuration */ | 105 | /* Sanitize CPU configuration */ |
99 | call verify_cpu | 106 | call verify_cpu |
100 | 107 | ||
101 | movq $(init_top_pgt - __START_KERNEL_map), %rax | 108 | /* |
109 | * Retrieve the modifier (SME encryption mask if SME is active) to be | ||
110 | * added to the initial pgdir entry that will be programmed into CR3. | ||
111 | */ | ||
112 | pushq %rsi | ||
113 | call __startup_secondary_64 | ||
114 | popq %rsi | ||
115 | |||
116 | /* Form the CR3 value being sure to include the CR3 modifier */ | ||
117 | addq $(init_top_pgt - __START_KERNEL_map), %rax | ||
102 | 1: | 118 | 1: |
103 | 119 | ||
104 | /* Enable PAE mode, PGE and LA57 */ | 120 | /* Enable PAE mode, PGE and LA57 */ |
@@ -335,9 +351,9 @@ GLOBAL(name) | |||
335 | NEXT_PAGE(early_top_pgt) | 351 | NEXT_PAGE(early_top_pgt) |
336 | .fill 511,8,0 | 352 | .fill 511,8,0 |
337 | #ifdef CONFIG_X86_5LEVEL | 353 | #ifdef CONFIG_X86_5LEVEL |
338 | .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE | 354 | .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC |
339 | #else | 355 | #else |
340 | .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE | 356 | .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC |
341 | #endif | 357 | #endif |
342 | 358 | ||
343 | NEXT_PAGE(early_dynamic_pgts) | 359 | NEXT_PAGE(early_dynamic_pgts) |
@@ -350,15 +366,15 @@ NEXT_PAGE(init_top_pgt) | |||
350 | .fill 512,8,0 | 366 | .fill 512,8,0 |
351 | #else | 367 | #else |
352 | NEXT_PAGE(init_top_pgt) | 368 | NEXT_PAGE(init_top_pgt) |
353 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE | 369 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC |
354 | .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 | 370 | .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 |
355 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE | 371 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC |
356 | .org init_top_pgt + PGD_START_KERNEL*8, 0 | 372 | .org init_top_pgt + PGD_START_KERNEL*8, 0 |
357 | /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ | 373 | /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ |
358 | .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE | 374 | .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC |
359 | 375 | ||
360 | NEXT_PAGE(level3_ident_pgt) | 376 | NEXT_PAGE(level3_ident_pgt) |
361 | .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE | 377 | .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC |
362 | .fill 511, 8, 0 | 378 | .fill 511, 8, 0 |
363 | NEXT_PAGE(level2_ident_pgt) | 379 | NEXT_PAGE(level2_ident_pgt) |
364 | /* Since I easily can, map the first 1G. | 380 | /* Since I easily can, map the first 1G. |
@@ -370,14 +386,14 @@ NEXT_PAGE(level2_ident_pgt) | |||
370 | #ifdef CONFIG_X86_5LEVEL | 386 | #ifdef CONFIG_X86_5LEVEL |
371 | NEXT_PAGE(level4_kernel_pgt) | 387 | NEXT_PAGE(level4_kernel_pgt) |
372 | .fill 511,8,0 | 388 | .fill 511,8,0 |
373 | .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE | 389 | .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC |
374 | #endif | 390 | #endif |
375 | 391 | ||
376 | NEXT_PAGE(level3_kernel_pgt) | 392 | NEXT_PAGE(level3_kernel_pgt) |
377 | .fill L3_START_KERNEL,8,0 | 393 | .fill L3_START_KERNEL,8,0 |
378 | /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ | 394 | /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ |
379 | .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE | 395 | .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC |
380 | .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE | 396 | .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC |
381 | 397 | ||
382 | NEXT_PAGE(level2_kernel_pgt) | 398 | NEXT_PAGE(level2_kernel_pgt) |
383 | /* | 399 | /* |
@@ -395,7 +411,7 @@ NEXT_PAGE(level2_kernel_pgt) | |||
395 | 411 | ||
396 | NEXT_PAGE(level2_fixmap_pgt) | 412 | NEXT_PAGE(level2_fixmap_pgt) |
397 | .fill 506,8,0 | 413 | .fill 506,8,0 |
398 | .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE | 414 | .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC |
399 | /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ | 415 | /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ |
400 | .fill 5,8,0 | 416 | .fill 5,8,0 |
401 | 417 | ||
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index 38b64587b31b..fd6f8fbbe6f2 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c | |||
@@ -33,7 +33,6 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf, | |||
33 | struct setup_data_node *node = file->private_data; | 33 | struct setup_data_node *node = file->private_data; |
34 | unsigned long remain; | 34 | unsigned long remain; |
35 | loff_t pos = *ppos; | 35 | loff_t pos = *ppos; |
36 | struct page *pg; | ||
37 | void *p; | 36 | void *p; |
38 | u64 pa; | 37 | u64 pa; |
39 | 38 | ||
@@ -47,18 +46,13 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf, | |||
47 | count = node->len - pos; | 46 | count = node->len - pos; |
48 | 47 | ||
49 | pa = node->paddr + sizeof(struct setup_data) + pos; | 48 | pa = node->paddr + sizeof(struct setup_data) + pos; |
50 | pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT); | 49 | p = memremap(pa, count, MEMREMAP_WB); |
51 | if (PageHighMem(pg)) { | 50 | if (!p) |
52 | p = ioremap_cache(pa, count); | 51 | return -ENOMEM; |
53 | if (!p) | ||
54 | return -ENXIO; | ||
55 | } else | ||
56 | p = __va(pa); | ||
57 | 52 | ||
58 | remain = copy_to_user(user_buf, p, count); | 53 | remain = copy_to_user(user_buf, p, count); |
59 | 54 | ||
60 | if (PageHighMem(pg)) | 55 | memunmap(p); |
61 | iounmap(p); | ||
62 | 56 | ||
63 | if (remain) | 57 | if (remain) |
64 | return -EFAULT; | 58 | return -EFAULT; |
@@ -109,7 +103,6 @@ static int __init create_setup_data_nodes(struct dentry *parent) | |||
109 | struct setup_data *data; | 103 | struct setup_data *data; |
110 | int error; | 104 | int error; |
111 | struct dentry *d; | 105 | struct dentry *d; |
112 | struct page *pg; | ||
113 | u64 pa_data; | 106 | u64 pa_data; |
114 | int no = 0; | 107 | int no = 0; |
115 | 108 | ||
@@ -126,16 +119,12 @@ static int __init create_setup_data_nodes(struct dentry *parent) | |||
126 | goto err_dir; | 119 | goto err_dir; |
127 | } | 120 | } |
128 | 121 | ||
129 | pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT); | 122 | data = memremap(pa_data, sizeof(*data), MEMREMAP_WB); |
130 | if (PageHighMem(pg)) { | 123 | if (!data) { |
131 | data = ioremap_cache(pa_data, sizeof(*data)); | 124 | kfree(node); |
132 | if (!data) { | 125 | error = -ENOMEM; |
133 | kfree(node); | 126 | goto err_dir; |
134 | error = -ENXIO; | 127 | } |
135 | goto err_dir; | ||
136 | } | ||
137 | } else | ||
138 | data = __va(pa_data); | ||
139 | 128 | ||
140 | node->paddr = pa_data; | 129 | node->paddr = pa_data; |
141 | node->type = data->type; | 130 | node->type = data->type; |
@@ -143,8 +132,7 @@ static int __init create_setup_data_nodes(struct dentry *parent) | |||
143 | error = create_setup_data_node(d, no, node); | 132 | error = create_setup_data_node(d, no, node); |
144 | pa_data = data->next; | 133 | pa_data = data->next; |
145 | 134 | ||
146 | if (PageHighMem(pg)) | 135 | memunmap(data); |
147 | iounmap(data); | ||
148 | if (error) | 136 | if (error) |
149 | goto err_dir; | 137 | goto err_dir; |
150 | no++; | 138 | no++; |
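memremap() works whether the setup_data lives in the direct map, in highmem, or (under SME) behind an encrypted mapping, which is why it replaces the PageHighMem()/ioremap_cache() special-casing here and in ksysfs.c below. The resulting idiom:

	/* Sketch: the common access pattern after this change. */
	void *p = memremap(pa, len, MEMREMAP_WB);

	if (!p)
		return -ENOMEM;
	/* ... read or copy from p ... */
	memunmap(p);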
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c index 06e1ff5562c0..4b0592ca9e47 100644 --- a/arch/x86/kernel/ksysfs.c +++ b/arch/x86/kernel/ksysfs.c | |||
@@ -16,8 +16,8 @@ | |||
16 | #include <linux/stat.h> | 16 | #include <linux/stat.h> |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/mm.h> | 18 | #include <linux/mm.h> |
19 | #include <linux/io.h> | ||
19 | 20 | ||
20 | #include <asm/io.h> | ||
21 | #include <asm/setup.h> | 21 | #include <asm/setup.h> |
22 | 22 | ||
23 | static ssize_t version_show(struct kobject *kobj, | 23 | static ssize_t version_show(struct kobject *kobj, |
@@ -79,12 +79,12 @@ static int get_setup_data_paddr(int nr, u64 *paddr) | |||
79 | *paddr = pa_data; | 79 | *paddr = pa_data; |
80 | return 0; | 80 | return 0; |
81 | } | 81 | } |
82 | data = ioremap_cache(pa_data, sizeof(*data)); | 82 | data = memremap(pa_data, sizeof(*data), MEMREMAP_WB); |
83 | if (!data) | 83 | if (!data) |
84 | return -ENOMEM; | 84 | return -ENOMEM; |
85 | 85 | ||
86 | pa_data = data->next; | 86 | pa_data = data->next; |
87 | iounmap(data); | 87 | memunmap(data); |
88 | i++; | 88 | i++; |
89 | } | 89 | } |
90 | return -EINVAL; | 90 | return -EINVAL; |
@@ -97,17 +97,17 @@ static int __init get_setup_data_size(int nr, size_t *size) | |||
97 | u64 pa_data = boot_params.hdr.setup_data; | 97 | u64 pa_data = boot_params.hdr.setup_data; |
98 | 98 | ||
99 | while (pa_data) { | 99 | while (pa_data) { |
100 | data = ioremap_cache(pa_data, sizeof(*data)); | 100 | data = memremap(pa_data, sizeof(*data), MEMREMAP_WB); |
101 | if (!data) | 101 | if (!data) |
102 | return -ENOMEM; | 102 | return -ENOMEM; |
103 | if (nr == i) { | 103 | if (nr == i) { |
104 | *size = data->len; | 104 | *size = data->len; |
105 | iounmap(data); | 105 | memunmap(data); |
106 | return 0; | 106 | return 0; |
107 | } | 107 | } |
108 | 108 | ||
109 | pa_data = data->next; | 109 | pa_data = data->next; |
110 | iounmap(data); | 110 | memunmap(data); |
111 | i++; | 111 | i++; |
112 | } | 112 | } |
113 | return -EINVAL; | 113 | return -EINVAL; |
@@ -127,12 +127,12 @@ static ssize_t type_show(struct kobject *kobj, | |||
127 | ret = get_setup_data_paddr(nr, &paddr); | 127 | ret = get_setup_data_paddr(nr, &paddr); |
128 | if (ret) | 128 | if (ret) |
129 | return ret; | 129 | return ret; |
130 | data = ioremap_cache(paddr, sizeof(*data)); | 130 | data = memremap(paddr, sizeof(*data), MEMREMAP_WB); |
131 | if (!data) | 131 | if (!data) |
132 | return -ENOMEM; | 132 | return -ENOMEM; |
133 | 133 | ||
134 | ret = sprintf(buf, "0x%x\n", data->type); | 134 | ret = sprintf(buf, "0x%x\n", data->type); |
135 | iounmap(data); | 135 | memunmap(data); |
136 | return ret; | 136 | return ret; |
137 | } | 137 | } |
138 | 138 | ||
@@ -154,7 +154,7 @@ static ssize_t setup_data_data_read(struct file *fp, | |||
154 | ret = get_setup_data_paddr(nr, &paddr); | 154 | ret = get_setup_data_paddr(nr, &paddr); |
155 | if (ret) | 155 | if (ret) |
156 | return ret; | 156 | return ret; |
157 | data = ioremap_cache(paddr, sizeof(*data)); | 157 | data = memremap(paddr, sizeof(*data), MEMREMAP_WB); |
158 | if (!data) | 158 | if (!data) |
159 | return -ENOMEM; | 159 | return -ENOMEM; |
160 | 160 | ||
@@ -170,15 +170,15 @@ static ssize_t setup_data_data_read(struct file *fp, | |||
170 | goto out; | 170 | goto out; |
171 | 171 | ||
172 | ret = count; | 172 | ret = count; |
173 | p = ioremap_cache(paddr + sizeof(*data), data->len); | 173 | p = memremap(paddr + sizeof(*data), data->len, MEMREMAP_WB); |
174 | if (!p) { | 174 | if (!p) { |
175 | ret = -ENOMEM; | 175 | ret = -ENOMEM; |
176 | goto out; | 176 | goto out; |
177 | } | 177 | } |
178 | memcpy(buf, p + off, count); | 178 | memcpy(buf, p + off, count); |
179 | iounmap(p); | 179 | memunmap(p); |
180 | out: | 180 | out: |
181 | iounmap(data); | 181 | memunmap(data); |
182 | return ret; | 182 | return ret; |
183 | } | 183 | } |
184 | 184 | ||
@@ -250,13 +250,13 @@ static int __init get_setup_data_total_num(u64 pa_data, int *nr) | |||
250 | *nr = 0; | 250 | *nr = 0; |
251 | while (pa_data) { | 251 | while (pa_data) { |
252 | *nr += 1; | 252 | *nr += 1; |
253 | data = ioremap_cache(pa_data, sizeof(*data)); | 253 | data = memremap(pa_data, sizeof(*data), MEMREMAP_WB); |
254 | if (!data) { | 254 | if (!data) { |
255 | ret = -ENOMEM; | 255 | ret = -ENOMEM; |
256 | goto out; | 256 | goto out; |
257 | } | 257 | } |
258 | pa_data = data->next; | 258 | pa_data = data->next; |
259 | iounmap(data); | 259 | memunmap(data); |
260 | } | 260 | } |
261 | 261 | ||
262 | out: | 262 | out: |
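
The hunks above switch the setup_data walkers from ioremap_cache()/iounmap() to memremap()/memunmap(). setup_data lives in ordinary RAM, so under SME only a MEMREMAP_WB mapping carries the encryption bit; an ioremap-style mapping would read the encrypted bytes raw. A minimal sketch of the resulting walk pattern (kernel context assumed, error paths trimmed):

    #include <linux/io.h>       /* memremap(), memunmap() */
    #include <asm/setup.h>      /* boot_params, struct setup_data */

    /* Walk the boot_params.hdr.setup_data list, mapping each node as RAM. */
    static int __init walk_setup_data(void)
    {
            u64 pa_data = boot_params.hdr.setup_data;

            while (pa_data) {
                    struct setup_data *data;

                    data = memremap(pa_data, sizeof(*data), MEMREMAP_WB);
                    if (!data)
                            return -ENOMEM;

                    pa_data = data->next;   /* physical address of next node */
                    memunmap(data);         /* pairs with memremap() */
            }
            return 0;
    }
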
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index cb0a30473c23..1f790cf9d38f 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -87,7 +87,7 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) | |||
87 | set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); | 87 | set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); |
88 | } | 88 | } |
89 | pte = pte_offset_kernel(pmd, vaddr); | 89 | pte = pte_offset_kernel(pmd, vaddr); |
90 | set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); | 90 | set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC)); |
91 | return 0; | 91 | return 0; |
92 | err: | 92 | err: |
93 | free_transition_pgtable(image); | 93 | free_transition_pgtable(image); |
@@ -115,6 +115,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable) | |||
115 | .alloc_pgt_page = alloc_pgt_page, | 115 | .alloc_pgt_page = alloc_pgt_page, |
116 | .context = image, | 116 | .context = image, |
117 | .page_flag = __PAGE_KERNEL_LARGE_EXEC, | 117 | .page_flag = __PAGE_KERNEL_LARGE_EXEC, |
118 | .kernpg_flag = _KERNPG_TABLE_NOENC, | ||
118 | }; | 119 | }; |
119 | unsigned long mstart, mend; | 120 | unsigned long mstart, mend; |
120 | pgd_t *level4p; | 121 | pgd_t *level4p; |
@@ -334,7 +335,8 @@ void machine_kexec(struct kimage *image) | |||
334 | image->start = relocate_kernel((unsigned long)image->head, | 335 | image->start = relocate_kernel((unsigned long)image->head, |
335 | (unsigned long)page_list, | 336 | (unsigned long)page_list, |
336 | image->start, | 337 | image->start, |
337 | image->preserve_context); | 338 | image->preserve_context, |
339 | sme_active()); | ||
338 | 340 | ||
339 | #ifdef CONFIG_KEXEC_JUMP | 341 | #ifdef CONFIG_KEXEC_JUMP |
340 | if (image->preserve_context) | 342 | if (image->preserve_context) |
@@ -602,3 +604,22 @@ void arch_kexec_unprotect_crashkres(void) | |||
602 | { | 604 | { |
603 | kexec_mark_crashkres(false); | 605 | kexec_mark_crashkres(false); |
604 | } | 606 | } |
607 | |||
608 | int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp) | ||
609 | { | ||
610 | /* | ||
611 | * If SME is active we need to be sure that kexec pages are | ||
612 | * not encrypted because when we boot to the new kernel the | ||
613 | * pages won't be accessed encrypted (initially). | ||
614 | */ | ||
615 | return set_memory_decrypted((unsigned long)vaddr, pages); | ||
616 | } | ||
617 | |||
618 | void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) | ||
619 | { | ||
620 | /* | ||
621 | * If SME is active we need to reset the pages back to being | ||
622 | * an encrypted mapping before freeing them. | ||
623 | */ | ||
624 | set_memory_encrypted((unsigned long)vaddr, pages); | ||
625 | } | ||
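
The two hooks added above bracket the lifetime of kexec control pages: decrypting on allocation so the next kernel, which starts before any SME mappings exist, can read them in the clear, and re-encrypting on free so the pages rejoin the encrypted direct map. A hedged sketch of the generic caller (simplified; the real kimage_alloc_pages() in kernel/kexec_core.c also manages page flags):

    static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
    {
            struct page *pages = alloc_pages(gfp_mask, order);

            if (pages) {
                    unsigned int count = 1 << order;

                    /* Arch hook: under SME this clears the encryption bit. */
                    if (arch_kexec_post_alloc_pages(page_address(pages),
                                                    count, gfp_mask)) {
                            __free_pages(pages, order);
                            return NULL;
                    }
            }
            return pages;
    }
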
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 0d904d759ff1..5cbb3177ed17 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -429,16 +429,16 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) | |||
429 | } | 429 | } |
430 | } | 430 | } |
431 | 431 | ||
432 | static struct mpf_intel *mpf_found; | 432 | static unsigned long mpf_base; |
433 | 433 | ||
434 | static unsigned long __init get_mpc_size(unsigned long physptr) | 434 | static unsigned long __init get_mpc_size(unsigned long physptr) |
435 | { | 435 | { |
436 | struct mpc_table *mpc; | 436 | struct mpc_table *mpc; |
437 | unsigned long size; | 437 | unsigned long size; |
438 | 438 | ||
439 | mpc = early_ioremap(physptr, PAGE_SIZE); | 439 | mpc = early_memremap(physptr, PAGE_SIZE); |
440 | size = mpc->length; | 440 | size = mpc->length; |
441 | early_iounmap(mpc, PAGE_SIZE); | 441 | early_memunmap(mpc, PAGE_SIZE); |
442 | apic_printk(APIC_VERBOSE, " mpc: %lx-%lx\n", physptr, physptr + size); | 442 | apic_printk(APIC_VERBOSE, " mpc: %lx-%lx\n", physptr, physptr + size); |
443 | 443 | ||
444 | return size; | 444 | return size; |
@@ -450,7 +450,8 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) | |||
450 | unsigned long size; | 450 | unsigned long size; |
451 | 451 | ||
452 | size = get_mpc_size(mpf->physptr); | 452 | size = get_mpc_size(mpf->physptr); |
453 | mpc = early_ioremap(mpf->physptr, size); | 453 | mpc = early_memremap(mpf->physptr, size); |
454 | |||
454 | /* | 455 | /* |
455 | * Read the physical hardware table. Anything here will | 456 | * Read the physical hardware table. Anything here will |
456 | * override the defaults. | 457 | * override the defaults. |
@@ -461,10 +462,10 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) | |||
461 | #endif | 462 | #endif |
462 | pr_err("BIOS bug, MP table errors detected!...\n"); | 463 | pr_err("BIOS bug, MP table errors detected!...\n"); |
463 | pr_cont("... disabling SMP support. (tell your hw vendor)\n"); | 464 | pr_cont("... disabling SMP support. (tell your hw vendor)\n"); |
464 | early_iounmap(mpc, size); | 465 | early_memunmap(mpc, size); |
465 | return -1; | 466 | return -1; |
466 | } | 467 | } |
467 | early_iounmap(mpc, size); | 468 | early_memunmap(mpc, size); |
468 | 469 | ||
469 | if (early) | 470 | if (early) |
470 | return -1; | 471 | return -1; |
@@ -497,12 +498,12 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) | |||
497 | */ | 498 | */ |
498 | void __init default_get_smp_config(unsigned int early) | 499 | void __init default_get_smp_config(unsigned int early) |
499 | { | 500 | { |
500 | struct mpf_intel *mpf = mpf_found; | 501 | struct mpf_intel *mpf; |
501 | 502 | ||
502 | if (!smp_found_config) | 503 | if (!smp_found_config) |
503 | return; | 504 | return; |
504 | 505 | ||
505 | if (!mpf) | 506 | if (!mpf_base) |
506 | return; | 507 | return; |
507 | 508 | ||
508 | if (acpi_lapic && early) | 509 | if (acpi_lapic && early) |
@@ -515,6 +516,12 @@ void __init default_get_smp_config(unsigned int early) | |||
515 | if (acpi_lapic && acpi_ioapic) | 516 | if (acpi_lapic && acpi_ioapic) |
516 | return; | 517 | return; |
517 | 518 | ||
519 | mpf = early_memremap(mpf_base, sizeof(*mpf)); | ||
520 | if (!mpf) { | ||
521 | pr_err("MPTABLE: error mapping MP table\n"); | ||
522 | return; | ||
523 | } | ||
524 | |||
518 | pr_info("Intel MultiProcessor Specification v1.%d\n", | 525 | pr_info("Intel MultiProcessor Specification v1.%d\n", |
519 | mpf->specification); | 526 | mpf->specification); |
520 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) | 527 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) |
@@ -529,7 +536,7 @@ void __init default_get_smp_config(unsigned int early) | |||
529 | /* | 536 | /* |
530 | * Now see if we need to read further. | 537 | * Now see if we need to read further. |
531 | */ | 538 | */ |
532 | if (mpf->feature1 != 0) { | 539 | if (mpf->feature1) { |
533 | if (early) { | 540 | if (early) { |
534 | /* | 541 | /* |
535 | * local APIC has default address | 542 | * local APIC has default address |
@@ -542,8 +549,10 @@ void __init default_get_smp_config(unsigned int early) | |||
542 | construct_default_ISA_mptable(mpf->feature1); | 549 | construct_default_ISA_mptable(mpf->feature1); |
543 | 550 | ||
544 | } else if (mpf->physptr) { | 551 | } else if (mpf->physptr) { |
545 | if (check_physptr(mpf, early)) | 552 | if (check_physptr(mpf, early)) { |
553 | early_memunmap(mpf, sizeof(*mpf)); | ||
546 | return; | 554 | return; |
555 | } | ||
547 | } else | 556 | } else |
548 | BUG(); | 557 | BUG(); |
549 | 558 | ||
@@ -552,6 +561,8 @@ void __init default_get_smp_config(unsigned int early) | |||
552 | /* | 561 | /* |
553 | * Only use the first configuration found. | 562 | * Only use the first configuration found. |
554 | */ | 563 | */ |
564 | |||
565 | early_memunmap(mpf, sizeof(*mpf)); | ||
555 | } | 566 | } |
556 | 567 | ||
557 | static void __init smp_reserve_memory(struct mpf_intel *mpf) | 568 | static void __init smp_reserve_memory(struct mpf_intel *mpf) |
@@ -561,15 +572,16 @@ static void __init smp_reserve_memory(struct mpf_intel *mpf) | |||
561 | 572 | ||
562 | static int __init smp_scan_config(unsigned long base, unsigned long length) | 573 | static int __init smp_scan_config(unsigned long base, unsigned long length) |
563 | { | 574 | { |
564 | unsigned int *bp = phys_to_virt(base); | 575 | unsigned int *bp; |
565 | struct mpf_intel *mpf; | 576 | struct mpf_intel *mpf; |
566 | unsigned long mem; | 577 | int ret = 0; |
567 | 578 | ||
568 | apic_printk(APIC_VERBOSE, "Scan for SMP in [mem %#010lx-%#010lx]\n", | 579 | apic_printk(APIC_VERBOSE, "Scan for SMP in [mem %#010lx-%#010lx]\n", |
569 | base, base + length - 1); | 580 | base, base + length - 1); |
570 | BUILD_BUG_ON(sizeof(*mpf) != 16); | 581 | BUILD_BUG_ON(sizeof(*mpf) != 16); |
571 | 582 | ||
572 | while (length > 0) { | 583 | while (length > 0) { |
584 | bp = early_memremap(base, length); | ||
573 | mpf = (struct mpf_intel *)bp; | 585 | mpf = (struct mpf_intel *)bp; |
574 | if ((*bp == SMP_MAGIC_IDENT) && | 586 | if ((*bp == SMP_MAGIC_IDENT) && |
575 | (mpf->length == 1) && | 587 | (mpf->length == 1) && |
@@ -579,24 +591,26 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) | |||
579 | #ifdef CONFIG_X86_LOCAL_APIC | 591 | #ifdef CONFIG_X86_LOCAL_APIC |
580 | smp_found_config = 1; | 592 | smp_found_config = 1; |
581 | #endif | 593 | #endif |
582 | mpf_found = mpf; | 594 | mpf_base = base; |
583 | 595 | ||
584 | pr_info("found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n", | 596 | pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n", |
585 | (unsigned long long) virt_to_phys(mpf), | 597 | base, base + sizeof(*mpf) - 1, mpf); |
586 | (unsigned long long) virt_to_phys(mpf) + | ||
587 | sizeof(*mpf) - 1, mpf); | ||
588 | 598 | ||
589 | mem = virt_to_phys(mpf); | 599 | memblock_reserve(base, sizeof(*mpf)); |
590 | memblock_reserve(mem, sizeof(*mpf)); | ||
591 | if (mpf->physptr) | 600 | if (mpf->physptr) |
592 | smp_reserve_memory(mpf); | 601 | smp_reserve_memory(mpf); |
593 | 602 | ||
594 | return 1; | 603 | ret = 1; |
595 | } | 604 | } |
596 | bp += 4; | 605 | early_memunmap(bp, length); |
606 | |||
607 | if (ret) | ||
608 | break; | ||
609 | |||
610 | base += 16; | ||
597 | length -= 16; | 611 | length -= 16; |
598 | } | 612 | } |
599 | return 0; | 613 | return ret; |
600 | } | 614 | } |
601 | 615 | ||
602 | void __init default_find_smp_config(void) | 616 | void __init default_find_smp_config(void) |
@@ -838,29 +852,40 @@ static int __init update_mp_table(void) | |||
838 | char oem[10]; | 852 | char oem[10]; |
839 | struct mpf_intel *mpf; | 853 | struct mpf_intel *mpf; |
840 | struct mpc_table *mpc, *mpc_new; | 854 | struct mpc_table *mpc, *mpc_new; |
855 | unsigned long size; | ||
841 | 856 | ||
842 | if (!enable_update_mptable) | 857 | if (!enable_update_mptable) |
843 | return 0; | 858 | return 0; |
844 | 859 | ||
845 | mpf = mpf_found; | 860 | if (!mpf_base) |
846 | if (!mpf) | ||
847 | return 0; | 861 | return 0; |
848 | 862 | ||
863 | mpf = early_memremap(mpf_base, sizeof(*mpf)); | ||
864 | if (!mpf) { | ||
865 | pr_err("MPTABLE: mpf early_memremap() failed\n"); | ||
866 | return 0; | ||
867 | } | ||
868 | |||
849 | /* | 869 | /* |
850 | * Now see if we need to go further. | 870 | * Now see if we need to go further. |
851 | */ | 871 | */ |
852 | if (mpf->feature1 != 0) | 872 | if (mpf->feature1) |
853 | return 0; | 873 | goto do_unmap_mpf; |
854 | 874 | ||
855 | if (!mpf->physptr) | 875 | if (!mpf->physptr) |
856 | return 0; | 876 | goto do_unmap_mpf; |
857 | 877 | ||
858 | mpc = phys_to_virt(mpf->physptr); | 878 | size = get_mpc_size(mpf->physptr); |
879 | mpc = early_memremap(mpf->physptr, size); | ||
880 | if (!mpc) { | ||
881 | pr_err("MPTABLE: mpc early_memremap() failed\n"); | ||
882 | goto do_unmap_mpf; | ||
883 | } | ||
859 | 884 | ||
860 | if (!smp_check_mpc(mpc, oem, str)) | 885 | if (!smp_check_mpc(mpc, oem, str)) |
861 | return 0; | 886 | goto do_unmap_mpc; |
862 | 887 | ||
863 | pr_info("mpf: %llx\n", (u64)virt_to_phys(mpf)); | 888 | pr_info("mpf: %llx\n", (u64)mpf_base); |
864 | pr_info("physptr: %x\n", mpf->physptr); | 889 | pr_info("physptr: %x\n", mpf->physptr); |
865 | 890 | ||
866 | if (mpc_new_phys && mpc->length > mpc_new_length) { | 891 | if (mpc_new_phys && mpc->length > mpc_new_length) { |
@@ -878,21 +903,32 @@ static int __init update_mp_table(void) | |||
878 | new = mpf_checksum((unsigned char *)mpc, mpc->length); | 903 | new = mpf_checksum((unsigned char *)mpc, mpc->length); |
879 | if (old == new) { | 904 | if (old == new) { |
880 | pr_info("mpc is readonly, please try alloc_mptable instead\n"); | 905 | pr_info("mpc is readonly, please try alloc_mptable instead\n"); |
881 | return 0; | 906 | goto do_unmap_mpc; |
882 | } | 907 | } |
883 | pr_info("use in-position replacing\n"); | 908 | pr_info("use in-position replacing\n"); |
884 | } else { | 909 | } else { |
910 | mpc_new = early_memremap(mpc_new_phys, mpc_new_length); | ||
911 | if (!mpc_new) { | ||
912 | pr_err("MPTABLE: new mpc early_memremap() failed\n"); | ||
913 | goto do_unmap_mpc; | ||
914 | } | ||
885 | mpf->physptr = mpc_new_phys; | 915 | mpf->physptr = mpc_new_phys; |
886 | mpc_new = phys_to_virt(mpc_new_phys); | ||
887 | memcpy(mpc_new, mpc, mpc->length); | 916 | memcpy(mpc_new, mpc, mpc->length); |
917 | early_memunmap(mpc, size); | ||
888 | mpc = mpc_new; | 918 | mpc = mpc_new; |
919 | size = mpc_new_length; | ||
889 | /* check if we can modify that */ | 920 | /* check if we can modify that */ |
890 | if (mpc_new_phys - mpf->physptr) { | 921 | if (mpc_new_phys - mpf->physptr) { |
891 | struct mpf_intel *mpf_new; | 922 | struct mpf_intel *mpf_new; |
892 | /* steal 16 bytes from [0, 1k) */ | 923 | /* steal 16 bytes from [0, 1k) */ |
924 | mpf_new = early_memremap(0x400 - 16, sizeof(*mpf_new)); | ||
925 | if (!mpf_new) { | ||
926 | pr_err("MPTABLE: new mpf early_memremap() failed\n"); | ||
927 | goto do_unmap_mpc; | ||
928 | } | ||
893 | pr_info("mpf new: %x\n", 0x400 - 16); | 929 | pr_info("mpf new: %x\n", 0x400 - 16); |
894 | mpf_new = phys_to_virt(0x400 - 16); | ||
895 | memcpy(mpf_new, mpf, 16); | 930 | memcpy(mpf_new, mpf, 16); |
931 | early_memunmap(mpf, sizeof(*mpf)); | ||
896 | mpf = mpf_new; | 932 | mpf = mpf_new; |
897 | mpf->physptr = mpc_new_phys; | 933 | mpf->physptr = mpc_new_phys; |
898 | } | 934 | } |
@@ -909,6 +945,12 @@ static int __init update_mp_table(void) | |||
909 | */ | 945 | */ |
910 | replace_intsrc_all(mpc, mpc_new_phys, mpc_new_length); | 946 | replace_intsrc_all(mpc, mpc_new_phys, mpc_new_length); |
911 | 947 | ||
948 | do_unmap_mpc: | ||
949 | early_memunmap(mpc, size); | ||
950 | |||
951 | do_unmap_mpf: | ||
952 | early_memunmap(mpf, sizeof(*mpf)); | ||
953 | |||
912 | return 0; | 954 | return 0; |
913 | } | 955 | } |
914 | 956 | ||
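
The theme of the mpparse.c changes: instead of caching a phys_to_virt() pointer to the MP floating pointer structure, which assumes the table is reachable through the (now encrypted) direct map, only the physical address mpf_base is kept and the structure is early_memremap()'d for each use. A minimal sketch of that map/use/unmap idiom (kernel context assumed; struct mpf_intel is from asm/mpspec_def.h):

    static unsigned long mpf_base;      /* physical address saved by the scan */

    static void __init use_mpf(void)
    {
            struct mpf_intel *mpf;

            if (!mpf_base)
                    return;

            /* Map only while in use; early fixmap slots are scarce. */
            mpf = early_memremap(mpf_base, sizeof(*mpf));
            if (!mpf)
                    return;

            pr_info("MP spec v1.%d\n", mpf->specification);

            early_memunmap(mpf, sizeof(*mpf));
    }
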
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 5e16d3f29594..0accc2404b92 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -93,9 +93,12 @@ again: | |||
93 | if (gfpflags_allow_blocking(flag)) { | 93 | if (gfpflags_allow_blocking(flag)) { |
94 | page = dma_alloc_from_contiguous(dev, count, get_order(size), | 94 | page = dma_alloc_from_contiguous(dev, count, get_order(size), |
95 | flag); | 95 | flag); |
96 | if (page && page_to_phys(page) + size > dma_mask) { | 96 | if (page) { |
97 | dma_release_from_contiguous(dev, page, count); | 97 | addr = phys_to_dma(dev, page_to_phys(page)); |
98 | page = NULL; | 98 | if (addr + size > dma_mask) { |
99 | dma_release_from_contiguous(dev, page, count); | ||
100 | page = NULL; | ||
101 | } | ||
99 | } | 102 | } |
100 | } | 103 | } |
101 | /* fallback */ | 104 | /* fallback */ |
@@ -104,7 +107,7 @@ again: | |||
104 | if (!page) | 107 | if (!page) |
105 | return NULL; | 108 | return NULL; |
106 | 109 | ||
107 | addr = page_to_phys(page); | 110 | addr = phys_to_dma(dev, page_to_phys(page)); |
108 | if (addr + size > dma_mask) { | 111 | if (addr + size > dma_mask) { |
109 | __free_pages(page, get_order(size)); | 112 | __free_pages(page, get_order(size)); |
110 | 113 | ||
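
The pci-dma.c fix matters because, with SME active, the address a device must reach includes the encryption bit: phys_to_dma() folds that bit in, so a page that fits the mask physically can still be out of range as a DMA address. A short sketch of the corrected check (hypothetical helper; on kernels of this vintage phys_to_dma() comes via asm/dma-mapping.h, in later kernels via linux/dma-direct.h):

    /* True if @page is usable by a device limited to @dma_mask. */
    static bool page_fits_mask(struct device *dev, struct page *page,
                               size_t size, u64 dma_mask)
    {
            /* phys_to_dma() applies the SME encryption bit when active */
            dma_addr_t addr = phys_to_dma(dev, page_to_phys(page));

            return addr + size <= dma_mask;
    }

The same reasoning drives the one-line pci-nommu.c change that follows.
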
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index a6d404087fe3..4fc3cb60ea11 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
@@ -32,7 +32,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, | |||
32 | enum dma_data_direction dir, | 32 | enum dma_data_direction dir, |
33 | unsigned long attrs) | 33 | unsigned long attrs) |
34 | { | 34 | { |
35 | dma_addr_t bus = page_to_phys(page) + offset; | 35 | dma_addr_t bus = phys_to_dma(dev, page_to_phys(page)) + offset; |
36 | WARN_ON(size == 0); | 36 | WARN_ON(size == 0); |
37 | if (!check_addr("map_single", dev, bus, size)) | 37 | if (!check_addr("map_single", dev, bus, size)) |
38 | return NOMMU_MAPPING_ERROR; | 38 | return NOMMU_MAPPING_ERROR; |
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 1e23577e17cf..677077510e30 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c | |||
@@ -6,12 +6,14 @@ | |||
6 | #include <linux/swiotlb.h> | 6 | #include <linux/swiotlb.h> |
7 | #include <linux/bootmem.h> | 7 | #include <linux/bootmem.h> |
8 | #include <linux/dma-mapping.h> | 8 | #include <linux/dma-mapping.h> |
9 | #include <linux/mem_encrypt.h> | ||
9 | 10 | ||
10 | #include <asm/iommu.h> | 11 | #include <asm/iommu.h> |
11 | #include <asm/swiotlb.h> | 12 | #include <asm/swiotlb.h> |
12 | #include <asm/dma.h> | 13 | #include <asm/dma.h> |
13 | #include <asm/xen/swiotlb-xen.h> | 14 | #include <asm/xen/swiotlb-xen.h> |
14 | #include <asm/iommu_table.h> | 15 | #include <asm/iommu_table.h> |
16 | |||
15 | int swiotlb __read_mostly; | 17 | int swiotlb __read_mostly; |
16 | 18 | ||
17 | void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, | 19 | void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, |
@@ -79,8 +81,8 @@ IOMMU_INIT_FINISH(pci_swiotlb_detect_override, | |||
79 | pci_swiotlb_late_init); | 81 | pci_swiotlb_late_init); |
80 | 82 | ||
81 | /* | 83 | /* |
82 | * if 4GB or more detected (and iommu=off not set) return 1 | 84 | * If 4GB or more detected (and iommu=off not set) or if SME is active |
83 | * and set swiotlb to 1. | 85 | * then set swiotlb to 1 and return 1. |
84 | */ | 86 | */ |
85 | int __init pci_swiotlb_detect_4gb(void) | 87 | int __init pci_swiotlb_detect_4gb(void) |
86 | { | 88 | { |
@@ -89,6 +91,15 @@ int __init pci_swiotlb_detect_4gb(void) | |||
89 | if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN) | 91 | if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN) |
90 | swiotlb = 1; | 92 | swiotlb = 1; |
91 | #endif | 93 | #endif |
94 | |||
95 | /* | ||
96 | * If SME is active then swiotlb will be set to 1 so that bounce | ||
97 | * buffers are allocated and used for devices that do not support | ||
98 | * the addressing range required for the encryption mask. | ||
99 | */ | ||
100 | if (sme_active()) | ||
101 | swiotlb = 1; | ||
102 | |||
92 | return swiotlb; | 103 | return swiotlb; |
93 | } | 104 | } |
94 | IOMMU_INIT(pci_swiotlb_detect_4gb, | 105 | IOMMU_INIT(pci_swiotlb_detect_4gb, |
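
Forcing swiotlb on under SME guarantees bounce buffers exist for devices whose DMA mask cannot cover the encryption bit. As a hedged illustration (hypothetical helper, not part of this patch), the per-device question swiotlb ends up answering looks like:

    #include <linux/mem_encrypt.h>      /* sme_active(), sme_me_mask */

    /*
     * True if @dev can DMA to encrypted memory directly; false if it
     * must bounce through the swiotlb pool instead.
     */
    static bool dev_can_reach_encrypted(struct device *dev)
    {
            u64 mask = dev->coherent_dma_mask;

            return !sme_active() || (mask & sme_me_mask) == sme_me_mask;
    }
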
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 3ca198080ea9..bd6b85fac666 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -355,6 +355,7 @@ bool xen_set_default_idle(void) | |||
355 | return ret; | 355 | return ret; |
356 | } | 356 | } |
357 | #endif | 357 | #endif |
358 | |||
358 | void stop_this_cpu(void *dummy) | 359 | void stop_this_cpu(void *dummy) |
359 | { | 360 | { |
360 | local_irq_disable(); | 361 | local_irq_disable(); |
@@ -365,8 +366,20 @@ void stop_this_cpu(void *dummy) | |||
365 | disable_local_APIC(); | 366 | disable_local_APIC(); |
366 | mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); | 367 | mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); |
367 | 368 | ||
368 | for (;;) | 369 | for (;;) { |
369 | halt(); | 370 | /* |
371 | * Use wbinvd followed by hlt to stop the processor. This | ||
372 | * provides support for kexec on a processor that supports | ||
373 | * SME. With kexec, going from SME inactive to SME active | ||
374 | * requires clearing cache entries so that addresses without | ||
375 | * the encryption bit set don't corrupt the same physical | ||
376 | * address that has the encryption bit set when caches are | ||
377 | * flushed. To achieve this a wbinvd is performed followed by | ||
378 | * a hlt. Even if the processor is not in the kexec/SME | ||
379 | * scenario this only adds a wbinvd to a halting processor. | ||
380 | */ | ||
381 | asm volatile("wbinvd; hlt" : : : "memory"); | ||
382 | } | ||
370 | } | 383 | } |
371 | 384 | ||
372 | /* | 385 | /* |
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 98111b38ebfd..307d3bac5f04 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S | |||
@@ -47,6 +47,7 @@ relocate_kernel: | |||
47 | * %rsi page_list | 47 | * %rsi page_list |
48 | * %rdx start address | 48 | * %rdx start address |
49 | * %rcx preserve_context | 49 | * %rcx preserve_context |
50 | * %r8 sme_active | ||
50 | */ | 51 | */ |
51 | 52 | ||
52 | /* Save the CPU context, used for jumping back */ | 53 | /* Save the CPU context, used for jumping back */ |
@@ -71,6 +72,9 @@ relocate_kernel: | |||
71 | pushq $0 | 72 | pushq $0 |
72 | popfq | 73 | popfq |
73 | 74 | ||
75 | /* Save SME active flag */ | ||
76 | movq %r8, %r12 | ||
77 | |||
74 | /* | 78 | /* |
75 | * get physical address of control page now | 79 | * get physical address of control page now |
76 | * this is impossible after page table switch | 80 | * this is impossible after page table switch |
@@ -132,6 +136,16 @@ identity_mapped: | |||
132 | /* Flush the TLB (needed?) */ | 136 | /* Flush the TLB (needed?) */ |
133 | movq %r9, %cr3 | 137 | movq %r9, %cr3 |
134 | 138 | ||
139 | /* | ||
140 | * If SME is active, there could be old encrypted cache line | ||
141 | * entries that will conflict with the now unencrypted memory | ||
142 | * used by kexec. Flush the caches before copying the kernel. | ||
143 | */ | ||
144 | testq %r12, %r12 | ||
145 | jz 1f | ||
146 | wbinvd | ||
147 | 1: | ||
148 | |||
135 | movq %rcx, %r11 | 149 | movq %rcx, %r11 |
136 | call swap_pages | 150 | call swap_pages |
137 | 151 | ||
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ecab32282f0f..022ebddb3734 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -69,6 +69,7 @@ | |||
69 | #include <linux/crash_dump.h> | 69 | #include <linux/crash_dump.h> |
70 | #include <linux/tboot.h> | 70 | #include <linux/tboot.h> |
71 | #include <linux/jiffies.h> | 71 | #include <linux/jiffies.h> |
72 | #include <linux/mem_encrypt.h> | ||
72 | 73 | ||
73 | #include <linux/usb/xhci-dbgp.h> | 74 | #include <linux/usb/xhci-dbgp.h> |
74 | #include <video/edid.h> | 75 | #include <video/edid.h> |
@@ -375,6 +376,14 @@ static void __init reserve_initrd(void) | |||
375 | !ramdisk_image || !ramdisk_size) | 376 | !ramdisk_image || !ramdisk_size) |
376 | return; /* No initrd provided by bootloader */ | 377 | return; /* No initrd provided by bootloader */ |
377 | 378 | ||
379 | /* | ||
380 | * If SME is active, this memory will be marked encrypted by the | ||
381 | * kernel when it is accessed (including relocation). However, the | ||
382 | * ramdisk image was loaded decrypted by the bootloader, so make | ||
383 | * sure that it is encrypted before accessing it. | ||
384 | */ | ||
385 | sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image); | ||
386 | |||
378 | initrd_start = 0; | 387 | initrd_start = 0; |
379 | 388 | ||
380 | mapped_size = memblock_mem_size(max_pfn_mapped); | 389 | mapped_size = memblock_mem_size(max_pfn_mapped); |
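
sme_early_encrypt() is needed here because the bootloader wrote the ramdisk with the encryption bit clear, while the kernel will map and read it encrypted. Conceptually it encrypts in place by aliasing the same physical range twice, once decrypted and once encrypted, and copying through a buffer; a hedged sketch (the helper names exist elsewhere in this series, the range is assumed page-aligned, and the real code also flushes caches):

    static char sme_buf[PAGE_SIZE];     /* static: keep the stack small */

    static void __init sketch_early_encrypt(resource_size_t paddr,
                                            unsigned long size)
    {
            while (size) {
                    /* same physical page, two mapping attributes */
                    void *src = early_memremap_decrypted(paddr, PAGE_SIZE);
                    void *dst = early_memremap_encrypted(paddr, PAGE_SIZE);

                    memcpy(sme_buf, src, PAGE_SIZE);  /* read raw bytes */
                    memcpy(dst, sme_buf, PAGE_SIZE);  /* write encrypted */

                    early_memunmap(dst, PAGE_SIZE);
                    early_memunmap(src, PAGE_SIZE);

                    paddr += PAGE_SIZE;
                    size  -= PAGE_SIZE;
            }
    }
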
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 213ddf3e937d..73e4d28112f8 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <asm/compat.h> | 21 | #include <asm/compat.h> |
22 | #include <asm/ia32.h> | 22 | #include <asm/ia32.h> |
23 | #include <asm/syscalls.h> | 23 | #include <asm/syscalls.h> |
24 | #include <asm/mpx.h> | ||
24 | 25 | ||
25 | /* | 26 | /* |
26 | * Align a virtual address to avoid aliasing in the I$ on AMD F15h. | 27 | * Align a virtual address to avoid aliasing in the I$ on AMD F15h. |
@@ -100,8 +101,8 @@ out: | |||
100 | return error; | 101 | return error; |
101 | } | 102 | } |
102 | 103 | ||
103 | static void find_start_end(unsigned long flags, unsigned long *begin, | 104 | static void find_start_end(unsigned long addr, unsigned long flags, |
104 | unsigned long *end) | 105 | unsigned long *begin, unsigned long *end) |
105 | { | 106 | { |
106 | if (!in_compat_syscall() && (flags & MAP_32BIT)) { | 107 | if (!in_compat_syscall() && (flags & MAP_32BIT)) { |
107 | /* This is usually needed to map code in small | 108 | /* This is usually needed to map code in small |
@@ -120,7 +121,10 @@ static void find_start_end(unsigned long flags, unsigned long *begin, | |||
120 | } | 121 | } |
121 | 122 | ||
122 | *begin = get_mmap_base(1); | 123 | *begin = get_mmap_base(1); |
123 | *end = in_compat_syscall() ? tasksize_32bit() : tasksize_64bit(); | 124 | if (in_compat_syscall()) |
125 | *end = task_size_32bit(); | ||
126 | else | ||
127 | *end = task_size_64bit(addr > DEFAULT_MAP_WINDOW); | ||
124 | } | 128 | } |
125 | 129 | ||
126 | unsigned long | 130 | unsigned long |
@@ -132,10 +136,14 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, | |||
132 | struct vm_unmapped_area_info info; | 136 | struct vm_unmapped_area_info info; |
133 | unsigned long begin, end; | 137 | unsigned long begin, end; |
134 | 138 | ||
139 | addr = mpx_unmapped_area_check(addr, len, flags); | ||
140 | if (IS_ERR_VALUE(addr)) | ||
141 | return addr; | ||
142 | |||
135 | if (flags & MAP_FIXED) | 143 | if (flags & MAP_FIXED) |
136 | return addr; | 144 | return addr; |
137 | 145 | ||
138 | find_start_end(flags, &begin, &end); | 146 | find_start_end(addr, flags, &begin, &end); |
139 | 147 | ||
140 | if (len > end) | 148 | if (len > end) |
141 | return -ENOMEM; | 149 | return -ENOMEM; |
@@ -171,6 +179,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
171 | unsigned long addr = addr0; | 179 | unsigned long addr = addr0; |
172 | struct vm_unmapped_area_info info; | 180 | struct vm_unmapped_area_info info; |
173 | 181 | ||
182 | addr = mpx_unmapped_area_check(addr, len, flags); | ||
183 | if (IS_ERR_VALUE(addr)) | ||
184 | return addr; | ||
185 | |||
174 | /* requested length too big for entire address space */ | 186 | /* requested length too big for entire address space */ |
175 | if (len > TASK_SIZE) | 187 | if (len > TASK_SIZE) |
176 | return -ENOMEM; | 188 | return -ENOMEM; |
@@ -195,6 +207,16 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
195 | info.length = len; | 207 | info.length = len; |
196 | info.low_limit = PAGE_SIZE; | 208 | info.low_limit = PAGE_SIZE; |
197 | info.high_limit = get_mmap_base(0); | 209 | info.high_limit = get_mmap_base(0); |
210 | |||
211 | /* | ||
212 | * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area | ||
213 | * in the full address space. | ||
214 | * | ||
215 | * !in_compat_syscall() check to avoid high addresses for x32. | ||
216 | */ | ||
217 | if (addr > DEFAULT_MAP_WINDOW && !in_compat_syscall()) | ||
218 | info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW; | ||
219 | |||
198 | info.align_mask = 0; | 220 | info.align_mask = 0; |
199 | info.align_offset = pgoff << PAGE_SHIFT; | 221 | info.align_offset = pgoff << PAGE_SHIFT; |
200 | if (filp) { | 222 | if (filp) { |
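
The sys_x86_64.c hunks implement the opt-in policy for the enlarged 5-level-paging address space: a mapping only lands above the 47-bit DEFAULT_MAP_WINDOW when the caller passes a hint above it. A small userspace illustration (the high result assumes a CONFIG_X86_5LEVEL kernel on la57 hardware; elsewhere both addresses stay below the boundary):

    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
            void *hint = (void *)(1UL << 48);   /* above the 47-bit window */
            void *lo = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            void *hi = mmap(hint, 4096, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            printf("no hint:   %p\n", lo);      /* below 1 << 47 */
            printf("high hint: %p\n", hi);      /* may exceed 1 << 47 */
            return 0;
    }
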
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9b1dd114956a..04d750813c9d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -108,7 +108,7 @@ module_param(dbg, bool, 0644); | |||
108 | (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1)) | 108 | (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1)) |
109 | 109 | ||
110 | 110 | ||
111 | #define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)) | 111 | #define PT64_BASE_ADDR_MASK __sme_clr((((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))) |
112 | #define PT64_DIR_BASE_ADDR_MASK \ | 112 | #define PT64_DIR_BASE_ADDR_MASK \ |
113 | (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1)) | 113 | (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1)) |
114 | #define PT64_LVL_ADDR_MASK(level) \ | 114 | #define PT64_LVL_ADDR_MASK(level) \ |
@@ -126,7 +126,7 @@ module_param(dbg, bool, 0644); | |||
126 | * PT32_LEVEL_BITS))) - 1)) | 126 | * PT32_LEVEL_BITS))) - 1)) |
127 | 127 | ||
128 | #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \ | 128 | #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \ |
129 | | shadow_x_mask | shadow_nx_mask) | 129 | | shadow_x_mask | shadow_nx_mask | shadow_me_mask) |
130 | 130 | ||
131 | #define ACC_EXEC_MASK 1 | 131 | #define ACC_EXEC_MASK 1 |
132 | #define ACC_WRITE_MASK PT_WRITABLE_MASK | 132 | #define ACC_WRITE_MASK PT_WRITABLE_MASK |
@@ -186,6 +186,7 @@ static u64 __read_mostly shadow_dirty_mask; | |||
186 | static u64 __read_mostly shadow_mmio_mask; | 186 | static u64 __read_mostly shadow_mmio_mask; |
187 | static u64 __read_mostly shadow_mmio_value; | 187 | static u64 __read_mostly shadow_mmio_value; |
188 | static u64 __read_mostly shadow_present_mask; | 188 | static u64 __read_mostly shadow_present_mask; |
189 | static u64 __read_mostly shadow_me_mask; | ||
189 | 190 | ||
190 | /* | 191 | /* |
191 | * SPTEs used by MMUs without A/D bits are marked with shadow_acc_track_value. | 192 | * SPTEs used by MMUs without A/D bits are marked with shadow_acc_track_value. |
@@ -349,7 +350,7 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) | |||
349 | */ | 350 | */ |
350 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 351 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
351 | u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask, | 352 | u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask, |
352 | u64 acc_track_mask) | 353 | u64 acc_track_mask, u64 me_mask) |
353 | { | 354 | { |
354 | BUG_ON(!dirty_mask != !accessed_mask); | 355 | BUG_ON(!dirty_mask != !accessed_mask); |
355 | BUG_ON(!accessed_mask && !acc_track_mask); | 356 | BUG_ON(!accessed_mask && !acc_track_mask); |
@@ -362,6 +363,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | |||
362 | shadow_x_mask = x_mask; | 363 | shadow_x_mask = x_mask; |
363 | shadow_present_mask = p_mask; | 364 | shadow_present_mask = p_mask; |
364 | shadow_acc_track_mask = acc_track_mask; | 365 | shadow_acc_track_mask = acc_track_mask; |
366 | shadow_me_mask = me_mask; | ||
365 | } | 367 | } |
366 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); | 368 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); |
367 | 369 | ||
@@ -2433,7 +2435,7 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2433 | BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); | 2435 | BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); |
2434 | 2436 | ||
2435 | spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK | | 2437 | spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK | |
2436 | shadow_user_mask | shadow_x_mask; | 2438 | shadow_user_mask | shadow_x_mask | shadow_me_mask; |
2437 | 2439 | ||
2438 | if (sp_ad_disabled(sp)) | 2440 | if (sp_ad_disabled(sp)) |
2439 | spte |= shadow_acc_track_value; | 2441 | spte |= shadow_acc_track_value; |
@@ -2745,6 +2747,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2745 | pte_access &= ~ACC_WRITE_MASK; | 2747 | pte_access &= ~ACC_WRITE_MASK; |
2746 | 2748 | ||
2747 | spte |= (u64)pfn << PAGE_SHIFT; | 2749 | spte |= (u64)pfn << PAGE_SHIFT; |
2750 | spte |= shadow_me_mask; | ||
2748 | 2751 | ||
2749 | if (pte_access & ACC_WRITE_MASK) { | 2752 | if (pte_access & ACC_WRITE_MASK) { |
2750 | 2753 | ||
@@ -4106,16 +4109,28 @@ void | |||
4106 | reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) | 4109 | reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) |
4107 | { | 4110 | { |
4108 | bool uses_nx = context->nx || context->base_role.smep_andnot_wp; | 4111 | bool uses_nx = context->nx || context->base_role.smep_andnot_wp; |
4112 | struct rsvd_bits_validate *shadow_zero_check; | ||
4113 | int i; | ||
4109 | 4114 | ||
4110 | /* | 4115 | /* |
4111 | * Passing "true" to the last argument is okay; it adds a check | 4116 | * Passing "true" to the last argument is okay; it adds a check |
4112 | * on bit 8 of the SPTEs which KVM doesn't use anyway. | 4117 | * on bit 8 of the SPTEs which KVM doesn't use anyway. |
4113 | */ | 4118 | */ |
4114 | __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, | 4119 | shadow_zero_check = &context->shadow_zero_check; |
4120 | __reset_rsvds_bits_mask(vcpu, shadow_zero_check, | ||
4115 | boot_cpu_data.x86_phys_bits, | 4121 | boot_cpu_data.x86_phys_bits, |
4116 | context->shadow_root_level, uses_nx, | 4122 | context->shadow_root_level, uses_nx, |
4117 | guest_cpuid_has_gbpages(vcpu), is_pse(vcpu), | 4123 | guest_cpuid_has_gbpages(vcpu), is_pse(vcpu), |
4118 | true); | 4124 | true); |
4125 | |||
4126 | if (!shadow_me_mask) | ||
4127 | return; | ||
4128 | |||
4129 | for (i = context->shadow_root_level; --i >= 0;) { | ||
4130 | shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask; | ||
4131 | shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask; | ||
4132 | } | ||
4133 | |||
4119 | } | 4134 | } |
4120 | EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask); | 4135 | EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask); |
4121 | 4136 | ||
@@ -4133,17 +4148,29 @@ static void | |||
4133 | reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, | 4148 | reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, |
4134 | struct kvm_mmu *context) | 4149 | struct kvm_mmu *context) |
4135 | { | 4150 | { |
4151 | struct rsvd_bits_validate *shadow_zero_check; | ||
4152 | int i; | ||
4153 | |||
4154 | shadow_zero_check = &context->shadow_zero_check; | ||
4155 | |||
4136 | if (boot_cpu_is_amd()) | 4156 | if (boot_cpu_is_amd()) |
4137 | __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, | 4157 | __reset_rsvds_bits_mask(vcpu, shadow_zero_check, |
4138 | boot_cpu_data.x86_phys_bits, | 4158 | boot_cpu_data.x86_phys_bits, |
4139 | context->shadow_root_level, false, | 4159 | context->shadow_root_level, false, |
4140 | boot_cpu_has(X86_FEATURE_GBPAGES), | 4160 | boot_cpu_has(X86_FEATURE_GBPAGES), |
4141 | true, true); | 4161 | true, true); |
4142 | else | 4162 | else |
4143 | __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, | 4163 | __reset_rsvds_bits_mask_ept(shadow_zero_check, |
4144 | boot_cpu_data.x86_phys_bits, | 4164 | boot_cpu_data.x86_phys_bits, |
4145 | false); | 4165 | false); |
4146 | 4166 | ||
4167 | if (!shadow_me_mask) | ||
4168 | return; | ||
4169 | |||
4170 | for (i = context->shadow_root_level; --i >= 0;) { | ||
4171 | shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask; | ||
4172 | shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask; | ||
4173 | } | ||
4147 | } | 4174 | } |
4148 | 4175 | ||
4149 | /* | 4176 | /* |
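
The KVM changes make the encryption bit a legitimate part of every shadow/NPT PTE (shadow_me_mask is OR'd into leaf and non-leaf SPTEs) and, symmetrically, strip it from the reserved-bit masks so the checker does not flag it. A toy userspace demonstration of that second half (illustrative values only, not real x86 masks):

    #include <stdint.h>
    #include <stdio.h>

    #define ME_MASK   (1ULL << 47)                  /* pretend C-bit */
    #define RSVD_MASK 0xfff0000000000000ULL         /* toy high reserved bits */

    int main(void)
    {
            uint64_t spte = 0x1234000ULL | ME_MASK; /* PTE with C-bit set */
            uint64_t rsvd = RSVD_MASK | ME_MASK;    /* naive reserved mask */

            printf("naive check flags it:  %d\n", !!(spte & rsvd)); /* 1 */
            rsvd &= ~ME_MASK;   /* what the loops in the hunks above do */
            printf("after clearing me bit: %d\n", !!(spte & rsvd)); /* 0 */
            return 0;
    }
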
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index af256b786a70..8dbd8dbc83eb 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -1167,9 +1167,9 @@ static void avic_init_vmcb(struct vcpu_svm *svm) | |||
1167 | { | 1167 | { |
1168 | struct vmcb *vmcb = svm->vmcb; | 1168 | struct vmcb *vmcb = svm->vmcb; |
1169 | struct kvm_arch *vm_data = &svm->vcpu.kvm->arch; | 1169 | struct kvm_arch *vm_data = &svm->vcpu.kvm->arch; |
1170 | phys_addr_t bpa = page_to_phys(svm->avic_backing_page); | 1170 | phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page)); |
1171 | phys_addr_t lpa = page_to_phys(vm_data->avic_logical_id_table_page); | 1171 | phys_addr_t lpa = __sme_set(page_to_phys(vm_data->avic_logical_id_table_page)); |
1172 | phys_addr_t ppa = page_to_phys(vm_data->avic_physical_id_table_page); | 1172 | phys_addr_t ppa = __sme_set(page_to_phys(vm_data->avic_physical_id_table_page)); |
1173 | 1173 | ||
1174 | vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK; | 1174 | vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK; |
1175 | vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK; | 1175 | vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK; |
@@ -1232,8 +1232,8 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
1232 | set_intercept(svm, INTERCEPT_MWAIT); | 1232 | set_intercept(svm, INTERCEPT_MWAIT); |
1233 | } | 1233 | } |
1234 | 1234 | ||
1235 | control->iopm_base_pa = iopm_base; | 1235 | control->iopm_base_pa = __sme_set(iopm_base); |
1236 | control->msrpm_base_pa = __pa(svm->msrpm); | 1236 | control->msrpm_base_pa = __sme_set(__pa(svm->msrpm)); |
1237 | control->int_ctl = V_INTR_MASKING_MASK; | 1237 | control->int_ctl = V_INTR_MASKING_MASK; |
1238 | 1238 | ||
1239 | init_seg(&save->es); | 1239 | init_seg(&save->es); |
@@ -1377,9 +1377,9 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) | |||
1377 | return -EINVAL; | 1377 | return -EINVAL; |
1378 | 1378 | ||
1379 | new_entry = READ_ONCE(*entry); | 1379 | new_entry = READ_ONCE(*entry); |
1380 | new_entry = (page_to_phys(svm->avic_backing_page) & | 1380 | new_entry = __sme_set((page_to_phys(svm->avic_backing_page) & |
1381 | AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) | | 1381 | AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) | |
1382 | AVIC_PHYSICAL_ID_ENTRY_VALID_MASK; | 1382 | AVIC_PHYSICAL_ID_ENTRY_VALID_MASK); |
1383 | WRITE_ONCE(*entry, new_entry); | 1383 | WRITE_ONCE(*entry, new_entry); |
1384 | 1384 | ||
1385 | svm->avic_physical_id_cache = entry; | 1385 | svm->avic_physical_id_cache = entry; |
@@ -1647,7 +1647,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
1647 | 1647 | ||
1648 | svm->vmcb = page_address(page); | 1648 | svm->vmcb = page_address(page); |
1649 | clear_page(svm->vmcb); | 1649 | clear_page(svm->vmcb); |
1650 | svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; | 1650 | svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT); |
1651 | svm->asid_generation = 0; | 1651 | svm->asid_generation = 0; |
1652 | init_vmcb(svm); | 1652 | init_vmcb(svm); |
1653 | 1653 | ||
@@ -1675,7 +1675,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) | |||
1675 | { | 1675 | { |
1676 | struct vcpu_svm *svm = to_svm(vcpu); | 1676 | struct vcpu_svm *svm = to_svm(vcpu); |
1677 | 1677 | ||
1678 | __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); | 1678 | __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT)); |
1679 | __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); | 1679 | __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); |
1680 | __free_page(virt_to_page(svm->nested.hsave)); | 1680 | __free_page(virt_to_page(svm->nested.hsave)); |
1681 | __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); | 1681 | __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); |
@@ -2330,7 +2330,7 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) | |||
2330 | u64 pdpte; | 2330 | u64 pdpte; |
2331 | int ret; | 2331 | int ret; |
2332 | 2332 | ||
2333 | ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte, | 2333 | ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte, |
2334 | offset_in_page(cr3) + index * 8, 8); | 2334 | offset_in_page(cr3) + index * 8, 8); |
2335 | if (ret) | 2335 | if (ret) |
2336 | return 0; | 2336 | return 0; |
@@ -2342,7 +2342,7 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, | |||
2342 | { | 2342 | { |
2343 | struct vcpu_svm *svm = to_svm(vcpu); | 2343 | struct vcpu_svm *svm = to_svm(vcpu); |
2344 | 2344 | ||
2345 | svm->vmcb->control.nested_cr3 = root; | 2345 | svm->vmcb->control.nested_cr3 = __sme_set(root); |
2346 | mark_dirty(svm->vmcb, VMCB_NPT); | 2346 | mark_dirty(svm->vmcb, VMCB_NPT); |
2347 | svm_flush_tlb(vcpu); | 2347 | svm_flush_tlb(vcpu); |
2348 | } | 2348 | } |
@@ -2873,7 +2873,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) | |||
2873 | svm->nested.msrpm[p] = svm->msrpm[p] | value; | 2873 | svm->nested.msrpm[p] = svm->msrpm[p] | value; |
2874 | } | 2874 | } |
2875 | 2875 | ||
2876 | svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); | 2876 | svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm)); |
2877 | 2877 | ||
2878 | return true; | 2878 | return true; |
2879 | } | 2879 | } |
@@ -4506,7 +4506,7 @@ get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, | |||
4506 | pr_debug("SVM: %s: use GA mode for irq %u\n", __func__, | 4506 | pr_debug("SVM: %s: use GA mode for irq %u\n", __func__, |
4507 | irq.vector); | 4507 | irq.vector); |
4508 | *svm = to_svm(vcpu); | 4508 | *svm = to_svm(vcpu); |
4509 | vcpu_info->pi_desc_addr = page_to_phys((*svm)->avic_backing_page); | 4509 | vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page)); |
4510 | vcpu_info->vector = irq.vector; | 4510 | vcpu_info->vector = irq.vector; |
4511 | 4511 | ||
4512 | return 0; | 4512 | return 0; |
@@ -4557,7 +4557,8 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, | |||
4557 | struct amd_iommu_pi_data pi; | 4557 | struct amd_iommu_pi_data pi; |
4558 | 4558 | ||
4559 | /* Try to enable guest_mode in IRTE */ | 4559 | /* Try to enable guest_mode in IRTE */ |
4560 | pi.base = page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK; | 4560 | pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & |
4561 | AVIC_HPA_MASK); | ||
4561 | pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id, | 4562 | pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id, |
4562 | svm->vcpu.vcpu_id); | 4563 | svm->vcpu.vcpu_id); |
4563 | pi.is_guest_mode = true; | 4564 | pi.is_guest_mode = true; |
@@ -5006,7 +5007,7 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) | |||
5006 | { | 5007 | { |
5007 | struct vcpu_svm *svm = to_svm(vcpu); | 5008 | struct vcpu_svm *svm = to_svm(vcpu); |
5008 | 5009 | ||
5009 | svm->vmcb->save.cr3 = root; | 5010 | svm->vmcb->save.cr3 = __sme_set(root); |
5010 | mark_dirty(svm->vmcb, VMCB_CR); | 5011 | mark_dirty(svm->vmcb, VMCB_CR); |
5011 | svm_flush_tlb(vcpu); | 5012 | svm_flush_tlb(vcpu); |
5012 | } | 5013 | } |
@@ -5015,7 +5016,7 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) | |||
5015 | { | 5016 | { |
5016 | struct vcpu_svm *svm = to_svm(vcpu); | 5017 | struct vcpu_svm *svm = to_svm(vcpu); |
5017 | 5018 | ||
5018 | svm->vmcb->control.nested_cr3 = root; | 5019 | svm->vmcb->control.nested_cr3 = __sme_set(root); |
5019 | mark_dirty(svm->vmcb, VMCB_NPT); | 5020 | mark_dirty(svm->vmcb, VMCB_NPT); |
5020 | 5021 | ||
5021 | /* Also sync guest cr3 here in case we live migrate */ | 5022 | /* Also sync guest cr3 here in case we live migrate */ |
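
All of the svm.c hunks apply one pair of helpers: hardware consumes the VMCB, MSR/IO permission map and nested CR3 fields as physical addresses, so the encryption bit must be set when handing an address to hardware and cleared when converting one back into a pfn. Modulo casts, the definitions (from linux/mem_encrypt.h in this series; sme_me_mask is 0 when SME is inactive, making both no-ops) are shaped like:

    #define __sme_set(x)    ((x) | sme_me_mask)
    #define __sme_clr(x)    ((x) & ~sme_me_mask)
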
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c6ef2940119b..d40900914a72 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -6556,7 +6556,7 @@ void vmx_enable_tdp(void) | |||
6556 | enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull, | 6556 | enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull, |
6557 | 0ull, VMX_EPT_EXECUTABLE_MASK, | 6557 | 0ull, VMX_EPT_EXECUTABLE_MASK, |
6558 | cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK, | 6558 | cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK, |
6559 | VMX_EPT_RWX_MASK); | 6559 | VMX_EPT_RWX_MASK, 0ull); |
6560 | 6560 | ||
6561 | ept_set_mmio_spte_mask(); | 6561 | ept_set_mmio_spte_mask(); |
6562 | kvm_enable_tdp(); | 6562 | kvm_enable_tdp(); |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 272320eb328c..ef5102f80497 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -54,6 +54,7 @@ | |||
54 | #include <linux/kvm_irqfd.h> | 54 | #include <linux/kvm_irqfd.h> |
55 | #include <linux/irqbypass.h> | 55 | #include <linux/irqbypass.h> |
56 | #include <linux/sched/stat.h> | 56 | #include <linux/sched/stat.h> |
57 | #include <linux/mem_encrypt.h> | ||
57 | 58 | ||
58 | #include <trace/events/kvm.h> | 59 | #include <trace/events/kvm.h> |
59 | 60 | ||
@@ -6125,7 +6126,7 @@ int kvm_arch_init(void *opaque) | |||
6125 | 6126 | ||
6126 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, | 6127 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, |
6127 | PT_DIRTY_MASK, PT64_NX_MASK, 0, | 6128 | PT_DIRTY_MASK, PT64_NX_MASK, 0, |
6128 | PT_PRESENT_MASK, 0); | 6129 | PT_PRESENT_MASK, 0, sme_me_mask); |
6129 | kvm_timer_init(); | 6130 | kvm_timer_init(); |
6130 | 6131 | ||
6131 | perf_register_guest_info_callbacks(&kvm_guest_cbs); | 6132 | perf_register_guest_info_callbacks(&kvm_guest_cbs); |
diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c index 5cc78bf57232..3261abb21ef4 100644 --- a/arch/x86/lib/cmdline.c +++ b/arch/x86/lib/cmdline.c | |||
@@ -104,7 +104,112 @@ __cmdline_find_option_bool(const char *cmdline, int max_cmdline_size, | |||
104 | return 0; /* Buffer overrun */ | 104 | return 0; /* Buffer overrun */ |
105 | } | 105 | } |
106 | 106 | ||
107 | /* | ||
108 | * Find a non-boolean option (i.e. option=argument). In accordance with | ||
109 | * standard Linux practice, if this option is repeated, this returns the | ||
110 | * last instance on the command line. | ||
111 | * | ||
112 | * @cmdline: the cmdline string | ||
113 | * @max_cmdline_size: the maximum size of cmdline | ||
114 | * @option: option string to look for | ||
115 | * @buffer: memory buffer to return the option argument | ||
116 | * @bufsize: size of the supplied memory buffer | ||
117 | * | ||
118 | * Returns the length of the argument (regardless of whether it was | ||
119 | * truncated to fit in the buffer), or -1 if not found. | ||
120 | */ | ||
121 | static int | ||
122 | __cmdline_find_option(const char *cmdline, int max_cmdline_size, | ||
123 | const char *option, char *buffer, int bufsize) | ||
124 | { | ||
125 | char c; | ||
126 | int pos = 0, len = -1; | ||
127 | const char *opptr = NULL; | ||
128 | char *bufptr = buffer; | ||
129 | enum { | ||
130 | st_wordstart = 0, /* Start of word/after whitespace */ | ||
131 | st_wordcmp, /* Comparing this word */ | ||
132 | st_wordskip, /* Miscompare, skip */ | ||
133 | st_bufcpy, /* Copying this to buffer */ | ||
134 | } state = st_wordstart; | ||
135 | |||
136 | if (!cmdline) | ||
137 | return -1; /* No command line */ | ||
138 | |||
139 | /* | ||
140 | * This 'pos' check ensures we do not overrun | ||
141 | * a non-NULL-terminated 'cmdline' | ||
142 | */ | ||
143 | while (pos++ < max_cmdline_size) { | ||
144 | c = *(char *)cmdline++; | ||
145 | if (!c) | ||
146 | break; | ||
147 | |||
148 | switch (state) { | ||
149 | case st_wordstart: | ||
150 | if (myisspace(c)) | ||
151 | break; | ||
152 | |||
153 | state = st_wordcmp; | ||
154 | opptr = option; | ||
155 | /* fall through */ | ||
156 | |||
157 | case st_wordcmp: | ||
158 | if ((c == '=') && !*opptr) { | ||
159 | /* | ||
160 | * We matched all the way to the end of the | ||
161 | * option we were looking for, prepare to | ||
162 | * copy the argument. | ||
163 | */ | ||
164 | len = 0; | ||
165 | bufptr = buffer; | ||
166 | state = st_bufcpy; | ||
167 | break; | ||
168 | } else if (c == *opptr++) { | ||
169 | /* | ||
170 | * We are currently matching, so continue | ||
171 | * to the next character on the cmdline. | ||
172 | */ | ||
173 | break; | ||
174 | } | ||
175 | state = st_wordskip; | ||
176 | /* fall through */ | ||
177 | |||
178 | case st_wordskip: | ||
179 | if (myisspace(c)) | ||
180 | state = st_wordstart; | ||
181 | break; | ||
182 | |||
183 | case st_bufcpy: | ||
184 | if (myisspace(c)) { | ||
185 | state = st_wordstart; | ||
186 | } else { | ||
187 | /* | ||
188 | * Increment len, but don't overrun the | ||
189 | * supplied buffer and leave room for the | ||
190 | * NULL terminator. | ||
191 | */ | ||
192 | if (++len < bufsize) | ||
193 | *bufptr++ = c; | ||
194 | } | ||
195 | break; | ||
196 | } | ||
197 | } | ||
198 | |||
199 | if (bufsize) | ||
200 | *bufptr = '\0'; | ||
201 | |||
202 | return len; | ||
203 | } | ||
204 | |||
107 | int cmdline_find_option_bool(const char *cmdline, const char *option) | 205 | int cmdline_find_option_bool(const char *cmdline, const char *option) |
108 | { | 206 | { |
109 | return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option); | 207 | return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option); |
110 | } | 208 | } |
209 | |||
210 | int cmdline_find_option(const char *cmdline, const char *option, char *buffer, | ||
211 | int bufsize) | ||
212 | { | ||
213 | return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option, | ||
214 | buffer, bufsize); | ||
215 | } | ||
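
A minimal usage sketch for the new helper (hypothetical caller; the option is searched without the '=', which the state machine consumes itself):

    #include <linux/init.h>     /* boot_command_line */
    #include <linux/printk.h>
    #include <linux/string.h>
    #include <asm/cmdline.h>    /* cmdline_find_option() */

    static void __init check_mem_encrypt_param(void)
    {
            char buf[16];
            int len;

            len = cmdline_find_option(boot_command_line, "mem_encrypt",
                                      buf, sizeof(buf));
            if (len < 0)
                    return;     /* option not present */

            /* len is the full argument length, even if truncated to fit */
            if (len == 2 && !strncmp(buf, "on", 2))
                    pr_info("mem_encrypt=on requested\n");
    }
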
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 0fbdcb64f9f8..72bf8c01c6e3 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -39,3 +39,5 @@ obj-$(CONFIG_X86_INTEL_MPX) += mpx.o | |||
39 | obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o | 39 | obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o |
40 | obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o | 40 | obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o |
41 | 41 | ||
42 | obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o | ||
43 | obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o | ||
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 0470826d2bdc..5e3ac6fe6c9e 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c | |||
@@ -13,12 +13,12 @@ | |||
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/debugfs.h> | 15 | #include <linux/debugfs.h> |
16 | #include <linux/kasan.h> | ||
16 | #include <linux/mm.h> | 17 | #include <linux/mm.h> |
17 | #include <linux/init.h> | 18 | #include <linux/init.h> |
18 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
19 | #include <linux/seq_file.h> | 20 | #include <linux/seq_file.h> |
20 | 21 | ||
21 | #include <asm/kasan.h> | ||
22 | #include <asm/pgtable.h> | 22 | #include <asm/pgtable.h> |
23 | 23 | ||
24 | /* | 24 | /* |
@@ -138,7 +138,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) | |||
138 | { | 138 | { |
139 | pgprotval_t pr = pgprot_val(prot); | 139 | pgprotval_t pr = pgprot_val(prot); |
140 | static const char * const level_name[] = | 140 | static const char * const level_name[] = |
141 | { "cr3", "pgd", "pud", "pmd", "pte" }; | 141 | { "cr3", "pgd", "p4d", "pud", "pmd", "pte" }; |
142 | 142 | ||
143 | if (!pgprot_val(prot)) { | 143 | if (!pgprot_val(prot)) { |
144 | /* Not present */ | 144 | /* Not present */ |
@@ -162,12 +162,12 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) | |||
162 | pt_dump_cont_printf(m, dmsg, " "); | 162 | pt_dump_cont_printf(m, dmsg, " "); |
163 | 163 | ||
164 | /* Bit 7 has a different meaning on level 3 vs 4 */ | 164 | /* Bit 7 has a different meaning on level 3 vs 4 */ |
165 | if (level <= 3 && pr & _PAGE_PSE) | 165 | if (level <= 4 && pr & _PAGE_PSE) |
166 | pt_dump_cont_printf(m, dmsg, "PSE "); | 166 | pt_dump_cont_printf(m, dmsg, "PSE "); |
167 | else | 167 | else |
168 | pt_dump_cont_printf(m, dmsg, " "); | 168 | pt_dump_cont_printf(m, dmsg, " "); |
169 | if ((level == 4 && pr & _PAGE_PAT) || | 169 | if ((level == 5 && pr & _PAGE_PAT) || |
170 | ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE)) | 170 | ((level == 4 || level == 3) && pr & _PAGE_PAT_LARGE)) |
171 | pt_dump_cont_printf(m, dmsg, "PAT "); | 171 | pt_dump_cont_printf(m, dmsg, "PAT "); |
172 | else | 172 | else |
173 | pt_dump_cont_printf(m, dmsg, " "); | 173 | pt_dump_cont_printf(m, dmsg, " "); |
@@ -188,11 +188,12 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) | |||
188 | */ | 188 | */ |
189 | static unsigned long normalize_addr(unsigned long u) | 189 | static unsigned long normalize_addr(unsigned long u) |
190 | { | 190 | { |
191 | #ifdef CONFIG_X86_64 | 191 | int shift; |
192 | return (signed long)(u << 16) >> 16; | 192 | if (!IS_ENABLED(CONFIG_X86_64)) |
193 | #else | 193 | return u; |
194 | return u; | 194 | |
195 | #endif | 195 | shift = 64 - (__VIRTUAL_MASK_SHIFT + 1); |
196 | return (signed long)(u << shift) >> shift; | ||
196 | } | 197 | } |
197 | 198 | ||
198 | /* | 199 | /* |
@@ -297,32 +298,62 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, | |||
297 | for (i = 0; i < PTRS_PER_PTE; i++) { | 298 | for (i = 0; i < PTRS_PER_PTE; i++) { |
298 | prot = pte_flags(*start); | 299 | prot = pte_flags(*start); |
299 | st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); | 300 | st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); |
300 | note_page(m, st, __pgprot(prot), 4); | 301 | note_page(m, st, __pgprot(prot), 5); |
301 | start++; | 302 | start++; |
302 | } | 303 | } |
303 | } | 304 | } |
305 | #ifdef CONFIG_KASAN | ||
306 | |||
307 | /* | ||
308 | * This is an optimization for KASAN=y case. Since all kasan page tables | ||
309 | * eventually point to the kasan_zero_page we could call note_page() | ||
310 | * right away without walking through lower level page tables. This saves | ||
311 | * us dozens of seconds (minutes for 5-level config) while checking for | ||
312 | * W+X mapping or reading kernel_page_tables debugfs file. | ||
313 | */ | ||
314 | static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st, | ||
315 | void *pt) | ||
316 | { | ||
317 | if (__pa(pt) == __pa(kasan_zero_pmd) || | ||
318 | #ifdef CONFIG_X86_5LEVEL | ||
319 | __pa(pt) == __pa(kasan_zero_p4d) || | ||
320 | #endif | ||
321 | __pa(pt) == __pa(kasan_zero_pud)) { | ||
322 | pgprotval_t prot = pte_flags(kasan_zero_pte[0]); | ||
323 | note_page(m, st, __pgprot(prot), 5); | ||
324 | return true; | ||
325 | } | ||
326 | return false; | ||
327 | } | ||
328 | #else | ||
329 | static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st, | ||
330 | void *pt) | ||
331 | { | ||
332 | return false; | ||
333 | } | ||
334 | #endif | ||
304 | 335 | ||
305 | #if PTRS_PER_PMD > 1 | 336 | #if PTRS_PER_PMD > 1 |
306 | 337 | ||
307 | static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, unsigned long P) | 338 | static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, unsigned long P) |
308 | { | 339 | { |
309 | int i; | 340 | int i; |
310 | pmd_t *start; | 341 | pmd_t *start, *pmd_start; |
311 | pgprotval_t prot; | 342 | pgprotval_t prot; |
312 | 343 | ||
313 | start = (pmd_t *)pud_page_vaddr(addr); | 344 | pmd_start = start = (pmd_t *)pud_page_vaddr(addr); |
314 | for (i = 0; i < PTRS_PER_PMD; i++) { | 345 | for (i = 0; i < PTRS_PER_PMD; i++) { |
315 | st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); | 346 | st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); |
316 | if (!pmd_none(*start)) { | 347 | if (!pmd_none(*start)) { |
317 | if (pmd_large(*start) || !pmd_present(*start)) { | 348 | if (pmd_large(*start) || !pmd_present(*start)) { |
318 | prot = pmd_flags(*start); | 349 | prot = pmd_flags(*start); |
319 | note_page(m, st, __pgprot(prot), 3); | 350 | note_page(m, st, __pgprot(prot), 4); |
320 | } else { | 351 | } else if (!kasan_page_table(m, st, pmd_start)) { |
321 | walk_pte_level(m, st, *start, | 352 | walk_pte_level(m, st, *start, |
322 | P + i * PMD_LEVEL_MULT); | 353 | P + i * PMD_LEVEL_MULT); |
323 | } | 354 | } |
324 | } else | 355 | } else |
325 | note_page(m, st, __pgprot(0), 3); | 356 | note_page(m, st, __pgprot(0), 4); |
326 | start++; | 357 | start++; |
327 | } | 358 | } |
328 | } | 359 | } |
@@ -335,39 +366,27 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, | |||
335 | 366 | ||
336 | #if PTRS_PER_PUD > 1 | 367 | #if PTRS_PER_PUD > 1 |
337 | 368 | ||
338 | /* | ||
339 | * This is an optimization for CONFIG_DEBUG_WX=y + CONFIG_KASAN=y | ||
340 | * KASAN fills page tables with the same values. Since there is no | ||
341 | * point in checking page table more than once we just skip repeated | ||
342 | * entries. This saves us dozens of seconds during boot. | ||
343 | */ | ||
344 | static bool pud_already_checked(pud_t *prev_pud, pud_t *pud, bool checkwx) | ||
345 | { | ||
346 | return checkwx && prev_pud && (pud_val(*prev_pud) == pud_val(*pud)); | ||
347 | } | ||
348 | |||
349 | static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, unsigned long P) | 369 | static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, unsigned long P) |
350 | { | 370 | { |
351 | int i; | 371 | int i; |
352 | pud_t *start; | 372 | pud_t *start, *pud_start; |
353 | pgprotval_t prot; | 373 | pgprotval_t prot; |
354 | pud_t *prev_pud = NULL; | 374 | pud_t *prev_pud = NULL; |
355 | 375 | ||
356 | start = (pud_t *)p4d_page_vaddr(addr); | 376 | pud_start = start = (pud_t *)p4d_page_vaddr(addr); |
357 | 377 | ||
358 | for (i = 0; i < PTRS_PER_PUD; i++) { | 378 | for (i = 0; i < PTRS_PER_PUD; i++) { |
359 | st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); | 379 | st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); |
360 | if (!pud_none(*start) && | 380 | if (!pud_none(*start)) { |
361 | !pud_already_checked(prev_pud, start, st->check_wx)) { | ||
362 | if (pud_large(*start) || !pud_present(*start)) { | 381 | if (pud_large(*start) || !pud_present(*start)) { |
363 | prot = pud_flags(*start); | 382 | prot = pud_flags(*start); |
364 | note_page(m, st, __pgprot(prot), 2); | 383 | note_page(m, st, __pgprot(prot), 3); |
365 | } else { | 384 | } else if (!kasan_page_table(m, st, pud_start)) { |
366 | walk_pmd_level(m, st, *start, | 385 | walk_pmd_level(m, st, *start, |
367 | P + i * PUD_LEVEL_MULT); | 386 | P + i * PUD_LEVEL_MULT); |
368 | } | 387 | } |
369 | } else | 388 | } else |
370 | note_page(m, st, __pgprot(0), 2); | 389 | note_page(m, st, __pgprot(0), 3); |
371 | 390 | ||
372 | prev_pud = start; | 391 | prev_pud = start; |
373 | start++; | 392 | start++; |
@@ -385,10 +404,10 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, | |||
385 | static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P) | 404 | static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P) |
386 | { | 405 | { |
387 | int i; | 406 | int i; |
388 | p4d_t *start; | 407 | p4d_t *start, *p4d_start; |
389 | pgprotval_t prot; | 408 | pgprotval_t prot; |
390 | 409 | ||
391 | start = (p4d_t *)pgd_page_vaddr(addr); | 410 | p4d_start = start = (p4d_t *)pgd_page_vaddr(addr); |
392 | 411 | ||
393 | for (i = 0; i < PTRS_PER_P4D; i++) { | 412 | for (i = 0; i < PTRS_PER_P4D; i++) { |
394 | st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT); | 413 | st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT); |
@@ -396,7 +415,7 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, | |||
396 | if (p4d_large(*start) || !p4d_present(*start)) { | 415 | if (p4d_large(*start) || !p4d_present(*start)) { |
397 | prot = p4d_flags(*start); | 416 | prot = p4d_flags(*start); |
398 | note_page(m, st, __pgprot(prot), 2); | 417 | note_page(m, st, __pgprot(prot), 2); |
399 | } else { | 418 | } else if (!kasan_page_table(m, st, p4d_start)) { |
400 | walk_pud_level(m, st, *start, | 419 | walk_pud_level(m, st, *start, |
401 | P + i * P4D_LEVEL_MULT); | 420 | P + i * P4D_LEVEL_MULT); |
402 | } | 421 | } |
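The renumbered note_page() level arguments above (pmd 3 to 4, pud 2 to 3, and the KASAN shortcut reporting at 5) all encode the same change: one slot was inserted for the p4d level. A sketch of the numbering the dumper assumes afterwards, expressed as a hypothetical enum that is not part of the patch:

    /* Level argument passed to note_page() after the renumbering;
     * a p4d slot now sits between pgd and pud. */
    enum pg_level_sketch {
            PG_LVL_PGD = 1,
            PG_LVL_P4D = 2,
            PG_LVL_PUD = 3,
            PG_LVL_PMD = 4,
            PG_LVL_PTE = 5,     /* what kasan_page_table() reports */
    };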
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 2a1fa10c6a98..0cdf14cf3270 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -396,14 +396,18 @@ static void dump_pagetable(unsigned long address) | |||
396 | pte_t *pte; | 396 | pte_t *pte; |
397 | 397 | ||
398 | #ifdef CONFIG_X86_PAE | 398 | #ifdef CONFIG_X86_PAE |
399 | printk("*pdpt = %016Lx ", pgd_val(*pgd)); | 399 | pr_info("*pdpt = %016Lx ", pgd_val(*pgd)); |
400 | if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd)) | 400 | if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd)) |
401 | goto out; | 401 | goto out; |
402 | #define pr_pde pr_cont | ||
403 | #else | ||
404 | #define pr_pde pr_info | ||
402 | #endif | 405 | #endif |
403 | p4d = p4d_offset(pgd, address); | 406 | p4d = p4d_offset(pgd, address); |
404 | pud = pud_offset(p4d, address); | 407 | pud = pud_offset(p4d, address); |
405 | pmd = pmd_offset(pud, address); | 408 | pmd = pmd_offset(pud, address); |
406 | printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd)); | 409 | pr_pde("*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd)); |
410 | #undef pr_pde | ||
407 | 411 | ||
408 | /* | 412 | /* |
409 | * We must not directly access the pte in the highpte | 413 | * We must not directly access the pte in the highpte |
@@ -415,9 +419,9 @@ static void dump_pagetable(unsigned long address) | |||
415 | goto out; | 419 | goto out; |
416 | 420 | ||
417 | pte = pte_offset_kernel(pmd, address); | 421 | pte = pte_offset_kernel(pmd, address); |
418 | printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte)); | 422 | pr_cont("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte)); |
419 | out: | 423 | out: |
420 | printk("\n"); | 424 | pr_cont("\n"); |
421 | } | 425 | } |
422 | 426 | ||
423 | #else /* CONFIG_X86_64: */ | 427 | #else /* CONFIG_X86_64: */ |
@@ -565,7 +569,7 @@ static void dump_pagetable(unsigned long address) | |||
565 | if (bad_address(pgd)) | 569 | if (bad_address(pgd)) |
566 | goto bad; | 570 | goto bad; |
567 | 571 | ||
568 | printk("PGD %lx ", pgd_val(*pgd)); | 572 | pr_info("PGD %lx ", pgd_val(*pgd)); |
569 | 573 | ||
570 | if (!pgd_present(*pgd)) | 574 | if (!pgd_present(*pgd)) |
571 | goto out; | 575 | goto out; |
@@ -574,7 +578,7 @@ static void dump_pagetable(unsigned long address) | |||
574 | if (bad_address(p4d)) | 578 | if (bad_address(p4d)) |
575 | goto bad; | 579 | goto bad; |
576 | 580 | ||
577 | printk("P4D %lx ", p4d_val(*p4d)); | 581 | pr_cont("P4D %lx ", p4d_val(*p4d)); |
578 | if (!p4d_present(*p4d) || p4d_large(*p4d)) | 582 | if (!p4d_present(*p4d) || p4d_large(*p4d)) |
579 | goto out; | 583 | goto out; |
580 | 584 | ||
@@ -582,7 +586,7 @@ static void dump_pagetable(unsigned long address) | |||
582 | if (bad_address(pud)) | 586 | if (bad_address(pud)) |
583 | goto bad; | 587 | goto bad; |
584 | 588 | ||
585 | printk("PUD %lx ", pud_val(*pud)); | 589 | pr_cont("PUD %lx ", pud_val(*pud)); |
586 | if (!pud_present(*pud) || pud_large(*pud)) | 590 | if (!pud_present(*pud) || pud_large(*pud)) |
587 | goto out; | 591 | goto out; |
588 | 592 | ||
@@ -590,7 +594,7 @@ static void dump_pagetable(unsigned long address) | |||
590 | if (bad_address(pmd)) | 594 | if (bad_address(pmd)) |
591 | goto bad; | 595 | goto bad; |
592 | 596 | ||
593 | printk("PMD %lx ", pmd_val(*pmd)); | 597 | pr_cont("PMD %lx ", pmd_val(*pmd)); |
594 | if (!pmd_present(*pmd) || pmd_large(*pmd)) | 598 | if (!pmd_present(*pmd) || pmd_large(*pmd)) |
595 | goto out; | 599 | goto out; |
596 | 600 | ||
@@ -598,12 +602,12 @@ static void dump_pagetable(unsigned long address) | |||
598 | if (bad_address(pte)) | 602 | if (bad_address(pte)) |
599 | goto bad; | 603 | goto bad; |
600 | 604 | ||
601 | printk("PTE %lx", pte_val(*pte)); | 605 | pr_cont("PTE %lx", pte_val(*pte)); |
602 | out: | 606 | out: |
603 | printk("\n"); | 607 | pr_cont("\n"); |
604 | return; | 608 | return; |
605 | bad: | 609 | bad: |
606 | printk("BAD\n"); | 610 | pr_info("BAD\n"); |
607 | } | 611 | } |
608 | 612 | ||
609 | #endif /* CONFIG_X86_64 */ | 613 | #endif /* CONFIG_X86_64 */ |
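The printk() to pr_info()/pr_cont() conversion above matters because a bare printk() without a KERN_* prefix no longer reliably continues the previous line. A minimal sketch of the semantics the pr_pde macro relies on, with illustrative values only:

    #include <linux/printk.h>

    static void dump_sketch(void)
    {
            pr_info("PGD %lx ", 0x1000UL);  /* opens a new log line */
            pr_cont("PUD %lx ", 0x2000UL);  /* appends to that line */
            pr_cont("\n");                  /* terminates it */
    }

Under PAE the *pdpt value has already opened the line, so pr_pde must be pr_cont; otherwise pr_pde starts the line itself as pr_info.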
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 2824607df108..6d06cf33e3de 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/tlbflush.h> | 18 | #include <asm/tlbflush.h> |
19 | #include <asm/pgalloc.h> | 19 | #include <asm/pgalloc.h> |
20 | #include <asm/elf.h> | 20 | #include <asm/elf.h> |
21 | #include <asm/mpx.h> | ||
21 | 22 | ||
22 | #if 0 /* This is just for testing */ | 23 | #if 0 /* This is just for testing */ |
23 | struct page * | 24 | struct page * |
@@ -85,25 +86,38 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, | |||
85 | info.flags = 0; | 86 | info.flags = 0; |
86 | info.length = len; | 87 | info.length = len; |
87 | info.low_limit = get_mmap_base(1); | 88 | info.low_limit = get_mmap_base(1); |
89 | |||
90 | /* | ||
91 | * If the hint address is above DEFAULT_MAP_WINDOW, look for an | ||
92 | * unmapped area in the full address space. | ||
93 | */ | ||
88 | info.high_limit = in_compat_syscall() ? | 94 | info.high_limit = in_compat_syscall() ? |
89 | tasksize_32bit() : tasksize_64bit(); | 95 | task_size_32bit() : task_size_64bit(addr > DEFAULT_MAP_WINDOW); |
96 | |||
90 | info.align_mask = PAGE_MASK & ~huge_page_mask(h); | 97 | info.align_mask = PAGE_MASK & ~huge_page_mask(h); |
91 | info.align_offset = 0; | 98 | info.align_offset = 0; |
92 | return vm_unmapped_area(&info); | 99 | return vm_unmapped_area(&info); |
93 | } | 100 | } |
94 | 101 | ||
95 | static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, | 102 | static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, |
96 | unsigned long addr0, unsigned long len, | 103 | unsigned long addr, unsigned long len, |
97 | unsigned long pgoff, unsigned long flags) | 104 | unsigned long pgoff, unsigned long flags) |
98 | { | 105 | { |
99 | struct hstate *h = hstate_file(file); | 106 | struct hstate *h = hstate_file(file); |
100 | struct vm_unmapped_area_info info; | 107 | struct vm_unmapped_area_info info; |
101 | unsigned long addr; | ||
102 | 108 | ||
103 | info.flags = VM_UNMAPPED_AREA_TOPDOWN; | 109 | info.flags = VM_UNMAPPED_AREA_TOPDOWN; |
104 | info.length = len; | 110 | info.length = len; |
105 | info.low_limit = PAGE_SIZE; | 111 | info.low_limit = PAGE_SIZE; |
106 | info.high_limit = get_mmap_base(0); | 112 | info.high_limit = get_mmap_base(0); |
113 | |||
114 | /* | ||
115 | * If the hint address is above DEFAULT_MAP_WINDOW, look for an | ||
116 | * unmapped area in the full address space. | ||
117 | */ | ||
118 | if (addr > DEFAULT_MAP_WINDOW && !in_compat_syscall()) | ||
119 | info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW; | ||
120 | |||
107 | info.align_mask = PAGE_MASK & ~huge_page_mask(h); | 121 | info.align_mask = PAGE_MASK & ~huge_page_mask(h); |
108 | info.align_offset = 0; | 122 | info.align_offset = 0; |
109 | addr = vm_unmapped_area(&info); | 123 | addr = vm_unmapped_area(&info); |
@@ -118,7 +132,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, | |||
118 | VM_BUG_ON(addr != -ENOMEM); | 132 | VM_BUG_ON(addr != -ENOMEM); |
119 | info.flags = 0; | 133 | info.flags = 0; |
120 | info.low_limit = TASK_UNMAPPED_BASE; | 134 | info.low_limit = TASK_UNMAPPED_BASE; |
121 | info.high_limit = TASK_SIZE; | 135 | info.high_limit = TASK_SIZE_LOW; |
122 | addr = vm_unmapped_area(&info); | 136 | addr = vm_unmapped_area(&info); |
123 | } | 137 | } |
124 | 138 | ||
@@ -135,6 +149,11 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |||
135 | 149 | ||
136 | if (len & ~huge_page_mask(h)) | 150 | if (len & ~huge_page_mask(h)) |
137 | return -EINVAL; | 151 | return -EINVAL; |
152 | |||
153 | addr = mpx_unmapped_area_check(addr, len, flags); | ||
154 | if (IS_ERR_VALUE(addr)) | ||
155 | return addr; | ||
156 | |||
138 | if (len > TASK_SIZE) | 157 | if (len > TASK_SIZE) |
139 | return -ENOMEM; | 158 | return -ENOMEM; |
140 | 159 | ||
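Both hugetlb paths above apply the same opt-in rule from the 5-level paging work: only an explicit hint address above the legacy 47-bit window lets the search extend into the full address space. Condensed into a hypothetical helper (a sketch, not taken from the patch):

    /* A high hint from a native 64-bit task unlocks addresses above
     * DEFAULT_MAP_WINDOW; everyone else stays in the legacy window. */
    static bool want_full_address_space(unsigned long hint)
    {
            return hint > DEFAULT_MAP_WINDOW && !in_compat_syscall();
    }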
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index adab1595f4bd..31cea988fa36 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c | |||
@@ -51,7 +51,7 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, | |||
51 | if (!pmd) | 51 | if (!pmd) |
52 | return -ENOMEM; | 52 | return -ENOMEM; |
53 | ident_pmd_init(info, pmd, addr, next); | 53 | ident_pmd_init(info, pmd, addr, next); |
54 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | 54 | set_pud(pud, __pud(__pa(pmd) | info->kernpg_flag)); |
55 | } | 55 | } |
56 | 56 | ||
57 | return 0; | 57 | return 0; |
@@ -79,7 +79,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page, | |||
79 | if (!pud) | 79 | if (!pud) |
80 | return -ENOMEM; | 80 | return -ENOMEM; |
81 | ident_pud_init(info, pud, addr, next); | 81 | ident_pud_init(info, pud, addr, next); |
82 | set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE)); | 82 | set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag)); |
83 | } | 83 | } |
84 | 84 | ||
85 | return 0; | 85 | return 0; |
@@ -93,6 +93,10 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, | |||
93 | unsigned long next; | 93 | unsigned long next; |
94 | int result; | 94 | int result; |
95 | 95 | ||
96 | /* Set the default pagetable flags if not supplied */ | ||
97 | if (!info->kernpg_flag) | ||
98 | info->kernpg_flag = _KERNPG_TABLE; | ||
99 | |||
96 | for (; addr < end; addr = next) { | 100 | for (; addr < end; addr = next) { |
97 | pgd_t *pgd = pgd_page + pgd_index(addr); | 101 | pgd_t *pgd = pgd_page + pgd_index(addr); |
98 | p4d_t *p4d; | 102 | p4d_t *p4d; |
@@ -116,14 +120,14 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, | |||
116 | if (result) | 120 | if (result) |
117 | return result; | 121 | return result; |
118 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | 122 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { |
119 | set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE)); | 123 | set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag)); |
120 | } else { | 124 | } else { |
121 | /* | 125 | /* |
122 | * With p4d folded, pgd is equal to p4d. | 126 | * With p4d folded, pgd is equal to p4d. |
123 | * The pgd entry has to point to the pud page table in this case. | 127 | * The pgd entry has to point to the pud page table in this case. |
124 | */ | 128 | */ |
125 | pud_t *pud = pud_offset(p4d, 0); | 129 | pud_t *pud = pud_offset(p4d, 0); |
126 | set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); | 130 | set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag)); |
127 | } | 131 | } |
128 | } | 132 | } |
129 | 133 | ||
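The new kernpg_flag field lets callers of kernel_ident_mapping_init() control the flags used for intermediate page-table entries; the SME code later in this series uses it to propagate the encryption mask into the tables themselves. A minimal caller sketch (the allocator callback is hypothetical, field names assumed from this series):

    struct x86_mapping_info info = {
            .alloc_pgt_page = my_alloc_pgt_page,                    /* hypothetical */
            .page_flag      = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask,
            .kernpg_flag    = _KERNPG_TABLE | sme_me_mask,          /* encrypted tables */
    };

    kernel_ident_mapping_init(&info, pgd, start, end);

Leaving kernpg_flag zero keeps the old behavior, since the function now defaults it to _KERNPG_TABLE.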
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index bf3f1065d6ad..7777ccc0e9f9 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -815,7 +815,7 @@ void __init zone_sizes_init(void) | |||
815 | 815 | ||
816 | DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { | 816 | DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { |
817 | .loaded_mm = &init_mm, | 817 | .loaded_mm = &init_mm, |
818 | .state = 0, | 818 | .next_asid = 1, |
819 | .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ | 819 | .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ |
820 | }; | 820 | }; |
821 | EXPORT_SYMBOL_GPL(cpu_tlbstate); | 821 | EXPORT_SYMBOL_GPL(cpu_tlbstate); |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 4c1b5fd0c7ad..34f0e1847dd6 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -13,6 +13,8 @@ | |||
13 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
14 | #include <linux/vmalloc.h> | 14 | #include <linux/vmalloc.h> |
15 | #include <linux/mmiotrace.h> | 15 | #include <linux/mmiotrace.h> |
16 | #include <linux/mem_encrypt.h> | ||
17 | #include <linux/efi.h> | ||
16 | 18 | ||
17 | #include <asm/set_memory.h> | 19 | #include <asm/set_memory.h> |
18 | #include <asm/e820/api.h> | 20 | #include <asm/e820/api.h> |
@@ -21,6 +23,7 @@ | |||
21 | #include <asm/tlbflush.h> | 23 | #include <asm/tlbflush.h> |
22 | #include <asm/pgalloc.h> | 24 | #include <asm/pgalloc.h> |
23 | #include <asm/pat.h> | 25 | #include <asm/pat.h> |
26 | #include <asm/setup.h> | ||
24 | 27 | ||
25 | #include "physaddr.h" | 28 | #include "physaddr.h" |
26 | 29 | ||
@@ -106,12 +109,6 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
106 | } | 109 | } |
107 | 110 | ||
108 | /* | 111 | /* |
109 | * Don't remap the low PCI/ISA area, it's always mapped.. | ||
110 | */ | ||
111 | if (is_ISA_range(phys_addr, last_addr)) | ||
112 | return (__force void __iomem *)phys_to_virt(phys_addr); | ||
113 | |||
114 | /* | ||
115 | * Don't allow anybody to remap normal RAM that we're using.. | 112 | * Don't allow anybody to remap normal RAM that we're using.. |
116 | */ | 113 | */ |
117 | pfn = phys_addr >> PAGE_SHIFT; | 114 | pfn = phys_addr >> PAGE_SHIFT; |
@@ -340,13 +337,17 @@ void iounmap(volatile void __iomem *addr) | |||
340 | return; | 337 | return; |
341 | 338 | ||
342 | /* | 339 | /* |
343 | * __ioremap special-cases the PCI/ISA range by not instantiating a | 340 | * The PCI/ISA range special-casing was removed from __ioremap() |
344 | * vm_area and by simply returning an address into the kernel mapping | 341 | * so this check, in theory, can be removed. However, there are |
345 | * of ISA space. So handle that here. | 342 | * cases where iounmap() is called for addresses not obtained via |
343 | * ioremap() (vga16fb for example). Add a warning so that these | ||
344 | * cases can be caught and fixed. | ||
346 | */ | 345 | */ |
347 | if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) && | 346 | if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) && |
348 | (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) | 347 | (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) { |
348 | WARN(1, "iounmap() called for ISA range not obtained using ioremap()\n"); | ||
349 | return; | 349 | return; |
350 | } | ||
350 | 351 | ||
351 | addr = (volatile void __iomem *) | 352 | addr = (volatile void __iomem *) |
352 | (PAGE_MASK & (unsigned long __force)addr); | 353 | (PAGE_MASK & (unsigned long __force)addr); |
@@ -399,12 +400,10 @@ void *xlate_dev_mem_ptr(phys_addr_t phys) | |||
399 | unsigned long offset = phys & ~PAGE_MASK; | 400 | unsigned long offset = phys & ~PAGE_MASK; |
400 | void *vaddr; | 401 | void *vaddr; |
401 | 402 | ||
402 | /* If page is RAM, we can use __va. Otherwise ioremap and unmap. */ | 403 | /* memremap() maps if RAM, otherwise falls back to ioremap() */ |
403 | if (page_is_ram(start >> PAGE_SHIFT)) | 404 | vaddr = memremap(start, PAGE_SIZE, MEMREMAP_WB); |
404 | return __va(phys); | ||
405 | 405 | ||
406 | vaddr = ioremap_cache(start, PAGE_SIZE); | 406 | /* Only add the offset on success and return NULL if memremap() failed */ |
407 | /* Only add the offset on success and return NULL if the ioremap() failed: */ | ||
408 | if (vaddr) | 407 | if (vaddr) |
409 | vaddr += offset; | 408 | vaddr += offset; |
410 | 409 | ||
@@ -413,11 +412,263 @@ void *xlate_dev_mem_ptr(phys_addr_t phys) | |||
413 | 412 | ||
414 | void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) | 413 | void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) |
415 | { | 414 | { |
416 | if (page_is_ram(phys >> PAGE_SHIFT)) | 415 | memunmap((void *)((unsigned long)addr & PAGE_MASK)); |
417 | return; | 416 | } |
417 | |||
418 | /* | ||
419 | * Examine the physical address to determine if it is an area of memory | ||
420 | * that should be mapped decrypted. If the memory is not part of the | ||
421 | * kernel usable area, it was created and accessed decrypted, so these | ||
422 | * areas should be mapped decrypted. And since the encryption key can | ||
423 | * change across reboots, persistent memory should also be mapped | ||
424 | * decrypted. | ||
425 | */ | ||
426 | static bool memremap_should_map_decrypted(resource_size_t phys_addr, | ||
427 | unsigned long size) | ||
428 | { | ||
429 | int is_pmem; | ||
430 | |||
431 | /* | ||
432 | * Check if the address is part of a persistent memory region. | ||
433 | * This check covers areas added by E820, EFI and ACPI. | ||
434 | */ | ||
435 | is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM, | ||
436 | IORES_DESC_PERSISTENT_MEMORY); | ||
437 | if (is_pmem != REGION_DISJOINT) | ||
438 | return true; | ||
439 | |||
440 | /* | ||
441 | * Check if the non-volatile attribute is set for an EFI | ||
442 | * reserved area. | ||
443 | */ | ||
444 | if (efi_enabled(EFI_BOOT)) { | ||
445 | switch (efi_mem_type(phys_addr)) { | ||
446 | case EFI_RESERVED_TYPE: | ||
447 | if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV) | ||
448 | return true; | ||
449 | break; | ||
450 | default: | ||
451 | break; | ||
452 | } | ||
453 | } | ||
454 | |||
455 | /* Check if the address is outside kernel usable area */ | ||
456 | switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) { | ||
457 | case E820_TYPE_RESERVED: | ||
458 | case E820_TYPE_ACPI: | ||
459 | case E820_TYPE_NVS: | ||
460 | case E820_TYPE_UNUSABLE: | ||
461 | case E820_TYPE_PRAM: | ||
462 | return true; | ||
463 | default: | ||
464 | break; | ||
465 | } | ||
466 | |||
467 | return false; | ||
468 | } | ||
469 | |||
470 | /* | ||
471 | * Examine the physical address to determine if it is EFI data. Check | ||
472 | * it against the boot params structure and EFI tables and memory types. | ||
473 | */ | ||
474 | static bool memremap_is_efi_data(resource_size_t phys_addr, | ||
475 | unsigned long size) | ||
476 | { | ||
477 | u64 paddr; | ||
478 | |||
479 | /* Check if the address is part of EFI boot/runtime data */ | ||
480 | if (!efi_enabled(EFI_BOOT)) | ||
481 | return false; | ||
482 | |||
483 | paddr = boot_params.efi_info.efi_memmap_hi; | ||
484 | paddr <<= 32; | ||
485 | paddr |= boot_params.efi_info.efi_memmap; | ||
486 | if (phys_addr == paddr) | ||
487 | return true; | ||
488 | |||
489 | paddr = boot_params.efi_info.efi_systab_hi; | ||
490 | paddr <<= 32; | ||
491 | paddr |= boot_params.efi_info.efi_systab; | ||
492 | if (phys_addr == paddr) | ||
493 | return true; | ||
494 | |||
495 | if (efi_is_table_address(phys_addr)) | ||
496 | return true; | ||
497 | |||
498 | switch (efi_mem_type(phys_addr)) { | ||
499 | case EFI_BOOT_SERVICES_DATA: | ||
500 | case EFI_RUNTIME_SERVICES_DATA: | ||
501 | return true; | ||
502 | default: | ||
503 | break; | ||
504 | } | ||
505 | |||
506 | return false; | ||
507 | } | ||
508 | |||
509 | /* | ||
510 | * Examine the physical address to determine if it is boot data by checking | ||
511 | * it against the boot params setup_data chain. | ||
512 | */ | ||
513 | static bool memremap_is_setup_data(resource_size_t phys_addr, | ||
514 | unsigned long size) | ||
515 | { | ||
516 | struct setup_data *data; | ||
517 | u64 paddr, paddr_next; | ||
518 | |||
519 | paddr = boot_params.hdr.setup_data; | ||
520 | while (paddr) { | ||
521 | unsigned int len; | ||
522 | |||
523 | if (phys_addr == paddr) | ||
524 | return true; | ||
525 | |||
526 | data = memremap(paddr, sizeof(*data), | ||
527 | MEMREMAP_WB | MEMREMAP_DEC); | ||
528 | |||
529 | paddr_next = data->next; | ||
530 | len = data->len; | ||
531 | |||
532 | memunmap(data); | ||
533 | |||
534 | if ((phys_addr > paddr) && (phys_addr < (paddr + len))) | ||
535 | return true; | ||
536 | |||
537 | paddr = paddr_next; | ||
538 | } | ||
539 | |||
540 | return false; | ||
541 | } | ||
542 | |||
543 | /* | ||
544 | * Examine the physical address to determine if it is boot data by checking | ||
545 | * it against the boot params setup_data chain (early boot version). | ||
546 | */ | ||
547 | static bool __init early_memremap_is_setup_data(resource_size_t phys_addr, | ||
548 | unsigned long size) | ||
549 | { | ||
550 | struct setup_data *data; | ||
551 | u64 paddr, paddr_next; | ||
552 | |||
553 | paddr = boot_params.hdr.setup_data; | ||
554 | while (paddr) { | ||
555 | unsigned int len; | ||
556 | |||
557 | if (phys_addr == paddr) | ||
558 | return true; | ||
559 | |||
560 | data = early_memremap_decrypted(paddr, sizeof(*data)); | ||
561 | |||
562 | paddr_next = data->next; | ||
563 | len = data->len; | ||
564 | |||
565 | early_memunmap(data, sizeof(*data)); | ||
566 | |||
567 | if ((phys_addr > paddr) && (phys_addr < (paddr + len))) | ||
568 | return true; | ||
569 | |||
570 | paddr = paddr_next; | ||
571 | } | ||
572 | |||
573 | return false; | ||
574 | } | ||
575 | |||
576 | /* | ||
577 | * Architecture function to determine if RAM remap is allowed. By default, a | ||
578 | * RAM remap will map the data as encrypted. Determine if a RAM remap should | ||
579 | * not be done so that the data will be mapped decrypted. | ||
580 | */ | ||
581 | bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size, | ||
582 | unsigned long flags) | ||
583 | { | ||
584 | if (!sme_active()) | ||
585 | return true; | ||
586 | |||
587 | if (flags & MEMREMAP_ENC) | ||
588 | return true; | ||
589 | |||
590 | if (flags & MEMREMAP_DEC) | ||
591 | return false; | ||
592 | |||
593 | if (memremap_is_setup_data(phys_addr, size) || | ||
594 | memremap_is_efi_data(phys_addr, size) || | ||
595 | memremap_should_map_decrypted(phys_addr, size)) | ||
596 | return false; | ||
597 | |||
598 | return true; | ||
599 | } | ||
600 | |||
601 | /* | ||
602 | * Architecture override of __weak function to adjust the protection attributes | ||
603 | * used when remapping memory. By default, early_memremap() will map the data | ||
604 | * as encrypted. Determine if an encrypted mapping should not be done and set | ||
605 | * the appropriate protection attributes. | ||
606 | */ | ||
607 | pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr, | ||
608 | unsigned long size, | ||
609 | pgprot_t prot) | ||
610 | { | ||
611 | if (!sme_active()) | ||
612 | return prot; | ||
613 | |||
614 | if (early_memremap_is_setup_data(phys_addr, size) || | ||
615 | memremap_is_efi_data(phys_addr, size) || | ||
616 | memremap_should_map_decrypted(phys_addr, size)) | ||
617 | prot = pgprot_decrypted(prot); | ||
618 | else | ||
619 | prot = pgprot_encrypted(prot); | ||
620 | |||
621 | return prot; | ||
622 | } | ||
623 | |||
624 | bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size) | ||
625 | { | ||
626 | return arch_memremap_can_ram_remap(phys_addr, size, 0); | ||
627 | } | ||
628 | |||
629 | #ifdef CONFIG_ARCH_USE_MEMREMAP_PROT | ||
630 | /* Remap memory with encryption */ | ||
631 | void __init *early_memremap_encrypted(resource_size_t phys_addr, | ||
632 | unsigned long size) | ||
633 | { | ||
634 | return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC); | ||
635 | } | ||
636 | |||
637 | /* | ||
638 | * Remap memory encrypted and write-protected - cannot be called | ||
639 | * before pat_init() is called | ||
640 | */ | ||
641 | void __init *early_memremap_encrypted_wp(resource_size_t phys_addr, | ||
642 | unsigned long size) | ||
643 | { | ||
644 | /* Be sure a PAT entry is configured for write-protect */ | ||
645 | if (__pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] != _PAGE_CACHE_MODE_WP) | ||
646 | return NULL; | ||
647 | |||
648 | return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP); | ||
649 | } | ||
650 | |||
651 | /* Remap memory without encryption */ | ||
652 | void __init *early_memremap_decrypted(resource_size_t phys_addr, | ||
653 | unsigned long size) | ||
654 | { | ||
655 | return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC); | ||
656 | } | ||
657 | |||
658 | /* | ||
659 | * Remap memory decrypted and write-protected - cannot be called | ||
660 | * before pat_init() is called | ||
661 | */ | ||
662 | void __init *early_memremap_decrypted_wp(resource_size_t phys_addr, | ||
663 | unsigned long size) | ||
664 | { | ||
665 | /* Be sure a PAT entry is configured for write-protect */ | ||
666 | if (__pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] != _PAGE_CACHE_MODE_WP) | ||
667 | return NULL; | ||
418 | 668 | ||
419 | iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK)); | 669 | return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP); |
420 | } | 670 | } |
671 | #endif /* CONFIG_ARCH_USE_MEMREMAP_PROT */ | ||
421 | 672 | ||
422 | static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; | 673 | static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; |
423 | 674 | ||
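With the hooks above in place, memremap() of RAM maps encrypted by default under SME, and the MEMREMAP_DEC/MEMREMAP_ENC flags override the heuristics in arch_memremap_can_ram_remap(). A short usage sketch for data known to have been written decrypted (the caller, address and size are hypothetical):

    #include <linux/io.h>

    static void *map_fw_blob(resource_size_t paddr, unsigned long size)
    {
            /* Force a decrypted, write-back mapping regardless of SME state. */
            return memremap(paddr, size, MEMREMAP_WB | MEMREMAP_DEC);
    }

This mirrors how memremap_is_setup_data() itself reads the setup_data chain above.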
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 02c9d7553409..bc84b73684b7 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c | |||
@@ -11,8 +11,8 @@ | |||
11 | #include <asm/e820/types.h> | 11 | #include <asm/e820/types.h> |
12 | #include <asm/tlbflush.h> | 12 | #include <asm/tlbflush.h> |
13 | #include <asm/sections.h> | 13 | #include <asm/sections.h> |
14 | #include <asm/pgtable.h> | ||
14 | 15 | ||
15 | extern pgd_t early_top_pgt[PTRS_PER_PGD]; | ||
16 | extern struct range pfn_mapped[E820_MAX_ENTRIES]; | 16 | extern struct range pfn_mapped[E820_MAX_ENTRIES]; |
17 | 17 | ||
18 | static int __init map_range(struct range *range) | 18 | static int __init map_range(struct range *range) |
@@ -87,7 +87,7 @@ static struct notifier_block kasan_die_notifier = { | |||
87 | void __init kasan_early_init(void) | 87 | void __init kasan_early_init(void) |
88 | { | 88 | { |
89 | int i; | 89 | int i; |
90 | pteval_t pte_val = __pa_nodebug(kasan_zero_page) | __PAGE_KERNEL; | 90 | pteval_t pte_val = __pa_nodebug(kasan_zero_page) | __PAGE_KERNEL | _PAGE_ENC; |
91 | pmdval_t pmd_val = __pa_nodebug(kasan_zero_pte) | _KERNPG_TABLE; | 91 | pmdval_t pmd_val = __pa_nodebug(kasan_zero_pte) | _KERNPG_TABLE; |
92 | pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE; | 92 | pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE; |
93 | p4dval_t p4d_val = __pa_nodebug(kasan_zero_pud) | _KERNPG_TABLE; | 93 | p4dval_t p4d_val = __pa_nodebug(kasan_zero_pud) | _KERNPG_TABLE; |
@@ -153,7 +153,7 @@ void __init kasan_init(void) | |||
153 | */ | 153 | */ |
154 | memset(kasan_zero_page, 0, PAGE_SIZE); | 154 | memset(kasan_zero_page, 0, PAGE_SIZE); |
155 | for (i = 0; i < PTRS_PER_PTE; i++) { | 155 | for (i = 0; i < PTRS_PER_PTE; i++) { |
156 | pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO); | 156 | pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO | _PAGE_ENC); |
157 | set_pte(&kasan_zero_pte[i], pte); | 157 | set_pte(&kasan_zero_pte[i], pte); |
158 | } | 158 | } |
159 | /* Flush TLBs again to be sure that write protection applied. */ | 159 | /* Flush TLBs again to be sure that write protection applied. */ |
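The _PAGE_ENC bits added above come from definitions introduced elsewhere in this series; roughly (a simplified sketch, stated as an assumption rather than quoted from this hunk):

    /* sme_me_mask carries the C-bit position reported by CPUID Fn8000_001F */
    #define _PAGE_ENC       (_AT(pteval_t, sme_me_mask))

    #define __sme_set(x)    ((x) | sme_me_mask)
    #define __sme_clr(x)    ((x) & ~sme_me_mask)

With SME inactive, sme_me_mask is zero, so these additions are no-ops on unaffected systems.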
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c new file mode 100644 index 000000000000..0fbd09269757 --- /dev/null +++ b/arch/x86/mm/mem_encrypt.c | |||
@@ -0,0 +1,593 @@ | |||
1 | /* | ||
2 | * AMD Memory Encryption Support | ||
3 | * | ||
4 | * Copyright (C) 2016 Advanced Micro Devices, Inc. | ||
5 | * | ||
6 | * Author: Tom Lendacky <thomas.lendacky@amd.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #include <linux/linkage.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/mm.h> | ||
16 | #include <linux/dma-mapping.h> | ||
17 | #include <linux/swiotlb.h> | ||
18 | #include <linux/mem_encrypt.h> | ||
19 | |||
20 | #include <asm/tlbflush.h> | ||
21 | #include <asm/fixmap.h> | ||
22 | #include <asm/setup.h> | ||
23 | #include <asm/bootparam.h> | ||
24 | #include <asm/set_memory.h> | ||
25 | #include <asm/cacheflush.h> | ||
26 | #include <asm/sections.h> | ||
27 | #include <asm/processor-flags.h> | ||
28 | #include <asm/msr.h> | ||
29 | #include <asm/cmdline.h> | ||
30 | |||
31 | static char sme_cmdline_arg[] __initdata = "mem_encrypt"; | ||
32 | static char sme_cmdline_on[] __initdata = "on"; | ||
33 | static char sme_cmdline_off[] __initdata = "off"; | ||
34 | |||
35 | /* | ||
36 | * Since SME related variables are set early in the boot process they must | ||
37 | * reside in the .data section so as not to be zeroed out when the .bss | ||
38 | * section is later cleared. | ||
39 | */ | ||
40 | unsigned long sme_me_mask __section(.data) = 0; | ||
41 | EXPORT_SYMBOL_GPL(sme_me_mask); | ||
42 | |||
43 | /* Buffer used for early in-place encryption by BSP, no locking needed */ | ||
44 | static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE); | ||
45 | |||
46 | /* | ||
47 | * This routine does not change the underlying encryption setting of the | ||
48 | * page(s) that map this memory. It assumes that eventually the memory is | ||
49 | * meant to be accessed as either encrypted or decrypted but the contents | ||
50 | * are currently not in the desired state. | ||
51 | * | ||
52 | * This routine follows the steps outlined in the AMD64 Architecture | ||
53 | * Programmer's Manual Volume 2, Section 7.10.8 Encrypt-in-Place. | ||
54 | */ | ||
55 | static void __init __sme_early_enc_dec(resource_size_t paddr, | ||
56 | unsigned long size, bool enc) | ||
57 | { | ||
58 | void *src, *dst; | ||
59 | size_t len; | ||
60 | |||
61 | if (!sme_me_mask) | ||
62 | return; | ||
63 | |||
64 | local_flush_tlb(); | ||
65 | wbinvd(); | ||
66 | |||
67 | /* | ||
68 | * There are a limited number of early mapping slots, so map (at most) | ||
69 | * one page at a time. | ||
70 | */ | ||
71 | while (size) { | ||
72 | len = min_t(size_t, sizeof(sme_early_buffer), size); | ||
73 | |||
74 | /* | ||
75 | * Create mappings for the current and desired format of | ||
76 | * the memory. Use a write-protected mapping for the source. | ||
77 | */ | ||
78 | src = enc ? early_memremap_decrypted_wp(paddr, len) : | ||
79 | early_memremap_encrypted_wp(paddr, len); | ||
80 | |||
81 | dst = enc ? early_memremap_encrypted(paddr, len) : | ||
82 | early_memremap_decrypted(paddr, len); | ||
83 | |||
84 | /* | ||
85 | * If a mapping can't be obtained to perform the operation, | ||
86 | * then eventual access of that area in the desired mode | ||
87 | * will cause a crash. | ||
88 | */ | ||
89 | BUG_ON(!src || !dst); | ||
90 | |||
91 | /* | ||
92 | * Use a temporary buffer, of cache-line multiple size, to | ||
93 | * avoid data corruption as documented in the APM. | ||
94 | */ | ||
95 | memcpy(sme_early_buffer, src, len); | ||
96 | memcpy(dst, sme_early_buffer, len); | ||
97 | |||
98 | early_memunmap(dst, len); | ||
99 | early_memunmap(src, len); | ||
100 | |||
101 | paddr += len; | ||
102 | size -= len; | ||
103 | } | ||
104 | } | ||
105 | |||
106 | void __init sme_early_encrypt(resource_size_t paddr, unsigned long size) | ||
107 | { | ||
108 | __sme_early_enc_dec(paddr, size, true); | ||
109 | } | ||
110 | |||
111 | void __init sme_early_decrypt(resource_size_t paddr, unsigned long size) | ||
112 | { | ||
113 | __sme_early_enc_dec(paddr, size, false); | ||
114 | } | ||
115 | |||
116 | static void __init __sme_early_map_unmap_mem(void *vaddr, unsigned long size, | ||
117 | bool map) | ||
118 | { | ||
119 | unsigned long paddr = (unsigned long)vaddr - __PAGE_OFFSET; | ||
120 | pmdval_t pmd_flags, pmd; | ||
121 | |||
122 | /* Use early_pmd_flags but remove the encryption mask */ | ||
123 | pmd_flags = __sme_clr(early_pmd_flags); | ||
124 | |||
125 | do { | ||
126 | pmd = map ? (paddr & PMD_MASK) + pmd_flags : 0; | ||
127 | __early_make_pgtable((unsigned long)vaddr, pmd); | ||
128 | |||
129 | vaddr += PMD_SIZE; | ||
130 | paddr += PMD_SIZE; | ||
131 | size = (size <= PMD_SIZE) ? 0 : size - PMD_SIZE; | ||
132 | } while (size); | ||
133 | |||
134 | __native_flush_tlb(); | ||
135 | } | ||
136 | |||
137 | void __init sme_unmap_bootdata(char *real_mode_data) | ||
138 | { | ||
139 | struct boot_params *boot_data; | ||
140 | unsigned long cmdline_paddr; | ||
141 | |||
142 | if (!sme_active()) | ||
143 | return; | ||
144 | |||
145 | /* Get the command line address before unmapping the real_mode_data */ | ||
146 | boot_data = (struct boot_params *)real_mode_data; | ||
147 | cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32); | ||
148 | |||
149 | __sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), false); | ||
150 | |||
151 | if (!cmdline_paddr) | ||
152 | return; | ||
153 | |||
154 | __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, false); | ||
155 | } | ||
156 | |||
157 | void __init sme_map_bootdata(char *real_mode_data) | ||
158 | { | ||
159 | struct boot_params *boot_data; | ||
160 | unsigned long cmdline_paddr; | ||
161 | |||
162 | if (!sme_active()) | ||
163 | return; | ||
164 | |||
165 | __sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), true); | ||
166 | |||
167 | /* Get the command line address after mapping the real_mode_data */ | ||
168 | boot_data = (struct boot_params *)real_mode_data; | ||
169 | cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32); | ||
170 | |||
171 | if (!cmdline_paddr) | ||
172 | return; | ||
173 | |||
174 | __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true); | ||
175 | } | ||
176 | |||
177 | void __init sme_early_init(void) | ||
178 | { | ||
179 | unsigned int i; | ||
180 | |||
181 | if (!sme_me_mask) | ||
182 | return; | ||
183 | |||
184 | early_pmd_flags = __sme_set(early_pmd_flags); | ||
185 | |||
186 | __supported_pte_mask = __sme_set(__supported_pte_mask); | ||
187 | |||
188 | /* Update the protection map with memory encryption mask */ | ||
189 | for (i = 0; i < ARRAY_SIZE(protection_map); i++) | ||
190 | protection_map[i] = pgprot_encrypted(protection_map[i]); | ||
191 | } | ||
192 | |||
193 | /* Architecture __weak replacement functions */ | ||
194 | void __init mem_encrypt_init(void) | ||
195 | { | ||
196 | if (!sme_me_mask) | ||
197 | return; | ||
198 | |||
199 | /* Call into SWIOTLB to update the SWIOTLB DMA buffers */ | ||
200 | swiotlb_update_mem_attributes(); | ||
201 | |||
202 | pr_info("AMD Secure Memory Encryption (SME) active\n"); | ||
203 | } | ||
204 | |||
205 | void swiotlb_set_mem_attributes(void *vaddr, unsigned long size) | ||
206 | { | ||
207 | WARN(PAGE_ALIGN(size) != size, | ||
208 | "size is not page-aligned (%#lx)\n", size); | ||
209 | |||
210 | /* Make the SWIOTLB buffer area decrypted */ | ||
211 | set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT); | ||
212 | } | ||
213 | |||
214 | static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start, | ||
215 | unsigned long end) | ||
216 | { | ||
217 | unsigned long pgd_start, pgd_end, pgd_size; | ||
218 | pgd_t *pgd_p; | ||
219 | |||
220 | pgd_start = start & PGDIR_MASK; | ||
221 | pgd_end = end & PGDIR_MASK; | ||
222 | |||
223 | pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1); | ||
224 | pgd_size *= sizeof(pgd_t); | ||
225 | |||
226 | pgd_p = pgd_base + pgd_index(start); | ||
227 | |||
228 | memset(pgd_p, 0, pgd_size); | ||
229 | } | ||
230 | |||
231 | #define PGD_FLAGS _KERNPG_TABLE_NOENC | ||
232 | #define P4D_FLAGS _KERNPG_TABLE_NOENC | ||
233 | #define PUD_FLAGS _KERNPG_TABLE_NOENC | ||
234 | #define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) | ||
235 | |||
236 | static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area, | ||
237 | unsigned long vaddr, pmdval_t pmd_val) | ||
238 | { | ||
239 | pgd_t *pgd_p; | ||
240 | p4d_t *p4d_p; | ||
241 | pud_t *pud_p; | ||
242 | pmd_t *pmd_p; | ||
243 | |||
244 | pgd_p = pgd_base + pgd_index(vaddr); | ||
245 | if (native_pgd_val(*pgd_p)) { | ||
246 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) | ||
247 | p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK); | ||
248 | else | ||
249 | pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK); | ||
250 | } else { | ||
251 | pgd_t pgd; | ||
252 | |||
253 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | ||
254 | p4d_p = pgtable_area; | ||
255 | memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D); | ||
256 | pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D; | ||
257 | |||
258 | pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS); | ||
259 | } else { | ||
260 | pud_p = pgtable_area; | ||
261 | memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); | ||
262 | pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; | ||
263 | |||
264 | pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS); | ||
265 | } | ||
266 | native_set_pgd(pgd_p, pgd); | ||
267 | } | ||
268 | |||
269 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | ||
270 | p4d_p += p4d_index(vaddr); | ||
271 | if (native_p4d_val(*p4d_p)) { | ||
272 | pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK); | ||
273 | } else { | ||
274 | p4d_t p4d; | ||
275 | |||
276 | pud_p = pgtable_area; | ||
277 | memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); | ||
278 | pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; | ||
279 | |||
280 | p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS); | ||
281 | native_set_p4d(p4d_p, p4d); | ||
282 | } | ||
283 | } | ||
284 | |||
285 | pud_p += pud_index(vaddr); | ||
286 | if (native_pud_val(*pud_p)) { | ||
287 | if (native_pud_val(*pud_p) & _PAGE_PSE) | ||
288 | goto out; | ||
289 | |||
290 | pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK); | ||
291 | } else { | ||
292 | pud_t pud; | ||
293 | |||
294 | pmd_p = pgtable_area; | ||
295 | memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD); | ||
296 | pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD; | ||
297 | |||
298 | pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS); | ||
299 | native_set_pud(pud_p, pud); | ||
300 | } | ||
301 | |||
302 | pmd_p += pmd_index(vaddr); | ||
303 | if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE)) | ||
304 | native_set_pmd(pmd_p, native_make_pmd(pmd_val)); | ||
305 | |||
306 | out: | ||
307 | return pgtable_area; | ||
308 | } | ||
309 | |||
310 | static unsigned long __init sme_pgtable_calc(unsigned long len) | ||
311 | { | ||
312 | unsigned long p4d_size, pud_size, pmd_size; | ||
313 | unsigned long total; | ||
314 | |||
315 | /* | ||
316 | * Perform a relatively simplistic calculation of the pagetable | ||
317 | * entries that are needed. The mappings will be covered by 2MB | ||
318 | * PMD entries so we can conservatively calculate the required | ||
319 | * number of P4D, PUD and PMD structures needed to perform the | ||
320 | * mappings. Incrementing the count for each covers the case where | ||
321 | * the addresses cross entries. | ||
322 | */ | ||
323 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | ||
324 | p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1; | ||
325 | p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D; | ||
326 | pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1; | ||
327 | pud_size *= sizeof(pud_t) * PTRS_PER_PUD; | ||
328 | } else { | ||
329 | p4d_size = 0; | ||
330 | pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1; | ||
331 | pud_size *= sizeof(pud_t) * PTRS_PER_PUD; | ||
332 | } | ||
333 | pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1; | ||
334 | pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD; | ||
335 | |||
336 | total = p4d_size + pud_size + pmd_size; | ||
337 | |||
338 | /* | ||
339 | * Now calculate the added pagetable structures needed to populate | ||
340 | * the new pagetables. | ||
341 | */ | ||
342 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | ||
343 | p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE; | ||
344 | p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D; | ||
345 | pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE; | ||
346 | pud_size *= sizeof(pud_t) * PTRS_PER_PUD; | ||
347 | } else { | ||
348 | p4d_size = 0; | ||
349 | pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE; | ||
350 | pud_size *= sizeof(pud_t) * PTRS_PER_PUD; | ||
351 | } | ||
352 | pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE; | ||
353 | pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD; | ||
354 | |||
355 | total += p4d_size + pud_size + pmd_size; | ||
356 | |||
357 | return total; | ||
358 | } | ||
359 | |||
360 | void __init sme_encrypt_kernel(void) | ||
361 | { | ||
362 | unsigned long workarea_start, workarea_end, workarea_len; | ||
363 | unsigned long execute_start, execute_end, execute_len; | ||
364 | unsigned long kernel_start, kernel_end, kernel_len; | ||
365 | unsigned long pgtable_area_len; | ||
366 | unsigned long paddr, pmd_flags; | ||
367 | unsigned long decrypted_base; | ||
368 | void *pgtable_area; | ||
369 | pgd_t *pgd; | ||
370 | |||
371 | if (!sme_active()) | ||
372 | return; | ||
373 | |||
374 | /* | ||
375 | * Prepare for encrypting the kernel by building new pagetables with | ||
376 | * the necessary attributes needed to encrypt the kernel in place. | ||
377 | * | ||
378 | * One range of virtual addresses will map the memory occupied | ||
379 | * by the kernel as encrypted. | ||
380 | * | ||
381 | * Another range of virtual addresses will map the memory occupied | ||
382 | * by the kernel as decrypted and write-protected. | ||
383 | * | ||
384 | * The use of the write-protect attribute will prevent any of the | ||
385 | * memory from being cached. | ||
386 | */ | ||
387 | |||
388 | /* Physical addresses give us the identity-mapped virtual addresses */ | ||
389 | kernel_start = __pa_symbol(_text); | ||
390 | kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); | ||
391 | kernel_len = kernel_end - kernel_start; | ||
392 | |||
393 | /* Set the encryption workarea to be immediately after the kernel */ | ||
394 | workarea_start = kernel_end; | ||
395 | |||
396 | /* | ||
397 | * Calculate the number of workarea bytes needed: | ||
398 | * executable encryption area size: | ||
399 | * stack page (PAGE_SIZE) | ||
400 | * encryption routine page (PAGE_SIZE) | ||
401 | * intermediate copy buffer (PMD_PAGE_SIZE) | ||
402 | * pagetable structures for the encryption of the kernel | ||
403 | * pagetable structures for workarea (in case not currently mapped) | ||
404 | */ | ||
405 | execute_start = workarea_start; | ||
406 | execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE; | ||
407 | execute_len = execute_end - execute_start; | ||
408 | |||
409 | /* | ||
410 | * One PGD for both encrypted and decrypted mappings and a set of | ||
411 | * PUDs and PMDs for each of the encrypted and decrypted mappings. | ||
412 | */ | ||
413 | pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD; | ||
414 | pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2; | ||
415 | |||
416 | /* PUDs and PMDs needed in the current pagetables for the workarea */ | ||
417 | pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len); | ||
418 | |||
419 | /* | ||
420 | * The total workarea includes the executable encryption area and | ||
421 | * the pagetable area. | ||
422 | */ | ||
423 | workarea_len = execute_len + pgtable_area_len; | ||
424 | workarea_end = workarea_start + workarea_len; | ||
425 | |||
426 | /* | ||
427 | * Set the address to the start of where newly created pagetable | ||
428 | * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable | ||
429 | * structures are created when the workarea is added to the current | ||
430 | * pagetables and when the new encrypted and decrypted kernel | ||
431 | * mappings are populated. | ||
432 | */ | ||
433 | pgtable_area = (void *)execute_end; | ||
434 | |||
435 | /* | ||
436 | * Make sure the current pagetable structure has entries for | ||
437 | * addressing the workarea. | ||
438 | */ | ||
439 | pgd = (pgd_t *)native_read_cr3_pa(); | ||
440 | paddr = workarea_start; | ||
441 | while (paddr < workarea_end) { | ||
442 | pgtable_area = sme_populate_pgd(pgd, pgtable_area, | ||
443 | paddr, | ||
444 | paddr + PMD_FLAGS); | ||
445 | |||
446 | paddr += PMD_PAGE_SIZE; | ||
447 | } | ||
448 | |||
449 | /* Flush the TLB - no globals so cr3 is enough */ | ||
450 | native_write_cr3(__native_read_cr3()); | ||
451 | |||
452 | /* | ||
453 | * A new pagetable structure is being built to allow for the kernel | ||
454 | * to be encrypted. It starts with an empty PGD that will then be | ||
455 | * populated with new PUDs and PMDs as the encrypted and decrypted | ||
456 | * kernel mappings are created. | ||
457 | */ | ||
458 | pgd = pgtable_area; | ||
459 | memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD); | ||
460 | pgtable_area += sizeof(*pgd) * PTRS_PER_PGD; | ||
461 | |||
462 | /* Add encrypted kernel (identity) mappings */ | ||
463 | pmd_flags = PMD_FLAGS | _PAGE_ENC; | ||
464 | paddr = kernel_start; | ||
465 | while (paddr < kernel_end) { | ||
466 | pgtable_area = sme_populate_pgd(pgd, pgtable_area, | ||
467 | paddr, | ||
468 | paddr + pmd_flags); | ||
469 | |||
470 | paddr += PMD_PAGE_SIZE; | ||
471 | } | ||
472 | |||
473 | /* | ||
474 | * A different PGD index/entry must be used to get different | ||
475 | * pagetable entries for the decrypted mapping. Choose the next | ||
476 | * PGD index and convert it to a virtual address to be used as | ||
477 | * the base of the mapping. | ||
478 | */ | ||
479 | decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1); | ||
480 | decrypted_base <<= PGDIR_SHIFT; | ||
481 | |||
482 | /* Add decrypted, write-protected kernel (non-identity) mappings */ | ||
483 | pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT); | ||
484 | paddr = kernel_start; | ||
485 | while (paddr < kernel_end) { | ||
486 | pgtable_area = sme_populate_pgd(pgd, pgtable_area, | ||
487 | paddr + decrypted_base, | ||
488 | paddr + pmd_flags); | ||
489 | |||
490 | paddr += PMD_PAGE_SIZE; | ||
491 | } | ||
492 | |||
493 | /* Add decrypted workarea mappings to both kernel mappings */ | ||
494 | paddr = workarea_start; | ||
495 | while (paddr < workarea_end) { | ||
496 | pgtable_area = sme_populate_pgd(pgd, pgtable_area, | ||
497 | paddr, | ||
498 | paddr + PMD_FLAGS); | ||
499 | |||
500 | pgtable_area = sme_populate_pgd(pgd, pgtable_area, | ||
501 | paddr + decrypted_base, | ||
502 | paddr + PMD_FLAGS); | ||
503 | |||
504 | paddr += PMD_PAGE_SIZE; | ||
505 | } | ||
506 | |||
507 | /* Perform the encryption */ | ||
508 | sme_encrypt_execute(kernel_start, kernel_start + decrypted_base, | ||
509 | kernel_len, workarea_start, (unsigned long)pgd); | ||
510 | |||
511 | /* | ||
512 | * At this point we are running encrypted. Remove the mappings for | ||
513 | * the decrypted areas - all that is needed for this is to remove | ||
514 | * the PGD entry/entries. | ||
515 | */ | ||
516 | sme_clear_pgd(pgd, kernel_start + decrypted_base, | ||
517 | kernel_end + decrypted_base); | ||
518 | |||
519 | sme_clear_pgd(pgd, workarea_start + decrypted_base, | ||
520 | workarea_end + decrypted_base); | ||
521 | |||
522 | /* Flush the TLB - no globals so cr3 is enough */ | ||
523 | native_write_cr3(__native_read_cr3()); | ||
524 | } | ||
525 | |||
526 | void __init __nostackprotector sme_enable(struct boot_params *bp) | ||
527 | { | ||
528 | const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off; | ||
529 | unsigned int eax, ebx, ecx, edx; | ||
530 | bool active_by_default; | ||
531 | unsigned long me_mask; | ||
532 | char buffer[16]; | ||
533 | u64 msr; | ||
534 | |||
535 | /* Check for the SME support leaf */ | ||
536 | eax = 0x80000000; | ||
537 | ecx = 0; | ||
538 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
539 | if (eax < 0x8000001f) | ||
540 | return; | ||
541 | |||
542 | /* | ||
543 | * Check for the SME feature: | ||
544 | * CPUID Fn8000_001F[EAX] - Bit 0 | ||
545 | * Secure Memory Encryption support | ||
546 | * CPUID Fn8000_001F[EBX] - Bits 5:0 | ||
547 | * Pagetable bit position used to indicate encryption | ||
548 | */ | ||
549 | eax = 0x8000001f; | ||
550 | ecx = 0; | ||
551 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
552 | if (!(eax & 1)) | ||
553 | return; | ||
554 | |||
555 | me_mask = 1UL << (ebx & 0x3f); | ||
556 | |||
557 | /* Check if SME is enabled */ | ||
558 | msr = __rdmsr(MSR_K8_SYSCFG); | ||
559 | if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) | ||
560 | return; | ||
561 | |||
562 | /* | ||
563 | * Fixups have not been applied to phys_base yet and we're running | ||
564 | * identity mapped, so we must obtain the address of the SME command | ||
565 | * line argument data using rip-relative addressing. | ||
566 | */ | ||
567 | asm ("lea sme_cmdline_arg(%%rip), %0" | ||
568 | : "=r" (cmdline_arg) | ||
569 | : "p" (sme_cmdline_arg)); | ||
570 | asm ("lea sme_cmdline_on(%%rip), %0" | ||
571 | : "=r" (cmdline_on) | ||
572 | : "p" (sme_cmdline_on)); | ||
573 | asm ("lea sme_cmdline_off(%%rip), %0" | ||
574 | : "=r" (cmdline_off) | ||
575 | : "p" (sme_cmdline_off)); | ||
576 | |||
577 | if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT)) | ||
578 | active_by_default = true; | ||
579 | else | ||
580 | active_by_default = false; | ||
581 | |||
582 | cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | | ||
583 | ((u64)bp->ext_cmd_line_ptr << 32)); | ||
584 | |||
585 | cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)); | ||
586 | |||
587 | if (!strncmp(buffer, cmdline_on, sizeof(buffer))) | ||
588 | sme_me_mask = me_mask; | ||
589 | else if (!strncmp(buffer, cmdline_off, sizeof(buffer))) | ||
590 | sme_me_mask = 0; | ||
591 | else | ||
592 | sme_me_mask = active_by_default ? me_mask : 0; | ||
593 | } | ||
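For orientation, the predicate and pgprot helpers this file leans on are defined elsewhere in the series; approximately (a sketch under that assumption, not part of this hunk):

    static inline bool sme_active(void)
    {
            return !!sme_me_mask;
    }

    #define pgprot_encrypted(prot)  __pgprot(__sme_set(pgprot_val(prot)))
    #define pgprot_decrypted(prot)  __pgprot(__sme_clr(pgprot_val(prot)))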
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S new file mode 100644 index 000000000000..730e6d541df1 --- /dev/null +++ b/arch/x86/mm/mem_encrypt_boot.S | |||
@@ -0,0 +1,149 @@ | |||
1 | /* | ||
2 | * AMD Memory Encryption Support | ||
3 | * | ||
4 | * Copyright (C) 2016 Advanced Micro Devices, Inc. | ||
5 | * | ||
6 | * Author: Tom Lendacky <thomas.lendacky@amd.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #include <linux/linkage.h> | ||
14 | #include <asm/pgtable.h> | ||
15 | #include <asm/page.h> | ||
16 | #include <asm/processor-flags.h> | ||
17 | #include <asm/msr-index.h> | ||
18 | |||
19 | .text | ||
20 | .code64 | ||
21 | ENTRY(sme_encrypt_execute) | ||
22 | |||
23 | /* | ||
24 | * Entry parameters: | ||
25 | * RDI - virtual address for the encrypted kernel mapping | ||
26 | * RSI - virtual address for the decrypted kernel mapping | ||
27 | * RDX - length of kernel | ||
28 | * RCX - virtual address of the encryption workarea, including: | ||
29 | * - stack page (PAGE_SIZE) | ||
30 | * - encryption routine page (PAGE_SIZE) | ||
31 | * - intermediate copy buffer (PMD_PAGE_SIZE) | ||
32 | * R8 - physical address of the pagetables to use for encryption | ||
33 | */ | ||
34 | |||
35 | push %rbp | ||
36 | movq %rsp, %rbp /* RBP now has original stack pointer */ | ||
37 | |||
38 | /* Set up a one page stack in the non-encrypted memory area */ | ||
39 | movq %rcx, %rax /* Workarea stack page */ | ||
40 | leaq PAGE_SIZE(%rax), %rsp /* Set new stack pointer */ | ||
41 | addq $PAGE_SIZE, %rax /* Workarea encryption routine */ | ||
42 | |||
43 | push %r12 | ||
44 | movq %rdi, %r10 /* Encrypted kernel */ | ||
45 | movq %rsi, %r11 /* Decrypted kernel */ | ||
46 | movq %rdx, %r12 /* Kernel length */ | ||
47 | |||
48 | /* Copy encryption routine into the workarea */ | ||
49 | movq %rax, %rdi /* Workarea encryption routine */ | ||
50 | leaq __enc_copy(%rip), %rsi /* Encryption routine */ | ||
51 | movq $(.L__enc_copy_end - __enc_copy), %rcx /* Encryption routine length */ | ||
52 | rep movsb | ||
53 | |||
54 | /* Setup registers for call */ | ||
55 | movq %r10, %rdi /* Encrypted kernel */ | ||
56 | movq %r11, %rsi /* Decrypted kernel */ | ||
57 | movq %r8, %rdx /* Pagetables used for encryption */ | ||
58 | movq %r12, %rcx /* Kernel length */ | ||
59 | movq %rax, %r8 /* Workarea encryption routine */ | ||
60 | addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */ | ||
61 | |||
62 | call *%rax /* Call the encryption routine */ | ||
63 | |||
64 | pop %r12 | ||
65 | |||
66 | movq %rbp, %rsp /* Restore original stack pointer */ | ||
67 | pop %rbp | ||
68 | |||
69 | ret | ||
70 | ENDPROC(sme_encrypt_execute) | ||
71 | |||
72 | ENTRY(__enc_copy) | ||
73 | /* | ||
74 | * Routine used to encrypt the kernel. | ||
75 | * This routine must be run outside of the kernel proper since | ||
76 | * the kernel will be encrypted during the process. So this | ||
77 | * routine is defined here and then copied to an area outside | ||
78 | * of the kernel where it will remain and run decrypted | ||
79 | * during execution. | ||
80 | * | ||
81 | * On entry the registers must be: | ||
82 | * RDI - virtual address for the encrypted kernel mapping | ||
83 | * RSI - virtual address for the decrypted kernel mapping | ||
84 | * RDX - address of the pagetables to use for encryption | ||
85 | * RCX - length of kernel | ||
86 | * R8 - intermediate copy buffer | ||
87 | * | ||
88 | * RAX - points to this routine | ||
89 | * | ||
90 | * The kernel will be encrypted by copying from the non-encrypted | ||
91 | * kernel space to an intermediate buffer and then copying from the | ||
92 | * intermediate buffer back to the encrypted kernel space. The physical | ||
93 | * addresses of the two kernel space mappings are the same, which | ||
94 | * results in the kernel being encrypted "in place". | ||
95 | */ | ||
96 | /* Enable the new page tables */ | ||
97 | mov %rdx, %cr3 | ||
98 | |||
99 | /* Flush any global TLBs */ | ||
100 | mov %cr4, %rdx | ||
101 | andq $~X86_CR4_PGE, %rdx | ||
102 | mov %rdx, %cr4 | ||
103 | orq $X86_CR4_PGE, %rdx | ||
104 | mov %rdx, %cr4 | ||
105 | |||
106 | /* Set the PAT register PA5 entry to write-protect */ | ||
107 | push %rcx | ||
108 | movl $MSR_IA32_CR_PAT, %ecx | ||
109 | rdmsr | ||
110 | push %rdx /* Save original PAT value */ | ||
111 | andl $0xffff00ff, %edx /* Clear PA5 */ | ||
112 | orl $0x00000500, %edx /* Set PA5 to WP */ | ||
113 | wrmsr | ||
114 | pop %rdx /* RDX contains original PAT value */ | ||
115 | pop %rcx | ||
116 | |||
117 | movq %rcx, %r9 /* Save kernel length */ | ||
118 | movq %rdi, %r10 /* Save encrypted kernel address */ | ||
119 | movq %rsi, %r11 /* Save decrypted kernel address */ | ||
120 | |||
121 | wbinvd /* Write back and invalidate caches */ | ||
122 | |||
123 | /* Copy/encrypt 2MB at a time */ | ||
124 | 1: | ||
125 | movq %r11, %rsi /* Source - decrypted kernel */ | ||
126 | movq %r8, %rdi /* Dest - intermediate copy buffer */ | ||
127 | movq $PMD_PAGE_SIZE, %rcx /* 2MB length */ | ||
128 | rep movsb | ||
129 | |||
130 | movq %r8, %rsi /* Source - intermediate copy buffer */ | ||
131 | movq %r10, %rdi /* Dest - encrypted kernel */ | ||
132 | movq $PMD_PAGE_SIZE, %rcx /* 2MB length */ | ||
133 | rep movsb | ||
134 | |||
135 | addq $PMD_PAGE_SIZE, %r11 | ||
136 | addq $PMD_PAGE_SIZE, %r10 | ||
137 | subq $PMD_PAGE_SIZE, %r9 /* Kernel length decrement */ | ||
138 | jnz 1b /* Kernel length not zero? */ | ||
139 | |||
140 | /* Restore PAT register */ | ||
141 | push %rdx /* Save original PAT value */ | ||
142 | movl $MSR_IA32_CR_PAT, %ecx | ||
143 | rdmsr | ||
144 | pop %rdx /* Restore original PAT value */ | ||
145 | wrmsr | ||
146 | |||
147 | ret | ||
148 | .L__enc_copy_end: | ||
149 | ENDPROC(__enc_copy) | ||
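For readers cross-checking the assembly above, a rough C model of __enc_copy's copy loop may help. This is purely illustrative (the names and the memcpy framing are not kernel API): the two kernel mappings alias the same physical pages and differ only in the encryption bit, so bouncing each 2MB chunk through the intermediate buffer rewrites it encrypted in place.

        /*
         * Illustrative model only. 'dec' and 'enc' are the two virtual
         * mappings of the same physical kernel pages, with the encryption
         * bit clear and set respectively; 'buf' is the workarea buffer.
         */
        static void enc_copy_model(u8 *enc, const u8 *dec, u8 *buf, size_t len)
        {
                while (len >= PMD_PAGE_SIZE) {
                        memcpy(buf, dec, PMD_PAGE_SIZE); /* read decrypted */
                        memcpy(enc, buf, PMD_PAGE_SIZE); /* write encrypted */
                        enc += PMD_PAGE_SIZE;
                        dec += PMD_PAGE_SIZE;
                        len -= PMD_PAGE_SIZE;
                }
        }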
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index a88cfbfbd078..a99679826846 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c | |||
@@ -37,21 +37,21 @@ struct va_alignment __read_mostly va_align = { | |||
37 | .flags = -1, | 37 | .flags = -1, |
38 | }; | 38 | }; |
39 | 39 | ||
40 | unsigned long tasksize_32bit(void) | 40 | unsigned long task_size_32bit(void) |
41 | { | 41 | { |
42 | return IA32_PAGE_OFFSET; | 42 | return IA32_PAGE_OFFSET; |
43 | } | 43 | } |
44 | 44 | ||
45 | unsigned long tasksize_64bit(void) | 45 | unsigned long task_size_64bit(int full_addr_space) |
46 | { | 46 | { |
47 | return TASK_SIZE_MAX; | 47 | return full_addr_space ? TASK_SIZE_MAX : DEFAULT_MAP_WINDOW; |
48 | } | 48 | } |
49 | 49 | ||
50 | static unsigned long stack_maxrandom_size(unsigned long task_size) | 50 | static unsigned long stack_maxrandom_size(unsigned long task_size) |
51 | { | 51 | { |
52 | unsigned long max = 0; | 52 | unsigned long max = 0; |
53 | if (current->flags & PF_RANDOMIZE) { | 53 | if (current->flags & PF_RANDOMIZE) { |
54 | max = (-1UL) & __STACK_RND_MASK(task_size == tasksize_32bit()); | 54 | max = (-1UL) & __STACK_RND_MASK(task_size == task_size_32bit()); |
55 | max <<= PAGE_SHIFT; | 55 | max <<= PAGE_SHIFT; |
56 | } | 56 | } |
57 | 57 | ||
@@ -141,7 +141,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) | |||
141 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; | 141 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; |
142 | 142 | ||
143 | arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base, | 143 | arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base, |
144 | arch_rnd(mmap64_rnd_bits), tasksize_64bit()); | 144 | arch_rnd(mmap64_rnd_bits), task_size_64bit(0)); |
145 | 145 | ||
146 | #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES | 146 | #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES |
147 | /* | 147 | /* |
@@ -151,7 +151,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) | |||
151 | * mmap_base, the compat syscall uses mmap_compat_base. | 151 | * mmap_base, the compat syscall uses mmap_compat_base. |
152 | */ | 152 | */ |
153 | arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base, | 153 | arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base, |
154 | arch_rnd(mmap32_rnd_bits), tasksize_32bit()); | 154 | arch_rnd(mmap32_rnd_bits), task_size_32bit()); |
155 | #endif | 155 | #endif |
156 | } | 156 | } |
157 | 157 | ||
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 1c34b767c84c..9ceaa955d2ba 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c | |||
@@ -355,10 +355,19 @@ int mpx_enable_management(void) | |||
355 | */ | 355 | */ |
356 | bd_base = mpx_get_bounds_dir(); | 356 | bd_base = mpx_get_bounds_dir(); |
357 | down_write(&mm->mmap_sem); | 357 | down_write(&mm->mmap_sem); |
358 | |||
359 | /* MPX doesn't support addresses above 47 bits yet. */ | ||
360 | if (find_vma(mm, DEFAULT_MAP_WINDOW)) { | ||
361 | pr_warn_once("%s (%d): MPX cannot handle addresses " | ||
362 | "above 47-bits. Disabling.", | ||
363 | current->comm, current->pid); | ||
364 | ret = -ENXIO; | ||
365 | goto out; | ||
366 | } | ||
358 | mm->context.bd_addr = bd_base; | 367 | mm->context.bd_addr = bd_base; |
359 | if (mm->context.bd_addr == MPX_INVALID_BOUNDS_DIR) | 368 | if (mm->context.bd_addr == MPX_INVALID_BOUNDS_DIR) |
360 | ret = -ENXIO; | 369 | ret = -ENXIO; |
361 | 370 | out: | |
362 | up_write(&mm->mmap_sem); | 371 | up_write(&mm->mmap_sem); |
363 | return ret; | 372 | return ret; |
364 | } | 373 | } |
@@ -1030,3 +1039,25 @@ void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1030 | if (ret) | 1039 | if (ret) |
1031 | force_sig(SIGSEGV, current); | 1040 | force_sig(SIGSEGV, current); |
1032 | } | 1041 | } |
1042 | |||
1043 | /* MPX cannot handle addresses above 47 bits yet. */ | ||
1044 | unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len, | ||
1045 | unsigned long flags) | ||
1046 | { | ||
1047 | if (!kernel_managing_mpx_tables(current->mm)) | ||
1048 | return addr; | ||
1049 | if (addr + len <= DEFAULT_MAP_WINDOW) | ||
1050 | return addr; | ||
1051 | if (flags & MAP_FIXED) | ||
1052 | return -ENOMEM; | ||
1053 | |||
1054 | /* | ||
1055 | * Requested len is larger than the whole area we're allowed to map in. | ||
1056 | * Resetting hinting address wouldn't do much good -- fail early. | ||
1057 | */ | ||
1058 | if (len > DEFAULT_MAP_WINDOW) | ||
1059 | return -ENOMEM; | ||
1060 | |||
1061 | /* Look for unmap area within DEFAULT_MAP_WINDOW */ | ||
1062 | return 0; | ||
1063 | } | ||
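A hypothetical caller in a get_unmapped_area() implementation would consume the return value like this (sketch only; the real hook-up lives elsewhere in this series): 0 means "search below DEFAULT_MAP_WINDOW as usual", an error value aborts, and any other address means the hint is acceptable as-is.

        addr = mpx_unmapped_area_check(addr, len, flags);
        if (IS_ERR_VALUE(addr))
                return addr;    /* -ENOMEM: MPX cannot satisfy this request */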
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 757b0bcdf712..dfb7d657cf43 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -1775,6 +1775,70 @@ int set_memory_4k(unsigned long addr, int numpages) | |||
1775 | __pgprot(0), 1, 0, NULL); | 1775 | __pgprot(0), 1, 0, NULL); |
1776 | } | 1776 | } |
1777 | 1777 | ||
1778 | static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) | ||
1779 | { | ||
1780 | struct cpa_data cpa; | ||
1781 | unsigned long start; | ||
1782 | int ret; | ||
1783 | |||
1784 | /* Nothing to do if the SME is not active */ | ||
1785 | if (!sme_active()) | ||
1786 | return 0; | ||
1787 | |||
1788 | /* Should not be working on unaligned addresses */ | ||
1789 | if (WARN_ONCE(addr & ~PAGE_MASK, "misaligned address: %#lx\n", addr)) | ||
1790 | addr &= PAGE_MASK; | ||
1791 | |||
1792 | start = addr; | ||
1793 | |||
1794 | memset(&cpa, 0, sizeof(cpa)); | ||
1795 | cpa.vaddr = &addr; | ||
1796 | cpa.numpages = numpages; | ||
1797 | cpa.mask_set = enc ? __pgprot(_PAGE_ENC) : __pgprot(0); | ||
1798 | cpa.mask_clr = enc ? __pgprot(0) : __pgprot(_PAGE_ENC); | ||
1799 | cpa.pgd = init_mm.pgd; | ||
1800 | |||
1801 | /* Must avoid aliasing mappings in the highmem code */ | ||
1802 | kmap_flush_unused(); | ||
1803 | vm_unmap_aliases(); | ||
1804 | |||
1805 | /* | ||
1806 | * Before changing the encryption attribute, we need to flush caches. | ||
1807 | */ | ||
1808 | if (static_cpu_has(X86_FEATURE_CLFLUSH)) | ||
1809 | cpa_flush_range(start, numpages, 1); | ||
1810 | else | ||
1811 | cpa_flush_all(1); | ||
1812 | |||
1813 | ret = __change_page_attr_set_clr(&cpa, 1); | ||
1814 | |||
1815 | /* | ||
1816 | * After changing the encryption attribute, we need to flush TLBs | ||
1817 | * again in case any speculative TLB caching occurred (but no need | ||
1818 | * to flush caches again). We could just use cpa_flush_all(), but | ||
1819 | * in case TLB flushing gets optimized in the cpa_flush_range() | ||
1820 | * path use the same logic as above. | ||
1821 | */ | ||
1822 | if (static_cpu_has(X86_FEATURE_CLFLUSH)) | ||
1823 | cpa_flush_range(start, numpages, 0); | ||
1824 | else | ||
1825 | cpa_flush_all(0); | ||
1826 | |||
1827 | return ret; | ||
1828 | } | ||
1829 | |||
1830 | int set_memory_encrypted(unsigned long addr, int numpages) | ||
1831 | { | ||
1832 | return __set_memory_enc_dec(addr, numpages, true); | ||
1833 | } | ||
1834 | EXPORT_SYMBOL_GPL(set_memory_encrypted); | ||
1835 | |||
1836 | int set_memory_decrypted(unsigned long addr, int numpages) | ||
1837 | { | ||
1838 | return __set_memory_enc_dec(addr, numpages, false); | ||
1839 | } | ||
1840 | EXPORT_SYMBOL_GPL(set_memory_decrypted); | ||
1841 | |||
1778 | int set_pages_uc(struct page *page, int numpages) | 1842 | int set_pages_uc(struct page *page, int numpages) |
1779 | { | 1843 | { |
1780 | unsigned long addr = (unsigned long)page_address(page); | 1844 | unsigned long addr = (unsigned long)page_address(page); |
@@ -2020,6 +2084,9 @@ int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, | |||
2020 | if (!(page_flags & _PAGE_RW)) | 2084 | if (!(page_flags & _PAGE_RW)) |
2021 | cpa.mask_clr = __pgprot(_PAGE_RW); | 2085 | cpa.mask_clr = __pgprot(_PAGE_RW); |
2022 | 2086 | ||
2087 | if (!(page_flags & _PAGE_ENC)) | ||
2088 | cpa.mask_clr = pgprot_encrypted(cpa.mask_clr); | ||
2089 | |||
2023 | cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags); | 2090 | cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags); |
2024 | 2091 | ||
2025 | retval = __change_page_attr_set_clr(&cpa, 0); | 2092 | retval = __change_page_attr_set_clr(&cpa, 0); |
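A typical use of the two new exports would be a driver sharing a buffer with a device that cannot do encrypted DMA; an illustrative sequence, assuming vaddr/npages describe a page-aligned kernel buffer:

        ret = set_memory_decrypted(vaddr, npages);
        if (ret)
                return ret;
        /* ... device DMAs to/from the buffer in the clear ... */
        ret = set_memory_encrypted(vaddr, npages);

Both calls are no-ops when SME is inactive, so callers need no sme_active() guard of their own.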
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 45979502f64b..fe7d57a8fb60 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -293,7 +293,7 @@ void init_cache_modes(void) | |||
293 | * pat_init - Initialize PAT MSR and PAT table | 293 | * pat_init - Initialize PAT MSR and PAT table |
294 | * | 294 | * |
295 | * This function initializes PAT MSR and PAT table with an OS-defined value | 295 | * This function initializes PAT MSR and PAT table with an OS-defined value |
296 | * to enable additional cache attributes, WC and WT. | 296 | * to enable additional cache attributes, WC, WT and WP. |
297 | * | 297 | * |
298 | * This function must be called on all CPUs using the specific sequence of | 298 | * This function must be called on all CPUs using the specific sequence of |
299 | * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this | 299 | * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this |
@@ -352,7 +352,7 @@ void pat_init(void) | |||
352 | * 010 2 UC-: _PAGE_CACHE_MODE_UC_MINUS | 352 | * 010 2 UC-: _PAGE_CACHE_MODE_UC_MINUS |
353 | * 011 3 UC : _PAGE_CACHE_MODE_UC | 353 | * 011 3 UC : _PAGE_CACHE_MODE_UC |
354 | * 100 4 WB : Reserved | 354 | * 100 4 WB : Reserved |
355 | * 101 5 WC : Reserved | 355 | * 101 5 WP : _PAGE_CACHE_MODE_WP |
356 | * 110 6 UC-: Reserved | 356 | * 110 6 UC-: Reserved |
357 | * 111 7 WT : _PAGE_CACHE_MODE_WT | 357 | * 111 7 WT : _PAGE_CACHE_MODE_WT |
358 | * | 358 | * |
@@ -360,7 +360,7 @@ void pat_init(void) | |||
360 | * corresponding types in the presence of PAT errata. | 360 | * corresponding types in the presence of PAT errata. |
361 | */ | 361 | */ |
362 | pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | | 362 | pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | |
363 | PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, WT); | 363 | PAT(4, WB) | PAT(5, WP) | PAT(6, UC_MINUS) | PAT(7, WT); |
364 | } | 364 | } |
365 | 365 | ||
366 | if (!boot_cpu_done) { | 366 | if (!boot_cpu_done) { |
@@ -744,6 +744,9 @@ EXPORT_SYMBOL(arch_io_free_memtype_wc); | |||
744 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, | 744 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, |
745 | unsigned long size, pgprot_t vma_prot) | 745 | unsigned long size, pgprot_t vma_prot) |
746 | { | 746 | { |
747 | if (!phys_mem_access_encrypted(pfn << PAGE_SHIFT, size)) | ||
748 | vma_prot = pgprot_decrypted(vma_prot); | ||
749 | |||
747 | return vma_prot; | 750 | return vma_prot; |
748 | } | 751 | } |
749 | 752 | ||
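Each PAT entry occupies one byte of the 64-bit IA32_PAT MSR, so PA5 sits at bits 40-47; that is why the SME encryption routine earlier in this series masks the high dword with 0xffff00ff and ORs in 0x00000500. The equivalent C, as a sketch:

        u64 pat;

        rdmsrl(MSR_IA32_CR_PAT, pat);
        pat &= ~(0xffULL << 40);        /* clear PA5 */
        pat |= 0x05ULL << 40;           /* 0x05 == write-protected (WP) */
        wrmsrl(MSR_IA32_CR_PAT, pat);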
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 508a708eb9a6..218834a3e9ad 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -56,7 +56,7 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) | |||
56 | { | 56 | { |
57 | pgtable_page_dtor(pte); | 57 | pgtable_page_dtor(pte); |
58 | paravirt_release_pte(page_to_pfn(pte)); | 58 | paravirt_release_pte(page_to_pfn(pte)); |
59 | tlb_remove_page(tlb, pte); | 59 | tlb_remove_table(tlb, pte); |
60 | } | 60 | } |
61 | 61 | ||
62 | #if CONFIG_PGTABLE_LEVELS > 2 | 62 | #if CONFIG_PGTABLE_LEVELS > 2 |
@@ -72,21 +72,21 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) | |||
72 | tlb->need_flush_all = 1; | 72 | tlb->need_flush_all = 1; |
73 | #endif | 73 | #endif |
74 | pgtable_pmd_page_dtor(page); | 74 | pgtable_pmd_page_dtor(page); |
75 | tlb_remove_page(tlb, page); | 75 | tlb_remove_table(tlb, page); |
76 | } | 76 | } |
77 | 77 | ||
78 | #if CONFIG_PGTABLE_LEVELS > 3 | 78 | #if CONFIG_PGTABLE_LEVELS > 3 |
79 | void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) | 79 | void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) |
80 | { | 80 | { |
81 | paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); | 81 | paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); |
82 | tlb_remove_page(tlb, virt_to_page(pud)); | 82 | tlb_remove_table(tlb, virt_to_page(pud)); |
83 | } | 83 | } |
84 | 84 | ||
85 | #if CONFIG_PGTABLE_LEVELS > 4 | 85 | #if CONFIG_PGTABLE_LEVELS > 4 |
86 | void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d) | 86 | void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d) |
87 | { | 87 | { |
88 | paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT); | 88 | paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT); |
89 | tlb_remove_page(tlb, virt_to_page(p4d)); | 89 | tlb_remove_table(tlb, virt_to_page(p4d)); |
90 | } | 90 | } |
91 | #endif /* CONFIG_PGTABLE_LEVELS > 4 */ | 91 | #endif /* CONFIG_PGTABLE_LEVELS > 4 */ |
92 | #endif /* CONFIG_PGTABLE_LEVELS > 3 */ | 92 | #endif /* CONFIG_PGTABLE_LEVELS > 3 */ |
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 014d07a80053..ce104b962a17 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -28,6 +28,42 @@ | |||
28 | * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi | 28 | * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi |
29 | */ | 29 | */ |
30 | 30 | ||
31 | atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); | ||
32 | |||
33 | static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, | ||
34 | u16 *new_asid, bool *need_flush) | ||
35 | { | ||
36 | u16 asid; | ||
37 | |||
38 | if (!static_cpu_has(X86_FEATURE_PCID)) { | ||
39 | *new_asid = 0; | ||
40 | *need_flush = true; | ||
41 | return; | ||
42 | } | ||
43 | |||
44 | for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) { | ||
45 | if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) != | ||
46 | next->context.ctx_id) | ||
47 | continue; | ||
48 | |||
49 | *new_asid = asid; | ||
50 | *need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) < | ||
51 | next_tlb_gen); | ||
52 | return; | ||
53 | } | ||
54 | |||
55 | /* | ||
56 | * We don't currently own an ASID slot on this CPU. | ||
57 | * Allocate a slot. | ||
58 | */ | ||
59 | *new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1; | ||
60 | if (*new_asid >= TLB_NR_DYN_ASIDS) { | ||
61 | *new_asid = 0; | ||
62 | this_cpu_write(cpu_tlbstate.next_asid, 1); | ||
63 | } | ||
64 | *need_flush = true; | ||
65 | } | ||
66 | |||
31 | void leave_mm(int cpu) | 67 | void leave_mm(int cpu) |
32 | { | 68 | { |
33 | struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); | 69 | struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); |
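To make the slot recycling in choose_new_asid() concrete, a worked example (assuming TLB_NR_DYN_ASIDS is 6, as defined elsewhere in this series):

        /* CPU has mms A..F cached in ASID slots 0..5.  Switching to a
         * seventh mm G matches no ctx_id, so next_asid wraps, slot 0 is
         * recycled for G and need_flush is forced, evicting A.  A later
         * switch back to A misses in the same way and flushes again. */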
@@ -43,12 +79,11 @@ void leave_mm(int cpu) | |||
43 | if (loaded_mm == &init_mm) | 79 | if (loaded_mm == &init_mm) |
44 | return; | 80 | return; |
45 | 81 | ||
46 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) | 82 | /* Warn if we're not lazy. */ |
47 | BUG(); | 83 | WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))); |
48 | 84 | ||
49 | switch_mm(NULL, &init_mm, NULL); | 85 | switch_mm(NULL, &init_mm, NULL); |
50 | } | 86 | } |
51 | EXPORT_SYMBOL_GPL(leave_mm); | ||
52 | 87 | ||
53 | void switch_mm(struct mm_struct *prev, struct mm_struct *next, | 88 | void switch_mm(struct mm_struct *prev, struct mm_struct *next, |
54 | struct task_struct *tsk) | 89 | struct task_struct *tsk) |
@@ -63,115 +98,219 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, | |||
63 | void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, | 98 | void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, |
64 | struct task_struct *tsk) | 99 | struct task_struct *tsk) |
65 | { | 100 | { |
66 | unsigned cpu = smp_processor_id(); | ||
67 | struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm); | 101 | struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm); |
102 | u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); | ||
103 | unsigned cpu = smp_processor_id(); | ||
104 | u64 next_tlb_gen; | ||
68 | 105 | ||
69 | /* | 106 | /* |
70 | * NB: The scheduler will call us with prev == next when | 107 | * NB: The scheduler will call us with prev == next when switching |
71 | * switching from lazy TLB mode to normal mode if active_mm | 108 | * from lazy TLB mode to normal mode if active_mm isn't changing. |
72 | * isn't changing. When this happens, there is no guarantee | 109 | * When this happens, we don't assume that CR3 (and hence |
73 | * that CR3 (and hence cpu_tlbstate.loaded_mm) matches next. | 110 | * cpu_tlbstate.loaded_mm) matches next. |
74 | * | 111 | * |
75 | * NB: leave_mm() calls us with prev == NULL and tsk == NULL. | 112 | * NB: leave_mm() calls us with prev == NULL and tsk == NULL. |
76 | */ | 113 | */ |
77 | 114 | ||
78 | this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); | 115 | /* We don't want flush_tlb_func_* to run concurrently with us. */ |
116 | if (IS_ENABLED(CONFIG_PROVE_LOCKING)) | ||
117 | WARN_ON_ONCE(!irqs_disabled()); | ||
118 | |||
119 | /* | ||
120 | * Verify that CR3 is what we think it is. This will catch | ||
121 | * hypothetical buggy code that directly switches to swapper_pg_dir | ||
122 | * without going through leave_mm() / switch_mm_irqs_off() or that | ||
123 | * does something like write_cr3(read_cr3_pa()). | ||
124 | */ | ||
125 | VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid)); | ||
79 | 126 | ||
80 | if (real_prev == next) { | 127 | if (real_prev == next) { |
81 | /* | 128 | VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != |
82 | * There's nothing to do: we always keep the per-mm control | 129 | next->context.ctx_id); |
83 | * regs in sync with cpu_tlbstate.loaded_mm. Just | 130 | |
84 | * sanity-check mm_cpumask. | 131 | if (cpumask_test_cpu(cpu, mm_cpumask(next))) { |
85 | */ | 132 | /* |
86 | if (WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(next)))) | 133 | * There's nothing to do: we weren't lazy, and we |
87 | cpumask_set_cpu(cpu, mm_cpumask(next)); | 134 | * aren't changing our mm. We don't need to flush |
88 | return; | 135 | * anything, nor do we need to update CR3, CR4, or |
89 | } | 136 | * LDTR. |
137 | */ | ||
138 | return; | ||
139 | } | ||
140 | |||
141 | /* Resume remote flushes and then read tlb_gen. */ | ||
142 | cpumask_set_cpu(cpu, mm_cpumask(next)); | ||
143 | next_tlb_gen = atomic64_read(&next->context.tlb_gen); | ||
144 | |||
145 | if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) < | ||
146 | next_tlb_gen) { | ||
147 | /* | ||
148 | * Ideally, we'd have a flush_tlb() variant that | ||
149 | * takes the known CR3 value as input. This would | ||
150 | * be faster on Xen PV and on hypothetical CPUs | ||
151 | * on which INVPCID is fast. | ||
152 | */ | ||
153 | this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen, | ||
154 | next_tlb_gen); | ||
155 | write_cr3(__sme_pa(next->pgd) | prev_asid); | ||
156 | trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, | ||
157 | TLB_FLUSH_ALL); | ||
158 | } | ||
90 | 159 | ||
91 | if (IS_ENABLED(CONFIG_VMAP_STACK)) { | ||
92 | /* | 160 | /* |
93 | * If our current stack is in vmalloc space and isn't | 161 | * We just exited lazy mode, which means that CR4 and/or LDTR |
94 | * mapped in the new pgd, we'll double-fault. Forcibly | 162 | * may be stale. (Changes to the required CR4 and LDTR states |
95 | * map it. | 163 | * are not reflected in tlb_gen.) |
96 | */ | 164 | */ |
97 | unsigned int stack_pgd_index = pgd_index(current_stack_pointer()); | 165 | } else { |
98 | 166 | u16 new_asid; | |
99 | pgd_t *pgd = next->pgd + stack_pgd_index; | 167 | bool need_flush; |
100 | 168 | ||
101 | if (unlikely(pgd_none(*pgd))) | 169 | if (IS_ENABLED(CONFIG_VMAP_STACK)) { |
102 | set_pgd(pgd, init_mm.pgd[stack_pgd_index]); | 170 | /* |
103 | } | 171 | * If our current stack is in vmalloc space and isn't |
172 | * mapped in the new pgd, we'll double-fault. Forcibly | ||
173 | * map it. | ||
174 | */ | ||
175 | unsigned int index = pgd_index(current_stack_pointer()); | ||
176 | pgd_t *pgd = next->pgd + index; | ||
177 | |||
178 | if (unlikely(pgd_none(*pgd))) | ||
179 | set_pgd(pgd, init_mm.pgd[index]); | ||
180 | } | ||
104 | 181 | ||
105 | this_cpu_write(cpu_tlbstate.loaded_mm, next); | 182 | /* Stop remote flushes for the previous mm */ |
183 | if (cpumask_test_cpu(cpu, mm_cpumask(real_prev))) | ||
184 | cpumask_clear_cpu(cpu, mm_cpumask(real_prev)); | ||
106 | 185 | ||
107 | WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next))); | 186 | VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next))); |
108 | cpumask_set_cpu(cpu, mm_cpumask(next)); | ||
109 | 187 | ||
110 | /* | 188 | /* |
111 | * Re-load page tables. | 189 | * Start remote flushes and then read tlb_gen. |
112 | * | 190 | */ |
113 | * This logic has an ordering constraint: | 191 | cpumask_set_cpu(cpu, mm_cpumask(next)); |
114 | * | 192 | next_tlb_gen = atomic64_read(&next->context.tlb_gen); |
115 | * CPU 0: Write to a PTE for 'next' | 193 | |
116 | * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI. | 194 | choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); |
117 | * CPU 1: set bit 1 in next's mm_cpumask | 195 | |
118 | * CPU 1: load from the PTE that CPU 0 writes (implicit) | 196 | if (need_flush) { |
119 | * | 197 | this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); |
120 | * We need to prevent an outcome in which CPU 1 observes | 198 | this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); |
121 | * the new PTE value and CPU 0 observes bit 1 clear in | 199 | write_cr3(__sme_pa(next->pgd) | new_asid); |
122 | * mm_cpumask. (If that occurs, then the IPI will never | 200 | trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, |
123 | * be sent, and CPU 0's TLB will contain a stale entry.) | 201 | TLB_FLUSH_ALL); |
124 | * | 202 | } else { |
125 | * The bad outcome can occur if either CPU's load is | 203 | /* The new ASID is already up to date. */ |
126 | * reordered before that CPU's store, so both CPUs must | 204 | write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH); |
127 | * execute full barriers to prevent this from happening. | 205 | trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0); |
128 | * | 206 | } |
129 | * Thus, switch_mm needs a full barrier between the | ||
130 | * store to mm_cpumask and any operation that could load | ||
131 | * from next->pgd. TLB fills are special and can happen | ||
132 | * due to instruction fetches or for no reason at all, | ||
133 | * and neither LOCK nor MFENCE orders them. | ||
134 | * Fortunately, load_cr3() is serializing and gives the | ||
135 | * ordering guarantee we need. | ||
136 | */ | ||
137 | load_cr3(next->pgd); | ||
138 | |||
139 | /* | ||
140 | * This gets called via leave_mm() in the idle path where RCU | ||
141 | * functions differently. Tracing normally uses RCU, so we have to | ||
142 | * call the tracepoint specially here. | ||
143 | */ | ||
144 | trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); | ||
145 | 207 | ||
146 | /* Stop flush ipis for the previous mm */ | 208 | this_cpu_write(cpu_tlbstate.loaded_mm, next); |
147 | WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) && | 209 | this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid); |
148 | real_prev != &init_mm); | 210 | } |
149 | cpumask_clear_cpu(cpu, mm_cpumask(real_prev)); | ||
150 | 211 | ||
151 | /* Load per-mm CR4 and LDTR state */ | ||
152 | load_mm_cr4(next); | 212 | load_mm_cr4(next); |
153 | switch_ldt(real_prev, next); | 213 | switch_ldt(real_prev, next); |
154 | } | 214 | } |
155 | 215 | ||
216 | /* | ||
217 | * flush_tlb_func_common()'s memory ordering requirement is that any | ||
218 | * TLB fills that happen after we flush the TLB are ordered after we | ||
219 | * read active_mm's tlb_gen. We don't need any explicit barriers | ||
220 | * because all x86 flush operations are serializing and the | ||
221 | * atomic64_read operation won't be reordered by the compiler. | ||
222 | */ | ||
156 | static void flush_tlb_func_common(const struct flush_tlb_info *f, | 223 | static void flush_tlb_func_common(const struct flush_tlb_info *f, |
157 | bool local, enum tlb_flush_reason reason) | 224 | bool local, enum tlb_flush_reason reason) |
158 | { | 225 | { |
226 | /* | ||
227 | * We have three different tlb_gen values in here. They are: | ||
228 | * | ||
229 | * - mm_tlb_gen: the latest generation. | ||
230 | * - local_tlb_gen: the generation that this CPU has already caught | ||
231 | * up to. | ||
232 | * - f->new_tlb_gen: the generation that the requester of the flush | ||
233 | * wants us to catch up to. | ||
234 | */ | ||
235 | struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); | ||
236 | u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); | ||
237 | u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen); | ||
238 | u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen); | ||
239 | |||
159 | /* This code cannot presently handle being reentered. */ | 240 | /* This code cannot presently handle being reentered. */ |
160 | VM_WARN_ON(!irqs_disabled()); | 241 | VM_WARN_ON(!irqs_disabled()); |
161 | 242 | ||
162 | if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) { | 243 | VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) != |
163 | leave_mm(smp_processor_id()); | 244 | loaded_mm->context.ctx_id); |
245 | |||
246 | if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) { | ||
247 | /* | ||
248 | * We're in lazy mode -- don't flush. We can get here on | ||
249 | * remote flushes due to races and on local flushes if a | ||
250 | * kernel thread coincidentally flushes the mm it's lazily | ||
251 | * still using. | ||
252 | */ | ||
164 | return; | 253 | return; |
165 | } | 254 | } |
166 | 255 | ||
167 | if (f->end == TLB_FLUSH_ALL) { | 256 | if (unlikely(local_tlb_gen == mm_tlb_gen)) { |
168 | local_flush_tlb(); | 257 | /* |
169 | if (local) | 258 | * There's nothing to do: we're already up to date. This can |
170 | count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); | 259 | * happen if two concurrent flushes happen -- the first flush to |
171 | trace_tlb_flush(reason, TLB_FLUSH_ALL); | 260 | * be handled can catch us all the way up, leaving no work for |
172 | } else { | 261 | * the second flush. |
262 | */ | ||
263 | trace_tlb_flush(reason, 0); | ||
264 | return; | ||
265 | } | ||
266 | |||
267 | WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen); | ||
268 | WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen); | ||
269 | |||
270 | /* | ||
271 | * If we get to this point, we know that our TLB is out of date. | ||
272 | * This does not strictly imply that we need to flush (it's | ||
273 | * possible that f->new_tlb_gen <= local_tlb_gen), but we're | ||
274 | * going to need to flush in the very near future, so we might | ||
275 | * as well get it over with. | ||
276 | * | ||
277 | * The only question is whether to do a full or partial flush. | ||
278 | * | ||
279 | * We do a partial flush if requested and two extra conditions | ||
280 | * are met: | ||
281 | * | ||
282 | * 1. f->new_tlb_gen == local_tlb_gen + 1. We have an invariant that | ||
283 | * we've always done all needed flushes to catch up to | ||
284 | * local_tlb_gen. If, for example, local_tlb_gen == 2 and | ||
285 | * f->new_tlb_gen == 3, then we know that the flush needed to bring | ||
286 | * us up to date for tlb_gen 3 is the partial flush we're | ||
287 | * processing. | ||
288 | * | ||
289 | * As an example of why this check is needed, suppose that there | ||
290 | * are two concurrent flushes. The first is a full flush that | ||
291 | * changes context.tlb_gen from 1 to 2. The second is a partial | ||
292 | * flush that changes context.tlb_gen from 2 to 3. If they get | ||
293 | * processed on this CPU in reverse order, we'll see | ||
294 | * local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL. | ||
295 | * If we were to use __flush_tlb_single() and set local_tlb_gen to | ||
297 | * 3, we'd break the invariant: we'd update local_tlb_gen above | ||
297 | * 1 without the full flush that's needed for tlb_gen 2. | ||
298 | * | ||
299 | * 2. f->new_tlb_gen == mm_tlb_gen. This is purely an optimization. | ||
300 | * Partial TLB flushes are not all that much cheaper than full TLB | ||
301 | * flushes, so it seems unlikely that it would be a performance win | ||
302 | * to do a partial flush if that won't bring our TLB fully up to | ||
303 | * date. By doing a full flush instead, we can increase | ||
304 | * local_tlb_gen all the way to mm_tlb_gen and we can probably | ||
305 | * avoid another flush in the very near future. | ||
306 | */ | ||
307 | if (f->end != TLB_FLUSH_ALL && | ||
308 | f->new_tlb_gen == local_tlb_gen + 1 && | ||
309 | f->new_tlb_gen == mm_tlb_gen) { | ||
310 | /* Partial flush */ | ||
173 | unsigned long addr; | 311 | unsigned long addr; |
174 | unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT; | 312 | unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT; |
313 | |||
175 | addr = f->start; | 314 | addr = f->start; |
176 | while (addr < f->end) { | 315 | while (addr < f->end) { |
177 | __flush_tlb_single(addr); | 316 | __flush_tlb_single(addr); |
@@ -180,7 +319,16 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f, | |||
180 | if (local) | 319 | if (local) |
181 | count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages); | 320 | count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages); |
182 | trace_tlb_flush(reason, nr_pages); | 321 | trace_tlb_flush(reason, nr_pages); |
322 | } else { | ||
323 | /* Full flush. */ | ||
324 | local_flush_tlb(); | ||
325 | if (local) | ||
326 | count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); | ||
327 | trace_tlb_flush(reason, TLB_FLUSH_ALL); | ||
183 | } | 328 | } |
329 | |||
330 | /* Both paths above update our state to mm_tlb_gen. */ | ||
331 | this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen); | ||
184 | } | 332 | } |
185 | 333 | ||
186 | static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason) | 334 | static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason) |
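A worked example of the generation bookkeeping above (illustrative numbers): suppose mm_tlb_gen is 3 and this CPU's local_tlb_gen is 1, i.e. a full flush (gen 2) and a partial flush (gen 3) were requested while this CPU lagged behind.

        /* The IPI for the partial flush (f->new_tlb_gen == 3) arrives
         * first: 3 != local_tlb_gen + 1, so the partial-flush conditions
         * fail, we do a full flush, and local_tlb_gen jumps straight to 3.
         * The IPI for the earlier full flush (f->new_tlb_gen == 2) then
         * sees local_tlb_gen == mm_tlb_gen and returns with no work. */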
@@ -214,6 +362,21 @@ void native_flush_tlb_others(const struct cpumask *cpumask, | |||
214 | (info->end - info->start) >> PAGE_SHIFT); | 362 | (info->end - info->start) >> PAGE_SHIFT); |
215 | 363 | ||
216 | if (is_uv_system()) { | 364 | if (is_uv_system()) { |
365 | /* | ||
366 | * This whole special case is confused. UV has a "Broadcast | ||
367 | * Assist Unit", which seems to be a fancy way to send IPIs. | ||
368 | * Back when x86 used an explicit TLB flush IPI, UV was | ||
369 | * optimized to use its own mechanism. These days, x86 uses | ||
370 | * smp_call_function_many(), but UV still uses a manual IPI, | ||
371 | * and that IPI's action is out of date -- it does a manual | ||
372 | * flush instead of calling flush_tlb_func_remote(). This | ||
373 | * means that the percpu tlb_gen variables won't be updated | ||
374 | * and we'll do pointless flushes on future context switches. | ||
375 | * | ||
376 | * Rather than hooking native_flush_tlb_others() here, I think | ||
377 | * that UV should be updated so that smp_call_function_many(), | ||
378 | * etc, are optimal on UV. | ||
379 | */ | ||
217 | unsigned int cpu; | 380 | unsigned int cpu; |
218 | 381 | ||
219 | cpu = smp_processor_id(); | 382 | cpu = smp_processor_id(); |
@@ -250,8 +413,8 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, | |||
250 | 413 | ||
251 | cpu = get_cpu(); | 414 | cpu = get_cpu(); |
252 | 415 | ||
253 | /* Synchronize with switch_mm. */ | 416 | /* This is also a barrier that synchronizes with switch_mm(). */ |
254 | smp_mb(); | 417 | info.new_tlb_gen = inc_mm_tlb_gen(mm); |
255 | 418 | ||
256 | /* Should we flush just the requested range? */ | 419 | /* Should we flush just the requested range? */ |
257 | if ((end != TLB_FLUSH_ALL) && | 420 | if ((end != TLB_FLUSH_ALL) && |
@@ -273,6 +436,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, | |||
273 | 436 | ||
274 | if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) | 437 | if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) |
275 | flush_tlb_others(mm_cpumask(mm), &info); | 438 | flush_tlb_others(mm_cpumask(mm), &info); |
439 | |||
276 | put_cpu(); | 440 | put_cpu(); |
277 | } | 441 | } |
278 | 442 | ||
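inc_mm_tlb_gen() is introduced elsewhere in this series; modulo exact placement it amounts to the following sketch, and on x86 the atomic64_inc_return() is a full barrier, which is what lets it replace the explicit smp_mb():

        static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
        {
                /* Full barrier on x86: orders prior page-table updates
                 * before the subsequent mm_cpumask() read. */
                return atomic64_inc_return(&mm->context.tlb_gen);
        }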
@@ -281,8 +445,6 @@ static void do_flush_tlb_all(void *info) | |||
281 | { | 445 | { |
282 | count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); | 446 | count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); |
283 | __flush_tlb_all(); | 447 | __flush_tlb_all(); |
284 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) | ||
285 | leave_mm(smp_processor_id()); | ||
286 | } | 448 | } |
287 | 449 | ||
288 | void flush_tlb_all(void) | 450 | void flush_tlb_all(void) |
@@ -335,6 +497,7 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) | |||
335 | 497 | ||
336 | if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) | 498 | if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) |
337 | flush_tlb_others(&batch->cpumask, &info); | 499 | flush_tlb_others(&batch->cpumask, &info); |
500 | |||
338 | cpumask_clear(&batch->cpumask); | 501 | cpumask_clear(&batch->cpumask); |
339 | 502 | ||
340 | put_cpu(); | 503 | put_cpu(); |
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index dbe2132b0ed4..7a5350d08cef 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -674,7 +674,7 @@ int pcibios_add_device(struct pci_dev *dev) | |||
674 | 674 | ||
675 | pa_data = boot_params.hdr.setup_data; | 675 | pa_data = boot_params.hdr.setup_data; |
676 | while (pa_data) { | 676 | while (pa_data) { |
677 | data = ioremap(pa_data, sizeof(*rom)); | 677 | data = memremap(pa_data, sizeof(*rom), MEMREMAP_WB); |
678 | if (!data) | 678 | if (!data) |
679 | return -ENOMEM; | 679 | return -ENOMEM; |
680 | 680 | ||
@@ -693,7 +693,7 @@ int pcibios_add_device(struct pci_dev *dev) | |||
693 | } | 693 | } |
694 | } | 694 | } |
695 | pa_data = data->next; | 695 | pa_data = data->next; |
696 | iounmap(data); | 696 | memunmap(data); |
697 | } | 697 | } |
698 | set_dma_domain_ops(dev); | 698 | set_dma_domain_ops(dev); |
699 | set_dev_domain_options(dev); | 699 | set_dev_domain_options(dev); |
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index f084d8718ac4..6217b23e85f6 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
@@ -1035,12 +1035,12 @@ void __init efi_enter_virtual_mode(void) | |||
1035 | /* | 1035 | /* |
1036 | * Convenience functions to obtain memory types and attributes | 1036 | * Convenience functions to obtain memory types and attributes |
1037 | */ | 1037 | */ |
1038 | u32 efi_mem_type(unsigned long phys_addr) | 1038 | int efi_mem_type(unsigned long phys_addr) |
1039 | { | 1039 | { |
1040 | efi_memory_desc_t *md; | 1040 | efi_memory_desc_t *md; |
1041 | 1041 | ||
1042 | if (!efi_enabled(EFI_MEMMAP)) | 1042 | if (!efi_enabled(EFI_MEMMAP)) |
1043 | return 0; | 1043 | return -ENOTSUPP; |
1044 | 1044 | ||
1045 | for_each_efi_memory_desc(md) { | 1045 | for_each_efi_memory_desc(md) { |
1046 | if ((md->phys_addr <= phys_addr) && | 1046 | if ((md->phys_addr <= phys_addr) && |
@@ -1048,7 +1048,7 @@ u32 efi_mem_type(unsigned long phys_addr) | |||
1048 | (md->num_pages << EFI_PAGE_SHIFT)))) | 1048 | (md->num_pages << EFI_PAGE_SHIFT)))) |
1049 | return md->type; | 1049 | return md->type; |
1050 | } | 1050 | } |
1051 | return 0; | 1051 | return -EINVAL; |
1052 | } | 1052 | } |
1053 | 1053 | ||
1054 | static int __init arch_parse_efi_cmdline(char *str) | 1054 | static int __init arch_parse_efi_cmdline(char *str) |
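With the signed return type, callers must now treat negative values as "not described by the EFI memmap"; an illustrative check:

        int type = efi_mem_type(paddr);

        if (type < 0)
                return false;   /* no memmap, or address not covered */
        return type == EFI_RUNTIME_SERVICES_DATA;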
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 9bf72f5bfedb..12e83888e5b9 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c | |||
@@ -327,7 +327,7 @@ virt_to_phys_or_null_size(void *va, unsigned long size) | |||
327 | 327 | ||
328 | int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) | 328 | int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) |
329 | { | 329 | { |
330 | unsigned long pfn, text; | 330 | unsigned long pfn, text, pf; |
331 | struct page *page; | 331 | struct page *page; |
332 | unsigned npages; | 332 | unsigned npages; |
333 | pgd_t *pgd; | 333 | pgd_t *pgd; |
@@ -335,7 +335,12 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) | |||
335 | if (efi_enabled(EFI_OLD_MEMMAP)) | 335 | if (efi_enabled(EFI_OLD_MEMMAP)) |
336 | return 0; | 336 | return 0; |
337 | 337 | ||
338 | efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd); | 338 | /* |
339 | * Since the PGD is encrypted, set the encryption mask so that when | ||
340 | * this value is loaded into cr3 the PGD will be decrypted during | ||
341 | * the pagetable walk. | ||
342 | */ | ||
343 | efi_scratch.efi_pgt = (pgd_t *)__sme_pa(efi_pgd); | ||
339 | pgd = efi_pgd; | 344 | pgd = efi_pgd; |
340 | 345 | ||
341 | /* | 346 | /* |
@@ -345,7 +350,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) | |||
345 | * phys_efi_set_virtual_address_map(). | 350 | * phys_efi_set_virtual_address_map(). |
346 | */ | 351 | */ |
347 | pfn = pa_memmap >> PAGE_SHIFT; | 352 | pfn = pa_memmap >> PAGE_SHIFT; |
348 | if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX | _PAGE_RW)) { | 353 | pf = _PAGE_NX | _PAGE_RW | _PAGE_ENC; |
354 | if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, pf)) { | ||
349 | pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); | 355 | pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); |
350 | return 1; | 356 | return 1; |
351 | } | 357 | } |
@@ -388,7 +394,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) | |||
388 | text = __pa(_text); | 394 | text = __pa(_text); |
389 | pfn = text >> PAGE_SHIFT; | 395 | pfn = text >> PAGE_SHIFT; |
390 | 396 | ||
391 | if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, _PAGE_RW)) { | 397 | pf = _PAGE_RW | _PAGE_ENC; |
398 | if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) { | ||
392 | pr_err("Failed to map kernel text 1:1\n"); | 399 | pr_err("Failed to map kernel text 1:1\n"); |
393 | return 1; | 400 | return 1; |
394 | } | 401 | } |
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index cd4be19c36dc..1f71980fc5e0 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c | |||
@@ -1,6 +1,7 @@ | |||
1 | #include <linux/io.h> | 1 | #include <linux/io.h> |
2 | #include <linux/slab.h> | 2 | #include <linux/slab.h> |
3 | #include <linux/memblock.h> | 3 | #include <linux/memblock.h> |
4 | #include <linux/mem_encrypt.h> | ||
4 | 5 | ||
5 | #include <asm/set_memory.h> | 6 | #include <asm/set_memory.h> |
6 | #include <asm/pgtable.h> | 7 | #include <asm/pgtable.h> |
@@ -59,6 +60,13 @@ static void __init setup_real_mode(void) | |||
59 | 60 | ||
60 | base = (unsigned char *)real_mode_header; | 61 | base = (unsigned char *)real_mode_header; |
61 | 62 | ||
63 | /* | ||
64 | * If SME is active, the trampoline area will need to be in | ||
65 | * decrypted memory in order to bring up other processors | ||
66 | * successfully. | ||
67 | */ | ||
68 | set_memory_decrypted((unsigned long)base, size >> PAGE_SHIFT); | ||
69 | |||
62 | memcpy(base, real_mode_blob, size); | 70 | memcpy(base, real_mode_blob, size); |
63 | 71 | ||
64 | phys_base = __pa(base); | 72 | phys_base = __pa(base); |
@@ -100,6 +108,10 @@ static void __init setup_real_mode(void) | |||
100 | trampoline_cr4_features = &trampoline_header->cr4; | 108 | trampoline_cr4_features = &trampoline_header->cr4; |
101 | *trampoline_cr4_features = mmu_cr4_features; | 109 | *trampoline_cr4_features = mmu_cr4_features; |
102 | 110 | ||
111 | trampoline_header->flags = 0; | ||
112 | if (sme_active()) | ||
113 | trampoline_header->flags |= TH_FLAGS_SME_ACTIVE; | ||
114 | |||
103 | trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); | 115 | trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); |
104 | trampoline_pgd[0] = trampoline_pgd_entry.pgd; | 116 | trampoline_pgd[0] = trampoline_pgd_entry.pgd; |
105 | trampoline_pgd[511] = init_top_pgt[511].pgd; | 117 | trampoline_pgd[511] = init_top_pgt[511].pgd; |
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index dac7b20d2f9d..614fd7064d0a 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <asm/msr.h> | 30 | #include <asm/msr.h> |
31 | #include <asm/segment.h> | 31 | #include <asm/segment.h> |
32 | #include <asm/processor-flags.h> | 32 | #include <asm/processor-flags.h> |
33 | #include <asm/realmode.h> | ||
33 | #include "realmode.h" | 34 | #include "realmode.h" |
34 | 35 | ||
35 | .text | 36 | .text |
@@ -92,6 +93,28 @@ ENTRY(startup_32) | |||
92 | movl %edx, %fs | 93 | movl %edx, %fs |
93 | movl %edx, %gs | 94 | movl %edx, %gs |
94 | 95 | ||
96 | /* | ||
97 | * Check for memory encryption support. This is a safety net in | ||
98 | * case BIOS hasn't done the necessary step of setting the bit in | ||
99 | * the MSR for this AP. If SME is active and we've gotten this far | ||
100 | * then it is safe for us to set the MSR bit and continue. If we | ||
101 | * don't, we'll eventually crash trying to execute encrypted | ||
102 | * instructions. | ||
103 | */ | ||
104 | bt $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags | ||
105 | jnc .Ldone | ||
106 | movl $MSR_K8_SYSCFG, %ecx | ||
107 | rdmsr | ||
108 | bts $MSR_K8_SYSCFG_MEM_ENCRYPT_BIT, %eax | ||
109 | jc .Ldone | ||
110 | |||
111 | /* | ||
112 | * Memory encryption is enabled but the SME enable bit for this | ||
113 | * CPU has not been set. It is safe to set it, so do so. | ||
114 | */ | ||
115 | wrmsr | ||
116 | .Ldone: | ||
117 | |||
95 | movl pa_tr_cr4, %eax | 118 | movl pa_tr_cr4, %eax |
96 | movl %eax, %cr4 # Enable PAE mode | 119 | movl %eax, %cr4 # Enable PAE mode |
97 | 120 | ||
@@ -147,6 +170,7 @@ GLOBAL(trampoline_header) | |||
147 | tr_start: .space 8 | 170 | tr_start: .space 8 |
148 | GLOBAL(tr_efer) .space 8 | 171 | GLOBAL(tr_efer) .space 8 |
149 | GLOBAL(tr_cr4) .space 4 | 172 | GLOBAL(tr_cr4) .space 4 |
173 | GLOBAL(tr_flags) .space 4 | ||
150 | END(trampoline_header) | 174 | END(trampoline_header) |
151 | 175 | ||
152 | #include "trampoline_common.S" | 176 | #include "trampoline_common.S" |
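For readability, the same safety net expressed in C (illustrative; the trampoline naturally has to do this in assembly):

        u64 syscfg;

        rdmsrl(MSR_K8_SYSCFG, syscfg);
        if (!(syscfg & MSR_K8_SYSCFG_MEM_ENCRYPT)) {
                syscfg |= MSR_K8_SYSCFG_MEM_ENCRYPT;
                wrmsrl(MSR_K8_SYSCFG, syscfg);
        }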
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 027987638e98..1ecd419811a2 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -17,6 +17,9 @@ config XEN_PV | |||
17 | bool "Xen PV guest support" | 17 | bool "Xen PV guest support" |
18 | default y | 18 | default y |
19 | depends on XEN | 19 | depends on XEN |
20 | # XEN_PV is not ready to work with 5-level paging. | ||
21 | # Changes to hypervisor are also required. | ||
22 | depends on !X86_5LEVEL | ||
20 | select XEN_HAVE_PVMMU | 23 | select XEN_HAVE_PVMMU |
21 | select XEN_HAVE_VPMU | 24 | select XEN_HAVE_VPMU |
22 | help | 25 | help |
@@ -75,4 +78,6 @@ config XEN_DEBUG_FS | |||
75 | config XEN_PVH | 78 | config XEN_PVH |
76 | bool "Support for running as a PVH guest" | 79 | bool "Support for running as a PVH guest" |
77 | depends on XEN && XEN_PVHVM && ACPI | 80 | depends on XEN && XEN_PVHVM && ACPI |
81 | # Pre-built page tables are not ready to handle 5-level paging. | ||
82 | depends on !X86_5LEVEL | ||
78 | def_bool n | 83 | def_bool n |
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 98491521bb43..6c279c8f0a0e 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c | |||
@@ -263,6 +263,13 @@ static void __init xen_init_capabilities(void) | |||
263 | setup_clear_cpu_cap(X86_FEATURE_MTRR); | 263 | setup_clear_cpu_cap(X86_FEATURE_MTRR); |
264 | setup_clear_cpu_cap(X86_FEATURE_ACC); | 264 | setup_clear_cpu_cap(X86_FEATURE_ACC); |
265 | setup_clear_cpu_cap(X86_FEATURE_X2APIC); | 265 | setup_clear_cpu_cap(X86_FEATURE_X2APIC); |
266 | setup_clear_cpu_cap(X86_FEATURE_SME); | ||
267 | |||
268 | /* | ||
269 | * Xen PV would need some work to support PCID: CR3 handling as well | ||
270 | * as xen_flush_tlb_others() would need updating. | ||
271 | */ | ||
272 | setup_clear_cpu_cap(X86_FEATURE_PCID); | ||
266 | 273 | ||
267 | if (!xen_initial_domain()) | 274 | if (!xen_initial_domain()) |
268 | setup_clear_cpu_cap(X86_FEATURE_ACPI); | 275 | setup_clear_cpu_cap(X86_FEATURE_ACPI); |
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index cab28cf2cffb..e437714750f8 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c | |||
@@ -1005,14 +1005,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm) | |||
1005 | /* Get the "official" set of cpus referring to our pagetable. */ | 1005 | /* Get the "official" set of cpus referring to our pagetable. */ |
1006 | if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) { | 1006 | if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) { |
1007 | for_each_online_cpu(cpu) { | 1007 | for_each_online_cpu(cpu) { |
1008 | if (!cpumask_test_cpu(cpu, mm_cpumask(mm)) | 1008 | if (per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) |
1009 | && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) | ||
1010 | continue; | 1009 | continue; |
1011 | smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1); | 1010 | smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1); |
1012 | } | 1011 | } |
1013 | return; | 1012 | return; |
1014 | } | 1013 | } |
1015 | cpumask_copy(mask, mm_cpumask(mm)); | ||
1016 | 1014 | ||
1017 | /* | 1015 | /* |
1018 | * It's possible that a vcpu may have a stale reference to our | 1016 | * It's possible that a vcpu may have a stale reference to our |
@@ -1021,6 +1019,7 @@ static void xen_drop_mm_ref(struct mm_struct *mm) | |||
1021 | * look at its actual current cr3 value, and force it to flush | 1019 | * look at its actual current cr3 value, and force it to flush |
1022 | * if needed. | 1020 | * if needed. |
1023 | */ | 1021 | */ |
1022 | cpumask_clear(mask); | ||
1024 | for_each_online_cpu(cpu) { | 1023 | for_each_online_cpu(cpu) { |
1025 | if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) | 1024 | if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) |
1026 | cpumask_set_cpu(cpu, mask); | 1025 | cpumask_set_cpu(cpu, mask); |
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 72a8e6adebe6..a7525e95d53f 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S | |||
@@ -58,7 +58,7 @@ ENTRY(hypercall_page) | |||
58 | #else | 58 | #else |
59 | ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __START_KERNEL_map) | 59 | ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __START_KERNEL_map) |
60 | /* Map the p2m table to a 512GB-aligned user address. */ | 60 | /* Map the p2m table to a 512GB-aligned user address. */ |
61 | ELFNOTE(Xen, XEN_ELFNOTE_INIT_P2M, .quad PGDIR_SIZE) | 61 | ELFNOTE(Xen, XEN_ELFNOTE_INIT_P2M, .quad (PUD_SIZE * PTRS_PER_PUD)) |
62 | #endif | 62 | #endif |
63 | #ifdef CONFIG_XEN_PV | 63 | #ifdef CONFIG_XEN_PV |
64 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) | 64 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) |
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 5c8aa9cf62d7..fe3d2a40f311 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c | |||
@@ -708,8 +708,6 @@ static DEFINE_RAW_SPINLOCK(c3_lock); | |||
708 | static void acpi_idle_enter_bm(struct acpi_processor *pr, | 708 | static void acpi_idle_enter_bm(struct acpi_processor *pr, |
709 | struct acpi_processor_cx *cx, bool timer_bc) | 709 | struct acpi_processor_cx *cx, bool timer_bc) |
710 | { | 710 | { |
711 | acpi_unlazy_tlb(smp_processor_id()); | ||
712 | |||
713 | /* | 711 | /* |
714 | * Must be done before busmaster disable as we might need to | 712 | * Must be done before busmaster disable as we might need to |
715 | * access HPET ! | 713 | * access HPET ! |
diff --git a/drivers/firmware/dmi-sysfs.c b/drivers/firmware/dmi-sysfs.c index ef76e5eecf0b..d5de6ee8466d 100644 --- a/drivers/firmware/dmi-sysfs.c +++ b/drivers/firmware/dmi-sysfs.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/list.h> | 26 | #include <linux/list.h> |
27 | #include <linux/io.h> | 27 | #include <linux/io.h> |
28 | #include <asm/dmi.h> | ||
28 | 29 | ||
29 | #define MAX_ENTRY_TYPE 255 /* Most of these aren't used, but we consider | 30 | #define MAX_ENTRY_TYPE 255 /* Most of these aren't used, but we consider |
30 | the top entry type is only 8 bits */ | 31 | the top entry type is only 8 bits */ |
@@ -380,7 +381,7 @@ static ssize_t dmi_sel_raw_read_phys32(struct dmi_sysfs_entry *entry, | |||
380 | u8 __iomem *mapped; | 381 | u8 __iomem *mapped; |
381 | ssize_t wrote = 0; | 382 | ssize_t wrote = 0; |
382 | 383 | ||
383 | mapped = ioremap(sel->access_method_address, sel->area_length); | 384 | mapped = dmi_remap(sel->access_method_address, sel->area_length); |
384 | if (!mapped) | 385 | if (!mapped) |
385 | return -EIO; | 386 | return -EIO; |
386 | 387 | ||
@@ -390,7 +391,7 @@ static ssize_t dmi_sel_raw_read_phys32(struct dmi_sysfs_entry *entry, | |||
390 | wrote++; | 391 | wrote++; |
391 | } | 392 | } |
392 | 393 | ||
393 | iounmap(mapped); | 394 | dmi_unmap(mapped); |
394 | return wrote; | 395 | return wrote; |
395 | } | 396 | } |
396 | 397 | ||
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 045d6d311bde..69d4d130e055 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c | |||
@@ -55,6 +55,25 @@ struct efi __read_mostly efi = { | |||
55 | }; | 55 | }; |
56 | EXPORT_SYMBOL(efi); | 56 | EXPORT_SYMBOL(efi); |
57 | 57 | ||
58 | static unsigned long *efi_tables[] = { | ||
59 | &efi.mps, | ||
60 | &efi.acpi, | ||
61 | &efi.acpi20, | ||
62 | &efi.smbios, | ||
63 | &efi.smbios3, | ||
64 | &efi.sal_systab, | ||
65 | &efi.boot_info, | ||
66 | &efi.hcdp, | ||
67 | &efi.uga, | ||
68 | &efi.uv_systab, | ||
69 | &efi.fw_vendor, | ||
70 | &efi.runtime, | ||
71 | &efi.config_table, | ||
72 | &efi.esrt, | ||
73 | &efi.properties_table, | ||
74 | &efi.mem_attr_table, | ||
75 | }; | ||
76 | |||
58 | static bool disable_runtime; | 77 | static bool disable_runtime; |
59 | static int __init setup_noefi(char *arg) | 78 | static int __init setup_noefi(char *arg) |
60 | { | 79 | { |
@@ -855,6 +874,20 @@ int efi_status_to_err(efi_status_t status) | |||
855 | return err; | 874 | return err; |
856 | } | 875 | } |
857 | 876 | ||
877 | bool efi_is_table_address(unsigned long phys_addr) | ||
878 | { | ||
879 | unsigned int i; | ||
880 | |||
881 | if (phys_addr == EFI_INVALID_TABLE_ADDR) | ||
882 | return false; | ||
883 | |||
884 | for (i = 0; i < ARRAY_SIZE(efi_tables); i++) | ||
885 | if (*(efi_tables[i]) == phys_addr) | ||
886 | return true; | ||
887 | |||
888 | return false; | ||
889 | } | ||
890 | |||
858 | #ifdef CONFIG_KEXEC | 891 | #ifdef CONFIG_KEXEC |
859 | static int update_efi_random_seed(struct notifier_block *nb, | 892 | static int update_efi_random_seed(struct notifier_block *nb, |
860 | unsigned long code, void *unused) | 893 | unsigned long code, void *unused) |
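The intended consumer is the x86 memremap path, which must map firmware tables written before any encrypted mappings existed as decrypted; an illustrative fragment:

        /* Sketch: firmware-owned tables must not be mapped encrypted. */
        if (efi_is_table_address(phys_addr))
                encrypted = false;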
diff --git a/drivers/firmware/pcdp.c b/drivers/firmware/pcdp.c index 75273a251603..e83d6aec0c13 100644 --- a/drivers/firmware/pcdp.c +++ b/drivers/firmware/pcdp.c | |||
@@ -95,7 +95,7 @@ efi_setup_pcdp_console(char *cmdline) | |||
95 | if (efi.hcdp == EFI_INVALID_TABLE_ADDR) | 95 | if (efi.hcdp == EFI_INVALID_TABLE_ADDR) |
96 | return -ENODEV; | 96 | return -ENODEV; |
97 | 97 | ||
98 | pcdp = early_ioremap(efi.hcdp, 4096); | 98 | pcdp = early_memremap(efi.hcdp, 4096); |
99 | printk(KERN_INFO "PCDP: v%d at 0x%lx\n", pcdp->rev, efi.hcdp); | 99 | printk(KERN_INFO "PCDP: v%d at 0x%lx\n", pcdp->rev, efi.hcdp); |
100 | 100 | ||
101 | if (strstr(cmdline, "console=hcdp")) { | 101 | if (strstr(cmdline, "console=hcdp")) { |
@@ -131,6 +131,6 @@ efi_setup_pcdp_console(char *cmdline) | |||
131 | } | 131 | } |
132 | 132 | ||
133 | out: | 133 | out: |
134 | early_iounmap(pcdp, 4096); | 134 | early_memunmap(pcdp, 4096); |
135 | return rc; | 135 | return rc; |
136 | } | 136 | } |
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 88c6d78ee2d5..c55f338e380b 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/pagemap.h> | 36 | #include <linux/pagemap.h> |
37 | #include <linux/shmem_fs.h> | 37 | #include <linux/shmem_fs.h> |
38 | #include <linux/dma-buf.h> | 38 | #include <linux/dma-buf.h> |
39 | #include <linux/mem_encrypt.h> | ||
39 | #include <drm/drmP.h> | 40 | #include <drm/drmP.h> |
40 | #include <drm/drm_vma_manager.h> | 41 | #include <drm/drm_vma_manager.h> |
41 | #include <drm/drm_gem.h> | 42 | #include <drm/drm_gem.h> |
@@ -965,6 +966,7 @@ int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size, | |||
965 | vma->vm_ops = dev->driver->gem_vm_ops; | 966 | vma->vm_ops = dev->driver->gem_vm_ops; |
966 | vma->vm_private_data = obj; | 967 | vma->vm_private_data = obj; |
967 | vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); | 968 | vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); |
969 | vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); | ||
968 | 970 | ||
969 | /* Take a ref for this mapping of the object, so that the fault | 971 | /* Take a ref for this mapping of the object, so that the fault |
970 | * handler can dereference the mmap offset's pointer to the object. | 972 | * handler can dereference the mmap offset's pointer to the object. |
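pgprot_decrypted() is supplied by the new mem_encrypt headers; roughly, it clears the encryption bit from the protection value, along the lines of:

        /* No-op when SME is inactive, since sme_me_mask is then 0. */
        #define pgprot_decrypted(prot)  __pgprot(__sme_clr(pgprot_val(prot)))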
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c index 13a59ed2afbc..2660543ad86a 100644 --- a/drivers/gpu/drm/drm_vm.c +++ b/drivers/gpu/drm/drm_vm.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/efi.h> | 40 | #include <linux/efi.h> |
41 | #include <linux/slab.h> | 41 | #include <linux/slab.h> |
42 | #endif | 42 | #endif |
43 | #include <linux/mem_encrypt.h> | ||
43 | #include <asm/pgtable.h> | 44 | #include <asm/pgtable.h> |
44 | #include "drm_internal.h" | 45 | #include "drm_internal.h" |
45 | #include "drm_legacy.h" | 46 | #include "drm_legacy.h" |
@@ -58,6 +59,9 @@ static pgprot_t drm_io_prot(struct drm_local_map *map, | |||
58 | { | 59 | { |
59 | pgprot_t tmp = vm_get_page_prot(vma->vm_flags); | 60 | pgprot_t tmp = vm_get_page_prot(vma->vm_flags); |
60 | 61 | ||
62 | /* We don't want graphics memory to be mapped encrypted */ | ||
63 | tmp = pgprot_decrypted(tmp); | ||
64 | |||
61 | #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) | 65 | #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) |
62 | if (map->type == _DRM_REGISTERS && !(map->flags & _DRM_WRITE_COMBINING)) | 66 | if (map->type == _DRM_REGISTERS && !(map->flags & _DRM_WRITE_COMBINING)) |
63 | tmp = pgprot_noncached(tmp); | 67 | tmp = pgprot_noncached(tmp); |
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index a01e5c90fd87..c8ebb757e36b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/rbtree.h> | 39 | #include <linux/rbtree.h> |
40 | #include <linux/module.h> | 40 | #include <linux/module.h> |
41 | #include <linux/uaccess.h> | 41 | #include <linux/uaccess.h> |
42 | #include <linux/mem_encrypt.h> | ||
42 | 43 | ||
43 | #define TTM_BO_VM_NUM_PREFAULT 16 | 44 | #define TTM_BO_VM_NUM_PREFAULT 16 |
44 | 45 | ||
@@ -230,9 +231,11 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf) | |||
230 | * first page. | 231 | * first page. |
231 | */ | 232 | */ |
232 | for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) { | 233 | for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) { |
233 | if (bo->mem.bus.is_iomem) | 234 | if (bo->mem.bus.is_iomem) { |
235 | /* Iomem should not be marked encrypted */ | ||
236 | cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot); | ||
234 | pfn = bdev->driver->io_mem_pfn(bo, page_offset); | 237 | pfn = bdev->driver->io_mem_pfn(bo, page_offset); |
235 | else { | 238 | } else { |
236 | page = ttm->pages[page_offset]; | 239 | page = ttm->pages[page_offset]; |
237 | if (unlikely(!page && i == 0)) { | 240 | if (unlikely(!page && i == 0)) { |
238 | retval = VM_FAULT_OOM; | 241 | retval = VM_FAULT_OOM; |
diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index b7ca90db4e80..b5b335c9b2bb 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
15 | #include <linux/fb.h> | 15 | #include <linux/fb.h> |
16 | #include <linux/dma-buf.h> | 16 | #include <linux/dma-buf.h> |
17 | #include <linux/mem_encrypt.h> | ||
17 | 18 | ||
18 | #include <drm/drmP.h> | 19 | #include <drm/drmP.h> |
19 | #include <drm/drm_crtc.h> | 20 | #include <drm/drm_crtc.h> |
@@ -169,6 +170,9 @@ static int udl_fb_mmap(struct fb_info *info, struct vm_area_struct *vma) | |||
169 | pr_notice("mmap() framebuffer addr:%lu size:%lu\n", | 170 | pr_notice("mmap() framebuffer addr:%lu size:%lu\n", |
170 | pos, size); | 171 | pos, size); |
171 | 172 | ||
173 | /* We don't want the framebuffer to be mapped encrypted */ | ||
174 | vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); | ||
175 | |||
172 | while (size > 0) { | 176 | while (size > 0) { |
173 | page = vmalloc_to_pfn((void *)pos); | 177 | page = vmalloc_to_pfn((void *)pos); |
174 | if (remap_pfn_range(vma, start, page, PAGE_SIZE, PAGE_SHARED)) | 178 | if (remap_pfn_range(vma, start, page, PAGE_SIZE, PAGE_SHARED)) |
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index c2ae819a871c..e87ffb3c31a9 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c | |||
@@ -913,16 +913,15 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev, | |||
913 | struct cpuidle_state *state = &drv->states[index]; | 913 | struct cpuidle_state *state = &drv->states[index]; |
914 | unsigned long eax = flg2MWAIT(state->flags); | 914 | unsigned long eax = flg2MWAIT(state->flags); |
915 | unsigned int cstate; | 915 | unsigned int cstate; |
916 | int cpu = smp_processor_id(); | ||
917 | 916 | ||
918 | cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1; | 917 | cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1; |
919 | 918 | ||
920 | /* | 919 | /* |
921 | * leave_mm() to avoid costly and often unnecessary wakeups | 920 | * NB: if CPUIDLE_FLAG_TLB_FLUSHED is set, this idle transition |
922 | * for flushing the user TLB's associated with the active mm. | 921 | * will probably flush the TLB. It's not guaranteed to flush |
922 | * the TLB, though, so it's not clear that we can do anything | ||
923 | * useful with this knowledge. | ||
923 | */ | 924 | */ |
924 | if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED) | ||
925 | leave_mm(cpu); | ||
926 | 925 | ||
927 | if (!(lapic_timer_reliable_states & (1 << (cstate)))) | 926 | if (!(lapic_timer_reliable_states & (1 << (cstate)))) |
928 | tick_broadcast_enter(); | 927 | tick_broadcast_enter(); |
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 354cbd6392cd..4ad7e5e31943 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c | |||
@@ -575,7 +575,7 @@ static void dump_dte_entry(u16 devid) | |||
575 | 575 | ||
576 | static void dump_command(unsigned long phys_addr) | 576 | static void dump_command(unsigned long phys_addr) |
577 | { | 577 | { |
578 | struct iommu_cmd *cmd = phys_to_virt(phys_addr); | 578 | struct iommu_cmd *cmd = iommu_phys_to_virt(phys_addr); |
579 | int i; | 579 | int i; |
580 | 580 | ||
581 | for (i = 0; i < 4; ++i) | 581 | for (i = 0; i < 4; ++i) |
@@ -919,11 +919,13 @@ static void copy_cmd_to_buffer(struct amd_iommu *iommu, | |||
919 | 919 | ||
920 | static void build_completion_wait(struct iommu_cmd *cmd, u64 address) | 920 | static void build_completion_wait(struct iommu_cmd *cmd, u64 address) |
921 | { | 921 | { |
922 | u64 paddr = iommu_virt_to_phys((void *)address); | ||
923 | |||
922 | WARN_ON(address & 0x7ULL); | 924 | WARN_ON(address & 0x7ULL); |
923 | 925 | ||
924 | memset(cmd, 0, sizeof(*cmd)); | 926 | memset(cmd, 0, sizeof(*cmd)); |
925 | cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK; | 927 | cmd->data[0] = lower_32_bits(paddr) | CMD_COMPL_WAIT_STORE_MASK; |
926 | cmd->data[1] = upper_32_bits(__pa(address)); | 928 | cmd->data[1] = upper_32_bits(paddr); |
927 | cmd->data[2] = 1; | 929 | cmd->data[2] = 1; |
928 | CMD_SET_TYPE(cmd, CMD_COMPL_WAIT); | 930 | CMD_SET_TYPE(cmd, CMD_COMPL_WAIT); |
929 | } | 931 | } |
@@ -1383,7 +1385,7 @@ static bool increase_address_space(struct protection_domain *domain, | |||
1383 | return false; | 1385 | return false; |
1384 | 1386 | ||
1385 | *pte = PM_LEVEL_PDE(domain->mode, | 1387 | *pte = PM_LEVEL_PDE(domain->mode, |
1386 | virt_to_phys(domain->pt_root)); | 1388 | iommu_virt_to_phys(domain->pt_root)); |
1387 | domain->pt_root = pte; | 1389 | domain->pt_root = pte; |
1388 | domain->mode += 1; | 1390 | domain->mode += 1; |
1389 | domain->updated = true; | 1391 | domain->updated = true; |
@@ -1420,7 +1422,7 @@ static u64 *alloc_pte(struct protection_domain *domain, | |||
1420 | if (!page) | 1422 | if (!page) |
1421 | return NULL; | 1423 | return NULL; |
1422 | 1424 | ||
1423 | __npte = PM_LEVEL_PDE(level, virt_to_phys(page)); | 1425 | __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page)); |
1424 | 1426 | ||
1425 | /* pte could have been changed somewhere. */ | 1427 | /* pte could have been changed somewhere. */ |
1426 | if (cmpxchg64(pte, __pte, __npte) != __pte) { | 1428 | if (cmpxchg64(pte, __pte, __npte) != __pte) { |
@@ -1536,10 +1538,10 @@ static int iommu_map_page(struct protection_domain *dom, | |||
1536 | return -EBUSY; | 1538 | return -EBUSY; |
1537 | 1539 | ||
1538 | if (count > 1) { | 1540 | if (count > 1) { |
1539 | __pte = PAGE_SIZE_PTE(phys_addr, page_size); | 1541 | __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size); |
1540 | __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC; | 1542 | __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC; |
1541 | } else | 1543 | } else |
1542 | __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC; | 1544 | __pte = __sme_set(phys_addr) | IOMMU_PTE_P | IOMMU_PTE_FC; |
1543 | 1545 | ||
1544 | if (prot & IOMMU_PROT_IR) | 1546 | if (prot & IOMMU_PROT_IR) |
1545 | __pte |= IOMMU_PTE_IR; | 1547 | __pte |= IOMMU_PTE_IR; |
@@ -1755,7 +1757,7 @@ static void free_gcr3_tbl_level1(u64 *tbl) | |||
1755 | if (!(tbl[i] & GCR3_VALID)) | 1757 | if (!(tbl[i] & GCR3_VALID)) |
1756 | continue; | 1758 | continue; |
1757 | 1759 | ||
1758 | ptr = __va(tbl[i] & PAGE_MASK); | 1760 | ptr = iommu_phys_to_virt(tbl[i] & PAGE_MASK); |
1759 | 1761 | ||
1760 | free_page((unsigned long)ptr); | 1762 | free_page((unsigned long)ptr); |
1761 | } | 1763 | } |
@@ -1770,7 +1772,7 @@ static void free_gcr3_tbl_level2(u64 *tbl) | |||
1770 | if (!(tbl[i] & GCR3_VALID)) | 1772 | if (!(tbl[i] & GCR3_VALID)) |
1771 | continue; | 1773 | continue; |
1772 | 1774 | ||
1773 | ptr = __va(tbl[i] & PAGE_MASK); | 1775 | ptr = iommu_phys_to_virt(tbl[i] & PAGE_MASK); |
1774 | 1776 | ||
1775 | free_gcr3_tbl_level1(ptr); | 1777 | free_gcr3_tbl_level1(ptr); |
1776 | } | 1778 | } |
@@ -2049,7 +2051,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) | |||
2049 | u64 flags = 0; | 2051 | u64 flags = 0; |
2050 | 2052 | ||
2051 | if (domain->mode != PAGE_MODE_NONE) | 2053 | if (domain->mode != PAGE_MODE_NONE) |
2052 | pte_root = virt_to_phys(domain->pt_root); | 2054 | pte_root = iommu_virt_to_phys(domain->pt_root); |
2053 | 2055 | ||
2054 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) | 2056 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) |
2055 | << DEV_ENTRY_MODE_SHIFT; | 2057 | << DEV_ENTRY_MODE_SHIFT; |
@@ -2061,7 +2063,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) | |||
2061 | flags |= DTE_FLAG_IOTLB; | 2063 | flags |= DTE_FLAG_IOTLB; |
2062 | 2064 | ||
2063 | if (domain->flags & PD_IOMMUV2_MASK) { | 2065 | if (domain->flags & PD_IOMMUV2_MASK) { |
2064 | u64 gcr3 = __pa(domain->gcr3_tbl); | 2066 | u64 gcr3 = iommu_virt_to_phys(domain->gcr3_tbl); |
2065 | u64 glx = domain->glx; | 2067 | u64 glx = domain->glx; |
2066 | u64 tmp; | 2068 | u64 tmp; |
2067 | 2069 | ||
@@ -3606,10 +3608,10 @@ static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc) | |||
3606 | if (root == NULL) | 3608 | if (root == NULL) |
3607 | return NULL; | 3609 | return NULL; |
3608 | 3610 | ||
3609 | *pte = __pa(root) | GCR3_VALID; | 3611 | *pte = iommu_virt_to_phys(root) | GCR3_VALID; |
3610 | } | 3612 | } |
3611 | 3613 | ||
3612 | root = __va(*pte & PAGE_MASK); | 3614 | root = iommu_phys_to_virt(*pte & PAGE_MASK); |
3613 | 3615 | ||
3614 | level -= 1; | 3616 | level -= 1; |
3615 | } | 3617 | } |
@@ -3788,7 +3790,7 @@ static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) | |||
3788 | 3790 | ||
3789 | dte = amd_iommu_dev_table[devid].data[2]; | 3791 | dte = amd_iommu_dev_table[devid].data[2]; |
3790 | dte &= ~DTE_IRQ_PHYS_ADDR_MASK; | 3792 | dte &= ~DTE_IRQ_PHYS_ADDR_MASK; |
3791 | dte |= virt_to_phys(table->table); | 3793 | dte |= iommu_virt_to_phys(table->table); |
3792 | dte |= DTE_IRQ_REMAP_INTCTL; | 3794 | dte |= DTE_IRQ_REMAP_INTCTL; |
3793 | dte |= DTE_IRQ_TABLE_LEN; | 3795 | dte |= DTE_IRQ_TABLE_LEN; |
3794 | dte |= DTE_IRQ_REMAP_ENABLE; | 3796 | dte |= DTE_IRQ_REMAP_ENABLE; |
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 372303700566..2292a6cece76 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/iommu.h> | 30 | #include <linux/iommu.h> |
31 | #include <linux/kmemleak.h> | 31 | #include <linux/kmemleak.h> |
32 | #include <linux/crash_dump.h> | 32 | #include <linux/crash_dump.h> |
33 | #include <linux/mem_encrypt.h> | ||
33 | #include <asm/pci-direct.h> | 34 | #include <asm/pci-direct.h> |
34 | #include <asm/iommu.h> | 35 | #include <asm/iommu.h> |
35 | #include <asm/gart.h> | 36 | #include <asm/gart.h> |
@@ -348,7 +349,7 @@ static void iommu_set_device_table(struct amd_iommu *iommu) | |||
348 | 349 | ||
349 | BUG_ON(iommu->mmio_base == NULL); | 350 | BUG_ON(iommu->mmio_base == NULL); |
350 | 351 | ||
351 | entry = virt_to_phys(amd_iommu_dev_table); | 352 | entry = iommu_virt_to_phys(amd_iommu_dev_table); |
352 | entry |= (dev_table_size >> 12) - 1; | 353 | entry |= (dev_table_size >> 12) - 1; |
353 | memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, | 354 | memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, |
354 | &entry, sizeof(entry)); | 355 | &entry, sizeof(entry)); |
@@ -606,7 +607,7 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) | |||
606 | 607 | ||
607 | BUG_ON(iommu->cmd_buf == NULL); | 608 | BUG_ON(iommu->cmd_buf == NULL); |
608 | 609 | ||
609 | entry = (u64)virt_to_phys(iommu->cmd_buf); | 610 | entry = iommu_virt_to_phys(iommu->cmd_buf); |
610 | entry |= MMIO_CMD_SIZE_512; | 611 | entry |= MMIO_CMD_SIZE_512; |
611 | 612 | ||
612 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, | 613 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, |
@@ -635,7 +636,7 @@ static void iommu_enable_event_buffer(struct amd_iommu *iommu) | |||
635 | 636 | ||
636 | BUG_ON(iommu->evt_buf == NULL); | 637 | BUG_ON(iommu->evt_buf == NULL); |
637 | 638 | ||
638 | entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; | 639 | entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; |
639 | 640 | ||
640 | memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, | 641 | memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, |
641 | &entry, sizeof(entry)); | 642 | &entry, sizeof(entry)); |
@@ -668,7 +669,7 @@ static void iommu_enable_ppr_log(struct amd_iommu *iommu) | |||
668 | if (iommu->ppr_log == NULL) | 669 | if (iommu->ppr_log == NULL) |
669 | return; | 670 | return; |
670 | 671 | ||
671 | entry = (u64)virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512; | 672 | entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512; |
672 | 673 | ||
673 | memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET, | 674 | memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET, |
674 | &entry, sizeof(entry)); | 675 | &entry, sizeof(entry)); |
@@ -748,10 +749,10 @@ static int iommu_init_ga_log(struct amd_iommu *iommu) | |||
748 | if (!iommu->ga_log_tail) | 749 | if (!iommu->ga_log_tail) |
749 | goto err_out; | 750 | goto err_out; |
750 | 751 | ||
751 | entry = (u64)virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512; | 752 | entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512; |
752 | memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET, | 753 | memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET, |
753 | &entry, sizeof(entry)); | 754 | &entry, sizeof(entry)); |
754 | entry = ((u64)virt_to_phys(iommu->ga_log) & 0xFFFFFFFFFFFFFULL) & ~7ULL; | 755 | entry = (iommu_virt_to_phys(iommu->ga_log) & 0xFFFFFFFFFFFFFULL) & ~7ULL; |
755 | memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET, | 756 | memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET, |
756 | &entry, sizeof(entry)); | 757 | &entry, sizeof(entry)); |
757 | writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET); | 758 | writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET); |
@@ -2564,6 +2565,24 @@ static int __init amd_iommu_init(void) | |||
2564 | return ret; | 2565 | return ret; |
2565 | } | 2566 | } |
2566 | 2567 | ||
2568 | static bool amd_iommu_sme_check(void) | ||
2569 | { | ||
2570 | if (!sme_active() || (boot_cpu_data.x86 != 0x17)) | ||
2571 | return true; | ||
2572 | |||
2573 | /* For Fam17h, a specific level of support is required */ | ||
2574 | if (boot_cpu_data.microcode >= 0x08001205) | ||
2575 | return true; | ||
2576 | |||
2577 | if ((boot_cpu_data.microcode >= 0x08001126) && | ||
2578 | (boot_cpu_data.microcode <= 0x080011ff)) | ||
2579 | return true; | ||
2580 | |||
2581 | pr_notice("AMD-Vi: IOMMU not currently supported when SME is active\n"); | ||
2582 | |||
2583 | return false; | ||
2584 | } | ||
2585 | |||
2567 | /**************************************************************************** | 2586 | /**************************************************************************** |
2568 | * | 2587 | * |
2569 | * Early detect code. This code runs at IOMMU detection time in the DMA | 2588 | * Early detect code. This code runs at IOMMU detection time in the DMA |
@@ -2578,6 +2597,9 @@ int __init amd_iommu_detect(void) | |||
2578 | if (no_iommu || (iommu_detected && !gart_iommu_aperture)) | 2597 | if (no_iommu || (iommu_detected && !gart_iommu_aperture)) |
2579 | return -ENODEV; | 2598 | return -ENODEV; |
2580 | 2599 | ||
2600 | if (!amd_iommu_sme_check()) | ||
2601 | return -ENODEV; | ||
2602 | |||
2581 | ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); | 2603 | ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); |
2582 | if (ret) | 2604 | if (ret) |
2583 | return ret; | 2605 | return ret; |
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index 466260f8a1df..3f12fb2338ea 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h | |||
@@ -87,4 +87,14 @@ static inline bool iommu_feature(struct amd_iommu *iommu, u64 f) | |||
87 | return !!(iommu->features & f); | 87 | return !!(iommu->features & f); |
88 | } | 88 | } |
89 | 89 | ||
90 | static inline u64 iommu_virt_to_phys(void *vaddr) | ||
91 | { | ||
92 | return (u64)__sme_set(virt_to_phys(vaddr)); | ||
93 | } | ||
94 | |||
95 | static inline void *iommu_phys_to_virt(unsigned long paddr) | ||
96 | { | ||
97 | return phys_to_virt(__sme_clr(paddr)); | ||
98 | } | ||
99 | |||
90 | #endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ | 100 | #endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ |
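These two helpers are now the canonical address conversions in the IOMMU code: every physical address handed to the hardware gets the encryption mask applied, and every address read back from a table entry gets it cleared before being treated as a CPU physical address. A hedged sketch of the round trip, modeled on the GCR3 table walk above (error handling omitted):

    void *tbl = (void *)get_zeroed_page(GFP_KERNEL);
    u64 pte;
    void *again;

    /* Hardware-visible value: physical address with the C-bit set. */
    pte = iommu_virt_to_phys(tbl) | GCR3_VALID;

    /* CPU-visible pointer: the C-bit is masked off before phys_to_virt(). */
    again = iommu_phys_to_virt(pte & PAGE_MASK);
    /* again == tbl */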
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index d6b873b57054..8e3a85759242 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h | |||
@@ -344,7 +344,7 @@ | |||
344 | 344 | ||
345 | #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) | 345 | #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) |
346 | #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) | 346 | #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) |
347 | #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) | 347 | #define IOMMU_PTE_PAGE(pte) (iommu_phys_to_virt((pte) & IOMMU_PAGE_MASK)) |
348 | #define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07) | 348 | #define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07) |
349 | 349 | ||
350 | #define IOMMU_PROT_MASK 0x03 | 350 | #define IOMMU_PROT_MASK 0x03 |
diff --git a/drivers/sfi/sfi_core.c b/drivers/sfi/sfi_core.c index 296db7a69c27..153b3f3cc795 100644 --- a/drivers/sfi/sfi_core.c +++ b/drivers/sfi/sfi_core.c | |||
@@ -68,6 +68,7 @@ | |||
68 | #include <linux/init.h> | 68 | #include <linux/init.h> |
69 | #include <linux/sfi.h> | 69 | #include <linux/sfi.h> |
70 | #include <linux/slab.h> | 70 | #include <linux/slab.h> |
71 | #include <linux/io.h> | ||
71 | 72 | ||
72 | #include "sfi_core.h" | 73 | #include "sfi_core.h" |
73 | 74 | ||
@@ -86,13 +87,13 @@ static struct sfi_table_simple *syst_va __read_mostly; | |||
86 | /* | 87 | /* |
87 | * FW creates and saves the SFI tables in memory. When these tables get | 88 | * FW creates and saves the SFI tables in memory. When these tables get |
88 | * used, they may need to be mapped to virtual address space, and the mapping | 89 | * used, they may need to be mapped to virtual address space, and the mapping |
89 | * can happen before or after the ioremap() is ready, so a flag is needed | 90 | * can happen before or after the memremap() is ready, so a flag is needed |
90 | * to indicate this | 91 | * to indicate this |
91 | */ | 92 | */ |
92 | static u32 sfi_use_ioremap __read_mostly; | 93 | static u32 sfi_use_memremap __read_mostly; |
93 | 94 | ||
94 | /* | 95 | /* |
95 | * sfi_un/map_memory calls early_ioremap/iounmap which are __init functions | 96 | * sfi_un/map_memory calls early_memremap/memunmap which are __init functions |
96 | * and introduce a section mismatch. So use __ref to silence the warning. | 97 | * and introduce a section mismatch. So use __ref to silence the warning. |
97 | */ | 98 | */ |
98 | static void __iomem * __ref sfi_map_memory(u64 phys, u32 size) | 99 | static void __iomem * __ref sfi_map_memory(u64 phys, u32 size) |
@@ -100,10 +101,10 @@ static void __iomem * __ref sfi_map_memory(u64 phys, u32 size) | |||
100 | if (!phys || !size) | 101 | if (!phys || !size) |
101 | return NULL; | 102 | return NULL; |
102 | 103 | ||
103 | if (sfi_use_ioremap) | 104 | if (sfi_use_memremap) |
104 | return ioremap_cache(phys, size); | 105 | return memremap(phys, size, MEMREMAP_WB); |
105 | else | 106 | else |
106 | return early_ioremap(phys, size); | 107 | return early_memremap(phys, size); |
107 | } | 108 | } |
108 | 109 | ||
109 | static void __ref sfi_unmap_memory(void __iomem *virt, u32 size) | 110 | static void __ref sfi_unmap_memory(void __iomem *virt, u32 size) |
@@ -111,10 +112,10 @@ static void __ref sfi_unmap_memory(void __iomem *virt, u32 size) | |||
111 | if (!virt || !size) | 112 | if (!virt || !size) |
112 | return; | 113 | return; |
113 | 114 | ||
114 | if (sfi_use_ioremap) | 115 | if (sfi_use_memremap) |
115 | iounmap(virt); | 116 | memunmap(virt); |
116 | else | 117 | else |
117 | early_iounmap(virt, size); | 118 | early_memunmap(virt, size); |
118 | } | 119 | } |
119 | 120 | ||
120 | static void sfi_print_table_header(unsigned long long pa, | 121 | static void sfi_print_table_header(unsigned long long pa, |
@@ -507,8 +508,8 @@ void __init sfi_init_late(void) | |||
507 | length = syst_va->header.len; | 508 | length = syst_va->header.len; |
508 | sfi_unmap_memory(syst_va, sizeof(struct sfi_table_simple)); | 509 | sfi_unmap_memory(syst_va, sizeof(struct sfi_table_simple)); |
509 | 510 | ||
510 | /* Use ioremap now after it is ready */ | 511 | /* Use memremap now after it is ready */ |
511 | sfi_use_ioremap = 1; | 512 | sfi_use_memremap = 1; |
512 | syst_va = sfi_map_memory(syst_pa, length); | 513 | syst_va = sfi_map_memory(syst_pa, length); |
513 | 514 | ||
514 | sfi_acpi_init(); | 515 | sfi_acpi_init(); |
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 7a42238db446..25e862c487f6 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <linux/device.h> | 32 | #include <linux/device.h> |
33 | #include <linux/efi.h> | 33 | #include <linux/efi.h> |
34 | #include <linux/fb.h> | 34 | #include <linux/fb.h> |
35 | #include <linux/mem_encrypt.h> | ||
35 | 36 | ||
36 | #include <asm/fb.h> | 37 | #include <asm/fb.h> |
37 | 38 | ||
@@ -1396,6 +1397,12 @@ fb_mmap(struct file *file, struct vm_area_struct * vma) | |||
1396 | mutex_lock(&info->mm_lock); | 1397 | mutex_lock(&info->mm_lock); |
1397 | if (fb->fb_mmap) { | 1398 | if (fb->fb_mmap) { |
1398 | int res; | 1399 | int res; |
1400 | |||
1401 | /* | ||
1402 | * The framebuffer needs to be accessed decrypted; make sure | ||
1403 | * SME protection is removed ahead of the call | ||
1404 | */ | ||
1405 | vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); | ||
1399 | res = fb->fb_mmap(info, vma); | 1406 | res = fb->fb_mmap(info, vma); |
1400 | mutex_unlock(&info->mm_lock); | 1407 | mutex_unlock(&info->mm_lock); |
1401 | return res; | 1408 | return res; |
@@ -1421,6 +1428,11 @@ fb_mmap(struct file *file, struct vm_area_struct * vma) | |||
1421 | mutex_unlock(&info->mm_lock); | 1428 | mutex_unlock(&info->mm_lock); |
1422 | 1429 | ||
1423 | vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); | 1430 | vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); |
1431 | /* | ||
1432 | * The framebuffer needs to be accessed decrypted; make sure | ||
1433 | * SME protection is removed | ||
1434 | */ | ||
1435 | vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); | ||
1424 | fb_pgprotect(file, vma, start); | 1436 | fb_pgprotect(file, vma, start); |
1425 | 1437 | ||
1426 | return vm_iomap_memory(vma, start, len); | 1438 | return vm_iomap_memory(vma, start, len); |
diff --git a/include/asm-generic/early_ioremap.h b/include/asm-generic/early_ioremap.h index 734ad4db388c..2edef8d7fa6b 100644 --- a/include/asm-generic/early_ioremap.h +++ b/include/asm-generic/early_ioremap.h | |||
@@ -13,6 +13,8 @@ extern void *early_memremap(resource_size_t phys_addr, | |||
13 | unsigned long size); | 13 | unsigned long size); |
14 | extern void *early_memremap_ro(resource_size_t phys_addr, | 14 | extern void *early_memremap_ro(resource_size_t phys_addr, |
15 | unsigned long size); | 15 | unsigned long size); |
16 | extern void *early_memremap_prot(resource_size_t phys_addr, | ||
17 | unsigned long size, unsigned long prot_val); | ||
16 | extern void early_iounmap(void __iomem *addr, unsigned long size); | 18 | extern void early_iounmap(void __iomem *addr, unsigned long size); |
17 | extern void early_memunmap(void *addr, unsigned long size); | 19 | extern void early_memunmap(void *addr, unsigned long size); |
18 | 20 | ||
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 7dfa767dc680..4d7bb98f4134 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h | |||
@@ -583,6 +583,18 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, | |||
583 | #endif /* CONFIG_MMU */ | 583 | #endif /* CONFIG_MMU */ |
584 | 584 | ||
585 | /* | 585 | /* |
586 | * No-op macros that just return the current protection value. Defined here | ||
587 | * because these macros can be used even if CONFIG_MMU is not defined. | ||
588 | */ | ||
589 | #ifndef pgprot_encrypted | ||
590 | #define pgprot_encrypted(prot) (prot) | ||
591 | #endif | ||
592 | |||
593 | #ifndef pgprot_decrypted | ||
594 | #define pgprot_decrypted(prot) (prot) | ||
595 | #endif | ||
596 | |||
597 | /* | ||
586 | * A facility to provide lazy MMU batching. This allows PTE updates and | 598 | * A facility to provide lazy MMU batching. This allows PTE updates and |
587 | * page invalidations to be delayed until a call to leave lazy MMU mode | 599 | * page invalidations to be delayed until a call to leave lazy MMU mode |
588 | * is issued. Some architectures may benefit from doing this, and it is | 600 | * is issued. Some architectures may benefit from doing this, and it is |
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 310f51d42550..16d41de92ee3 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h | |||
@@ -166,6 +166,8 @@ | |||
166 | 166 | ||
167 | #if GCC_VERSION >= 40100 | 167 | #if GCC_VERSION >= 40100 |
168 | # define __compiletime_object_size(obj) __builtin_object_size(obj, 0) | 168 | # define __compiletime_object_size(obj) __builtin_object_size(obj, 0) |
169 | |||
170 | #define __nostackprotector __attribute__((__optimize__("no-stack-protector"))) | ||
169 | #endif | 171 | #endif |
170 | 172 | ||
171 | #if GCC_VERSION >= 40300 | 173 | #if GCC_VERSION >= 40300 |
diff --git a/include/linux/compiler.h b/include/linux/compiler.h index e786337cf5a7..e95a2631e545 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h | |||
@@ -501,6 +501,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s | |||
501 | #define __visible | 501 | #define __visible |
502 | #endif | 502 | #endif |
503 | 503 | ||
504 | #ifndef __nostackprotector | ||
505 | # define __nostackprotector | ||
506 | #endif | ||
507 | |||
504 | /* | 508 | /* |
505 | * Assume alignment of return value. | 509 | * Assume alignment of return value. |
506 | */ | 510 | */ |
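__nostackprotector is intended for code that runs before the stack-protector canary has been initialized, where the compiler-inserted canary check would read garbage; on compilers without the attribute it compiles away to nothing. A hedged usage sketch (the function name is illustrative, not taken from this patch):

    /* Runs in early boot, before boot_init_stack_canary(), so the
     * stack-protector instrumentation must be disabled for it. */
    static void __init __nostackprotector early_mem_setup(void)
    {
            /* ... early setup work ... */
    }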
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 03c0196a6f24..2189c79cde5d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/scatterlist.h> | 10 | #include <linux/scatterlist.h> |
11 | #include <linux/kmemcheck.h> | 11 | #include <linux/kmemcheck.h> |
12 | #include <linux/bug.h> | 12 | #include <linux/bug.h> |
13 | #include <linux/mem_encrypt.h> | ||
13 | 14 | ||
14 | /** | 15 | /** |
15 | * List of possible attributes associated with a DMA mapping. The semantics | 16 | * List of possible attributes associated with a DMA mapping. The semantics |
@@ -572,6 +573,12 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | |||
572 | return 0; | 573 | return 0; |
573 | } | 574 | } |
574 | 575 | ||
576 | static inline void dma_check_mask(struct device *dev, u64 mask) | ||
577 | { | ||
578 | if (sme_active() && (mask < (((u64)sme_get_me_mask() << 1) - 1))) | ||
579 | dev_warn(dev, "SME is active, device will require DMA bounce buffers\n"); | ||
580 | } | ||
581 | |||
575 | static inline int dma_supported(struct device *dev, u64 mask) | 582 | static inline int dma_supported(struct device *dev, u64 mask) |
576 | { | 583 | { |
577 | const struct dma_map_ops *ops = get_dma_ops(dev); | 584 | const struct dma_map_ops *ops = get_dma_ops(dev); |
@@ -588,6 +595,9 @@ static inline int dma_set_mask(struct device *dev, u64 mask) | |||
588 | { | 595 | { |
589 | if (!dev->dma_mask || !dma_supported(dev, mask)) | 596 | if (!dev->dma_mask || !dma_supported(dev, mask)) |
590 | return -EIO; | 597 | return -EIO; |
598 | |||
599 | dma_check_mask(dev, mask); | ||
600 | |||
591 | *dev->dma_mask = mask; | 601 | *dev->dma_mask = mask; |
592 | return 0; | 602 | return 0; |
593 | } | 603 | } |
@@ -607,6 +617,9 @@ static inline int dma_set_coherent_mask(struct device *dev, u64 mask) | |||
607 | { | 617 | { |
608 | if (!dma_supported(dev, mask)) | 618 | if (!dma_supported(dev, mask)) |
609 | return -EIO; | 619 | return -EIO; |
620 | |||
621 | dma_check_mask(dev, mask); | ||
622 | |||
610 | dev->coherent_dma_mask = mask; | 623 | dev->coherent_dma_mask = mask; |
611 | return 0; | 624 | return 0; |
612 | } | 625 | } |
diff --git a/include/linux/efi.h b/include/linux/efi.h index a686ca9a7e5c..4102b85217d5 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h | |||
@@ -985,7 +985,7 @@ static inline void efi_esrt_init(void) { } | |||
985 | extern int efi_config_parse_tables(void *config_tables, int count, int sz, | 985 | extern int efi_config_parse_tables(void *config_tables, int count, int sz, |
986 | efi_config_table_type_t *arch_tables); | 986 | efi_config_table_type_t *arch_tables); |
987 | extern u64 efi_get_iobase (void); | 987 | extern u64 efi_get_iobase (void); |
988 | extern u32 efi_mem_type (unsigned long phys_addr); | 988 | extern int efi_mem_type(unsigned long phys_addr); |
989 | extern u64 efi_mem_attributes (unsigned long phys_addr); | 989 | extern u64 efi_mem_attributes (unsigned long phys_addr); |
990 | extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); | 990 | extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); |
991 | extern int __init efi_uart_console_only (void); | 991 | extern int __init efi_uart_console_only (void); |
@@ -1113,6 +1113,8 @@ static inline bool efi_enabled(int feature) | |||
1113 | return test_bit(feature, &efi.flags) != 0; | 1113 | return test_bit(feature, &efi.flags) != 0; |
1114 | } | 1114 | } |
1115 | extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused); | 1115 | extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused); |
1116 | |||
1117 | extern bool efi_is_table_address(unsigned long phys_addr); | ||
1116 | #else | 1118 | #else |
1117 | static inline bool efi_enabled(int feature) | 1119 | static inline bool efi_enabled(int feature) |
1118 | { | 1120 | { |
@@ -1126,6 +1128,11 @@ efi_capsule_pending(int *reset_type) | |||
1126 | { | 1128 | { |
1127 | return false; | 1129 | return false; |
1128 | } | 1130 | } |
1131 | |||
1132 | static inline bool efi_is_table_address(unsigned long phys_addr) | ||
1133 | { | ||
1134 | return false; | ||
1135 | } | ||
1129 | #endif | 1136 | #endif |
1130 | 1137 | ||
1131 | extern int efi_status_to_err(efi_status_t status); | 1138 | extern int efi_status_to_err(efi_status_t status); |
diff --git a/include/linux/io.h b/include/linux/io.h index 2195d9ea4aaa..32e30e8fb9db 100644 --- a/include/linux/io.h +++ b/include/linux/io.h | |||
@@ -157,6 +157,8 @@ enum { | |||
157 | MEMREMAP_WB = 1 << 0, | 157 | MEMREMAP_WB = 1 << 0, |
158 | MEMREMAP_WT = 1 << 1, | 158 | MEMREMAP_WT = 1 << 1, |
159 | MEMREMAP_WC = 1 << 2, | 159 | MEMREMAP_WC = 1 << 2, |
160 | MEMREMAP_ENC = 1 << 3, | ||
161 | MEMREMAP_DEC = 1 << 4, | ||
160 | }; | 162 | }; |
161 | 163 | ||
162 | void *memremap(resource_size_t offset, size_t size, unsigned long flags); | 164 | void *memremap(resource_size_t offset, size_t size, unsigned long flags); |
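MEMREMAP_ENC and MEMREMAP_DEC let a caller override the default encryption attribute of a mapping. A hedged example of the MEMREMAP_DEC case (fw_phys and fw_size are placeholders): data that firmware wrote to RAM before SME was enabled exists as plain text, so it must be mapped with the C-bit clear to be read back correctly:

    void *fw = memremap(fw_phys, fw_size, MEMREMAP_WB | MEMREMAP_DEC);

    if (!fw)
            return -ENOMEM;
    /* ... parse the firmware table ... */
    memunmap(fw);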
diff --git a/include/linux/kexec.h b/include/linux/kexec.h index dd056fab9e35..2b7590f5483a 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h | |||
@@ -327,6 +327,14 @@ static inline void *boot_phys_to_virt(unsigned long entry) | |||
327 | return phys_to_virt(boot_phys_to_phys(entry)); | 327 | return phys_to_virt(boot_phys_to_phys(entry)); |
328 | } | 328 | } |
329 | 329 | ||
330 | #ifndef arch_kexec_post_alloc_pages | ||
331 | static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp) { return 0; } | ||
332 | #endif | ||
333 | |||
334 | #ifndef arch_kexec_pre_free_pages | ||
335 | static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { } | ||
336 | #endif | ||
337 | |||
330 | #else /* !CONFIG_KEXEC_CORE */ | 338 | #else /* !CONFIG_KEXEC_CORE */ |
331 | struct pt_regs; | 339 | struct pt_regs; |
332 | struct task_struct; | 340 | struct task_struct; |
diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h new file mode 100644 index 000000000000..1255f09f5e42 --- /dev/null +++ b/include/linux/mem_encrypt.h | |||
@@ -0,0 +1,48 @@ | |||
1 | /* | ||
2 | * AMD Memory Encryption Support | ||
3 | * | ||
4 | * Copyright (C) 2016 Advanced Micro Devices, Inc. | ||
5 | * | ||
6 | * Author: Tom Lendacky <thomas.lendacky@amd.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #ifndef __MEM_ENCRYPT_H__ | ||
14 | #define __MEM_ENCRYPT_H__ | ||
15 | |||
16 | #ifndef __ASSEMBLY__ | ||
17 | |||
18 | #ifdef CONFIG_ARCH_HAS_MEM_ENCRYPT | ||
19 | |||
20 | #include <asm/mem_encrypt.h> | ||
21 | |||
22 | #else /* !CONFIG_ARCH_HAS_MEM_ENCRYPT */ | ||
23 | |||
24 | #define sme_me_mask 0UL | ||
25 | |||
26 | #endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */ | ||
27 | |||
28 | static inline bool sme_active(void) | ||
29 | { | ||
30 | return !!sme_me_mask; | ||
31 | } | ||
32 | |||
33 | static inline unsigned long sme_get_me_mask(void) | ||
34 | { | ||
35 | return sme_me_mask; | ||
36 | } | ||
37 | |||
38 | /* | ||
39 | * The __sme_set() and __sme_clr() macros are useful for adding or removing | ||
40 | * the encryption mask from a value (e.g. when dealing with pagetable | ||
41 | * entries). | ||
42 | */ | ||
43 | #define __sme_set(x) ((unsigned long)(x) | sme_me_mask) | ||
44 | #define __sme_clr(x) ((unsigned long)(x) & ~sme_me_mask) | ||
45 | |||
46 | #endif /* __ASSEMBLY__ */ | ||
47 | |||
48 | #endif /* __MEM_ENCRYPT_H__ */ | ||
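Because sme_me_mask is 0 when CONFIG_ARCH_HAS_MEM_ENCRYPT is not set, the helpers and macros above compile to no-ops, so generic code can use them unconditionally. A minimal usage sketch (page_to_phys() and PAGE_MASK are the usual definitions; the values are only meaningful with SME active):

    unsigned long val = __sme_set(page_to_phys(page));
    phys_addr_t phys;

    if (sme_active())
            pr_info("entry carries the C-bit: %#lx\n", val & sme_get_me_mask());

    /* Strip the mask before treating the value as a CPU physical address. */
    phys = __sme_clr(val) & PAGE_MASK;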
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index e030a68ead7e..25438b2b6f22 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h | |||
@@ -126,4 +126,10 @@ static __always_inline enum lru_list page_lru(struct page *page) | |||
126 | 126 | ||
127 | #define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) | 127 | #define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) |
128 | 128 | ||
129 | #ifdef arch_unmap_kpfn | ||
130 | extern void arch_unmap_kpfn(unsigned long pfn); | ||
131 | #else | ||
132 | static __always_inline void arch_unmap_kpfn(unsigned long pfn) { } | ||
133 | #endif | ||
134 | |||
129 | #endif | 135 | #endif |
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 4ee479f2f355..15e7160751a8 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h | |||
@@ -35,6 +35,7 @@ int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); | |||
35 | extern unsigned long swiotlb_nr_tbl(void); | 35 | extern unsigned long swiotlb_nr_tbl(void); |
36 | unsigned long swiotlb_size_or_default(void); | 36 | unsigned long swiotlb_size_or_default(void); |
37 | extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); | 37 | extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); |
38 | extern void __init swiotlb_update_mem_attributes(void); | ||
38 | 39 | ||
39 | /* | 40 | /* |
40 | * Enumeration for sync targets | 41 | * Enumeration for sync targets |
diff --git a/init/main.c b/init/main.c index b78f63c30b17..8828fc148670 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -487,6 +487,8 @@ void __init __weak thread_stack_cache_init(void) | |||
487 | } | 487 | } |
488 | #endif | 488 | #endif |
489 | 489 | ||
490 | void __init __weak mem_encrypt_init(void) { } | ||
491 | |||
490 | /* | 492 | /* |
491 | * Set up kernel memory allocators | 493 | * Set up kernel memory allocators |
492 | */ | 494 | */ |
@@ -640,6 +642,14 @@ asmlinkage __visible void __init start_kernel(void) | |||
640 | */ | 642 | */ |
641 | locking_selftest(); | 643 | locking_selftest(); |
642 | 644 | ||
645 | /* | ||
646 | * This needs to be called before any devices perform DMA | ||
647 | * operations that might use the SWIOTLB bounce buffers. It will | ||
648 | * mark the bounce buffers as decrypted so that their usage will | ||
649 | * not cause "plain-text" data to be decrypted when accessed. | ||
650 | */ | ||
651 | mem_encrypt_init(); | ||
652 | |||
643 | #ifdef CONFIG_BLK_DEV_INITRD | 653 | #ifdef CONFIG_BLK_DEV_INITRD |
644 | if (initrd_start && !initrd_below_start_ok && | 654 | if (initrd_start && !initrd_below_start_ok && |
645 | page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) { | 655 | page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) { |
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 1ae7c41c33c1..20fef1a38602 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c | |||
@@ -301,7 +301,7 @@ static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order) | |||
301 | { | 301 | { |
302 | struct page *pages; | 302 | struct page *pages; |
303 | 303 | ||
304 | pages = alloc_pages(gfp_mask, order); | 304 | pages = alloc_pages(gfp_mask & ~__GFP_ZERO, order); |
305 | if (pages) { | 305 | if (pages) { |
306 | unsigned int count, i; | 306 | unsigned int count, i; |
307 | 307 | ||
@@ -310,6 +310,13 @@ static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order) | |||
310 | count = 1 << order; | 310 | count = 1 << order; |
311 | for (i = 0; i < count; i++) | 311 | for (i = 0; i < count; i++) |
312 | SetPageReserved(pages + i); | 312 | SetPageReserved(pages + i); |
313 | |||
314 | arch_kexec_post_alloc_pages(page_address(pages), count, | ||
315 | gfp_mask); | ||
316 | |||
317 | if (gfp_mask & __GFP_ZERO) | ||
318 | for (i = 0; i < count; i++) | ||
319 | clear_highpage(pages + i); | ||
313 | } | 320 | } |
314 | 321 | ||
315 | return pages; | 322 | return pages; |
@@ -321,6 +328,9 @@ static void kimage_free_pages(struct page *page) | |||
321 | 328 | ||
322 | order = page_private(page); | 329 | order = page_private(page); |
323 | count = 1 << order; | 330 | count = 1 << order; |
331 | |||
332 | arch_kexec_pre_free_pages(page_address(page), count); | ||
333 | |||
324 | for (i = 0; i < count; i++) | 334 | for (i = 0; i < count; i++) |
325 | ClearPageReserved(page + i); | 335 | ClearPageReserved(page + i); |
326 | __free_pages(page, order); | 336 | __free_pages(page, order); |
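The reworked allocation path is an ordering fix: the pages may only be zeroed after arch_kexec_post_alloc_pages() has changed their encryption attribute, otherwise zeros written through the old (encrypted) mapping would read back as ciphertext through the new one. On x86 with SME the hook is expected to look roughly like this (a sketch assuming a set_memory_decrypted()-style primitive, not a literal excerpt):

    int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
    {
            /* The kexec'ed kernel may run without SME, so hand the
             * control pages over decrypted. */
            return set_memory_decrypted((unsigned long)vaddr, pages);
    }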
diff --git a/kernel/memremap.c b/kernel/memremap.c index 124bed776532..9afdc434fb49 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c | |||
@@ -34,13 +34,24 @@ static void *arch_memremap_wb(resource_size_t offset, unsigned long size) | |||
34 | } | 34 | } |
35 | #endif | 35 | #endif |
36 | 36 | ||
37 | static void *try_ram_remap(resource_size_t offset, size_t size) | 37 | #ifndef arch_memremap_can_ram_remap |
38 | static bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, | ||
39 | unsigned long flags) | ||
40 | { | ||
41 | return true; | ||
42 | } | ||
43 | #endif | ||
44 | |||
45 | static void *try_ram_remap(resource_size_t offset, size_t size, | ||
46 | unsigned long flags) | ||
38 | { | 47 | { |
39 | unsigned long pfn = PHYS_PFN(offset); | 48 | unsigned long pfn = PHYS_PFN(offset); |
40 | 49 | ||
41 | /* In the simple case just return the existing linear address */ | 50 | /* In the simple case just return the existing linear address */ |
42 | if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn))) | 51 | if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn)) && |
52 | arch_memremap_can_ram_remap(offset, size, flags)) | ||
43 | return __va(offset); | 53 | return __va(offset); |
54 | |||
44 | return NULL; /* fallback to arch_memremap_wb */ | 55 | return NULL; /* fallback to arch_memremap_wb */ |
45 | } | 56 | } |
46 | 57 | ||
@@ -48,7 +59,8 @@ static void *try_ram_remap(resource_size_t offset, size_t size) | |||
48 | * memremap() - remap an iomem_resource as cacheable memory | 59 | * memremap() - remap an iomem_resource as cacheable memory |
49 | * @offset: iomem resource start address | 60 | * @offset: iomem resource start address |
50 | * @size: size of remap | 61 | * @size: size of remap |
51 | * @flags: any of MEMREMAP_WB, MEMREMAP_WT and MEMREMAP_WC | 62 | * @flags: any of MEMREMAP_WB, MEMREMAP_WT, MEMREMAP_WC, |
63 | * MEMREMAP_ENC, MEMREMAP_DEC | ||
52 | * | 64 | * |
53 | * memremap() is "ioremap" for cases where it is known that the resource | 65 | * memremap() is "ioremap" for cases where it is known that the resource |
54 | * being mapped does not have i/o side effects and the __iomem | 66 | * being mapped does not have i/o side effects and the __iomem |
@@ -95,7 +107,7 @@ void *memremap(resource_size_t offset, size_t size, unsigned long flags) | |||
95 | * the requested range is potentially in System RAM. | 107 | * the requested range is potentially in System RAM. |
96 | */ | 108 | */ |
97 | if (is_ram == REGION_INTERSECTS) | 109 | if (is_ram == REGION_INTERSECTS) |
98 | addr = try_ram_remap(offset, size); | 110 | addr = try_ram_remap(offset, size, flags); |
99 | if (!addr) | 111 | if (!addr) |
100 | addr = arch_memremap_wb(offset, size); | 112 | addr = arch_memremap_wb(offset, size); |
101 | } | 113 | } |
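The new hook gives an architecture a veto over the linear-map fast path when the requested attributes cannot be satisfied by __va(). A sketch of an override under SME, where the kernel linear map is encrypted (illustrative; the real x86 version may apply additional checks):

    bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
                                     unsigned long flags)
    {
            /* A decrypted mapping cannot reuse the encrypted linear map. */
            if (flags & MEMREMAP_DEC)
                    return false;

            return true;
    }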
diff --git a/lib/swiotlb.c b/lib/swiotlb.c index a8d74a733a38..8c6c83ef57a4 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/highmem.h> | 30 | #include <linux/highmem.h> |
31 | #include <linux/gfp.h> | 31 | #include <linux/gfp.h> |
32 | #include <linux/scatterlist.h> | 32 | #include <linux/scatterlist.h> |
33 | #include <linux/mem_encrypt.h> | ||
33 | 34 | ||
34 | #include <asm/io.h> | 35 | #include <asm/io.h> |
35 | #include <asm/dma.h> | 36 | #include <asm/dma.h> |
@@ -155,6 +156,15 @@ unsigned long swiotlb_size_or_default(void) | |||
155 | return size ? size : (IO_TLB_DEFAULT_SIZE); | 156 | return size ? size : (IO_TLB_DEFAULT_SIZE); |
156 | } | 157 | } |
157 | 158 | ||
159 | void __weak swiotlb_set_mem_attributes(void *vaddr, unsigned long size) { } | ||
160 | |||
161 | /* For swiotlb, clear the memory encryption mask from DMA addresses */ | ||
162 | static dma_addr_t swiotlb_phys_to_dma(struct device *hwdev, | ||
163 | phys_addr_t address) | ||
164 | { | ||
165 | return __sme_clr(phys_to_dma(hwdev, address)); | ||
166 | } | ||
167 | |||
158 | /* Note that this doesn't work with highmem page */ | 168 | /* Note that this doesn't work with highmem page */ |
159 | static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, | 169 | static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, |
160 | volatile void *address) | 170 | volatile void *address) |
@@ -183,6 +193,31 @@ void swiotlb_print_info(void) | |||
183 | bytes >> 20, vstart, vend - 1); | 193 | bytes >> 20, vstart, vend - 1); |
184 | } | 194 | } |
185 | 195 | ||
196 | /* | ||
197 | * Early SWIOTLB allocation may be too early to allow an architecture to | ||
198 | * perform the desired operations. This function allows the architecture to | ||
199 | * call SWIOTLB when the operations are possible. It needs to be called | ||
200 | * before the SWIOTLB memory is used. | ||
201 | */ | ||
202 | void __init swiotlb_update_mem_attributes(void) | ||
203 | { | ||
204 | void *vaddr; | ||
205 | unsigned long bytes; | ||
206 | |||
207 | if (no_iotlb_memory || late_alloc) | ||
208 | return; | ||
209 | |||
210 | vaddr = phys_to_virt(io_tlb_start); | ||
211 | bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT); | ||
212 | swiotlb_set_mem_attributes(vaddr, bytes); | ||
213 | memset(vaddr, 0, bytes); | ||
214 | |||
215 | vaddr = phys_to_virt(io_tlb_overflow_buffer); | ||
216 | bytes = PAGE_ALIGN(io_tlb_overflow); | ||
217 | swiotlb_set_mem_attributes(vaddr, bytes); | ||
218 | memset(vaddr, 0, bytes); | ||
219 | } | ||
220 | |||
186 | int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) | 221 | int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) |
187 | { | 222 | { |
188 | void *v_overflow_buffer; | 223 | void *v_overflow_buffer; |
@@ -320,6 +355,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) | |||
320 | io_tlb_start = virt_to_phys(tlb); | 355 | io_tlb_start = virt_to_phys(tlb); |
321 | io_tlb_end = io_tlb_start + bytes; | 356 | io_tlb_end = io_tlb_start + bytes; |
322 | 357 | ||
358 | swiotlb_set_mem_attributes(tlb, bytes); | ||
323 | memset(tlb, 0, bytes); | 359 | memset(tlb, 0, bytes); |
324 | 360 | ||
325 | /* | 361 | /* |
@@ -330,6 +366,8 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) | |||
330 | if (!v_overflow_buffer) | 366 | if (!v_overflow_buffer) |
331 | goto cleanup2; | 367 | goto cleanup2; |
332 | 368 | ||
369 | swiotlb_set_mem_attributes(v_overflow_buffer, io_tlb_overflow); | ||
370 | memset(v_overflow_buffer, 0, io_tlb_overflow); | ||
333 | io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer); | 371 | io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer); |
334 | 372 | ||
335 | /* | 373 | /* |
@@ -469,6 +507,9 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, | |||
469 | if (no_iotlb_memory) | 507 | if (no_iotlb_memory) |
470 | panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); | 508 | panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); |
471 | 509 | ||
510 | if (sme_active()) | ||
511 | pr_warn_once("SME is active and system is using DMA bounce buffers\n"); | ||
512 | |||
472 | mask = dma_get_seg_boundary(hwdev); | 513 | mask = dma_get_seg_boundary(hwdev); |
473 | 514 | ||
474 | tbl_dma_addr &= mask; | 515 | tbl_dma_addr &= mask; |
@@ -581,7 +622,7 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size, | |||
581 | return SWIOTLB_MAP_ERROR; | 622 | return SWIOTLB_MAP_ERROR; |
582 | } | 623 | } |
583 | 624 | ||
584 | start_dma_addr = phys_to_dma(hwdev, io_tlb_start); | 625 | start_dma_addr = swiotlb_phys_to_dma(hwdev, io_tlb_start); |
585 | return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, | 626 | return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, |
586 | dir, attrs); | 627 | dir, attrs); |
587 | } | 628 | } |
@@ -702,7 +743,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, | |||
702 | goto err_warn; | 743 | goto err_warn; |
703 | 744 | ||
704 | ret = phys_to_virt(paddr); | 745 | ret = phys_to_virt(paddr); |
705 | dev_addr = phys_to_dma(hwdev, paddr); | 746 | dev_addr = swiotlb_phys_to_dma(hwdev, paddr); |
706 | 747 | ||
707 | /* Confirm address can be DMA'd by device */ | 748 | /* Confirm address can be DMA'd by device */ |
708 | if (dev_addr + size - 1 > dma_mask) { | 749 | if (dev_addr + size - 1 > dma_mask) { |
@@ -812,10 +853,10 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, | |||
812 | map = map_single(dev, phys, size, dir, attrs); | 853 | map = map_single(dev, phys, size, dir, attrs); |
813 | if (map == SWIOTLB_MAP_ERROR) { | 854 | if (map == SWIOTLB_MAP_ERROR) { |
814 | swiotlb_full(dev, size, dir, 1); | 855 | swiotlb_full(dev, size, dir, 1); |
815 | return phys_to_dma(dev, io_tlb_overflow_buffer); | 856 | return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer); |
816 | } | 857 | } |
817 | 858 | ||
818 | dev_addr = phys_to_dma(dev, map); | 859 | dev_addr = swiotlb_phys_to_dma(dev, map); |
819 | 860 | ||
820 | /* Ensure that the address returned is DMA'ble */ | 861 | /* Ensure that the address returned is DMA'ble */ |
821 | if (dma_capable(dev, dev_addr, size)) | 862 | if (dma_capable(dev, dev_addr, size)) |
@@ -824,7 +865,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, | |||
824 | attrs |= DMA_ATTR_SKIP_CPU_SYNC; | 865 | attrs |= DMA_ATTR_SKIP_CPU_SYNC; |
825 | swiotlb_tbl_unmap_single(dev, map, size, dir, attrs); | 866 | swiotlb_tbl_unmap_single(dev, map, size, dir, attrs); |
826 | 867 | ||
827 | return phys_to_dma(dev, io_tlb_overflow_buffer); | 868 | return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer); |
828 | } | 869 | } |
829 | EXPORT_SYMBOL_GPL(swiotlb_map_page); | 870 | EXPORT_SYMBOL_GPL(swiotlb_map_page); |
830 | 871 | ||
@@ -958,7 +999,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, | |||
958 | sg_dma_len(sgl) = 0; | 999 | sg_dma_len(sgl) = 0; |
959 | return 0; | 1000 | return 0; |
960 | } | 1001 | } |
961 | sg->dma_address = phys_to_dma(hwdev, map); | 1002 | sg->dma_address = swiotlb_phys_to_dma(hwdev, map); |
962 | } else | 1003 | } else |
963 | sg->dma_address = dev_addr; | 1004 | sg->dma_address = dev_addr; |
964 | sg_dma_len(sg) = sg->length; | 1005 | sg_dma_len(sg) = sg->length; |
@@ -1026,7 +1067,7 @@ EXPORT_SYMBOL(swiotlb_sync_sg_for_device); | |||
1026 | int | 1067 | int |
1027 | swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) | 1068 | swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) |
1028 | { | 1069 | { |
1029 | return (dma_addr == phys_to_dma(hwdev, io_tlb_overflow_buffer)); | 1070 | return (dma_addr == swiotlb_phys_to_dma(hwdev, io_tlb_overflow_buffer)); |
1030 | } | 1071 | } |
1031 | EXPORT_SYMBOL(swiotlb_dma_mapping_error); | 1072 | EXPORT_SYMBOL(swiotlb_dma_mapping_error); |
1032 | 1073 | ||
@@ -1039,6 +1080,6 @@ EXPORT_SYMBOL(swiotlb_dma_mapping_error); | |||
1039 | int | 1080 | int |
1040 | swiotlb_dma_supported(struct device *hwdev, u64 mask) | 1081 | swiotlb_dma_supported(struct device *hwdev, u64 mask) |
1041 | { | 1082 | { |
1042 | return phys_to_dma(hwdev, io_tlb_end - 1) <= mask; | 1083 | return swiotlb_phys_to_dma(hwdev, io_tlb_end - 1) <= mask; |
1043 | } | 1084 | } |
1044 | EXPORT_SYMBOL(swiotlb_dma_supported); | 1085 | EXPORT_SYMBOL(swiotlb_dma_supported); |
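The two halves of the SWIOTLB change have to agree: swiotlb_update_mem_attributes() makes the bounce pages accessible as plain text, and swiotlb_phys_to_dma() ensures the bus address handed to the device carries no C-bit. Condensed for illustration (not a literal excerpt):

    /* Init time: strip the encryption attribute from the bounce pages. */
    swiotlb_set_mem_attributes(phys_to_virt(io_tlb_start), bytes);

    /* Map time: strip the encryption bit from the bus address, too. */
    sg->dma_address = swiotlb_phys_to_dma(hwdev, map);
                            /* == __sme_clr(phys_to_dma(hwdev, map)) */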
diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c index 6d5717bd7197..b1dd4a948fc0 100644 --- a/mm/early_ioremap.c +++ b/mm/early_ioremap.c | |||
@@ -30,6 +30,13 @@ early_param("early_ioremap_debug", early_ioremap_debug_setup); | |||
30 | 30 | ||
31 | static int after_paging_init __initdata; | 31 | static int after_paging_init __initdata; |
32 | 32 | ||
33 | pgprot_t __init __weak early_memremap_pgprot_adjust(resource_size_t phys_addr, | ||
34 | unsigned long size, | ||
35 | pgprot_t prot) | ||
36 | { | ||
37 | return prot; | ||
38 | } | ||
39 | |||
33 | void __init __weak early_ioremap_shutdown(void) | 40 | void __init __weak early_ioremap_shutdown(void) |
34 | { | 41 | { |
35 | } | 42 | } |
@@ -215,14 +222,29 @@ early_ioremap(resource_size_t phys_addr, unsigned long size) | |||
215 | void __init * | 222 | void __init * |
216 | early_memremap(resource_size_t phys_addr, unsigned long size) | 223 | early_memremap(resource_size_t phys_addr, unsigned long size) |
217 | { | 224 | { |
218 | return (__force void *)__early_ioremap(phys_addr, size, | 225 | pgprot_t prot = early_memremap_pgprot_adjust(phys_addr, size, |
219 | FIXMAP_PAGE_NORMAL); | 226 | FIXMAP_PAGE_NORMAL); |
227 | |||
228 | return (__force void *)__early_ioremap(phys_addr, size, prot); | ||
220 | } | 229 | } |
221 | #ifdef FIXMAP_PAGE_RO | 230 | #ifdef FIXMAP_PAGE_RO |
222 | void __init * | 231 | void __init * |
223 | early_memremap_ro(resource_size_t phys_addr, unsigned long size) | 232 | early_memremap_ro(resource_size_t phys_addr, unsigned long size) |
224 | { | 233 | { |
225 | return (__force void *)__early_ioremap(phys_addr, size, FIXMAP_PAGE_RO); | 234 | pgprot_t prot = early_memremap_pgprot_adjust(phys_addr, size, |
235 | FIXMAP_PAGE_RO); | ||
236 | |||
237 | return (__force void *)__early_ioremap(phys_addr, size, prot); | ||
238 | } | ||
239 | #endif | ||
240 | |||
241 | #ifdef CONFIG_ARCH_USE_MEMREMAP_PROT | ||
242 | void __init * | ||
243 | early_memremap_prot(resource_size_t phys_addr, unsigned long size, | ||
244 | unsigned long prot_val) | ||
245 | { | ||
246 | return (__force void *)__early_ioremap(phys_addr, size, | ||
247 | __pgprot(prot_val)); | ||
226 | } | 248 | } |
227 | #endif | 249 | #endif |
228 | 250 | ||
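With the weak default in place, an architecture can adjust early mapping protections centrally. A hedged sketch of an SME-flavored override (should_map_decrypted() is a hypothetical policy helper standing in for the arch's real decision logic):

    pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
                                                 unsigned long size, pgprot_t prot)
    {
            if (sme_active() && should_map_decrypted(phys_addr, size))
                    prot = pgprot_decrypted(prot);

            return prot;
    }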
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 1cd3b3569af8..88366626c0b7 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -1146,6 +1146,8 @@ int memory_failure(unsigned long pfn, int trapno, int flags) | |||
1146 | return 0; | 1146 | return 0; |
1147 | } | 1147 | } |
1148 | 1148 | ||
1149 | arch_unmap_kpfn(pfn); | ||
1150 | |||
1149 | orig_head = hpage = compound_head(p); | 1151 | orig_head = hpage = compound_head(p); |
1150 | num_poisoned_pages_inc(); | 1152 | num_poisoned_pages_inc(); |
1151 | 1153 | ||