Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/fixmap.h                10
-rw-r--r--  arch/x86/include/asm/mem_encrypt.h            7
-rw-r--r--  arch/x86/include/asm/pgtable_64.h             3
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt.h              17
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c  27
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt_rdtgroup.c     53
-rw-r--r--  arch/x86/kernel/head64.c                     20
-rw-r--r--  arch/x86/kernel/head_64.S                    16
-rw-r--r--  arch/x86/kernel/kvmclock.c                   52
-rw-r--r--  arch/x86/kernel/paravirt.c                    4
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S                19
-rw-r--r--  arch/x86/mm/init.c                            4
-rw-r--r--  arch/x86/mm/mem_encrypt.c                    24
-rw-r--r--  arch/x86/mm/pgtable.c                         9
-rw-r--r--  arch/x86/xen/mmu_pv.c                         8
15 files changed, 234 insertions, 39 deletions
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index e203169931c7..6390bd8c141b 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -14,6 +14,16 @@
 #ifndef _ASM_X86_FIXMAP_H
 #define _ASM_X86_FIXMAP_H
 
+/*
+ * Exposed to assembly code for setting up initial page tables. Cannot be
+ * calculated in assembly code (fixmap entries are an enum), but is sanity
+ * checked in the actual fixmap C code to make sure that the fixmap is
+ * covered fully.
+ */
+#define FIXMAP_PMD_NUM	2
+/* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */
+#define FIXMAP_PMD_TOP	507
+
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>
 #include <asm/acpi.h>
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index c0643831706e..616f8e637bc3 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -48,10 +48,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
 
 /* Architecture __weak replacement functions */
 void __init mem_encrypt_init(void);
+void __init mem_encrypt_free_decrypted_mem(void);
 
 bool sme_active(void);
 bool sev_active(void);
 
+#define __bss_decrypted __attribute__((__section__(".bss..decrypted")))
+
 #else	/* !CONFIG_AMD_MEM_ENCRYPT */
 
 #define sme_me_mask	0ULL
@@ -77,6 +80,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0;
 static inline int __init
 early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
 
+#define __bss_decrypted
+
 #endif	/* CONFIG_AMD_MEM_ENCRYPT */
 
 /*
@@ -88,6 +93,8 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0;
 #define __sme_pa(x)		(__pa(x) | sme_me_mask)
 #define __sme_pa_nodebug(x)	(__pa_nodebug(x) | sme_me_mask)
 
+extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[];
+
 #endif	/* __ASSEMBLY__ */
 
 #endif	/* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index ce2b59047cb8..9c85b54bf03c 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -14,6 +14,7 @@
 #include <asm/processor.h>
 #include <linux/bitops.h>
 #include <linux/threads.h>
+#include <asm/fixmap.h>
 
 extern p4d_t level4_kernel_pgt[512];
 extern p4d_t level4_ident_pgt[512];
@@ -22,7 +23,7 @@ extern pud_t level3_ident_pgt[512];
 extern pmd_t level2_kernel_pgt[512];
 extern pmd_t level2_fixmap_pgt[512];
 extern pmd_t level2_ident_pgt[512];
-extern pte_t level1_fixmap_pgt[512];
+extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM];
 extern pgd_t init_top_pgt[];
 
 #define swapper_pg_dir init_top_pgt
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 4e588f36228f..285eb3ec4200 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -382,6 +382,11 @@ static inline bool is_mbm_event(int e)
 		e <= QOS_L3_MBM_LOCAL_EVENT_ID);
 }
 
+struct rdt_parse_data {
+	struct rdtgroup		*rdtgrp;
+	char			*buf;
+};
+
 /**
  * struct rdt_resource - attributes of an RDT resource
  * @rid:		The index of the resource
@@ -423,16 +428,19 @@ struct rdt_resource {
 	struct rdt_cache	cache;
 	struct rdt_membw	membw;
 	const char		*format_str;
-	int (*parse_ctrlval)	(void *data, struct rdt_resource *r,
-				 struct rdt_domain *d);
+	int (*parse_ctrlval)(struct rdt_parse_data *data,
+			     struct rdt_resource *r,
+			     struct rdt_domain *d);
 	struct list_head	evt_list;
 	int			num_rmid;
 	unsigned int		mon_scale;
 	unsigned long		fflags;
 };
 
-int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d);
-int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d);
+int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
+	      struct rdt_domain *d);
+int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
+	     struct rdt_domain *d);
 
 extern struct mutex rdtgroup_mutex;
 
@@ -536,6 +544,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
 void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
 struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
 int update_domains(struct rdt_resource *r, int closid);
+int closids_supported(void);
 void closid_free(int closid);
 int alloc_rmid(void);
 void free_rmid(u32 rmid);
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index af358ca05160..0f53049719cd 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -64,19 +64,19 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
 	return true;
 }
 
-int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d)
+int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
+	     struct rdt_domain *d)
 {
-	unsigned long data;
-	char *buf = _buf;
+	unsigned long bw_val;
 
 	if (d->have_new_ctrl) {
 		rdt_last_cmd_printf("duplicate domain %d\n", d->id);
 		return -EINVAL;
 	}
 
-	if (!bw_validate(buf, &data, r))
+	if (!bw_validate(data->buf, &bw_val, r))
 		return -EINVAL;
-	d->new_ctrl = data;
+	d->new_ctrl = bw_val;
 	d->have_new_ctrl = true;
 
 	return 0;
@@ -123,18 +123,13 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
 	return true;
 }
 
-struct rdt_cbm_parse_data {
-	struct rdtgroup		*rdtgrp;
-	char			*buf;
-};
-
 /*
  * Read one cache bit mask (hex). Check that it is valid for the current
  * resource type.
  */
-int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d)
+int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
+	      struct rdt_domain *d)
 {
-	struct rdt_cbm_parse_data *data = _data;
 	struct rdtgroup *rdtgrp = data->rdtgrp;
 	u32 cbm_val;
 
@@ -195,11 +190,17 @@ int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d)
 static int parse_line(char *line, struct rdt_resource *r,
 		      struct rdtgroup *rdtgrp)
 {
-	struct rdt_cbm_parse_data data;
+	struct rdt_parse_data data;
 	char *dom = NULL, *id;
 	struct rdt_domain *d;
 	unsigned long dom_id;
 
+	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
+	    r->rid == RDT_RESOURCE_MBA) {
+		rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
+		return -EINVAL;
+	}
+
 next:
 	if (!line || line[0] == '\0')
 		return 0;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index b799c00bef09..1b8e86a5d5e1 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -97,6 +97,12 @@ void rdt_last_cmd_printf(const char *fmt, ...)
  * limited as the number of resources grows.
  */
 static int closid_free_map;
+static int closid_free_map_len;
+
+int closids_supported(void)
+{
+	return closid_free_map_len;
+}
 
 static void closid_init(void)
 {
@@ -111,6 +117,7 @@ static void closid_init(void)
 
 	/* CLOSID 0 is always reserved for the default group */
 	closid_free_map &= ~1;
+	closid_free_map_len = rdt_min_closid;
 }
 
 static int closid_alloc(void)
@@ -802,7 +809,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
 		sw_shareable = 0;
 		exclusive = 0;
 		seq_printf(seq, "%d=", dom->id);
-		for (i = 0; i < r->num_closid; i++, ctrl++) {
+		for (i = 0; i < closids_supported(); i++, ctrl++) {
 			if (!closid_allocated(i))
 				continue;
 			mode = rdtgroup_mode_by_closid(i);
@@ -989,7 +996,7 @@ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
 
 	/* Check for overlap with other resource groups */
 	ctrl = d->ctrl_val;
-	for (i = 0; i < r->num_closid; i++, ctrl++) {
+	for (i = 0; i < closids_supported(); i++, ctrl++) {
 		ctrl_b = (unsigned long *)ctrl;
 		mode = rdtgroup_mode_by_closid(i);
 		if (closid_allocated(i) && i != closid &&
@@ -1024,16 +1031,27 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
 {
 	int closid = rdtgrp->closid;
 	struct rdt_resource *r;
+	bool has_cache = false;
 	struct rdt_domain *d;
 
 	for_each_alloc_enabled_rdt_resource(r) {
+		if (r->rid == RDT_RESOURCE_MBA)
+			continue;
+		has_cache = true;
 		list_for_each_entry(d, &r->domains, list) {
 			if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid],
-						  rdtgrp->closid, false))
+						  rdtgrp->closid, false)) {
+				rdt_last_cmd_puts("schemata overlaps\n");
 				return false;
+			}
 		}
 	}
 
+	if (!has_cache) {
+		rdt_last_cmd_puts("cannot be exclusive without CAT/CDP\n");
+		return false;
+	}
+
 	return true;
 }
 
@@ -1085,7 +1103,6 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
 		rdtgrp->mode = RDT_MODE_SHAREABLE;
 	} else if (!strcmp(buf, "exclusive")) {
 		if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
-			rdt_last_cmd_printf("schemata overlaps\n");
 			ret = -EINVAL;
 			goto out;
 		}
@@ -1155,8 +1172,8 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
 	struct rdt_resource *r;
 	struct rdt_domain *d;
 	unsigned int size;
-	bool sep = false;
-	u32 cbm;
+	bool sep;
+	u32 ctrl;
 
 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 	if (!rdtgrp) {
@@ -1174,6 +1191,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
 	}
 
 	for_each_alloc_enabled_rdt_resource(r) {
+		sep = false;
 		seq_printf(s, "%*s:", max_name_width, r->name);
 		list_for_each_entry(d, &r->domains, list) {
 			if (sep)
@@ -1181,8 +1199,13 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
 			if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
 				size = 0;
 			} else {
-				cbm = d->ctrl_val[rdtgrp->closid];
-				size = rdtgroup_cbm_to_size(r, d, cbm);
+				ctrl = (!is_mba_sc(r) ?
+						d->ctrl_val[rdtgrp->closid] :
+						d->mbps_val[rdtgrp->closid]);
+				if (r->rid == RDT_RESOURCE_MBA)
+					size = ctrl;
+				else
+					size = rdtgroup_cbm_to_size(r, d, ctrl);
 			}
 			seq_printf(s, "%d=%u", d->id, size);
 			sep = true;
@@ -2336,12 +2359,18 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
 	u32 *ctrl;
 
 	for_each_alloc_enabled_rdt_resource(r) {
+		/*
+		 * Only initialize default allocations for CBM cache
+		 * resources
+		 */
+		if (r->rid == RDT_RESOURCE_MBA)
+			continue;
 		list_for_each_entry(d, &r->domains, list) {
 			d->have_new_ctrl = false;
 			d->new_ctrl = r->cache.shareable_bits;
 			used_b = r->cache.shareable_bits;
 			ctrl = d->ctrl_val;
-			for (i = 0; i < r->num_closid; i++, ctrl++) {
+			for (i = 0; i < closids_supported(); i++, ctrl++) {
 				if (closid_allocated(i) && i != closid) {
 					mode = rdtgroup_mode_by_closid(i);
 					if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
@@ -2373,6 +2402,12 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
 	}
 
 	for_each_alloc_enabled_rdt_resource(r) {
+		/*
+		 * Only initialize default allocations for CBM cache
+		 * resources
+		 */
+		if (r->rid == RDT_RESOURCE_MBA)
+			continue;
 		ret = update_domains(r, rdtgrp->closid);
 		if (ret < 0) {
 			rdt_last_cmd_puts("failed to initialize allocations\n");
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 8047379e575a..ddee1f0870c4 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -35,6 +35,7 @@
 #include <asm/bootparam_utils.h>
 #include <asm/microcode.h>
 #include <asm/kasan.h>
+#include <asm/fixmap.h>
 
 /*
  * Manage page tables very early on.
@@ -112,6 +113,7 @@ static bool __head check_la57_support(unsigned long physaddr)
 unsigned long __head __startup_64(unsigned long physaddr,
 				  struct boot_params *bp)
 {
+	unsigned long vaddr, vaddr_end;
 	unsigned long load_delta, *p;
 	unsigned long pgtable_flags;
 	pgdval_t *pgd;
@@ -165,7 +167,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	pud[511] += load_delta;
 
 	pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
-	pmd[506] += load_delta;
+	for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
+		pmd[i] += load_delta;
 
 	/*
 	 * Set up the identity mapping for the switchover. These
@@ -235,6 +238,21 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	sme_encrypt_kernel(bp);
 
 	/*
+	 * Clear the memory encryption mask from the .bss..decrypted section.
+	 * The bss section will be memset to zero later in the initialization so
+	 * there is no need to zero it after changing the memory encryption
+	 * attribute.
+	 */
+	if (mem_encrypt_active()) {
+		vaddr = (unsigned long)__start_bss_decrypted;
+		vaddr_end = (unsigned long)__end_bss_decrypted;
+		for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
+			i = pmd_index(vaddr);
+			pmd[i] -= sme_get_me_mask();
+		}
+	}
+
+	/*
 	 * Return the SME encryption mask (if SME is active) to be used as a
 	 * modifier for the initial pgdir entry programmed into CR3.
 	 */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 15ebc2fc166e..a3618cf04cf6 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -24,6 +24,7 @@
24#include "../entry/calling.h" 24#include "../entry/calling.h"
25#include <asm/export.h> 25#include <asm/export.h>
26#include <asm/nospec-branch.h> 26#include <asm/nospec-branch.h>
27#include <asm/fixmap.h>
27 28
28#ifdef CONFIG_PARAVIRT 29#ifdef CONFIG_PARAVIRT
29#include <asm/asm-offsets.h> 30#include <asm/asm-offsets.h>
@@ -445,13 +446,20 @@ NEXT_PAGE(level2_kernel_pgt)
 		KERNEL_IMAGE_SIZE/PMD_SIZE)
 
 NEXT_PAGE(level2_fixmap_pgt)
-	.fill	506,8,0
-	.quad	level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
-	/* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
-	.fill	5,8,0
+	.fill	(512 - 4 - FIXMAP_PMD_NUM),8,0
+	pgtno = 0
+	.rept (FIXMAP_PMD_NUM)
+	.quad	level1_fixmap_pgt + (pgtno << PAGE_SHIFT) - __START_KERNEL_map \
+		+ _PAGE_TABLE_NOENC;
+	pgtno = pgtno + 1
+	.endr
+	/* 6 MB reserved space + a 2MB hole */
+	.fill	4,8,0
 
 NEXT_PAGE(level1_fixmap_pgt)
+	.rept (FIXMAP_PMD_NUM)
 	.fill	512,8,0
+	.endr
 
 #undef PMDS
 
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1e6764648af3..013fe3d21dbb 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -28,6 +28,7 @@
 #include <linux/sched/clock.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/set_memory.h>
 
 #include <asm/hypervisor.h>
 #include <asm/mem_encrypt.h>
@@ -61,9 +62,10 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
 	(PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info))
 
 static struct pvclock_vsyscall_time_info
-			hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __aligned(PAGE_SIZE);
-static struct pvclock_wall_clock wall_clock;
+			hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
+static struct pvclock_wall_clock wall_clock __bss_decrypted;
 static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
+static struct pvclock_vsyscall_time_info *hvclock_mem;
 
 static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
 {
@@ -236,6 +238,45 @@ static void kvm_shutdown(void)
 	native_machine_shutdown();
 }
 
+static void __init kvmclock_init_mem(void)
+{
+	unsigned long ncpus;
+	unsigned int order;
+	struct page *p;
+	int r;
+
+	if (HVC_BOOT_ARRAY_SIZE >= num_possible_cpus())
+		return;
+
+	ncpus = num_possible_cpus() - HVC_BOOT_ARRAY_SIZE;
+	order = get_order(ncpus * sizeof(*hvclock_mem));
+
+	p = alloc_pages(GFP_KERNEL, order);
+	if (!p) {
+		pr_warn("%s: failed to alloc %d pages", __func__, (1U << order));
+		return;
+	}
+
+	hvclock_mem = page_address(p);
+
+	/*
+	 * hvclock is shared between the guest and the hypervisor, must
+	 * be mapped decrypted.
+	 */
+	if (sev_active()) {
+		r = set_memory_decrypted((unsigned long) hvclock_mem,
+					 1UL << order);
+		if (r) {
+			__free_pages(p, order);
+			hvclock_mem = NULL;
+			pr_warn("kvmclock: set_memory_decrypted() failed. Disabling\n");
+			return;
+		}
+	}
+
+	memset(hvclock_mem, 0, PAGE_SIZE << order);
+}
+
 static int __init kvm_setup_vsyscall_timeinfo(void)
 {
 #ifdef CONFIG_X86_64
@@ -250,6 +291,9 @@ static int __init kvm_setup_vsyscall_timeinfo(void)
 
 	kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
 #endif
+
+	kvmclock_init_mem();
+
 	return 0;
 }
 early_initcall(kvm_setup_vsyscall_timeinfo);
@@ -269,8 +313,10 @@ static int kvmclock_setup_percpu(unsigned int cpu)
 	/* Use the static page for the first CPUs, allocate otherwise */
 	if (cpu < HVC_BOOT_ARRAY_SIZE)
 		p = &hv_clock_boot[cpu];
+	else if (hvclock_mem)
+		p = hvclock_mem + cpu - HVC_BOOT_ARRAY_SIZE;
 	else
-		p = kzalloc(sizeof(*p), GFP_KERNEL);
+		return -ENOMEM;
 
 	per_cpu(hv_clock_per_cpu, cpu) = p;
 	return p ? 0 : -ENOMEM;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index afdb303285f8..8dc69d82567e 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -91,7 +91,7 @@ unsigned paravirt_patch_call(void *insnbuf,
 
 	if (len < 5) {
 #ifdef CONFIG_RETPOLINE
-		WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr);
+		WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr);
 #endif
 		return len;	/* call too long for patch site */
 	}
@@ -111,7 +111,7 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
 
 	if (len < 5) {
 #ifdef CONFIG_RETPOLINE
-		WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr);
+		WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr);
 #endif
 		return len;	/* call too long for patch site */
 	}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 8bde0a419f86..5dd3317d761f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -65,6 +65,23 @@ jiffies_64 = jiffies;
 #define ALIGN_ENTRY_TEXT_BEGIN	. = ALIGN(PMD_SIZE);
 #define ALIGN_ENTRY_TEXT_END	. = ALIGN(PMD_SIZE);
 
+/*
+ * This section contains data which will be mapped as decrypted. Memory
+ * encryption operates on a page basis. Make this section PMD-aligned
+ * to avoid splitting the pages while mapping the section early.
+ *
+ * Note: We use a separate section so that only this section gets
+ * decrypted to avoid exposing more than we wish.
+ */
+#define BSS_DECRYPTED						\
+	. = ALIGN(PMD_SIZE);					\
+	__start_bss_decrypted = .;				\
+	*(.bss..decrypted);					\
+	. = ALIGN(PAGE_SIZE);					\
+	__start_bss_decrypted_unused = .;			\
+	. = ALIGN(PMD_SIZE);					\
+	__end_bss_decrypted = .;				\
+
 #else
 
 #define X86_ALIGN_RODATA_BEGIN
@@ -74,6 +91,7 @@ jiffies_64 = jiffies;
 
 #define ALIGN_ENTRY_TEXT_BEGIN
 #define ALIGN_ENTRY_TEXT_END
+#define BSS_DECRYPTED
 
 #endif
 
@@ -355,6 +373,7 @@ SECTIONS
 		__bss_start = .;
 		*(.bss..page_aligned)
 		*(.bss)
+		BSS_DECRYPTED
 		. = ALIGN(PAGE_SIZE);
 		__bss_stop = .;
 	}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 7a8fc26c1115..faca978ebf9d 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -815,10 +815,14 @@ void free_kernel_image_pages(void *begin, void *end)
 	set_memory_np_noalias(begin_ul, len_pages);
 }
 
+void __weak mem_encrypt_free_decrypted_mem(void) { }
+
 void __ref free_initmem(void)
 {
 	e820__reallocate_tables();
 
+	mem_encrypt_free_decrypted_mem();
+
 	free_kernel_image_pages(&__init_begin, &__init_end);
 }
 
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index b2de398d1fd3..006f373f54ab 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -348,6 +348,30 @@ bool sev_active(void)
 EXPORT_SYMBOL(sev_active);
 
 /* Architecture __weak replacement functions */
+void __init mem_encrypt_free_decrypted_mem(void)
+{
+	unsigned long vaddr, vaddr_end, npages;
+	int r;
+
+	vaddr = (unsigned long)__start_bss_decrypted_unused;
+	vaddr_end = (unsigned long)__end_bss_decrypted;
+	npages = (vaddr_end - vaddr) >> PAGE_SHIFT;
+
+	/*
+	 * The unused memory range was mapped decrypted, change the encryption
+	 * attribute from decrypted to encrypted before freeing it.
+	 */
+	if (mem_encrypt_active()) {
+		r = set_memory_encrypted(vaddr, npages);
+		if (r) {
+			pr_warn("failed to free unused decrypted pages\n");
+			return;
+		}
+	}
+
+	free_init_pages("unused decrypted", vaddr, vaddr_end);
+}
+
 void __init mem_encrypt_init(void)
 {
 	if (!sme_me_mask)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index ae394552fb94..089e78c4effd 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -637,6 +637,15 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
 {
 	unsigned long address = __fix_to_virt(idx);
 
+#ifdef CONFIG_X86_64
+	/*
+	 * Ensure that the static initial page tables are covering the
+	 * fixmap completely.
+	 */
+	BUILD_BUG_ON(__end_of_permanent_fixed_addresses >
+		     (FIXMAP_PMD_NUM * PTRS_PER_PTE));
+#endif
+
 	if (idx >= __end_of_fixed_addresses) {
 		BUG();
 		return;
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 2fe5c9b1816b..dd461c0167ef 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1907,7 +1907,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 		/* L3_k[511] -> level2_fixmap_pgt */
 		convert_pfn_mfn(level3_kernel_pgt);
 
-		/* L3_k[511][506] -> level1_fixmap_pgt */
+		/* L3_k[511][508-FIXMAP_PMD_NUM ... 507] -> level1_fixmap_pgt */
 		convert_pfn_mfn(level2_fixmap_pgt);
 
 		/* We get [511][511] and have Xen's version of level2_kernel_pgt */
@@ -1952,7 +1952,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO);
+
+	for (i = 0; i < FIXMAP_PMD_NUM; i++) {
+		set_page_prot(level1_fixmap_pgt + i * PTRS_PER_PTE,
+			      PAGE_KERNEL_RO);
+	}
 
 	/* Pin down new L4 */
 	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,