aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>2018-09-23 02:10:12 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2018-09-23 02:10:12 -0400
commit328c6333ba3df92d5ea7f2cee46379ed57882af6 (patch)
tree152f3e9916b59bff1f43cfe2265328cdbde9a630
parent52890d2afc6a06d6121ae1fd4bfc2448b0d67728 (diff)
parent05ab1d8a4b36ee912b7087c6da127439ed0a903e (diff)
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Thomas writes: "A set of fixes for x86: - Resolve the kvmclock regression on AMD systems with memory encryption enabled. The rework of the kvmclock memory allocation during early boot results in encrypted storage, which is not shareable with the hypervisor. Create a new section for this data which is mapped unencrypted and take care that the later allocations for shared kvmclock memory are unencrypted as well. - Fix the build regression in the paravirt code introduced by the recent spectre v2 updates. - Ensure that the initial static page tables cover the fixmap space correctly so early console always works. This worked so far by chance, but recent modifications to the fixmap layout can - depending on kernel configuration - move the relevant entries to a different place which is not covered by the initial static page tables. - Address the regressions and issues which got introduced with the recent extensions to the Intel Resource Director Technology code. - Update maintainer entries to document reality" * 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/mm: Expand static page table for fixmap space MAINTAINERS: Add X86 MM entry x86/intel_rdt: Add Reinette as co-maintainer for RDT MAINTAINERS: Add Borislav to the x86 maintainers x86/paravirt: Fix some warning messages x86/intel_rdt: Fix incorrect loop end condition x86/intel_rdt: Fix exclusive mode handling of MBA resource x86/intel_rdt: Fix incorrect loop end condition x86/intel_rdt: Do not allow pseudo-locking of MBA resource x86/intel_rdt: Fix unchecked MSR access x86/intel_rdt: Fix invalid mode warning when multiple resources are managed x86/intel_rdt: Global closid helper to support future fixes x86/intel_rdt: Fix size reporting of MBA resource x86/intel_rdt: Fix data type in parsing callbacks x86/kvm: Use __bss_decrypted attribute in shared variables x86/mm: Add .bss..decrypted section to hold shared variables
-rw-r--r--MAINTAINERS11
-rw-r--r--arch/x86/include/asm/fixmap.h10
-rw-r--r--arch/x86/include/asm/mem_encrypt.h7
-rw-r--r--arch/x86/include/asm/pgtable_64.h3
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.h17
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c27
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_rdtgroup.c53
-rw-r--r--arch/x86/kernel/head64.c20
-rw-r--r--arch/x86/kernel/head_64.S16
-rw-r--r--arch/x86/kernel/kvmclock.c52
-rw-r--r--arch/x86/kernel/paravirt.c4
-rw-r--r--arch/x86/kernel/vmlinux.lds.S19
-rw-r--r--arch/x86/mm/init.c4
-rw-r--r--arch/x86/mm/mem_encrypt.c24
-rw-r--r--arch/x86/mm/pgtable.c9
-rw-r--r--arch/x86/xen/mmu_pv.c8
16 files changed, 245 insertions, 39 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index e993064637ca..02a39617ec82 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12260,6 +12260,7 @@ F: Documentation/networking/rds.txt
12260 12260
12261RDT - RESOURCE ALLOCATION 12261RDT - RESOURCE ALLOCATION
12262M: Fenghua Yu <fenghua.yu@intel.com> 12262M: Fenghua Yu <fenghua.yu@intel.com>
12263M: Reinette Chatre <reinette.chatre@intel.com>
12263L: linux-kernel@vger.kernel.org 12264L: linux-kernel@vger.kernel.org
12264S: Supported 12265S: Supported
12265F: arch/x86/kernel/cpu/intel_rdt* 12266F: arch/x86/kernel/cpu/intel_rdt*
@@ -15912,6 +15913,7 @@ F: net/x25/
15912X86 ARCHITECTURE (32-BIT AND 64-BIT) 15913X86 ARCHITECTURE (32-BIT AND 64-BIT)
15913M: Thomas Gleixner <tglx@linutronix.de> 15914M: Thomas Gleixner <tglx@linutronix.de>
15914M: Ingo Molnar <mingo@redhat.com> 15915M: Ingo Molnar <mingo@redhat.com>
15916M: Borislav Petkov <bp@alien8.de>
15915R: "H. Peter Anvin" <hpa@zytor.com> 15917R: "H. Peter Anvin" <hpa@zytor.com>
15916M: x86@kernel.org 15918M: x86@kernel.org
15917L: linux-kernel@vger.kernel.org 15919L: linux-kernel@vger.kernel.org
@@ -15940,6 +15942,15 @@ M: Borislav Petkov <bp@alien8.de>
15940S: Maintained 15942S: Maintained
15941F: arch/x86/kernel/cpu/microcode/* 15943F: arch/x86/kernel/cpu/microcode/*
15942 15944
15945X86 MM
15946M: Dave Hansen <dave.hansen@linux.intel.com>
15947M: Andy Lutomirski <luto@kernel.org>
15948M: Peter Zijlstra <peterz@infradead.org>
15949L: linux-kernel@vger.kernel.org
15950T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/mm
15951S: Maintained
15952F: arch/x86/mm/
15953
15943X86 PLATFORM DRIVERS 15954X86 PLATFORM DRIVERS
15944M: Darren Hart <dvhart@infradead.org> 15955M: Darren Hart <dvhart@infradead.org>
15945M: Andy Shevchenko <andy@infradead.org> 15956M: Andy Shevchenko <andy@infradead.org>
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index e203169931c7..6390bd8c141b 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -14,6 +14,16 @@
14#ifndef _ASM_X86_FIXMAP_H 14#ifndef _ASM_X86_FIXMAP_H
15#define _ASM_X86_FIXMAP_H 15#define _ASM_X86_FIXMAP_H
16 16
17/*
18 * Exposed to assembly code for setting up initial page tables. Cannot be
19 * calculated in assembly code (fixmap entries are an enum), but is sanity
20 * checked in the actual fixmap C code to make sure that the fixmap is
21 * covered fully.
22 */
23#define FIXMAP_PMD_NUM 2
24/* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */
25#define FIXMAP_PMD_TOP 507
26
17#ifndef __ASSEMBLY__ 27#ifndef __ASSEMBLY__
18#include <linux/kernel.h> 28#include <linux/kernel.h>
19#include <asm/acpi.h> 29#include <asm/acpi.h>
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index c0643831706e..616f8e637bc3 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -48,10 +48,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
48 48
49/* Architecture __weak replacement functions */ 49/* Architecture __weak replacement functions */
50void __init mem_encrypt_init(void); 50void __init mem_encrypt_init(void);
51void __init mem_encrypt_free_decrypted_mem(void);
51 52
52bool sme_active(void); 53bool sme_active(void);
53bool sev_active(void); 54bool sev_active(void);
54 55
56#define __bss_decrypted __attribute__((__section__(".bss..decrypted")))
57
55#else /* !CONFIG_AMD_MEM_ENCRYPT */ 58#else /* !CONFIG_AMD_MEM_ENCRYPT */
56 59
57#define sme_me_mask 0ULL 60#define sme_me_mask 0ULL
@@ -77,6 +80,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0;
77static inline int __init 80static inline int __init
78early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } 81early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
79 82
83#define __bss_decrypted
84
80#endif /* CONFIG_AMD_MEM_ENCRYPT */ 85#endif /* CONFIG_AMD_MEM_ENCRYPT */
81 86
82/* 87/*
@@ -88,6 +93,8 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0;
88#define __sme_pa(x) (__pa(x) | sme_me_mask) 93#define __sme_pa(x) (__pa(x) | sme_me_mask)
89#define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask) 94#define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask)
90 95
96extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[];
97
91#endif /* __ASSEMBLY__ */ 98#endif /* __ASSEMBLY__ */
92 99
93#endif /* __X86_MEM_ENCRYPT_H__ */ 100#endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index ce2b59047cb8..9c85b54bf03c 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -14,6 +14,7 @@
14#include <asm/processor.h> 14#include <asm/processor.h>
15#include <linux/bitops.h> 15#include <linux/bitops.h>
16#include <linux/threads.h> 16#include <linux/threads.h>
17#include <asm/fixmap.h>
17 18
18extern p4d_t level4_kernel_pgt[512]; 19extern p4d_t level4_kernel_pgt[512];
19extern p4d_t level4_ident_pgt[512]; 20extern p4d_t level4_ident_pgt[512];
@@ -22,7 +23,7 @@ extern pud_t level3_ident_pgt[512];
22extern pmd_t level2_kernel_pgt[512]; 23extern pmd_t level2_kernel_pgt[512];
23extern pmd_t level2_fixmap_pgt[512]; 24extern pmd_t level2_fixmap_pgt[512];
24extern pmd_t level2_ident_pgt[512]; 25extern pmd_t level2_ident_pgt[512];
25extern pte_t level1_fixmap_pgt[512]; 26extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM];
26extern pgd_t init_top_pgt[]; 27extern pgd_t init_top_pgt[];
27 28
28#define swapper_pg_dir init_top_pgt 29#define swapper_pg_dir init_top_pgt
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 4e588f36228f..285eb3ec4200 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -382,6 +382,11 @@ static inline bool is_mbm_event(int e)
382 e <= QOS_L3_MBM_LOCAL_EVENT_ID); 382 e <= QOS_L3_MBM_LOCAL_EVENT_ID);
383} 383}
384 384
385struct rdt_parse_data {
386 struct rdtgroup *rdtgrp;
387 char *buf;
388};
389
385/** 390/**
386 * struct rdt_resource - attributes of an RDT resource 391 * struct rdt_resource - attributes of an RDT resource
387 * @rid: The index of the resource 392 * @rid: The index of the resource
@@ -423,16 +428,19 @@ struct rdt_resource {
423 struct rdt_cache cache; 428 struct rdt_cache cache;
424 struct rdt_membw membw; 429 struct rdt_membw membw;
425 const char *format_str; 430 const char *format_str;
426 int (*parse_ctrlval) (void *data, struct rdt_resource *r, 431 int (*parse_ctrlval)(struct rdt_parse_data *data,
427 struct rdt_domain *d); 432 struct rdt_resource *r,
433 struct rdt_domain *d);
428 struct list_head evt_list; 434 struct list_head evt_list;
429 int num_rmid; 435 int num_rmid;
430 unsigned int mon_scale; 436 unsigned int mon_scale;
431 unsigned long fflags; 437 unsigned long fflags;
432}; 438};
433 439
434int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d); 440int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
435int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d); 441 struct rdt_domain *d);
442int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
443 struct rdt_domain *d);
436 444
437extern struct mutex rdtgroup_mutex; 445extern struct mutex rdtgroup_mutex;
438 446
@@ -536,6 +544,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
536void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); 544void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
537struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r); 545struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
538int update_domains(struct rdt_resource *r, int closid); 546int update_domains(struct rdt_resource *r, int closid);
547int closids_supported(void);
539void closid_free(int closid); 548void closid_free(int closid);
540int alloc_rmid(void); 549int alloc_rmid(void);
541void free_rmid(u32 rmid); 550void free_rmid(u32 rmid);
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index af358ca05160..0f53049719cd 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -64,19 +64,19 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
64 return true; 64 return true;
65} 65}
66 66
67int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d) 67int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
68 struct rdt_domain *d)
68{ 69{
69 unsigned long data; 70 unsigned long bw_val;
70 char *buf = _buf;
71 71
72 if (d->have_new_ctrl) { 72 if (d->have_new_ctrl) {
73 rdt_last_cmd_printf("duplicate domain %d\n", d->id); 73 rdt_last_cmd_printf("duplicate domain %d\n", d->id);
74 return -EINVAL; 74 return -EINVAL;
75 } 75 }
76 76
77 if (!bw_validate(buf, &data, r)) 77 if (!bw_validate(data->buf, &bw_val, r))
78 return -EINVAL; 78 return -EINVAL;
79 d->new_ctrl = data; 79 d->new_ctrl = bw_val;
80 d->have_new_ctrl = true; 80 d->have_new_ctrl = true;
81 81
82 return 0; 82 return 0;
@@ -123,18 +123,13 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
123 return true; 123 return true;
124} 124}
125 125
126struct rdt_cbm_parse_data {
127 struct rdtgroup *rdtgrp;
128 char *buf;
129};
130
131/* 126/*
132 * Read one cache bit mask (hex). Check that it is valid for the current 127 * Read one cache bit mask (hex). Check that it is valid for the current
133 * resource type. 128 * resource type.
134 */ 129 */
135int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d) 130int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
131 struct rdt_domain *d)
136{ 132{
137 struct rdt_cbm_parse_data *data = _data;
138 struct rdtgroup *rdtgrp = data->rdtgrp; 133 struct rdtgroup *rdtgrp = data->rdtgrp;
139 u32 cbm_val; 134 u32 cbm_val;
140 135
@@ -195,11 +190,17 @@ int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d)
195static int parse_line(char *line, struct rdt_resource *r, 190static int parse_line(char *line, struct rdt_resource *r,
196 struct rdtgroup *rdtgrp) 191 struct rdtgroup *rdtgrp)
197{ 192{
198 struct rdt_cbm_parse_data data; 193 struct rdt_parse_data data;
199 char *dom = NULL, *id; 194 char *dom = NULL, *id;
200 struct rdt_domain *d; 195 struct rdt_domain *d;
201 unsigned long dom_id; 196 unsigned long dom_id;
202 197
198 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
199 r->rid == RDT_RESOURCE_MBA) {
200 rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
201 return -EINVAL;
202 }
203
203next: 204next:
204 if (!line || line[0] == '\0') 205 if (!line || line[0] == '\0')
205 return 0; 206 return 0;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index b799c00bef09..1b8e86a5d5e1 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -97,6 +97,12 @@ void rdt_last_cmd_printf(const char *fmt, ...)
97 * limited as the number of resources grows. 97 * limited as the number of resources grows.
98 */ 98 */
99static int closid_free_map; 99static int closid_free_map;
100static int closid_free_map_len;
101
102int closids_supported(void)
103{
104 return closid_free_map_len;
105}
100 106
101static void closid_init(void) 107static void closid_init(void)
102{ 108{
@@ -111,6 +117,7 @@ static void closid_init(void)
111 117
112 /* CLOSID 0 is always reserved for the default group */ 118 /* CLOSID 0 is always reserved for the default group */
113 closid_free_map &= ~1; 119 closid_free_map &= ~1;
120 closid_free_map_len = rdt_min_closid;
114} 121}
115 122
116static int closid_alloc(void) 123static int closid_alloc(void)
@@ -802,7 +809,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
802 sw_shareable = 0; 809 sw_shareable = 0;
803 exclusive = 0; 810 exclusive = 0;
804 seq_printf(seq, "%d=", dom->id); 811 seq_printf(seq, "%d=", dom->id);
805 for (i = 0; i < r->num_closid; i++, ctrl++) { 812 for (i = 0; i < closids_supported(); i++, ctrl++) {
806 if (!closid_allocated(i)) 813 if (!closid_allocated(i))
807 continue; 814 continue;
808 mode = rdtgroup_mode_by_closid(i); 815 mode = rdtgroup_mode_by_closid(i);
@@ -989,7 +996,7 @@ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
989 996
990 /* Check for overlap with other resource groups */ 997 /* Check for overlap with other resource groups */
991 ctrl = d->ctrl_val; 998 ctrl = d->ctrl_val;
992 for (i = 0; i < r->num_closid; i++, ctrl++) { 999 for (i = 0; i < closids_supported(); i++, ctrl++) {
993 ctrl_b = (unsigned long *)ctrl; 1000 ctrl_b = (unsigned long *)ctrl;
994 mode = rdtgroup_mode_by_closid(i); 1001 mode = rdtgroup_mode_by_closid(i);
995 if (closid_allocated(i) && i != closid && 1002 if (closid_allocated(i) && i != closid &&
@@ -1024,16 +1031,27 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
1024{ 1031{
1025 int closid = rdtgrp->closid; 1032 int closid = rdtgrp->closid;
1026 struct rdt_resource *r; 1033 struct rdt_resource *r;
1034 bool has_cache = false;
1027 struct rdt_domain *d; 1035 struct rdt_domain *d;
1028 1036
1029 for_each_alloc_enabled_rdt_resource(r) { 1037 for_each_alloc_enabled_rdt_resource(r) {
1038 if (r->rid == RDT_RESOURCE_MBA)
1039 continue;
1040 has_cache = true;
1030 list_for_each_entry(d, &r->domains, list) { 1041 list_for_each_entry(d, &r->domains, list) {
1031 if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid], 1042 if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid],
1032 rdtgrp->closid, false)) 1043 rdtgrp->closid, false)) {
1044 rdt_last_cmd_puts("schemata overlaps\n");
1033 return false; 1045 return false;
1046 }
1034 } 1047 }
1035 } 1048 }
1036 1049
1050 if (!has_cache) {
1051 rdt_last_cmd_puts("cannot be exclusive without CAT/CDP\n");
1052 return false;
1053 }
1054
1037 return true; 1055 return true;
1038} 1056}
1039 1057
@@ -1085,7 +1103,6 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
1085 rdtgrp->mode = RDT_MODE_SHAREABLE; 1103 rdtgrp->mode = RDT_MODE_SHAREABLE;
1086 } else if (!strcmp(buf, "exclusive")) { 1104 } else if (!strcmp(buf, "exclusive")) {
1087 if (!rdtgroup_mode_test_exclusive(rdtgrp)) { 1105 if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
1088 rdt_last_cmd_printf("schemata overlaps\n");
1089 ret = -EINVAL; 1106 ret = -EINVAL;
1090 goto out; 1107 goto out;
1091 } 1108 }
@@ -1155,8 +1172,8 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
1155 struct rdt_resource *r; 1172 struct rdt_resource *r;
1156 struct rdt_domain *d; 1173 struct rdt_domain *d;
1157 unsigned int size; 1174 unsigned int size;
1158 bool sep = false; 1175 bool sep;
1159 u32 cbm; 1176 u32 ctrl;
1160 1177
1161 rdtgrp = rdtgroup_kn_lock_live(of->kn); 1178 rdtgrp = rdtgroup_kn_lock_live(of->kn);
1162 if (!rdtgrp) { 1179 if (!rdtgrp) {
@@ -1174,6 +1191,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
1174 } 1191 }
1175 1192
1176 for_each_alloc_enabled_rdt_resource(r) { 1193 for_each_alloc_enabled_rdt_resource(r) {
1194 sep = false;
1177 seq_printf(s, "%*s:", max_name_width, r->name); 1195 seq_printf(s, "%*s:", max_name_width, r->name);
1178 list_for_each_entry(d, &r->domains, list) { 1196 list_for_each_entry(d, &r->domains, list) {
1179 if (sep) 1197 if (sep)
@@ -1181,8 +1199,13 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
1181 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 1199 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1182 size = 0; 1200 size = 0;
1183 } else { 1201 } else {
1184 cbm = d->ctrl_val[rdtgrp->closid]; 1202 ctrl = (!is_mba_sc(r) ?
1185 size = rdtgroup_cbm_to_size(r, d, cbm); 1203 d->ctrl_val[rdtgrp->closid] :
1204 d->mbps_val[rdtgrp->closid]);
1205 if (r->rid == RDT_RESOURCE_MBA)
1206 size = ctrl;
1207 else
1208 size = rdtgroup_cbm_to_size(r, d, ctrl);
1186 } 1209 }
1187 seq_printf(s, "%d=%u", d->id, size); 1210 seq_printf(s, "%d=%u", d->id, size);
1188 sep = true; 1211 sep = true;
@@ -2336,12 +2359,18 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
2336 u32 *ctrl; 2359 u32 *ctrl;
2337 2360
2338 for_each_alloc_enabled_rdt_resource(r) { 2361 for_each_alloc_enabled_rdt_resource(r) {
2362 /*
2363 * Only initialize default allocations for CBM cache
2364 * resources
2365 */
2366 if (r->rid == RDT_RESOURCE_MBA)
2367 continue;
2339 list_for_each_entry(d, &r->domains, list) { 2368 list_for_each_entry(d, &r->domains, list) {
2340 d->have_new_ctrl = false; 2369 d->have_new_ctrl = false;
2341 d->new_ctrl = r->cache.shareable_bits; 2370 d->new_ctrl = r->cache.shareable_bits;
2342 used_b = r->cache.shareable_bits; 2371 used_b = r->cache.shareable_bits;
2343 ctrl = d->ctrl_val; 2372 ctrl = d->ctrl_val;
2344 for (i = 0; i < r->num_closid; i++, ctrl++) { 2373 for (i = 0; i < closids_supported(); i++, ctrl++) {
2345 if (closid_allocated(i) && i != closid) { 2374 if (closid_allocated(i) && i != closid) {
2346 mode = rdtgroup_mode_by_closid(i); 2375 mode = rdtgroup_mode_by_closid(i);
2347 if (mode == RDT_MODE_PSEUDO_LOCKSETUP) 2376 if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
@@ -2373,6 +2402,12 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
2373 } 2402 }
2374 2403
2375 for_each_alloc_enabled_rdt_resource(r) { 2404 for_each_alloc_enabled_rdt_resource(r) {
2405 /*
2406 * Only initialize default allocations for CBM cache
2407 * resources
2408 */
2409 if (r->rid == RDT_RESOURCE_MBA)
2410 continue;
2376 ret = update_domains(r, rdtgrp->closid); 2411 ret = update_domains(r, rdtgrp->closid);
2377 if (ret < 0) { 2412 if (ret < 0) {
2378 rdt_last_cmd_puts("failed to initialize allocations\n"); 2413 rdt_last_cmd_puts("failed to initialize allocations\n");
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 8047379e575a..ddee1f0870c4 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -35,6 +35,7 @@
35#include <asm/bootparam_utils.h> 35#include <asm/bootparam_utils.h>
36#include <asm/microcode.h> 36#include <asm/microcode.h>
37#include <asm/kasan.h> 37#include <asm/kasan.h>
38#include <asm/fixmap.h>
38 39
39/* 40/*
40 * Manage page tables very early on. 41 * Manage page tables very early on.
@@ -112,6 +113,7 @@ static bool __head check_la57_support(unsigned long physaddr)
112unsigned long __head __startup_64(unsigned long physaddr, 113unsigned long __head __startup_64(unsigned long physaddr,
113 struct boot_params *bp) 114 struct boot_params *bp)
114{ 115{
116 unsigned long vaddr, vaddr_end;
115 unsigned long load_delta, *p; 117 unsigned long load_delta, *p;
116 unsigned long pgtable_flags; 118 unsigned long pgtable_flags;
117 pgdval_t *pgd; 119 pgdval_t *pgd;
@@ -165,7 +167,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
165 pud[511] += load_delta; 167 pud[511] += load_delta;
166 168
167 pmd = fixup_pointer(level2_fixmap_pgt, physaddr); 169 pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
168 pmd[506] += load_delta; 170 for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
171 pmd[i] += load_delta;
169 172
170 /* 173 /*
171 * Set up the identity mapping for the switchover. These 174 * Set up the identity mapping for the switchover. These
@@ -235,6 +238,21 @@ unsigned long __head __startup_64(unsigned long physaddr,
235 sme_encrypt_kernel(bp); 238 sme_encrypt_kernel(bp);
236 239
237 /* 240 /*
241 * Clear the memory encryption mask from the .bss..decrypted section.
242 * The bss section will be memset to zero later in the initialization so
243 * there is no need to zero it after changing the memory encryption
244 * attribute.
245 */
246 if (mem_encrypt_active()) {
247 vaddr = (unsigned long)__start_bss_decrypted;
248 vaddr_end = (unsigned long)__end_bss_decrypted;
249 for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
250 i = pmd_index(vaddr);
251 pmd[i] -= sme_get_me_mask();
252 }
253 }
254
255 /*
238 * Return the SME encryption mask (if SME is active) to be used as a 256 * Return the SME encryption mask (if SME is active) to be used as a
239 * modifier for the initial pgdir entry programmed into CR3. 257 * modifier for the initial pgdir entry programmed into CR3.
240 */ 258 */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 15ebc2fc166e..a3618cf04cf6 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -24,6 +24,7 @@
24#include "../entry/calling.h" 24#include "../entry/calling.h"
25#include <asm/export.h> 25#include <asm/export.h>
26#include <asm/nospec-branch.h> 26#include <asm/nospec-branch.h>
27#include <asm/fixmap.h>
27 28
28#ifdef CONFIG_PARAVIRT 29#ifdef CONFIG_PARAVIRT
29#include <asm/asm-offsets.h> 30#include <asm/asm-offsets.h>
@@ -445,13 +446,20 @@ NEXT_PAGE(level2_kernel_pgt)
445 KERNEL_IMAGE_SIZE/PMD_SIZE) 446 KERNEL_IMAGE_SIZE/PMD_SIZE)
446 447
447NEXT_PAGE(level2_fixmap_pgt) 448NEXT_PAGE(level2_fixmap_pgt)
448 .fill 506,8,0 449 .fill (512 - 4 - FIXMAP_PMD_NUM),8,0
449 .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC 450 pgtno = 0
450 /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ 451 .rept (FIXMAP_PMD_NUM)
451 .fill 5,8,0 452 .quad level1_fixmap_pgt + (pgtno << PAGE_SHIFT) - __START_KERNEL_map \
453 + _PAGE_TABLE_NOENC;
454 pgtno = pgtno + 1
455 .endr
456 /* 6 MB reserved space + a 2MB hole */
457 .fill 4,8,0
452 458
453NEXT_PAGE(level1_fixmap_pgt) 459NEXT_PAGE(level1_fixmap_pgt)
460 .rept (FIXMAP_PMD_NUM)
454 .fill 512,8,0 461 .fill 512,8,0
462 .endr
455 463
456#undef PMDS 464#undef PMDS
457 465
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1e6764648af3..013fe3d21dbb 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -28,6 +28,7 @@
28#include <linux/sched/clock.h> 28#include <linux/sched/clock.h>
29#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/set_memory.h>
31 32
32#include <asm/hypervisor.h> 33#include <asm/hypervisor.h>
33#include <asm/mem_encrypt.h> 34#include <asm/mem_encrypt.h>
@@ -61,9 +62,10 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
61 (PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info)) 62 (PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info))
62 63
63static struct pvclock_vsyscall_time_info 64static struct pvclock_vsyscall_time_info
64 hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __aligned(PAGE_SIZE); 65 hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
65static struct pvclock_wall_clock wall_clock; 66static struct pvclock_wall_clock wall_clock __bss_decrypted;
66static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); 67static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
68static struct pvclock_vsyscall_time_info *hvclock_mem;
67 69
68static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) 70static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
69{ 71{
@@ -236,6 +238,45 @@ static void kvm_shutdown(void)
236 native_machine_shutdown(); 238 native_machine_shutdown();
237} 239}
238 240
241static void __init kvmclock_init_mem(void)
242{
243 unsigned long ncpus;
244 unsigned int order;
245 struct page *p;
246 int r;
247
248 if (HVC_BOOT_ARRAY_SIZE >= num_possible_cpus())
249 return;
250
251 ncpus = num_possible_cpus() - HVC_BOOT_ARRAY_SIZE;
252 order = get_order(ncpus * sizeof(*hvclock_mem));
253
254 p = alloc_pages(GFP_KERNEL, order);
255 if (!p) {
256 pr_warn("%s: failed to alloc %d pages", __func__, (1U << order));
257 return;
258 }
259
260 hvclock_mem = page_address(p);
261
262 /*
263 * hvclock is shared between the guest and the hypervisor, must
264 * be mapped decrypted.
265 */
266 if (sev_active()) {
267 r = set_memory_decrypted((unsigned long) hvclock_mem,
268 1UL << order);
269 if (r) {
270 __free_pages(p, order);
271 hvclock_mem = NULL;
272 pr_warn("kvmclock: set_memory_decrypted() failed. Disabling\n");
273 return;
274 }
275 }
276
277 memset(hvclock_mem, 0, PAGE_SIZE << order);
278}
279
239static int __init kvm_setup_vsyscall_timeinfo(void) 280static int __init kvm_setup_vsyscall_timeinfo(void)
240{ 281{
241#ifdef CONFIG_X86_64 282#ifdef CONFIG_X86_64
@@ -250,6 +291,9 @@ static int __init kvm_setup_vsyscall_timeinfo(void)
250 291
251 kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; 292 kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
252#endif 293#endif
294
295 kvmclock_init_mem();
296
253 return 0; 297 return 0;
254} 298}
255early_initcall(kvm_setup_vsyscall_timeinfo); 299early_initcall(kvm_setup_vsyscall_timeinfo);
@@ -269,8 +313,10 @@ static int kvmclock_setup_percpu(unsigned int cpu)
269 /* Use the static page for the first CPUs, allocate otherwise */ 313 /* Use the static page for the first CPUs, allocate otherwise */
270 if (cpu < HVC_BOOT_ARRAY_SIZE) 314 if (cpu < HVC_BOOT_ARRAY_SIZE)
271 p = &hv_clock_boot[cpu]; 315 p = &hv_clock_boot[cpu];
316 else if (hvclock_mem)
317 p = hvclock_mem + cpu - HVC_BOOT_ARRAY_SIZE;
272 else 318 else
273 p = kzalloc(sizeof(*p), GFP_KERNEL); 319 return -ENOMEM;
274 320
275 per_cpu(hv_clock_per_cpu, cpu) = p; 321 per_cpu(hv_clock_per_cpu, cpu) = p;
276 return p ? 0 : -ENOMEM; 322 return p ? 0 : -ENOMEM;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index afdb303285f8..8dc69d82567e 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -91,7 +91,7 @@ unsigned paravirt_patch_call(void *insnbuf,
91 91
92 if (len < 5) { 92 if (len < 5) {
93#ifdef CONFIG_RETPOLINE 93#ifdef CONFIG_RETPOLINE
94 WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr); 94 WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr);
95#endif 95#endif
96 return len; /* call too long for patch site */ 96 return len; /* call too long for patch site */
97 } 97 }
@@ -111,7 +111,7 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
111 111
112 if (len < 5) { 112 if (len < 5) {
113#ifdef CONFIG_RETPOLINE 113#ifdef CONFIG_RETPOLINE
114 WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr); 114 WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr);
115#endif 115#endif
116 return len; /* call too long for patch site */ 116 return len; /* call too long for patch site */
117 } 117 }
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 8bde0a419f86..5dd3317d761f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -65,6 +65,23 @@ jiffies_64 = jiffies;
65#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE); 65#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE);
66#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE); 66#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE);
67 67
68/*
69 * This section contains data which will be mapped as decrypted. Memory
70 * encryption operates on a page basis. Make this section PMD-aligned
71 * to avoid splitting the pages while mapping the section early.
72 *
73 * Note: We use a separate section so that only this section gets
74 * decrypted to avoid exposing more than we wish.
75 */
76#define BSS_DECRYPTED \
77 . = ALIGN(PMD_SIZE); \
78 __start_bss_decrypted = .; \
79 *(.bss..decrypted); \
80 . = ALIGN(PAGE_SIZE); \
81 __start_bss_decrypted_unused = .; \
82 . = ALIGN(PMD_SIZE); \
83 __end_bss_decrypted = .; \
84
68#else 85#else
69 86
70#define X86_ALIGN_RODATA_BEGIN 87#define X86_ALIGN_RODATA_BEGIN
@@ -74,6 +91,7 @@ jiffies_64 = jiffies;
74 91
75#define ALIGN_ENTRY_TEXT_BEGIN 92#define ALIGN_ENTRY_TEXT_BEGIN
76#define ALIGN_ENTRY_TEXT_END 93#define ALIGN_ENTRY_TEXT_END
94#define BSS_DECRYPTED
77 95
78#endif 96#endif
79 97
@@ -355,6 +373,7 @@ SECTIONS
355 __bss_start = .; 373 __bss_start = .;
356 *(.bss..page_aligned) 374 *(.bss..page_aligned)
357 *(.bss) 375 *(.bss)
376 BSS_DECRYPTED
358 . = ALIGN(PAGE_SIZE); 377 . = ALIGN(PAGE_SIZE);
359 __bss_stop = .; 378 __bss_stop = .;
360 } 379 }
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 7a8fc26c1115..faca978ebf9d 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -815,10 +815,14 @@ void free_kernel_image_pages(void *begin, void *end)
815 set_memory_np_noalias(begin_ul, len_pages); 815 set_memory_np_noalias(begin_ul, len_pages);
816} 816}
817 817
818void __weak mem_encrypt_free_decrypted_mem(void) { }
819
818void __ref free_initmem(void) 820void __ref free_initmem(void)
819{ 821{
820 e820__reallocate_tables(); 822 e820__reallocate_tables();
821 823
824 mem_encrypt_free_decrypted_mem();
825
822 free_kernel_image_pages(&__init_begin, &__init_end); 826 free_kernel_image_pages(&__init_begin, &__init_end);
823} 827}
824 828
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index b2de398d1fd3..006f373f54ab 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -348,6 +348,30 @@ bool sev_active(void)
348EXPORT_SYMBOL(sev_active); 348EXPORT_SYMBOL(sev_active);
349 349
350/* Architecture __weak replacement functions */ 350/* Architecture __weak replacement functions */
351void __init mem_encrypt_free_decrypted_mem(void)
352{
353 unsigned long vaddr, vaddr_end, npages;
354 int r;
355
356 vaddr = (unsigned long)__start_bss_decrypted_unused;
357 vaddr_end = (unsigned long)__end_bss_decrypted;
358 npages = (vaddr_end - vaddr) >> PAGE_SHIFT;
359
360 /*
361 * The unused memory range was mapped decrypted, change the encryption
362 * attribute from decrypted to encrypted before freeing it.
363 */
364 if (mem_encrypt_active()) {
365 r = set_memory_encrypted(vaddr, npages);
366 if (r) {
367 pr_warn("failed to free unused decrypted pages\n");
368 return;
369 }
370 }
371
372 free_init_pages("unused decrypted", vaddr, vaddr_end);
373}
374
351void __init mem_encrypt_init(void) 375void __init mem_encrypt_init(void)
352{ 376{
353 if (!sme_me_mask) 377 if (!sme_me_mask)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index ae394552fb94..089e78c4effd 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -637,6 +637,15 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
637{ 637{
638 unsigned long address = __fix_to_virt(idx); 638 unsigned long address = __fix_to_virt(idx);
639 639
640#ifdef CONFIG_X86_64
641 /*
642 * Ensure that the static initial page tables are covering the
643 * fixmap completely.
644 */
645 BUILD_BUG_ON(__end_of_permanent_fixed_addresses >
646 (FIXMAP_PMD_NUM * PTRS_PER_PTE));
647#endif
648
640 if (idx >= __end_of_fixed_addresses) { 649 if (idx >= __end_of_fixed_addresses) {
641 BUG(); 650 BUG();
642 return; 651 return;
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 2fe5c9b1816b..dd461c0167ef 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1907,7 +1907,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1907 /* L3_k[511] -> level2_fixmap_pgt */ 1907 /* L3_k[511] -> level2_fixmap_pgt */
1908 convert_pfn_mfn(level3_kernel_pgt); 1908 convert_pfn_mfn(level3_kernel_pgt);
1909 1909
1910 /* L3_k[511][506] -> level1_fixmap_pgt */ 1910 /* L3_k[511][508-FIXMAP_PMD_NUM ... 507] -> level1_fixmap_pgt */
1911 convert_pfn_mfn(level2_fixmap_pgt); 1911 convert_pfn_mfn(level2_fixmap_pgt);
1912 1912
1913 /* We get [511][511] and have Xen's version of level2_kernel_pgt */ 1913 /* We get [511][511] and have Xen's version of level2_kernel_pgt */
@@ -1952,7 +1952,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1952 set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); 1952 set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
1953 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); 1953 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
1954 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); 1954 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
1955 set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO); 1955
1956 for (i = 0; i < FIXMAP_PMD_NUM; i++) {
1957 set_page_prot(level1_fixmap_pgt + i * PTRS_PER_PTE,
1958 PAGE_KERNEL_RO);
1959 }
1956 1960
1957 /* Pin down new L4 */ 1961 /* Pin down new L4 */
1958 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, 1962 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,