aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-04-15 16:35:29 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2018-04-15 16:35:29 -0400
commit6b0a02e86c293c32a50d49b33a1f04420585d40b (patch)
tree048e4f6f19548cd0052395867327b4dcf2d19546
parent71b8ebbf3d7bee88427eb207ef643f2f6447c625 (diff)
parente3e288121408c3abeed5af60b87b95c847143845 (diff)
Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 pti updates from Thomas Gleixner: "Another series of PTI related changes: - Remove the manual stack switch for user entries from the idtentry code. This debloats entry by 5k+ bytes of text. - Use the proper types for the asm/bootparam.h defines to prevent user space compile errors. - Use PAGE_GLOBAL for !PCID systems to gain back performance - Prevent setting of huge PUD/PMD entries when the entries are not leaf entries; otherwise the populated entries to which the PUD/PMD points get lost" * 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/pgtable: Don't set huge PUD/PMD on non-leaf entries x86/pti: Leave kernel text global for !PCID x86/pti: Never implicitly clear _PAGE_GLOBAL for kernel image x86/pti: Enable global pages for shared areas x86/mm: Do not forbid _PAGE_RW before init for __ro_after_init x86/mm: Comment _PAGE_GLOBAL mystery x86/mm: Remove extra filtering in pageattr code x86/mm: Do not auto-massage page protections x86/espfix: Document use of _PAGE_GLOBAL x86/mm: Introduce "default" kernel PTE mask x86/mm: Undo double _PAGE_PSE clearing x86/mm: Factor out pageattr _PAGE_GLOBAL setting x86/entry/64: Drop idtentry's manual stack switch for user entries x86/uapi: Fix asm/bootparam.h userspace compilation errors
-rw-r--r--arch/x86/boot/compressed/kaslr.c3
-rw-r--r--arch/x86/entry/entry_64.S4
-rw-r--r--arch/x86/include/asm/pgtable.h27
-rw-r--r--arch/x86/include/asm/pgtable_types.h29
-rw-r--r--arch/x86/include/asm/pti.h2
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h18
-rw-r--r--arch/x86/kernel/espfix_64.c4
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/head_64.S11
-rw-r--r--arch/x86/kernel/ldt.c6
-rw-r--r--arch/x86/mm/cpu_entry_area.c14
-rw-r--r--arch/x86/mm/ident_map.c3
-rw-r--r--arch/x86/mm/init.c14
-rw-r--r--arch/x86/mm/init_32.c8
-rw-r--r--arch/x86/mm/init_64.c11
-rw-r--r--arch/x86/mm/iomap_32.c6
-rw-r--r--arch/x86/mm/ioremap.c3
-rw-r--r--arch/x86/mm/kasan_init_64.c14
-rw-r--r--arch/x86/mm/pageattr.c97
-rw-r--r--arch/x86/mm/pgtable.c12
-rw-r--r--arch/x86/mm/pti.c126
-rw-r--r--arch/x86/power/hibernate_64.c20
22 files changed, 329 insertions, 105 deletions
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 66e42a098d70..a0a50b91ecef 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -54,6 +54,9 @@ unsigned int ptrs_per_p4d __ro_after_init = 1;
54 54
55extern unsigned long get_cmd_line_ptr(void); 55extern unsigned long get_cmd_line_ptr(void);
56 56
57/* Used by PAGE_KERN* macros: */
58pteval_t __default_kernel_pte_mask __read_mostly = ~0;
59
57/* Simplified build-specific string for starting entropy. */ 60/* Simplified build-specific string for starting entropy. */
58static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" 61static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
59 LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; 62 LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index b0a4649e55ce..a0c1353a2266 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -913,7 +913,7 @@ ENTRY(\sym)
913 pushq $-1 /* ORIG_RAX: no syscall to restart */ 913 pushq $-1 /* ORIG_RAX: no syscall to restart */
914 .endif 914 .endif
915 915
916 .if \paranoid < 2 916 .if \paranoid == 1
917 testb $3, CS-ORIG_RAX(%rsp) /* If coming from userspace, switch stacks */ 917 testb $3, CS-ORIG_RAX(%rsp) /* If coming from userspace, switch stacks */
918 jnz .Lfrom_usermode_switch_stack_\@ 918 jnz .Lfrom_usermode_switch_stack_\@
919 .endif 919 .endif
@@ -960,7 +960,7 @@ ENTRY(\sym)
960 jmp error_exit 960 jmp error_exit
961 .endif 961 .endif
962 962
963 .if \paranoid < 2 963 .if \paranoid == 1
964 /* 964 /*
965 * Entry from userspace. Switch stacks and treat it 965 * Entry from userspace. Switch stacks and treat it
966 * as a normal entry. This means that paranoid handlers 966 * as a normal entry. This means that paranoid handlers
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 89d5c8886c85..5f49b4ff0c24 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -526,22 +526,39 @@ static inline pgprotval_t massage_pgprot(pgprot_t pgprot)
526 return protval; 526 return protval;
527} 527}
528 528
529static inline pgprotval_t check_pgprot(pgprot_t pgprot)
530{
531 pgprotval_t massaged_val = massage_pgprot(pgprot);
532
533 /* mmdebug.h can not be included here because of dependencies */
534#ifdef CONFIG_DEBUG_VM
535 WARN_ONCE(pgprot_val(pgprot) != massaged_val,
536 "attempted to set unsupported pgprot: %016llx "
537 "bits: %016llx supported: %016llx\n",
538 (u64)pgprot_val(pgprot),
539 (u64)pgprot_val(pgprot) ^ massaged_val,
540 (u64)__supported_pte_mask);
541#endif
542
543 return massaged_val;
544}
545
529static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) 546static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
530{ 547{
531 return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) | 548 return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) |
532 massage_pgprot(pgprot)); 549 check_pgprot(pgprot));
533} 550}
534 551
535static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) 552static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
536{ 553{
537 return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) | 554 return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) |
538 massage_pgprot(pgprot)); 555 check_pgprot(pgprot));
539} 556}
540 557
541static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot) 558static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
542{ 559{
543 return __pud(((phys_addr_t)page_nr << PAGE_SHIFT) | 560 return __pud(((phys_addr_t)page_nr << PAGE_SHIFT) |
544 massage_pgprot(pgprot)); 561 check_pgprot(pgprot));
545} 562}
546 563
547static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) 564static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
@@ -553,7 +570,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
553 * the newprot (if present): 570 * the newprot (if present):
554 */ 571 */
555 val &= _PAGE_CHG_MASK; 572 val &= _PAGE_CHG_MASK;
556 val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK; 573 val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK;
557 574
558 return __pte(val); 575 return __pte(val);
559} 576}
@@ -563,7 +580,7 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
563 pmdval_t val = pmd_val(pmd); 580 pmdval_t val = pmd_val(pmd);
564 581
565 val &= _HPAGE_CHG_MASK; 582 val &= _HPAGE_CHG_MASK;
566 val |= massage_pgprot(newprot) & ~_HPAGE_CHG_MASK; 583 val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
567 584
568 return __pmd(val); 585 return __pmd(val);
569} 586}
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index acfe755562a6..1e5a40673953 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -196,19 +196,21 @@ enum page_cache_mode {
196#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL) 196#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL)
197#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP) 197#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP)
198 198
199#define PAGE_KERNEL __pgprot(__PAGE_KERNEL | _PAGE_ENC) 199#define default_pgprot(x) __pgprot((x) & __default_kernel_pte_mask)
200#define PAGE_KERNEL_NOENC __pgprot(__PAGE_KERNEL) 200
201#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC) 201#define PAGE_KERNEL default_pgprot(__PAGE_KERNEL | _PAGE_ENC)
202#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC) 202#define PAGE_KERNEL_NOENC default_pgprot(__PAGE_KERNEL)
203#define PAGE_KERNEL_EXEC_NOENC __pgprot(__PAGE_KERNEL_EXEC) 203#define PAGE_KERNEL_RO default_pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
204#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX | _PAGE_ENC) 204#define PAGE_KERNEL_EXEC default_pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
205#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC) 205#define PAGE_KERNEL_EXEC_NOENC default_pgprot(__PAGE_KERNEL_EXEC)
206#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC) 206#define PAGE_KERNEL_RX default_pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
207#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC) 207#define PAGE_KERNEL_NOCACHE default_pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
208#define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC) 208#define PAGE_KERNEL_LARGE default_pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
209 209#define PAGE_KERNEL_LARGE_EXEC default_pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
210#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) 210#define PAGE_KERNEL_VVAR default_pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
211#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE) 211
212#define PAGE_KERNEL_IO default_pgprot(__PAGE_KERNEL_IO)
213#define PAGE_KERNEL_IO_NOCACHE default_pgprot(__PAGE_KERNEL_IO_NOCACHE)
212 214
213#endif /* __ASSEMBLY__ */ 215#endif /* __ASSEMBLY__ */
214 216
@@ -483,6 +485,7 @@ static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot)
483typedef struct page *pgtable_t; 485typedef struct page *pgtable_t;
484 486
485extern pteval_t __supported_pte_mask; 487extern pteval_t __supported_pte_mask;
488extern pteval_t __default_kernel_pte_mask;
486extern void set_nx(void); 489extern void set_nx(void);
487extern int nx_enabled; 490extern int nx_enabled;
488 491
diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
index 0b5ef05b2d2d..38a17f1d5c9d 100644
--- a/arch/x86/include/asm/pti.h
+++ b/arch/x86/include/asm/pti.h
@@ -6,8 +6,10 @@
6#ifdef CONFIG_PAGE_TABLE_ISOLATION 6#ifdef CONFIG_PAGE_TABLE_ISOLATION
7extern void pti_init(void); 7extern void pti_init(void);
8extern void pti_check_boottime_disable(void); 8extern void pti_check_boottime_disable(void);
9extern void pti_clone_kernel_text(void);
9#else 10#else
10static inline void pti_check_boottime_disable(void) { } 11static inline void pti_check_boottime_disable(void) { }
12static inline void pti_clone_kernel_text(void) { }
11#endif 13#endif
12 14
13#endif /* __ASSEMBLY__ */ 15#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index aebf60357758..a06cbf019744 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -137,15 +137,15 @@ struct boot_e820_entry {
137 * setup data structure. 137 * setup data structure.
138 */ 138 */
139struct jailhouse_setup_data { 139struct jailhouse_setup_data {
140 u16 version; 140 __u16 version;
141 u16 compatible_version; 141 __u16 compatible_version;
142 u16 pm_timer_address; 142 __u16 pm_timer_address;
143 u16 num_cpus; 143 __u16 num_cpus;
144 u64 pci_mmconfig_base; 144 __u64 pci_mmconfig_base;
145 u32 tsc_khz; 145 __u32 tsc_khz;
146 u32 apic_khz; 146 __u32 apic_khz;
147 u8 standard_ioapic; 147 __u8 standard_ioapic;
148 u8 cpu_ids[255]; 148 __u8 cpu_ids[255];
149} __attribute__((packed)); 149} __attribute__((packed));
150 150
151/* The so-called "zeropage" */ 151/* The so-called "zeropage" */
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index e5ec3cafa72e..aebd0d5bc086 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -195,6 +195,10 @@ void init_espfix_ap(int cpu)
195 195
196 pte_p = pte_offset_kernel(&pmd, addr); 196 pte_p = pte_offset_kernel(&pmd, addr);
197 stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0)); 197 stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0));
198 /*
199 * __PAGE_KERNEL_* includes _PAGE_GLOBAL, which we want since
200 * this is mapped to userspace.
201 */
198 pte = __pte(__pa(stack_page) | ((__PAGE_KERNEL_RO | _PAGE_ENC) & ptemask)); 202 pte = __pte(__pa(stack_page) | ((__PAGE_KERNEL_RO | _PAGE_ENC) & ptemask));
199 for (n = 0; n < ESPFIX_PTE_CLONES; n++) 203 for (n = 0; n < ESPFIX_PTE_CLONES; n++)
200 set_pte(&pte_p[n*PTE_STRIDE], pte); 204 set_pte(&pte_p[n*PTE_STRIDE], pte);
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 0c855deee165..0c408f8c4ed4 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -195,6 +195,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
195 pud[i + 1] = (pudval_t)pmd + pgtable_flags; 195 pud[i + 1] = (pudval_t)pmd + pgtable_flags;
196 196
197 pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL; 197 pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
198 /* Filter out unsupported __PAGE_KERNEL_* bits: */
199 pmd_entry &= __supported_pte_mask;
198 pmd_entry += sme_get_me_mask(); 200 pmd_entry += sme_get_me_mask();
199 pmd_entry += physaddr; 201 pmd_entry += physaddr;
200 202
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 48385c1074a5..8344dd2f310a 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -399,8 +399,13 @@ NEXT_PAGE(level3_ident_pgt)
399 .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC 399 .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
400 .fill 511, 8, 0 400 .fill 511, 8, 0
401NEXT_PAGE(level2_ident_pgt) 401NEXT_PAGE(level2_ident_pgt)
402 /* Since I easily can, map the first 1G. 402 /*
403 * Since I easily can, map the first 1G.
403 * Don't set NX because code runs from these pages. 404 * Don't set NX because code runs from these pages.
405 *
406 * Note: This sets _PAGE_GLOBAL regardless of whether
407 * the CPU supports it or it is enabled. But,
408 * the CPU should ignore the bit.
404 */ 409 */
405 PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) 410 PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
406#else 411#else
@@ -431,6 +436,10 @@ NEXT_PAGE(level2_kernel_pgt)
431 * (NOTE: at +512MB starts the module area, see MODULES_VADDR. 436 * (NOTE: at +512MB starts the module area, see MODULES_VADDR.
432 * If you want to increase this then increase MODULES_VADDR 437 * If you want to increase this then increase MODULES_VADDR
433 * too.) 438 * too.)
439 *
440 * This table is eventually used by the kernel during normal
441 * runtime. Care must be taken to clear out undesired bits
442 * later, like _PAGE_RW or _PAGE_GLOBAL in some cases.
434 */ 443 */
435 PMDS(0, __PAGE_KERNEL_LARGE_EXEC, 444 PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
436 KERNEL_IMAGE_SIZE/PMD_SIZE) 445 KERNEL_IMAGE_SIZE/PMD_SIZE)
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 26d713ecad34..d41d896481b8 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -145,6 +145,7 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
145 unsigned long offset = i << PAGE_SHIFT; 145 unsigned long offset = i << PAGE_SHIFT;
146 const void *src = (char *)ldt->entries + offset; 146 const void *src = (char *)ldt->entries + offset;
147 unsigned long pfn; 147 unsigned long pfn;
148 pgprot_t pte_prot;
148 pte_t pte, *ptep; 149 pte_t pte, *ptep;
149 150
150 va = (unsigned long)ldt_slot_va(slot) + offset; 151 va = (unsigned long)ldt_slot_va(slot) + offset;
@@ -163,7 +164,10 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
163 * target via some kernel interface which misses a 164 * target via some kernel interface which misses a
164 * permission check. 165 * permission check.
165 */ 166 */
166 pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL)); 167 pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL);
168 /* Filter out unsupported __PAGE_KERNEL* bits: */
169 pgprot_val(pte_prot) |= __supported_pte_mask;
170 pte = pfn_pte(pfn, pte_prot);
167 set_pte_at(mm, va, ptep, pte); 171 set_pte_at(mm, va, ptep, pte);
168 pte_unmap_unlock(ptep, ptl); 172 pte_unmap_unlock(ptep, ptl);
169 } 173 }
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 476d810639a8..b45f5aaefd74 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -27,8 +27,20 @@ EXPORT_SYMBOL(get_cpu_entry_area);
27void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags) 27void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
28{ 28{
29 unsigned long va = (unsigned long) cea_vaddr; 29 unsigned long va = (unsigned long) cea_vaddr;
30 pte_t pte = pfn_pte(pa >> PAGE_SHIFT, flags);
30 31
31 set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags)); 32 /*
33 * The cpu_entry_area is shared between the user and kernel
34 * page tables. All of its ptes can safely be global.
35 * _PAGE_GLOBAL gets reused to help indicate PROT_NONE for
36 * non-present PTEs, so be careful not to set it in that
37 * case to avoid confusion.
38 */
39 if (boot_cpu_has(X86_FEATURE_PGE) &&
40 (pgprot_val(flags) & _PAGE_PRESENT))
41 pte = pte_set_flags(pte, _PAGE_GLOBAL);
42
43 set_pte_vaddr(va, pte);
32} 44}
33 45
34static void __init 46static void __init
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 9aa22be8331e..a2f0c7e20fb0 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -98,6 +98,9 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
98 if (!info->kernpg_flag) 98 if (!info->kernpg_flag)
99 info->kernpg_flag = _KERNPG_TABLE; 99 info->kernpg_flag = _KERNPG_TABLE;
100 100
101 /* Filter out unsupported __PAGE_KERNEL_* bits: */
102 info->kernpg_flag &= __default_kernel_pte_mask;
103
101 for (; addr < end; addr = next) { 104 for (; addr < end; addr = next) {
102 pgd_t *pgd = pgd_page + pgd_index(addr); 105 pgd_t *pgd = pgd_page + pgd_index(addr);
103 p4d_t *p4d; 106 p4d_t *p4d;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 82f5252c723a..fec82b577c18 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -161,12 +161,6 @@ struct map_range {
161 161
162static int page_size_mask; 162static int page_size_mask;
163 163
164static void enable_global_pages(void)
165{
166 if (!static_cpu_has(X86_FEATURE_PTI))
167 __supported_pte_mask |= _PAGE_GLOBAL;
168}
169
170static void __init probe_page_size_mask(void) 164static void __init probe_page_size_mask(void)
171{ 165{
172 /* 166 /*
@@ -187,9 +181,15 @@ static void __init probe_page_size_mask(void)
187 __supported_pte_mask &= ~_PAGE_GLOBAL; 181 __supported_pte_mask &= ~_PAGE_GLOBAL;
188 if (boot_cpu_has(X86_FEATURE_PGE)) { 182 if (boot_cpu_has(X86_FEATURE_PGE)) {
189 cr4_set_bits_and_update_boot(X86_CR4_PGE); 183 cr4_set_bits_and_update_boot(X86_CR4_PGE);
190 enable_global_pages(); 184 __supported_pte_mask |= _PAGE_GLOBAL;
191 } 185 }
192 186
187 /* By default, everything is supported: */
188 __default_kernel_pte_mask = __supported_pte_mask;
189 /* Except with PTI, where the kernel is mostly non-global: */
190 if (cpu_feature_enabled(X86_FEATURE_PTI))
191 __default_kernel_pte_mask &= ~_PAGE_GLOBAL;
192
193 /* Enable 1 GB linear kernel mappings if available: */ 193 /* Enable 1 GB linear kernel mappings if available: */
194 if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) { 194 if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
195 printk(KERN_INFO "Using GB pages for direct mapping\n"); 195 printk(KERN_INFO "Using GB pages for direct mapping\n");
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8008db2bddb3..c893c6a3d707 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -558,8 +558,14 @@ static void __init pagetable_init(void)
558 permanent_kmaps_init(pgd_base); 558 permanent_kmaps_init(pgd_base);
559} 559}
560 560
561pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL); 561#define DEFAULT_PTE_MASK ~(_PAGE_NX | _PAGE_GLOBAL)
562/* Bits supported by the hardware: */
563pteval_t __supported_pte_mask __read_mostly = DEFAULT_PTE_MASK;
564/* Bits allowed in normal kernel mappings: */
565pteval_t __default_kernel_pte_mask __read_mostly = DEFAULT_PTE_MASK;
562EXPORT_SYMBOL_GPL(__supported_pte_mask); 566EXPORT_SYMBOL_GPL(__supported_pte_mask);
567/* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */
568EXPORT_SYMBOL(__default_kernel_pte_mask);
563 569
564/* user-defined highmem size */ 570/* user-defined highmem size */
565static unsigned int highmem_pages = -1; 571static unsigned int highmem_pages = -1;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 66de40e45f58..0a400606dea0 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -65,8 +65,13 @@
65 * around without checking the pgd every time. 65 * around without checking the pgd every time.
66 */ 66 */
67 67
68/* Bits supported by the hardware: */
68pteval_t __supported_pte_mask __read_mostly = ~0; 69pteval_t __supported_pte_mask __read_mostly = ~0;
70/* Bits allowed in normal kernel mappings: */
71pteval_t __default_kernel_pte_mask __read_mostly = ~0;
69EXPORT_SYMBOL_GPL(__supported_pte_mask); 72EXPORT_SYMBOL_GPL(__supported_pte_mask);
73/* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */
74EXPORT_SYMBOL(__default_kernel_pte_mask);
70 75
71int force_personality32; 76int force_personality32;
72 77
@@ -1286,6 +1291,12 @@ void mark_rodata_ro(void)
1286 (unsigned long) __va(__pa_symbol(_sdata))); 1291 (unsigned long) __va(__pa_symbol(_sdata)));
1287 1292
1288 debug_checkwx(); 1293 debug_checkwx();
1294
1295 /*
1296 * Do this after all of the manipulation of the
1297 * kernel text page tables are complete.
1298 */
1299 pti_clone_kernel_text();
1289} 1300}
1290 1301
1291int kern_addr_valid(unsigned long addr) 1302int kern_addr_valid(unsigned long addr)
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index ada98b39b8ad..b3294d36769d 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -44,6 +44,9 @@ int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot)
44 return ret; 44 return ret;
45 45
46 *prot = __pgprot(__PAGE_KERNEL | cachemode2protval(pcm)); 46 *prot = __pgprot(__PAGE_KERNEL | cachemode2protval(pcm));
47 /* Filter out unsupported __PAGE_KERNEL* bits: */
48 pgprot_val(*prot) &= __default_kernel_pte_mask;
49
47 return 0; 50 return 0;
48} 51}
49EXPORT_SYMBOL_GPL(iomap_create_wc); 52EXPORT_SYMBOL_GPL(iomap_create_wc);
@@ -88,6 +91,9 @@ iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
88 prot = __pgprot(__PAGE_KERNEL | 91 prot = __pgprot(__PAGE_KERNEL |
89 cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS)); 92 cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
90 93
94 /* Filter out unsupported __PAGE_KERNEL* bits: */
95 pgprot_val(prot) &= __default_kernel_pte_mask;
96
91 return (void __force __iomem *) kmap_atomic_prot_pfn(pfn, prot); 97 return (void __force __iomem *) kmap_atomic_prot_pfn(pfn, prot);
92} 98}
93EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); 99EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index e2db83bebc3b..c63a545ec199 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -816,6 +816,9 @@ void __init __early_set_fixmap(enum fixed_addresses idx,
816 } 816 }
817 pte = early_ioremap_pte(addr); 817 pte = early_ioremap_pte(addr);
818 818
819 /* Sanitize 'prot' against any unsupported bits: */
820 pgprot_val(flags) &= __default_kernel_pte_mask;
821
819 if (pgprot_val(flags)) 822 if (pgprot_val(flags))
820 set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); 823 set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
821 else 824 else
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index d8ff013ea9d0..980dbebd0ca7 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -269,6 +269,12 @@ void __init kasan_early_init(void)
269 pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE; 269 pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE;
270 p4dval_t p4d_val = __pa_nodebug(kasan_zero_pud) | _KERNPG_TABLE; 270 p4dval_t p4d_val = __pa_nodebug(kasan_zero_pud) | _KERNPG_TABLE;
271 271
272 /* Mask out unsupported __PAGE_KERNEL bits: */
273 pte_val &= __default_kernel_pte_mask;
274 pmd_val &= __default_kernel_pte_mask;
275 pud_val &= __default_kernel_pte_mask;
276 p4d_val &= __default_kernel_pte_mask;
277
272 for (i = 0; i < PTRS_PER_PTE; i++) 278 for (i = 0; i < PTRS_PER_PTE; i++)
273 kasan_zero_pte[i] = __pte(pte_val); 279 kasan_zero_pte[i] = __pte(pte_val);
274 280
@@ -371,7 +377,13 @@ void __init kasan_init(void)
371 */ 377 */
372 memset(kasan_zero_page, 0, PAGE_SIZE); 378 memset(kasan_zero_page, 0, PAGE_SIZE);
373 for (i = 0; i < PTRS_PER_PTE; i++) { 379 for (i = 0; i < PTRS_PER_PTE; i++) {
374 pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO | _PAGE_ENC); 380 pte_t pte;
381 pgprot_t prot;
382
383 prot = __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC);
384 pgprot_val(prot) &= __default_kernel_pte_mask;
385
386 pte = __pte(__pa(kasan_zero_page) | pgprot_val(prot));
375 set_pte(&kasan_zero_pte[i], pte); 387 set_pte(&kasan_zero_pte[i], pte);
376 } 388 }
377 /* Flush TLBs again to be sure that write protection applied. */ 389 /* Flush TLBs again to be sure that write protection applied. */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 85cf12219dea..0f3d50f4c48c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -298,9 +298,11 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
298 298
299 /* 299 /*
300 * The .rodata section needs to be read-only. Using the pfn 300 * The .rodata section needs to be read-only. Using the pfn
301 * catches all aliases. 301 * catches all aliases. This also includes __ro_after_init,
302 * so do not enforce until kernel_set_to_readonly is true.
302 */ 303 */
303 if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT, 304 if (kernel_set_to_readonly &&
305 within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
304 __pa_symbol(__end_rodata) >> PAGE_SHIFT)) 306 __pa_symbol(__end_rodata) >> PAGE_SHIFT))
305 pgprot_val(forbidden) |= _PAGE_RW; 307 pgprot_val(forbidden) |= _PAGE_RW;
306 308
@@ -512,6 +514,23 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
512#endif 514#endif
513} 515}
514 516
517static pgprot_t pgprot_clear_protnone_bits(pgprot_t prot)
518{
519 /*
520 * _PAGE_GLOBAL means "global page" for present PTEs.
521 * But, it is also used to indicate _PAGE_PROTNONE
522 * for non-present PTEs.
523 *
524 * This ensures that a _PAGE_GLOBAL PTE going from
525 * present to non-present is not confused as
526 * _PAGE_PROTNONE.
527 */
528 if (!(pgprot_val(prot) & _PAGE_PRESENT))
529 pgprot_val(prot) &= ~_PAGE_GLOBAL;
530
531 return prot;
532}
533
515static int 534static int
516try_preserve_large_page(pte_t *kpte, unsigned long address, 535try_preserve_large_page(pte_t *kpte, unsigned long address,
517 struct cpa_data *cpa) 536 struct cpa_data *cpa)
@@ -566,6 +585,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
566 * up accordingly. 585 * up accordingly.
567 */ 586 */
568 old_pte = *kpte; 587 old_pte = *kpte;
588 /* Clear PSE (aka _PAGE_PAT) and move PAT bit to correct position */
569 req_prot = pgprot_large_2_4k(old_prot); 589 req_prot = pgprot_large_2_4k(old_prot);
570 590
571 pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr); 591 pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
@@ -577,19 +597,9 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
577 * different bit positions in the two formats. 597 * different bit positions in the two formats.
578 */ 598 */
579 req_prot = pgprot_4k_2_large(req_prot); 599 req_prot = pgprot_4k_2_large(req_prot);
580 600 req_prot = pgprot_clear_protnone_bits(req_prot);
581 /*
582 * Set the PSE and GLOBAL flags only if the PRESENT flag is
583 * set otherwise pmd_present/pmd_huge will return true even on
584 * a non present pmd. The canon_pgprot will clear _PAGE_GLOBAL
585 * for the ancient hardware that doesn't support it.
586 */
587 if (pgprot_val(req_prot) & _PAGE_PRESENT) 601 if (pgprot_val(req_prot) & _PAGE_PRESENT)
588 pgprot_val(req_prot) |= _PAGE_PSE | _PAGE_GLOBAL; 602 pgprot_val(req_prot) |= _PAGE_PSE;
589 else
590 pgprot_val(req_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL);
591
592 req_prot = canon_pgprot(req_prot);
593 603
594 /* 604 /*
595 * old_pfn points to the large page base pfn. So we need 605 * old_pfn points to the large page base pfn. So we need
@@ -674,8 +684,12 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
674 switch (level) { 684 switch (level) {
675 case PG_LEVEL_2M: 685 case PG_LEVEL_2M:
676 ref_prot = pmd_pgprot(*(pmd_t *)kpte); 686 ref_prot = pmd_pgprot(*(pmd_t *)kpte);
677 /* clear PSE and promote PAT bit to correct position */ 687 /*
688 * Clear PSE (aka _PAGE_PAT) and move
689 * PAT bit to correct position.
690 */
678 ref_prot = pgprot_large_2_4k(ref_prot); 691 ref_prot = pgprot_large_2_4k(ref_prot);
692
679 ref_pfn = pmd_pfn(*(pmd_t *)kpte); 693 ref_pfn = pmd_pfn(*(pmd_t *)kpte);
680 break; 694 break;
681 695
@@ -698,23 +712,14 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
698 return 1; 712 return 1;
699 } 713 }
700 714
701 /* 715 ref_prot = pgprot_clear_protnone_bits(ref_prot);
702 * Set the GLOBAL flags only if the PRESENT flag is set
703 * otherwise pmd/pte_present will return true even on a non
704 * present pmd/pte. The canon_pgprot will clear _PAGE_GLOBAL
705 * for the ancient hardware that doesn't support it.
706 */
707 if (pgprot_val(ref_prot) & _PAGE_PRESENT)
708 pgprot_val(ref_prot) |= _PAGE_GLOBAL;
709 else
710 pgprot_val(ref_prot) &= ~_PAGE_GLOBAL;
711 716
712 /* 717 /*
713 * Get the target pfn from the original entry: 718 * Get the target pfn from the original entry:
714 */ 719 */
715 pfn = ref_pfn; 720 pfn = ref_pfn;
716 for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) 721 for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
717 set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot))); 722 set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
718 723
719 if (virt_addr_valid(address)) { 724 if (virt_addr_valid(address)) {
720 unsigned long pfn = PFN_DOWN(__pa(address)); 725 unsigned long pfn = PFN_DOWN(__pa(address));
@@ -930,19 +935,7 @@ static void populate_pte(struct cpa_data *cpa,
930 935
931 pte = pte_offset_kernel(pmd, start); 936 pte = pte_offset_kernel(pmd, start);
932 937
933 /* 938 pgprot = pgprot_clear_protnone_bits(pgprot);
934 * Set the GLOBAL flags only if the PRESENT flag is
935 * set otherwise pte_present will return true even on
936 * a non present pte. The canon_pgprot will clear
937 * _PAGE_GLOBAL for the ancient hardware that doesn't
938 * support it.
939 */
940 if (pgprot_val(pgprot) & _PAGE_PRESENT)
941 pgprot_val(pgprot) |= _PAGE_GLOBAL;
942 else
943 pgprot_val(pgprot) &= ~_PAGE_GLOBAL;
944
945 pgprot = canon_pgprot(pgprot);
946 939
947 while (num_pages-- && start < end) { 940 while (num_pages-- && start < end) {
948 set_pte(pte, pfn_pte(cpa->pfn, pgprot)); 941 set_pte(pte, pfn_pte(cpa->pfn, pgprot));
@@ -1234,24 +1227,14 @@ repeat:
1234 1227
1235 new_prot = static_protections(new_prot, address, pfn); 1228 new_prot = static_protections(new_prot, address, pfn);
1236 1229
1237 /* 1230 new_prot = pgprot_clear_protnone_bits(new_prot);
1238 * Set the GLOBAL flags only if the PRESENT flag is
1239 * set otherwise pte_present will return true even on
1240 * a non present pte. The canon_pgprot will clear
1241 * _PAGE_GLOBAL for the ancient hardware that doesn't
1242 * support it.
1243 */
1244 if (pgprot_val(new_prot) & _PAGE_PRESENT)
1245 pgprot_val(new_prot) |= _PAGE_GLOBAL;
1246 else
1247 pgprot_val(new_prot) &= ~_PAGE_GLOBAL;
1248 1231
1249 /* 1232 /*
1250 * We need to keep the pfn from the existing PTE, 1233 * We need to keep the pfn from the existing PTE,
1251 * after all we're only going to change its attributes 1234 * after all we're only going to change its attributes
1252 * not the memory it points to 1235 * not the memory it points to
1253 */ 1236 */
1254 new_pte = pfn_pte(pfn, canon_pgprot(new_prot)); 1237 new_pte = pfn_pte(pfn, new_prot);
1255 cpa->pfn = pfn; 1238 cpa->pfn = pfn;
1256 /* 1239 /*
1257 * Do we really change anything ? 1240 * Do we really change anything ?
@@ -1428,11 +1411,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
1428 memset(&cpa, 0, sizeof(cpa)); 1411 memset(&cpa, 0, sizeof(cpa));
1429 1412
1430 /* 1413 /*
1431 * Check, if we are requested to change a not supported 1414 * Check, if we are requested to set a not supported
1432 * feature: 1415 * feature. Clearing non-supported features is OK.
1433 */ 1416 */
1434 mask_set = canon_pgprot(mask_set); 1417 mask_set = canon_pgprot(mask_set);
1435 mask_clr = canon_pgprot(mask_clr); 1418
1436 if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split) 1419 if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
1437 return 0; 1420 return 0;
1438 1421
@@ -1775,6 +1758,12 @@ int set_memory_4k(unsigned long addr, int numpages)
1775 __pgprot(0), 1, 0, NULL); 1758 __pgprot(0), 1, 0, NULL);
1776} 1759}
1777 1760
1761int set_memory_nonglobal(unsigned long addr, int numpages)
1762{
1763 return change_page_attr_clear(&addr, numpages,
1764 __pgprot(_PAGE_GLOBAL), 0);
1765}
1766
1778static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) 1767static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
1779{ 1768{
1780 struct cpa_data cpa; 1769 struct cpa_data cpa;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 34cda7e0551b..ffc8c13c50e4 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -1,6 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include <linux/mm.h> 2#include <linux/mm.h>
3#include <linux/gfp.h> 3#include <linux/gfp.h>
4#include <linux/hugetlb.h>
4#include <asm/pgalloc.h> 5#include <asm/pgalloc.h>
5#include <asm/pgtable.h> 6#include <asm/pgtable.h>
6#include <asm/tlb.h> 7#include <asm/tlb.h>
@@ -583,6 +584,9 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
583void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, 584void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
584 pgprot_t flags) 585 pgprot_t flags)
585{ 586{
587 /* Sanitize 'flags' against any unsupported bits: */
588 pgprot_val(flags) &= __default_kernel_pte_mask;
589
586 __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags)); 590 __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
587} 591}
588 592
@@ -636,6 +640,10 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
636 (mtrr != MTRR_TYPE_WRBACK)) 640 (mtrr != MTRR_TYPE_WRBACK))
637 return 0; 641 return 0;
638 642
643 /* Bail out if we are on a populated non-leaf entry: */
644 if (pud_present(*pud) && !pud_huge(*pud))
645 return 0;
646
639 prot = pgprot_4k_2_large(prot); 647 prot = pgprot_4k_2_large(prot);
640 648
641 set_pte((pte_t *)pud, pfn_pte( 649 set_pte((pte_t *)pud, pfn_pte(
@@ -664,6 +672,10 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
664 return 0; 672 return 0;
665 } 673 }
666 674
675 /* Bail out if we are on a populated non-leaf entry: */
676 if (pmd_present(*pmd) && !pmd_huge(*pmd))
677 return 0;
678
667 prot = pgprot_4k_2_large(prot); 679 prot = pgprot_4k_2_large(prot);
668 680
669 set_pte((pte_t *)pmd, pfn_pte( 681 set_pte((pte_t *)pmd, pfn_pte(
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 631507f0c198..f1fd52f449e0 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -66,12 +66,22 @@ static void __init pti_print_if_secure(const char *reason)
66 pr_info("%s\n", reason); 66 pr_info("%s\n", reason);
67} 67}
68 68
69enum pti_mode {
70 PTI_AUTO = 0,
71 PTI_FORCE_OFF,
72 PTI_FORCE_ON
73} pti_mode;
74
69void __init pti_check_boottime_disable(void) 75void __init pti_check_boottime_disable(void)
70{ 76{
71 char arg[5]; 77 char arg[5];
72 int ret; 78 int ret;
73 79
80 /* Assume mode is auto unless overridden. */
81 pti_mode = PTI_AUTO;
82
74 if (hypervisor_is_type(X86_HYPER_XEN_PV)) { 83 if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
84 pti_mode = PTI_FORCE_OFF;
75 pti_print_if_insecure("disabled on XEN PV."); 85 pti_print_if_insecure("disabled on XEN PV.");
76 return; 86 return;
77 } 87 }
@@ -79,18 +89,23 @@ void __init pti_check_boottime_disable(void)
79 ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg)); 89 ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
80 if (ret > 0) { 90 if (ret > 0) {
81 if (ret == 3 && !strncmp(arg, "off", 3)) { 91 if (ret == 3 && !strncmp(arg, "off", 3)) {
92 pti_mode = PTI_FORCE_OFF;
82 pti_print_if_insecure("disabled on command line."); 93 pti_print_if_insecure("disabled on command line.");
83 return; 94 return;
84 } 95 }
85 if (ret == 2 && !strncmp(arg, "on", 2)) { 96 if (ret == 2 && !strncmp(arg, "on", 2)) {
97 pti_mode = PTI_FORCE_ON;
86 pti_print_if_secure("force enabled on command line."); 98 pti_print_if_secure("force enabled on command line.");
87 goto enable; 99 goto enable;
88 } 100 }
89 if (ret == 4 && !strncmp(arg, "auto", 4)) 101 if (ret == 4 && !strncmp(arg, "auto", 4)) {
102 pti_mode = PTI_AUTO;
90 goto autosel; 103 goto autosel;
104 }
91 } 105 }
92 106
93 if (cmdline_find_option_bool(boot_command_line, "nopti")) { 107 if (cmdline_find_option_bool(boot_command_line, "nopti")) {
108 pti_mode = PTI_FORCE_OFF;
94 pti_print_if_insecure("disabled on command line."); 109 pti_print_if_insecure("disabled on command line.");
95 return; 110 return;
96 } 111 }
@@ -149,7 +164,7 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
149 * 164 *
150 * Returns a pointer to a P4D on success, or NULL on failure. 165 * Returns a pointer to a P4D on success, or NULL on failure.
151 */ 166 */
152static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) 167static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
153{ 168{
154 pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address)); 169 pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
155 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); 170 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
@@ -177,7 +192,7 @@ static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
177 * 192 *
178 * Returns a pointer to a PMD on success, or NULL on failure. 193 * Returns a pointer to a PMD on success, or NULL on failure.
179 */ 194 */
180static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) 195static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
181{ 196{
182 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); 197 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
183 p4d_t *p4d = pti_user_pagetable_walk_p4d(address); 198 p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
@@ -267,7 +282,7 @@ static void __init pti_setup_vsyscall(void)
267static void __init pti_setup_vsyscall(void) { } 282static void __init pti_setup_vsyscall(void) { }
268#endif 283#endif
269 284
270static void __init 285static void
271pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear) 286pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
272{ 287{
273 unsigned long addr; 288 unsigned long addr;
@@ -300,6 +315,27 @@ pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
300 return; 315 return;
301 316
302 /* 317 /*
318 * Only clone present PMDs. This ensures only setting
319 * _PAGE_GLOBAL on present PMDs. This should only be
320 * called on well-known addresses anyway, so a non-
321 * present PMD would be a surprise.
322 */
323 if (WARN_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT)))
324 return;
325
326 /*
327 * Setting 'target_pmd' below creates a mapping in both
328 * the user and kernel page tables. It is effectively
329 * global, so set it as global in both copies. Note:
330 * the X86_FEATURE_PGE check is not _required_ because
331 * the CPU ignores _PAGE_GLOBAL when PGE is not
332 * supported. The check keeps consistency with
333 * code that only sets this bit when supported.
334 */
335 if (boot_cpu_has(X86_FEATURE_PGE))
336 *pmd = pmd_set_flags(*pmd, _PAGE_GLOBAL);
337
338 /*
303 * Copy the PMD. That is, the kernelmode and usermode 339 * Copy the PMD. That is, the kernelmode and usermode
304 * tables will share the last-level page tables of this 340 * tables will share the last-level page tables of this
305 * address range 341 * address range
@@ -348,7 +384,83 @@ static void __init pti_clone_entry_text(void)
348{ 384{
349 pti_clone_pmds((unsigned long) __entry_text_start, 385 pti_clone_pmds((unsigned long) __entry_text_start,
350 (unsigned long) __irqentry_text_end, 386 (unsigned long) __irqentry_text_end,
351 _PAGE_RW | _PAGE_GLOBAL); 387 _PAGE_RW);
388}
389
390/*
391 * Global pages and PCIDs are both ways to make kernel TLB entries
392 * live longer, reduce TLB misses and improve kernel performance.
393 * But, leaving all kernel text Global makes it potentially accessible
394 * to Meltdown-style attacks which make it trivial to find gadgets or
395 * defeat KASLR.
396 *
397 * Only use global pages when it is really worth it.
398 */
399static inline bool pti_kernel_image_global_ok(void)
400{
401 /*
402 * Systems with PCIDs get little benefit from global
403 * kernel text and are not worth the downsides.
404 */
405 if (cpu_feature_enabled(X86_FEATURE_PCID))
406 return false;
407
408 /*
409 * Only do global kernel image for pti=auto. Do the most
410 * secure thing (not global) if pti=on specified.
411 */
412 if (pti_mode != PTI_AUTO)
413 return false;
414
415 /*
416 * K8 may not tolerate the cleared _PAGE_RW on the userspace
417 * global kernel image pages. Do the safe thing (disable
418 * global kernel image). This is unlikely to ever be
419 * noticed because PTI is disabled by default on AMD CPUs.
420 */
421 if (boot_cpu_has(X86_FEATURE_K8))
422 return false;
423
424 return true;
425}
426
427/*
428 * For some configurations, map all of kernel text into the user page
429 * tables. This reduces TLB misses, especially on non-PCID systems.
430 */
431void pti_clone_kernel_text(void)
432{
433 unsigned long start = PFN_ALIGN(_text);
434 unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
435
436 if (!pti_kernel_image_global_ok())
437 return;
438
439 pti_clone_pmds(start, end, _PAGE_RW);
440}
441
442/*
443 * This is the only user for it and it is not arch-generic like
444 * the other set_memory.h functions. Just extern it.
445 */
446extern int set_memory_nonglobal(unsigned long addr, int numpages);
447void pti_set_kernel_image_nonglobal(void)
448{
449 /*
450 * The identity map is created with PMDs, regardless of the
451 * actual length of the kernel. We need to clear
452 * _PAGE_GLOBAL up to a PMD boundary, not just to the end
453 * of the image.
454 */
455 unsigned long start = PFN_ALIGN(_text);
456 unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
457
458 if (pti_kernel_image_global_ok())
459 return;
460
461 pr_debug("set kernel image non-global\n");
462
463 set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT);
352} 464}
353 465
354/* 466/*
@@ -362,6 +474,10 @@ void __init pti_init(void)
362 pr_info("enabled\n"); 474 pr_info("enabled\n");
363 475
364 pti_clone_user_shared(); 476 pti_clone_user_shared();
477
478 /* Undo all global bits from the init pagetables in head_64.S: */
479 pti_set_kernel_image_nonglobal();
480 /* Replace some of the global bits just for shared entry text: */
365 pti_clone_entry_text(); 481 pti_clone_entry_text();
366 pti_setup_espfix64(); 482 pti_setup_espfix64();
367 pti_setup_vsyscall(); 483 pti_setup_vsyscall();
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 74a532989308..48b14b534897 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -51,6 +51,12 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
51 pmd_t *pmd; 51 pmd_t *pmd;
52 pud_t *pud; 52 pud_t *pud;
53 p4d_t *p4d = NULL; 53 p4d_t *p4d = NULL;
54 pgprot_t pgtable_prot = __pgprot(_KERNPG_TABLE);
55 pgprot_t pmd_text_prot = __pgprot(__PAGE_KERNEL_LARGE_EXEC);
56
57 /* Filter out unsupported __PAGE_KERNEL* bits: */
58 pgprot_val(pmd_text_prot) &= __default_kernel_pte_mask;
59 pgprot_val(pgtable_prot) &= __default_kernel_pte_mask;
54 60
55 /* 61 /*
56 * The new mapping only has to cover the page containing the image 62 * The new mapping only has to cover the page containing the image
@@ -81,15 +87,19 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
81 return -ENOMEM; 87 return -ENOMEM;
82 88
83 set_pmd(pmd + pmd_index(restore_jump_address), 89 set_pmd(pmd + pmd_index(restore_jump_address),
84 __pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC)); 90 __pmd((jump_address_phys & PMD_MASK) | pgprot_val(pmd_text_prot)));
85 set_pud(pud + pud_index(restore_jump_address), 91 set_pud(pud + pud_index(restore_jump_address),
86 __pud(__pa(pmd) | _KERNPG_TABLE)); 92 __pud(__pa(pmd) | pgprot_val(pgtable_prot)));
87 if (p4d) { 93 if (p4d) {
88 set_p4d(p4d + p4d_index(restore_jump_address), __p4d(__pa(pud) | _KERNPG_TABLE)); 94 p4d_t new_p4d = __p4d(__pa(pud) | pgprot_val(pgtable_prot));
89 set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(p4d) | _KERNPG_TABLE)); 95 pgd_t new_pgd = __pgd(__pa(p4d) | pgprot_val(pgtable_prot));
96
97 set_p4d(p4d + p4d_index(restore_jump_address), new_p4d);
98 set_pgd(pgd + pgd_index(restore_jump_address), new_pgd);
90 } else { 99 } else {
91 /* No p4d for 4-level paging: point the pgd to the pud page table */ 100 /* No p4d for 4-level paging: point the pgd to the pud page table */
92 set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(pud) | _KERNPG_TABLE)); 101 pgd_t new_pgd = __pgd(__pa(pud) | pgprot_val(pgtable_prot));
102 set_pgd(pgd + pgd_index(restore_jump_address), new_pgd);
93 } 103 }
94 104
95 return 0; 105 return 0;