aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-04-29 12:36:22 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2018-04-29 12:36:22 -0400
commit65f4d6d0f80b3c55830ec5735194703fa2909ba1 (patch)
tree1a7ea2b9ccad8a38364e888bb927564329059e0e
parent810fb07a9b504ac22b95899cf8b39d25a5f3e5c5 (diff)
parent8bb2610bc4967f19672444a7b0407367f1540028 (diff)
Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 pti fixes from Thomas Gleixner: "A set of updates for the x86/pti related code: - Preserve r8-r11 in int $0x80. r8-r11 need to be preserved, but the int $0x80 entry code removed that quite some time ago. Make it correct again. - A set of fixes for the Global Bit work which went into 4.17 and caused a bunch of interesting regressions: - Triggering a BUG in the page attribute code due to a missing check for early boot stage - Warnings in the page attribute code about holes in the kernel text mapping which are caused by the freeing of the init code. Handle such holes gracefully. - Reduce the amount of kernel memory which is set global to the actual text and do not incidentally overlap with data. - Disable the global bit when RANDSTRUCT is enabled as it partially defeats the hardening. - Make the page protection setup correct for vma->page_prot population again. The adjustment of the protections fell through the crack during the Global bit rework and triggers warnings on machines which do not support certain features, e.g. NX" * 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/entry/64/compat: Preserve r8-r11 in int $0x80 x86/pti: Filter at vma->vm_page_prot population x86/pti: Disallow global kernel text with RANDSTRUCT x86/pti: Reduce amount of kernel text allowed to be Global x86/pti: Fix boot warning from Global-bit setting x86/pti: Fix boot problems from Global-bit setting
-rw-r--r--arch/x86/Kconfig4
-rw-r--r--arch/x86/entry/entry_64_compat.S8
-rw-r--r--arch/x86/include/asm/pgtable.h5
-rw-r--r--arch/x86/mm/pageattr.c44
-rw-r--r--arch/x86/mm/pti.c26
-rw-r--r--mm/mmap.c11
-rw-r--r--tools/testing/selftests/x86/test_syscall_vdso.c35
7 files changed, 99 insertions, 34 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 00fcf81f2c56..c07f492b871a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -52,6 +52,7 @@ config X86
52 select ARCH_HAS_DEVMEM_IS_ALLOWED 52 select ARCH_HAS_DEVMEM_IS_ALLOWED
53 select ARCH_HAS_ELF_RANDOMIZE 53 select ARCH_HAS_ELF_RANDOMIZE
54 select ARCH_HAS_FAST_MULTIPLIER 54 select ARCH_HAS_FAST_MULTIPLIER
55 select ARCH_HAS_FILTER_PGPROT
55 select ARCH_HAS_FORTIFY_SOURCE 56 select ARCH_HAS_FORTIFY_SOURCE
56 select ARCH_HAS_GCOV_PROFILE_ALL 57 select ARCH_HAS_GCOV_PROFILE_ALL
57 select ARCH_HAS_KCOV if X86_64 58 select ARCH_HAS_KCOV if X86_64
@@ -273,6 +274,9 @@ config ARCH_HAS_CPU_RELAX
273config ARCH_HAS_CACHE_LINE_SIZE 274config ARCH_HAS_CACHE_LINE_SIZE
274 def_bool y 275 def_bool y
275 276
277config ARCH_HAS_FILTER_PGPROT
278 def_bool y
279
276config HAVE_SETUP_PER_CPU_AREA 280config HAVE_SETUP_PER_CPU_AREA
277 def_bool y 281 def_bool y
278 282
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 9af927e59d49..9de7f1e1dede 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -84,13 +84,13 @@ ENTRY(entry_SYSENTER_compat)
84 pushq %rdx /* pt_regs->dx */ 84 pushq %rdx /* pt_regs->dx */
85 pushq %rcx /* pt_regs->cx */ 85 pushq %rcx /* pt_regs->cx */
86 pushq $-ENOSYS /* pt_regs->ax */ 86 pushq $-ENOSYS /* pt_regs->ax */
87 pushq $0 /* pt_regs->r8 = 0 */ 87 pushq %r8 /* pt_regs->r8 */
88 xorl %r8d, %r8d /* nospec r8 */ 88 xorl %r8d, %r8d /* nospec r8 */
89 pushq $0 /* pt_regs->r9 = 0 */ 89 pushq %r9 /* pt_regs->r9 */
90 xorl %r9d, %r9d /* nospec r9 */ 90 xorl %r9d, %r9d /* nospec r9 */
91 pushq $0 /* pt_regs->r10 = 0 */ 91 pushq %r10 /* pt_regs->r10 */
92 xorl %r10d, %r10d /* nospec r10 */ 92 xorl %r10d, %r10d /* nospec r10 */
93 pushq $0 /* pt_regs->r11 = 0 */ 93 pushq %r11 /* pt_regs->r11 */
94 xorl %r11d, %r11d /* nospec r11 */ 94 xorl %r11d, %r11d /* nospec r11 */
95 pushq %rbx /* pt_regs->rbx */ 95 pushq %rbx /* pt_regs->rbx */
96 xorl %ebx, %ebx /* nospec rbx */ 96 xorl %ebx, %ebx /* nospec rbx */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5f49b4ff0c24..f1633de5a675 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -601,6 +601,11 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
601 601
602#define canon_pgprot(p) __pgprot(massage_pgprot(p)) 602#define canon_pgprot(p) __pgprot(massage_pgprot(p))
603 603
604static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
605{
606 return canon_pgprot(prot);
607}
608
604static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, 609static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
605 enum page_cache_mode pcm, 610 enum page_cache_mode pcm,
606 enum page_cache_mode new_pcm) 611 enum page_cache_mode new_pcm)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 0f3d50f4c48c..3bded76e8d5c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -93,6 +93,18 @@ void arch_report_meminfo(struct seq_file *m)
93static inline void split_page_count(int level) { } 93static inline void split_page_count(int level) { }
94#endif 94#endif
95 95
96static inline int
97within(unsigned long addr, unsigned long start, unsigned long end)
98{
99 return addr >= start && addr < end;
100}
101
102static inline int
103within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
104{
105 return addr >= start && addr <= end;
106}
107
96#ifdef CONFIG_X86_64 108#ifdef CONFIG_X86_64
97 109
98static inline unsigned long highmap_start_pfn(void) 110static inline unsigned long highmap_start_pfn(void)
@@ -106,20 +118,25 @@ static inline unsigned long highmap_end_pfn(void)
106 return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT; 118 return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
107} 119}
108 120
109#endif 121static bool __cpa_pfn_in_highmap(unsigned long pfn)
110
111static inline int
112within(unsigned long addr, unsigned long start, unsigned long end)
113{ 122{
114 return addr >= start && addr < end; 123 /*
124 * Kernel text has an alias mapping at a high address, known
125 * here as "highmap".
126 */
127 return within_inclusive(pfn, highmap_start_pfn(), highmap_end_pfn());
115} 128}
116 129
117static inline int 130#else
118within_inclusive(unsigned long addr, unsigned long start, unsigned long end) 131
132static bool __cpa_pfn_in_highmap(unsigned long pfn)
119{ 133{
120 return addr >= start && addr <= end; 134 /* There is no highmap on 32-bit */
135 return false;
121} 136}
122 137
138#endif
139
123/* 140/*
124 * Flushing functions 141 * Flushing functions
125 */ 142 */
@@ -172,7 +189,7 @@ static void __cpa_flush_all(void *arg)
172 189
173static void cpa_flush_all(unsigned long cache) 190static void cpa_flush_all(unsigned long cache)
174{ 191{
175 BUG_ON(irqs_disabled()); 192 BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
176 193
177 on_each_cpu(__cpa_flush_all, (void *) cache, 1); 194 on_each_cpu(__cpa_flush_all, (void *) cache, 1);
178} 195}
@@ -236,7 +253,7 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache,
236 unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */ 253 unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
237#endif 254#endif
238 255
239 BUG_ON(irqs_disabled()); 256 BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
240 257
241 on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1); 258 on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1);
242 259
@@ -1183,6 +1200,10 @@ static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
1183 cpa->numpages = 1; 1200 cpa->numpages = 1;
1184 cpa->pfn = __pa(vaddr) >> PAGE_SHIFT; 1201 cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
1185 return 0; 1202 return 0;
1203
1204 } else if (__cpa_pfn_in_highmap(cpa->pfn)) {
1205 /* Faults in the highmap are OK, so do not warn: */
1206 return -EFAULT;
1186 } else { 1207 } else {
1187 WARN(1, KERN_WARNING "CPA: called for zero pte. " 1208 WARN(1, KERN_WARNING "CPA: called for zero pte. "
1188 "vaddr = %lx cpa->vaddr = %lx\n", vaddr, 1209 "vaddr = %lx cpa->vaddr = %lx\n", vaddr,
@@ -1335,8 +1356,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
1335 * to touch the high mapped kernel as well: 1356 * to touch the high mapped kernel as well:
1336 */ 1357 */
1337 if (!within(vaddr, (unsigned long)_text, _brk_end) && 1358 if (!within(vaddr, (unsigned long)_text, _brk_end) &&
1338 within_inclusive(cpa->pfn, highmap_start_pfn(), 1359 __cpa_pfn_in_highmap(cpa->pfn)) {
1339 highmap_end_pfn())) {
1340 unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + 1360 unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
1341 __START_KERNEL_map - phys_base; 1361 __START_KERNEL_map - phys_base;
1342 alias_cpa = *cpa; 1362 alias_cpa = *cpa;
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index f1fd52f449e0..4d418e705878 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -421,6 +421,16 @@ static inline bool pti_kernel_image_global_ok(void)
421 if (boot_cpu_has(X86_FEATURE_K8)) 421 if (boot_cpu_has(X86_FEATURE_K8))
422 return false; 422 return false;
423 423
424 /*
425 * RANDSTRUCT derives its hardening benefits from the
426 * attacker's lack of knowledge about the layout of kernel
427 * data structures. Keep the kernel image non-global in
428 * cases where RANDSTRUCT is in use to help keep the layout a
429 * secret.
430 */
431 if (IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT))
432 return false;
433
424 return true; 434 return true;
425} 435}
426 436
@@ -430,12 +440,24 @@ static inline bool pti_kernel_image_global_ok(void)
430 */ 440 */
431void pti_clone_kernel_text(void) 441void pti_clone_kernel_text(void)
432{ 442{
443 /*
444 * rodata is part of the kernel image and is normally
445 * readable on the filesystem or on the web. But, do not
446 * clone the areas past rodata, they might contain secrets.
447 */
433 unsigned long start = PFN_ALIGN(_text); 448 unsigned long start = PFN_ALIGN(_text);
434 unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE); 449 unsigned long end = (unsigned long)__end_rodata_hpage_align;
435 450
436 if (!pti_kernel_image_global_ok()) 451 if (!pti_kernel_image_global_ok())
437 return; 452 return;
438 453
454 pr_debug("mapping partial kernel image into user address space\n");
455
456 /*
457 * Note that this will undo _some_ of the work that
458 * pti_set_kernel_image_nonglobal() did to clear the
459 * global bit.
460 */
439 pti_clone_pmds(start, end, _PAGE_RW); 461 pti_clone_pmds(start, end, _PAGE_RW);
440} 462}
441 463
@@ -458,8 +480,6 @@ void pti_set_kernel_image_nonglobal(void)
458 if (pti_kernel_image_global_ok()) 480 if (pti_kernel_image_global_ok())
459 return; 481 return;
460 482
461 pr_debug("set kernel image non-global\n");
462
463 set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT); 483 set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT);
464} 484}
465 485
diff --git a/mm/mmap.c b/mm/mmap.c
index 188f195883b9..9d5968d1e8e3 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -100,11 +100,20 @@ pgprot_t protection_map[16] __ro_after_init = {
100 __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111 100 __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
101}; 101};
102 102
103#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
104static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
105{
106 return prot;
107}
108#endif
109
103pgprot_t vm_get_page_prot(unsigned long vm_flags) 110pgprot_t vm_get_page_prot(unsigned long vm_flags)
104{ 111{
105 return __pgprot(pgprot_val(protection_map[vm_flags & 112 pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
106 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) | 113 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
107 pgprot_val(arch_vm_get_page_prot(vm_flags))); 114 pgprot_val(arch_vm_get_page_prot(vm_flags)));
115
116 return arch_filter_pgprot(ret);
108} 117}
109EXPORT_SYMBOL(vm_get_page_prot); 118EXPORT_SYMBOL(vm_get_page_prot);
110 119
diff --git a/tools/testing/selftests/x86/test_syscall_vdso.c b/tools/testing/selftests/x86/test_syscall_vdso.c
index 40370354d4c1..c9c3281077bc 100644
--- a/tools/testing/selftests/x86/test_syscall_vdso.c
+++ b/tools/testing/selftests/x86/test_syscall_vdso.c
@@ -100,12 +100,19 @@ asm (
100 " shl $32, %r8\n" 100 " shl $32, %r8\n"
101 " orq $0x7f7f7f7f, %r8\n" 101 " orq $0x7f7f7f7f, %r8\n"
102 " movq %r8, %r9\n" 102 " movq %r8, %r9\n"
103 " movq %r8, %r10\n" 103 " incq %r9\n"
104 " movq %r8, %r11\n" 104 " movq %r9, %r10\n"
105 " movq %r8, %r12\n" 105 " incq %r10\n"
106 " movq %r8, %r13\n" 106 " movq %r10, %r11\n"
107 " movq %r8, %r14\n" 107 " incq %r11\n"
108 " movq %r8, %r15\n" 108 " movq %r11, %r12\n"
109 " incq %r12\n"
110 " movq %r12, %r13\n"
111 " incq %r13\n"
112 " movq %r13, %r14\n"
113 " incq %r14\n"
114 " movq %r14, %r15\n"
115 " incq %r15\n"
109 " ret\n" 116 " ret\n"
110 " .code32\n" 117 " .code32\n"
111 " .popsection\n" 118 " .popsection\n"
@@ -128,12 +135,13 @@ int check_regs64(void)
128 int err = 0; 135 int err = 0;
129 int num = 8; 136 int num = 8;
130 uint64_t *r64 = &regs64.r8; 137 uint64_t *r64 = &regs64.r8;
138 uint64_t expected = 0x7f7f7f7f7f7f7f7fULL;
131 139
132 if (!kernel_is_64bit) 140 if (!kernel_is_64bit)
133 return 0; 141 return 0;
134 142
135 do { 143 do {
136 if (*r64 == 0x7f7f7f7f7f7f7f7fULL) 144 if (*r64 == expected++)
137 continue; /* register did not change */ 145 continue; /* register did not change */
138 if (syscall_addr != (long)&int80) { 146 if (syscall_addr != (long)&int80) {
139 /* 147 /*
@@ -147,18 +155,17 @@ int check_regs64(void)
147 continue; 155 continue;
148 } 156 }
149 } else { 157 } else {
150 /* INT80 syscall entrypoint can be used by 158 /*
159 * INT80 syscall entrypoint can be used by
151 * 64-bit programs too, unlike SYSCALL/SYSENTER. 160 * 64-bit programs too, unlike SYSCALL/SYSENTER.
152 * Therefore it must preserve R12+ 161 * Therefore it must preserve R12+
153 * (they are callee-saved registers in 64-bit C ABI). 162 * (they are callee-saved registers in 64-bit C ABI).
154 * 163 *
155 * This was probably historically not intended, 164 * Starting in Linux 4.17 (and any kernel that
156 * but R8..11 are clobbered (cleared to 0). 165 * backports the change), R8..11 are preserved.
157 * IOW: they are the only registers which aren't 166 * Historically (and probably unintentionally), they
158 * preserved across INT80 syscall. 167 * were clobbered or zeroed.
159 */ 168 */
160 if (*r64 == 0 && num <= 11)
161 continue;
162 } 169 }
163 printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64); 170 printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64);
164 err++; 171 err++;