author		Linus Torvalds <torvalds@linux-foundation.org>	2018-04-29 12:36:22 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-04-29 12:36:22 -0400
commit		65f4d6d0f80b3c55830ec5735194703fa2909ba1 (patch)
tree		1a7ea2b9ccad8a38364e888bb927564329059e0e
parent		810fb07a9b504ac22b95899cf8b39d25a5f3e5c5 (diff)
parent		8bb2610bc4967f19672444a7b0407367f1540028 (diff)
Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 pti fixes from Thomas Gleixner:
"A set of updates for the x86/pti related code:
- Preserve r8-r11 in int $0x80. r8-r11 need to be preserved, but the
int $0x80 entry code removed that quite some time ago. Make it correct
again.
- A set of fixes for the Global Bit work which went into 4.17 and
caused a bunch of interesting regressions:
- Triggering a BUG in the page attribute code due to a missing
check for early boot stage
- Warnings in the page attribute code about holes in the kernel
text mapping which are caused by the freeing of the init code.
Handle such holes gracefully.
- Reduce the amount of kernel memory which is set global to the
actual text and do not incidentally overlap with data.
- Disable the global bit when RANDSTRUCT is enabled as it
partially defeats the hardening.
- Make the page protection setup correct for vma->page_prot
population again. The adjustment of the protections fell through
the crack during the Global bit rework and triggers warnings on
machines which do not support certain features, e.g. NX"
* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/entry/64/compat: Preserve r8-r11 in int $0x80
x86/pti: Filter at vma->vm_page_prot population
x86/pti: Disallow global kernel text with RANDSTRUCT
x86/pti: Reduce amount of kernel text allowed to be Global
x86/pti: Fix boot warning from Global-bit setting
x86/pti: Fix boot problems from Global-bit setting
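The r8-r11 behaviour can be spot-checked from user space in the same spirit as the updated test_syscall_vdso.c further below. A minimal stand-alone sketch (not part of this merge; it assumes an x86-64 kernel built with CONFIG_IA32_EMULATION and uses 20, the 32-bit __NR_getpid, as a harmless syscall):

/*
 * Hypothetical check, not from this series: does int $0x80 preserve r8
 * for a 64-bit process?
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t before = 0x1111222233334444ULL, after;

	asm volatile("movq	%1, %%r8\n\t"
		     "movl	$20, %%eax\n\t"	/* 32-bit __NR_getpid */
		     "int	$0x80\n\t"
		     "movq	%%r8, %0"
		     : "=r" (after)
		     : "r" (before)
		     /* r9-r11 listed too, since older kernels zero them */
		     : "rax", "r8", "r9", "r10", "r11", "memory");

	printf("r8 %s across int $0x80\n",
	       after == before ? "preserved" : "clobbered");
	return after == before ? 0 : 1;
}

With the entry-code fix applied the program should print "preserved"; on kernels that still zero r8-r11 it prints "clobbered".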
 arch/x86/Kconfig                                |  4
 arch/x86/entry/entry_64_compat.S                |  8
 arch/x86/include/asm/pgtable.h                  |  5
 arch/x86/mm/pageattr.c                          | 44
 arch/x86/mm/pti.c                               | 26
 mm/mmap.c                                       | 11
 tools/testing/selftests/x86/test_syscall_vdso.c | 35
 7 files changed, 99 insertions(+), 34 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 00fcf81f2c56..c07f492b871a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -52,6 +52,7 @@ config X86
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FAST_MULTIPLIER
+	select ARCH_HAS_FILTER_PGPROT
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_KCOV			if X86_64
@@ -273,6 +274,9 @@ config ARCH_HAS_CPU_RELAX
 config ARCH_HAS_CACHE_LINE_SIZE
 	def_bool y
 
+config ARCH_HAS_FILTER_PGPROT
+	def_bool y
+
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool y
 
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 9af927e59d49..9de7f1e1dede 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -84,13 +84,13 @@ ENTRY(entry_SYSENTER_compat)
 	pushq	%rdx			/* pt_regs->dx */
 	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
-	pushq	$0			/* pt_regs->r8  = 0 */
+	pushq	%r8			/* pt_regs->r8 */
 	xorl	%r8d, %r8d		/* nospec   r8 */
-	pushq	$0			/* pt_regs->r9  = 0 */
+	pushq	%r9			/* pt_regs->r9 */
 	xorl	%r9d, %r9d		/* nospec   r9 */
-	pushq	$0			/* pt_regs->r10 = 0 */
+	pushq	%r10			/* pt_regs->r10 */
 	xorl	%r10d, %r10d		/* nospec   r10 */
-	pushq	$0			/* pt_regs->r11 = 0 */
+	pushq	%r11			/* pt_regs->r11 */
 	xorl	%r11d, %r11d		/* nospec   r11 */
 	pushq	%rbx			/* pt_regs->rbx */
 	xorl	%ebx, %ebx		/* nospec   rbx */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5f49b4ff0c24..f1633de5a675 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -601,6 +601,11 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 
 #define canon_pgprot(p) __pgprot(massage_pgprot(p))
 
+static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
+{
+	return canon_pgprot(prot);
+}
+
 static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
 					 enum page_cache_mode pcm,
 					 enum page_cache_mode new_pcm)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 0f3d50f4c48c..3bded76e8d5c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -93,6 +93,18 @@ void arch_report_meminfo(struct seq_file *m)
 static inline void split_page_count(int level) { }
 #endif
 
+static inline int
+within(unsigned long addr, unsigned long start, unsigned long end)
+{
+	return addr >= start && addr < end;
+}
+
+static inline int
+within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
+{
+	return addr >= start && addr <= end;
+}
+
 #ifdef CONFIG_X86_64
 
 static inline unsigned long highmap_start_pfn(void)
@@ -106,20 +118,25 @@ static inline unsigned long highmap_end_pfn(void)
 	return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
 }
 
-#endif
-
-static inline int
-within(unsigned long addr, unsigned long start, unsigned long end)
+static bool __cpa_pfn_in_highmap(unsigned long pfn)
 {
-	return addr >= start && addr < end;
+	/*
+	 * Kernel text has an alias mapping at a high address, known
+	 * here as "highmap".
+	 */
+	return within_inclusive(pfn, highmap_start_pfn(), highmap_end_pfn());
 }
 
-static inline int
-within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
+#else
+
+static bool __cpa_pfn_in_highmap(unsigned long pfn)
 {
-	return addr >= start && addr <= end;
+	/* There is no highmap on 32-bit */
+	return false;
 }
 
+#endif
+
 /*
  * Flushing functions
  */
@@ -172,7 +189,7 @@ static void __cpa_flush_all(void *arg)
 
 static void cpa_flush_all(unsigned long cache)
 {
-	BUG_ON(irqs_disabled());
+	BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
 
 	on_each_cpu(__cpa_flush_all, (void *) cache, 1);
 }
@@ -236,7 +253,7 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache,
 	unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
 #endif
 
-	BUG_ON(irqs_disabled());
+	BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
 
 	on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1);
 
@@ -1183,6 +1200,10 @@ static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
 		cpa->numpages = 1;
 		cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
 		return 0;
+
+	} else if (__cpa_pfn_in_highmap(cpa->pfn)) {
+		/* Faults in the highmap are OK, so do not warn: */
+		return -EFAULT;
 	} else {
 		WARN(1, KERN_WARNING "CPA: called for zero pte. "
 		       "vaddr = %lx cpa->vaddr = %lx\n", vaddr,
@@ -1335,8 +1356,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
 	 * to touch the high mapped kernel as well:
 	 */
 	if (!within(vaddr, (unsigned long)_text, _brk_end) &&
-	    within_inclusive(cpa->pfn, highmap_start_pfn(),
-			     highmap_end_pfn())) {
+	    __cpa_pfn_in_highmap(cpa->pfn)) {
 		unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
 					       __START_KERNEL_map - phys_base;
 		alias_cpa = *cpa;
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index f1fd52f449e0..4d418e705878 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -421,6 +421,16 @@ static inline bool pti_kernel_image_global_ok(void)
 	if (boot_cpu_has(X86_FEATURE_K8))
 		return false;
 
+	/*
+	 * RANDSTRUCT derives its hardening benefits from the
+	 * attacker's lack of knowledge about the layout of kernel
+	 * data structures.  Keep the kernel image non-global in
+	 * cases where RANDSTRUCT is in use to help keep the layout a
+	 * secret.
+	 */
+	if (IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT))
+		return false;
+
 	return true;
 }
 
@@ -430,12 +440,24 @@ static inline bool pti_kernel_image_global_ok(void)
  */
 void pti_clone_kernel_text(void)
 {
+	/*
+	 * rodata is part of the kernel image and is normally
+	 * readable on the filesystem or on the web.  But, do not
+	 * clone the areas past rodata, they might contain secrets.
+	 */
 	unsigned long start = PFN_ALIGN(_text);
-	unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
+	unsigned long end = (unsigned long)__end_rodata_hpage_align;
 
 	if (!pti_kernel_image_global_ok())
 		return;
 
+	pr_debug("mapping partial kernel image into user address space\n");
+
+	/*
+	 * Note that this will undo _some_ of the work that
+	 * pti_set_kernel_image_nonglobal() did to clear the
+	 * global bit.
+	 */
 	pti_clone_pmds(start, end, _PAGE_RW);
 }
 
@@ -458,8 +480,6 @@ void pti_set_kernel_image_nonglobal(void)
 	if (pti_kernel_image_global_ok())
 		return;
 
-	pr_debug("set kernel image non-global\n");
-
 	set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT);
 }
 
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -100,11 +100,20 @@ pgprot_t protection_map[16] __ro_after_init = {
 	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
 };
 
+#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
+static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
+{
+	return prot;
+}
+#endif
+
 pgprot_t vm_get_page_prot(unsigned long vm_flags)
 {
-	return __pgprot(pgprot_val(protection_map[vm_flags &
+	pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
 				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
 			pgprot_val(arch_vm_get_page_prot(vm_flags)));
+
+	return arch_filter_pgprot(ret);
 }
 EXPORT_SYMBOL(vm_get_page_prot);
 
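To illustrate what filtering at vm_get_page_prot() buys: on hardware without NX, x86 clears the NX bit from __supported_pte_mask at boot, and arch_filter_pgprot() (canon_pgprot() on x86, see the pgtable.h hunk above) masks the computed protection with that mask, so unsupported bits no longer linger in vma->vm_page_prot; stale bits there are what triggered the warnings mentioned in the pull message. A simplified user-space sketch of the masking (illustrative only, not kernel code; bit positions match x86-64, the low flag bits are arbitrary):

/*
 * Simplified model of x86 arch_filter_pgprot(): mask the requested
 * protection against the PTE bits the CPU actually supports.
 */
#include <stdint.h>
#include <stdio.h>

#define _PAGE_GLOBAL	(1ULL << 8)	/* x86 PTE Global bit */
#define _PAGE_NX	(1ULL << 63)	/* x86 PTE No-eXecute bit */

static uint64_t __supported_pte_mask = ~0ULL;

static uint64_t arch_filter_pgprot(uint64_t prot)
{
	return prot & __supported_pte_mask;	/* canon_pgprot() on x86 */
}

int main(void)
{
	uint64_t prot = _PAGE_NX | _PAGE_GLOBAL | 0x3;	/* arbitrary example */

	__supported_pte_mask &= ~_PAGE_NX;	/* pretend the CPU lacks NX */

	printf("requested prot: %#llx\n", (unsigned long long)prot);
	printf("filtered prot:  %#llx\n",
	       (unsigned long long)arch_filter_pgprot(prot));
	return 0;
}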
diff --git a/tools/testing/selftests/x86/test_syscall_vdso.c b/tools/testing/selftests/x86/test_syscall_vdso.c
index 40370354d4c1..c9c3281077bc 100644
--- a/tools/testing/selftests/x86/test_syscall_vdso.c
+++ b/tools/testing/selftests/x86/test_syscall_vdso.c
@@ -100,12 +100,19 @@ asm (
 	"	shl	$32, %r8\n"
 	"	orq	$0x7f7f7f7f, %r8\n"
 	"	movq	%r8, %r9\n"
-	"	movq	%r8, %r10\n"
-	"	movq	%r8, %r11\n"
-	"	movq	%r8, %r12\n"
-	"	movq	%r8, %r13\n"
-	"	movq	%r8, %r14\n"
-	"	movq	%r8, %r15\n"
+	"	incq	%r9\n"
+	"	movq	%r9, %r10\n"
+	"	incq	%r10\n"
+	"	movq	%r10, %r11\n"
+	"	incq	%r11\n"
+	"	movq	%r11, %r12\n"
+	"	incq	%r12\n"
+	"	movq	%r12, %r13\n"
+	"	incq	%r13\n"
+	"	movq	%r13, %r14\n"
+	"	incq	%r14\n"
+	"	movq	%r14, %r15\n"
+	"	incq	%r15\n"
 	"	ret\n"
 	"	.code32\n"
 	"	.popsection\n"
@@ -128,12 +135,13 @@ int check_regs64(void)
 	int err = 0;
 	int num = 8;
 	uint64_t *r64 = &regs64.r8;
+	uint64_t expected = 0x7f7f7f7f7f7f7f7fULL;
 
 	if (!kernel_is_64bit)
 		return 0;
 
 	do {
-		if (*r64 == 0x7f7f7f7f7f7f7f7fULL)
+		if (*r64 == expected++)
 			continue;	/* register did not change */
 		if (syscall_addr != (long)&int80) {
 			/*
@@ -147,18 +155,17 @@ int check_regs64(void)
 				continue;
 			}
 		} else {
-			/* INT80 syscall entrypoint can be used by
+			/*
+			 * INT80 syscall entrypoint can be used by
 			 * 64-bit programs too, unlike SYSCALL/SYSENTER.
 			 * Therefore it must preserve R12+
 			 * (they are callee-saved registers in 64-bit C ABI).
 			 *
-			 * This was probably historically not intended,
-			 * but R8..11 are clobbered (cleared to 0).
-			 * IOW: they are the only registers which aren't
-			 * preserved across INT80 syscall.
+			 * Starting in Linux 4.17 (and any kernel that
+			 * backports the change), R8..11 are preserved.
+			 * Historically (and probably unintentionally), they
+			 * were clobbered or zeroed.
 			 */
-			if (*r64 == 0 && num <= 11)
-				continue;
 		}
 		printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64);
 		err++;