author	Joerg Roedel <jroedel@suse.de>	2018-08-07 06:24:31 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2018-08-07 17:36:02 -0400
commit	16a3fe634f6a568c6234b8747e5d50487fed3526 (patch)
tree	57022662196020a7186f1257259215570cdfa878
parent	30514effc9206d4e084ec32239ae221db157d43a (diff)
x86/mm/pti: Clone kernel-image on PTE level for 32 bit
On 32 bit the kernel sections are not huge-page aligned. When we clone
them at PMD level, we inevitably map into user-space some areas that are
normal kernel memory and may contain secrets. To prevent that, we need
to clone the kernel image at PTE level for 32 bit.

Also make the page-table cloning code more general so that it can handle
both PMD- and PTE-level cloning. This can be generalized further in the
future to also handle clones at the P4D level.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: linux-mm@kvack.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Laight <David.Laight@aculab.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Eduardo Valentin <eduval@amazon.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: aliguori@amazon.com
Cc: daniel.gruss@iaik.tugraz.at
Cc: hughd@google.com
Cc: keescook@google.com
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Waiman Long <llong@redhat.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: "David H . Gutteridge" <dhgutteridge@sympatico.ca>
Cc: joro@8bytes.org
Link: https://lkml.kernel.org/r/1533637471-30953-4-git-send-email-joro@8bytes.org
-rw-r--r--	arch/x86/mm/pti.c	140
1 file changed, 99 insertions(+), 41 deletions(-)
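[Editorial note] To make the alignment problem from the commit message concrete, here is a minimal, self-contained userspace sketch. It is an illustration, not part of the patch: the 2 MiB PMD size (32-bit PAE paging) and the section boundaries are assumed values. Cloning at PMD level can only map whole PMD-aligned blocks, so a non-aligned section drags its neighbours into the user-visible mapping:

#include <stdio.h>

#define PMD_SIZE	(2UL * 1024 * 1024)	/* assumed 2 MiB PMD mapping */

int main(void)
{
	/* Hypothetical, non-PMD-aligned kernel section boundaries. */
	unsigned long start = 0xc1003000UL;
	unsigned long end   = 0xc1405000UL;

	/* PMD-level cloning can only cover whole 2 MiB-aligned blocks. */
	unsigned long pmd_start = start & ~(PMD_SIZE - 1);
	unsigned long pmd_end   = (end + PMD_SIZE - 1) & ~(PMD_SIZE - 1);

	/* Everything outside [start, end) is over-mapped to user-space. */
	printf("over-mapped below section: %lu KiB\n", (start - pmd_start) / 1024);
	printf("over-mapped above section: %lu KiB\n", (pmd_end - end) / 1024);
	return 0;
}

Cloning at PTE level instead maps exactly the 4 KiB pages of the section and nothing around it, which is what this patch enables for 32 bit.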
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 5164c987b1f1..1dc5c683e7a5 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -54,6 +54,16 @@
 #define __GFP_NOTRACK	0
 #endif
 
+/*
+ * Define the page-table levels we clone for user-space on 32
+ * and 64 bit.
+ */
+#ifdef CONFIG_X86_64
+#define	PTI_LEVEL_KERNEL_IMAGE	PTI_CLONE_PMD
+#else
+#define	PTI_LEVEL_KERNEL_IMAGE	PTI_CLONE_PTE
+#endif
+
 static void __init pti_print_if_insecure(const char *reason)
 {
 	if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
@@ -228,7 +238,6 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
 	return pmd_offset(pud, address);
 }
 
-#ifdef CONFIG_X86_VSYSCALL_EMULATION
 /*
  * Walk the shadow copy of the page tables (optionally) trying to allocate
  * page table pages on the way down.  Does not support large pages.
@@ -270,6 +279,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
 	return pte;
 }
 
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
 static void __init pti_setup_vsyscall(void)
 {
 	pte_t *pte, *target_pte;
@@ -290,8 +300,14 @@ static void __init pti_setup_vsyscall(void)
 static void __init pti_setup_vsyscall(void) { }
 #endif
 
+enum pti_clone_level {
+	PTI_CLONE_PMD,
+	PTI_CLONE_PTE,
+};
+
 static void
-pti_clone_pmds(unsigned long start, unsigned long end)
+pti_clone_pgtable(unsigned long start, unsigned long end,
+		  enum pti_clone_level level)
 {
 	unsigned long addr;
 
@@ -299,7 +315,8 @@ pti_clone_pmds(unsigned long start, unsigned long end)
 	 * Clone the populated PMDs which cover start to end. These PMD areas
 	 * can have holes.
 	 */
-	for (addr = start; addr < end; addr += PMD_SIZE) {
+	for (addr = start; addr < end;) {
+		pte_t *pte, *target_pte;
 		pmd_t *pmd, *target_pmd;
 		pgd_t *pgd;
 		p4d_t *p4d;
@@ -315,44 +332,84 @@ pti_clone_pmds(unsigned long start, unsigned long end)
 		p4d = p4d_offset(pgd, addr);
 		if (WARN_ON(p4d_none(*p4d)))
 			return;
+
 		pud = pud_offset(p4d, addr);
-		if (pud_none(*pud))
+		if (pud_none(*pud)) {
+			addr += PUD_SIZE;
 			continue;
+		}
+
 		pmd = pmd_offset(pud, addr);
-		if (pmd_none(*pmd))
+		if (pmd_none(*pmd)) {
+			addr += PMD_SIZE;
 			continue;
+		}
 
-		target_pmd = pti_user_pagetable_walk_pmd(addr);
-		if (WARN_ON(!target_pmd))
-			return;
-
-		/*
-		 * Only clone present PMDs.  This ensures only setting
-		 * _PAGE_GLOBAL on present PMDs.  This should only be
-		 * called on well-known addresses anyway, so a non-
-		 * present PMD would be a surprise.
-		 */
-		if (WARN_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT)))
-			return;
-
-		/*
-		 * Setting 'target_pmd' below creates a mapping in both
-		 * the user and kernel page tables.  It is effectively
-		 * global, so set it as global in both copies.  Note:
-		 * the X86_FEATURE_PGE check is not _required_ because
-		 * the CPU ignores _PAGE_GLOBAL when PGE is not
-		 * supported.  The check keeps consistentency with
-		 * code that only set this bit when supported.
-		 */
-		if (boot_cpu_has(X86_FEATURE_PGE))
-			*pmd = pmd_set_flags(*pmd, _PAGE_GLOBAL);
-
-		/*
-		 * Copy the PMD.  That is, the kernelmode and usermode
-		 * tables will share the last-level page tables of this
-		 * address range
-		 */
-		*target_pmd = *pmd;
+		if (pmd_large(*pmd) || level == PTI_CLONE_PMD) {
+			target_pmd = pti_user_pagetable_walk_pmd(addr);
+			if (WARN_ON(!target_pmd))
+				return;
+
+			/*
+			 * Only clone present PMDs.  This ensures only setting
+			 * _PAGE_GLOBAL on present PMDs.  This should only be
+			 * called on well-known addresses anyway, so a non-
+			 * present PMD would be a surprise.
+			 */
+			if (WARN_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT)))
+				return;
+
+			/*
+			 * Setting 'target_pmd' below creates a mapping in both
+			 * the user and kernel page tables.  It is effectively
+			 * global, so set it as global in both copies.  Note:
+			 * the X86_FEATURE_PGE check is not _required_ because
+			 * the CPU ignores _PAGE_GLOBAL when PGE is not
+			 * supported.  The check keeps consistency with
+			 * code that only sets this bit when supported.
+			 */
+			if (boot_cpu_has(X86_FEATURE_PGE))
+				*pmd = pmd_set_flags(*pmd, _PAGE_GLOBAL);
+
+			/*
+			 * Copy the PMD.  That is, the kernelmode and usermode
+			 * tables will share the last-level page tables of this
+			 * address range.
+			 */
+			*target_pmd = *pmd;
+
+			addr += PMD_SIZE;
+
+		} else if (level == PTI_CLONE_PTE) {
+
+			/* Walk the page-table down to the pte level */
+			pte = pte_offset_kernel(pmd, addr);
+			if (pte_none(*pte)) {
+				addr += PAGE_SIZE;
+				continue;
+			}
+
+			/* Only clone present PTEs */
+			if (WARN_ON(!(pte_flags(*pte) & _PAGE_PRESENT)))
+				return;
+
+			/* Allocate PTE in the user page-table */
+			target_pte = pti_user_pagetable_walk_pte(addr);
+			if (WARN_ON(!target_pte))
+				return;
+
+			/* Set GLOBAL bit in both PTEs */
+			if (boot_cpu_has(X86_FEATURE_PGE))
+				*pte = pte_set_flags(*pte, _PAGE_GLOBAL);
+
+			/* Clone the PTE */
+			*target_pte = *pte;
+
+			addr += PAGE_SIZE;
+
+		} else {
+			BUG();
+		}
 	}
 }
 
@@ -398,7 +455,7 @@ static void __init pti_clone_user_shared(void)
 	start = CPU_ENTRY_AREA_BASE;
 	end = start + (PAGE_SIZE * CPU_ENTRY_AREA_PAGES);
 
-	pti_clone_pmds(start, end);
+	pti_clone_pgtable(start, end, PTI_CLONE_PMD);
 }
 #endif /* CONFIG_X86_64 */
 
@@ -417,8 +474,9 @@ static void __init pti_setup_espfix64(void)
  */
 static void pti_clone_entry_text(void)
 {
-	pti_clone_pmds((unsigned long) __entry_text_start,
-		       (unsigned long) __irqentry_text_end);
+	pti_clone_pgtable((unsigned long) __entry_text_start,
+			  (unsigned long) __irqentry_text_end,
+			  PTI_CLONE_PMD);
 }
 
 /*
@@ -500,10 +558,10 @@ static void pti_clone_kernel_text(void)
 	 * pti_set_kernel_image_nonglobal() did to clear the
 	 * global bit.
 	 */
-	pti_clone_pmds(start, end_clone);
+	pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE);
 
 	/*
-	 * pti_clone_pmds() will set the global bit in any PMDs
+	 * pti_clone_pgtable() will set the global bit in any PMDs
 	 * that it clones, but we also need to get any PTEs in
 	 * the last level for areas that are not huge-page-aligned.
 	 */