author		Brijesh Singh <brijesh.singh@amd.com>		2019-04-17 11:41:17 -0400
committer	Borislav Petkov <bp@suse.de>			2019-05-08 13:08:35 -0400
commit		eccd906484d1cd4b5da00f093d678badb6f48f28 (patch)
tree		a7dd24f4e3fed0a73fe911feb9619050294d1d11
parent		0e72499c3cc0cead32f88b94a02204d2b80768bf (diff)
x86/mm: Do not use set_{pud, pmd}_safe() when splitting a large page
The commit

  0a9fe8ca844d ("x86/mm: Validate kernel_physical_mapping_init() PTE population")

triggers this warning in SEV guests:

  WARNING: CPU: 0 PID: 0 at arch/x86/include/asm/pgalloc.h:87 phys_pmd_init+0x30d/0x386
  Call Trace:
   kernel_physical_mapping_init+0xce/0x259
   early_set_memory_enc_dec+0x10f/0x160
   kvm_smp_prepare_boot_cpu+0x71/0x9d
   start_kernel+0x1c9/0x50b
   secondary_startup_64+0xa4/0xb0

A SEV guest calls kernel_physical_mapping_init() to clear the encryption
mask from an existing mapping. While doing so, it also splits large pages
into smaller ones.

To split a page, kernel_physical_mapping_init() allocates a new page and
updates the existing entry. The set_{pud,pmd}_safe() helpers trigger a
warning when updating an entry with a page in the present state.

Add a new kernel_physical_mapping_change() helper which uses the non-safe
variants of set_{pmd,pud,p4d}() and {pmd,pud,p4d}_populate() routines when
updating the entry. Since kernel_physical_mapping_change() may replace an
existing entry with a new entry, the caller is responsible to flush the TLB
at the end.

Change early_set_memory_enc_dec() to use kernel_physical_mapping_change()
when it wants to clear the memory encryption mask from the page table entry.

 [ bp: - massage commit message.
       - flesh out comment according to dhansen's request.
       - align function arguments at opening brace. ]

Fixes: 0a9fe8ca844d ("x86/mm: Validate kernel_physical_mapping_init() PTE population")
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Lendacky <Thomas.Lendacky@amd.com>
Cc: x86-ml <x86@kernel.org>
Link: https://lkml.kernel.org/r/20190417154102.22613-1-brijesh.singh@amd.com
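For reference, here is a conceptual sketch (not the literal preprocessor
output) of what the DEFINE_ENTRY(pmd, pmd, init) helper added by this patch
boils down to; the init flag selects the warning _safe variant on the initial
population path and the plain setter on the page-split path:

	/* Illustrative expansion only -- see the DEFINE_ENTRY() macro in the diff below. */
	static inline void set_pmd_init(pmd_t *arg1, pmd_t arg2, bool init)
	{
		if (init)
			set_pmd_safe(arg1, arg2);	/* warns if the entry is already present */
		else
			set_pmd(arg1, arg2);		/* plain update, used when splitting a large page */
	}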
-rw-r--r--	arch/x86/mm/init_64.c		144
-rw-r--r--	arch/x86/mm/mem_encrypt.c	10
-rw-r--r--	arch/x86/mm/mm_internal.h	3
3 files changed, 114 insertions(+), 43 deletions(-)
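Since kernel_physical_mapping_change() leaves TLB maintenance to its caller,
a caller would look roughly like the hypothetical sketch below (modeled on the
early_set_memory_enc_dec() change further down; the function name, the
PMD-aligned vaddr/size arguments and the use of __flush_tlb_all() are
illustrative assumptions, not part of this patch):

	/* Hypothetical caller: change attributes on an already-present mapping. */
	static void __init example_remap_existing(unsigned long vaddr, unsigned long size)
	{
		/* vaddr and size are assumed to be PMD-aligned here. */
		unsigned long split_page_size_mask = 1 << PG_LEVEL_2M;

		kernel_physical_mapping_change(__pa(vaddr), __pa(vaddr + size),
					       split_page_size_mask);

		__flush_tlb_all();	/* the new helper does not flush the TLB itself */
	}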
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bccff68e3267..5cd125bd2a85 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -58,6 +58,37 @@
 
 #include "ident_map.c"
 
+#define DEFINE_POPULATE(fname, type1, type2, init)		\
+static inline void fname##_init(struct mm_struct *mm,		\
+		type1##_t *arg1, type2##_t *arg2, bool init)	\
+{								\
+	if (init)						\
+		fname##_safe(mm, arg1, arg2);			\
+	else							\
+		fname(mm, arg1, arg2);				\
+}
+
+DEFINE_POPULATE(p4d_populate, p4d, pud, init)
+DEFINE_POPULATE(pgd_populate, pgd, p4d, init)
+DEFINE_POPULATE(pud_populate, pud, pmd, init)
+DEFINE_POPULATE(pmd_populate_kernel, pmd, pte, init)
+
+#define DEFINE_ENTRY(type1, type2, init)			\
+static inline void set_##type1##_init(type1##_t *arg1,		\
+			type2##_t arg2, bool init)		\
+{								\
+	if (init)						\
+		set_##type1##_safe(arg1, arg2);			\
+	else							\
+		set_##type1(arg1, arg2);			\
+}
+
+DEFINE_ENTRY(p4d, p4d, init)
+DEFINE_ENTRY(pud, pud, init)
+DEFINE_ENTRY(pmd, pmd, init)
+DEFINE_ENTRY(pte, pte, init)
+
+
 /*
  * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
  * physical space so we can cache the place of the first one and move
@@ -414,7 +445,7 @@ void __init cleanup_highmap(void)
  */
 static unsigned long __meminit
 phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
-	      pgprot_t prot)
+	      pgprot_t prot, bool init)
 {
 	unsigned long pages = 0, paddr_next;
 	unsigned long paddr_last = paddr_end;
@@ -432,7 +463,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
 					     E820_TYPE_RAM) &&
 			    !e820__mapped_any(paddr & PAGE_MASK, paddr_next,
 					     E820_TYPE_RESERVED_KERN))
-				set_pte_safe(pte, __pte(0));
+				set_pte_init(pte, __pte(0), init);
 			continue;
 		}
 
@@ -452,7 +483,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
 			pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr,
 				pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
 		pages++;
-		set_pte_safe(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
+		set_pte_init(pte, pfn_pte(paddr >> PAGE_SHIFT, prot), init);
 		paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
 	}
 
@@ -468,7 +499,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
  */
 static unsigned long __meminit
 phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
-	      unsigned long page_size_mask, pgprot_t prot)
+	      unsigned long page_size_mask, pgprot_t prot, bool init)
 {
 	unsigned long pages = 0, paddr_next;
 	unsigned long paddr_last = paddr_end;
@@ -487,7 +518,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
 					     E820_TYPE_RAM) &&
 			    !e820__mapped_any(paddr & PMD_MASK, paddr_next,
 					     E820_TYPE_RESERVED_KERN))
-				set_pmd_safe(pmd, __pmd(0));
+				set_pmd_init(pmd, __pmd(0), init);
 			continue;
 		}
 
@@ -496,7 +527,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
 			spin_lock(&init_mm.page_table_lock);
 			pte = (pte_t *)pmd_page_vaddr(*pmd);
 			paddr_last = phys_pte_init(pte, paddr,
-						   paddr_end, prot);
+						   paddr_end, prot,
+						   init);
 			spin_unlock(&init_mm.page_table_lock);
 			continue;
 		}
@@ -524,19 +556,20 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
 		if (page_size_mask & (1<<PG_LEVEL_2M)) {
 			pages++;
 			spin_lock(&init_mm.page_table_lock);
-			set_pte_safe((pte_t *)pmd,
+			set_pte_init((pte_t *)pmd,
 				pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
-					__pgprot(pgprot_val(prot) | _PAGE_PSE)));
+					__pgprot(pgprot_val(prot) | _PAGE_PSE)),
+				init);
 			spin_unlock(&init_mm.page_table_lock);
 			paddr_last = paddr_next;
 			continue;
 		}
 
 		pte = alloc_low_page();
-		paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot);
+		paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, init);
 
 		spin_lock(&init_mm.page_table_lock);
-		pmd_populate_kernel_safe(&init_mm, pmd, pte);
+		pmd_populate_kernel_init(&init_mm, pmd, pte, init);
 		spin_unlock(&init_mm.page_table_lock);
 	}
 	update_page_count(PG_LEVEL_2M, pages);
@@ -551,7 +584,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
  */
 static unsigned long __meminit
 phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
-	      unsigned long page_size_mask)
+	      unsigned long page_size_mask, bool init)
 {
 	unsigned long pages = 0, paddr_next;
 	unsigned long paddr_last = paddr_end;
@@ -573,7 +606,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 					     E820_TYPE_RAM) &&
 			    !e820__mapped_any(paddr & PUD_MASK, paddr_next,
 					     E820_TYPE_RESERVED_KERN))
-				set_pud_safe(pud, __pud(0));
+				set_pud_init(pud, __pud(0), init);
 			continue;
 		}
 
@@ -583,7 +616,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 			paddr_last = phys_pmd_init(pmd, paddr,
 						   paddr_end,
 						   page_size_mask,
-						   prot);
+						   prot, init);
 			continue;
 		}
 		/*
@@ -610,9 +643,10 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 		if (page_size_mask & (1<<PG_LEVEL_1G)) {
 			pages++;
 			spin_lock(&init_mm.page_table_lock);
-			set_pte_safe((pte_t *)pud,
+			set_pte_init((pte_t *)pud,
 				pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
-					PAGE_KERNEL_LARGE));
+					PAGE_KERNEL_LARGE),
+				init);
 			spin_unlock(&init_mm.page_table_lock);
 			paddr_last = paddr_next;
 			continue;
@@ -620,10 +654,10 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 
 		pmd = alloc_low_page();
 		paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
-					   page_size_mask, prot);
+					   page_size_mask, prot, init);
 
 		spin_lock(&init_mm.page_table_lock);
-		pud_populate_safe(&init_mm, pud, pmd);
+		pud_populate_init(&init_mm, pud, pmd, init);
 		spin_unlock(&init_mm.page_table_lock);
 	}
 
@@ -634,14 +668,15 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 
 static unsigned long __meminit
 phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
-	      unsigned long page_size_mask)
+	      unsigned long page_size_mask, bool init)
 {
 	unsigned long paddr_next, paddr_last = paddr_end;
 	unsigned long vaddr = (unsigned long)__va(paddr);
 	int i = p4d_index(vaddr);
 
 	if (!pgtable_l5_enabled())
-		return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
+		return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
+				     page_size_mask, init);
 
 	for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
 		p4d_t *p4d;
@@ -657,39 +692,34 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
 					     E820_TYPE_RAM) &&
 			    !e820__mapped_any(paddr & P4D_MASK, paddr_next,
 					     E820_TYPE_RESERVED_KERN))
-				set_p4d_safe(p4d, __p4d(0));
+				set_p4d_init(p4d, __p4d(0), init);
 			continue;
 		}
 
 		if (!p4d_none(*p4d)) {
 			pud = pud_offset(p4d, 0);
-			paddr_last = phys_pud_init(pud, paddr,
-						   paddr_end,
-						   page_size_mask);
+			paddr_last = phys_pud_init(pud, paddr, paddr_end,
+						   page_size_mask, init);
 			continue;
 		}
 
 		pud = alloc_low_page();
 		paddr_last = phys_pud_init(pud, paddr, paddr_end,
-					   page_size_mask);
+					   page_size_mask, init);
 
 		spin_lock(&init_mm.page_table_lock);
-		p4d_populate_safe(&init_mm, p4d, pud);
+		p4d_populate_init(&init_mm, p4d, pud, init);
 		spin_unlock(&init_mm.page_table_lock);
 	}
 
 	return paddr_last;
 }
 
-/*
- * Create page table mapping for the physical memory for specific physical
- * addresses. The virtual and physical addresses have to be aligned on PMD level
- * down. It returns the last physical address mapped.
- */
-unsigned long __meminit
-kernel_physical_mapping_init(unsigned long paddr_start,
-			     unsigned long paddr_end,
-			     unsigned long page_size_mask)
+static unsigned long __meminit
+__kernel_physical_mapping_init(unsigned long paddr_start,
+			       unsigned long paddr_end,
+			       unsigned long page_size_mask,
+			       bool init)
 {
 	bool pgd_changed = false;
 	unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
@@ -709,19 +739,22 @@ kernel_physical_mapping_init(unsigned long paddr_start,
 			p4d = (p4d_t *)pgd_page_vaddr(*pgd);
 			paddr_last = phys_p4d_init(p4d, __pa(vaddr),
 						   __pa(vaddr_end),
-						   page_size_mask);
+						   page_size_mask,
+						   init);
 			continue;
 		}
 
 		p4d = alloc_low_page();
 		paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
-					   page_size_mask);
+					   page_size_mask, init);
 
 		spin_lock(&init_mm.page_table_lock);
 		if (pgtable_l5_enabled())
-			pgd_populate_safe(&init_mm, pgd, p4d);
+			pgd_populate_init(&init_mm, pgd, p4d, init);
 		else
-			p4d_populate_safe(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
+			p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr),
+					  (pud_t *) p4d, init);
+
 		spin_unlock(&init_mm.page_table_lock);
 		pgd_changed = true;
 	}
@@ -732,6 +765,37 @@ kernel_physical_mapping_init(unsigned long paddr_start,
 	return paddr_last;
 }
 
+
+/*
+ * Create page table mapping for the physical memory for specific physical
+ * addresses. Note that it can only be used to populate non-present entries.
+ * The virtual and physical addresses have to be aligned on PMD level
+ * down. It returns the last physical address mapped.
+ */
+unsigned long __meminit
+kernel_physical_mapping_init(unsigned long paddr_start,
+			     unsigned long paddr_end,
+			     unsigned long page_size_mask)
+{
+	return __kernel_physical_mapping_init(paddr_start, paddr_end,
+					      page_size_mask, true);
+}
+
+/*
+ * This function is similar to kernel_physical_mapping_init() above with the
+ * exception that it uses set_{pud,pmd}() instead of the set_{pud,pte}_safe()
+ * when updating the mapping. The caller is responsible to flush the TLBs after
+ * the function returns.
+ */
+unsigned long __meminit
+kernel_physical_mapping_change(unsigned long paddr_start,
+			       unsigned long paddr_end,
+			       unsigned long page_size_mask)
+{
+	return __kernel_physical_mapping_init(paddr_start, paddr_end,
+					      page_size_mask, false);
+}
+
 #ifndef CONFIG_NUMA
 void __init initmem_init(void)
 {
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 385afa2b9e17..51f50a7a07ef 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -301,9 +301,13 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
 		else
 			split_page_size_mask = 1 << PG_LEVEL_2M;
 
-		kernel_physical_mapping_init(__pa(vaddr & pmask),
-					     __pa((vaddr_end & pmask) + psize),
-					     split_page_size_mask);
+		/*
+		 * kernel_physical_mapping_change() does not flush the TLBs, so
+		 * a TLB flush is required after we exit from the for loop.
+		 */
+		kernel_physical_mapping_change(__pa(vaddr & pmask),
+					       __pa((vaddr_end & pmask) + psize),
+					       split_page_size_mask);
 	}
 
 	ret = 0;
diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h
index 319bde386d5f..eeae142062ed 100644
--- a/arch/x86/mm/mm_internal.h
+++ b/arch/x86/mm/mm_internal.h
@@ -13,6 +13,9 @@ void early_ioremap_page_table_range_init(void);
 unsigned long kernel_physical_mapping_init(unsigned long start,
 					   unsigned long end,
 					   unsigned long page_size_mask);
+unsigned long kernel_physical_mapping_change(unsigned long start,
+					     unsigned long end,
+					     unsigned long page_size_mask);
 void zone_sizes_init(void);
 
 extern int after_bootmem;