author     Kirill A. Shutemov <kirill.shutemov@linux.intel.com>  2018-01-31 08:54:02 -0500
committer  Ingo Molnar <mingo@kernel.org>  2018-02-13 09:59:48 -0500
commit     1cd9c22fee3ac21db52a0997d08cf2f065d2c0c0
tree       9315ad91a89b13c5b182e25380496a94dbc7f4b9
parent     515ab7c41306aad1f80a980e1936ef635c61570c
x86/mm/encrypt: Move page table helpers into separate translation unit
There are a bunch of functions in mem_encrypt.c that operate on the identity mapping, which means they want virtual addresses to be equal to physical ones, without the PAGE_OFFSET shift. We also need to avoid paravirtualization calls there.

Getting this done is tricky. We cannot use the usual page table helpers, which forces us to open-code a lot of things and makes the code ugly and hard to modify.

We can get it to work with the page table helpers, but it requires a few preprocessor tricks, and those tricks may have side effects for the rest of the file.

Let's isolate such functions into their own translation unit.

Tested-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20180131135404.40692-2-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
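To make the identity-mapping constraint concrete, here is a minimal sketch. It is illustration only, not part of the patch: the helper name is invented, and the body condenses a few lines of sme_prepare_pgd() shown further down.

/*
 * Illustration only.  Early SME setup runs from the identity mapping, so a
 * freshly carved-out page-table page is addressed through its physical
 * address (no __va()/PAGE_OFFSET shift), and entries are written with the
 * native_* helpers so that no paravirt hook can be involved.
 */
static void __init sme_sketch_install_pud(pgd_t *pgd, unsigned long vaddr,
					  void *pgtable_area)
{
	pud_t *pud_p = pgtable_area;		/* identity mapped: VA == PA */
	pgd_t *pgd_p = pgd + pgd_index(vaddr);

	memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);

	/* Direct store into the PGD entry, never a paravirt call */
	native_set_pgd(pgd_p, native_make_pgd((pgdval_t)pud_p + PGD_FLAGS));
}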
-rw-r--r--  arch/x86/include/asm/mem_encrypt.h    1
-rw-r--r--  arch/x86/mm/Makefile                 14
-rw-r--r--  arch/x86/mm/mem_encrypt.c           578
-rw-r--r--  arch/x86/mm/mem_encrypt_identity.c  597
4 files changed, 608 insertions(+), 582 deletions(-)
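One orientation note before the diff itself: the central helper being moved, __sme_map_range(), covers an unaligned head of each range with 4K PTE mappings, the 2MB-aligned middle with large PMD mappings, and an unaligned tail with PTE mappings again. The standalone sketch below is plain user-space C, not kernel code; it only assumes 4 KiB pages and 2 MiB PMD pages, uses a made-up example range, and mirrors that three-way split.

#include <stdio.h>

#define PMD_PAGE_SIZE	0x200000UL			/* 2 MiB */
#define PMD_PAGE_MASK	(~(PMD_PAGE_SIZE - 1))
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))	/* round up, like the kernel's ALIGN() */

int main(void)
{
	/* Example range, deliberately not 2MB aligned at either end */
	unsigned long vaddr     = 0x1003000UL;
	unsigned long vaddr_end = 0x1605000UL;

	unsigned long head_end = ALIGN(vaddr, PMD_PAGE_SIZE);
	unsigned long body_end = vaddr_end & PMD_PAGE_MASK;

	printf("PTE-mapped head: [%#lx, %#lx)\n", vaddr, head_end);
	printf("PMD-mapped body: [%#lx, %#lx)\n", head_end, body_end);
	printf("PTE-mapped tail: [%#lx, %#lx)\n", body_end, vaddr_end);
	return 0;
}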
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 22c5f3e6f820..8fe61ad21047 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -22,6 +22,7 @@
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 
 extern u64 sme_me_mask;
+extern bool sev_enabled;
 
 void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
			 unsigned long decrypted_kernel_vaddr,
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 27e9e90a8d35..03c6c8561623 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,12 +1,15 @@
 # SPDX-License-Identifier: GPL-2.0
-# Kernel does not boot with instrumentation of tlb.c and mem_encrypt.c
+# Kernel does not boot with instrumentation of tlb.c and mem_encrypt*.c
 KCOV_INSTRUMENT_tlb.o := n
 KCOV_INSTRUMENT_mem_encrypt.o := n
+KCOV_INSTRUMENT_mem_encrypt_identity.o := n
 
 KASAN_SANITIZE_mem_encrypt.o := n
+KASAN_SANITIZE_mem_encrypt_identity.o := n
 
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_mem_encrypt.o = -pg
+CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
 endif
 
 obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
@@ -47,4 +50,5 @@ obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
 obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o
 
 obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o
+obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_identity.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 1a53071e2e17..3a1b5fe4c2ca 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -25,17 +25,12 @@
 #include <asm/bootparam.h>
 #include <asm/set_memory.h>
 #include <asm/cacheflush.h>
-#include <asm/sections.h>
 #include <asm/processor-flags.h>
 #include <asm/msr.h>
 #include <asm/cmdline.h>
 
 #include "mm_internal.h"
 
-static char sme_cmdline_arg[] __initdata = "mem_encrypt";
-static char sme_cmdline_on[] __initdata = "on";
-static char sme_cmdline_off[] __initdata = "off";
-
 /*
  * Since SME related variables are set early in the boot process they must
  * reside in the .data section so as not to be zeroed out when the .bss
@@ -46,7 +41,7 @@ EXPORT_SYMBOL(sme_me_mask);
 DEFINE_STATIC_KEY_FALSE(sev_enable_key);
 EXPORT_SYMBOL_GPL(sev_enable_key);
 
-static bool sev_enabled __section(.data);
+bool sev_enabled __section(.data);
 
 /* Buffer used for early in-place encryption by BSP, no locking needed */
 static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE);
@@ -463,574 +458,3 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
463 /* Make the SWIOTLB buffer area decrypted */
464 set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
465}
466
467struct sme_populate_pgd_data {
468 void *pgtable_area;
469 pgd_t *pgd;
470
471 pmdval_t pmd_flags;
472 pteval_t pte_flags;
473 unsigned long paddr;
474
475 unsigned long vaddr;
476 unsigned long vaddr_end;
477};
478
479static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
480{
481 unsigned long pgd_start, pgd_end, pgd_size;
482 pgd_t *pgd_p;
483
484 pgd_start = ppd->vaddr & PGDIR_MASK;
485 pgd_end = ppd->vaddr_end & PGDIR_MASK;
486
487 pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
488
489 pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
490
491 memset(pgd_p, 0, pgd_size);
492}
493
494#define PGD_FLAGS _KERNPG_TABLE_NOENC
495#define P4D_FLAGS _KERNPG_TABLE_NOENC
496#define PUD_FLAGS _KERNPG_TABLE_NOENC
497#define PMD_FLAGS _KERNPG_TABLE_NOENC
498
499#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
500
501#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
502#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
503 (_PAGE_PAT | _PAGE_PWT))
504
505#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
506
507#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
508
509#define PTE_FLAGS_DEC PTE_FLAGS
510#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
511 (_PAGE_PAT | _PAGE_PWT))
512
513#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC)
514
515static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
516{
517 pgd_t *pgd_p;
518 p4d_t *p4d_p;
519 pud_t *pud_p;
520 pmd_t *pmd_p;
521
522 pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
523 if (native_pgd_val(*pgd_p)) {
524 if (IS_ENABLED(CONFIG_X86_5LEVEL))
525 p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
526 else
527 pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
528 } else {
529 pgd_t pgd;
530
531 if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
532 p4d_p = ppd->pgtable_area;
533 memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
534 ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
535
536 pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
537 } else {
538 pud_p = ppd->pgtable_area;
539 memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
540 ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
541
542 pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
543 }
544 native_set_pgd(pgd_p, pgd);
545 }
546
547 if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
548 p4d_p += p4d_index(ppd->vaddr);
549 if (native_p4d_val(*p4d_p)) {
550 pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
551 } else {
552 p4d_t p4d;
553
554 pud_p = ppd->pgtable_area;
555 memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
556 ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
557
558 p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
559 native_set_p4d(p4d_p, p4d);
560 }
561 }
562
563 pud_p += pud_index(ppd->vaddr);
564 if (native_pud_val(*pud_p)) {
565 if (native_pud_val(*pud_p) & _PAGE_PSE)
566 return NULL;
567
568 pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
569 } else {
570 pud_t pud;
571
572 pmd_p = ppd->pgtable_area;
573 memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
574 ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
575
576 pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
577 native_set_pud(pud_p, pud);
578 }
579
580 return pmd_p;
581}
582
583static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
584{
585 pmd_t *pmd_p;
586
587 pmd_p = sme_prepare_pgd(ppd);
588 if (!pmd_p)
589 return;
590
591 pmd_p += pmd_index(ppd->vaddr);
592 if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
593 native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags));
594}
595
596static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
597{
598 pmd_t *pmd_p;
599 pte_t *pte_p;
600
601 pmd_p = sme_prepare_pgd(ppd);
602 if (!pmd_p)
603 return;
604
605 pmd_p += pmd_index(ppd->vaddr);
606 if (native_pmd_val(*pmd_p)) {
607 if (native_pmd_val(*pmd_p) & _PAGE_PSE)
608 return;
609
610 pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK);
611 } else {
612 pmd_t pmd;
613
614 pte_p = ppd->pgtable_area;
615 memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE);
616 ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE;
617
618 pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS);
619 native_set_pmd(pmd_p, pmd);
620 }
621
622 pte_p += pte_index(ppd->vaddr);
623 if (!native_pte_val(*pte_p))
624 native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags));
625}
626
627static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
628{
629 while (ppd->vaddr < ppd->vaddr_end) {
630 sme_populate_pgd_large(ppd);
631
632 ppd->vaddr += PMD_PAGE_SIZE;
633 ppd->paddr += PMD_PAGE_SIZE;
634 }
635}
636
637static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
638{
639 while (ppd->vaddr < ppd->vaddr_end) {
640 sme_populate_pgd(ppd);
641
642 ppd->vaddr += PAGE_SIZE;
643 ppd->paddr += PAGE_SIZE;
644 }
645}
646
647static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
648 pmdval_t pmd_flags, pteval_t pte_flags)
649{
650 unsigned long vaddr_end;
651
652 ppd->pmd_flags = pmd_flags;
653 ppd->pte_flags = pte_flags;
654
655 /* Save original end value since we modify the struct value */
656 vaddr_end = ppd->vaddr_end;
657
658 /* If start is not 2MB aligned, create PTE entries */
659 ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
660 __sme_map_range_pte(ppd);
661
662 /* Create PMD entries */
663 ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
664 __sme_map_range_pmd(ppd);
665
666 /* If end is not 2MB aligned, create PTE entries */
667 ppd->vaddr_end = vaddr_end;
668 __sme_map_range_pte(ppd);
669}
670
671static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
672{
673 __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
674}
675
676static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
677{
678 __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
679}
680
681static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
682{
683 __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
684}
685
686static unsigned long __init sme_pgtable_calc(unsigned long len)
687{
688 unsigned long p4d_size, pud_size, pmd_size, pte_size;
689 unsigned long total;
690
691 /*
692 * Perform a relatively simplistic calculation of the pagetable
693 * entries that are needed. Those mappings will be covered mostly
694 * by 2MB PMD entries so we can conservatively calculate the required
695 * number of P4D, PUD and PMD structures needed to perform the
696 * mappings. For mappings that are not 2MB aligned, PTE mappings
697 * would be needed for the start and end portion of the address range
698 * that fall outside of the 2MB alignment. This results in, at most,
699 * two extra pages to hold PTE entries for each range that is mapped.
700 * Incrementing the count for each covers the case where the addresses
701 * cross entries.
702 */
703 if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
704 p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
705 p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
706 pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1;
707 pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
708 } else {
709 p4d_size = 0;
710 pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
711 pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
712 }
713 pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
714 pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
715 pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE;
716
717 total = p4d_size + pud_size + pmd_size + pte_size;
718
719 /*
720 * Now calculate the added pagetable structures needed to populate
721 * the new pagetables.
722 */
723 if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
724 p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
725 p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
726 pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE;
727 pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
728 } else {
729 p4d_size = 0;
730 pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
731 pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
732 }
733 pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE;
734 pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
735
736 total += p4d_size + pud_size + pmd_size;
737
738 return total;
739}
740
741void __init __nostackprotector sme_encrypt_kernel(struct boot_params *bp)
742{
743 unsigned long workarea_start, workarea_end, workarea_len;
744 unsigned long execute_start, execute_end, execute_len;
745 unsigned long kernel_start, kernel_end, kernel_len;
746 unsigned long initrd_start, initrd_end, initrd_len;
747 struct sme_populate_pgd_data ppd;
748 unsigned long pgtable_area_len;
749 unsigned long decrypted_base;
750
751 if (!sme_active())
752 return;
753
754 /*
755 * Prepare for encrypting the kernel and initrd by building new
756 * pagetables with the necessary attributes needed to encrypt the
757 * kernel in place.
758 *
759 * One range of virtual addresses will map the memory occupied
760 * by the kernel and initrd as encrypted.
761 *
762 * Another range of virtual addresses will map the memory occupied
763 * by the kernel and initrd as decrypted and write-protected.
764 *
765 * The use of write-protect attribute will prevent any of the
766 * memory from being cached.
767 */
768
769 /* Physical addresses gives us the identity mapped virtual addresses */
770 kernel_start = __pa_symbol(_text);
771 kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
772 kernel_len = kernel_end - kernel_start;
773
774 initrd_start = 0;
775 initrd_end = 0;
776 initrd_len = 0;
777#ifdef CONFIG_BLK_DEV_INITRD
778 initrd_len = (unsigned long)bp->hdr.ramdisk_size |
779 ((unsigned long)bp->ext_ramdisk_size << 32);
780 if (initrd_len) {
781 initrd_start = (unsigned long)bp->hdr.ramdisk_image |
782 ((unsigned long)bp->ext_ramdisk_image << 32);
783 initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
784 initrd_len = initrd_end - initrd_start;
785 }
786#endif
787
788 /* Set the encryption workarea to be immediately after the kernel */
789 workarea_start = kernel_end;
790
791 /*
792 * Calculate required number of workarea bytes needed:
793 * executable encryption area size:
794 * stack page (PAGE_SIZE)
795 * encryption routine page (PAGE_SIZE)
796 * intermediate copy buffer (PMD_PAGE_SIZE)
797 * pagetable structures for the encryption of the kernel
798 * pagetable structures for workarea (in case not currently mapped)
799 */
800 execute_start = workarea_start;
801 execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
802 execute_len = execute_end - execute_start;
803
804 /*
805 * One PGD for both encrypted and decrypted mappings and a set of
806 * PUDs and PMDs for each of the encrypted and decrypted mappings.
807 */
808 pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
809 pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
810 if (initrd_len)
811 pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;
812
813 /* PUDs and PMDs needed in the current pagetables for the workarea */
814 pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
815
816 /*
817 * The total workarea includes the executable encryption area and
818 * the pagetable area. The start of the workarea is already 2MB
819 * aligned, align the end of the workarea on a 2MB boundary so that
820 * we don't try to create/allocate PTE entries from the workarea
821 * before it is mapped.
822 */
823 workarea_len = execute_len + pgtable_area_len;
824 workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
825
826 /*
827 * Set the address to the start of where newly created pagetable
828 * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
829 * structures are created when the workarea is added to the current
830 * pagetables and when the new encrypted and decrypted kernel
831 * mappings are populated.
832 */
833 ppd.pgtable_area = (void *)execute_end;
834
835 /*
836 * Make sure the current pagetable structure has entries for
837 * addressing the workarea.
838 */
839 ppd.pgd = (pgd_t *)native_read_cr3_pa();
840 ppd.paddr = workarea_start;
841 ppd.vaddr = workarea_start;
842 ppd.vaddr_end = workarea_end;
843 sme_map_range_decrypted(&ppd);
844
845 /* Flush the TLB - no globals so cr3 is enough */
846 native_write_cr3(__native_read_cr3());
847
848 /*
849 * A new pagetable structure is being built to allow for the kernel
850 * and initrd to be encrypted. It starts with an empty PGD that will
851 * then be populated with new PUDs and PMDs as the encrypted and
852 * decrypted kernel mappings are created.
853 */
854 ppd.pgd = ppd.pgtable_area;
855 memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
856 ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
857
858 /*
859 * A different PGD index/entry must be used to get different
860 * pagetable entries for the decrypted mapping. Choose the next
861 * PGD index and convert it to a virtual address to be used as
862 * the base of the mapping.
863 */
864 decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
865 if (initrd_len) {
866 unsigned long check_base;
867
868 check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
869 decrypted_base = max(decrypted_base, check_base);
870 }
871 decrypted_base <<= PGDIR_SHIFT;
872
873 /* Add encrypted kernel (identity) mappings */
874 ppd.paddr = kernel_start;
875 ppd.vaddr = kernel_start;
876 ppd.vaddr_end = kernel_end;
877 sme_map_range_encrypted(&ppd);
878
879 /* Add decrypted, write-protected kernel (non-identity) mappings */
880 ppd.paddr = kernel_start;
881 ppd.vaddr = kernel_start + decrypted_base;
882 ppd.vaddr_end = kernel_end + decrypted_base;
883 sme_map_range_decrypted_wp(&ppd);
884
885 if (initrd_len) {
886 /* Add encrypted initrd (identity) mappings */
887 ppd.paddr = initrd_start;
888 ppd.vaddr = initrd_start;
889 ppd.vaddr_end = initrd_end;
890 sme_map_range_encrypted(&ppd);
891 /*
892 * Add decrypted, write-protected initrd (non-identity) mappings
893 */
894 ppd.paddr = initrd_start;
895 ppd.vaddr = initrd_start + decrypted_base;
896 ppd.vaddr_end = initrd_end + decrypted_base;
897 sme_map_range_decrypted_wp(&ppd);
898 }
899
900 /* Add decrypted workarea mappings to both kernel mappings */
901 ppd.paddr = workarea_start;
902 ppd.vaddr = workarea_start;
903 ppd.vaddr_end = workarea_end;
904 sme_map_range_decrypted(&ppd);
905
906 ppd.paddr = workarea_start;
907 ppd.vaddr = workarea_start + decrypted_base;
908 ppd.vaddr_end = workarea_end + decrypted_base;
909 sme_map_range_decrypted(&ppd);
910
911 /* Perform the encryption */
912 sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
913 kernel_len, workarea_start, (unsigned long)ppd.pgd);
914
915 if (initrd_len)
916 sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
917 initrd_len, workarea_start,
918 (unsigned long)ppd.pgd);
919
920 /*
921 * At this point we are running encrypted. Remove the mappings for
922 * the decrypted areas - all that is needed for this is to remove
923 * the PGD entry/entries.
924 */
925 ppd.vaddr = kernel_start + decrypted_base;
926 ppd.vaddr_end = kernel_end + decrypted_base;
927 sme_clear_pgd(&ppd);
928
929 if (initrd_len) {
930 ppd.vaddr = initrd_start + decrypted_base;
931 ppd.vaddr_end = initrd_end + decrypted_base;
932 sme_clear_pgd(&ppd);
933 }
934
935 ppd.vaddr = workarea_start + decrypted_base;
936 ppd.vaddr_end = workarea_end + decrypted_base;
937 sme_clear_pgd(&ppd);
938
939 /* Flush the TLB - no globals so cr3 is enough */
940 native_write_cr3(__native_read_cr3());
941}
942
943void __init __nostackprotector sme_enable(struct boot_params *bp)
944{
945 const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
946 unsigned int eax, ebx, ecx, edx;
947 unsigned long feature_mask;
948 bool active_by_default;
949 unsigned long me_mask;
950 char buffer[16];
951 u64 msr;
952
953 /* Check for the SME/SEV support leaf */
954 eax = 0x80000000;
955 ecx = 0;
956 native_cpuid(&eax, &ebx, &ecx, &edx);
957 if (eax < 0x8000001f)
958 return;
959
960#define AMD_SME_BIT BIT(0)
961#define AMD_SEV_BIT BIT(1)
962 /*
963 * Set the feature mask (SME or SEV) based on whether we are
964 * running under a hypervisor.
965 */
966 eax = 1;
967 ecx = 0;
968 native_cpuid(&eax, &ebx, &ecx, &edx);
969 feature_mask = (ecx & BIT(31)) ? AMD_SEV_BIT : AMD_SME_BIT;
970
971 /*
972 * Check for the SME/SEV feature:
973 * CPUID Fn8000_001F[EAX]
974 * - Bit 0 - Secure Memory Encryption support
975 * - Bit 1 - Secure Encrypted Virtualization support
976 * CPUID Fn8000_001F[EBX]
977 * - Bits 5:0 - Pagetable bit position used to indicate encryption
978 */
979 eax = 0x8000001f;
980 ecx = 0;
981 native_cpuid(&eax, &ebx, &ecx, &edx);
982 if (!(eax & feature_mask))
983 return;
984
985 me_mask = 1UL << (ebx & 0x3f);
986
987 /* Check if memory encryption is enabled */
988 if (feature_mask == AMD_SME_BIT) {
989 /* For SME, check the SYSCFG MSR */
990 msr = __rdmsr(MSR_K8_SYSCFG);
991 if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
992 return;
993 } else {
994 /* For SEV, check the SEV MSR */
995 msr = __rdmsr(MSR_AMD64_SEV);
996 if (!(msr & MSR_AMD64_SEV_ENABLED))
997 return;
998
999 /* SEV state cannot be controlled by a command line option */
1000 sme_me_mask = me_mask;
1001 sev_enabled = true;
1002 return;
1003 }
1004
1005 /*
1006 * Fixups have not been applied to phys_base yet and we're running
1007 * identity mapped, so we must obtain the address to the SME command
1008 * line argument data using rip-relative addressing.
1009 */
1010 asm ("lea sme_cmdline_arg(%%rip), %0"
1011 : "=r" (cmdline_arg)
1012 : "p" (sme_cmdline_arg));
1013 asm ("lea sme_cmdline_on(%%rip), %0"
1014 : "=r" (cmdline_on)
1015 : "p" (sme_cmdline_on));
1016 asm ("lea sme_cmdline_off(%%rip), %0"
1017 : "=r" (cmdline_off)
1018 : "p" (sme_cmdline_off));
1019
1020 if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
1021 active_by_default = true;
1022 else
1023 active_by_default = false;
1024
1025 cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
1026 ((u64)bp->ext_cmd_line_ptr << 32));
1027
1028 cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
1029
1030 if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
1031 sme_me_mask = me_mask;
1032 else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
1033 sme_me_mask = 0;
1034 else
1035 sme_me_mask = active_by_default ? me_mask : 0;
1036}
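A worked example for the decrypted_base computation in sme_encrypt_kernel() above, as standalone user-space C. It assumes 4-level paging (PGDIR_SHIFT == 39, PTRS_PER_PGD == 512) and an invented workarea_end value; the point is that the decrypted, write-protected alias of the kernel is placed one PGD slot above the highest address already in use (the workarea, and the initrd end when one is present), converted back into a virtual address.

#include <stdio.h>

#define PGDIR_SHIFT	39			/* 4-level paging assumed */
#define PTRS_PER_PGD	512UL

static unsigned long pgd_index(unsigned long addr)
{
	return (addr >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
}

int main(void)
{
	unsigned long workarea_end = 0x100000000UL;	/* example: 4 GiB */
	unsigned long decrypted_base;

	/* Next PGD slot after the one covering workarea_end, as a vaddr */
	decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
	decrypted_base <<= PGDIR_SHIFT;

	printf("decrypted_base = %#lx\n", decrypted_base);	/* 0x8000000000 */
	return 0;
}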
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
new file mode 100644
index 000000000000..a28978a37bfa
--- /dev/null
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -0,0 +1,597 @@
1/*
2 * AMD Memory Encryption Support
3 *
4 * Copyright (C) 2016 Advanced Micro Devices, Inc.
5 *
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#define DISABLE_BRANCH_PROFILING
14
15#include <linux/mm.h>
16#include <linux/mem_encrypt.h>
17
18#include <asm/setup.h>
19#include <asm/sections.h>
20#include <asm/cmdline.h>
21
22#include "mm_internal.h"
23
24#define PGD_FLAGS _KERNPG_TABLE_NOENC
25#define P4D_FLAGS _KERNPG_TABLE_NOENC
26#define PUD_FLAGS _KERNPG_TABLE_NOENC
27#define PMD_FLAGS _KERNPG_TABLE_NOENC
28
29#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
30
31#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
32#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
33 (_PAGE_PAT | _PAGE_PWT))
34
35#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
36
37#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
38
39#define PTE_FLAGS_DEC PTE_FLAGS
40#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
41 (_PAGE_PAT | _PAGE_PWT))
42
43#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC)
44
45struct sme_populate_pgd_data {
46 void *pgtable_area;
47 pgd_t *pgd;
48
49 pmdval_t pmd_flags;
50 pteval_t pte_flags;
51 unsigned long paddr;
52
53 unsigned long vaddr;
54 unsigned long vaddr_end;
55};
56
57static char sme_cmdline_arg[] __initdata = "mem_encrypt";
58static char sme_cmdline_on[] __initdata = "on";
59static char sme_cmdline_off[] __initdata = "off";
60
61static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
62{
63 unsigned long pgd_start, pgd_end, pgd_size;
64 pgd_t *pgd_p;
65
66 pgd_start = ppd->vaddr & PGDIR_MASK;
67 pgd_end = ppd->vaddr_end & PGDIR_MASK;
68
69 pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
70
71 pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
72
73 memset(pgd_p, 0, pgd_size);
74}
75
76static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
77{
78 pgd_t *pgd_p;
79 p4d_t *p4d_p;
80 pud_t *pud_p;
81 pmd_t *pmd_p;
82
83 pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
84 if (native_pgd_val(*pgd_p)) {
85 if (IS_ENABLED(CONFIG_X86_5LEVEL))
86 p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
87 else
88 pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
89 } else {
90 pgd_t pgd;
91
92 if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
93 p4d_p = ppd->pgtable_area;
94 memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
95 ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
96
97 pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
98 } else {
99 pud_p = ppd->pgtable_area;
100 memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
101 ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
102
103 pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
104 }
105 native_set_pgd(pgd_p, pgd);
106 }
107
108 if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
109 p4d_p += p4d_index(ppd->vaddr);
110 if (native_p4d_val(*p4d_p)) {
111 pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
112 } else {
113 p4d_t p4d;
114
115 pud_p = ppd->pgtable_area;
116 memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
117 ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
118
119 p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
120 native_set_p4d(p4d_p, p4d);
121 }
122 }
123
124 pud_p += pud_index(ppd->vaddr);
125 if (native_pud_val(*pud_p)) {
126 if (native_pud_val(*pud_p) & _PAGE_PSE)
127 return NULL;
128
129 pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
130 } else {
131 pud_t pud;
132
133 pmd_p = ppd->pgtable_area;
134 memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
135 ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
136
137 pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
138 native_set_pud(pud_p, pud);
139 }
140
141 return pmd_p;
142}
143
144static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
145{
146 pmd_t *pmd_p;
147
148 pmd_p = sme_prepare_pgd(ppd);
149 if (!pmd_p)
150 return;
151
152 pmd_p += pmd_index(ppd->vaddr);
153 if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
154 native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags));
155}
156
157static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
158{
159 pmd_t *pmd_p;
160 pte_t *pte_p;
161
162 pmd_p = sme_prepare_pgd(ppd);
163 if (!pmd_p)
164 return;
165
166 pmd_p += pmd_index(ppd->vaddr);
167 if (native_pmd_val(*pmd_p)) {
168 if (native_pmd_val(*pmd_p) & _PAGE_PSE)
169 return;
170
171 pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK);
172 } else {
173 pmd_t pmd;
174
175 pte_p = ppd->pgtable_area;
176 memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE);
177 ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE;
178
179 pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS);
180 native_set_pmd(pmd_p, pmd);
181 }
182
183 pte_p += pte_index(ppd->vaddr);
184 if (!native_pte_val(*pte_p))
185 native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags));
186}
187
188static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
189{
190 while (ppd->vaddr < ppd->vaddr_end) {
191 sme_populate_pgd_large(ppd);
192
193 ppd->vaddr += PMD_PAGE_SIZE;
194 ppd->paddr += PMD_PAGE_SIZE;
195 }
196}
197
198static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
199{
200 while (ppd->vaddr < ppd->vaddr_end) {
201 sme_populate_pgd(ppd);
202
203 ppd->vaddr += PAGE_SIZE;
204 ppd->paddr += PAGE_SIZE;
205 }
206}
207
208static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
209 pmdval_t pmd_flags, pteval_t pte_flags)
210{
211 unsigned long vaddr_end;
212
213 ppd->pmd_flags = pmd_flags;
214 ppd->pte_flags = pte_flags;
215
216 /* Save original end value since we modify the struct value */
217 vaddr_end = ppd->vaddr_end;
218
219 /* If start is not 2MB aligned, create PTE entries */
220 ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
221 __sme_map_range_pte(ppd);
222
223 /* Create PMD entries */
224 ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
225 __sme_map_range_pmd(ppd);
226
227 /* If end is not 2MB aligned, create PTE entries */
228 ppd->vaddr_end = vaddr_end;
229 __sme_map_range_pte(ppd);
230}
231
232static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
233{
234 __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
235}
236
237static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
238{
239 __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
240}
241
242static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
243{
244 __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
245}
246
247static unsigned long __init sme_pgtable_calc(unsigned long len)
248{
249 unsigned long p4d_size, pud_size, pmd_size, pte_size;
250 unsigned long total;
251
252 /*
253 * Perform a relatively simplistic calculation of the pagetable
254 * entries that are needed. Those mappings will be covered mostly
255 * by 2MB PMD entries so we can conservatively calculate the required
256 * number of P4D, PUD and PMD structures needed to perform the
257 * mappings. For mappings that are not 2MB aligned, PTE mappings
258 * would be needed for the start and end portion of the address range
259 * that fall outside of the 2MB alignment. This results in, at most,
260 * two extra pages to hold PTE entries for each range that is mapped.
261 * Incrementing the count for each covers the case where the addresses
262 * cross entries.
263 */
264 if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
265 p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
266 p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
267 pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1;
268 pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
269 } else {
270 p4d_size = 0;
271 pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
272 pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
273 }
274 pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
275 pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
276 pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE;
277
278 total = p4d_size + pud_size + pmd_size + pte_size;
279
280 /*
281 * Now calculate the added pagetable structures needed to populate
282 * the new pagetables.
283 */
284 if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
285 p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
286 p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
287 pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE;
288 pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
289 } else {
290 p4d_size = 0;
291 pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
292 pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
293 }
294 pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE;
295 pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
296
297 total += p4d_size + pud_size + pmd_size;
298
299 return total;
300}
301
302void __init __nostackprotector sme_encrypt_kernel(struct boot_params *bp)
303{
304 unsigned long workarea_start, workarea_end, workarea_len;
305 unsigned long execute_start, execute_end, execute_len;
306 unsigned long kernel_start, kernel_end, kernel_len;
307 unsigned long initrd_start, initrd_end, initrd_len;
308 struct sme_populate_pgd_data ppd;
309 unsigned long pgtable_area_len;
310 unsigned long decrypted_base;
311
312 if (!sme_active())
313 return;
314
315 /*
316 * Prepare for encrypting the kernel and initrd by building new
317 * pagetables with the necessary attributes needed to encrypt the
318 * kernel in place.
319 *
320 * One range of virtual addresses will map the memory occupied
321 * by the kernel and initrd as encrypted.
322 *
323 * Another range of virtual addresses will map the memory occupied
324 * by the kernel and initrd as decrypted and write-protected.
325 *
326 * The use of write-protect attribute will prevent any of the
327 * memory from being cached.
328 */
329
330 /* Physical addresses gives us the identity mapped virtual addresses */
331 kernel_start = __pa_symbol(_text);
332 kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
333 kernel_len = kernel_end - kernel_start;
334
335 initrd_start = 0;
336 initrd_end = 0;
337 initrd_len = 0;
338#ifdef CONFIG_BLK_DEV_INITRD
339 initrd_len = (unsigned long)bp->hdr.ramdisk_size |
340 ((unsigned long)bp->ext_ramdisk_size << 32);
341 if (initrd_len) {
342 initrd_start = (unsigned long)bp->hdr.ramdisk_image |
343 ((unsigned long)bp->ext_ramdisk_image << 32);
344 initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
345 initrd_len = initrd_end - initrd_start;
346 }
347#endif
348
349 /* Set the encryption workarea to be immediately after the kernel */
350 workarea_start = kernel_end;
351
352 /*
353 * Calculate required number of workarea bytes needed:
354 * executable encryption area size:
355 * stack page (PAGE_SIZE)
356 * encryption routine page (PAGE_SIZE)
357 * intermediate copy buffer (PMD_PAGE_SIZE)
358 * pagetable structures for the encryption of the kernel
359 * pagetable structures for workarea (in case not currently mapped)
360 */
361 execute_start = workarea_start;
362 execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
363 execute_len = execute_end - execute_start;
364
365 /*
366 * One PGD for both encrypted and decrypted mappings and a set of
367 * PUDs and PMDs for each of the encrypted and decrypted mappings.
368 */
369 pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
370 pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
371 if (initrd_len)
372 pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;
373
374 /* PUDs and PMDs needed in the current pagetables for the workarea */
375 pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
376
377 /*
378 * The total workarea includes the executable encryption area and
379 * the pagetable area. The start of the workarea is already 2MB
380 * aligned, align the end of the workarea on a 2MB boundary so that
381 * we don't try to create/allocate PTE entries from the workarea
382 * before it is mapped.
383 */
384 workarea_len = execute_len + pgtable_area_len;
385 workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
386
387 /*
388 * Set the address to the start of where newly created pagetable
389 * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
390 * structures are created when the workarea is added to the current
391 * pagetables and when the new encrypted and decrypted kernel
392 * mappings are populated.
393 */
394 ppd.pgtable_area = (void *)execute_end;
395
396 /*
397 * Make sure the current pagetable structure has entries for
398 * addressing the workarea.
399 */
400 ppd.pgd = (pgd_t *)native_read_cr3_pa();
401 ppd.paddr = workarea_start;
402 ppd.vaddr = workarea_start;
403 ppd.vaddr_end = workarea_end;
404 sme_map_range_decrypted(&ppd);
405
406 /* Flush the TLB - no globals so cr3 is enough */
407 native_write_cr3(__native_read_cr3());
408
409 /*
410 * A new pagetable structure is being built to allow for the kernel
411 * and initrd to be encrypted. It starts with an empty PGD that will
412 * then be populated with new PUDs and PMDs as the encrypted and
413 * decrypted kernel mappings are created.
414 */
415 ppd.pgd = ppd.pgtable_area;
416 memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
417 ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
418
419 /*
420 * A different PGD index/entry must be used to get different
421 * pagetable entries for the decrypted mapping. Choose the next
422 * PGD index and convert it to a virtual address to be used as
423 * the base of the mapping.
424 */
425 decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
426 if (initrd_len) {
427 unsigned long check_base;
428
429 check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
430 decrypted_base = max(decrypted_base, check_base);
431 }
432 decrypted_base <<= PGDIR_SHIFT;
433
434 /* Add encrypted kernel (identity) mappings */
435 ppd.paddr = kernel_start;
436 ppd.vaddr = kernel_start;
437 ppd.vaddr_end = kernel_end;
438 sme_map_range_encrypted(&ppd);
439
440 /* Add decrypted, write-protected kernel (non-identity) mappings */
441 ppd.paddr = kernel_start;
442 ppd.vaddr = kernel_start + decrypted_base;
443 ppd.vaddr_end = kernel_end + decrypted_base;
444 sme_map_range_decrypted_wp(&ppd);
445
446 if (initrd_len) {
447 /* Add encrypted initrd (identity) mappings */
448 ppd.paddr = initrd_start;
449 ppd.vaddr = initrd_start;
450 ppd.vaddr_end = initrd_end;
451 sme_map_range_encrypted(&ppd);
452 /*
453 * Add decrypted, write-protected initrd (non-identity) mappings
454 */
455 ppd.paddr = initrd_start;
456 ppd.vaddr = initrd_start + decrypted_base;
457 ppd.vaddr_end = initrd_end + decrypted_base;
458 sme_map_range_decrypted_wp(&ppd);
459 }
460
461 /* Add decrypted workarea mappings to both kernel mappings */
462 ppd.paddr = workarea_start;
463 ppd.vaddr = workarea_start;
464 ppd.vaddr_end = workarea_end;
465 sme_map_range_decrypted(&ppd);
466
467 ppd.paddr = workarea_start;
468 ppd.vaddr = workarea_start + decrypted_base;
469 ppd.vaddr_end = workarea_end + decrypted_base;
470 sme_map_range_decrypted(&ppd);
471
472 /* Perform the encryption */
473 sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
474 kernel_len, workarea_start, (unsigned long)ppd.pgd);
475
476 if (initrd_len)
477 sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
478 initrd_len, workarea_start,
479 (unsigned long)ppd.pgd);
480
481 /*
482 * At this point we are running encrypted. Remove the mappings for
483 * the decrypted areas - all that is needed for this is to remove
484 * the PGD entry/entries.
485 */
486 ppd.vaddr = kernel_start + decrypted_base;
487 ppd.vaddr_end = kernel_end + decrypted_base;
488 sme_clear_pgd(&ppd);
489
490 if (initrd_len) {
491 ppd.vaddr = initrd_start + decrypted_base;
492 ppd.vaddr_end = initrd_end + decrypted_base;
493 sme_clear_pgd(&ppd);
494 }
495
496 ppd.vaddr = workarea_start + decrypted_base;
497 ppd.vaddr_end = workarea_end + decrypted_base;
498 sme_clear_pgd(&ppd);
499
500 /* Flush the TLB - no globals so cr3 is enough */
501 native_write_cr3(__native_read_cr3());
502}
503
504void __init __nostackprotector sme_enable(struct boot_params *bp)
505{
506 const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
507 unsigned int eax, ebx, ecx, edx;
508 unsigned long feature_mask;
509 bool active_by_default;
510 unsigned long me_mask;
511 char buffer[16];
512 u64 msr;
513
514 /* Check for the SME/SEV support leaf */
515 eax = 0x80000000;
516 ecx = 0;
517 native_cpuid(&eax, &ebx, &ecx, &edx);
518 if (eax < 0x8000001f)
519 return;
520
521#define AMD_SME_BIT BIT(0)
522#define AMD_SEV_BIT BIT(1)
523 /*
524 * Set the feature mask (SME or SEV) based on whether we are
525 * running under a hypervisor.
526 */
527 eax = 1;
528 ecx = 0;
529 native_cpuid(&eax, &ebx, &ecx, &edx);
530 feature_mask = (ecx & BIT(31)) ? AMD_SEV_BIT : AMD_SME_BIT;
531
532 /*
533 * Check for the SME/SEV feature:
534 * CPUID Fn8000_001F[EAX]
535 * - Bit 0 - Secure Memory Encryption support
536 * - Bit 1 - Secure Encrypted Virtualization support
537 * CPUID Fn8000_001F[EBX]
538 * - Bits 5:0 - Pagetable bit position used to indicate encryption
539 */
540 eax = 0x8000001f;
541 ecx = 0;
542 native_cpuid(&eax, &ebx, &ecx, &edx);
543 if (!(eax & feature_mask))
544 return;
545
546 me_mask = 1UL << (ebx & 0x3f);
547
548 /* Check if memory encryption is enabled */
549 if (feature_mask == AMD_SME_BIT) {
550 /* For SME, check the SYSCFG MSR */
551 msr = __rdmsr(MSR_K8_SYSCFG);
552 if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
553 return;
554 } else {
555 /* For SEV, check the SEV MSR */
556 msr = __rdmsr(MSR_AMD64_SEV);
557 if (!(msr & MSR_AMD64_SEV_ENABLED))
558 return;
559
560 /* SEV state cannot be controlled by a command line option */
561 sme_me_mask = me_mask;
562 sev_enabled = true;
563 return;
564 }
565
566 /*
567 * Fixups have not been applied to phys_base yet and we're running
568 * identity mapped, so we must obtain the address to the SME command
569 * line argument data using rip-relative addressing.
570 */
571 asm ("lea sme_cmdline_arg(%%rip), %0"
572 : "=r" (cmdline_arg)
573 : "p" (sme_cmdline_arg));
574 asm ("lea sme_cmdline_on(%%rip), %0"
575 : "=r" (cmdline_on)
576 : "p" (sme_cmdline_on));
577 asm ("lea sme_cmdline_off(%%rip), %0"
578 : "=r" (cmdline_off)
579 : "p" (sme_cmdline_off));
580
581 if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
582 active_by_default = true;
583 else
584 active_by_default = false;
585
586 cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
587 ((u64)bp->ext_cmd_line_ptr << 32));
588
589 cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
590
591 if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
592 sme_me_mask = me_mask;
593 else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
594 sme_me_mask = 0;
595 else
596 sme_me_mask = active_by_default ? me_mask : 0;
597}
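Finally, the CPUID probing at the top of sme_enable() can be reproduced from user space. The sketch below is not kernel code; it relies on the compiler-provided <cpuid.h> helpers (GCC/Clang on x86, an assumption of this example) and prints the same Fn8000_001F bits the function checks: SME support in EAX[0], SEV support in EAX[1], and the pagetable encryption bit position in EBX[5:0].

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Is the SME/SEV support leaf present at all? */
	if (__get_cpuid_max(0x80000000, NULL) < 0x8000001f) {
		puts("CPUID leaf 0x8000001F not available");
		return 0;
	}

	__cpuid_count(0x8000001f, 0, eax, ebx, ecx, edx);
	(void)ecx;	/* only EAX and EBX are of interest here */
	(void)edx;

	printf("SME supported: %s\n", (eax & 1) ? "yes" : "no");
	printf("SEV supported: %s\n", (eax & 2) ? "yes" : "no");
	printf("pagetable encryption bit position: %u\n", ebx & 0x3f);
	return 0;
}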