author    Christoffer Dall <christoffer.dall@linaro.org>  2014-10-10 06:14:28 -0400
committer Christoffer Dall <christoffer.dall@linaro.org>  2014-10-14 08:48:19 -0400
commit    38f791a4e499792eeb2a3c0572dd5133511c5bbb (patch)
tree      782fd34d0980d623fb1cfe7e026915e25983f12f
parent    8eef91239e57d2e932e7470879c9a504d5494ebb (diff)
arm64: KVM: Implement 48 VA support for KVM EL2 and Stage-2
This patch adds the necessary support for all host kernel PGSIZE and
VA_SPACE configuration options for both EL2 and the Stage-2 page tables.

However, for 40bit and 42bit PARange systems, the architecture mandates
that VTCR_EL2.SL0 is maximum 1, resulting in fewer levels of stage-2 page
tables than levels of host kernel page tables. At the same time, for
systems with a PARange > 42bit, we limit the IPA range by always setting
VTCR_EL2.T0SZ to 24.

To solve the situation with different levels of page tables for Stage-2
translation than the host kernel page tables, we allocate a dummy PGD with
pointers to our actual initial-level Stage-2 page table, in order for us
to reuse the kernel pgtable manipulation primitives. Reproducing all of
this in KVM does not look pretty and would unnecessarily complicate the
32-bit side.

Systems with a PARange < 40bits are not yet supported.

 [ I have reworked this patch from its original form submitted by
   Jungseok to take the architecture constraints into consideration.
   There were too many changes from the original patch for me to
   preserve the authorship. Thanks to Catalin Marinas for his help in
   figuring out a good solution to this challenge. I have also fixed
   various bugs and missing error code handling from the original
   patch. - Christoffer ]

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Jungseok Lee <jungseoklee85@gmail.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
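[Editor's note] To make the dummy-PGD arrangement above concrete, here is a minimal
standalone sketch. It is not from this patch and the names (fake_s2_mmu,
FAKE_PGD_ENTRIES, HW_LEVEL_ENTRIES, fake_s2_init) are hypothetical: a software-only
top level whose entries all point into one contiguous hardware-walked table, so that
generic table-walking code still descends through the expected number of levels while
the hardware base register (VTTBR) would be given the real table's address.

/* Illustrative sketch only; hypothetical names, not kernel code. */
#include <stdint.h>
#include <stdlib.h>

#define FAKE_PGD_ENTRIES 2    /* like PTRS_PER_S2_PGD for a small IPA space */
#define HW_LEVEL_ENTRIES 512  /* entries per concatenated hardware table page */

struct fake_s2_mmu {
	uint64_t *pgd[FAKE_PGD_ENTRIES]; /* software-only, never seen by the MMU */
	uint64_t *hwpgd;                 /* what VTTBR would actually point at */
};

static int fake_s2_init(struct fake_s2_mmu *mmu)
{
	int i;

	/* One contiguous allocation backs the real first hardware level. */
	mmu->hwpgd = calloc(FAKE_PGD_ENTRIES * HW_LEVEL_ENTRIES,
			    sizeof(*mmu->hwpgd));
	if (!mmu->hwpgd)
		return -1;

	/* Pre-populate the fake top level so software walks can descend. */
	for (i = 0; i < FAKE_PGD_ENTRIES; i++)
		mmu->pgd[i] = mmu->hwpgd + i * HW_LEVEL_ENTRIES;

	return 0;
}

In the patch itself the same wiring is done with pgd_populate()/pud_populate() over
pages from __get_free_pages() in kvm_prealloc_hwpgd(), and update_vttbr() now asks
kvm_get_hwpgd() for the table to load into VTTBR.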
-rw-r--r--  arch/arm/include/asm/kvm_mmu.h   |  29
-rw-r--r--  arch/arm/kvm/arm.c               |   2
-rw-r--r--  arch/arm/kvm/mmu.c               | 133
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h | 125
4 files changed, 249 insertions(+), 40 deletions(-)
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index eaa6deac97b2..acb0d5712716 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -37,6 +37,11 @@
  */
 #define TRAMPOLINE_VA		UL(CONFIG_VECTORS_BASE)
 
+/*
+ * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels.
+ */
+#define KVM_MMU_CACHE_MIN_PAGES	2
+
 #ifndef __ASSEMBLY__
 
 #include <asm/cacheflush.h>
@@ -83,6 +88,11 @@ static inline void kvm_clean_pgd(pgd_t *pgd)
 	clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
 }
 
+static inline void kvm_clean_pmd(pmd_t *pmd)
+{
+	clean_dcache_area(pmd, PTRS_PER_PMD * sizeof(pmd_t));
+}
+
 static inline void kvm_clean_pmd_entry(pmd_t *pmd)
 {
 	clean_pmd_entry(pmd);
@@ -123,10 +133,23 @@ static inline bool kvm_page_empty(void *ptr)
 }
 
 
-#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
-#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
-#define kvm_pud_table_empty(pudp) (0)
+#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
+#define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
+#define kvm_pud_table_empty(kvm, pudp) (0)
+
+#define KVM_PREALLOC_LEVEL	0
 
+static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
+{
+	return 0;
+}
+
+static inline void kvm_free_hwpgd(struct kvm *kvm) { }
+
+static inline void *kvm_get_hwpgd(struct kvm *kvm)
+{
+	return kvm->arch.pgd;
+}
 
 struct kvm;
 
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 45e5f67e63b1..9e193c8a959e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -409,7 +409,7 @@ static void update_vttbr(struct kvm *kvm)
 		kvm_next_vmid++;
 
 	/* update vttbr to be used with the new vmid */
-	pgd_phys = virt_to_phys(kvm->arch.pgd);
+	pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm));
 	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
 	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
 	kvm->arch.vttbr = pgd_phys | vmid;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 6038027ab1d6..ee142edcca84 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -42,7 +42,7 @@ static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
-#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
+#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
 
 #define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x))
 
@@ -134,7 +134,7 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
-	if (kvm_pte_table_empty(start_pte))
+	if (kvm_pte_table_empty(kvm, start_pte))
 		clear_pmd_entry(kvm, pmd, start_addr);
 }
 
@@ -158,7 +158,7 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
 		}
 	} while (pmd++, addr = next, addr != end);
 
-	if (kvm_pmd_table_empty(start_pmd))
+	if (kvm_pmd_table_empty(kvm, start_pmd))
 		clear_pud_entry(kvm, pud, start_addr);
 }
 
@@ -182,7 +182,7 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
 		}
 	} while (pud++, addr = next, addr != end);
 
-	if (kvm_pud_table_empty(start_pud))
+	if (kvm_pud_table_empty(kvm, start_pud))
 		clear_pgd_entry(kvm, pgd, start_addr);
 }
 
@@ -306,7 +306,7 @@ void free_boot_hyp_pgd(void)
 	if (boot_hyp_pgd) {
 		unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
 		unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
-		free_pages((unsigned long)boot_hyp_pgd, pgd_order);
+		free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
 		boot_hyp_pgd = NULL;
 	}
 
@@ -343,7 +343,7 @@ void free_hyp_pgds(void)
 		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
 			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
 
-		free_pages((unsigned long)hyp_pgd, pgd_order);
+		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
 		hyp_pgd = NULL;
 	}
 
@@ -401,13 +401,46 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
 	return 0;
 }
 
+static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
+				   unsigned long end, unsigned long pfn,
+				   pgprot_t prot)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned long addr, next;
+	int ret;
+
+	addr = start;
+	do {
+		pud = pud_offset(pgd, addr);
+
+		if (pud_none_or_clear_bad(pud)) {
+			pmd = pmd_alloc_one(NULL, addr);
+			if (!pmd) {
+				kvm_err("Cannot allocate Hyp pmd\n");
+				return -ENOMEM;
+			}
+			pud_populate(NULL, pud, pmd);
+			get_page(virt_to_page(pud));
+			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+		}
+
+		next = pud_addr_end(addr, end);
+		ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+		if (ret)
+			return ret;
+		pfn += (next - addr) >> PAGE_SHIFT;
+	} while (addr = next, addr != end);
+
+	return 0;
+}
+
 static int __create_hyp_mappings(pgd_t *pgdp,
 				 unsigned long start, unsigned long end,
 				 unsigned long pfn, pgprot_t prot)
 {
 	pgd_t *pgd;
 	pud_t *pud;
-	pmd_t *pmd;
 	unsigned long addr, next;
 	int err = 0;
 
@@ -416,22 +449,21 @@ static int __create_hyp_mappings(pgd_t *pgdp,
 	end = PAGE_ALIGN(end);
 	do {
 		pgd = pgdp + pgd_index(addr);
-		pud = pud_offset(pgd, addr);
 
-		if (pud_none_or_clear_bad(pud)) {
-			pmd = pmd_alloc_one(NULL, addr);
-			if (!pmd) {
-				kvm_err("Cannot allocate Hyp pmd\n");
+		if (pgd_none(*pgd)) {
+			pud = pud_alloc_one(NULL, addr);
+			if (!pud) {
+				kvm_err("Cannot allocate Hyp pud\n");
 				err = -ENOMEM;
 				goto out;
 			}
-			pud_populate(NULL, pud, pmd);
-			get_page(virt_to_page(pud));
-			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+			pgd_populate(NULL, pgd, pud);
+			get_page(virt_to_page(pgd));
+			kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
 		}
 
 		next = pgd_addr_end(addr, end);
-		err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+		err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
 		if (err)
 			goto out;
 		pfn += (next - addr) >> PAGE_SHIFT;
@@ -521,6 +553,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
  */
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
+	int ret;
 	pgd_t *pgd;
 
 	if (kvm->arch.pgd != NULL) {
@@ -528,14 +561,38 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 		return -EINVAL;
 	}
 
-	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+	if (KVM_PREALLOC_LEVEL > 0) {
+		/*
+		 * Allocate fake pgd for the page table manipulation macros to
+		 * work.  This is not used by the hardware and we have no
+		 * alignment requirement for this allocation.
+		 */
+		pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
+				       GFP_KERNEL | __GFP_ZERO);
+	} else {
+		/*
+		 * Allocate actual first-level Stage-2 page table used by the
+		 * hardware for Stage-2 page table walks.
+		 */
+		pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+	}
+
 	if (!pgd)
 		return -ENOMEM;
 
+	ret = kvm_prealloc_hwpgd(kvm, pgd);
+	if (ret)
+		goto out_err;
+
 	kvm_clean_pgd(pgd);
 	kvm->arch.pgd = pgd;
-
 	return 0;
+out_err:
+	if (KVM_PREALLOC_LEVEL > 0)
+		kfree(pgd);
+	else
+		free_pages((unsigned long)pgd, S2_PGD_ORDER);
+	return ret;
 }
 
 /**
@@ -571,19 +628,39 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 		return;
 
 	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-	free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+	kvm_free_hwpgd(kvm);
+	if (KVM_PREALLOC_LEVEL > 0)
+		kfree(kvm->arch.pgd);
+	else
+		free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
 	kvm->arch.pgd = NULL;
 }
 
-static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 			     phys_addr_t addr)
 {
 	pgd_t *pgd;
 	pud_t *pud;
-	pmd_t *pmd;
 
 	pgd = kvm->arch.pgd + pgd_index(addr);
-	pud = pud_offset(pgd, addr);
+	if (WARN_ON(pgd_none(*pgd))) {
+		if (!cache)
+			return NULL;
+		pud = mmu_memory_cache_alloc(cache);
+		pgd_populate(NULL, pgd, pud);
+		get_page(virt_to_page(pgd));
+	}
+
+	return pud_offset(pgd, addr);
+}
+
+static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+			     phys_addr_t addr)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pud = stage2_get_pud(kvm, cache, addr);
 	if (pud_none(*pud)) {
 		if (!cache)
 			return NULL;
@@ -629,7 +706,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 	pmd_t *pmd;
 	pte_t *pte, old_pte;
 
-	/* Create stage-2 page table mapping - Level 1 */
+	/* Create stage-2 page table mapping - Levels 0 and 1 */
 	pmd = stage2_get_pmd(kvm, cache, addr);
 	if (!pmd) {
 		/*
@@ -690,7 +767,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 	if (writable)
 		kvm_set_s2pte_writable(&pte);
 
-	ret = mmu_topup_memory_cache(&cache, 2, 2);
+	ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
+				     KVM_NR_MEM_OBJS);
 	if (ret)
 		goto out;
 	spin_lock(&kvm->mmu_lock);
@@ -805,7 +883,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	up_read(&current->mm->mmap_sem);
 
 	/* We need minimum second+third level pages */
-	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
+	ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
+				     KVM_NR_MEM_OBJS);
 	if (ret)
 		return ret;
 
@@ -1080,8 +1159,8 @@ int kvm_mmu_init(void)
 			 (unsigned long)phys_base);
 	}
 
-	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
-	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
+	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
 
 	if (!hyp_pgd || !boot_hyp_pgd) {
 		kvm_err("Hyp mode PGD not allocated\n");
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index e36171974d6a..0caf7a59f6a1 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -41,6 +41,18 @@
  */
 #define TRAMPOLINE_VA		(HYP_PAGE_OFFSET_MASK & PAGE_MASK)
 
+/*
+ * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation
+ * levels in addition to the PGD and potentially the PUD which are
+ * pre-allocated (we pre-allocate the fake PGD and the PUD when the Stage-2
+ * tables use one level of tables less than the kernel.
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define KVM_MMU_CACHE_MIN_PAGES	1
+#else
+#define KVM_MMU_CACHE_MIN_PAGES	2
+#endif
+
 #ifdef __ASSEMBLY__
 
 /*
@@ -53,6 +65,7 @@
 
 #else
 
+#include <asm/pgalloc.h>
 #include <asm/cachetype.h>
 #include <asm/cacheflush.h>
 
@@ -65,10 +78,6 @@
 #define KVM_PHYS_SIZE	(1UL << KVM_PHYS_SHIFT)
 #define KVM_PHYS_MASK	(KVM_PHYS_SIZE - 1UL)
 
-/* Make sure we get the right size, and thus the right alignment */
-#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - PGDIR_SHIFT))
-#define S2_PGD_ORDER	get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
-
 int create_hyp_mappings(void *from, void *to);
 int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_boot_hyp_pgd(void);
@@ -93,6 +102,7 @@ void kvm_clear_hyp_idmap(void);
 #define kvm_set_pmd(pmdp, pmd)		set_pmd(pmdp, pmd)
 
 static inline void kvm_clean_pgd(pgd_t *pgd) {}
+static inline void kvm_clean_pmd(pmd_t *pmd) {}
 static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
 static inline void kvm_clean_pte(pte_t *pte) {}
 static inline void kvm_clean_pte_entry(pte_t *pte) {}
@@ -111,19 +121,116 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 #define kvm_pud_addr_end(addr, end)	pud_addr_end(addr, end)
 #define kvm_pmd_addr_end(addr, end)	pmd_addr_end(addr, end)
 
+/*
+ * In the case where PGDIR_SHIFT is larger than KVM_PHYS_SHIFT, we can address
+ * the entire IPA input range with a single pgd entry, and we would only need
+ * one pgd entry.  Note that in this case, the pgd is actually not used by
+ * the MMU for Stage-2 translations, but is merely a fake pgd used as a data
+ * structure for the kernel pgtable macros to work.
+ */
+#if PGDIR_SHIFT > KVM_PHYS_SHIFT
+#define PTRS_PER_S2_PGD_SHIFT	0
+#else
+#define PTRS_PER_S2_PGD_SHIFT	(KVM_PHYS_SHIFT - PGDIR_SHIFT)
+#endif
+#define PTRS_PER_S2_PGD		(1 << PTRS_PER_S2_PGD_SHIFT)
+#define S2_PGD_ORDER		get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
+
+/*
+ * If we are concatenating first level stage-2 page tables, we would have less
+ * than or equal to 16 pointers in the fake PGD, because that's what the
+ * architecture allows.  In this case, (4 - CONFIG_ARM64_PGTABLE_LEVELS)
+ * represents the first level for the host, and we add 1 to go to the next
+ * level (which uses contatenation) for the stage-2 tables.
+ */
+#if PTRS_PER_S2_PGD <= 16
+#define KVM_PREALLOC_LEVEL	(4 - CONFIG_ARM64_PGTABLE_LEVELS + 1)
+#else
+#define KVM_PREALLOC_LEVEL	(0)
+#endif
+
+/**
+ * kvm_prealloc_hwpgd - allocate inital table for VTTBR
+ * @kvm:	The KVM struct pointer for the VM.
+ * @pgd:	The kernel pseudo pgd
+ *
+ * When the kernel uses more levels of page tables than the guest, we allocate
+ * a fake PGD and pre-populate it to point to the next-level page table, which
+ * will be the real initial page table pointed to by the VTTBR.
+ *
+ * When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and
+ * the kernel will use folded pud.  When KVM_PREALLOC_LEVEL==1, we
+ * allocate 2 consecutive PUD pages.
+ */
+static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
+{
+	unsigned int i;
+	unsigned long hwpgd;
+
+	if (KVM_PREALLOC_LEVEL == 0)
+		return 0;
+
+	hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT);
+	if (!hwpgd)
+		return -ENOMEM;
+
+	for (i = 0; i < PTRS_PER_S2_PGD; i++) {
+		if (KVM_PREALLOC_LEVEL == 1)
+			pgd_populate(NULL, pgd + i,
+				     (pud_t *)hwpgd + i * PTRS_PER_PUD);
+		else if (KVM_PREALLOC_LEVEL == 2)
+			pud_populate(NULL, pud_offset(pgd, 0) + i,
+				     (pmd_t *)hwpgd + i * PTRS_PER_PMD);
+	}
+
+	return 0;
+}
+
+static inline void *kvm_get_hwpgd(struct kvm *kvm)
+{
+	pgd_t *pgd = kvm->arch.pgd;
+	pud_t *pud;
+
+	if (KVM_PREALLOC_LEVEL == 0)
+		return pgd;
+
+	pud = pud_offset(pgd, 0);
+	if (KVM_PREALLOC_LEVEL == 1)
+		return pud;
+
+	BUG_ON(KVM_PREALLOC_LEVEL != 2);
+	return pmd_offset(pud, 0);
+}
+
+static inline void kvm_free_hwpgd(struct kvm *kvm)
+{
+	if (KVM_PREALLOC_LEVEL > 0) {
+		unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm);
+		free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT);
+	}
+}
+
 static inline bool kvm_page_empty(void *ptr)
 {
 	struct page *ptr_page = virt_to_page(ptr);
 	return page_count(ptr_page) == 1;
 }
 
-#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
-#ifndef CONFIG_ARM64_64K_PAGES
-#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
+
+#ifdef __PAGETABLE_PMD_FOLDED
+#define kvm_pmd_table_empty(kvm, pmdp) (0)
+#else
+#define kvm_pmd_table_empty(kvm, pmdp) \
+	(kvm_page_empty(pmdp) && (!(kvm) || KVM_PREALLOC_LEVEL < 2))
+#endif
+
+#ifdef __PAGETABLE_PUD_FOLDED
+#define kvm_pud_table_empty(kvm, pudp) (0)
 #else
-#define kvm_pmd_table_empty(pmdp) (0)
+#define kvm_pud_table_empty(kvm, pudp) \
+	(kvm_page_empty(pudp) && (!(kvm) || KVM_PREALLOC_LEVEL < 1))
 #endif
-#define kvm_pud_table_empty(pudp) (0)
 
 
 struct kvm;