author    Christoffer Dall <christoffer.dall@linaro.org>  2014-10-10 06:14:28 -0400
committer Christoffer Dall <christoffer.dall@linaro.org>  2014-10-14 08:48:19 -0400
commit    38f791a4e499792eeb2a3c0572dd5133511c5bbb (patch)
tree      782fd34d0980d623fb1cfe7e026915e25983f12f
parent    8eef91239e57d2e932e7470879c9a504d5494ebb (diff)
arm64: KVM: Implement 48 VA support for KVM EL2 and Stage-2
This patch adds the necessary support for all host kernel PGSIZE and
VA_SPACE configuration options for both EL2 and the Stage-2 page tables.

However, for 40bit and 42bit PARange systems, the architecture mandates
that VTCR_EL2.SL0 is maximum 1, resulting in fewer levels of stage-2 page
tables than levels of host kernel page tables. At the same time, for
systems with a PARange > 42bit, we limit the IPA range by always setting
VTCR_EL2.T0SZ to 24.

To solve the situation with different levels of page tables for Stage-2
translation than the host kernel page tables, we allocate a dummy PGD with
pointers to our actual initial-level Stage-2 page table, in order for us
to reuse the kernel pgtable manipulation primitives. Reproducing all of
this in KVM does not look pretty and would unnecessarily complicate the
32-bit side.

Systems with a PARange < 40bits are not yet supported.

 [ I have reworked this patch from its original form submitted by
   Jungseok to take the architecture constraints into consideration.
   There were too many changes from the original patch for me to
   preserve the authorship. Thanks to Catalin Marinas for his help in
   figuring out a good solution to this challenge. I have also fixed
   various bugs and missing error code handling from the original
   patch. - Christoffer ]

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Jungseok Lee <jungseoklee85@gmail.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
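[Editor's note] To make the dummy-PGD arrangement above concrete, here is a minimal
standalone sketch. It is not from this patch and the names (fake_s2_mmu,
FAKE_PGD_ENTRIES, HW_LEVEL_ENTRIES, fake_s2_init) are hypothetical: a software-only
top level whose entries all point into one contiguous hardware-walked table, so that
generic table-walking code still descends through the expected number of levels while
the hardware base register (VTTBR) would be given the real table's address.

/* Illustrative sketch only; hypothetical names, not kernel code. */
#include <stdint.h>
#include <stdlib.h>

#define FAKE_PGD_ENTRIES 2    /* like PTRS_PER_S2_PGD for a small IPA space */
#define HW_LEVEL_ENTRIES 512  /* entries per concatenated hardware table page */

struct fake_s2_mmu {
	uint64_t *pgd[FAKE_PGD_ENTRIES]; /* software-only, never seen by the MMU */
	uint64_t *hwpgd;                 /* what VTTBR would actually point at */
};

static int fake_s2_init(struct fake_s2_mmu *mmu)
{
	int i;

	/* One contiguous allocation backs the real first hardware level. */
	mmu->hwpgd = calloc(FAKE_PGD_ENTRIES * HW_LEVEL_ENTRIES,
			    sizeof(*mmu->hwpgd));
	if (!mmu->hwpgd)
		return -1;

	/* Pre-populate the fake top level so software walks can descend. */
	for (i = 0; i < FAKE_PGD_ENTRIES; i++)
		mmu->pgd[i] = mmu->hwpgd + i * HW_LEVEL_ENTRIES;

	return 0;
}

In the patch itself the same wiring is done with pgd_populate()/pud_populate() over
pages from __get_free_pages() in kvm_prealloc_hwpgd(), and update_vttbr() now asks
kvm_get_hwpgd() for the table to load into VTTBR.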
-rw-r--r--  arch/arm/include/asm/kvm_mmu.h   |  29
-rw-r--r--  arch/arm/kvm/arm.c               |   2
-rw-r--r--  arch/arm/kvm/mmu.c               | 133
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h | 125
4 files changed, 249 insertions(+), 40 deletions(-)
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index eaa6deac97b2..acb0d5712716 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -37,6 +37,11 @@
  */
 #define TRAMPOLINE_VA		UL(CONFIG_VECTORS_BASE)
 
+/*
+ * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels.
+ */
+#define KVM_MMU_CACHE_MIN_PAGES	2
+
 #ifndef __ASSEMBLY__
 
 #include <asm/cacheflush.h>
@@ -83,6 +88,11 @@ static inline void kvm_clean_pgd(pgd_t *pgd)
 	clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
 }
 
+static inline void kvm_clean_pmd(pmd_t *pmd)
+{
+	clean_dcache_area(pmd, PTRS_PER_PMD * sizeof(pmd_t));
+}
+
 static inline void kvm_clean_pmd_entry(pmd_t *pmd)
 {
 	clean_pmd_entry(pmd);
@@ -123,10 +133,23 @@ static inline bool kvm_page_empty(void *ptr)
 }
 
 
-#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
-#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
-#define kvm_pud_table_empty(pudp) (0)
+#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
+#define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
+#define kvm_pud_table_empty(kvm, pudp) (0)
+
+#define KVM_PREALLOC_LEVEL	0
 
+static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
+{
+	return 0;
+}
+
+static inline void kvm_free_hwpgd(struct kvm *kvm) { }
+
+static inline void *kvm_get_hwpgd(struct kvm *kvm)
+{
+	return kvm->arch.pgd;
+}
 
 struct kvm;
 
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 45e5f67e63b1..9e193c8a959e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -409,7 +409,7 @@ static void update_vttbr(struct kvm *kvm)
 		kvm_next_vmid++;
 
 	/* update vttbr to be used with the new vmid */
-	pgd_phys = virt_to_phys(kvm->arch.pgd);
+	pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm));
 	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
 	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
 	kvm->arch.vttbr = pgd_phys | vmid;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 6038027ab1d6..ee142edcca84 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -42,7 +42,7 @@ static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
-#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
+#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
 
 #define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x))
 
@@ -134,7 +134,7 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
-	if (kvm_pte_table_empty(start_pte))
+	if (kvm_pte_table_empty(kvm, start_pte))
 		clear_pmd_entry(kvm, pmd, start_addr);
 }
 
@@ -158,7 +158,7 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
 		}
 	} while (pmd++, addr = next, addr != end);
 
-	if (kvm_pmd_table_empty(start_pmd))
+	if (kvm_pmd_table_empty(kvm, start_pmd))
 		clear_pud_entry(kvm, pud, start_addr);
 }
 
@@ -182,7 +182,7 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
 		}
 	} while (pud++, addr = next, addr != end);
 
-	if (kvm_pud_table_empty(start_pud))
+	if (kvm_pud_table_empty(kvm, start_pud))
 		clear_pgd_entry(kvm, pgd, start_addr);
 }
 
@@ -306,7 +306,7 @@ void free_boot_hyp_pgd(void)
 	if (boot_hyp_pgd) {
 		unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
 		unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
-		free_pages((unsigned long)boot_hyp_pgd, pgd_order);
+		free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
 		boot_hyp_pgd = NULL;
 	}
 
@@ -343,7 +343,7 @@ void free_hyp_pgds(void)
 		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
 			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
 
-		free_pages((unsigned long)hyp_pgd, pgd_order);
+		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
 		hyp_pgd = NULL;
 	}
 
@@ -401,13 +401,46 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
 	return 0;
 }
 
+static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
+				   unsigned long end, unsigned long pfn,
+				   pgprot_t prot)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned long addr, next;
+	int ret;
+
+	addr = start;
+	do {
+		pud = pud_offset(pgd, addr);
+
+		if (pud_none_or_clear_bad(pud)) {
+			pmd = pmd_alloc_one(NULL, addr);
+			if (!pmd) {
+				kvm_err("Cannot allocate Hyp pmd\n");
+				return -ENOMEM;
+			}
+			pud_populate(NULL, pud, pmd);
+			get_page(virt_to_page(pud));
+			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+		}
+
+		next = pud_addr_end(addr, end);
+		ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+		if (ret)
+			return ret;
+		pfn += (next - addr) >> PAGE_SHIFT;
+	} while (addr = next, addr != end);
+
+	return 0;
+}
+
 static int __create_hyp_mappings(pgd_t *pgdp,
 				 unsigned long start, unsigned long end,
 				 unsigned long pfn, pgprot_t prot)
 {
 	pgd_t *pgd;
 	pud_t *pud;
-	pmd_t *pmd;
 	unsigned long addr, next;
 	int err = 0;
 
@@ -416,22 +449,21 @@ static int __create_hyp_mappings(pgd_t *pgdp,
 	end = PAGE_ALIGN(end);
 	do {
 		pgd = pgdp + pgd_index(addr);
-		pud = pud_offset(pgd, addr);
 
-		if (pud_none_or_clear_bad(pud)) {
-			pmd = pmd_alloc_one(NULL, addr);
-			if (!pmd) {
-				kvm_err("Cannot allocate Hyp pmd\n");
+		if (pgd_none(*pgd)) {
+			pud = pud_alloc_one(NULL, addr);
+			if (!pud) {
+				kvm_err("Cannot allocate Hyp pud\n");
 				err = -ENOMEM;
 				goto out;
 			}
-			pud_populate(NULL, pud, pmd);
-			get_page(virt_to_page(pud));
-			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+			pgd_populate(NULL, pgd, pud);
+			get_page(virt_to_page(pgd));
+			kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
 		}
 
 		next = pgd_addr_end(addr, end);
-		err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+		err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
 		if (err)
 			goto out;
 		pfn += (next - addr) >> PAGE_SHIFT;
@@ -521,6 +553,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
  */
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
+	int ret;
 	pgd_t *pgd;
 
 	if (kvm->arch.pgd != NULL) {
@@ -528,14 +561,38 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 		return -EINVAL;
 	}
 
-	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+	if (KVM_PREALLOC_LEVEL > 0) {
+		/*
+		 * Allocate fake pgd for the page table manipulation macros to
+		 * work.  This is not used by the hardware and we have no
+		 * alignment requirement for this allocation.
+		 */
+		pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
+				       GFP_KERNEL | __GFP_ZERO);
+	} else {
+		/*
+		 * Allocate actual first-level Stage-2 page table used by the
+		 * hardware for Stage-2 page table walks.
+		 */
+		pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+	}
+
 	if (!pgd)
 		return -ENOMEM;
 
+	ret = kvm_prealloc_hwpgd(kvm, pgd);
+	if (ret)
+		goto out_err;
+
 	kvm_clean_pgd(pgd);
 	kvm->arch.pgd = pgd;
-
 	return 0;
+out_err:
+	if (KVM_PREALLOC_LEVEL > 0)
+		kfree(pgd);
+	else
+		free_pages((unsigned long)pgd, S2_PGD_ORDER);
+	return ret;
 }
 
 /**
@@ -571,19 +628,39 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 		return;
 
 	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-	free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+	kvm_free_hwpgd(kvm);
+	if (KVM_PREALLOC_LEVEL > 0)
+		kfree(kvm->arch.pgd);
+	else
+		free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
 	kvm->arch.pgd = NULL;
 }
 
-static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 			     phys_addr_t addr)
 {
 	pgd_t *pgd;
 	pud_t *pud;
-	pmd_t *pmd;
 
 	pgd = kvm->arch.pgd + pgd_index(addr);
-	pud = pud_offset(pgd, addr);
+	if (WARN_ON(pgd_none(*pgd))) {
+		if (!cache)
+			return NULL;
+		pud = mmu_memory_cache_alloc(cache);
+		pgd_populate(NULL, pgd, pud);
+		get_page(virt_to_page(pgd));
+	}
+
+	return pud_offset(pgd, addr);
+}
+
+static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+			     phys_addr_t addr)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pud = stage2_get_pud(kvm, cache, addr);
 	if (pud_none(*pud)) {
 		if (!cache)
 			return NULL;
@@ -629,7 +706,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 	pmd_t *pmd;
 	pte_t *pte, old_pte;
 
-	/* Create stage-2 page table mapping - Level 1 */
+	/* Create stage-2 page table mapping - Levels 0 and 1 */
 	pmd = stage2_get_pmd(kvm, cache, addr);
 	if (!pmd) {
 		/*
@@ -690,7 +767,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 	if (writable)
 		kvm_set_s2pte_writable(&pte);
 
-	ret = mmu_topup_memory_cache(&cache, 2, 2);
+	ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
+				     KVM_NR_MEM_OBJS);
 	if (ret)
 		goto out;
 	spin_lock(&kvm->mmu_lock);
@@ -805,7 +883,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	up_read(&current->mm->mmap_sem);
 
 	/* We need minimum second+third level pages */
-	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
+	ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
+				     KVM_NR_MEM_OBJS);
 	if (ret)
 		return ret;
 
@@ -1080,8 +1159,8 @@ int kvm_mmu_init(void)
 			 (unsigned long)phys_base);
 	}
 
-	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
-	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
+	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
 
 	if (!hyp_pgd || !boot_hyp_pgd) {
 		kvm_err("Hyp mode PGD not allocated\n");
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index e36171974d6a..0caf7a59f6a1 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -41,6 +41,18 @@
  */
 #define TRAMPOLINE_VA		(HYP_PAGE_OFFSET_MASK & PAGE_MASK)
 
+/*
+ * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation
+ * levels in addition to the PGD and potentially the PUD which are
+ * pre-allocated (we pre-allocate the fake PGD and the PUD when the Stage-2
+ * tables use one level of tables less than the kernel.
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define KVM_MMU_CACHE_MIN_PAGES	1
+#else
+#define KVM_MMU_CACHE_MIN_PAGES	2
+#endif
+
 #ifdef __ASSEMBLY__
 
 /*
@@ -53,6 +65,7 @@
 
 #else
 
+#include <asm/pgalloc.h>
 #include <asm/cachetype.h>
 #include <asm/cacheflush.h>
 
@@ -65,10 +78,6 @@
 #define KVM_PHYS_SIZE	(1UL << KVM_PHYS_SHIFT)
 #define KVM_PHYS_MASK	(KVM_PHYS_SIZE - 1UL)
 
-/* Make sure we get the right size, and thus the right alignment */
-#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - PGDIR_SHIFT))
-#define S2_PGD_ORDER	get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
-
 int create_hyp_mappings(void *from, void *to);
 int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_boot_hyp_pgd(void);
@@ -93,6 +102,7 @@ void kvm_clear_hyp_idmap(void);
 #define kvm_set_pmd(pmdp, pmd)		set_pmd(pmdp, pmd)
 
 static inline void kvm_clean_pgd(pgd_t *pgd) {}
+static inline void kvm_clean_pmd(pmd_t *pmd) {}
 static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
 static inline void kvm_clean_pte(pte_t *pte) {}
 static inline void kvm_clean_pte_entry(pte_t *pte) {}
@@ -111,19 +121,116 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 #define kvm_pud_addr_end(addr, end)	pud_addr_end(addr, end)
 #define kvm_pmd_addr_end(addr, end)	pmd_addr_end(addr, end)
 
+/*
+ * In the case where PGDIR_SHIFT is larger than KVM_PHYS_SHIFT, we can address
+ * the entire IPA input range with a single pgd entry, and we would only need
+ * one pgd entry.  Note that in this case, the pgd is actually not used by
+ * the MMU for Stage-2 translations, but is merely a fake pgd used as a data
+ * structure for the kernel pgtable macros to work.
+ */
+#if PGDIR_SHIFT > KVM_PHYS_SHIFT
+#define PTRS_PER_S2_PGD_SHIFT	0
+#else
+#define PTRS_PER_S2_PGD_SHIFT	(KVM_PHYS_SHIFT - PGDIR_SHIFT)
+#endif
+#define PTRS_PER_S2_PGD		(1 << PTRS_PER_S2_PGD_SHIFT)
+#define S2_PGD_ORDER		get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
+
+/*
+ * If we are concatenating first level stage-2 page tables, we would have less
+ * than or equal to 16 pointers in the fake PGD, because that's what the
+ * architecture allows.  In this case, (4 - CONFIG_ARM64_PGTABLE_LEVELS)
+ * represents the first level for the host, and we add 1 to go to the next
+ * level (which uses contatenation) for the stage-2 tables.
+ */
+#if PTRS_PER_S2_PGD <= 16
+#define KVM_PREALLOC_LEVEL	(4 - CONFIG_ARM64_PGTABLE_LEVELS + 1)
+#else
+#define KVM_PREALLOC_LEVEL	(0)
+#endif
+
+/**
+ * kvm_prealloc_hwpgd - allocate inital table for VTTBR
+ * @kvm:	The KVM struct pointer for the VM.
+ * @pgd:	The kernel pseudo pgd
+ *
+ * When the kernel uses more levels of page tables than the guest, we allocate
+ * a fake PGD and pre-populate it to point to the next-level page table, which
+ * will be the real initial page table pointed to by the VTTBR.
+ *
+ * When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and
+ * the kernel will use folded pud.  When KVM_PREALLOC_LEVEL==1, we
+ * allocate 2 consecutive PUD pages.
+ */
+static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
+{
+	unsigned int i;
+	unsigned long hwpgd;
+
+	if (KVM_PREALLOC_LEVEL == 0)
+		return 0;
+
+	hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT);
+	if (!hwpgd)
+		return -ENOMEM;
+
+	for (i = 0; i < PTRS_PER_S2_PGD; i++) {
+		if (KVM_PREALLOC_LEVEL == 1)
+			pgd_populate(NULL, pgd + i,
+				     (pud_t *)hwpgd + i * PTRS_PER_PUD);
+		else if (KVM_PREALLOC_LEVEL == 2)
+			pud_populate(NULL, pud_offset(pgd, 0) + i,
+				     (pmd_t *)hwpgd + i * PTRS_PER_PMD);
+	}
+
+	return 0;
+}
+
+static inline void *kvm_get_hwpgd(struct kvm *kvm)
+{
+	pgd_t *pgd = kvm->arch.pgd;
+	pud_t *pud;
+
+	if (KVM_PREALLOC_LEVEL == 0)
+		return pgd;
+
+	pud = pud_offset(pgd, 0);
+	if (KVM_PREALLOC_LEVEL == 1)
+		return pud;
+
+	BUG_ON(KVM_PREALLOC_LEVEL != 2);
+	return pmd_offset(pud, 0);
+}
+
+static inline void kvm_free_hwpgd(struct kvm *kvm)
+{
+	if (KVM_PREALLOC_LEVEL > 0) {
+		unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm);
+		free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT);
+	}
+}
+
 static inline bool kvm_page_empty(void *ptr)
 {
 	struct page *ptr_page = virt_to_page(ptr);
 	return page_count(ptr_page) == 1;
 }
 
-#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
-#ifndef CONFIG_ARM64_64K_PAGES
-#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
+
+#ifdef __PAGETABLE_PMD_FOLDED
+#define kvm_pmd_table_empty(kvm, pmdp) (0)
+#else
+#define kvm_pmd_table_empty(kvm, pmdp) \
+	(kvm_page_empty(pmdp) && (!(kvm) || KVM_PREALLOC_LEVEL < 2))
+#endif
+
+#ifdef __PAGETABLE_PUD_FOLDED
+#define kvm_pud_table_empty(kvm, pudp) (0)
 #else
-#define kvm_pmd_table_empty(pmdp) (0)
+#define kvm_pud_table_empty(kvm, pudp) \
+	(kvm_page_empty(pudp) && (!(kvm) || KVM_PREALLOC_LEVEL < 1))
 #endif
-#define kvm_pud_table_empty(pudp) (0)
 
 
 struct kvm;