author		Paolo Bonzini <pbonzini@redhat.com>	2015-04-07 12:06:01 -0400
committer	Paolo Bonzini <pbonzini@redhat.com>	2015-04-07 12:06:01 -0400
commit		8999602d08a804ae9cb271fdd5378f910058112d (patch)
tree		4cc03fe92e23087684ccffbff6afa2195e0b59d6 /arch
parent		1d804d079a92138d011900785193b6b00b44bc00 (diff)
parent		ae705930fca6322600690df9dc1c7d0516145a93 (diff)
Merge tag 'kvm-arm-fixes-4.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into 'kvm-next'
Fixes for KVM/ARM for 4.0-rc5.

Fixes page refcounting issues in our Stage-2 page table management code,
fixes a missing unlock in a gicv3 error path, and fixes a race that can
cause lost interrupts if signals are pending just prior to entering the
guest.
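Background on the refcounting fix, as a minimal sketch (the example_* helpers below are hypothetical names, not part of this merge): stage-2 teardown decides that a table page holds no entries by checking that page's own refcount, so the hardware PGD must be backed by individually refcounted pages. A single high-order __get_free_pages() block only gives the head page a valid refcount, which is why the series switches the allocation to alloc_pages_exact().

/*
 * Illustrative sketch only, not part of the patch.  The example_*
 * helpers are made up; the kernel APIs used (alloc_pages_exact,
 * free_pages_exact, virt_to_page, page_count) are real.
 */
#include <linux/gfp.h>
#include <linux/mm.h>

/* Mirrors kvm_page_empty(): a table page with only its allocation
 * reference left holds no entries and can be torn down. */
static bool example_table_empty(void *table)
{
	return page_count(virt_to_page(table)) == 1;
}

/* Allocate the multi-page hardware PGD so that every constituent page
 * is an individually refcounted order-0 page. */
static void *example_alloc_hwpgd(size_t size)
{
	return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
}

static void example_free_hwpgd(void *hwpgd, size_t size)
{
	free_pages_exact(hwpgd, size);
}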
Diffstat (limited to 'arch')
-rw-r--r--	arch/arm/include/asm/kvm_mmu.h   | 13
-rw-r--r--	arch/arm/kvm/mmu.c               | 75
-rw-r--r--	arch/arm64/include/asm/kvm_arm.h |  5
-rw-r--r--	arch/arm64/include/asm/kvm_mmu.h | 48
4 files changed, 68 insertions(+), 73 deletions(-)
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index bf0fe99e8ca9..4cf48c3aca13 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -149,29 +149,28 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
 	(__boundary - 1 < (end) - 1)? __boundary: (end);	\
 })
 
+#define kvm_pgd_index(addr)		pgd_index(addr)
+
 static inline bool kvm_page_empty(void *ptr)
 {
 	struct page *ptr_page = virt_to_page(ptr);
 	return page_count(ptr_page) == 1;
 }
 
-
 #define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
 #define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
 #define kvm_pud_table_empty(kvm, pudp) (0)
 
 #define KVM_PREALLOC_LEVEL	0
 
-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
+static inline void *kvm_get_hwpgd(struct kvm *kvm)
 {
-	return 0;
+	return kvm->arch.pgd;
 }
 
-static inline void kvm_free_hwpgd(struct kvm *kvm) { }
-
-static inline void *kvm_get_hwpgd(struct kvm *kvm)
+static inline unsigned int kvm_get_hwpgd_size(void)
 {
-	return kvm->arch.pgd;
+	return PTRS_PER_S2_PGD * sizeof(pgd_t);
 }
 
 struct kvm;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 3e6859bc3e11..5656d79c5a44 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -290,7 +290,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 	phys_addr_t addr = start, end = start + size;
 	phys_addr_t next;
 
-	pgd = pgdp + pgd_index(addr);
+	pgd = pgdp + kvm_pgd_index(addr);
 	do {
 		next = kvm_pgd_addr_end(addr, end);
 		if (!pgd_none(*pgd))
@@ -355,7 +355,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
 	phys_addr_t next;
 	pgd_t *pgd;
 
-	pgd = kvm->arch.pgd + pgd_index(addr);
+	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
 	do {
 		next = kvm_pgd_addr_end(addr, end);
 		stage2_flush_puds(kvm, pgd, addr, next);
@@ -632,6 +632,20 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
 				 __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
 }
 
+/* Free the HW pgd, one page at a time */
+static void kvm_free_hwpgd(void *hwpgd)
+{
+	free_pages_exact(hwpgd, kvm_get_hwpgd_size());
+}
+
+/* Allocate the HW PGD, making sure that each page gets its own refcount */
+static void *kvm_alloc_hwpgd(void)
+{
+	unsigned int size = kvm_get_hwpgd_size();
+
+	return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
+}
+
 /**
  * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
  * @kvm:	The KVM struct pointer for the VM.
@@ -645,15 +659,31 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
  */
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
-	int ret;
 	pgd_t *pgd;
+	void *hwpgd;
 
 	if (kvm->arch.pgd != NULL) {
 		kvm_err("kvm_arch already initialized?\n");
 		return -EINVAL;
 	}
 
+	hwpgd = kvm_alloc_hwpgd();
+	if (!hwpgd)
+		return -ENOMEM;
+
+	/* When the kernel uses more levels of page tables than the
+	 * guest, we allocate a fake PGD and pre-populate it to point
+	 * to the next-level page table, which will be the real
+	 * initial page table pointed to by the VTTBR.
+	 *
+	 * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
+	 * the PMD and the kernel will use folded pud.
+	 * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
+	 * pages.
+	 */
 	if (KVM_PREALLOC_LEVEL > 0) {
+		int i;
+
 		/*
 		 * Allocate fake pgd for the page table manipulation macros to
 		 * work.  This is not used by the hardware and we have no
@@ -661,30 +691,32 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 		 */
 		pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
 				       GFP_KERNEL | __GFP_ZERO);
+
+		if (!pgd) {
+			kvm_free_hwpgd(hwpgd);
+			return -ENOMEM;
+		}
+
+		/* Plug the HW PGD into the fake one. */
+		for (i = 0; i < PTRS_PER_S2_PGD; i++) {
+			if (KVM_PREALLOC_LEVEL == 1)
+				pgd_populate(NULL, pgd + i,
+					     (pud_t *)hwpgd + i * PTRS_PER_PUD);
+			else if (KVM_PREALLOC_LEVEL == 2)
+				pud_populate(NULL, pud_offset(pgd, 0) + i,
+					     (pmd_t *)hwpgd + i * PTRS_PER_PMD);
+		}
 	} else {
 		/*
 		 * Allocate actual first-level Stage-2 page table used by the
 		 * hardware for Stage-2 page table walks.
 		 */
-		pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+		pgd = (pgd_t *)hwpgd;
 	}
 
-	if (!pgd)
-		return -ENOMEM;
-
-	ret = kvm_prealloc_hwpgd(kvm, pgd);
-	if (ret)
-		goto out_err;
-
 	kvm_clean_pgd(pgd);
 	kvm->arch.pgd = pgd;
 	return 0;
-out_err:
-	if (KVM_PREALLOC_LEVEL > 0)
-		kfree(pgd);
-	else
-		free_pages((unsigned long)pgd, S2_PGD_ORDER);
-	return ret;
 }
 
 /**
@@ -785,11 +817,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 		return;
 
 	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-	kvm_free_hwpgd(kvm);
+	kvm_free_hwpgd(kvm_get_hwpgd(kvm));
 	if (KVM_PREALLOC_LEVEL > 0)
 		kfree(kvm->arch.pgd);
-	else
-		free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+
 	kvm->arch.pgd = NULL;
 }
 
@@ -799,7 +830,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
 	pgd_t *pgd;
 	pud_t *pud;
 
-	pgd = kvm->arch.pgd + pgd_index(addr);
+	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
 	if (WARN_ON(pgd_none(*pgd))) {
 		if (!cache)
 			return NULL;
@@ -1089,7 +1120,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 	pgd_t *pgd;
 	phys_addr_t next;
 
-	pgd = kvm->arch.pgd + pgd_index(addr);
+	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
 	do {
 		/*
 		 * Release kvm_mmu_lock periodically if the memory region is
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 94674eb7e7bb..54bb4ba97441 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -129,6 +129,9 @@
  * 40 bits wide (T0SZ = 24).  Systems with a PARange smaller than 40 bits are
  * not known to exist and will break with this configuration.
  *
+ * VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time
+ * (see hyp-init.S).
+ *
  * Note that when using 4K pages, we concatenate two first level page tables
  * together.
  *
@@ -138,7 +141,6 @@
 #ifdef CONFIG_ARM64_64K_PAGES
 /*
  * Stage2 translation configuration:
- * 40bits output (PS = 2)
  * 40bits input  (T0SZ = 24)
  * 64kB pages (TG0 = 1)
  * 2 level page tables (SL = 1)
@@ -150,7 +152,6 @@
 #else
 /*
  * Stage2 translation configuration:
- * 40bits output (PS = 2)
  * 40bits input  (T0SZ = 24)
  * 4kB pages (TG0 = 0)
  * 3 level page tables (SL = 1)
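The two removed "40bits output (PS = 2)" lines go away because the output size is no longer assumed; as the comment added above notes, VTCR_EL2.PS is filled in from ID_AA64MMFR0_EL1.PARange at boot. Below is a rough C rendering of that setup, a sketch only: example_set_vtcr_ps() is a made-up name, the real code is assembly in hyp-init.S, and the field positions are taken from the ARMv8 ARM.

#include <linux/types.h>

/*
 * Illustrative only: approximately what the hyp-init.S code referenced
 * above does.  VTCR_EL2.PS (bits [18:16]) is taken from
 * ID_AA64MMFR0_EL1.PARange (bits [3:0]) instead of being hard-coded to
 * 2 (40 bits).  example_set_vtcr_ps() is a hypothetical helper.
 */
static inline u64 example_set_vtcr_ps(u64 vtcr)
{
	u64 mmfr0;

	/* Read the CPU's supported physical address range. */
	asm volatile("mrs %0, id_aa64mmfr0_el1" : "=r" (mmfr0));

	vtcr &= ~((u64)7 << 16);	/* clear VTCR_EL2.PS      */
	vtcr |= (mmfr0 & 0xf) << 16;	/* plug in PARange value  */

	return vtcr;
}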
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 6458b5373142..bbfb600fa822 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -158,6 +158,8 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
 #define PTRS_PER_S2_PGD		(1 << PTRS_PER_S2_PGD_SHIFT)
 #define S2_PGD_ORDER		get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
 
+#define kvm_pgd_index(addr)	(((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
+
 /*
  * If we are concatenating first level stage-2 page tables, we would have less
  * than or equal to 16 pointers in the fake PGD, because that's what the
@@ -171,43 +173,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
 #define KVM_PREALLOC_LEVEL	(0)
 #endif
 
-/**
- * kvm_prealloc_hwpgd - allocate inital table for VTTBR
- * @kvm:	The KVM struct pointer for the VM.
- * @pgd:	The kernel pseudo pgd
- *
- * When the kernel uses more levels of page tables than the guest, we allocate
- * a fake PGD and pre-populate it to point to the next-level page table, which
- * will be the real initial page table pointed to by the VTTBR.
- *
- * When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and
- * the kernel will use folded pud. When KVM_PREALLOC_LEVEL==1, we
- * allocate 2 consecutive PUD pages.
- */
-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
-{
-	unsigned int i;
-	unsigned long hwpgd;
-
-	if (KVM_PREALLOC_LEVEL == 0)
-		return 0;
-
-	hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT);
-	if (!hwpgd)
-		return -ENOMEM;
-
-	for (i = 0; i < PTRS_PER_S2_PGD; i++) {
-		if (KVM_PREALLOC_LEVEL == 1)
-			pgd_populate(NULL, pgd + i,
-				     (pud_t *)hwpgd + i * PTRS_PER_PUD);
-		else if (KVM_PREALLOC_LEVEL == 2)
-			pud_populate(NULL, pud_offset(pgd, 0) + i,
-				     (pmd_t *)hwpgd + i * PTRS_PER_PMD);
-	}
-
-	return 0;
-}
-
 static inline void *kvm_get_hwpgd(struct kvm *kvm)
 {
 	pgd_t *pgd = kvm->arch.pgd;
@@ -224,12 +189,11 @@ static inline void *kvm_get_hwpgd(struct kvm *kvm)
 	return pmd_offset(pud, 0);
 }
 
-static inline void kvm_free_hwpgd(struct kvm *kvm)
+static inline unsigned int kvm_get_hwpgd_size(void)
 {
-	if (KVM_PREALLOC_LEVEL > 0) {
-		unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm);
-		free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT);
-	}
+	if (KVM_PREALLOC_LEVEL > 0)
+		return PTRS_PER_S2_PGD * PAGE_SIZE;
+	return PTRS_PER_S2_PGD * sizeof(pgd_t);
 }
 
 static inline bool kvm_page_empty(void *ptr)