Diffstat (limited to 'arch/arm/kvm/mmu.c')
-rw-r--r--  arch/arm/kvm/mmu.c  184
1 file changed, 88 insertions, 96 deletions
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 99e07c7dd745..2f12e4056408 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -20,7 +20,6 @@
 #include <linux/kvm_host.h>
 #include <linux/io.h>
 #include <trace/events/kvm.h>
-#include <asm/idmap.h>
 #include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
 #include <asm/kvm_arm.h>
@@ -28,8 +27,6 @@
 #include <asm/kvm_mmio.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
-#include <asm/mach/map.h>
-#include <trace/events/kvm.h>
 
 #include "trace.h"
 
@@ -37,19 +34,9 @@ extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
 
 static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
 
-static void kvm_tlb_flush_vmid(struct kvm *kvm)
+static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
-	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
-}
-
-static void kvm_set_pte(pte_t *pte, pte_t new_pte)
-{
-	pte_val(*pte) = new_pte;
-	/*
-	 * flush_pmd_entry just takes a void pointer and cleans the necessary
-	 * cache entries, so we can reuse the function for ptes.
-	 */
-	flush_pmd_entry(pte);
+	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
 }
 
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
@@ -98,33 +85,42 @@ static void free_ptes(pmd_t *pmd, unsigned long addr)
 	}
 }
 
+static void free_hyp_pgd_entry(unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned long hyp_addr = KERN_TO_HYP(addr);
+
+	pgd = hyp_pgd + pgd_index(hyp_addr);
+	pud = pud_offset(pgd, hyp_addr);
+
+	if (pud_none(*pud))
+		return;
+	BUG_ON(pud_bad(*pud));
+
+	pmd = pmd_offset(pud, hyp_addr);
+	free_ptes(pmd, addr);
+	pmd_free(NULL, pmd);
+	pud_clear(pud);
+}
+
 /**
  * free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables
  *
  * Assumes this is a page table used strictly in Hyp-mode and therefore contains
- * only mappings in the kernel memory area, which is above PAGE_OFFSET.
+ * either mappings in the kernel memory area (above PAGE_OFFSET), or
+ * device mappings in the vmalloc range (from VMALLOC_START to VMALLOC_END).
  */
 void free_hyp_pmds(void)
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
 	unsigned long addr;
 
 	mutex_lock(&kvm_hyp_pgd_mutex);
-	for (addr = PAGE_OFFSET; addr != 0; addr += PGDIR_SIZE) {
-		pgd = hyp_pgd + pgd_index(addr);
-		pud = pud_offset(pgd, addr);
-
-		if (pud_none(*pud))
-			continue;
-		BUG_ON(pud_bad(*pud));
-
-		pmd = pmd_offset(pud, addr);
-		free_ptes(pmd, addr);
-		pmd_free(NULL, pmd);
-		pud_clear(pud);
-	}
+	for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
+		free_hyp_pgd_entry(addr);
+	for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
+		free_hyp_pgd_entry(addr);
 	mutex_unlock(&kvm_hyp_pgd_mutex);
 }
 
@@ -136,7 +132,9 @@ static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
 	struct page *page;
 
 	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
-		pte = pte_offset_kernel(pmd, addr);
+		unsigned long hyp_addr = KERN_TO_HYP(addr);
+
+		pte = pte_offset_kernel(pmd, hyp_addr);
 		BUG_ON(!virt_addr_valid(addr));
 		page = virt_to_page(addr);
 		kvm_set_pte(pte, mk_pte(page, PAGE_HYP));
@@ -151,7 +149,9 @@ static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start,
 	unsigned long addr;
 
 	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
-		pte = pte_offset_kernel(pmd, addr);
+		unsigned long hyp_addr = KERN_TO_HYP(addr);
+
+		pte = pte_offset_kernel(pmd, hyp_addr);
 		BUG_ON(pfn_valid(*pfn_base));
 		kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE));
 		(*pfn_base)++;
@@ -166,12 +166,13 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
 	unsigned long addr, next;
 
 	for (addr = start; addr < end; addr = next) {
-		pmd = pmd_offset(pud, addr);
+		unsigned long hyp_addr = KERN_TO_HYP(addr);
+		pmd = pmd_offset(pud, hyp_addr);
 
 		BUG_ON(pmd_sect(*pmd));
 
 		if (pmd_none(*pmd)) {
-			pte = pte_alloc_one_kernel(NULL, addr);
+			pte = pte_alloc_one_kernel(NULL, hyp_addr);
 			if (!pte) {
 				kvm_err("Cannot allocate Hyp pte\n");
 				return -ENOMEM;
@@ -206,17 +207,23 @@ static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base)
 	unsigned long addr, next;
 	int err = 0;
 
-	BUG_ON(start > end);
-	if (start < PAGE_OFFSET)
+	if (start >= end)
+		return -EINVAL;
+	/* Check for a valid kernel memory mapping */
+	if (!pfn_base && (!virt_addr_valid(from) || !virt_addr_valid(to - 1)))
+		return -EINVAL;
+	/* Check for a valid kernel IO mapping */
+	if (pfn_base && (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)))
 		return -EINVAL;
 
 	mutex_lock(&kvm_hyp_pgd_mutex);
 	for (addr = start; addr < end; addr = next) {
-		pgd = hyp_pgd + pgd_index(addr);
-		pud = pud_offset(pgd, addr);
+		unsigned long hyp_addr = KERN_TO_HYP(addr);
+		pgd = hyp_pgd + pgd_index(hyp_addr);
+		pud = pud_offset(pgd, hyp_addr);
 
 		if (pud_none_or_clear_bad(pud)) {
-			pmd = pmd_alloc_one(NULL, addr);
+			pmd = pmd_alloc_one(NULL, hyp_addr);
 			if (!pmd) {
 				kvm_err("Cannot allocate Hyp pmd\n");
 				err = -ENOMEM;
@@ -236,12 +243,13 @@ out:
 }
 
 /**
- * create_hyp_mappings - map a kernel virtual address range in Hyp mode
+ * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
  * @from: The virtual kernel start address of the range
  * @to: The virtual kernel end address of the range (exclusive)
  *
- * The same virtual address as the kernel virtual address is also used in
- * Hyp-mode mapping to the same underlying physical pages.
+ * The same virtual address as the kernel virtual address is also used
+ * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
+ * physical pages.
  *
  * Note: Wrapping around zero in the "to" address is not supported.
  */
@@ -251,10 +259,13 @@ int create_hyp_mappings(void *from, void *to)
 }
 
 /**
- * create_hyp_io_mappings - map a physical IO range in Hyp mode
- * @from: The virtual HYP start address of the range
- * @to: The virtual HYP end address of the range (exclusive)
+ * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
+ * @from: The kernel start VA of the range
+ * @to: The kernel end VA of the range (exclusive)
  * @addr: The physical start address which gets mapped
+ *
+ * The resulting HYP VA is the same as the kernel VA, modulo
+ * HYP_PAGE_OFFSET.
  */
 int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr)
 {
@@ -290,7 +301,7 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 	VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));
 
 	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
-	clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
+	kvm_clean_pgd(pgd);
 	kvm->arch.pgd = pgd;
 
 	return 0;
@@ -422,22 +433,22 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 			return 0; /* ignore calls from kvm_set_spte_hva */
 		pmd = mmu_memory_cache_alloc(cache);
 		pud_populate(NULL, pud, pmd);
-		pmd += pmd_index(addr);
 		get_page(virt_to_page(pud));
-	} else
-		pmd = pmd_offset(pud, addr);
+	}
+
+	pmd = pmd_offset(pud, addr);
 
 	/* Create 2nd stage page table mapping - Level 2 */
 	if (pmd_none(*pmd)) {
 		if (!cache)
 			return 0; /* ignore calls from kvm_set_spte_hva */
 		pte = mmu_memory_cache_alloc(cache);
-		clean_pte_table(pte);
+		kvm_clean_pte(pte);
 		pmd_populate_kernel(NULL, pmd, pte);
-		pte += pte_index(addr);
 		get_page(virt_to_page(pmd));
-	} else
-		pte = pte_offset_kernel(pmd, addr);
+	}
+
+	pte = pte_offset_kernel(pmd, addr);
 
 	if (iomap && pte_present(*pte))
 		return -EFAULT;
@@ -446,7 +457,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 	old_pte = *pte;
 	kvm_set_pte(pte, *new_pte);
 	if (pte_present(old_pte))
-		kvm_tlb_flush_vmid(kvm);
+		kvm_tlb_flush_vmid_ipa(kvm, addr);
 	else
 		get_page(virt_to_page(pte));
 
@@ -473,7 +484,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 	pfn = __phys_to_pfn(pa);
 
 	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
-		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE | L_PTE_S2_RDWR);
+		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
+		kvm_set_s2pte_writable(&pte);
 
 		ret = mmu_topup_memory_cache(&cache, 2, 2);
 		if (ret)
@@ -492,29 +504,6 @@ out:
 	return ret;
 }
 
-static void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
-{
-	/*
-	 * If we are going to insert an instruction page and the icache is
-	 * either VIPT or PIPT, there is a potential problem where the host
-	 * (or another VM) may have used the same page as this guest, and we
-	 * read incorrect data from the icache. If we're using a PIPT cache,
-	 * we can invalidate just that page, but if we are using a VIPT cache
-	 * we need to invalidate the entire icache - damn shame - as written
-	 * in the ARM ARM (DDI 0406C.b - Page B3-1393).
-	 *
-	 * VIVT caches are tagged using both the ASID and the VMID and doesn't
-	 * need any kind of flushing (DDI 0406C.b - Page B3-1392).
-	 */
-	if (icache_is_pipt()) {
-		unsigned long hva = gfn_to_hva(kvm, gfn);
-		__cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
-	} else if (!icache_is_vivt_asid_tagged()) {
-		/* any kind of VIPT cache */
-		__flush_icache_all();
-	}
-}
-
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  gfn_t gfn, struct kvm_memory_slot *memslot,
 			  unsigned long fault_status)
@@ -526,7 +515,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	unsigned long mmu_seq;
 	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
 
-	write_fault = kvm_is_write_fault(vcpu->arch.hsr);
+	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
 	if (fault_status == FSC_PERM && !write_fault) {
 		kvm_err("Unexpected L2 read permission error\n");
 		return -EFAULT;
@@ -560,7 +549,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
 		goto out_unlock;
 	if (writable) {
-		pte_val(new_pte) |= L_PTE_S2_RDWR;
+		kvm_set_s2pte_writable(&new_pte);
 		kvm_set_pfn_dirty(pfn);
 	}
 	stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);
@@ -585,7 +574,6 @@ out_unlock:
  */
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	unsigned long hsr_ec;
 	unsigned long fault_status;
 	phys_addr_t fault_ipa;
 	struct kvm_memory_slot *memslot;
@@ -593,18 +581,17 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	gfn_t gfn;
 	int ret, idx;
 
-	hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
-	is_iabt = (hsr_ec == HSR_EC_IABT);
-	fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8;
+	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
+	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
 
-	trace_kvm_guest_fault(*vcpu_pc(vcpu), vcpu->arch.hsr,
-			      vcpu->arch.hxfar, fault_ipa);
+	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
+			      kvm_vcpu_get_hfar(vcpu), fault_ipa);
 
 	/* Check the stage-2 fault is trans. fault or write fault */
-	fault_status = (vcpu->arch.hsr & HSR_FSC_TYPE);
+	fault_status = kvm_vcpu_trap_get_fault(vcpu);
 	if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
-		kvm_err("Unsupported fault status: EC=%#lx DFCS=%#lx\n",
-			hsr_ec, fault_status);
+		kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
+			kvm_vcpu_trap_get_class(vcpu), fault_status);
 		return -EFAULT;
 	}
 
@@ -614,7 +601,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
 		if (is_iabt) {
 			/* Prefetch Abort on I/O address */
-			kvm_inject_pabt(vcpu, vcpu->arch.hxfar);
+			kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
 			ret = 1;
 			goto out_unlock;
 		}
@@ -626,8 +613,13 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 			goto out_unlock;
 		}
 
-		/* Adjust page offset */
-		fault_ipa |= vcpu->arch.hxfar & ~PAGE_MASK;
+		/*
+		 * The IPA is reported as [MAX:12], so we need to
+		 * complement it with the bottom 12 bits from the
+		 * faulting VA. This is always 12 bits, irrespective
+		 * of the page size.
+		 */
+		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
 		ret = io_mem_abort(vcpu, run, fault_ipa);
 		goto out_unlock;
 	}
@@ -682,7 +674,7 @@ static void handle_hva_to_gpa(struct kvm *kvm,
 static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
 {
 	unmap_stage2_range(kvm, gpa, PAGE_SIZE);
-	kvm_tlb_flush_vmid(kvm);
+	kvm_tlb_flush_vmid_ipa(kvm, gpa);
 }
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
@@ -776,7 +768,7 @@ void kvm_clear_hyp_idmap(void)
 		pmd = pmd_offset(pud, addr);
 
 		pud_clear(pud);
-		clean_pmd_entry(pmd);
+		kvm_clean_pmd_entry(pmd);
 		pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK));
 	} while (pgd++, addr = next, addr < end);
 }