author		Linus Torvalds <torvalds@linux-foundation.org>	2014-10-18 17:32:31 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-10-18 17:32:31 -0400
commit		8a5de18239e418fe7b1f36504834689f754d8ccc
tree		8d05ae77da1d4a8512b6052e2ba23571543666c7 /arch/arm
parent		857b50f5d0eed113428c864e927289d8f5f2b864
parent		2df36a5dd6792870bef48f63bfca42055ea5b79c
Merge tag 'kvm-arm-for-3.18-take-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm
Pull second batch of changes for KVM/{arm,arm64} from Marc Zyngier:
"The most obvious thing is the sizeable MMU changes to support 48bit
VAs on arm64.
Summary:
- support for 48bit IPA and VA (EL2)
- a number of fixes for devices mapped into guests
- yet another VGIC fix for BE
- a fix for CPU hotplug
- a few compile fixes (disabled VGIC, strict mm checks)"
[ I'm pulling directly from Marc at the request of Paolo Bonzini, whose
backpack was stolen at Düsseldorf airport and who will do new keys and
rebuild his web of trust. - Linus ]
* tag 'kvm-arm-for-3.18-take-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm:
arm/arm64: KVM: Fix BE accesses to GICv2 EISR and ELRSR regs
arm: kvm: STRICT_MM_TYPECHECKS fix for user_mem_abort
arm/arm64: KVM: Ensure memslots are within KVM_PHYS_SIZE
arm64: KVM: Implement 48 VA support for KVM EL2 and Stage-2
arm/arm64: KVM: map MMIO regions at creation time
arm64: kvm: define PAGE_S2_DEVICE as read-only by default
ARM: kvm: define PAGE_S2_DEVICE as read-only by default
arm/arm64: KVM: add 'writable' parameter to kvm_phys_addr_ioremap
arm/arm64: KVM: fix potential NULL dereference in user_mem_abort()
arm/arm64: KVM: use __GFP_ZERO not memset() to get zeroed pages
ARM: KVM: fix vgic-disabled build
arm: kvm: fix CPU hotplug
Diffstat (limited to 'arch/arm')
 arch/arm/include/asm/kvm_mmu.h |  31
 arch/arm/include/asm/pgtable.h |   2
 arch/arm/kvm/arm.c             |   5
 arch/arm/kvm/interrupts_head.S |   7
 arch/arm/kvm/mmu.c             | 235
 5 files changed, 235 insertions(+), 45 deletions(-)
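
The "strict mm checks" compile fix called out in the summary lands in the user_mem_abort() hunk of arch/arm/kvm/mmu.c below. As background, a minimal sketch of why the comparison must go through pgprot_val(); this mirrors the usual arch/arm type definitions and is illustrative only, not part of this patch:

/* Illustrative sketch (assumed definitions, not from this patch): with
 * STRICT_MM_TYPECHECKS enabled, pgprot_t becomes a one-member struct, so two
 * pgprot_t values cannot be compared with ==; the raw values have to be
 * extracted first.  pteval_t is u32 or u64 depending on LPAE. */
#ifdef STRICT_MM_TYPECHECKS
typedef struct { pteval_t pgprot; } pgprot_t;
#define pgprot_val(x)	((x).pgprot)
#else
typedef pteval_t pgprot_t;
#define pgprot_val(x)	(x)
#endif

/* Hence, in user_mem_abort() below:
 *   mem_type == PAGE_S2_DEVICE                          breaks the strict build
 *   pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE)  works either way
 */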
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 3f688b458143..acb0d5712716 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -37,6 +37,11 @@
  */
 #define TRAMPOLINE_VA		UL(CONFIG_VECTORS_BASE)
 
+/*
+ * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels.
+ */
+#define KVM_MMU_CACHE_MIN_PAGES	2
+
 #ifndef __ASSEMBLY__
 
 #include <asm/cacheflush.h>
@@ -50,7 +55,7 @@ void free_hyp_pgds(void);
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
-			  phys_addr_t pa, unsigned long size);
+			  phys_addr_t pa, unsigned long size, bool writable);
 
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
 
@@ -83,6 +88,11 @@ static inline void kvm_clean_pgd(pgd_t *pgd)
 	clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
 }
 
+static inline void kvm_clean_pmd(pmd_t *pmd)
+{
+	clean_dcache_area(pmd, PTRS_PER_PMD * sizeof(pmd_t));
+}
+
 static inline void kvm_clean_pmd_entry(pmd_t *pmd)
 {
 	clean_pmd_entry(pmd);
@@ -123,10 +133,23 @@ static inline bool kvm_page_empty(void *ptr)
 }
 
 
-#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
-#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
-#define kvm_pud_table_empty(pudp) (0)
+#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
+#define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
+#define kvm_pud_table_empty(kvm, pudp) (0)
+
+#define KVM_PREALLOC_LEVEL	0
 
+static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
+{
+	return 0;
+}
+
+static inline void kvm_free_hwpgd(struct kvm *kvm) { }
+
+static inline void *kvm_get_hwpgd(struct kvm *kvm)
+{
+	return kvm->arch.pgd;
+}
 
 struct kvm;
 
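
On 32-bit ARM the table the stage-2 MMU walks starts at kvm->arch.pgd itself, so the new hwpgd helpers above are trivial; the point is that callers now go through them, so configurations that pre-allocate a deeper hardware table (the 48-bit VA arm64 case this series adds) can return something else. A minimal sketch of the main consumer, using only what the arch/arm/kvm/arm.c hunk further down shows:

/* Sketch of the consumer (the real change is in the arm.c hunk below):
 * VTTBR must carry the physical address of the table the hardware walks,
 * which is now obtained via kvm_get_hwpgd() rather than kvm->arch.pgd. */
static void update_vttbr(struct kvm *kvm)
{
	phys_addr_t pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm));
	u64 vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;

	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
	kvm->arch.vttbr = pgd_phys | vmid;
}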
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 90aa4583b308..3b30062975b2 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -100,7 +100,7 @@ extern pgprot_t pgprot_s2_device;
 #define PAGE_HYP		_MOD_PROT(pgprot_kernel, L_PTE_HYP)
 #define PAGE_HYP_DEVICE		_MOD_PROT(pgprot_hyp_device, L_PTE_HYP)
 #define PAGE_S2			_MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE		_MOD_PROT(pgprot_s2_device, L_PTE_S2_RDWR)
+#define PAGE_S2_DEVICE		_MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY)
 
 #define __PAGE_NONE		__pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE)
 #define __PAGE_SHARED		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
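
With PAGE_S2_DEVICE now read-only by default, write access to device mappings has to be requested explicitly per mapping. The kvm_phys_addr_ioremap() hunk in arch/arm/kvm/mmu.c below does exactly that; the pattern in isolation (taken from that hunk, shown here only as a sketch):

	/* Device PTEs start out read-only; writability is opted into per
	 * mapping instead of being baked into PAGE_S2_DEVICE. */
	pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);	/* L_PTE_S2_RDONLY */
	if (writable)
		kvm_set_s2pte_writable(&pte);		/* upgrade to S2 read/write */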
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 779605122f32..9e193c8a959e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -409,7 +409,7 @@ static void update_vttbr(struct kvm *kvm)
 	kvm_next_vmid++;
 
 	/* update vttbr to be used with the new vmid */
-	pgd_phys = virt_to_phys(kvm->arch.pgd);
+	pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm));
 	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
 	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
 	kvm->arch.vttbr = pgd_phys | vmid;
@@ -808,7 +808,8 @@ static int hyp_init_cpu_notify(struct notifier_block *self,
 	switch (action) {
 	case CPU_STARTING:
 	case CPU_STARTING_FROZEN:
-		cpu_init_hyp_mode(NULL);
+		if (__hyp_get_vectors() == hyp_default_vectors)
+			cpu_init_hyp_mode(NULL);
 		break;
 	}
 
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 98c8c5b9a87f..14d488388480 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -433,10 +433,17 @@ ARM_BE8(rev	r10, r10	)
 	str	r3, [r11, #VGIC_V2_CPU_HCR]
 	str	r4, [r11, #VGIC_V2_CPU_VMCR]
 	str	r5, [r11, #VGIC_V2_CPU_MISR]
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	str	r6, [r11, #(VGIC_V2_CPU_EISR + 4)]
+	str	r7, [r11, #VGIC_V2_CPU_EISR]
+	str	r8, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
+	str	r9, [r11, #VGIC_V2_CPU_ELRSR]
+#else
 	str	r6, [r11, #VGIC_V2_CPU_EISR]
 	str	r7, [r11, #(VGIC_V2_CPU_EISR + 4)]
 	str	r8, [r11, #VGIC_V2_CPU_ELRSR]
 	str	r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
+#endif
 	str	r10, [r11, #VGIC_V2_CPU_APR]
 
 	/* Clear GICH_HCR */
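
Why the store offsets swap under CONFIG_CPU_ENDIAN_BE8: r6/r7 and r8/r9 hold the two 32-bit halves of EISR and ELRSR, and they are saved into fields that the C side (per the EISR/ELRSR fix listed in this pull) treats as 64-bit values. On a big-endian kernel the low 32 bits of such a field live at the higher address, hence the +4 swap. A C-level illustration, not code from this patch:

/* Illustration only: where the low word of a 64-bit field ends up. */
union {
	u64 reg;
	u32 word[2];
} v = { .reg = 0x1122334455667788ULL };

/* little-endian: word[0] (offset +0) == 0x55667788, word[1] (offset +4) == 0x11223344
 * big-endian:    word[0] (offset +0) == 0x11223344, word[1] (offset +4) == 0x55667788
 * So a value meant to form the low half must be stored at +0 on LE but at +4
 * on BE, which is what the #ifdef CONFIG_CPU_ENDIAN_BE8 block above does. */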
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index eea03069161b..57a403a5c22b 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -42,7 +42,7 @@ static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
-#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
+#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
 
 #define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x))
 
@@ -134,7 +134,7 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
-	if (kvm_pte_table_empty(start_pte))
+	if (kvm_pte_table_empty(kvm, start_pte))
 		clear_pmd_entry(kvm, pmd, start_addr);
 }
 
@@ -158,7 +158,7 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
 		}
 	} while (pmd++, addr = next, addr != end);
 
-	if (kvm_pmd_table_empty(start_pmd))
+	if (kvm_pmd_table_empty(kvm, start_pmd))
 		clear_pud_entry(kvm, pud, start_addr);
 }
 
@@ -182,7 +182,7 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
 		}
 	} while (pud++, addr = next, addr != end);
 
-	if (kvm_pud_table_empty(start_pud))
+	if (kvm_pud_table_empty(kvm, start_pud))
 		clear_pgd_entry(kvm, pgd, start_addr);
 }
 
@@ -306,7 +306,7 @@ void free_boot_hyp_pgd(void)
 	if (boot_hyp_pgd) {
 		unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
 		unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
-		free_pages((unsigned long)boot_hyp_pgd, pgd_order);
+		free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
 		boot_hyp_pgd = NULL;
 	}
 
@@ -343,7 +343,7 @@ void free_hyp_pgds(void)
 		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
 			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
 
-		free_pages((unsigned long)hyp_pgd, pgd_order);
+		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
 		hyp_pgd = NULL;
 	}
 
@@ -401,13 +401,46 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
 	return 0;
 }
 
+static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
+				   unsigned long end, unsigned long pfn,
+				   pgprot_t prot)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned long addr, next;
+	int ret;
+
+	addr = start;
+	do {
+		pud = pud_offset(pgd, addr);
+
+		if (pud_none_or_clear_bad(pud)) {
+			pmd = pmd_alloc_one(NULL, addr);
+			if (!pmd) {
+				kvm_err("Cannot allocate Hyp pmd\n");
+				return -ENOMEM;
+			}
+			pud_populate(NULL, pud, pmd);
+			get_page(virt_to_page(pud));
+			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+		}
+
+		next = pud_addr_end(addr, end);
+		ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+		if (ret)
+			return ret;
+		pfn += (next - addr) >> PAGE_SHIFT;
+	} while (addr = next, addr != end);
+
+	return 0;
+}
+
 static int __create_hyp_mappings(pgd_t *pgdp,
 				 unsigned long start, unsigned long end,
 				 unsigned long pfn, pgprot_t prot)
 {
 	pgd_t *pgd;
 	pud_t *pud;
-	pmd_t *pmd;
 	unsigned long addr, next;
 	int err = 0;
 
@@ -416,22 +449,21 @@ static int __create_hyp_mappings(pgd_t *pgdp,
 	end = PAGE_ALIGN(end);
 	do {
 		pgd = pgdp + pgd_index(addr);
-		pud = pud_offset(pgd, addr);
 
-		if (pud_none_or_clear_bad(pud)) {
-			pmd = pmd_alloc_one(NULL, addr);
-			if (!pmd) {
-				kvm_err("Cannot allocate Hyp pmd\n");
+		if (pgd_none(*pgd)) {
+			pud = pud_alloc_one(NULL, addr);
+			if (!pud) {
+				kvm_err("Cannot allocate Hyp pud\n");
 				err = -ENOMEM;
 				goto out;
 			}
-			pud_populate(NULL, pud, pmd);
-			get_page(virt_to_page(pud));
-			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+			pgd_populate(NULL, pgd, pud);
+			get_page(virt_to_page(pgd));
+			kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
 		}
 
 		next = pgd_addr_end(addr, end);
-		err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+		err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
 		if (err)
 			goto out;
 		pfn += (next - addr) >> PAGE_SHIFT;
@@ -521,6 +553,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
  */
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
+	int ret;
 	pgd_t *pgd;
 
 	if (kvm->arch.pgd != NULL) {
@@ -528,15 +561,38 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 		return -EINVAL;
 	}
 
-	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
+	if (KVM_PREALLOC_LEVEL > 0) {
+		/*
+		 * Allocate fake pgd for the page table manipulation macros to
+		 * work.  This is not used by the hardware and we have no
+		 * alignment requirement for this allocation.
+		 */
+		pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
+				       GFP_KERNEL | __GFP_ZERO);
+	} else {
+		/*
+		 * Allocate actual first-level Stage-2 page table used by the
+		 * hardware for Stage-2 page table walks.
+		 */
+		pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+	}
+
 	if (!pgd)
 		return -ENOMEM;
 
-	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
+	ret = kvm_prealloc_hwpgd(kvm, pgd);
+	if (ret)
+		goto out_err;
+
 	kvm_clean_pgd(pgd);
 	kvm->arch.pgd = pgd;
-
 	return 0;
+out_err:
+	if (KVM_PREALLOC_LEVEL > 0)
+		kfree(pgd);
+	else
+		free_pages((unsigned long)pgd, S2_PGD_ORDER);
+	return ret;
 }
 
 /**
@@ -572,19 +628,39 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 		return;
 
 	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-	free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+	kvm_free_hwpgd(kvm);
+	if (KVM_PREALLOC_LEVEL > 0)
+		kfree(kvm->arch.pgd);
+	else
+		free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
 	kvm->arch.pgd = NULL;
 }
 
-static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 			     phys_addr_t addr)
 {
 	pgd_t *pgd;
 	pud_t *pud;
-	pmd_t *pmd;
 
 	pgd = kvm->arch.pgd + pgd_index(addr);
-	pud = pud_offset(pgd, addr);
+	if (WARN_ON(pgd_none(*pgd))) {
+		if (!cache)
+			return NULL;
+		pud = mmu_memory_cache_alloc(cache);
+		pgd_populate(NULL, pgd, pud);
+		get_page(virt_to_page(pgd));
+	}
+
+	return pud_offset(pgd, addr);
+}
+
+static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+			     phys_addr_t addr)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pud = stage2_get_pud(kvm, cache, addr);
 	if (pud_none(*pud)) {
 		if (!cache)
 			return NULL;
@@ -630,7 +706,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 	pmd_t *pmd;
 	pte_t *pte, old_pte;
 
-	/* Create stage-2 page table mapping - Level 1 */
+	/* Create stage-2 page table mapping - Levels 0 and 1 */
 	pmd = stage2_get_pmd(kvm, cache, addr);
 	if (!pmd) {
 		/*
@@ -675,7 +751,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
  * @size:	The size of the mapping
  */
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
-			  phys_addr_t pa, unsigned long size)
+			  phys_addr_t pa, unsigned long size, bool writable)
 {
 	phys_addr_t addr, end;
 	int ret = 0;
@@ -688,7 +764,11 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
 		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
 
-		ret = mmu_topup_memory_cache(&cache, 2, 2);
+		if (writable)
+			kvm_set_s2pte_writable(&pte);
+
+		ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
+						KVM_NR_MEM_OBJS);
 		if (ret)
 			goto out;
 		spin_lock(&kvm->mmu_lock);
@@ -777,6 +857,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	/* Let's check if we will get back a huge page backed by hugetlbfs */
 	down_read(&current->mm->mmap_sem);
 	vma = find_vma_intersection(current->mm, hva, hva + 1);
+	if (unlikely(!vma)) {
+		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
+		up_read(&current->mm->mmap_sem);
+		return -EFAULT;
+	}
+
 	if (is_vm_hugetlb_page(vma)) {
 		hugetlb = true;
 		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
@@ -797,7 +883,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	up_read(&current->mm->mmap_sem);
 
 	/* We need minimum second+third level pages */
-	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
+	ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
+				     KVM_NR_MEM_OBJS);
 	if (ret)
 		return ret;
 
@@ -843,7 +930,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		}
 		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
 		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
-				     mem_type == PAGE_S2_DEVICE);
+				     pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
 	}
 
 
@@ -916,6 +1003,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		goto out_unlock;
 	}
 
+	/* Userspace should not be able to register out-of-bounds IPAs */
+	VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
+
 	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
 	if (ret == 0)
 		ret = 1;
@@ -1072,8 +1162,8 @@ int kvm_mmu_init(void)
 			 (unsigned long)phys_base);
 	}
 
-	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
-	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
+	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
 
 	if (!hyp_pgd || !boot_hyp_pgd) {
 		kvm_err("Hyp mode PGD not allocated\n");
@@ -1126,13 +1216,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 				   const struct kvm_memory_slot *old,
 				   enum kvm_mr_change change)
 {
-	gpa_t gpa = old->base_gfn << PAGE_SHIFT;
-	phys_addr_t size = old->npages << PAGE_SHIFT;
-	if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
-		spin_lock(&kvm->mmu_lock);
-		unmap_stage2_range(kvm, gpa, size);
-		spin_unlock(&kvm->mmu_lock);
-	}
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -1140,7 +1223,77 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_userspace_memory_region *mem,
 				   enum kvm_mr_change change)
 {
-	return 0;
+	hva_t hva = mem->userspace_addr;
+	hva_t reg_end = hva + mem->memory_size;
+	bool writable = !(mem->flags & KVM_MEM_READONLY);
+	int ret = 0;
+
+	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE)
+		return 0;
+
+	/*
+	 * Prevent userspace from creating a memory region outside of the IPA
+	 * space addressable by the KVM guest IPA space.
+	 */
+	if (memslot->base_gfn + memslot->npages >=
+	    (KVM_PHYS_SIZE >> PAGE_SHIFT))
+		return -EFAULT;
+
+	/*
+	 * A memory region could potentially cover multiple VMAs, and any holes
+	 * between them, so iterate over all of them to find out if we can map
+	 * any of them right now.
+	 *
+	 *     +--------------------------------------------+
+	 * +---------------+----------------+   +----------------+
+	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
+	 * +---------------+----------------+   +----------------+
+	 *     |               memory region                |
+	 *     +--------------------------------------------+
+	 */
+	do {
+		struct vm_area_struct *vma = find_vma(current->mm, hva);
+		hva_t vm_start, vm_end;
+
+		if (!vma || vma->vm_start >= reg_end)
+			break;
+
+		/*
+		 * Mapping a read-only VMA is only allowed if the
+		 * memory region is configured as read-only.
+		 */
+		if (writable && !(vma->vm_flags & VM_WRITE)) {
+			ret = -EPERM;
+			break;
+		}
+
+		/*
+		 * Take the intersection of this VMA with the memory region
+		 */
+		vm_start = max(hva, vma->vm_start);
+		vm_end = min(reg_end, vma->vm_end);
+
+		if (vma->vm_flags & VM_PFNMAP) {
+			gpa_t gpa = mem->guest_phys_addr +
+				    (vm_start - mem->userspace_addr);
+			phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) +
+					 vm_start - vma->vm_start;
+
+			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
+						    vm_end - vm_start,
+						    writable);
+			if (ret)
+				break;
+		}
+		hva = vm_end;
+	} while (hva < reg_end);
+
+	if (ret) {
+		spin_lock(&kvm->mmu_lock);
+		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
+		spin_unlock(&kvm->mmu_lock);
+	}
+	return ret;
 }
 
 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
@@ -1165,4 +1318,10 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 				   struct kvm_memory_slot *slot)
 {
+	gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
+	phys_addr_t size = slot->npages << PAGE_SHIFT;
+
+	spin_lock(&kvm->mmu_lock);
+	unmap_stage2_range(kvm, gpa, size);
+	spin_unlock(&kvm->mmu_lock);
 }
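
The new kvm_arch_prepare_memory_region() path above is driven entirely by how userspace registers memory slots. A hedged userspace-side sketch (standard KVM ioctl usage, not part of this patch) of a read-only slot that the VM_WRITE check above now accepts even though the backing VMA is mapped without write permission:

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

/* Sketch only; error handling omitted.  vm_fd is a VM file descriptor from
 * KVM_CREATE_VM.  With KVM_MEM_READONLY set, 'writable' is false in
 * kvm_arch_prepare_memory_region(), so a VMA lacking VM_WRITE is tolerated;
 * without the flag the same registration would fail with EPERM. */
static int add_readonly_slot(int vm_fd, __u64 gpa, __u64 size)
{
	void *backing = mmap(NULL, size, PROT_READ,
			     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	struct kvm_userspace_memory_region region = {
		.slot            = 0,
		.flags           = KVM_MEM_READONLY,
		.guest_phys_addr = gpa,
		.memory_size     = size,
		.userspace_addr  = (unsigned long)backing,
	};

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}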