author     Paul Mackerras <paulus@samba.org>    2011-12-12 07:31:00 -0500
committer  Avi Kivity <avi@redhat.com>          2012-03-05 07:52:36 -0500
commit     c77162dee7aff6ab5f075da9b60f649cbbeb86cc
tree       c1f3f4f71a9fdad6612da20c67520b4fc8fa0b65 /arch/powerpc
parent     075295dd322b0c0de0c9ecf8e0cb19ee813438ed
KVM: PPC: Only get pages when actually needed, not in prepare_memory_region()
This removes the code from kvmppc_core_prepare_memory_region() that
looked up the VMA for the region being added and called hva_to_page
to get the pfns for the memory. We have no guarantee that there will
be anything mapped there at the time of the KVM_SET_USER_MEMORY_REGION
ioctl call; userspace can do that ioctl and then map memory into the
region later.
Instead we defer looking up the pfn for each memory page until it is
needed, which generally means when the guest does an H_ENTER hcall on
the page. Since we can't call get_user_pages in real mode, if we don't
already have the pfn for the page, kvmppc_h_enter() will return
H_TOO_HARD and we then call kvmppc_virtmode_h_enter() once we get back
to kernel context. That calls kvmppc_get_guest_page() to get the pfn
for the page, and then calls back to kvmppc_h_enter() to redo the HPTE
insertion.
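
In outline, the virtual-mode fallback path looks like this (condensed from
kvmppc_virtmode_h_enter() in the patch below; the memslot-validity and
H_TOO_HARD error handling are trimmed here):

  long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
                               long pte_index, unsigned long pteh, unsigned long ptel)
  {
          struct kvm *kvm = vcpu->kvm;
          unsigned long psize, gfn;
          struct kvm_memory_slot *memslot;
          long ret;

          psize = hpte_page_size(pteh, ptel);          /* 4k, 64k or 16M */
          if (!psize)
                  return H_PARAMETER;
          gfn = ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
          memslot = gfn_to_memslot(kvm, gfn);
          if (!memslot || kvmppc_get_guest_page(kvm, gfn, memslot) < 0)
                  return H_PARAMETER;                  /* couldn't pin the backing page */

          preempt_disable();
          ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);  /* redo the insertion */
          preempt_enable();
          return ret;
  }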
When the first vcpu starts executing, we need to have the RMO or VRMA
region mapped so that the guest's real mode accesses will work. Thus
we now have a check in kvmppc_vcpu_run() to see if the RMO/VRMA is set
up and if not, call kvmppc_hv_setup_rma(). It checks if the memslot
starting at guest physical 0 now has RMO memory mapped there; if so it
sets it up for the guest, otherwise on POWER7 it sets up the VRMA.
The function that does that, kvmppc_map_vrma, is now a bit simpler,
as it calls kvmppc_virtmode_h_enter instead of creating the HPTE itself.
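
The check in kvmppc_vcpu_run() is just a flag test; the heavy lifting is in
kvmppc_hv_setup_rma(), which serializes against other vcpus with kvm->lock.
A skeleton of that flow (details as in the patch below):

  /* in kvmppc_vcpu_run() */
  if (!vcpu->kvm->arch.rma_setup_done) {
          r = kvmppc_hv_setup_rma(vcpu);
          if (r)
                  return r;
  }

  static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
  {
          struct kvm *kvm = vcpu->kvm;
          struct kvm_memory_slot *memslot;
          int err = 0;

          mutex_lock(&kvm->lock);
          if (kvm->arch.rma_setup_done)
                  goto out;                      /* another vcpu beat us to it */

          memslot = gfn_to_memslot(kvm, 0);      /* memory at guest physical 0 */
          /* If the slot is backed by a preallocated RMA, program RMLS/RMOR and
           * record the RMO page addresses in slot_phys[]; otherwise, on POWER7,
           * set LPCR_VRMA_L and call kvmppc_map_vrma(). */

          smp_wmb();                             /* order LPCR etc. before the flag */
          kvm->arch.rma_setup_done = 1;
  out:
          mutex_unlock(&kvm->lock);
          return err;
  }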
Since we are now potentially updating entries in the slot_phys[]
arrays from multiple vcpu threads, we now have a spinlock protecting
those updates to ensure that we don't lose track of any references
to pages.
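
Concretely, the update is published under the new lock, and the duplicate
reference is dropped if another vcpu raced ahead (from kvmppc_get_guest_page()
in the patch below):

  spin_lock(&kvm->arch.slot_phys_lock);
  if (!physp[i])
          physp[i] = (pfn << PAGE_SHIFT) | KVMPPC_GOT_PAGE;
  else
          put_page(page);          /* someone else pinned it first; drop our ref */
  spin_unlock(&kvm->arch.slot_phys_lock);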
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch/powerpc')
-rw-r--r--   arch/powerpc/include/asm/kvm_book3s.h     |   4
-rw-r--r--   arch/powerpc/include/asm/kvm_book3s_64.h  |  12
-rw-r--r--   arch/powerpc/include/asm/kvm_host.h       |   2
-rw-r--r--   arch/powerpc/include/asm/kvm_ppc.h        |   4
-rw-r--r--   arch/powerpc/kvm/book3s_64_mmu_hv.c       | 130
-rw-r--r--   arch/powerpc/kvm/book3s_hv.c              | 244
-rw-r--r--   arch/powerpc/kvm/book3s_hv_rm_mmu.c       |  54
7 files changed, 290 insertions(+), 160 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index bcf6f4f52a22..c700f43ba178 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -141,6 +141,10 @@ extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); | |||
141 | extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, | 141 | extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, |
142 | unsigned long *nb_ret); | 142 | unsigned long *nb_ret); |
143 | extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr); | 143 | extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr); |
144 | extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, | ||
145 | long pte_index, unsigned long pteh, unsigned long ptel); | ||
146 | extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, | ||
147 | long pte_index, unsigned long pteh, unsigned long ptel); | ||
144 | 148 | ||
145 | extern void kvmppc_entry_trampoline(void); | 149 | extern void kvmppc_entry_trampoline(void); |
146 | extern void kvmppc_hv_entry_trampoline(void); | 150 | extern void kvmppc_hv_entry_trampoline(void); |
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 300ec04a8381..7e6f2ede44ac 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -101,4 +101,16 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, | |||
101 | return rb; | 101 | return rb; |
102 | } | 102 | } |
103 | 103 | ||
104 | static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) | ||
105 | { | ||
106 | /* only handle 4k, 64k and 16M pages for now */ | ||
107 | if (!(h & HPTE_V_LARGE)) | ||
108 | return 1ul << 12; /* 4k page */ | ||
109 | if ((l & 0xf000) == 0x1000 && cpu_has_feature(CPU_FTR_ARCH_206)) | ||
110 | return 1ul << 16; /* 64k page */ | ||
111 | if ((l & 0xff000) == 0) | ||
112 | return 1ul << 24; /* 16M page */ | ||
113 | return 0; /* error */ | ||
114 | } | ||
115 | |||
104 | #endif /* __ASM_KVM_BOOK3S_64_H__ */ | 116 | #endif /* __ASM_KVM_BOOK3S_64_H__ */ |
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 7a17ab5b9058..beb22ba71e26 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -194,7 +194,9 @@ struct kvm_arch { | |||
194 | unsigned long lpcr; | 194 | unsigned long lpcr; |
195 | unsigned long rmor; | 195 | unsigned long rmor; |
196 | struct kvmppc_rma_info *rma; | 196 | struct kvmppc_rma_info *rma; |
197 | int rma_setup_done; | ||
197 | struct list_head spapr_tce_tables; | 198 | struct list_head spapr_tce_tables; |
199 | spinlock_t slot_phys_lock; | ||
198 | unsigned long *slot_phys[KVM_MEM_SLOTS_NUM]; | 200 | unsigned long *slot_phys[KVM_MEM_SLOTS_NUM]; |
199 | int slot_npages[KVM_MEM_SLOTS_NUM]; | 201 | int slot_npages[KVM_MEM_SLOTS_NUM]; |
200 | unsigned short last_vcpu[NR_CPUS]; | 202 | unsigned short last_vcpu[NR_CPUS]; |
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 5192c2e70583..1458c6740ea3 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -121,8 +121,8 @@ extern long kvmppc_alloc_hpt(struct kvm *kvm); | |||
121 | extern void kvmppc_free_hpt(struct kvm *kvm); | 121 | extern void kvmppc_free_hpt(struct kvm *kvm); |
122 | extern long kvmppc_prepare_vrma(struct kvm *kvm, | 122 | extern long kvmppc_prepare_vrma(struct kvm *kvm, |
123 | struct kvm_userspace_memory_region *mem); | 123 | struct kvm_userspace_memory_region *mem); |
124 | extern void kvmppc_map_vrma(struct kvm *kvm, | 124 | extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, |
125 | struct kvm_userspace_memory_region *mem); | 125 | struct kvm_memory_slot *memslot); |
126 | extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); | 126 | extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); |
127 | extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | 127 | extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, |
128 | struct kvm_create_spapr_tce *args); | 128 | struct kvm_create_spapr_tce *args); |
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index dcd39dc64f07..87016ccd8648 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -95,19 +95,17 @@ void kvmppc_free_hpt(struct kvm *kvm) | |||
95 | free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT); | 95 | free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT); |
96 | } | 96 | } |
97 | 97 | ||
98 | void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem) | 98 | void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot) |
99 | { | 99 | { |
100 | struct kvm *kvm = vcpu->kvm; | ||
100 | unsigned long i; | 101 | unsigned long i; |
101 | unsigned long npages; | 102 | unsigned long npages; |
102 | unsigned long pa; | 103 | unsigned long hp_v, hp_r; |
103 | unsigned long *hpte; | 104 | unsigned long addr, hash; |
104 | unsigned long hash; | ||
105 | unsigned long porder = kvm->arch.ram_porder; | 105 | unsigned long porder = kvm->arch.ram_porder; |
106 | struct revmap_entry *rev; | 106 | long ret; |
107 | unsigned long *physp; | ||
108 | 107 | ||
109 | physp = kvm->arch.slot_phys[mem->slot]; | 108 | npages = kvm->arch.slot_npages[memslot->id]; |
110 | npages = kvm->arch.slot_npages[mem->slot]; | ||
111 | 109 | ||
112 | /* VRMA can't be > 1TB */ | 110 | /* VRMA can't be > 1TB */ |
113 | if (npages > 1ul << (40 - porder)) | 111 | if (npages > 1ul << (40 - porder)) |
@@ -117,10 +115,7 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem) | |||
117 | npages = HPT_NPTEG; | 115 | npages = HPT_NPTEG; |
118 | 116 | ||
119 | for (i = 0; i < npages; ++i) { | 117 | for (i = 0; i < npages; ++i) { |
120 | pa = physp[i]; | 118 | addr = i << porder; |
121 | if (!pa) | ||
122 | break; | ||
123 | pa &= PAGE_MASK; | ||
124 | /* can't use hpt_hash since va > 64 bits */ | 119 | /* can't use hpt_hash since va > 64 bits */ |
125 | hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK; | 120 | hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK; |
126 | /* | 121 | /* |
@@ -130,18 +125,16 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem) | |||
130 | * is available and use it. | 125 | * is available and use it. |
131 | */ | 126 | */ |
132 | hash = (hash << 3) + 7; | 127 | hash = (hash << 3) + 7; |
133 | hpte = (unsigned long *) (kvm->arch.hpt_virt + (hash << 4)); | 128 | hp_v = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | |
134 | /* HPTE low word - RPN, protection, etc. */ | ||
135 | hpte[1] = pa | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX; | ||
136 | smp_wmb(); | ||
137 | hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | | ||
138 | (i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED | | 129 | (i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED | |
139 | HPTE_V_LARGE | HPTE_V_VALID; | 130 | HPTE_V_LARGE | HPTE_V_VALID; |
140 | 131 | hp_r = addr | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX; | |
141 | /* Reverse map info */ | 132 | ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r); |
142 | rev = &kvm->arch.revmap[hash]; | 133 | if (ret != H_SUCCESS) { |
143 | rev->guest_rpte = (i << porder) | HPTE_R_R | HPTE_R_C | | 134 | pr_err("KVM: map_vrma at %lx failed, ret=%ld\n", |
144 | HPTE_R_M | PP_RWXX; | 135 | addr, ret); |
136 | break; | ||
137 | } | ||
145 | } | 138 | } |
146 | } | 139 | } |
147 | 140 | ||
@@ -178,6 +171,92 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) | |||
178 | kvmppc_set_msr(vcpu, MSR_SF | MSR_ME); | 171 | kvmppc_set_msr(vcpu, MSR_SF | MSR_ME); |
179 | } | 172 | } |
180 | 173 | ||
174 | /* | ||
175 | * This is called to get a reference to a guest page if there isn't | ||
176 | * one already in the kvm->arch.slot_phys[][] arrays. | ||
177 | */ | ||
178 | static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, | ||
179 | struct kvm_memory_slot *memslot) | ||
180 | { | ||
181 | unsigned long start; | ||
182 | long np; | ||
183 | struct page *page, *pages[1]; | ||
184 | unsigned long *physp; | ||
185 | unsigned long pfn, i; | ||
186 | |||
187 | physp = kvm->arch.slot_phys[memslot->id]; | ||
188 | if (!physp) | ||
189 | return -EINVAL; | ||
190 | i = (gfn - memslot->base_gfn) >> (kvm->arch.ram_porder - PAGE_SHIFT); | ||
191 | if (physp[i]) | ||
192 | return 0; | ||
193 | |||
194 | page = NULL; | ||
195 | start = gfn_to_hva_memslot(memslot, gfn); | ||
196 | |||
197 | /* Instantiate and get the page we want access to */ | ||
198 | np = get_user_pages_fast(start, 1, 1, pages); | ||
199 | if (np != 1) | ||
200 | return -EINVAL; | ||
201 | page = pages[0]; | ||
202 | |||
203 | /* Check it's a 16MB page */ | ||
204 | if (!PageHead(page) || | ||
205 | compound_order(page) != (kvm->arch.ram_porder - PAGE_SHIFT)) { | ||
206 | pr_err("page at %lx isn't 16MB (o=%d)\n", | ||
207 | start, compound_order(page)); | ||
208 | put_page(page); | ||
209 | return -EINVAL; | ||
210 | } | ||
211 | pfn = page_to_pfn(page); | ||
212 | |||
213 | spin_lock(&kvm->arch.slot_phys_lock); | ||
214 | if (!physp[i]) | ||
215 | physp[i] = (pfn << PAGE_SHIFT) | KVMPPC_GOT_PAGE; | ||
216 | else | ||
217 | put_page(page); | ||
218 | spin_unlock(&kvm->arch.slot_phys_lock); | ||
219 | |||
220 | return 0; | ||
221 | } | ||
222 | |||
223 | /* | ||
224 | * We come here on a H_ENTER call from the guest when | ||
225 | * we don't have the requested page pinned already. | ||
226 | */ | ||
227 | long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, | ||
228 | long pte_index, unsigned long pteh, unsigned long ptel) | ||
229 | { | ||
230 | struct kvm *kvm = vcpu->kvm; | ||
231 | unsigned long psize, gpa, gfn; | ||
232 | struct kvm_memory_slot *memslot; | ||
233 | long ret; | ||
234 | |||
235 | psize = hpte_page_size(pteh, ptel); | ||
236 | if (!psize) | ||
237 | return H_PARAMETER; | ||
238 | |||
239 | /* Find the memslot (if any) for this address */ | ||
240 | gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); | ||
241 | gfn = gpa >> PAGE_SHIFT; | ||
242 | memslot = gfn_to_memslot(kvm, gfn); | ||
243 | if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) | ||
244 | return H_PARAMETER; | ||
245 | if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0) | ||
246 | return H_PARAMETER; | ||
247 | |||
248 | preempt_disable(); | ||
249 | ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel); | ||
250 | preempt_enable(); | ||
251 | if (ret == H_TOO_HARD) { | ||
252 | /* this can't happen */ | ||
253 | pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n"); | ||
254 | ret = H_RESOURCE; /* or something */ | ||
255 | } | ||
256 | return ret; | ||
257 | |||
258 | } | ||
259 | |||
181 | static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, | 260 | static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, |
182 | struct kvmppc_pte *gpte, bool data) | 261 | struct kvmppc_pte *gpte, bool data) |
183 | { | 262 | { |
@@ -203,8 +282,11 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, | |||
203 | physp += (gfn - memslot->base_gfn) >> | 282 | physp += (gfn - memslot->base_gfn) >> |
204 | (kvm->arch.ram_porder - PAGE_SHIFT); | 283 | (kvm->arch.ram_porder - PAGE_SHIFT); |
205 | pa = *physp; | 284 | pa = *physp; |
206 | if (!pa) | 285 | if (!pa) { |
207 | return NULL; | 286 | if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0) |
287 | return NULL; | ||
288 | pa = *physp; | ||
289 | } | ||
208 | pfn = pa >> PAGE_SHIFT; | 290 | pfn = pa >> PAGE_SHIFT; |
209 | page = pfn_to_page(pfn); | 291 | page = pfn_to_page(pfn); |
210 | get_page(page); | 292 | get_page(page); |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 82d71388eace..ce5a13fb974b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -49,6 +49,7 @@ | |||
49 | #include <linux/sched.h> | 49 | #include <linux/sched.h> |
50 | #include <linux/vmalloc.h> | 50 | #include <linux/vmalloc.h> |
51 | #include <linux/highmem.h> | 51 | #include <linux/highmem.h> |
52 | #include <linux/hugetlb.h> | ||
52 | 53 | ||
53 | #define LARGE_PAGE_ORDER 24 /* 16MB pages */ | 54 | #define LARGE_PAGE_ORDER 24 /* 16MB pages */ |
54 | 55 | ||
@@ -57,6 +58,7 @@ | |||
57 | /* #define EXIT_DEBUG_INT */ | 58 | /* #define EXIT_DEBUG_INT */ |
58 | 59 | ||
59 | static void kvmppc_end_cede(struct kvm_vcpu *vcpu); | 60 | static void kvmppc_end_cede(struct kvm_vcpu *vcpu); |
61 | static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu); | ||
60 | 62 | ||
61 | void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 63 | void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
62 | { | 64 | { |
@@ -231,6 +233,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | |||
231 | struct kvm_vcpu *tvcpu; | 233 | struct kvm_vcpu *tvcpu; |
232 | 234 | ||
233 | switch (req) { | 235 | switch (req) { |
236 | case H_ENTER: | ||
237 | ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4), | ||
238 | kvmppc_get_gpr(vcpu, 5), | ||
239 | kvmppc_get_gpr(vcpu, 6), | ||
240 | kvmppc_get_gpr(vcpu, 7)); | ||
241 | break; | ||
234 | case H_CEDE: | 242 | case H_CEDE: |
235 | break; | 243 | break; |
236 | case H_PROD: | 244 | case H_PROD: |
@@ -851,9 +859,12 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
851 | return -EINTR; | 859 | return -EINTR; |
852 | } | 860 | } |
853 | 861 | ||
854 | /* On PPC970, check that we have an RMA region */ | 862 | /* On the first time here, set up VRMA or RMA */ |
855 | if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201)) | 863 | if (!vcpu->kvm->arch.rma_setup_done) { |
856 | return -EPERM; | 864 | r = kvmppc_hv_setup_rma(vcpu); |
865 | if (r) | ||
866 | return r; | ||
867 | } | ||
857 | 868 | ||
858 | flush_fp_to_thread(current); | 869 | flush_fp_to_thread(current); |
859 | flush_altivec_to_thread(current); | 870 | flush_altivec_to_thread(current); |
@@ -1063,34 +1074,15 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) | |||
1063 | return fd; | 1074 | return fd; |
1064 | } | 1075 | } |
1065 | 1076 | ||
1066 | static struct page *hva_to_page(unsigned long addr) | ||
1067 | { | ||
1068 | struct page *page[1]; | ||
1069 | int npages; | ||
1070 | |||
1071 | might_sleep(); | ||
1072 | |||
1073 | npages = get_user_pages_fast(addr, 1, 1, page); | ||
1074 | |||
1075 | if (unlikely(npages != 1)) | ||
1076 | return 0; | ||
1077 | |||
1078 | return page[0]; | ||
1079 | } | ||
1080 | |||
1081 | int kvmppc_core_prepare_memory_region(struct kvm *kvm, | 1077 | int kvmppc_core_prepare_memory_region(struct kvm *kvm, |
1082 | struct kvm_userspace_memory_region *mem) | 1078 | struct kvm_userspace_memory_region *mem) |
1083 | { | 1079 | { |
1084 | unsigned long psize, porder; | 1080 | unsigned long psize; |
1085 | unsigned long i, npages; | 1081 | unsigned long npages; |
1086 | unsigned long hva; | ||
1087 | struct kvmppc_rma_info *ri = NULL; | ||
1088 | struct page *page; | ||
1089 | unsigned long *phys; | 1082 | unsigned long *phys; |
1090 | 1083 | ||
1091 | /* For now, only allow 16MB pages */ | 1084 | /* For now, only allow 16MB-aligned slots */ |
1092 | porder = LARGE_PAGE_ORDER; | 1085 | psize = kvm->arch.ram_psize; |
1093 | psize = 1ul << porder; | ||
1094 | if ((mem->memory_size & (psize - 1)) || | 1086 | if ((mem->memory_size & (psize - 1)) || |
1095 | (mem->guest_phys_addr & (psize - 1))) { | 1087 | (mem->guest_phys_addr & (psize - 1))) { |
1096 | pr_err("bad memory_size=%llx @ %llx\n", | 1088 | pr_err("bad memory_size=%llx @ %llx\n", |
@@ -1099,7 +1091,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, | |||
1099 | } | 1091 | } |
1100 | 1092 | ||
1101 | /* Allocate a slot_phys array */ | 1093 | /* Allocate a slot_phys array */ |
1102 | npages = mem->memory_size >> porder; | 1094 | npages = mem->memory_size >> kvm->arch.ram_porder; |
1103 | phys = kvm->arch.slot_phys[mem->slot]; | 1095 | phys = kvm->arch.slot_phys[mem->slot]; |
1104 | if (!phys) { | 1096 | if (!phys) { |
1105 | phys = vzalloc(npages * sizeof(unsigned long)); | 1097 | phys = vzalloc(npages * sizeof(unsigned long)); |
@@ -1109,39 +1101,110 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, | |||
1109 | kvm->arch.slot_npages[mem->slot] = npages; | 1101 | kvm->arch.slot_npages[mem->slot] = npages; |
1110 | } | 1102 | } |
1111 | 1103 | ||
1112 | /* Do we already have an RMA registered? */ | 1104 | return 0; |
1113 | if (mem->guest_phys_addr == 0 && kvm->arch.rma) | 1105 | } |
1114 | return -EINVAL; | ||
1115 | 1106 | ||
1116 | /* Is this one of our preallocated RMAs? */ | 1107 | static void unpin_slot(struct kvm *kvm, int slot_id) |
1117 | if (mem->guest_phys_addr == 0) { | 1108 | { |
1118 | struct vm_area_struct *vma; | 1109 | unsigned long *physp; |
1119 | 1110 | unsigned long j, npages, pfn; | |
1120 | down_read(¤t->mm->mmap_sem); | 1111 | struct page *page; |
1121 | vma = find_vma(current->mm, mem->userspace_addr); | 1112 | |
1122 | if (vma && vma->vm_file && | 1113 | physp = kvm->arch.slot_phys[slot_id]; |
1123 | vma->vm_file->f_op == &kvm_rma_fops && | 1114 | npages = kvm->arch.slot_npages[slot_id]; |
1124 | mem->userspace_addr == vma->vm_start) | 1115 | if (physp) { |
1125 | ri = vma->vm_file->private_data; | 1116 | spin_lock(&kvm->arch.slot_phys_lock); |
1126 | up_read(¤t->mm->mmap_sem); | 1117 | for (j = 0; j < npages; j++) { |
1127 | if (!ri && cpu_has_feature(CPU_FTR_ARCH_201)) { | 1118 | if (!(physp[j] & KVMPPC_GOT_PAGE)) |
1128 | pr_err("CPU requires an RMO\n"); | 1119 | continue; |
1129 | return -EINVAL; | 1120 | pfn = physp[j] >> PAGE_SHIFT; |
1121 | page = pfn_to_page(pfn); | ||
1122 | SetPageDirty(page); | ||
1123 | put_page(page); | ||
1130 | } | 1124 | } |
1125 | kvm->arch.slot_phys[slot_id] = NULL; | ||
1126 | spin_unlock(&kvm->arch.slot_phys_lock); | ||
1127 | vfree(physp); | ||
1131 | } | 1128 | } |
1129 | } | ||
1130 | |||
1131 | void kvmppc_core_commit_memory_region(struct kvm *kvm, | ||
1132 | struct kvm_userspace_memory_region *mem) | ||
1133 | { | ||
1134 | } | ||
1135 | |||
1136 | static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) | ||
1137 | { | ||
1138 | int err = 0; | ||
1139 | struct kvm *kvm = vcpu->kvm; | ||
1140 | struct kvmppc_rma_info *ri = NULL; | ||
1141 | unsigned long hva; | ||
1142 | struct kvm_memory_slot *memslot; | ||
1143 | struct vm_area_struct *vma; | ||
1144 | unsigned long lpcr; | ||
1145 | unsigned long psize, porder; | ||
1146 | unsigned long rma_size; | ||
1147 | unsigned long rmls; | ||
1148 | unsigned long *physp; | ||
1149 | unsigned long i, npages, pa; | ||
1150 | |||
1151 | mutex_lock(&kvm->lock); | ||
1152 | if (kvm->arch.rma_setup_done) | ||
1153 | goto out; /* another vcpu beat us to it */ | ||
1132 | 1154 | ||
1133 | if (ri) { | 1155 | /* Look up the memslot for guest physical address 0 */ |
1134 | unsigned long rma_size; | 1156 | memslot = gfn_to_memslot(kvm, 0); |
1135 | unsigned long lpcr; | ||
1136 | long rmls; | ||
1137 | 1157 | ||
1138 | rma_size = ri->npages << PAGE_SHIFT; | 1158 | /* We must have some memory at 0 by now */ |
1139 | if (rma_size > mem->memory_size) | 1159 | err = -EINVAL; |
1140 | rma_size = mem->memory_size; | 1160 | if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) |
1161 | goto out; | ||
1162 | |||
1163 | /* Look up the VMA for the start of this memory slot */ | ||
1164 | hva = memslot->userspace_addr; | ||
1165 | down_read(¤t->mm->mmap_sem); | ||
1166 | vma = find_vma(current->mm, hva); | ||
1167 | if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO)) | ||
1168 | goto up_out; | ||
1169 | |||
1170 | psize = vma_kernel_pagesize(vma); | ||
1171 | if (psize != kvm->arch.ram_psize) | ||
1172 | goto up_out; | ||
1173 | |||
1174 | /* Is this one of our preallocated RMAs? */ | ||
1175 | if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops && | ||
1176 | hva == vma->vm_start) | ||
1177 | ri = vma->vm_file->private_data; | ||
1178 | |||
1179 | up_read(¤t->mm->mmap_sem); | ||
1180 | |||
1181 | if (!ri) { | ||
1182 | /* On POWER7, use VRMA; on PPC970, give up */ | ||
1183 | err = -EPERM; | ||
1184 | if (cpu_has_feature(CPU_FTR_ARCH_201)) { | ||
1185 | pr_err("KVM: CPU requires an RMO\n"); | ||
1186 | goto out; | ||
1187 | } | ||
1188 | |||
1189 | /* Update VRMASD field in the LPCR */ | ||
1190 | lpcr = kvm->arch.lpcr & ~(0x1fUL << LPCR_VRMASD_SH); | ||
1191 | lpcr |= LPCR_VRMA_L; | ||
1192 | kvm->arch.lpcr = lpcr; | ||
1193 | |||
1194 | /* Create HPTEs in the hash page table for the VRMA */ | ||
1195 | kvmppc_map_vrma(vcpu, memslot); | ||
1196 | |||
1197 | } else { | ||
1198 | /* Set up to use an RMO region */ | ||
1199 | rma_size = ri->npages; | ||
1200 | if (rma_size > memslot->npages) | ||
1201 | rma_size = memslot->npages; | ||
1202 | rma_size <<= PAGE_SHIFT; | ||
1141 | rmls = lpcr_rmls(rma_size); | 1203 | rmls = lpcr_rmls(rma_size); |
1204 | err = -EINVAL; | ||
1142 | if (rmls < 0) { | 1205 | if (rmls < 0) { |
1143 | pr_err("Can't use RMA of 0x%lx bytes\n", rma_size); | 1206 | pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size); |
1144 | return -EINVAL; | 1207 | goto out; |
1145 | } | 1208 | } |
1146 | atomic_inc(&ri->use_count); | 1209 | atomic_inc(&ri->use_count); |
1147 | kvm->arch.rma = ri; | 1210 | kvm->arch.rma = ri; |
@@ -1164,65 +1227,31 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, | |||
1164 | kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT; | 1227 | kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT; |
1165 | } | 1228 | } |
1166 | kvm->arch.lpcr = lpcr; | 1229 | kvm->arch.lpcr = lpcr; |
1167 | pr_info("Using RMO at %lx size %lx (LPCR = %lx)\n", | 1230 | pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", |
1168 | ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); | 1231 | ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); |
1169 | } | ||
1170 | 1232 | ||
1171 | for (i = 0; i < npages; ++i) { | 1233 | /* Initialize phys addrs of pages in RMO */ |
1172 | if (ri && i < ri->npages) { | 1234 | porder = kvm->arch.ram_porder; |
1173 | phys[i] = (ri->base_pfn << PAGE_SHIFT) + (i << porder); | 1235 | npages = rma_size >> porder; |
1174 | continue; | 1236 | pa = ri->base_pfn << PAGE_SHIFT; |
1175 | } | 1237 | physp = kvm->arch.slot_phys[memslot->id]; |
1176 | hva = mem->userspace_addr + (i << porder); | 1238 | spin_lock(&kvm->arch.slot_phys_lock); |
1177 | page = hva_to_page(hva); | 1239 | for (i = 0; i < npages; ++i) |
1178 | if (!page) { | 1240 | physp[i] = pa + (i << porder); |
1179 | pr_err("oops, no pfn for hva %lx\n", hva); | 1241 | spin_unlock(&kvm->arch.slot_phys_lock); |
1180 | goto err; | ||
1181 | } | ||
1182 | /* Check it's a 16MB page */ | ||
1183 | if (!PageHead(page) || | ||
1184 | compound_order(page) != (LARGE_PAGE_ORDER - PAGE_SHIFT)) { | ||
1185 | pr_err("page at %lx isn't 16MB (o=%d)\n", | ||
1186 | hva, compound_order(page)); | ||
1187 | goto err; | ||
1188 | } | ||
1189 | phys[i] = (page_to_pfn(page) << PAGE_SHIFT) | KVMPPC_GOT_PAGE; | ||
1190 | } | 1242 | } |
1191 | 1243 | ||
1192 | return 0; | 1244 | /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */ |
1193 | 1245 | smp_wmb(); | |
1194 | err: | 1246 | kvm->arch.rma_setup_done = 1; |
1195 | return -EINVAL; | 1247 | err = 0; |
1196 | } | 1248 | out: |
1197 | 1249 | mutex_unlock(&kvm->lock); | |
1198 | static void unpin_slot(struct kvm *kvm, int slot_id) | 1250 | return err; |
1199 | { | ||
1200 | unsigned long *physp; | ||
1201 | unsigned long j, npages, pfn; | ||
1202 | struct page *page; | ||
1203 | |||
1204 | physp = kvm->arch.slot_phys[slot_id]; | ||
1205 | npages = kvm->arch.slot_npages[slot_id]; | ||
1206 | if (physp) { | ||
1207 | for (j = 0; j < npages; j++) { | ||
1208 | if (!(physp[j] & KVMPPC_GOT_PAGE)) | ||
1209 | continue; | ||
1210 | pfn = physp[j] >> PAGE_SHIFT; | ||
1211 | page = pfn_to_page(pfn); | ||
1212 | SetPageDirty(page); | ||
1213 | put_page(page); | ||
1214 | } | ||
1215 | vfree(physp); | ||
1216 | kvm->arch.slot_phys[slot_id] = NULL; | ||
1217 | } | ||
1218 | } | ||
1219 | 1251 | ||
1220 | void kvmppc_core_commit_memory_region(struct kvm *kvm, | 1252 | up_out: |
1221 | struct kvm_userspace_memory_region *mem) | 1253 | up_read(¤t->mm->mmap_sem); |
1222 | { | 1254 | goto out; |
1223 | if (mem->guest_phys_addr == 0 && mem->memory_size != 0 && | ||
1224 | !kvm->arch.rma) | ||
1225 | kvmppc_map_vrma(kvm, mem); | ||
1226 | } | 1255 | } |
1227 | 1256 | ||
1228 | int kvmppc_core_init_vm(struct kvm *kvm) | 1257 | int kvmppc_core_init_vm(struct kvm *kvm) |
@@ -1261,6 +1290,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) | |||
1261 | } | 1290 | } |
1262 | kvm->arch.lpcr = lpcr; | 1291 | kvm->arch.lpcr = lpcr; |
1263 | 1292 | ||
1293 | spin_lock_init(&kvm->arch.slot_phys_lock); | ||
1264 | return 0; | 1294 | return 0; |
1265 | } | 1295 | } |
1266 | 1296 | ||
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index a28a6030ec90..047c5e1fd70f 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/kvm.h> | 11 | #include <linux/kvm.h> |
12 | #include <linux/kvm_host.h> | 12 | #include <linux/kvm_host.h> |
13 | #include <linux/hugetlb.h> | 13 | #include <linux/hugetlb.h> |
14 | #include <linux/module.h> | ||
14 | 15 | ||
15 | #include <asm/tlbflush.h> | 16 | #include <asm/tlbflush.h> |
16 | #include <asm/kvm_ppc.h> | 17 | #include <asm/kvm_ppc.h> |
@@ -56,56 +57,54 @@ static void *real_vmalloc_addr(void *x) | |||
56 | long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, | 57 | long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, |
57 | long pte_index, unsigned long pteh, unsigned long ptel) | 58 | long pte_index, unsigned long pteh, unsigned long ptel) |
58 | { | 59 | { |
59 | unsigned long porder; | ||
60 | struct kvm *kvm = vcpu->kvm; | 60 | struct kvm *kvm = vcpu->kvm; |
61 | unsigned long i, gfn, lpn, pa; | 61 | unsigned long i, pa, gpa, gfn, psize; |
62 | unsigned long slot_fn; | ||
62 | unsigned long *hpte; | 63 | unsigned long *hpte; |
63 | struct revmap_entry *rev; | 64 | struct revmap_entry *rev; |
64 | unsigned long g_ptel = ptel; | 65 | unsigned long g_ptel = ptel; |
65 | struct kvm_memory_slot *memslot; | 66 | struct kvm_memory_slot *memslot; |
66 | unsigned long *physp; | 67 | unsigned long *physp, pte_size; |
68 | bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING; | ||
67 | 69 | ||
68 | /* only handle 4k, 64k and 16M pages for now */ | 70 | psize = hpte_page_size(pteh, ptel); |
69 | porder = 12; | 71 | if (!psize) |
70 | if (pteh & HPTE_V_LARGE) { | ||
71 | if (cpu_has_feature(CPU_FTR_ARCH_206) && | ||
72 | (ptel & 0xf000) == 0x1000) { | ||
73 | /* 64k page */ | ||
74 | porder = 16; | ||
75 | } else if ((ptel & 0xff000) == 0) { | ||
76 | /* 16M page */ | ||
77 | porder = 24; | ||
78 | /* lowest AVA bit must be 0 for 16M pages */ | ||
79 | if (pteh & 0x80) | ||
80 | return H_PARAMETER; | ||
81 | } else | ||
82 | return H_PARAMETER; | ||
83 | } | ||
84 | if (porder > kvm->arch.ram_porder) | ||
85 | return H_PARAMETER; | 72 | return H_PARAMETER; |
86 | 73 | ||
87 | gfn = ((ptel & HPTE_R_RPN) & ~((1ul << porder) - 1)) >> PAGE_SHIFT; | 74 | /* Find the memslot (if any) for this address */ |
75 | gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); | ||
76 | gfn = gpa >> PAGE_SHIFT; | ||
88 | memslot = builtin_gfn_to_memslot(kvm, gfn); | 77 | memslot = builtin_gfn_to_memslot(kvm, gfn); |
89 | if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) | 78 | if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) |
90 | return H_PARAMETER; | 79 | return H_PARAMETER; |
80 | slot_fn = gfn - memslot->base_gfn; | ||
81 | |||
91 | physp = kvm->arch.slot_phys[memslot->id]; | 82 | physp = kvm->arch.slot_phys[memslot->id]; |
92 | if (!physp) | 83 | if (!physp) |
93 | return H_PARAMETER; | 84 | return H_PARAMETER; |
94 | 85 | physp += slot_fn; | |
95 | lpn = (gfn - memslot->base_gfn) >> (kvm->arch.ram_porder - PAGE_SHIFT); | 86 | if (realmode) |
96 | physp = real_vmalloc_addr(physp + lpn); | 87 | physp = real_vmalloc_addr(physp); |
97 | pa = *physp; | 88 | pa = *physp; |
98 | if (!pa) | 89 | if (!pa) |
99 | return H_PARAMETER; | 90 | return H_TOO_HARD; |
100 | pa &= PAGE_MASK; | 91 | pa &= PAGE_MASK; |
101 | 92 | ||
93 | pte_size = kvm->arch.ram_psize; | ||
94 | if (pte_size < psize) | ||
95 | return H_PARAMETER; | ||
96 | if (pa && pte_size > psize) | ||
97 | pa |= gpa & (pte_size - 1); | ||
98 | |||
99 | ptel &= ~(HPTE_R_PP0 - psize); | ||
100 | ptel |= pa; | ||
101 | |||
102 | /* Check WIMG */ | 102 | /* Check WIMG */ |
103 | if ((ptel & HPTE_R_WIMG) != HPTE_R_M && | 103 | if ((ptel & HPTE_R_WIMG) != HPTE_R_M && |
104 | (ptel & HPTE_R_WIMG) != (HPTE_R_W | HPTE_R_I | HPTE_R_M)) | 104 | (ptel & HPTE_R_WIMG) != (HPTE_R_W | HPTE_R_I | HPTE_R_M)) |
105 | return H_PARAMETER; | 105 | return H_PARAMETER; |
106 | pteh &= ~0x60UL; | 106 | pteh &= ~0x60UL; |
107 | ptel &= ~(HPTE_R_PP0 - kvm->arch.ram_psize); | 107 | pteh |= HPTE_V_VALID; |
108 | ptel |= pa; | ||
109 | 108 | ||
110 | if (pte_index >= HPT_NPTE) | 109 | if (pte_index >= HPT_NPTE) |
111 | return H_PARAMETER; | 110 | return H_PARAMETER; |
@@ -162,6 +161,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, | |||
162 | vcpu->arch.gpr[4] = pte_index; | 161 | vcpu->arch.gpr[4] = pte_index; |
163 | return H_SUCCESS; | 162 | return H_SUCCESS; |
164 | } | 163 | } |
164 | EXPORT_SYMBOL_GPL(kvmppc_h_enter); | ||
165 | 165 | ||
166 | #define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) | 166 | #define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) |
167 | 167 | ||