author	Paul Mackerras <paulus@samba.org>	2011-12-12 07:31:00 -0500
committer	Avi Kivity <avi@redhat.com>	2012-03-05 07:52:36 -0500
commit	c77162dee7aff6ab5f075da9b60f649cbbeb86cc (patch)
tree	c1f3f4f71a9fdad6612da20c67520b4fc8fa0b65
parent	075295dd322b0c0de0c9ecf8e0cb19ee813438ed (diff)
KVM: PPC: Only get pages when actually needed, not in prepare_memory_region()
This removes the code from kvmppc_core_prepare_memory_region() that looked up the VMA for the region being added and called hva_to_page to get the pfns for the memory. We have no guarantee that there will be anything mapped there at the time of the KVM_SET_USER_MEMORY_REGION ioctl call; userspace can do that ioctl and then map memory into the region later.

Instead we defer looking up the pfn for each memory page until it is needed, which generally means when the guest does an H_ENTER hcall on the page. Since we can't call get_user_pages in real mode, if we don't already have the pfn for the page, kvmppc_h_enter() will return H_TOO_HARD and we then call kvmppc_virtmode_h_enter() once we get back to kernel context. That calls kvmppc_get_guest_page() to get the pfn for the page, and then calls back to kvmppc_h_enter() to redo the HPTE insertion.

When the first vcpu starts executing, we need to have the RMO or VRMA region mapped so that the guest's real mode accesses will work. Thus we now have a check in kvmppc_vcpu_run() to see if the RMO/VRMA is set up and if not, call kvmppc_hv_setup_rma(). It checks if the memslot starting at guest physical 0 now has RMO memory mapped there; if so it sets it up for the guest, otherwise on POWER7 it sets up the VRMA. The function that does that, kvmppc_map_vrma, is now a bit simpler, as it calls kvmppc_virtmode_h_enter instead of creating the HPTE itself.

Since we are now potentially updating entries in the slot_phys[] arrays from multiple vcpu threads, we now have a spinlock protecting those updates to ensure that we don't lose track of any references to pages.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
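The resulting call flow is: the guest's H_ENTER first hits the real-mode handler; if the pfn for the target page is not yet cached in slot_phys[], the handler returns H_TOO_HARD and the hypercall is retried from the virtual-mode wrapper, which is allowed to sleep, pins the page, and then redoes the insertion. The stand-alone sketch below only illustrates that pattern; the function names and the lazily-filled table stand in for the real kvmppc_h_enter()/kvmppc_virtmode_h_enter() and kvm->arch.slot_phys[], and the physical addresses are invented for the example.

#include <stdio.h>

#define H_SUCCESS	0
#define H_TOO_HARD	(-2)

#define NPAGES		4

/* stand-in for kvm->arch.slot_phys[]: filled lazily, 0 means "not pinned yet" */
static unsigned long slot_phys[NPAGES];

/* "real mode" path: must not sleep, so it can only use what is already cached */
static long h_enter_realmode(unsigned long idx)
{
	if (!slot_phys[idx])
		return H_TOO_HARD;	/* defer to the virtual-mode caller */
	printf("HPTE inserted for page %lu -> %#lx\n", idx, slot_phys[idx]);
	return H_SUCCESS;
}

/* "virtual mode" fallback: allowed to do the expensive lookup, then retries */
static long h_enter_virtmode(unsigned long idx)
{
	if (!slot_phys[idx])
		slot_phys[idx] = 0x1000000UL * (idx + 1);	/* pretend get_user_pages_fast() */
	return h_enter_realmode(idx);
}

int main(void)
{
	unsigned long i;

	for (i = 0; i < NPAGES; i++)
		if (h_enter_realmode(i) == H_TOO_HARD)
			h_enter_virtmode(i);
	return 0;
}

In the patch itself the retry goes through kvmppc_virtmode_h_enter(), which pins the page with get_user_pages_fast() and records it under kvm->arch.slot_phys_lock before calling kvmppc_h_enter() again.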
-rw-r--r--	arch/powerpc/include/asm/kvm_book3s.h	4
-rw-r--r--	arch/powerpc/include/asm/kvm_book3s_64.h	12
-rw-r--r--	arch/powerpc/include/asm/kvm_host.h	2
-rw-r--r--	arch/powerpc/include/asm/kvm_ppc.h	4
-rw-r--r--	arch/powerpc/kvm/book3s_64_mmu_hv.c	130
-rw-r--r--	arch/powerpc/kvm/book3s_hv.c	244
-rw-r--r--	arch/powerpc/kvm/book3s_hv_rm_mmu.c	54
7 files changed, 290 insertions, 160 deletions
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index bcf6f4f52a2..c700f43ba17 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -141,6 +141,10 @@ extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
 			unsigned long *nb_ret);
 extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
+extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+			long pte_index, unsigned long pteh, unsigned long ptel);
+extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+			long pte_index, unsigned long pteh, unsigned long ptel);
 
 extern void kvmppc_entry_trampoline(void);
 extern void kvmppc_hv_entry_trampoline(void);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 300ec04a838..7e6f2ede44a 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -101,4 +101,16 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 	return rb;
 }
 
+static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
+{
+	/* only handle 4k, 64k and 16M pages for now */
+	if (!(h & HPTE_V_LARGE))
+		return 1ul << 12;		/* 4k page */
+	if ((l & 0xf000) == 0x1000 && cpu_has_feature(CPU_FTR_ARCH_206))
+		return 1ul << 16;		/* 64k page */
+	if ((l & 0xff000) == 0)
+		return 1ul << 24;		/* 16M page */
+	return 0;				/* error */
+}
+
 #endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 7a17ab5b905..beb22ba71e2 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -194,7 +194,9 @@ struct kvm_arch {
 	unsigned long lpcr;
 	unsigned long rmor;
 	struct kvmppc_rma_info *rma;
+	int rma_setup_done;
 	struct list_head spapr_tce_tables;
+	spinlock_t slot_phys_lock;
 	unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
 	int slot_npages[KVM_MEM_SLOTS_NUM];
 	unsigned short last_vcpu[NR_CPUS];
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 5192c2e7058..1458c6740ea 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -121,8 +121,8 @@ extern long kvmppc_alloc_hpt(struct kvm *kvm);
 extern void kvmppc_free_hpt(struct kvm *kvm);
 extern long kvmppc_prepare_vrma(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem);
-extern void kvmppc_map_vrma(struct kvm *kvm,
-				struct kvm_userspace_memory_region *mem);
+extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
+			struct kvm_memory_slot *memslot);
 extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 				struct kvm_create_spapr_tce *args);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index dcd39dc64f0..87016ccd864 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -95,19 +95,17 @@ void kvmppc_free_hpt(struct kvm *kvm)
 	free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
 }
 
-void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
+void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot)
 {
+	struct kvm *kvm = vcpu->kvm;
 	unsigned long i;
 	unsigned long npages;
-	unsigned long pa;
-	unsigned long *hpte;
-	unsigned long hash;
+	unsigned long hp_v, hp_r;
+	unsigned long addr, hash;
 	unsigned long porder = kvm->arch.ram_porder;
-	struct revmap_entry *rev;
-	unsigned long *physp;
+	long ret;
 
-	physp = kvm->arch.slot_phys[mem->slot];
-	npages = kvm->arch.slot_npages[mem->slot];
+	npages = kvm->arch.slot_npages[memslot->id];
 
 	/* VRMA can't be > 1TB */
 	if (npages > 1ul << (40 - porder))
@@ -117,10 +115,7 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
 		npages = HPT_NPTEG;
 
 	for (i = 0; i < npages; ++i) {
-		pa = physp[i];
-		if (!pa)
-			break;
-		pa &= PAGE_MASK;
+		addr = i << porder;
 		/* can't use hpt_hash since va > 64 bits */
 		hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;
 		/*
@@ -130,18 +125,16 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
 		 * is available and use it.
 		 */
 		hash = (hash << 3) + 7;
-		hpte = (unsigned long *) (kvm->arch.hpt_virt + (hash << 4));
-		/* HPTE low word - RPN, protection, etc. */
-		hpte[1] = pa | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
-		smp_wmb();
-		hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
+		hp_v = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
 			(i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED |
 			HPTE_V_LARGE | HPTE_V_VALID;
-
-		/* Reverse map info */
-		rev = &kvm->arch.revmap[hash];
-		rev->guest_rpte = (i << porder) | HPTE_R_R | HPTE_R_C |
-			HPTE_R_M | PP_RWXX;
+		hp_r = addr | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
+		ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r);
+		if (ret != H_SUCCESS) {
+			pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
+			       addr, ret);
+			break;
+		}
 	}
 }
 
@@ -178,6 +171,92 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
 	kvmppc_set_msr(vcpu, MSR_SF | MSR_ME);
 }
 
+/*
+ * This is called to get a reference to a guest page if there isn't
+ * one already in the kvm->arch.slot_phys[][] arrays.
+ */
+static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
+				  struct kvm_memory_slot *memslot)
+{
+	unsigned long start;
+	long np;
+	struct page *page, *pages[1];
+	unsigned long *physp;
+	unsigned long pfn, i;
+
+	physp = kvm->arch.slot_phys[memslot->id];
+	if (!physp)
+		return -EINVAL;
+	i = (gfn - memslot->base_gfn) >> (kvm->arch.ram_porder - PAGE_SHIFT);
+	if (physp[i])
+		return 0;
+
+	page = NULL;
+	start = gfn_to_hva_memslot(memslot, gfn);
+
+	/* Instantiate and get the page we want access to */
+	np = get_user_pages_fast(start, 1, 1, pages);
+	if (np != 1)
+		return -EINVAL;
+	page = pages[0];
+
+	/* Check it's a 16MB page */
+	if (!PageHead(page) ||
+	    compound_order(page) != (kvm->arch.ram_porder - PAGE_SHIFT)) {
+		pr_err("page at %lx isn't 16MB (o=%d)\n",
+		       start, compound_order(page));
+		put_page(page);
+		return -EINVAL;
+	}
+	pfn = page_to_pfn(page);
+
+	spin_lock(&kvm->arch.slot_phys_lock);
+	if (!physp[i])
+		physp[i] = (pfn << PAGE_SHIFT) | KVMPPC_GOT_PAGE;
+	else
+		put_page(page);
+	spin_unlock(&kvm->arch.slot_phys_lock);
+
+	return 0;
+}
+
+/*
+ * We come here on a H_ENTER call from the guest when
+ * we don't have the requested page pinned already.
+ */
+long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+			long pte_index, unsigned long pteh, unsigned long ptel)
+{
+	struct kvm *kvm = vcpu->kvm;
+	unsigned long psize, gpa, gfn;
+	struct kvm_memory_slot *memslot;
+	long ret;
+
+	psize = hpte_page_size(pteh, ptel);
+	if (!psize)
+		return H_PARAMETER;
+
+	/* Find the memslot (if any) for this address */
+	gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
+	gfn = gpa >> PAGE_SHIFT;
+	memslot = gfn_to_memslot(kvm, gfn);
+	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+		return H_PARAMETER;
+	if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0)
+		return H_PARAMETER;
+
+	preempt_disable();
+	ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
+	preempt_enable();
+	if (ret == H_TOO_HARD) {
+		/* this can't happen */
+		pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
+		ret = H_RESOURCE;	/* or something */
+	}
+	return ret;
+
+}
+
 static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 			struct kvmppc_pte *gpte, bool data)
 {
@@ -203,8 +282,11 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 	physp += (gfn - memslot->base_gfn) >>
 		(kvm->arch.ram_porder - PAGE_SHIFT);
 	pa = *physp;
-	if (!pa)
-		return NULL;
+	if (!pa) {
+		if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0)
+			return NULL;
+		pa = *physp;
+	}
 	pfn = pa >> PAGE_SHIFT;
 	page = pfn_to_page(pfn);
 	get_page(page);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 82d71388eac..ce5a13fb974 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -49,6 +49,7 @@
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
+#include <linux/hugetlb.h>
 
 #define LARGE_PAGE_ORDER	24	/* 16MB pages */
 
@@ -57,6 +58,7 @@
 /* #define EXIT_DEBUG_INT */
 
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
+static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu);
 
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
@@ -231,6 +233,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 	struct kvm_vcpu *tvcpu;
 
 	switch (req) {
+	case H_ENTER:
+		ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
+					      kvmppc_get_gpr(vcpu, 5),
+					      kvmppc_get_gpr(vcpu, 6),
+					      kvmppc_get_gpr(vcpu, 7));
+		break;
 	case H_CEDE:
 		break;
 	case H_PROD:
@@ -851,9 +859,12 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		return -EINTR;
 	}
 
-	/* On PPC970, check that we have an RMA region */
-	if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201))
-		return -EPERM;
+	/* On the first time here, set up VRMA or RMA */
+	if (!vcpu->kvm->arch.rma_setup_done) {
+		r = kvmppc_hv_setup_rma(vcpu);
+		if (r)
+			return r;
+	}
 
 	flush_fp_to_thread(current);
 	flush_altivec_to_thread(current);
@@ -1063,34 +1074,15 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
 	return fd;
 }
 
-static struct page *hva_to_page(unsigned long addr)
-{
-	struct page *page[1];
-	int npages;
-
-	might_sleep();
-
-	npages = get_user_pages_fast(addr, 1, 1, page);
-
-	if (unlikely(npages != 1))
-		return 0;
-
-	return page[0];
-}
-
 int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem)
 {
-	unsigned long psize, porder;
-	unsigned long i, npages;
-	unsigned long hva;
-	struct kvmppc_rma_info *ri = NULL;
-	struct page *page;
+	unsigned long psize;
+	unsigned long npages;
 	unsigned long *phys;
 
-	/* For now, only allow 16MB pages */
-	porder = LARGE_PAGE_ORDER;
-	psize = 1ul << porder;
+	/* For now, only allow 16MB-aligned slots */
+	psize = kvm->arch.ram_psize;
 	if ((mem->memory_size & (psize - 1)) ||
 	    (mem->guest_phys_addr & (psize - 1))) {
 		pr_err("bad memory_size=%llx @ %llx\n",
@@ -1099,7 +1091,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 	}
 
 	/* Allocate a slot_phys array */
-	npages = mem->memory_size >> porder;
+	npages = mem->memory_size >> kvm->arch.ram_porder;
 	phys = kvm->arch.slot_phys[mem->slot];
 	if (!phys) {
 		phys = vzalloc(npages * sizeof(unsigned long));
@@ -1109,39 +1101,110 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 		kvm->arch.slot_npages[mem->slot] = npages;
 	}
 
-	/* Do we already have an RMA registered? */
-	if (mem->guest_phys_addr == 0 && kvm->arch.rma)
-		return -EINVAL;
+	return 0;
+}
 
-	/* Is this one of our preallocated RMAs? */
-	if (mem->guest_phys_addr == 0) {
-		struct vm_area_struct *vma;
-
-		down_read(&current->mm->mmap_sem);
-		vma = find_vma(current->mm, mem->userspace_addr);
-		if (vma && vma->vm_file &&
-		    vma->vm_file->f_op == &kvm_rma_fops &&
-		    mem->userspace_addr == vma->vm_start)
-			ri = vma->vm_file->private_data;
-		up_read(&current->mm->mmap_sem);
-		if (!ri && cpu_has_feature(CPU_FTR_ARCH_201)) {
-			pr_err("CPU requires an RMO\n");
-			return -EINVAL;
+static void unpin_slot(struct kvm *kvm, int slot_id)
+{
+	unsigned long *physp;
+	unsigned long j, npages, pfn;
+	struct page *page;
+
+	physp = kvm->arch.slot_phys[slot_id];
+	npages = kvm->arch.slot_npages[slot_id];
+	if (physp) {
+		spin_lock(&kvm->arch.slot_phys_lock);
+		for (j = 0; j < npages; j++) {
+			if (!(physp[j] & KVMPPC_GOT_PAGE))
+				continue;
+			pfn = physp[j] >> PAGE_SHIFT;
+			page = pfn_to_page(pfn);
+			SetPageDirty(page);
+			put_page(page);
 		}
+		kvm->arch.slot_phys[slot_id] = NULL;
+		spin_unlock(&kvm->arch.slot_phys_lock);
+		vfree(physp);
 	}
+}
+
+void kvmppc_core_commit_memory_region(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem)
+{
+}
+
+static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
+{
+	int err = 0;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvmppc_rma_info *ri = NULL;
+	unsigned long hva;
+	struct kvm_memory_slot *memslot;
+	struct vm_area_struct *vma;
+	unsigned long lpcr;
+	unsigned long psize, porder;
+	unsigned long rma_size;
+	unsigned long rmls;
+	unsigned long *physp;
+	unsigned long i, npages, pa;
+
+	mutex_lock(&kvm->lock);
+	if (kvm->arch.rma_setup_done)
+		goto out;	/* another vcpu beat us to it */
 
-	if (ri) {
-		unsigned long rma_size;
-		unsigned long lpcr;
-		long rmls;
+	/* Look up the memslot for guest physical address 0 */
+	memslot = gfn_to_memslot(kvm, 0);
 
-		rma_size = ri->npages << PAGE_SHIFT;
-		if (rma_size > mem->memory_size)
-			rma_size = mem->memory_size;
+	/* We must have some memory at 0 by now */
+	err = -EINVAL;
+	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+		goto out;
+
+	/* Look up the VMA for the start of this memory slot */
+	hva = memslot->userspace_addr;
+	down_read(&current->mm->mmap_sem);
+	vma = find_vma(current->mm, hva);
+	if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
+		goto up_out;
+
+	psize = vma_kernel_pagesize(vma);
+	if (psize != kvm->arch.ram_psize)
+		goto up_out;
+
+	/* Is this one of our preallocated RMAs? */
+	if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
+	    hva == vma->vm_start)
+		ri = vma->vm_file->private_data;
+
+	up_read(&current->mm->mmap_sem);
+
+	if (!ri) {
+		/* On POWER7, use VRMA; on PPC970, give up */
+		err = -EPERM;
+		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
+			pr_err("KVM: CPU requires an RMO\n");
+			goto out;
+		}
+
+		/* Update VRMASD field in the LPCR */
+		lpcr = kvm->arch.lpcr & ~(0x1fUL << LPCR_VRMASD_SH);
+		lpcr |= LPCR_VRMA_L;
+		kvm->arch.lpcr = lpcr;
+
+		/* Create HPTEs in the hash page table for the VRMA */
+		kvmppc_map_vrma(vcpu, memslot);
+
+	} else {
+		/* Set up to use an RMO region */
+		rma_size = ri->npages;
+		if (rma_size > memslot->npages)
+			rma_size = memslot->npages;
+		rma_size <<= PAGE_SHIFT;
 		rmls = lpcr_rmls(rma_size);
+		err = -EINVAL;
 		if (rmls < 0) {
-			pr_err("Can't use RMA of 0x%lx bytes\n", rma_size);
-			return -EINVAL;
+			pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
+			goto out;
 		}
 		atomic_inc(&ri->use_count);
 		kvm->arch.rma = ri;
@@ -1164,65 +1227,31 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 		kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
 	}
 	kvm->arch.lpcr = lpcr;
-	pr_info("Using RMO at %lx size %lx (LPCR = %lx)\n",
+	pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
 		ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
-	}
 
-	for (i = 0; i < npages; ++i) {
-		if (ri && i < ri->npages) {
-			phys[i] = (ri->base_pfn << PAGE_SHIFT) + (i << porder);
-			continue;
-		}
-		hva = mem->userspace_addr + (i << porder);
-		page = hva_to_page(hva);
-		if (!page) {
-			pr_err("oops, no pfn for hva %lx\n", hva);
-			goto err;
-		}
-		/* Check it's a 16MB page */
-		if (!PageHead(page) ||
-		    compound_order(page) != (LARGE_PAGE_ORDER - PAGE_SHIFT)) {
-			pr_err("page at %lx isn't 16MB (o=%d)\n",
-			       hva, compound_order(page));
-			goto err;
-		}
-		phys[i] = (page_to_pfn(page) << PAGE_SHIFT) | KVMPPC_GOT_PAGE;
+		/* Initialize phys addrs of pages in RMO */
+		porder = kvm->arch.ram_porder;
+		npages = rma_size >> porder;
+		pa = ri->base_pfn << PAGE_SHIFT;
+		physp = kvm->arch.slot_phys[memslot->id];
+		spin_lock(&kvm->arch.slot_phys_lock);
+		for (i = 0; i < npages; ++i)
+			physp[i] = pa + (i << porder);
+		spin_unlock(&kvm->arch.slot_phys_lock);
 	}
 
-	return 0;
-
- err:
-	return -EINVAL;
-}
-
-static void unpin_slot(struct kvm *kvm, int slot_id)
-{
-	unsigned long *physp;
-	unsigned long j, npages, pfn;
-	struct page *page;
-
-	physp = kvm->arch.slot_phys[slot_id];
-	npages = kvm->arch.slot_npages[slot_id];
-	if (physp) {
-		for (j = 0; j < npages; j++) {
-			if (!(physp[j] & KVMPPC_GOT_PAGE))
-				continue;
-			pfn = physp[j] >> PAGE_SHIFT;
-			page = pfn_to_page(pfn);
-			SetPageDirty(page);
-			put_page(page);
-		}
-		vfree(physp);
-		kvm->arch.slot_phys[slot_id] = NULL;
-	}
-}
+	/* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
+	smp_wmb();
+	kvm->arch.rma_setup_done = 1;
+	err = 0;
+ out:
+	mutex_unlock(&kvm->lock);
+	return err;
 
-void kvmppc_core_commit_memory_region(struct kvm *kvm,
-				struct kvm_userspace_memory_region *mem)
-{
-	if (mem->guest_phys_addr == 0 && mem->memory_size != 0 &&
-	    !kvm->arch.rma)
-		kvmppc_map_vrma(kvm, mem);
+ up_out:
+	up_read(&current->mm->mmap_sem);
+	goto out;
 }
 
 int kvmppc_core_init_vm(struct kvm *kvm)
@@ -1261,6 +1290,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 	}
 	kvm->arch.lpcr = lpcr;
 
+	spin_lock_init(&kvm->arch.slot_phys_lock);
 	return 0;
 }
 
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index a28a6030ec9..047c5e1fd70 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -11,6 +11,7 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/hugetlb.h>
+#include <linux/module.h>
 
 #include <asm/tlbflush.h>
 #include <asm/kvm_ppc.h>
@@ -56,56 +57,54 @@ static void *real_vmalloc_addr(void *x)
 long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 		    long pte_index, unsigned long pteh, unsigned long ptel)
 {
-	unsigned long porder;
 	struct kvm *kvm = vcpu->kvm;
-	unsigned long i, gfn, lpn, pa;
+	unsigned long i, pa, gpa, gfn, psize;
+	unsigned long slot_fn;
 	unsigned long *hpte;
 	struct revmap_entry *rev;
 	unsigned long g_ptel = ptel;
 	struct kvm_memory_slot *memslot;
-	unsigned long *physp;
+	unsigned long *physp, pte_size;
+	bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
 
-	/* only handle 4k, 64k and 16M pages for now */
-	porder = 12;
-	if (pteh & HPTE_V_LARGE) {
-		if (cpu_has_feature(CPU_FTR_ARCH_206) &&
-		    (ptel & 0xf000) == 0x1000) {
-			/* 64k page */
-			porder = 16;
-		} else if ((ptel & 0xff000) == 0) {
-			/* 16M page */
-			porder = 24;
-			/* lowest AVA bit must be 0 for 16M pages */
-			if (pteh & 0x80)
-				return H_PARAMETER;
-		} else
-			return H_PARAMETER;
-	}
-	if (porder > kvm->arch.ram_porder)
+	psize = hpte_page_size(pteh, ptel);
+	if (!psize)
 		return H_PARAMETER;
 
-	gfn = ((ptel & HPTE_R_RPN) & ~((1ul << porder) - 1)) >> PAGE_SHIFT;
+	/* Find the memslot (if any) for this address */
+	gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
+	gfn = gpa >> PAGE_SHIFT;
 	memslot = builtin_gfn_to_memslot(kvm, gfn);
 	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)))
 		return H_PARAMETER;
+	slot_fn = gfn - memslot->base_gfn;
+
 	physp = kvm->arch.slot_phys[memslot->id];
 	if (!physp)
 		return H_PARAMETER;
-
-	lpn = (gfn - memslot->base_gfn) >> (kvm->arch.ram_porder - PAGE_SHIFT);
-	physp = real_vmalloc_addr(physp + lpn);
+	physp += slot_fn;
+	if (realmode)
+		physp = real_vmalloc_addr(physp);
 	pa = *physp;
 	if (!pa)
-		return H_PARAMETER;
+		return H_TOO_HARD;
 	pa &= PAGE_MASK;
 
+	pte_size = kvm->arch.ram_psize;
+	if (pte_size < psize)
+		return H_PARAMETER;
+	if (pa && pte_size > psize)
+		pa |= gpa & (pte_size - 1);
+
+	ptel &= ~(HPTE_R_PP0 - psize);
+	ptel |= pa;
+
 	/* Check WIMG */
 	if ((ptel & HPTE_R_WIMG) != HPTE_R_M &&
 	    (ptel & HPTE_R_WIMG) != (HPTE_R_W | HPTE_R_I | HPTE_R_M))
 		return H_PARAMETER;
 	pteh &= ~0x60UL;
-	ptel &= ~(HPTE_R_PP0 - kvm->arch.ram_psize);
-	ptel |= pa;
+	pteh |= HPTE_V_VALID;
 
 	if (pte_index >= HPT_NPTE)
 		return H_PARAMETER;
@@ -162,6 +161,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	vcpu->arch.gpr[4] = pte_index;
 	return H_SUCCESS;
 }
+EXPORT_SYMBOL_GPL(kvmppc_h_enter);
 
 #define LOCK_TOKEN	(*(u32 *)(&get_paca()->lock_token))
 