aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Paul Mackerras <paulus@samba.org>, 2011-12-12 07:27:39 -0500
committer: Avi Kivity <avi@redhat.com>, 2012-03-05 07:52:35 -0500
commit: 8936dda4c2ed070ecebd786baf35b08584accf4a (patch)
tree: 7f75079f3814304050cbf880ecd7ddb9505f63a4
parent: 4e72dbe13528394a413889d73e5025dbdf6cab70 (diff)
KVM: PPC: Keep a record of HV guest view of hashed page table entries
This adds an array that parallels the guest hashed page table (HPT), that is, it has one entry per HPTE, used to store the guest's view of the second doubleword of the corresponding HPTE. The first doubleword in the HPTE is the same as the guest's idea of it, so we don't need to store a copy, but the second doubleword in the HPTE has the real page number rather than the guest's logical page number. This allows us to remove the back_translate() and reverse_xlate() functions.

This "reverse mapping" array is vmalloc'd, meaning that to access it in real mode we have to walk the kernel's page tables explicitly. That is done by the new real_vmalloc_addr() function. (In fact this returns an address in the linear mapping, so the result is usable both in real mode and in virtual mode.)

There are also some minor cleanups here: moving the definitions of HPT_ORDER etc. to a header file and defining HPT_NPTE for HPT_NPTEG << 3.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_64.h |  8
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h      | 10
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c      | 44
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c      | 87
4 files changed, 103 insertions, 46 deletions
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 2054e4726ba2..fa3dc79af702 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -35,6 +35,14 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
35 35
36#define SPAPR_TCE_SHIFT 12 36#define SPAPR_TCE_SHIFT 12
37 37
38#ifdef CONFIG_KVM_BOOK3S_64_HV
39/* For now use fixed-size 16MB page table */
40#define HPT_ORDER 24
41#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */
42#define HPT_NPTE (HPT_NPTEG << 3) /* 8 PTEs per PTEG */
43#define HPT_HASH_MASK (HPT_NPTEG - 1)
44#endif
45
38static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, 46static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
39 unsigned long pte_index) 47 unsigned long pte_index)
40{ 48{
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 66c75cddaec6..629df2ed22f7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -166,9 +166,19 @@ struct kvmppc_rma_info {
166 atomic_t use_count; 166 atomic_t use_count;
167}; 167};
168 168
169/*
170 * The reverse mapping array has one entry for each HPTE,
171 * which stores the guest's view of the second word of the HPTE
172 * (including the guest physical address of the mapping).
173 */
174struct revmap_entry {
175 unsigned long guest_rpte;
176};
177
169struct kvm_arch { 178struct kvm_arch {
170#ifdef CONFIG_KVM_BOOK3S_64_HV 179#ifdef CONFIG_KVM_BOOK3S_64_HV
171 unsigned long hpt_virt; 180 unsigned long hpt_virt;
181 struct revmap_entry *revmap;
172 unsigned long ram_npages; 182 unsigned long ram_npages;
173 unsigned long ram_psize; 183 unsigned long ram_psize;
174 unsigned long ram_porder; 184 unsigned long ram_porder;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index bc3a2ea94217..80ece8de4070 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -23,6 +23,7 @@
23#include <linux/gfp.h> 23#include <linux/gfp.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/hugetlb.h> 25#include <linux/hugetlb.h>
26#include <linux/vmalloc.h>
26 27
27#include <asm/tlbflush.h> 28#include <asm/tlbflush.h>
28#include <asm/kvm_ppc.h> 29#include <asm/kvm_ppc.h>
@@ -33,11 +34,6 @@
33#include <asm/ppc-opcode.h> 34#include <asm/ppc-opcode.h>
34#include <asm/cputable.h> 35#include <asm/cputable.h>
35 36
36/* For now use fixed-size 16MB page table */
37#define HPT_ORDER 24
38#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */
39#define HPT_HASH_MASK (HPT_NPTEG - 1)
40
41/* Pages in the VRMA are 16MB pages */ 37/* Pages in the VRMA are 16MB pages */
42#define VRMA_PAGE_ORDER 24 38#define VRMA_PAGE_ORDER 24
43#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ 39#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
@@ -51,7 +47,9 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
51{ 47{
52 unsigned long hpt; 48 unsigned long hpt;
53 unsigned long lpid; 49 unsigned long lpid;
50 struct revmap_entry *rev;
54 51
52 /* Allocate guest's hashed page table */
55 hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|__GFP_NOWARN, 53 hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|__GFP_NOWARN,
56 HPT_ORDER - PAGE_SHIFT); 54 HPT_ORDER - PAGE_SHIFT);
57 if (!hpt) { 55 if (!hpt) {
@@ -60,12 +58,20 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
60 } 58 }
61 kvm->arch.hpt_virt = hpt; 59 kvm->arch.hpt_virt = hpt;
62 60
61 /* Allocate reverse map array */
62 rev = vmalloc(sizeof(struct revmap_entry) * HPT_NPTE);
63 if (!rev) {
64 pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
65 goto out_freehpt;
66 }
67 kvm->arch.revmap = rev;
68
69 /* Allocate the guest's logical partition ID */
63 do { 70 do {
64 lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS); 71 lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS);
65 if (lpid >= NR_LPIDS) { 72 if (lpid >= NR_LPIDS) {
66 pr_err("kvm_alloc_hpt: No LPIDs free\n"); 73 pr_err("kvm_alloc_hpt: No LPIDs free\n");
67 free_pages(hpt, HPT_ORDER - PAGE_SHIFT); 74 goto out_freeboth;
68 return -ENOMEM;
69 } 75 }
70 } while (test_and_set_bit(lpid, lpid_inuse)); 76 } while (test_and_set_bit(lpid, lpid_inuse));
71 77
@@ -74,11 +80,18 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
74 80
75 pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid); 81 pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid);
76 return 0; 82 return 0;
83
84 out_freeboth:
85 vfree(rev);
86 out_freehpt:
87 free_pages(hpt, HPT_ORDER - PAGE_SHIFT);
88 return -ENOMEM;
77} 89}
78 90
79void kvmppc_free_hpt(struct kvm *kvm) 91void kvmppc_free_hpt(struct kvm *kvm)
80{ 92{
81 clear_bit(kvm->arch.lpid, lpid_inuse); 93 clear_bit(kvm->arch.lpid, lpid_inuse);
94 vfree(kvm->arch.revmap);
82 free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT); 95 free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
83} 96}
84 97
@@ -89,14 +102,16 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
89 unsigned long pfn; 102 unsigned long pfn;
90 unsigned long *hpte; 103 unsigned long *hpte;
91 unsigned long hash; 104 unsigned long hash;
105 unsigned long porder = kvm->arch.ram_porder;
106 struct revmap_entry *rev;
92 struct kvmppc_pginfo *pginfo = kvm->arch.ram_pginfo; 107 struct kvmppc_pginfo *pginfo = kvm->arch.ram_pginfo;
93 108
94 if (!pginfo) 109 if (!pginfo)
95 return; 110 return;
96 111
97 /* VRMA can't be > 1TB */ 112 /* VRMA can't be > 1TB */
98 if (npages > 1ul << (40 - kvm->arch.ram_porder)) 113 if (npages > 1ul << (40 - porder))
99 npages = 1ul << (40 - kvm->arch.ram_porder); 114 npages = 1ul << (40 - porder);
100 /* Can't use more than 1 HPTE per HPTEG */ 115 /* Can't use more than 1 HPTE per HPTEG */
101 if (npages > HPT_NPTEG) 116 if (npages > HPT_NPTEG)
102 npages = HPT_NPTEG; 117 npages = HPT_NPTEG;
@@ -113,15 +128,20 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
113 * at most one HPTE per HPTEG, we just assume entry 7 128 * at most one HPTE per HPTEG, we just assume entry 7
114 * is available and use it. 129 * is available and use it.
115 */ 130 */
116 hpte = (unsigned long *) (kvm->arch.hpt_virt + (hash << 7)); 131 hash = (hash << 3) + 7;
117 hpte += 7 * 2; 132 hpte = (unsigned long *) (kvm->arch.hpt_virt + (hash << 4));
118 /* HPTE low word - RPN, protection, etc. */ 133 /* HPTE low word - RPN, protection, etc. */
119 hpte[1] = (pfn << PAGE_SHIFT) | HPTE_R_R | HPTE_R_C | 134 hpte[1] = (pfn << PAGE_SHIFT) | HPTE_R_R | HPTE_R_C |
120 HPTE_R_M | PP_RWXX; 135 HPTE_R_M | PP_RWXX;
121 wmb(); 136 smp_wmb();
122 hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | 137 hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
123 (i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED | 138 (i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED |
124 HPTE_V_LARGE | HPTE_V_VALID; 139 HPTE_V_LARGE | HPTE_V_VALID;
140
141 /* Reverse map info */
142 rev = &kvm->arch.revmap[hash];
143 rev->guest_rpte = (i << porder) | HPTE_R_R | HPTE_R_C |
144 HPTE_R_M | PP_RWXX;
125 } 145 }
126} 146}
127 147
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index bacb0cfa3602..614849360a0a 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -20,10 +20,19 @@
20#include <asm/synch.h> 20#include <asm/synch.h>
21#include <asm/ppc-opcode.h> 21#include <asm/ppc-opcode.h>
22 22
23/* For now use fixed-size 16MB page table */ 23/* Translate address of a vmalloc'd thing to a linear map address */
24#define HPT_ORDER 24 24static void *real_vmalloc_addr(void *x)
25#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */ 25{
26#define HPT_HASH_MASK (HPT_NPTEG - 1) 26 unsigned long addr = (unsigned long) x;
27 pte_t *p;
28
29 p = find_linux_pte(swapper_pg_dir, addr);
30 if (!p || !pte_present(*p))
31 return NULL;
32 /* assume we don't have huge pages in vmalloc space... */
33 addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
34 return __va(addr);
35}
27 36
28#define HPTE_V_HVLOCK 0x40UL 37#define HPTE_V_HVLOCK 0x40UL
29 38
@@ -52,6 +61,8 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
52 struct kvm *kvm = vcpu->kvm; 61 struct kvm *kvm = vcpu->kvm;
53 unsigned long i, lpn, pa; 62 unsigned long i, lpn, pa;
54 unsigned long *hpte; 63 unsigned long *hpte;
64 struct revmap_entry *rev;
65 unsigned long g_ptel = ptel;
55 66
56 /* only handle 4k, 64k and 16M pages for now */ 67 /* only handle 4k, 64k and 16M pages for now */
57 porder = 12; 68 porder = 12;
@@ -82,7 +93,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
82 pteh &= ~0x60UL; 93 pteh &= ~0x60UL;
83 ptel &= ~(HPTE_R_PP0 - kvm->arch.ram_psize); 94 ptel &= ~(HPTE_R_PP0 - kvm->arch.ram_psize);
84 ptel |= pa; 95 ptel |= pa;
85 if (pte_index >= (HPT_NPTEG << 3)) 96 if (pte_index >= HPT_NPTE)
86 return H_PARAMETER; 97 return H_PARAMETER;
87 if (likely((flags & H_EXACT) == 0)) { 98 if (likely((flags & H_EXACT) == 0)) {
88 pte_index &= ~7UL; 99 pte_index &= ~7UL;
@@ -95,18 +106,22 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
95 break; 106 break;
96 hpte += 2; 107 hpte += 2;
97 } 108 }
109 pte_index += i;
98 } else { 110 } else {
99 i = 0;
100 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); 111 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
101 if (!lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) 112 if (!lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID))
102 return H_PTEG_FULL; 113 return H_PTEG_FULL;
103 } 114 }
115
116 /* Save away the guest's idea of the second HPTE dword */
117 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
118 if (rev)
119 rev->guest_rpte = g_ptel;
104 hpte[1] = ptel; 120 hpte[1] = ptel;
105 eieio(); 121 eieio();
106 hpte[0] = pteh; 122 hpte[0] = pteh;
107 asm volatile("ptesync" : : : "memory"); 123 asm volatile("ptesync" : : : "memory");
108 atomic_inc(&kvm->arch.ram_pginfo[lpn].refcnt); 124 vcpu->arch.gpr[4] = pte_index;
109 vcpu->arch.gpr[4] = pte_index + i;
110 return H_SUCCESS; 125 return H_SUCCESS;
111} 126}
112 127
@@ -138,7 +153,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
138 unsigned long *hpte; 153 unsigned long *hpte;
139 unsigned long v, r, rb; 154 unsigned long v, r, rb;
140 155
141 if (pte_index >= (HPT_NPTEG << 3)) 156 if (pte_index >= HPT_NPTE)
142 return H_PARAMETER; 157 return H_PARAMETER;
143 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); 158 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
144 while (!lock_hpte(hpte, HPTE_V_HVLOCK)) 159 while (!lock_hpte(hpte, HPTE_V_HVLOCK))
@@ -193,7 +208,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
193 if (req == 3) 208 if (req == 3)
194 break; 209 break;
195 if (req != 1 || flags == 3 || 210 if (req != 1 || flags == 3 ||
196 pte_index >= (HPT_NPTEG << 3)) { 211 pte_index >= HPT_NPTE) {
197 /* parameter error */ 212 /* parameter error */
198 args[i * 2] = ((0xa0 | flags) << 56) + pte_index; 213 args[i * 2] = ((0xa0 | flags) << 56) + pte_index;
199 ret = H_PARAMETER; 214 ret = H_PARAMETER;
@@ -256,9 +271,10 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
256{ 271{
257 struct kvm *kvm = vcpu->kvm; 272 struct kvm *kvm = vcpu->kvm;
258 unsigned long *hpte; 273 unsigned long *hpte;
259 unsigned long v, r, rb; 274 struct revmap_entry *rev;
275 unsigned long v, r, rb, mask, bits;
260 276
261 if (pte_index >= (HPT_NPTEG << 3)) 277 if (pte_index >= HPT_NPTE)
262 return H_PARAMETER; 278 return H_PARAMETER;
263 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); 279 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
264 while (!lock_hpte(hpte, HPTE_V_HVLOCK)) 280 while (!lock_hpte(hpte, HPTE_V_HVLOCK))
@@ -271,11 +287,21 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
271 if (atomic_read(&kvm->online_vcpus) == 1) 287 if (atomic_read(&kvm->online_vcpus) == 1)
272 flags |= H_LOCAL; 288 flags |= H_LOCAL;
273 v = hpte[0]; 289 v = hpte[0];
274 r = hpte[1] & ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | 290 bits = (flags << 55) & HPTE_R_PP0;
275 HPTE_R_KEY_HI | HPTE_R_KEY_LO); 291 bits |= (flags << 48) & HPTE_R_KEY_HI;
276 r |= (flags << 55) & HPTE_R_PP0; 292 bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
277 r |= (flags << 48) & HPTE_R_KEY_HI; 293
278 r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); 294 /* Update guest view of 2nd HPTE dword */
295 mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
296 HPTE_R_KEY_HI | HPTE_R_KEY_LO;
297 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
298 if (rev) {
299 r = (rev->guest_rpte & ~mask) | bits;
300 rev->guest_rpte = r;
301 }
302 r = (hpte[1] & ~mask) | bits;
303
304 /* Update HPTE */
279 rb = compute_tlbie_rb(v, r, pte_index); 305 rb = compute_tlbie_rb(v, r, pte_index);
280 hpte[0] = v & ~HPTE_V_VALID; 306 hpte[0] = v & ~HPTE_V_VALID;
281 if (!(flags & H_LOCAL)) { 307 if (!(flags & H_LOCAL)) {
@@ -298,38 +324,31 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
298 return H_SUCCESS; 324 return H_SUCCESS;
299} 325}
300 326
301static unsigned long reverse_xlate(struct kvm *kvm, unsigned long realaddr)
302{
303 long int i;
304 unsigned long offset, rpn;
305
306 offset = realaddr & (kvm->arch.ram_psize - 1);
307 rpn = (realaddr - offset) >> PAGE_SHIFT;
308 for (i = 0; i < kvm->arch.ram_npages; ++i)
309 if (rpn == kvm->arch.ram_pginfo[i].pfn)
310 return (i << PAGE_SHIFT) + offset;
311 return HPTE_R_RPN; /* all 1s in the RPN field */
312}
313
314long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, 327long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
315 unsigned long pte_index) 328 unsigned long pte_index)
316{ 329{
317 struct kvm *kvm = vcpu->kvm; 330 struct kvm *kvm = vcpu->kvm;
318 unsigned long *hpte, r; 331 unsigned long *hpte, r;
319 int i, n = 1; 332 int i, n = 1;
333 struct revmap_entry *rev = NULL;
320 334
321 if (pte_index >= (HPT_NPTEG << 3)) 335 if (pte_index >= HPT_NPTE)
322 return H_PARAMETER; 336 return H_PARAMETER;
323 if (flags & H_READ_4) { 337 if (flags & H_READ_4) {
324 pte_index &= ~3; 338 pte_index &= ~3;
325 n = 4; 339 n = 4;
326 } 340 }
341 if (flags & H_R_XLATE)
342 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
327 for (i = 0; i < n; ++i, ++pte_index) { 343 for (i = 0; i < n; ++i, ++pte_index) {
328 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); 344 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
329 r = hpte[1]; 345 r = hpte[1];
330 if ((flags & H_R_XLATE) && (hpte[0] & HPTE_V_VALID)) 346 if (hpte[0] & HPTE_V_VALID) {
331 r = reverse_xlate(kvm, r & HPTE_R_RPN) | 347 if (rev)
332 (r & ~HPTE_R_RPN); 348 r = rev[i].guest_rpte;
349 else
350 r = hpte[1] | HPTE_R_RPN;
351 }
333 vcpu->arch.gpr[4 + i * 2] = hpte[0]; 352 vcpu->arch.gpr[4 + i * 2] = hpte[0];
334 vcpu->arch.gpr[5 + i * 2] = r; 353 vcpu->arch.gpr[5 + i * 2] = r;
335 } 354 }