author		Paul Mackerras <paulus@samba.org>	2011-12-12 07:33:07 -0500
committer	Avi Kivity <avi@redhat.com>		2012-03-05 07:52:37 -0500
commit		06ce2c63d933e347f8a199f123a8a293619ab3d2 (patch)
tree		455cd4b0e245675f542649fe509797f60adfe76f
parent		9d0ef5ea043d1242897d15c71bd1a15da79b4a5d (diff)
KVM: PPC: Maintain a doubly-linked list of guest HPTEs for each gfn
This expands the reverse mapping array to contain two links for each
HPTE which are used to link together HPTEs that correspond to the
same guest logical page.  Each circular list of HPTEs is pointed to
by the rmap array entry for the guest logical page, pointed to by
the relevant memslot.  Links are 32-bit HPT entry indexes rather than
full 64-bit pointers, to save space.  We use 3 of the remaining 32
bits in the rmap array entries as a lock bit, a referenced bit and a
present bit (the present bit is needed since HPTE index 0 is valid).
The bit lock for the rmap chain nests inside the HPTE lock bit.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
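For readers new to the encoding described above, here is a minimal stand-alone sketch (not part of the patch) of how one of the memslot->rmap words is packed and unpacked, using the KVMPPC_RMAP_* masks that the patch adds to kvm_host.h. The rmap_make() helper and the main() driver are invented purely for illustration and assume a 64-bit unsigned long.

/*
 * Illustrative sketch only -- not kernel code.  It packs an rmap word
 * the same way kvmppc_add_revmap_chain() below does (index | referenced |
 * present) and then decodes the individual fields.  Assumes LP64.
 */
#include <stdio.h>

#define KVMPPC_RMAP_LOCK_BIT	63
#define KVMPPC_RMAP_REF_BIT	33
#define KVMPPC_RMAP_REFERENCED	(1ul << KVMPPC_RMAP_REF_BIT)
#define KVMPPC_RMAP_PRESENT	0x100000000ul
#define KVMPPC_RMAP_INDEX	0xfffffffful

/* Hypothetical helper: build an rmap word whose chain head is HPTE index i */
static unsigned long rmap_make(unsigned int i)
{
	return (unsigned long)i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT;
}

int main(void)
{
	unsigned long rmap = rmap_make(0x1234);

	printf("present:    %d\n", !!(rmap & KVMPPC_RMAP_PRESENT));
	printf("referenced: %d\n", !!(rmap & KVMPPC_RMAP_REFERENCED));
	printf("locked:     %d\n", !!(rmap & (1ul << KVMPPC_RMAP_LOCK_BIT)));
	printf("HPT index:  0x%lx\n", rmap & KVMPPC_RMAP_INDEX);
	return 0;
}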
-rw-r--r--	arch/powerpc/include/asm/kvm_book3s_64.h	18
-rw-r--r--	arch/powerpc/include/asm/kvm_host.h		17
-rw-r--r--	arch/powerpc/kvm/book3s_hv_rm_mmu.c		84
3 files changed, 117 insertions, 2 deletions
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 18b590d261ff..9508c03e6671 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -113,6 +113,11 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
 	return 0;		/* error */
 }
 
+static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
+{
+	return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
+}
+
 static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
 {
 	unsigned int wimg = ptel & HPTE_R_WIMG;
@@ -139,6 +144,19 @@ static inline unsigned long hpte_cache_bits(unsigned long pte_val)
 #endif
 }
 
+static inline void lock_rmap(unsigned long *rmap)
+{
+	do {
+		while (test_bit(KVMPPC_RMAP_LOCK_BIT, rmap))
+			cpu_relax();
+	} while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmap));
+}
+
+static inline void unlock_rmap(unsigned long *rmap)
+{
+	__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmap);
+}
+
 static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
 				   unsigned long pagesize)
 {
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 243bc8038572..97cb2d7865f3 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -170,12 +170,27 @@ struct kvmppc_rma_info {
 /*
  * The reverse mapping array has one entry for each HPTE,
  * which stores the guest's view of the second word of the HPTE
- * (including the guest physical address of the mapping).
+ * (including the guest physical address of the mapping),
+ * plus forward and backward pointers in a doubly-linked ring
+ * of HPTEs that map the same host page.  The pointers in this
+ * ring are 32-bit HPTE indexes, to save space.
  */
 struct revmap_entry {
 	unsigned long guest_rpte;
+	unsigned int forw, back;
 };
 
+/*
+ * We use the top bit of each memslot->rmap entry as a lock bit,
+ * and bit 32 as a present flag.  The bottom 32 bits are the
+ * index in the guest HPT of a HPTE that points to the page.
+ */
+#define KVMPPC_RMAP_LOCK_BIT	63
+#define KVMPPC_RMAP_REF_BIT	33
+#define KVMPPC_RMAP_REFERENCED	(1ul << KVMPPC_RMAP_REF_BIT)
+#define KVMPPC_RMAP_PRESENT	0x100000000ul
+#define KVMPPC_RMAP_INDEX	0xfffffffful
+
 /* Low-order bits in kvm->arch.slot_phys[][] */
 #define KVMPPC_PAGE_ORDER_MASK	0x1f
 #define KVMPPC_PAGE_NO_CACHE	HPTE_R_I	/* 0x20 */
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 3f5b016490d0..5b31caa4b314 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -54,6 +54,70 @@ static void *real_vmalloc_addr(void *x)
 	return __va(addr);
 }
 
+/*
+ * Add this HPTE into the chain for the real page.
+ * Must be called with the chain locked; it unlocks the chain.
+ */
+static void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
+				    unsigned long *rmap, long pte_index, int realmode)
+{
+	struct revmap_entry *head, *tail;
+	unsigned long i;
+
+	if (*rmap & KVMPPC_RMAP_PRESENT) {
+		i = *rmap & KVMPPC_RMAP_INDEX;
+		head = &kvm->arch.revmap[i];
+		if (realmode)
+			head = real_vmalloc_addr(head);
+		tail = &kvm->arch.revmap[head->back];
+		if (realmode)
+			tail = real_vmalloc_addr(tail);
+		rev->forw = i;
+		rev->back = head->back;
+		tail->forw = pte_index;
+		head->back = pte_index;
+	} else {
+		rev->forw = rev->back = pte_index;
+		i = pte_index;
+	}
+	smp_wmb();
+	*rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */
+}
+
+/* Remove this HPTE from the chain for a real page */
+static void remove_revmap_chain(struct kvm *kvm, long pte_index,
+				unsigned long hpte_v)
+{
+	struct revmap_entry *rev, *next, *prev;
+	unsigned long gfn, ptel, head;
+	struct kvm_memory_slot *memslot;
+	unsigned long *rmap;
+
+	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+	ptel = rev->guest_rpte;
+	gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
+	memslot = builtin_gfn_to_memslot(kvm, gfn);
+	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+		return;
+
+	rmap = real_vmalloc_addr(&memslot->rmap[gfn - memslot->base_gfn]);
+	lock_rmap(rmap);
+
+	head = *rmap & KVMPPC_RMAP_INDEX;
+	next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]);
+	prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]);
+	next->back = rev->back;
+	prev->forw = rev->forw;
+	if (head == pte_index) {
+		head = rev->forw;
+		if (head == pte_index)
+			*rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
+		else
+			*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
+	}
+	unlock_rmap(rmap);
+}
+
 long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 		    long pte_index, unsigned long pteh, unsigned long ptel)
 {
@@ -66,6 +130,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	struct kvm_memory_slot *memslot;
 	unsigned long *physp, pte_size;
 	unsigned long is_io;
+	unsigned long *rmap;
 	bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
 
 	psize = hpte_page_size(pteh, ptel);
@@ -83,6 +148,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	if (!slot_is_aligned(memslot, psize))
 		return H_PARAMETER;
 	slot_fn = gfn - memslot->base_gfn;
+	rmap = &memslot->rmap[slot_fn];
 
 	physp = kvm->arch.slot_phys[memslot->id];
 	if (!physp)
@@ -164,13 +230,25 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	}
 
 	/* Save away the guest's idea of the second HPTE dword */
-	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+	rev = &kvm->arch.revmap[pte_index];
+	if (realmode)
+		rev = real_vmalloc_addr(rev);
 	if (rev)
 		rev->guest_rpte = g_ptel;
+
+	/* Link HPTE into reverse-map chain */
+	if (realmode)
+		rmap = real_vmalloc_addr(rmap);
+	lock_rmap(rmap);
+	kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, realmode);
+
 	hpte[1] = ptel;
+
+	/* Write the first HPTE dword, unlocking the HPTE and making it valid */
 	eieio();
 	hpte[0] = pteh;
 	asm volatile("ptesync" : : : "memory");
+
 	vcpu->arch.gpr[4] = pte_index;
 	return H_SUCCESS;
 }
@@ -220,6 +298,8 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
 	vcpu->arch.gpr[4] = v = hpte[0] & ~HPTE_V_HVLOCK;
 	vcpu->arch.gpr[5] = r = hpte[1];
 	rb = compute_tlbie_rb(v, r, pte_index);
+	remove_revmap_chain(kvm, pte_index, v);
+	smp_wmb();
 	hpte[0] = 0;
 	if (!(flags & H_LOCAL)) {
 		while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
@@ -293,6 +373,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 		flags |= (hp[1] >> 5) & 0x0c;
 		args[i * 2] = ((0x80 | flags) << 56) + pte_index;
 		tlbrb[n_inval++] = compute_tlbie_rb(hp[0], hp[1], pte_index);
+		remove_revmap_chain(kvm, pte_index, hp[0]);
+		smp_wmb();
 		hp[0] = 0;
 	}
 	if (n_inval == 0)
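
Since the list manipulation in the patch is interleaved with real-mode address translation, here is a hedged user-space sketch of just the circular doubly-linked ring threaded through an array by 32-bit indexes, mirroring the forw/back links of struct revmap_entry and the insert/remove steps of kvmppc_add_revmap_chain() and remove_revmap_chain() above. The entries[] array, the head variable and the ring_add()/ring_del() names are invented for this example.

/*
 * Illustrative sketch only -- not kernel code.  A circular doubly-linked
 * ring threaded through an array by 32-bit indexes, like the forw/back
 * fields the patch adds to struct revmap_entry.  Names and sizes are
 * invented for the example.
 */
#include <assert.h>

struct entry {
	unsigned int forw, back;	/* indexes into entries[], as in revmap_entry */
};

static struct entry entries[16];

/* Insert entry i into the ring headed by *head; 'present' says whether a ring exists. */
static void ring_add(unsigned int *head, int present, unsigned int i)
{
	if (present) {
		unsigned int h = *head;

		entries[i].forw = h;
		entries[i].back = entries[h].back;
		entries[entries[h].back].forw = i;	/* old tail now points forward to i */
		entries[h].back = i;			/* i becomes the new tail */
	} else {
		entries[i].forw = entries[i].back = i;	/* singleton ring */
		*head = i;
	}
}

/* Unlink entry i from the ring; returns 0 if the ring became empty, else 1. */
static int ring_del(unsigned int *head, unsigned int i)
{
	entries[entries[i].forw].back = entries[i].back;
	entries[entries[i].back].forw = entries[i].forw;
	if (*head == i) {
		if (entries[i].forw == i)
			return 0;		/* stands in for clearing KVMPPC_RMAP_PRESENT */
		*head = entries[i].forw;	/* stands in for rewriting KVMPPC_RMAP_INDEX */
	}
	return 1;
}

int main(void)
{
	unsigned int head = 0;

	ring_add(&head, 0, 3);			/* first mapping of the page */
	ring_add(&head, 1, 7);			/* second HPTE for the same page */
	assert(entries[3].forw == 7 && entries[7].back == 3);
	assert(ring_del(&head, 3) == 1 && head == 7);
	assert(ring_del(&head, 7) == 0);	/* ring now empty */
	return 0;
}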