author     Paul Mackerras <paulus@samba.org>   2013-09-20 00:52:48 -0400
committer  Alexander Graf <agraf@suse.de>      2013-10-17 08:45:04 -0400
commit     9308ab8e2da933d895ebbb903bf459e33ed94dec
tree       7b0b1dd03688fa0396a283ba53c29084dcb69eb0  /arch/powerpc/kvm
parent     5cd92a9521d35013fff904d3c805323027a33d73
KVM: PPC: Book3S PR: Make HPT accesses and updates SMP-safe
This adds a per-VM mutex to provide mutual exclusion between vcpus
for accesses to and updates of the guest hashed page table (HPT).
This also makes the code use single-byte writes to the HPT entry
when updating of the reference (R) and change (C) bits. The reason
for doing this, rather than writing back the whole HPTE, is that on
non-PAPR virtual machines, the guest OS might be writing to the HPTE
concurrently, and writing back the whole HPTE might conflict with
that. Also, real hardware does single-byte writes to update R and C.
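The byte offsets used by those single-byte writes follow from the big-endian
layout of the HPTE words: byte 0 is the most significant, so the byte holding
bit N (counted from the least-significant bit) of a 64-bit word is byte
7 - N/8, and of a 32-bit word byte 3 - N/8. A minimal sketch of that
arithmetic (the helper name is illustrative, not part of the patch):

	/*
	 * Illustrative only -- not part of this patch.  The R/C (accessed/dirty)
	 * bits sit in the low-order bits of the second HPTE word, which the
	 * guest stores big-endian, so a single-byte update targets:
	 *
	 *   64-bit HPT: HPTE_R_R (bit 8) -> byte 6   put_user(r >> 8, addr + 6)
	 *               HPTE_R_C (bit 7) -> byte 7   put_user(r,      addr + 7)
	 *   32-bit HPT: ACCESSED (bit 8) -> byte 2   put_user(pte_r >> 8, addr + 2)
	 *               DIRTY    (bit 7) -> byte 3   put_user(pte_r,      addr + 3)
	 */
	static inline unsigned int be_byte_of_bit(unsigned int word_bytes,
						  unsigned int bit)
	{
		return (word_bytes - 1) - bit / 8;
	}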
The new mutex is taken in kvmppc_mmu_book3s_64_xlate() when reading
the HPT and updating R and/or C, and in the PAPR HPT update hcalls
(H_ENTER, H_REMOVE, etc.). Having the mutex means that we don't need
to use a hypervisor lock bit in the HPT update hcalls, and we don't
need to be careful about the order in which the bytes of the HPTE are
updated by those hcalls.
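As a rough sketch (the skeleton below is illustrative, not code from the
patch), every PAPR HPT hcall now has the following shape; the real lock and
unlock sites are visible in the book3s_pr_papr.c hunks further down, and the
hpt_mutex field itself is declared in the powerpc kvm_arch struct, outside
the directory this diffstat is limited to.

	/* Illustrative skeleton only -- the actual hcalls are in book3s_pr_papr.c. */
	static long hpt_hcall_skeleton(struct kvm_vcpu *vcpu,
				       long (*body)(struct kvm_vcpu *vcpu))
	{
		long ret;

		mutex_lock(&vcpu->kvm->arch.hpt_mutex);
		ret = body(vcpu);	/* read, validate and update the HPTE group */
		mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
		return ret;
	}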
The other change here is to make emulated TLB invalidations (tlbie)
effective across all vcpus. To do this we call kvmppc_mmu_pte_vflush
for all vcpus in kvmppc_mmu_book3s_64_tlbie().
For 32-bit, this makes the setting of the accessed and dirty bits use
single-byte writes, and makes tlbie invalidate shadow HPTEs for all
vcpus.
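Condensed, the cross-vcpu flush both tlbie emulations now perform looks like
the sketch below (a hypothetical helper; the patch open-codes the loop in
each handler):

	/* Hypothetical helper -- this patch open-codes the loop in both tlbie paths. */
	static void flush_vpage_on_all_vcpus(struct kvm *kvm, u64 vpage, u64 mask)
	{
		int i;
		struct kvm_vcpu *v;

		kvm_for_each_vcpu(i, v, kvm)
			kvmppc_mmu_pte_vflush(v, vpage, mask);
	}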
With this, PR KVM can successfully run SMP guests.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Diffstat (limited to 'arch/powerpc/kvm')
 arch/powerpc/kvm/book3s_32_mmu.c  | 36
 arch/powerpc/kvm/book3s_64_mmu.c  | 33
 arch/powerpc/kvm/book3s_pr.c      |  1
 arch/powerpc/kvm/book3s_pr_papr.c | 33
 4 files changed, 69 insertions(+), 34 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index af045533e685..856af988ad59 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -271,19 +271,22 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
 	/* Update PTE C and A bits, so the guest's swapper knows we used the
 	   page */
 	if (found) {
-		u32 oldpte = pteg[i+1];
-
-		if (pte->may_read)
-			pteg[i+1] |= PTEG_FLAG_ACCESSED;
-		if (pte->may_write)
-			pteg[i+1] |= PTEG_FLAG_DIRTY;
-		else
-			dprintk_pte("KVM: Mapping read-only page!\n");
-
-		/* Write back into the PTEG */
-		if (pteg[i+1] != oldpte)
-			copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
-
+		u32 pte_r = pteg[i+1];
+		char __user *addr = (char __user *) &pteg[i+1];
+
+		/*
+		 * Use single-byte writes to update the HPTE, to
+		 * conform to what real hardware does.
+		 */
+		if (pte->may_read && !(pte_r & PTEG_FLAG_ACCESSED)) {
+			pte_r |= PTEG_FLAG_ACCESSED;
+			put_user(pte_r >> 8, addr + 2);
+		}
+		if (pte->may_write && !(pte_r & PTEG_FLAG_DIRTY)) {
+			/* XXX should only set this for stores */
+			pte_r |= PTEG_FLAG_DIRTY;
+			put_user(pte_r, addr + 3);
+		}
 		return 0;
 	}
 
@@ -348,7 +351,12 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
 
 static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large)
 {
-	kvmppc_mmu_pte_flush(vcpu, ea, 0x0FFFF000);
+	int i;
+	struct kvm_vcpu *v;
+
+	/* flush this VA on all cpus */
+	kvm_for_each_vcpu(i, v, vcpu->kvm)
+		kvmppc_mmu_pte_flush(v, ea, 0x0FFFF000);
 }
 
 static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 9e6e11270b70..ad9ecfd29c4c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -257,6 +257,8 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 
 	pgsize = slbe->large ? MMU_PAGE_16M : MMU_PAGE_4K;
 
+	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
+
 do_second:
 	ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
 	if (kvm_is_error_hva(ptegp))
@@ -332,30 +334,37 @@ do_second:
 
 	/* Update PTE R and C bits, so the guest's swapper knows we used the
 	 * page */
-	if (gpte->may_read) {
-		/* Set the accessed flag */
+	if (gpte->may_read && !(r & HPTE_R_R)) {
+		/*
+		 * Set the accessed flag.
+		 * We have to write this back with a single byte write
+		 * because another vcpu may be accessing this on
+		 * non-PAPR platforms such as mac99, and this is
+		 * what real hardware does.
+		 */
+		char __user *addr = (char __user *) &pteg[i+1];
 		r |= HPTE_R_R;
+		put_user(r >> 8, addr + 6);
 	}
-	if (data && gpte->may_write) {
+	if (data && gpte->may_write && !(r & HPTE_R_C)) {
 		/* Set the dirty flag -- XXX even if not writing */
+		/* Use a single byte write */
+		char __user *addr = (char __user *) &pteg[i+1];
 		r |= HPTE_R_C;
+		put_user(r, addr + 7);
 	}
 
-	/* Write back into the PTEG */
-	if (pteg[i+1] != r) {
-		pteg[i+1] = r;
-		copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
-	}
+	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
 
 	if (!gpte->may_read)
 		return -EPERM;
 	return 0;
 
 no_page_found:
+	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
 	return -ENOENT;
 
 no_seg_found:
-
 	dprintk("KVM MMU: Trigger segment fault\n");
 	return -EINVAL;
 }
@@ -520,6 +529,8 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
 				       bool large)
 {
 	u64 mask = 0xFFFFFFFFFULL;
+	long i;
+	struct kvm_vcpu *v;
 
 	dprintk("KVM MMU: tlbie(0x%lx)\n", va);
 
@@ -542,7 +553,9 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
 		if (large)
 			mask = 0xFFFFFF000ULL;
 	}
-	kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask);
+	/* flush this VA on all vcpus */
+	kvm_for_each_vcpu(i, v, vcpu->kvm)
+		kvmppc_mmu_pte_vflush(v, va >> 12, mask);
 }
 
 #ifdef CONFIG_PPC_64K_PAGES
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index e9e8c748e673..4fa73c3f5713 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1422,6 +1422,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
 	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
 #endif
+	mutex_init(&kvm->arch.hpt_mutex);
 
 	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
 		spin_lock(&kvm_global_user_count_lock);
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index 38f189975fe1..5efa97b993d8 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -48,6 +48,7 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
 	pte_index &= ~7UL;
 	pteg_addr = get_pteg_addr(vcpu, pte_index);
 
+	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
 	copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg));
 	hpte = pteg;
 
@@ -74,6 +75,7 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
 	ret = H_SUCCESS;
 
 done:
+	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
 	kvmppc_set_gpr(vcpu, 3, ret);
 
 	return EMULATE_DONE;
@@ -86,26 +88,31 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
 	unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
 	unsigned long v = 0, pteg, rb;
 	unsigned long pte[2];
+	long int ret;
 
 	pteg = get_pteg_addr(vcpu, pte_index);
+	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
 	copy_from_user(pte, (void __user *)pteg, sizeof(pte));
 
+	ret = H_NOT_FOUND;
 	if ((pte[0] & HPTE_V_VALID) == 0 ||
 	    ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) ||
-	    ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) {
-		kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND);
-		return EMULATE_DONE;
-	}
+	    ((flags & H_ANDCOND) && (pte[0] & avpn) != 0))
+		goto done;
 
 	copy_to_user((void __user *)pteg, &v, sizeof(v));
 
 	rb = compute_tlbie_rb(pte[0], pte[1], pte_index);
 	vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
 
-	kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+	ret = H_SUCCESS;
 	kvmppc_set_gpr(vcpu, 4, pte[0]);
 	kvmppc_set_gpr(vcpu, 5, pte[1]);
 
+done:
+	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+	kvmppc_set_gpr(vcpu, 3, ret);
+
 	return EMULATE_DONE;
 }
 
@@ -133,6 +140,7 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
 	int paramnr = 4;
 	int ret = H_SUCCESS;
 
+	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
 	for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
 		unsigned long tsh = kvmppc_get_gpr(vcpu, paramnr+(2*i));
 		unsigned long tsl = kvmppc_get_gpr(vcpu, paramnr+(2*i)+1);
@@ -181,6 +189,7 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
 		}
 		kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh);
 	}
+	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
 	kvmppc_set_gpr(vcpu, 3, ret);
 
 	return EMULATE_DONE;
@@ -193,15 +202,16 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
 	unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
 	unsigned long rb, pteg, r, v;
 	unsigned long pte[2];
+	long int ret;
 
 	pteg = get_pteg_addr(vcpu, pte_index);
+	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
 	copy_from_user(pte, (void __user *)pteg, sizeof(pte));
 
+	ret = H_NOT_FOUND;
 	if ((pte[0] & HPTE_V_VALID) == 0 ||
-	    ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) {
-		kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND);
-		return EMULATE_DONE;
-	}
+	    ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn))
+		goto done;
 
 	v = pte[0];
 	r = pte[1];
@@ -216,8 +226,11 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
 	rb = compute_tlbie_rb(v, r, pte_index);
 	vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
 	copy_to_user((void __user *)pteg, pte, sizeof(pte));
+	ret = H_SUCCESS;
 
-	kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+done:
+	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+	kvmppc_set_gpr(vcpu, 3, ret);
 
 	return EMULATE_DONE;
 }