diff options
author | Paul Mackerras <paulus@samba.org> | 2013-04-18 15:51:04 -0400 |
---|---|---|
committer | Alexander Graf <agraf@suse.de> | 2013-04-26 14:27:13 -0400 |
commit | c35635efdc0312e013ebda1c8f3b5dd038c0d0e7 (patch) | |
tree | b4a0f054975b1d7530a9d7de85f8ac1b9fe8c60c | |
parent | a1b4a0f6064aacad0d708105e6f60a06e93fbf37 (diff) |
KVM: PPC: Book3S HV: Report VPA and DTL modifications in dirty map
At present, the KVM_GET_DIRTY_LOG ioctl doesn't report modifications
done by the host to the virtual processor areas (VPAs) and dispatch
trace logs (DTLs) registered by the guest. This is because those
modifications are done either in real mode or in the host kernel
context, and in neither case does the access go through the guest's
HPT — thus no change (C) bit ever gets set in the HPT for those pages.
However, the changes done by the host do need to be tracked so that
the modified pages get transferred when doing live migration. In
order to track these modifications, this adds a dirty flag to the
struct representing the VPA/DTL areas, and arranges to set the flag
when the VPA/DTL gets modified by the host. Then, when we are
collecting the dirty log, we also check the dirty flags for the
VPA and DTL for each vcpu and set the relevant bit in the dirty log
if necessary. Doing this also means we now need to keep track of
the guest physical address of the VPA/DTL areas.
So as not to lose track of modifications to a VPA/DTL area when it gets
unregistered, or when a new area gets registered in its place, we need
to transfer the dirty state to the rmap chain. This adds code to
kvmppc_unpin_guest_page() to perform that transfer if the area was dirty. To simplify
that code, we now require that all VPA, DTL and SLB shadow buffer areas
fit within a single host page. Guests already comply with this
requirement because pHyp requires that these areas not cross a 4k
boundary.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
-rw-r--r-- | arch/powerpc/include/asm/kvm_book3s.h | 3 | ||||
-rw-r--r-- | arch/powerpc/include/asm/kvm_host.h | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/asm-offsets.c | 1 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_hv.c | 61 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 30 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 |
6 files changed, 80 insertions, 21 deletions
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index bc81842ea25a..c55f7e6affaa 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h | |||
@@ -156,7 +156,8 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, | |||
156 | unsigned long pte_index); | 156 | unsigned long pte_index); |
157 | extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, | 157 | extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, |
158 | unsigned long *nb_ret); | 158 | unsigned long *nb_ret); |
159 | extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr); | 159 | extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr, |
160 | unsigned long gpa, bool dirty); | ||
160 | extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, | 161 | extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, |
161 | long pte_index, unsigned long pteh, unsigned long ptel); | 162 | long pte_index, unsigned long pteh, unsigned long ptel); |
162 | extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, | 163 | extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, |
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 8a48e686a755..1443768a6588 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
@@ -301,11 +301,13 @@ struct kvmppc_vcore { | |||
301 | * that a guest can register. | 301 | * that a guest can register. |
302 | */ | 302 | */ |
303 | struct kvmppc_vpa { | 303 | struct kvmppc_vpa { |
304 | unsigned long gpa; /* Current guest phys addr */ | ||
304 | void *pinned_addr; /* Address in kernel linear mapping */ | 305 | void *pinned_addr; /* Address in kernel linear mapping */ |
305 | void *pinned_end; /* End of region */ | 306 | void *pinned_end; /* End of region */ |
306 | unsigned long next_gpa; /* Guest phys addr for update */ | 307 | unsigned long next_gpa; /* Guest phys addr for update */ |
307 | unsigned long len; /* Number of bytes required */ | 308 | unsigned long len; /* Number of bytes required */ |
308 | u8 update_pending; /* 1 => update pinned_addr from next_gpa */ | 309 | u8 update_pending; /* 1 => update pinned_addr from next_gpa */ |
310 | bool dirty; /* true => area has been modified by kernel */ | ||
309 | }; | 311 | }; |
310 | 312 | ||
311 | struct kvmppc_pte { | 313 | struct kvmppc_pte { |
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d87c90886c75..dbfd5498f440 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
@@ -477,6 +477,7 @@ int main(void) | |||
477 | DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); | 477 | DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); |
478 | DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); | 478 | DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); |
479 | DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); | 479 | DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); |
480 | DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); | ||
480 | #endif | 481 | #endif |
481 | #ifdef CONFIG_PPC_BOOK3S | 482 | #ifdef CONFIG_PPC_BOOK3S |
482 | DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); | 483 | DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); |
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index d641a6634b02..69efe0d6cedc 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c | |||
@@ -1099,11 +1099,30 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) | |||
1099 | return ret; | 1099 | return ret; |
1100 | } | 1100 | } |
1101 | 1101 | ||
1102 | static void harvest_vpa_dirty(struct kvmppc_vpa *vpa, | ||
1103 | struct kvm_memory_slot *memslot, | ||
1104 | unsigned long *map) | ||
1105 | { | ||
1106 | unsigned long gfn; | ||
1107 | |||
1108 | if (!vpa->dirty || !vpa->pinned_addr) | ||
1109 | return; | ||
1110 | gfn = vpa->gpa >> PAGE_SHIFT; | ||
1111 | if (gfn < memslot->base_gfn || | ||
1112 | gfn >= memslot->base_gfn + memslot->npages) | ||
1113 | return; | ||
1114 | |||
1115 | vpa->dirty = false; | ||
1116 | if (map) | ||
1117 | __set_bit_le(gfn - memslot->base_gfn, map); | ||
1118 | } | ||
1119 | |||
1102 | long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, | 1120 | long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, |
1103 | unsigned long *map) | 1121 | unsigned long *map) |
1104 | { | 1122 | { |
1105 | unsigned long i; | 1123 | unsigned long i; |
1106 | unsigned long *rmapp; | 1124 | unsigned long *rmapp; |
1125 | struct kvm_vcpu *vcpu; | ||
1107 | 1126 | ||
1108 | preempt_disable(); | 1127 | preempt_disable(); |
1109 | rmapp = memslot->arch.rmap; | 1128 | rmapp = memslot->arch.rmap; |
@@ -1112,6 +1131,15 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, | |||
1112 | __set_bit_le(i, map); | 1131 | __set_bit_le(i, map); |
1113 | ++rmapp; | 1132 | ++rmapp; |
1114 | } | 1133 | } |
1134 | |||
1135 | /* Harvest dirty bits from VPA and DTL updates */ | ||
1136 | /* Note: we never modify the SLB shadow buffer areas */ | ||
1137 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
1138 | spin_lock(&vcpu->arch.vpa_update_lock); | ||
1139 | harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map); | ||
1140 | harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map); | ||
1141 | spin_unlock(&vcpu->arch.vpa_update_lock); | ||
1142 | } | ||
1115 | preempt_enable(); | 1143 | preempt_enable(); |
1116 | return 0; | 1144 | return 0; |
1117 | } | 1145 | } |
@@ -1123,7 +1151,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, | |||
1123 | unsigned long gfn = gpa >> PAGE_SHIFT; | 1151 | unsigned long gfn = gpa >> PAGE_SHIFT; |
1124 | struct page *page, *pages[1]; | 1152 | struct page *page, *pages[1]; |
1125 | int npages; | 1153 | int npages; |
1126 | unsigned long hva, psize, offset; | 1154 | unsigned long hva, offset; |
1127 | unsigned long pa; | 1155 | unsigned long pa; |
1128 | unsigned long *physp; | 1156 | unsigned long *physp; |
1129 | int srcu_idx; | 1157 | int srcu_idx; |
@@ -1155,14 +1183,9 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, | |||
1155 | } | 1183 | } |
1156 | srcu_read_unlock(&kvm->srcu, srcu_idx); | 1184 | srcu_read_unlock(&kvm->srcu, srcu_idx); |
1157 | 1185 | ||
1158 | psize = PAGE_SIZE; | 1186 | offset = gpa & (PAGE_SIZE - 1); |
1159 | if (PageHuge(page)) { | ||
1160 | page = compound_head(page); | ||
1161 | psize <<= compound_order(page); | ||
1162 | } | ||
1163 | offset = gpa & (psize - 1); | ||
1164 | if (nb_ret) | 1187 | if (nb_ret) |
1165 | *nb_ret = psize - offset; | 1188 | *nb_ret = PAGE_SIZE - offset; |
1166 | return page_address(page) + offset; | 1189 | return page_address(page) + offset; |
1167 | 1190 | ||
1168 | err: | 1191 | err: |
@@ -1170,11 +1193,31 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, | |||
1170 | return NULL; | 1193 | return NULL; |
1171 | } | 1194 | } |
1172 | 1195 | ||
1173 | void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) | 1196 | void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa, |
1197 | bool dirty) | ||
1174 | { | 1198 | { |
1175 | struct page *page = virt_to_page(va); | 1199 | struct page *page = virt_to_page(va); |
1200 | struct kvm_memory_slot *memslot; | ||
1201 | unsigned long gfn; | ||
1202 | unsigned long *rmap; | ||
1203 | int srcu_idx; | ||
1176 | 1204 | ||
1177 | put_page(page); | 1205 | put_page(page); |
1206 | |||
1207 | if (!dirty || !kvm->arch.using_mmu_notifiers) | ||
1208 | return; | ||
1209 | |||
1210 | /* We need to mark this page dirty in the rmap chain */ | ||
1211 | gfn = gpa >> PAGE_SHIFT; | ||
1212 | srcu_idx = srcu_read_lock(&kvm->srcu); | ||
1213 | memslot = gfn_to_memslot(kvm, gfn); | ||
1214 | if (memslot) { | ||
1215 | rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; | ||
1216 | lock_rmap(rmap); | ||
1217 | *rmap |= KVMPPC_RMAP_CHANGED; | ||
1218 | unlock_rmap(rmap); | ||
1219 | } | ||
1220 | srcu_read_unlock(&kvm->srcu, srcu_idx); | ||
1178 | } | 1221 | } |
1179 | 1222 | ||
1180 | /* | 1223 | /* |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1e521baf9a7d..5af0f2979833 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
@@ -259,7 +259,7 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, | |||
259 | len = ((struct reg_vpa *)va)->length.hword; | 259 | len = ((struct reg_vpa *)va)->length.hword; |
260 | else | 260 | else |
261 | len = ((struct reg_vpa *)va)->length.word; | 261 | len = ((struct reg_vpa *)va)->length.word; |
262 | kvmppc_unpin_guest_page(kvm, va); | 262 | kvmppc_unpin_guest_page(kvm, va, vpa, false); |
263 | 263 | ||
264 | /* Check length */ | 264 | /* Check length */ |
265 | if (len > nb || len < sizeof(struct reg_vpa)) | 265 | if (len > nb || len < sizeof(struct reg_vpa)) |
@@ -359,13 +359,13 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) | |||
359 | va = NULL; | 359 | va = NULL; |
360 | nb = 0; | 360 | nb = 0; |
361 | if (gpa) | 361 | if (gpa) |
362 | va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb); | 362 | va = kvmppc_pin_guest_page(kvm, gpa, &nb); |
363 | spin_lock(&vcpu->arch.vpa_update_lock); | 363 | spin_lock(&vcpu->arch.vpa_update_lock); |
364 | if (gpa == vpap->next_gpa) | 364 | if (gpa == vpap->next_gpa) |
365 | break; | 365 | break; |
366 | /* sigh... unpin that one and try again */ | 366 | /* sigh... unpin that one and try again */ |
367 | if (va) | 367 | if (va) |
368 | kvmppc_unpin_guest_page(kvm, va); | 368 | kvmppc_unpin_guest_page(kvm, va, gpa, false); |
369 | } | 369 | } |
370 | 370 | ||
371 | vpap->update_pending = 0; | 371 | vpap->update_pending = 0; |
@@ -375,12 +375,15 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) | |||
375 | * has changed the mappings underlying guest memory, | 375 | * has changed the mappings underlying guest memory, |
376 | * so unregister the region. | 376 | * so unregister the region. |
377 | */ | 377 | */ |
378 | kvmppc_unpin_guest_page(kvm, va); | 378 | kvmppc_unpin_guest_page(kvm, va, gpa, false); |
379 | va = NULL; | 379 | va = NULL; |
380 | } | 380 | } |
381 | if (vpap->pinned_addr) | 381 | if (vpap->pinned_addr) |
382 | kvmppc_unpin_guest_page(kvm, vpap->pinned_addr); | 382 | kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa, |
383 | vpap->dirty); | ||
384 | vpap->gpa = gpa; | ||
383 | vpap->pinned_addr = va; | 385 | vpap->pinned_addr = va; |
386 | vpap->dirty = false; | ||
384 | if (va) | 387 | if (va) |
385 | vpap->pinned_end = va + vpap->len; | 388 | vpap->pinned_end = va + vpap->len; |
386 | } | 389 | } |
@@ -472,6 +475,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, | |||
472 | /* order writing *dt vs. writing vpa->dtl_idx */ | 475 | /* order writing *dt vs. writing vpa->dtl_idx */ |
473 | smp_wmb(); | 476 | smp_wmb(); |
474 | vpa->dtl_idx = ++vcpu->arch.dtl_index; | 477 | vpa->dtl_idx = ++vcpu->arch.dtl_index; |
478 | vcpu->arch.dtl.dirty = true; | ||
475 | } | 479 | } |
476 | 480 | ||
477 | int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | 481 | int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) |
@@ -913,15 +917,19 @@ out: | |||
913 | return ERR_PTR(err); | 917 | return ERR_PTR(err); |
914 | } | 918 | } |
915 | 919 | ||
920 | static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa) | ||
921 | { | ||
922 | if (vpa->pinned_addr) | ||
923 | kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa, | ||
924 | vpa->dirty); | ||
925 | } | ||
926 | |||
916 | void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) | 927 | void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) |
917 | { | 928 | { |
918 | spin_lock(&vcpu->arch.vpa_update_lock); | 929 | spin_lock(&vcpu->arch.vpa_update_lock); |
919 | if (vcpu->arch.dtl.pinned_addr) | 930 | unpin_vpa(vcpu->kvm, &vcpu->arch.dtl); |
920 | kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr); | 931 | unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow); |
921 | if (vcpu->arch.slb_shadow.pinned_addr) | 932 | unpin_vpa(vcpu->kvm, &vcpu->arch.vpa); |
922 | kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr); | ||
923 | if (vcpu->arch.vpa.pinned_addr) | ||
924 | kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr); | ||
925 | spin_unlock(&vcpu->arch.vpa_update_lock); | 933 | spin_unlock(&vcpu->arch.vpa_update_lock); |
926 | kvm_vcpu_uninit(vcpu); | 934 | kvm_vcpu_uninit(vcpu); |
927 | kmem_cache_free(kvm_vcpu_cache, vcpu); | 935 | kmem_cache_free(kvm_vcpu_cache, vcpu); |
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index e33d11f1b977..0f23bb851711 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S | |||
@@ -260,6 +260,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | |||
260 | lwz r5, LPPACA_YIELDCOUNT(r3) | 260 | lwz r5, LPPACA_YIELDCOUNT(r3) |
261 | addi r5, r5, 1 | 261 | addi r5, r5, 1 |
262 | stw r5, LPPACA_YIELDCOUNT(r3) | 262 | stw r5, LPPACA_YIELDCOUNT(r3) |
263 | li r6, 1 | ||
264 | stb r6, VCPU_VPA_DIRTY(r4) | ||
263 | 25: | 265 | 25: |
264 | /* Load up DAR and DSISR */ | 266 | /* Load up DAR and DSISR */ |
265 | ld r5, VCPU_DAR(r4) | 267 | ld r5, VCPU_DAR(r4) |
@@ -1018,6 +1020,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | |||
1018 | lwz r3, LPPACA_YIELDCOUNT(r8) | 1020 | lwz r3, LPPACA_YIELDCOUNT(r8) |
1019 | addi r3, r3, 1 | 1021 | addi r3, r3, 1 |
1020 | stw r3, LPPACA_YIELDCOUNT(r8) | 1022 | stw r3, LPPACA_YIELDCOUNT(r8) |
1023 | li r3, 1 | ||
1024 | stb r3, VCPU_VPA_DIRTY(r9) | ||
1021 | 25: | 1025 | 25: |
1022 | /* Save PMU registers if requested */ | 1026 | /* Save PMU registers if requested */ |
1023 | /* r8 and cr0.eq are live here */ | 1027 | /* r8 and cr0.eq are live here */ |