author     Paul Mackerras <paulus@ozlabs.org>      2017-01-30 05:21:48 -0500
committer  Michael Ellerman <mpe@ellerman.id.au>   2017-01-31 03:11:50 -0500
commit     8f7b79b8379a85fb8dd0c3f42d9f452ec5552161
tree       017e423360269c9b86e218719a7ad5c237e9c88a
parent     01756099e0a5f431bbada9693d566269acfb51f9
KVM: PPC: Book3S HV: Implement dirty page logging for radix guests
This adds code to keep track of dirty pages when requested (that is,
when memslot->dirty_bitmap is non-NULL) for radix guests.  We use the
dirty bits in the PTEs in the second-level (partition-scoped) page
tables, together with a bitmap of pages that were dirty when their
PTE was invalidated (e.g., when the page was paged out).  This bitmap
is stored in the first half of the memslot->dirty_bitmap area, and
kvm_vm_ioctl_get_dirty_log_hv() now uses the second half for the
bitmap that gets returned to userspace.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
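To make the two-halves layout described above concrete, here is a minimal
user-space sketch (plain C, not kernel code; the slot size and helper names
are invented for illustration) of a dirty bitmap whose first half accumulates
bits as pages are dirtied and whose second half is rebuilt and handed to
userspace on each get-dirty-log call:

/*
 * Minimal user-space sketch (assumption: not the kernel implementation) of
 * the split dirty_bitmap area: first half accumulates, second half is the
 * copy that would be returned to userspace.
 */
#include <stdio.h>
#include <string.h>

#define NPAGES       128                            /* pages in the hypothetical memslot */
#define LONG_BITS    (8 * sizeof(unsigned long))
#define BITMAP_LONGS (NPAGES / LONG_BITS)

static unsigned long dirty_area[2 * BITMAP_LONGS];  /* [accumulation half][result half] */

/* Host dirtied a page while its PTE was invalid: record it in the first half. */
static void accumulate_dirty(unsigned long page)
{
        dirty_area[page / LONG_BITS] |= 1ul << (page % LONG_BITS);
}

/* Build the bitmap handed back to userspace in the second half. */
static void get_dirty_log(unsigned long *out)
{
        unsigned long *accum = dirty_area;                  /* first half: accumulation */
        unsigned long *buf = dirty_area + BITMAP_LONGS;     /* second half: returned copy */
        size_t i;

        memset(buf, 0, BITMAP_LONGS * sizeof(unsigned long));
        for (i = 0; i < BITMAP_LONGS; ++i) {
                buf[i] |= accum[i];     /* pick up accumulated bits...              */
                accum[i] = 0;           /* ...and clear them (the kernel uses xchg) */
        }
        /* the kernel would also fold in PTE dirty bits and VPA/DTL pages here */
        memcpy(out, buf, BITMAP_LONGS * sizeof(unsigned long));
}

int main(void)
{
        unsigned long log[BITMAP_LONGS];

        accumulate_dirty(3);
        accumulate_dirty(70);
        get_dirty_log(log);
        printf("log[0] = %#lx  log[1] = %#lx\n", log[0], log[1]);
        return 0;
}

The real kernel code clears the accumulation half with xchg() so that bits
set concurrently by other CPUs are not lost between the read and the clear.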
-rw-r--r--   arch/powerpc/include/asm/kvm_book3s.h   |   7
-rw-r--r--   arch/powerpc/kvm/book3s_64_mmu_hv.c     |  28
-rw-r--r--   arch/powerpc/kvm/book3s_64_mmu_radix.c  | 111
-rw-r--r--   arch/powerpc/kvm/book3s_hv.c            |  31
4 files changed, 144 insertions(+), 33 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 952cc4b954a1..57dc407cec4a 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -198,6 +198,8 @@ extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
                         unsigned long gfn);
 extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
                         unsigned long gfn);
+extern long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
+                        struct kvm_memory_slot *memslot, unsigned long *map);
 
 /* XXX remove this export when load_last_inst() is generic */
 extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
@@ -228,8 +230,11 @@ extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
                         unsigned long pte_index, unsigned long avpn,
                         unsigned long *hpret);
-extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
+extern long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
                         struct kvm_memory_slot *memslot, unsigned long *map);
+extern void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
+                        struct kvm_memory_slot *memslot,
+                        unsigned long *map);
 extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
                         unsigned long mask);
 extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 088c82bb7ba4..c9e587a2849d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1068,7 +1068,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
         return npages_dirty;
 }
 
-static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
+void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
                               struct kvm_memory_slot *memslot,
                               unsigned long *map)
 {
@@ -1086,12 +1086,11 @@ static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
         __set_bit_le(gfn - memslot->base_gfn, map);
 }
 
-long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
-                             unsigned long *map)
+long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
+                        struct kvm_memory_slot *memslot, unsigned long *map)
 {
         unsigned long i, j;
         unsigned long *rmapp;
-        struct kvm_vcpu *vcpu;
 
         preempt_disable();
         rmapp = memslot->arch.rmap;
@@ -1107,15 +1106,6 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
                         __set_bit_le(j, map);
                 ++rmapp;
         }
-
-        /* Harvest dirty bits from VPA and DTL updates */
-        /* Note: we never modify the SLB shadow buffer areas */
-        kvm_for_each_vcpu(i, vcpu, kvm) {
-                spin_lock(&vcpu->arch.vpa_update_lock);
-                harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map);
-                harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map);
-                spin_unlock(&vcpu->arch.vpa_update_lock);
-        }
         preempt_enable();
         return 0;
 }
@@ -1170,10 +1160,14 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
         srcu_idx = srcu_read_lock(&kvm->srcu);
         memslot = gfn_to_memslot(kvm, gfn);
         if (memslot) {
-                rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
-                lock_rmap(rmap);
-                *rmap |= KVMPPC_RMAP_CHANGED;
-                unlock_rmap(rmap);
+                if (!kvm_is_radix(kvm)) {
+                        rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
+                        lock_rmap(rmap);
+                        *rmap |= KVMPPC_RMAP_CHANGED;
+                        unlock_rmap(rmap);
+                } else if (memslot->dirty_bitmap) {
+                        mark_page_dirty(kvm, gfn);
+                }
         }
         srcu_read_unlock(&kvm->srcu, srcu_idx);
 }
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 69cabadc121a..125cc7ce1525 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -158,18 +158,21 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
         asm volatile("ptesync": : :"memory");
 }
 
-void kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep, unsigned long clr,
-                             unsigned long set, unsigned long addr,
-                             unsigned int shift)
+unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
+                                      unsigned long clr, unsigned long set,
+                                      unsigned long addr, unsigned int shift)
 {
+        unsigned long old = 0;
+
         if (!(clr & _PAGE_PRESENT) && cpu_has_feature(CPU_FTR_POWER9_DD1) &&
             pte_present(*ptep)) {
                 /* have to invalidate it first */
-                __radix_pte_update(ptep, _PAGE_PRESENT, 0);
+                old = __radix_pte_update(ptep, _PAGE_PRESENT, 0);
                 kvmppc_radix_tlbie_page(kvm, addr, shift);
                 set |= _PAGE_PRESENT;
+                old &= _PAGE_PRESENT;
         }
-        __radix_pte_update(ptep, clr, set);
+        return __radix_pte_update(ptep, clr, set) | old;
 }
 
 void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr,
@@ -197,6 +200,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
         pud_t *pud, *new_pud = NULL;
         pmd_t *pmd, *new_pmd = NULL;
         pte_t *ptep, *new_ptep = NULL;
+        unsigned long old;
         int ret;
 
         /* Traverse the guest's 2nd-level tree, allocate new levels needed */
@@ -262,9 +266,11 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
                 ptep = pte_offset_kernel(pmd, gpa);
                 if (pte_present(*ptep)) {
                         /* PTE was previously valid, so invalidate it */
-                        kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
+                        old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
                                                 0, gpa, 0);
                         kvmppc_radix_tlbie_page(kvm, gpa, 0);
+                        if (old & _PAGE_DIRTY)
+                                mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
                 }
                 kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
         } else {
@@ -463,6 +469,26 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
         return ret;
 }
 
+static void mark_pages_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot,
+                             unsigned long gfn, unsigned int order)
+{
+        unsigned long i, limit;
+        unsigned long *dp;
+
+        if (!memslot->dirty_bitmap)
+                return;
+        limit = 1ul << order;
+        if (limit < BITS_PER_LONG) {
+                for (i = 0; i < limit; ++i)
+                        mark_page_dirty(kvm, gfn + i);
+                return;
+        }
+        dp = memslot->dirty_bitmap + (gfn - memslot->base_gfn);
+        limit /= BITS_PER_LONG;
+        for (i = 0; i < limit; ++i)
+                *dp++ = ~0ul;
+}
+
 /* Called with kvm->lock held */
 int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
                     unsigned long gfn)
@@ -470,13 +496,21 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
         pte_t *ptep;
         unsigned long gpa = gfn << PAGE_SHIFT;
         unsigned int shift;
+        unsigned long old;
 
         ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
                                            NULL, &shift);
         if (ptep && pte_present(*ptep)) {
-                kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
+                old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
                                         gpa, shift);
                 kvmppc_radix_tlbie_page(kvm, gpa, shift);
+                if (old & _PAGE_DIRTY) {
+                        if (!shift)
+                                mark_page_dirty(kvm, gfn);
+                        else
+                                mark_pages_dirty(kvm, memslot,
+                                                 gfn, shift - PAGE_SHIFT);
+                }
         }
         return 0;
 }
@@ -517,6 +551,65 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
         return ref;
 }
 
+/* Returns the number of PAGE_SIZE pages that are dirty */
+static int kvm_radix_test_clear_dirty(struct kvm *kvm,
+                                struct kvm_memory_slot *memslot, int pagenum)
+{
+        unsigned long gfn = memslot->base_gfn + pagenum;
+        unsigned long gpa = gfn << PAGE_SHIFT;
+        pte_t *ptep;
+        unsigned int shift;
+        int ret = 0;
+
+        ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
+                                           NULL, &shift);
+        if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
+                ret = 1;
+                if (shift)
+                        ret = 1 << (shift - PAGE_SHIFT);
+                kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
+                                        gpa, shift);
+                kvmppc_radix_tlbie_page(kvm, gpa, shift);
+        }
+        return ret;
+}
+
+long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
+                        struct kvm_memory_slot *memslot, unsigned long *map)
+{
+        unsigned long i, j;
+        unsigned long n, *p;
+        int npages;
+
+        /*
+         * Radix accumulates dirty bits in the first half of the
+         * memslot's dirty_bitmap area, for when pages are paged
+         * out or modified by the host directly.  Pick up these
+         * bits and add them to the map.
+         */
+        n = kvm_dirty_bitmap_bytes(memslot) / sizeof(long);
+        p = memslot->dirty_bitmap;
+        for (i = 0; i < n; ++i)
+                map[i] |= xchg(&p[i], 0);
+
+        for (i = 0; i < memslot->npages; i = j) {
+                npages = kvm_radix_test_clear_dirty(kvm, memslot, i);
+
+                /*
+                 * Note that if npages > 0 then i must be a multiple of npages,
+                 * since huge pages are only used to back the guest at guest
+                 * real addresses that are a multiple of their size.
+                 * Since we have at most one PTE covering any given guest
+                 * real address, if npages > 1 we can skip to i + npages.
+                 */
+                j = i + 1;
+                if (npages)
+                        for (j = i; npages; ++j, --npages)
+                                __set_bit_le(j, map);
+        }
+        return 0;
+}
+
 void kvmppc_free_radix(struct kvm *kvm)
 {
         unsigned long ig, iu, im;
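The scan loop added above relies on a dirty huge-page PTE covering a
power-of-two run of small pages that starts at an aligned guest frame number,
so it can set that many bits at once and skip straight past the huge page.
A stand-alone sketch of just that skipping logic (simplified; the
dirty_npages array is an invented stand-in for the PTE walk done by
kvm_radix_test_clear_dirty()):

#include <stdio.h>

#define SLOT_PAGES 32

/*
 * Toy stand-in for the PTE walk: dirty_npages[i] is the number of small
 * pages reported dirty by the (hypothetical) PTE starting at page i,
 * 0 if the page is clean or covered by an earlier huge page.
 */
static int dirty_npages[SLOT_PAGES] = {
        [0]  = 16,      /* dirty 64K huge page backing pages 0-15 */
        [20] = 1,       /* dirty 4K page */
};

static void set_map_bit(unsigned long nr, unsigned long *map)
{
        map[nr / 64] |= 1ul << (nr % 64);
}

int main(void)
{
        unsigned long map[(SLOT_PAGES + 63) / 64] = { 0 };
        unsigned long i, j;
        int npages;

        for (i = 0; i < SLOT_PAGES; i = j) {
                npages = dirty_npages[i];
                j = i + 1;
                if (npages)             /* dirty: set npages bits, then skip past them */
                        for (j = i; npages; ++j, --npages)
                                set_map_bit(j, map);
        }
        printf("dirty map: %#lx\n", map[0]);
        return 0;
}

With the toy data above, the map ends up with bits 0-15 and bit 20 set
(0x10ffff), and the loop never re-inspects pages 1-15 individually.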
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d50251f9a3c9..401e4cc8a91f 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2961,8 +2961,10 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
 {
         struct kvm_memslots *slots;
         struct kvm_memory_slot *memslot;
-        int r;
+        int i, r;
         unsigned long n;
+        unsigned long *buf;
+        struct kvm_vcpu *vcpu;
 
         mutex_lock(&kvm->slots_lock);
 
@@ -2976,15 +2978,32 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
         if (!memslot->dirty_bitmap)
                 goto out;
 
+        /*
+         * Use second half of bitmap area because radix accumulates
+         * bits in the first half.
+         */
         n = kvm_dirty_bitmap_bytes(memslot);
-        memset(memslot->dirty_bitmap, 0, n);
+        buf = memslot->dirty_bitmap + n / sizeof(long);
+        memset(buf, 0, n);
 
-        r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap);
+        if (kvm_is_radix(kvm))
+                r = kvmppc_hv_get_dirty_log_radix(kvm, memslot, buf);
+        else
+                r = kvmppc_hv_get_dirty_log_hpt(kvm, memslot, buf);
         if (r)
                 goto out;
 
+        /* Harvest dirty bits from VPA and DTL updates */
+        /* Note: we never modify the SLB shadow buffer areas */
+        kvm_for_each_vcpu(i, vcpu, kvm) {
+                spin_lock(&vcpu->arch.vpa_update_lock);
+                kvmppc_harvest_vpa_dirty(&vcpu->arch.vpa, memslot, buf);
+                kvmppc_harvest_vpa_dirty(&vcpu->arch.dtl, memslot, buf);
+                spin_unlock(&vcpu->arch.vpa_update_lock);
+        }
+
         r = -EFAULT;
-        if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
+        if (copy_to_user(log->dirty_bitmap, buf, n))
                 goto out;
 
         r = 0;
@@ -3037,7 +3056,7 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
         if (npages)
                 atomic64_inc(&kvm->arch.mmio_update);
 
-        if (npages && old->npages) {
+        if (npages && old->npages && !kvm_is_radix(kvm)) {
                 /*
                  * If modifying a memslot, reset all the rmap dirty bits.
                  * If this is a new memslot, we don't need to do anything
@@ -3046,7 +3065,7 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
                  */
                 slots = kvm_memslots(kvm);
                 memslot = id_to_memslot(slots, mem->slot);
-                kvmppc_hv_get_dirty_log(kvm, memslot, NULL);
+                kvmppc_hv_get_dirty_log_hpt(kvm, memslot, NULL);
         }
 }
 
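For reference, the bitmap that kvm_vm_ioctl_get_dirty_log_hv() now builds in
the second half of the dirty_bitmap area is what userspace receives through
the standard KVM_GET_DIRTY_LOG ioctl.  A sketch of the consuming side
(it assumes vm_fd already refers to a KVM VM with memory slot `slot` covering
`npages` pages; the VM and memslot setup are omitted):

/*
 * Userspace side of the dirty log: fetch and print the dirty pages of one
 * memory slot.  Fetching the log also clears the dirty state in the kernel.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int fetch_dirty_log(int vm_fd, uint32_t slot, uint64_t npages)
{
        size_t bytes = ((npages + 63) / 64) * 8;        /* one bit per page, long-aligned */
        uint64_t *bitmap = calloc(1, bytes);
        struct kvm_dirty_log log = { .slot = slot };
        uint64_t i;

        if (!bitmap)
                return -1;
        log.dirty_bitmap = bitmap;
        if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0) {
                perror("KVM_GET_DIRTY_LOG");
                free(bitmap);
                return -1;
        }
        for (i = 0; i < npages; i++)
                if (bitmap[i / 64] & (1ull << (i % 64)))
                        printf("page %llu is dirty\n", (unsigned long long)i);
        free(bitmap);
        return 0;
}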