summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/virtual/kvm/api.txt67
-rw-r--r--arch/mips/kvm/mips.c23
-rw-r--r--arch/x86/kvm/x86.c27
-rw-r--r--include/linux/kvm_host.h5
-rw-r--r--include/uapi/linux/kvm.h15
-rw-r--r--tools/testing/selftests/kvm/Makefile2
-rw-r--r--tools/testing/selftests/kvm/clear_dirty_log_test.c2
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c19
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h2
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c13
-rw-r--r--virt/kvm/arm/arm.c16
-rw-r--r--virt/kvm/kvm_main.c132
12 files changed, 306 insertions, 17 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 1071c10cf1c7..f2c345f7b630 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -305,6 +305,9 @@ the address space for which you want to return the dirty bitmap.
305They must be less than the value that KVM_CHECK_EXTENSION returns for 305They must be less than the value that KVM_CHECK_EXTENSION returns for
306the KVM_CAP_MULTI_ADDRESS_SPACE capability. 306the KVM_CAP_MULTI_ADDRESS_SPACE capability.
307 307
308The bits in the dirty bitmap are cleared before the ioctl returns, unless
309KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is enabled. For more information,
310see the description of the capability.
308 311
3094.9 KVM_SET_MEMORY_ALIAS 3124.9 KVM_SET_MEMORY_ALIAS
310 313
@@ -3758,6 +3761,46 @@ Coalesced pio is based on coalesced mmio. There is little difference
3758between coalesced mmio and pio except that coalesced pio records accesses 3761between coalesced mmio and pio except that coalesced pio records accesses
3759to I/O ports. 3762to I/O ports.
3760 3763
37644.117 KVM_CLEAR_DIRTY_LOG (vm ioctl)
3765
3766Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
3767Architectures: x86, arm, arm64, mips
3768Type: vm ioctl
3769Parameters: struct kvm_clear_dirty_log (in)
3770Returns: 0 on success, -1 on error
3771
3772/* for KVM_CLEAR_DIRTY_LOG */
3773struct kvm_clear_dirty_log {
3774 __u32 slot;
3775 __u32 num_pages;
3776 __u64 first_page;
3777 union {
3778 void __user *dirty_bitmap; /* one bit per page */
3779 __u64 padding2;
3780 };
3781};
3782
3783The ioctl clears the dirty status of pages in a memory slot, according to
3784the bitmap that is passed in struct kvm_clear_dirty_log's dirty_bitmap
3785field. Bit 0 of the bitmap corresponds to page "first_page" in the
3786memory slot, and num_pages is the size in bits of the input bitmap.
3787Both first_page and num_pages must be a multiple of 64. For each bit
3788that is set in the input bitmap, the corresponding page is marked "clean"
3789in KVM's dirty bitmap, and dirty tracking is re-enabled for that page
3790(for example via write-protection, or by clearing the dirty bit in
3791a page table entry).
3792
3793If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of the slot field
3794specify the address space for which you want to clear the dirty status.
3795They must be less than the value that KVM_CHECK_EXTENSION returns for
3796the KVM_CAP_MULTI_ADDRESS_SPACE capability.
3797
3798This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
3799is enabled; for more information, see the description of the capability.
3800However, it can always be used as long as KVM_CHECK_EXTENSION confirms
3801that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is present.
3802
3803
37615. The kvm_run structure 38045. The kvm_run structure
3762------------------------ 3805------------------------
3763 3806
@@ -4652,6 +4695,30 @@ and injected exceptions.
4652* For the new DR6 bits, note that bit 16 is set iff the #DB exception 4695* For the new DR6 bits, note that bit 16 is set iff the #DB exception
4653 will clear DR6.RTM. 4696 will clear DR6.RTM.
4654 4697
46987.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
4699
4700Architectures: x86, arm, arm64, mips
4701Parameters: args[0] whether feature should be enabled or not
4702
4703With this capability enabled, KVM_GET_DIRTY_LOG will not automatically
4704clear and write-protect all pages that are returned as dirty.
4705Rather, userspace will have to do this operation separately using
4706KVM_CLEAR_DIRTY_LOG.
4707
4708At the cost of a slightly more complicated operation, this provides better
4709scalability and responsiveness for two reasons. First, the
4710KVM_CLEAR_DIRTY_LOG ioctl can operate at a 64-page granularity rather
4711than requiring a full memslot to be synced; this ensures that KVM does not
4712take spinlocks for an extended period of time. Second, in some cases a
4713large amount of time can pass between a call to KVM_GET_DIRTY_LOG and
4714userspace actually using the data in the page. Pages can be modified
4715during this time, which is inefficient for both the guest and userspace:
4716the guest will incur a higher penalty due to write protection faults,
4717while userspace can see false reports of dirty pages. Manual reprotection
4718helps reduce this time, improving guest performance and reducing the
4719number of dirty log false positives.
4720
4721
46558. Other capabilities. 47228. Other capabilities.
4656---------------------- 4723----------------------
4657 4724
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 3898e657952e..3734cd58895e 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -1023,6 +1023,29 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
1023 return r; 1023 return r;
1024} 1024}
1025 1025
1026int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
1027{
1028 struct kvm_memslots *slots;
1029 struct kvm_memory_slot *memslot;
1030 bool flush = false;
1031 int r;
1032
1033 mutex_lock(&kvm->slots_lock);
1034
1035 r = kvm_clear_dirty_log_protect(kvm, log, &flush);
1036
1037 if (flush) {
1038 slots = kvm_memslots(kvm);
1039 memslot = id_to_memslot(slots, log->slot);
1040
1041 /* Let implementation handle TLB/GVA invalidation */
1042 kvm_mips_callbacks->flush_shadow_memslot(kvm, memslot);
1043 }
1044
1045 mutex_unlock(&kvm->slots_lock);
1046 return r;
1047}
1048
1026long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) 1049long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
1027{ 1050{
1028 long r; 1051 long r;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 448f011aa317..6af846c54660 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4418,6 +4418,33 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
4418 return r; 4418 return r;
4419} 4419}
4420 4420
4421int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
4422{
4423 bool flush = false;
4424 int r;
4425
4426 mutex_lock(&kvm->slots_lock);
4427
4428 /*
4429 * Flush potentially hardware-cached dirty pages to dirty_bitmap.
4430 */
4431 if (kvm_x86_ops->flush_log_dirty)
4432 kvm_x86_ops->flush_log_dirty(kvm);
4433
4434 r = kvm_clear_dirty_log_protect(kvm, log, &flush);
4435
4436 /*
4437 * All the TLBs can be flushed out of mmu lock, see the comments in
4438 * kvm_mmu_slot_remove_write_access().
4439 */
4440 lockdep_assert_held(&kvm->slots_lock);
4441 if (flush)
4442 kvm_flush_remote_tlbs(kvm);
4443
4444 mutex_unlock(&kvm->slots_lock);
4445 return r;
4446}
4447
4421int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, 4448int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
4422 bool line_status) 4449 bool line_status)
4423{ 4450{
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8c56b2873b13..e065aeaae29e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -449,6 +449,7 @@ struct kvm {
449#endif 449#endif
450 long tlbs_dirty; 450 long tlbs_dirty;
451 struct list_head devices; 451 struct list_head devices;
452 bool manual_dirty_log_protect;
452 struct dentry *debugfs_dentry; 453 struct dentry *debugfs_dentry;
453 struct kvm_stat_data **debugfs_stat_data; 454 struct kvm_stat_data **debugfs_stat_data;
454 struct srcu_struct srcu; 455 struct srcu_struct srcu;
@@ -754,6 +755,8 @@ int kvm_get_dirty_log(struct kvm *kvm,
754 755
755int kvm_get_dirty_log_protect(struct kvm *kvm, 756int kvm_get_dirty_log_protect(struct kvm *kvm,
756 struct kvm_dirty_log *log, bool *flush); 757 struct kvm_dirty_log *log, bool *flush);
758int kvm_clear_dirty_log_protect(struct kvm *kvm,
759 struct kvm_clear_dirty_log *log, bool *flush);
757 760
758void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, 761void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
759 struct kvm_memory_slot *slot, 762 struct kvm_memory_slot *slot,
@@ -762,6 +765,8 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
762 765
763int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 766int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
764 struct kvm_dirty_log *log); 767 struct kvm_dirty_log *log);
768int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm,
769 struct kvm_clear_dirty_log *log);
765 770
766int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, 771int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
767 bool line_status); 772 bool line_status);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 2b7a652c9fa4..9fe35f1ac938 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -492,6 +492,17 @@ struct kvm_dirty_log {
492 }; 492 };
493}; 493};
494 494
495/* for KVM_CLEAR_DIRTY_LOG */
496struct kvm_clear_dirty_log {
497 __u32 slot;
498 __u32 num_pages;
499 __u64 first_page;
500 union {
501 void __user *dirty_bitmap; /* one bit per page */
502 __u64 padding2;
503 };
504};
505
495/* for KVM_SET_SIGNAL_MASK */ 506/* for KVM_SET_SIGNAL_MASK */
496struct kvm_signal_mask { 507struct kvm_signal_mask {
497 __u32 len; 508 __u32 len;
@@ -975,6 +986,7 @@ struct kvm_ppc_resize_hpt {
975#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 986#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163
976#define KVM_CAP_EXCEPTION_PAYLOAD 164 987#define KVM_CAP_EXCEPTION_PAYLOAD 164
977#define KVM_CAP_ARM_VM_IPA_SIZE 165 988#define KVM_CAP_ARM_VM_IPA_SIZE 165
989#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166
978 990
979#ifdef KVM_CAP_IRQ_ROUTING 991#ifdef KVM_CAP_IRQ_ROUTING
980 992
@@ -1421,6 +1433,9 @@ struct kvm_enc_region {
1421#define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state) 1433#define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state)
1422#define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state) 1434#define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state)
1423 1435
1436/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT */
1437#define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log)
1438
1424/* Secure Encrypted Virtualization command */ 1439/* Secure Encrypted Virtualization command */
1425enum sev_cmd_id { 1440enum sev_cmd_id {
1426 /* Guest initialization commands */ 1441 /* Guest initialization commands */
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 52bfe5e76907..caaa0d5eba92 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -16,8 +16,10 @@ TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
16TEST_GEN_PROGS_x86_64 += x86_64/state_test 16TEST_GEN_PROGS_x86_64 += x86_64/state_test
17TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test 17TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
18TEST_GEN_PROGS_x86_64 += dirty_log_test 18TEST_GEN_PROGS_x86_64 += dirty_log_test
19TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
19 20
20TEST_GEN_PROGS_aarch64 += dirty_log_test 21TEST_GEN_PROGS_aarch64 += dirty_log_test
22TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
21 23
22TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) 24TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
23LIBKVM += $(LIBKVM_$(UNAME_M)) 25LIBKVM += $(LIBKVM_$(UNAME_M))
diff --git a/tools/testing/selftests/kvm/clear_dirty_log_test.c b/tools/testing/selftests/kvm/clear_dirty_log_test.c
new file mode 100644
index 000000000000..749336937d37
--- /dev/null
+++ b/tools/testing/selftests/kvm/clear_dirty_log_test.c
@@ -0,0 +1,2 @@
1#define USE_CLEAR_DIRTY_LOG
2#include "dirty_log_test.c"
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index aeff95a91b15..4629c7ccfa28 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -275,6 +275,14 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
275 275
276 vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code); 276 vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code);
277 277
278#ifdef USE_CLEAR_DIRTY_LOG
279 struct kvm_enable_cap cap = {};
280
281 cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT;
282 cap.args[0] = 1;
283 vm_enable_cap(vm, &cap);
284#endif
285
278 /* Add an extra memory slot for testing dirty logging */ 286 /* Add an extra memory slot for testing dirty logging */
279 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 287 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
280 guest_test_mem, 288 guest_test_mem,
@@ -316,6 +324,10 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
316 /* Give the vcpu thread some time to dirty some pages */ 324 /* Give the vcpu thread some time to dirty some pages */
317 usleep(interval * 1000); 325 usleep(interval * 1000);
318 kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); 326 kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
327#ifdef USE_CLEAR_DIRTY_LOG
328 kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
329 DIV_ROUND_UP(host_num_pages, 64) * 64);
330#endif
319 vm_dirty_log_verify(bmap); 331 vm_dirty_log_verify(bmap);
320 iteration++; 332 iteration++;
321 sync_global_to_guest(vm, iteration); 333 sync_global_to_guest(vm, iteration);
@@ -392,6 +404,13 @@ int main(int argc, char *argv[])
392 unsigned int mode; 404 unsigned int mode;
393 int opt, i; 405 int opt, i;
394 406
407#ifdef USE_CLEAR_DIRTY_LOG
408 if (!kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT)) {
409 fprintf(stderr, "KVM_CLEAR_DIRTY_LOG not available, skipping tests\n");
410 exit(KSFT_SKIP);
411 }
412#endif
413
395 while ((opt = getopt(argc, argv, "hi:I:o:tm:")) != -1) { 414 while ((opt = getopt(argc, argv, "hi:I:o:tm:")) != -1) {
396 switch (opt) { 415 switch (opt) {
397 case 'i': 416 case 'i':
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index a4e59e3b4826..c51bfaba017a 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -58,6 +58,8 @@ void kvm_vm_free(struct kvm_vm *vmp);
58void kvm_vm_restart(struct kvm_vm *vmp, int perm); 58void kvm_vm_restart(struct kvm_vm *vmp, int perm);
59void kvm_vm_release(struct kvm_vm *vmp); 59void kvm_vm_release(struct kvm_vm *vmp);
60void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log); 60void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
61void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
62 uint64_t first_page, uint32_t num_pages);
61 63
62int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, 64int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
63 size_t len); 65 size_t len);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 1b41e71283d5..c9e94d6503af 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -231,6 +231,19 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
231 strerror(-ret)); 231 strerror(-ret));
232} 232}
233 233
234void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
235 uint64_t first_page, uint32_t num_pages)
236{
237 struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
238 .first_page = first_page,
239 .num_pages = num_pages };
240 int ret;
241
242 ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
243 TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
244 strerror(-ret));
245}
246
234/* 247/*
235 * Userspace Memory Region Find 248 * Userspace Memory Region Find
236 * 249 *
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 120a2663dab9..e91adf77d99a 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -1219,6 +1219,22 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
1219 return r; 1219 return r;
1220} 1220}
1221 1221
1222int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
1223{
1224 bool flush = false;
1225 int r;
1226
1227 mutex_lock(&kvm->slots_lock);
1228
1229 r = kvm_clear_dirty_log_protect(kvm, log, &flush);
1230
1231 if (flush)
1232 kvm_flush_remote_tlbs(kvm);
1233
1234 mutex_unlock(&kvm->slots_lock);
1235 return r;
1236}
1237
1222static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, 1238static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
1223 struct kvm_arm_device_addr *dev_addr) 1239 struct kvm_arm_device_addr *dev_addr)
1224{ 1240{
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 54f0fcfd431e..0041947b7390 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1133,7 +1133,7 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
1133#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT 1133#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
1134/** 1134/**
1135 * kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages 1135 * kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages
1136 * are dirty write protect them for next write. 1136 * are dirty, reenable dirty page tracking for the corresponding pages.
1137 * @kvm: pointer to kvm instance 1137 * @kvm: pointer to kvm instance
1138 * @log: slot id and address to which we copy the log 1138 * @log: slot id and address to which we copy the log
1139 * @is_dirty: flag set if any page is dirty 1139 * @is_dirty: flag set if any page is dirty
@@ -1176,37 +1176,114 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
1176 return -ENOENT; 1176 return -ENOENT;
1177 1177
1178 n = kvm_dirty_bitmap_bytes(memslot); 1178 n = kvm_dirty_bitmap_bytes(memslot);
1179 *flush = false;
1180 if (kvm->manual_dirty_log_protect) {
1181 /*
1182 * Unlike kvm_get_dirty_log, we always return false in *flush,
1183 * because no flush is needed until KVM_CLEAR_DIRTY_LOG. There
1184 * is some code duplication between this function and
1186 * kvm_get_dirty_log, but hopefully all architectures will
1187 * transition to kvm_get_dirty_log_protect so that kvm_get_dirty_log
1188 * can be eliminated.
1188 */
1189 dirty_bitmap_buffer = dirty_bitmap;
1190 } else {
1191 dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
1192 memset(dirty_bitmap_buffer, 0, n);
1179 1193
1180 dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot); 1194 spin_lock(&kvm->mmu_lock);
1181 memset(dirty_bitmap_buffer, 0, n); 1195 for (i = 0; i < n / sizeof(long); i++) {
1196 unsigned long mask;
1197 gfn_t offset;
1182 1198
1183 spin_lock(&kvm->mmu_lock); 1199 if (!dirty_bitmap[i])
1200 continue;
1201
1202 *flush = true;
1203 mask = xchg(&dirty_bitmap[i], 0);
1204 dirty_bitmap_buffer[i] = mask;
1205
1206 if (mask) {
1207 offset = i * BITS_PER_LONG;
1208 kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
1209 offset, mask);
1210 }
1211 }
1212 spin_unlock(&kvm->mmu_lock);
1213 }
1214
1215 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
1216 return -EFAULT;
1217 return 0;
1218}
1219EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
1220
1221/**
1222 * kvm_clear_dirty_log_protect - clear dirty bits in the bitmap
1223 * and reenable dirty page tracking for the corresponding pages.
1224 * @kvm: pointer to kvm instance
1225 * @log: slot id and address from which to fetch the bitmap of dirty pages
1226 */
1227int kvm_clear_dirty_log_protect(struct kvm *kvm,
1228 struct kvm_clear_dirty_log *log, bool *flush)
1229{
1230 struct kvm_memslots *slots;
1231 struct kvm_memory_slot *memslot;
1232 int as_id, id, n;
1233 gfn_t offset;
1234 unsigned long i;
1235 unsigned long *dirty_bitmap;
1236 unsigned long *dirty_bitmap_buffer;
1237
1238 as_id = log->slot >> 16;
1239 id = (u16)log->slot;
1240 if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
1241 return -EINVAL;
1242
1243 if ((log->first_page & 63) || (log->num_pages & 63))
1244 return -EINVAL;
1245
1246 slots = __kvm_memslots(kvm, as_id);
1247 memslot = id_to_memslot(slots, id);
1248
1249 dirty_bitmap = memslot->dirty_bitmap;
1250 if (!dirty_bitmap)
1251 return -ENOENT;
1252
1253 n = kvm_dirty_bitmap_bytes(memslot);
1184 *flush = false; 1254 *flush = false;
1185 for (i = 0; i < n / sizeof(long); i++) { 1255 dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
1186 unsigned long mask; 1256 if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n))
1187 gfn_t offset; 1257 return -EFAULT;
1188 1258
1189 if (!dirty_bitmap[i]) 1259 spin_lock(&kvm->mmu_lock);
1260 for (offset = log->first_page,
1261 i = offset / BITS_PER_LONG, n = log->num_pages / BITS_PER_LONG; n--;
1262 i++, offset += BITS_PER_LONG) {
1263 unsigned long mask = *dirty_bitmap_buffer++;
1264 atomic_long_t *p = (atomic_long_t *) &dirty_bitmap[i];
1265 if (!mask)
1190 continue; 1266 continue;
1191 1267
1192 *flush = true; 1268 mask &= atomic_long_fetch_andnot(mask, p);
1193
1194 mask = xchg(&dirty_bitmap[i], 0);
1195 dirty_bitmap_buffer[i] = mask;
1196 1269
1270 /*
1271 * mask contains the bits that really have been cleared. This
1272 * never includes any bits beyond the length of the memslot (if
1273 * the length is not aligned to 64 pages), therefore it is not
1274 * a problem if userspace sets them in log->dirty_bitmap.
1275 */
1197 if (mask) { 1276 if (mask) {
1198 offset = i * BITS_PER_LONG; 1277 *flush = true;
1199 kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, 1278 kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
1200 offset, mask); 1279 offset, mask);
1201 } 1280 }
1202 } 1281 }
1203
1204 spin_unlock(&kvm->mmu_lock); 1282 spin_unlock(&kvm->mmu_lock);
1205 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) 1283
1206 return -EFAULT;
1207 return 0; 1284 return 0;
1208} 1285}
1209EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect); 1286EXPORT_SYMBOL_GPL(kvm_clear_dirty_log_protect);
1210#endif 1287#endif
1211 1288
1212bool kvm_largepages_enabled(void) 1289bool kvm_largepages_enabled(void)
@@ -2949,6 +3026,9 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
2949 case KVM_CAP_IOEVENTFD_ANY_LENGTH: 3026 case KVM_CAP_IOEVENTFD_ANY_LENGTH:
2950 case KVM_CAP_CHECK_EXTENSION_VM: 3027 case KVM_CAP_CHECK_EXTENSION_VM:
2951 case KVM_CAP_ENABLE_CAP_VM: 3028 case KVM_CAP_ENABLE_CAP_VM:
3029#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
3030 case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT:
3031#endif
2952 return 1; 3032 return 1;
2953#ifdef CONFIG_KVM_MMIO 3033#ifdef CONFIG_KVM_MMIO
2954 case KVM_CAP_COALESCED_MMIO: 3034 case KVM_CAP_COALESCED_MMIO:
@@ -2982,6 +3062,13 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
2982 struct kvm_enable_cap *cap) 3062 struct kvm_enable_cap *cap)
2983{ 3063{
2984 switch (cap->cap) { 3064 switch (cap->cap) {
3065#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
3066 case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT:
3067 if (cap->flags || (cap->args[0] & ~1))
3068 return -EINVAL;
3069 kvm->manual_dirty_log_protect = cap->args[0];
3070 return 0;
3071#endif
2985 default: 3072 default:
2986 return kvm_vm_ioctl_enable_cap(kvm, cap); 3073 return kvm_vm_ioctl_enable_cap(kvm, cap);
2987 } 3074 }
@@ -3029,6 +3116,17 @@ static long kvm_vm_ioctl(struct file *filp,
3029 r = kvm_vm_ioctl_get_dirty_log(kvm, &log); 3116 r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
3030 break; 3117 break;
3031 } 3118 }
3119#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
3120 case KVM_CLEAR_DIRTY_LOG: {
3121 struct kvm_clear_dirty_log log;
3122
3123 r = -EFAULT;
3124 if (copy_from_user(&log, argp, sizeof(log)))
3125 goto out;
3126 r = kvm_vm_ioctl_clear_dirty_log(kvm, &log);
3127 break;
3128 }
3129#endif
3032#ifdef CONFIG_KVM_MMIO 3130#ifdef CONFIG_KVM_MMIO
3033 case KVM_REGISTER_COALESCED_MMIO: { 3131 case KVM_REGISTER_COALESCED_MMIO: {
3034 struct kvm_coalesced_mmio_zone zone; 3132 struct kvm_coalesced_mmio_zone zone;