diff options
-rw-r--r-- | Documentation/virtual/kvm/api.txt | 67 | ||||
-rw-r--r-- | arch/mips/kvm/mips.c | 23 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 27 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 5 | ||||
-rw-r--r-- | include/uapi/linux/kvm.h | 15 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/Makefile | 2 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/clear_dirty_log_test.c | 2 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/dirty_log_test.c | 19 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/include/kvm_util.h | 2 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/lib/kvm_util.c | 13 | ||||
-rw-r--r-- | virt/kvm/arm/arm.c | 16 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 132 |
12 files changed, 306 insertions, 17 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 1071c10cf1c7..f2c345f7b630 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt | |||
@@ -305,6 +305,9 @@ the address space for which you want to return the dirty bitmap. | |||
305 | They must be less than the value that KVM_CHECK_EXTENSION returns for | 305 | They must be less than the value that KVM_CHECK_EXTENSION returns for |
306 | the KVM_CAP_MULTI_ADDRESS_SPACE capability. | 306 | the KVM_CAP_MULTI_ADDRESS_SPACE capability. |
307 | 307 | ||
308 | The bits in the dirty bitmap are cleared before the ioctl returns, unless | ||
309 | KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is enabled. For more information, | ||
310 | see the description of the capability. | ||
308 | 311 | ||
309 | 4.9 KVM_SET_MEMORY_ALIAS | 312 | 4.9 KVM_SET_MEMORY_ALIAS |
310 | 313 | ||
@@ -3758,6 +3761,46 @@ Coalesced pio is based on coalesced mmio. There is little difference | |||
3758 | between coalesced mmio and pio except that coalesced pio records accesses | 3761 | between coalesced mmio and pio except that coalesced pio records accesses |
3759 | to I/O ports. | 3762 | to I/O ports. |
3760 | 3763 | ||
3764 | 4.117 KVM_CLEAR_DIRTY_LOG (vm ioctl) | ||
3765 | |||
3766 | Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT | ||
3767 | Architectures: x86 | ||
3768 | Type: vm ioctl | ||
3769 | Parameters: struct kvm_dirty_log (in) | ||
3770 | Returns: 0 on success, -1 on error | ||
3771 | |||
3772 | /* for KVM_CLEAR_DIRTY_LOG */ | ||
3773 | struct kvm_clear_dirty_log { | ||
3774 | __u32 slot; | ||
3775 | __u32 num_pages; | ||
3776 | __u64 first_page; | ||
3777 | union { | ||
3778 | void __user *dirty_bitmap; /* one bit per page */ | ||
3779 | __u64 padding; | ||
3780 | }; | ||
3781 | }; | ||
3782 | |||
3783 | The ioctl clears the dirty status of pages in a memory slot, according to | ||
3784 | the bitmap that is passed in struct kvm_clear_dirty_log's dirty_bitmap | ||
3785 | field. Bit 0 of the bitmap corresponds to page "first_page" in the | ||
3786 | memory slot, and num_pages is the size in bits of the input bitmap. | ||
3787 | Both first_page and num_pages must be a multiple of 64. For each bit | ||
3788 | that is set in the input bitmap, the corresponding page is marked "clean" | ||
3789 | in KVM's dirty bitmap, and dirty tracking is re-enabled for that page | ||
3790 | (for example via write-protection, or by clearing the dirty bit in | ||
3791 | a page table entry). | ||
3792 | |||
3793 | If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 specify | ||
3794 | the address space for which you want to clear the dirty bitmap. | ||
3795 | They must be less than the value that KVM_CHECK_EXTENSION returns for | ||
3796 | the KVM_CAP_MULTI_ADDRESS_SPACE capability. | ||
3797 | |||
3798 | This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT | ||
3799 | is enabled; for more information, see the description of the capability. | ||
3800 | However, it can always be used as long as KVM_CHECK_EXTENSION confirms | ||
3801 | that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is present. | ||
3802 | |||
3803 | |||
3761 | 5. The kvm_run structure | 3804 | 5. The kvm_run structure |
3762 | ------------------------ | 3805 | ------------------------ |
3763 | 3806 | ||
@@ -4652,6 +4695,30 @@ and injected exceptions. | |||
4652 | * For the new DR6 bits, note that bit 16 is set iff the #DB exception | 4695 | * For the new DR6 bits, note that bit 16 is set iff the #DB exception |
4653 | will clear DR6.RTM. | 4696 | will clear DR6.RTM. |
4654 | 4697 | ||
4698 | 7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT | ||
4699 | |||
4700 | Architectures: all | ||
4701 | Parameters: args[0] whether feature should be enabled or not | ||
4702 | |||
4703 | With this capability enabled, KVM_GET_DIRTY_LOG will not automatically | ||
4704 | clear and write-protect all pages that are returned as dirty. | ||
4705 | Rather, userspace will have to do this operation separately using | ||
4706 | KVM_CLEAR_DIRTY_LOG. | ||
4707 | |||
4708 | At the cost of a slightly more complicated operation, this provides better | ||
4709 | scalability and responsiveness for two reasons. First, | ||
4710 | KVM_CLEAR_DIRTY_LOG ioctl can operate on a 64-page granularity rather | ||
4711 | than requiring to sync a full memslot; this ensures that KVM does not | ||
4712 | take spinlocks for an extended period of time. Second, in some cases a | ||
4713 | large amount of time can pass between a call to KVM_GET_DIRTY_LOG and | ||
4714 | userspace actually using the data in the page. Pages can be modified | ||
4715 | during this time, which is inefficient for both the guest and userspace: | ||
4716 | the guest will incur a higher penalty due to write protection faults, | ||
4717 | while userspace can see false reports of dirty pages. Manual reprotection | ||
4718 | helps reduce this time, improving guest performance and reducing the | ||
4719 | number of dirty log false positives. | ||
4720 | |||
4721 | |||
4655 | 8. Other capabilities. | 4722 | 8. Other capabilities. |
4656 | ---------------------- | 4723 | ---------------------- |
4657 | 4724 | ||
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 3898e657952e..3734cd58895e 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c | |||
@@ -1023,6 +1023,29 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) | |||
1023 | return r; | 1023 | return r; |
1024 | } | 1024 | } |
1025 | 1025 | ||
1026 | int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log) | ||
1027 | { | ||
1028 | struct kvm_memslots *slots; | ||
1029 | struct kvm_memory_slot *memslot; | ||
1030 | bool flush = false; | ||
1031 | int r; | ||
1032 | |||
1033 | mutex_lock(&kvm->slots_lock); | ||
1034 | |||
1035 | r = kvm_clear_dirty_log_protect(kvm, log, &flush); | ||
1036 | |||
1037 | if (flush) { | ||
1038 | slots = kvm_memslots(kvm); | ||
1039 | memslot = id_to_memslot(slots, log->slot); | ||
1040 | |||
1041 | /* Let implementation handle TLB/GVA invalidation */ | ||
1042 | kvm_mips_callbacks->flush_shadow_memslot(kvm, memslot); | ||
1043 | } | ||
1044 | |||
1045 | mutex_unlock(&kvm->slots_lock); | ||
1046 | return r; | ||
1047 | } | ||
1048 | |||
1026 | long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) | 1049 | long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) |
1027 | { | 1050 | { |
1028 | long r; | 1051 | long r; |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 448f011aa317..6af846c54660 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -4418,6 +4418,33 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) | |||
4418 | return r; | 4418 | return r; |
4419 | } | 4419 | } |
4420 | 4420 | ||
4421 | int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log) | ||
4422 | { | ||
4423 | bool flush = false; | ||
4424 | int r; | ||
4425 | |||
4426 | mutex_lock(&kvm->slots_lock); | ||
4427 | |||
4428 | /* | ||
4429 | * Flush potentially hardware-cached dirty pages to dirty_bitmap. | ||
4430 | */ | ||
4431 | if (kvm_x86_ops->flush_log_dirty) | ||
4432 | kvm_x86_ops->flush_log_dirty(kvm); | ||
4433 | |||
4434 | r = kvm_clear_dirty_log_protect(kvm, log, &flush); | ||
4435 | |||
4436 | /* | ||
4437 | * All the TLBs can be flushed out of mmu lock, see the comments in | ||
4438 | * kvm_mmu_slot_remove_write_access(). | ||
4439 | */ | ||
4440 | lockdep_assert_held(&kvm->slots_lock); | ||
4441 | if (flush) | ||
4442 | kvm_flush_remote_tlbs(kvm); | ||
4443 | |||
4444 | mutex_unlock(&kvm->slots_lock); | ||
4445 | return r; | ||
4446 | } | ||
4447 | |||
4421 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, | 4448 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, |
4422 | bool line_status) | 4449 | bool line_status) |
4423 | { | 4450 | { |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8c56b2873b13..e065aeaae29e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -449,6 +449,7 @@ struct kvm { | |||
449 | #endif | 449 | #endif |
450 | long tlbs_dirty; | 450 | long tlbs_dirty; |
451 | struct list_head devices; | 451 | struct list_head devices; |
452 | bool manual_dirty_log_protect; | ||
452 | struct dentry *debugfs_dentry; | 453 | struct dentry *debugfs_dentry; |
453 | struct kvm_stat_data **debugfs_stat_data; | 454 | struct kvm_stat_data **debugfs_stat_data; |
454 | struct srcu_struct srcu; | 455 | struct srcu_struct srcu; |
@@ -754,6 +755,8 @@ int kvm_get_dirty_log(struct kvm *kvm, | |||
754 | 755 | ||
755 | int kvm_get_dirty_log_protect(struct kvm *kvm, | 756 | int kvm_get_dirty_log_protect(struct kvm *kvm, |
756 | struct kvm_dirty_log *log, bool *flush); | 757 | struct kvm_dirty_log *log, bool *flush); |
758 | int kvm_clear_dirty_log_protect(struct kvm *kvm, | ||
759 | struct kvm_clear_dirty_log *log, bool *flush); | ||
757 | 760 | ||
758 | void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, | 761 | void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, |
759 | struct kvm_memory_slot *slot, | 762 | struct kvm_memory_slot *slot, |
@@ -762,6 +765,8 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, | |||
762 | 765 | ||
763 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | 766 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, |
764 | struct kvm_dirty_log *log); | 767 | struct kvm_dirty_log *log); |
768 | int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, | ||
769 | struct kvm_clear_dirty_log *log); | ||
765 | 770 | ||
766 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, | 771 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, |
767 | bool line_status); | 772 | bool line_status); |
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 2b7a652c9fa4..9fe35f1ac938 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h | |||
@@ -492,6 +492,17 @@ struct kvm_dirty_log { | |||
492 | }; | 492 | }; |
493 | }; | 493 | }; |
494 | 494 | ||
495 | /* for KVM_CLEAR_DIRTY_LOG */ | ||
496 | struct kvm_clear_dirty_log { | ||
497 | __u32 slot; | ||
498 | __u32 num_pages; | ||
499 | __u64 first_page; | ||
500 | union { | ||
501 | void __user *dirty_bitmap; /* one bit per page */ | ||
502 | __u64 padding2; | ||
503 | }; | ||
504 | }; | ||
505 | |||
495 | /* for KVM_SET_SIGNAL_MASK */ | 506 | /* for KVM_SET_SIGNAL_MASK */ |
496 | struct kvm_signal_mask { | 507 | struct kvm_signal_mask { |
497 | __u32 len; | 508 | __u32 len; |
@@ -975,6 +986,7 @@ struct kvm_ppc_resize_hpt { | |||
975 | #define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 | 986 | #define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 |
976 | #define KVM_CAP_EXCEPTION_PAYLOAD 164 | 987 | #define KVM_CAP_EXCEPTION_PAYLOAD 164 |
977 | #define KVM_CAP_ARM_VM_IPA_SIZE 165 | 988 | #define KVM_CAP_ARM_VM_IPA_SIZE 165 |
989 | #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 | ||
978 | 990 | ||
979 | #ifdef KVM_CAP_IRQ_ROUTING | 991 | #ifdef KVM_CAP_IRQ_ROUTING |
980 | 992 | ||
@@ -1421,6 +1433,9 @@ struct kvm_enc_region { | |||
1421 | #define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state) | 1433 | #define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state) |
1422 | #define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state) | 1434 | #define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state) |
1423 | 1435 | ||
1436 | /* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT */ | ||
1437 | #define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log) | ||
1438 | |||
1424 | /* Secure Encrypted Virtualization command */ | 1439 | /* Secure Encrypted Virtualization command */ |
1425 | enum sev_cmd_id { | 1440 | enum sev_cmd_id { |
1426 | /* Guest initialization commands */ | 1441 | /* Guest initialization commands */ |
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 52bfe5e76907..caaa0d5eba92 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile | |||
@@ -16,8 +16,10 @@ TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test | |||
16 | TEST_GEN_PROGS_x86_64 += x86_64/state_test | 16 | TEST_GEN_PROGS_x86_64 += x86_64/state_test |
17 | TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test | 17 | TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test |
18 | TEST_GEN_PROGS_x86_64 += dirty_log_test | 18 | TEST_GEN_PROGS_x86_64 += dirty_log_test |
19 | TEST_GEN_PROGS_x86_64 += clear_dirty_log_test | ||
19 | 20 | ||
20 | TEST_GEN_PROGS_aarch64 += dirty_log_test | 21 | TEST_GEN_PROGS_aarch64 += dirty_log_test |
22 | TEST_GEN_PROGS_aarch64 += clear_dirty_log_test | ||
21 | 23 | ||
22 | TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) | 24 | TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) |
23 | LIBKVM += $(LIBKVM_$(UNAME_M)) | 25 | LIBKVM += $(LIBKVM_$(UNAME_M)) |
diff --git a/tools/testing/selftests/kvm/clear_dirty_log_test.c b/tools/testing/selftests/kvm/clear_dirty_log_test.c new file mode 100644 index 000000000000..749336937d37 --- /dev/null +++ b/tools/testing/selftests/kvm/clear_dirty_log_test.c | |||
@@ -0,0 +1,2 @@ | |||
1 | #define USE_CLEAR_DIRTY_LOG | ||
2 | #include "dirty_log_test.c" | ||
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index aeff95a91b15..4629c7ccfa28 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c | |||
@@ -275,6 +275,14 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, | |||
275 | 275 | ||
276 | vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code); | 276 | vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code); |
277 | 277 | ||
278 | #ifdef USE_CLEAR_DIRTY_LOG | ||
279 | struct kvm_enable_cap cap = {}; | ||
280 | |||
281 | cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT; | ||
282 | cap.args[0] = 1; | ||
283 | vm_enable_cap(vm, &cap); | ||
284 | #endif | ||
285 | |||
278 | /* Add an extra memory slot for testing dirty logging */ | 286 | /* Add an extra memory slot for testing dirty logging */ |
279 | vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, | 287 | vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, |
280 | guest_test_mem, | 288 | guest_test_mem, |
@@ -316,6 +324,10 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, | |||
316 | /* Give the vcpu thread some time to dirty some pages */ | 324 | /* Give the vcpu thread some time to dirty some pages */ |
317 | usleep(interval * 1000); | 325 | usleep(interval * 1000); |
318 | kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); | 326 | kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); |
327 | #ifdef USE_CLEAR_DIRTY_LOG | ||
328 | kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, | ||
329 | DIV_ROUND_UP(host_num_pages, 64) * 64); | ||
330 | #endif | ||
319 | vm_dirty_log_verify(bmap); | 331 | vm_dirty_log_verify(bmap); |
320 | iteration++; | 332 | iteration++; |
321 | sync_global_to_guest(vm, iteration); | 333 | sync_global_to_guest(vm, iteration); |
@@ -392,6 +404,13 @@ int main(int argc, char *argv[]) | |||
392 | unsigned int mode; | 404 | unsigned int mode; |
393 | int opt, i; | 405 | int opt, i; |
394 | 406 | ||
407 | #ifdef USE_CLEAR_DIRTY_LOG | ||
408 | if (!kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT)) { | ||
409 | fprintf(stderr, "KVM_CLEAR_DIRTY_LOG not available, skipping tests\n"); | ||
410 | exit(KSFT_SKIP); | ||
411 | } | ||
412 | #endif | ||
413 | |||
395 | while ((opt = getopt(argc, argv, "hi:I:o:tm:")) != -1) { | 414 | while ((opt = getopt(argc, argv, "hi:I:o:tm:")) != -1) { |
396 | switch (opt) { | 415 | switch (opt) { |
397 | case 'i': | 416 | case 'i': |
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index a4e59e3b4826..c51bfaba017a 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h | |||
@@ -58,6 +58,8 @@ void kvm_vm_free(struct kvm_vm *vmp); | |||
58 | void kvm_vm_restart(struct kvm_vm *vmp, int perm); | 58 | void kvm_vm_restart(struct kvm_vm *vmp, int perm); |
59 | void kvm_vm_release(struct kvm_vm *vmp); | 59 | void kvm_vm_release(struct kvm_vm *vmp); |
60 | void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log); | 60 | void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log); |
61 | void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, | ||
62 | uint64_t first_page, uint32_t num_pages); | ||
61 | 63 | ||
62 | int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, | 64 | int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, |
63 | size_t len); | 65 | size_t len); |
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 1b41e71283d5..c9e94d6503af 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c | |||
@@ -231,6 +231,19 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) | |||
231 | strerror(-ret)); | 231 | strerror(-ret)); |
232 | } | 232 | } |
233 | 233 | ||
234 | void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, | ||
235 | uint64_t first_page, uint32_t num_pages) | ||
236 | { | ||
237 | struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot, | ||
238 | .first_page = first_page, | ||
239 | .num_pages = num_pages }; | ||
240 | int ret; | ||
241 | |||
242 | ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args); | ||
243 | TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s", | ||
244 | strerror(-ret)); | ||
245 | } | ||
246 | |||
234 | /* | 247 | /* |
235 | * Userspace Memory Region Find | 248 | * Userspace Memory Region Find |
236 | * | 249 | * |
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 120a2663dab9..e91adf77d99a 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c | |||
@@ -1219,6 +1219,22 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) | |||
1219 | return r; | 1219 | return r; |
1220 | } | 1220 | } |
1221 | 1221 | ||
1222 | int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log) | ||
1223 | { | ||
1224 | bool flush = false; | ||
1225 | int r; | ||
1226 | |||
1227 | mutex_lock(&kvm->slots_lock); | ||
1228 | |||
1229 | r = kvm_clear_dirty_log_protect(kvm, log, &flush); | ||
1230 | |||
1231 | if (flush) | ||
1232 | kvm_flush_remote_tlbs(kvm); | ||
1233 | |||
1234 | mutex_unlock(&kvm->slots_lock); | ||
1235 | return r; | ||
1236 | } | ||
1237 | |||
1222 | static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, | 1238 | static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, |
1223 | struct kvm_arm_device_addr *dev_addr) | 1239 | struct kvm_arm_device_addr *dev_addr) |
1224 | { | 1240 | { |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 54f0fcfd431e..0041947b7390 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -1133,7 +1133,7 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log); | |||
1133 | #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT | 1133 | #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT |
1134 | /** | 1134 | /** |
1135 | * kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages | 1135 | * kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages |
1136 | * are dirty write protect them for next write. | 1136 | * and reenable dirty page tracking for the corresponding pages. |
1137 | * @kvm: pointer to kvm instance | 1137 | * @kvm: pointer to kvm instance |
1138 | * @log: slot id and address to which we copy the log | 1138 | * @log: slot id and address to which we copy the log |
1139 | * @is_dirty: flag set if any page is dirty | 1139 | * @is_dirty: flag set if any page is dirty |
@@ -1176,37 +1176,114 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, | |||
1176 | return -ENOENT; | 1176 | return -ENOENT; |
1177 | 1177 | ||
1178 | n = kvm_dirty_bitmap_bytes(memslot); | 1178 | n = kvm_dirty_bitmap_bytes(memslot); |
1179 | *flush = false; | ||
1180 | if (kvm->manual_dirty_log_protect) { | ||
1181 | /* | ||
1182 | * Unlike kvm_get_dirty_log, we always return false in *flush, | ||
1183 | * because no flush is needed until KVM_CLEAR_DIRTY_LOG. There | ||
1184 | * is some code duplication between this function and | ||
1185 | * kvm_get_dirty_log, but hopefully all architecture | ||
1186 | * transition to kvm_get_dirty_log_protect and kvm_get_dirty_log | ||
1187 | * can be eliminated. | ||
1188 | */ | ||
1189 | dirty_bitmap_buffer = dirty_bitmap; | ||
1190 | } else { | ||
1191 | dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot); | ||
1192 | memset(dirty_bitmap_buffer, 0, n); | ||
1179 | 1193 | ||
1180 | dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot); | 1194 | spin_lock(&kvm->mmu_lock); |
1181 | memset(dirty_bitmap_buffer, 0, n); | 1195 | for (i = 0; i < n / sizeof(long); i++) { |
1196 | unsigned long mask; | ||
1197 | gfn_t offset; | ||
1182 | 1198 | ||
1183 | spin_lock(&kvm->mmu_lock); | 1199 | if (!dirty_bitmap[i]) |
1200 | continue; | ||
1201 | |||
1202 | *flush = true; | ||
1203 | mask = xchg(&dirty_bitmap[i], 0); | ||
1204 | dirty_bitmap_buffer[i] = mask; | ||
1205 | |||
1206 | if (mask) { | ||
1207 | offset = i * BITS_PER_LONG; | ||
1208 | kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, | ||
1209 | offset, mask); | ||
1210 | } | ||
1211 | } | ||
1212 | spin_unlock(&kvm->mmu_lock); | ||
1213 | } | ||
1214 | |||
1215 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) | ||
1216 | return -EFAULT; | ||
1217 | return 0; | ||
1218 | } | ||
1219 | EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect); | ||
1220 | |||
1221 | /** | ||
1222 | * kvm_clear_dirty_log_protect - clear dirty bits in the bitmap | ||
1223 | * and reenable dirty page tracking for the corresponding pages. | ||
1224 | * @kvm: pointer to kvm instance | ||
1225 | * @log: slot id and address from which to fetch the bitmap of dirty pages | ||
1226 | */ | ||
1227 | int kvm_clear_dirty_log_protect(struct kvm *kvm, | ||
1228 | struct kvm_clear_dirty_log *log, bool *flush) | ||
1229 | { | ||
1230 | struct kvm_memslots *slots; | ||
1231 | struct kvm_memory_slot *memslot; | ||
1232 | int as_id, id, n; | ||
1233 | gfn_t offset; | ||
1234 | unsigned long i; | ||
1235 | unsigned long *dirty_bitmap; | ||
1236 | unsigned long *dirty_bitmap_buffer; | ||
1237 | |||
1238 | as_id = log->slot >> 16; | ||
1239 | id = (u16)log->slot; | ||
1240 | if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) | ||
1241 | return -EINVAL; | ||
1242 | |||
1243 | if ((log->first_page & 63) || (log->num_pages & 63)) | ||
1244 | return -EINVAL; | ||
1245 | |||
1246 | slots = __kvm_memslots(kvm, as_id); | ||
1247 | memslot = id_to_memslot(slots, id); | ||
1248 | |||
1249 | dirty_bitmap = memslot->dirty_bitmap; | ||
1250 | if (!dirty_bitmap) | ||
1251 | return -ENOENT; | ||
1252 | |||
1253 | n = kvm_dirty_bitmap_bytes(memslot); | ||
1184 | *flush = false; | 1254 | *flush = false; |
1185 | for (i = 0; i < n / sizeof(long); i++) { | 1255 | dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot); |
1186 | unsigned long mask; | 1256 | if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n)) |
1187 | gfn_t offset; | 1257 | return -EFAULT; |
1188 | 1258 | ||
1189 | if (!dirty_bitmap[i]) | 1259 | spin_lock(&kvm->mmu_lock); |
1260 | for (offset = log->first_page, | ||
1261 | i = offset / BITS_PER_LONG, n = log->num_pages / BITS_PER_LONG; n--; | ||
1262 | i++, offset += BITS_PER_LONG) { | ||
1263 | unsigned long mask = *dirty_bitmap_buffer++; | ||
1264 | atomic_long_t *p = (atomic_long_t *) &dirty_bitmap[i]; | ||
1265 | if (!mask) | ||
1190 | continue; | 1266 | continue; |
1191 | 1267 | ||
1192 | *flush = true; | 1268 | mask &= atomic_long_fetch_andnot(mask, p); |
1193 | |||
1194 | mask = xchg(&dirty_bitmap[i], 0); | ||
1195 | dirty_bitmap_buffer[i] = mask; | ||
1196 | 1269 | ||
1270 | /* | ||
1271 | * mask contains the bits that really have been cleared. This | ||
1272 | * never includes any bits beyond the length of the memslot (if | ||
1273 | * the length is not aligned to 64 pages), therefore it is not | ||
1274 | * a problem if userspace sets them in log->dirty_bitmap. | ||
1275 | */ | ||
1197 | if (mask) { | 1276 | if (mask) { |
1198 | offset = i * BITS_PER_LONG; | 1277 | *flush = true; |
1199 | kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, | 1278 | kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, |
1200 | offset, mask); | 1279 | offset, mask); |
1201 | } | 1280 | } |
1202 | } | 1281 | } |
1203 | |||
1204 | spin_unlock(&kvm->mmu_lock); | 1282 | spin_unlock(&kvm->mmu_lock); |
1205 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) | 1283 | |
1206 | return -EFAULT; | ||
1207 | return 0; | 1284 | return 0; |
1208 | } | 1285 | } |
1209 | EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect); | 1286 | EXPORT_SYMBOL_GPL(kvm_clear_dirty_log_protect); |
1210 | #endif | 1287 | #endif |
1211 | 1288 | ||
1212 | bool kvm_largepages_enabled(void) | 1289 | bool kvm_largepages_enabled(void) |
@@ -2949,6 +3026,9 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) | |||
2949 | case KVM_CAP_IOEVENTFD_ANY_LENGTH: | 3026 | case KVM_CAP_IOEVENTFD_ANY_LENGTH: |
2950 | case KVM_CAP_CHECK_EXTENSION_VM: | 3027 | case KVM_CAP_CHECK_EXTENSION_VM: |
2951 | case KVM_CAP_ENABLE_CAP_VM: | 3028 | case KVM_CAP_ENABLE_CAP_VM: |
3029 | #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT | ||
3030 | case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT: | ||
3031 | #endif | ||
2952 | return 1; | 3032 | return 1; |
2953 | #ifdef CONFIG_KVM_MMIO | 3033 | #ifdef CONFIG_KVM_MMIO |
2954 | case KVM_CAP_COALESCED_MMIO: | 3034 | case KVM_CAP_COALESCED_MMIO: |
@@ -2982,6 +3062,13 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, | |||
2982 | struct kvm_enable_cap *cap) | 3062 | struct kvm_enable_cap *cap) |
2983 | { | 3063 | { |
2984 | switch (cap->cap) { | 3064 | switch (cap->cap) { |
3065 | #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT | ||
3066 | case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT: | ||
3067 | if (cap->flags || (cap->args[0] & ~1)) | ||
3068 | return -EINVAL; | ||
3069 | kvm->manual_dirty_log_protect = cap->args[0]; | ||
3070 | return 0; | ||
3071 | #endif | ||
2985 | default: | 3072 | default: |
2986 | return kvm_vm_ioctl_enable_cap(kvm, cap); | 3073 | return kvm_vm_ioctl_enable_cap(kvm, cap); |
2987 | } | 3074 | } |
@@ -3029,6 +3116,17 @@ static long kvm_vm_ioctl(struct file *filp, | |||
3029 | r = kvm_vm_ioctl_get_dirty_log(kvm, &log); | 3116 | r = kvm_vm_ioctl_get_dirty_log(kvm, &log); |
3030 | break; | 3117 | break; |
3031 | } | 3118 | } |
3119 | #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT | ||
3120 | case KVM_CLEAR_DIRTY_LOG: { | ||
3121 | struct kvm_clear_dirty_log log; | ||
3122 | |||
3123 | r = -EFAULT; | ||
3124 | if (copy_from_user(&log, argp, sizeof(log))) | ||
3125 | goto out; | ||
3126 | r = kvm_vm_ioctl_clear_dirty_log(kvm, &log); | ||
3127 | break; | ||
3128 | } | ||
3129 | #endif | ||
3032 | #ifdef CONFIG_KVM_MMIO | 3130 | #ifdef CONFIG_KVM_MMIO |
3033 | case KVM_REGISTER_COALESCED_MMIO: { | 3131 | case KVM_REGISTER_COALESCED_MMIO: { |
3034 | struct kvm_coalesced_mmio_zone zone; | 3132 | struct kvm_coalesced_mmio_zone zone; |