Diffstat (limited to 'virt/kvm/kvm_main.c')
 -rw-r--r--   virt/kvm/kvm_main.c | 144
 1 file changed, 131 insertions(+), 13 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 458b9b14b15c..a1093700f3a4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -66,6 +66,9 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
+unsigned int halt_poll_ns = 0;
+module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
+
 /*
  * Ordering of locks:
  *
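The parameter is declared S_IRUGO | S_IWUSR, so by the usual module_param convention it should surface as a root-writable file under /sys/module/kvm/parameters/ and can be tuned without reloading the module. Since it defaults to 0, polling stays off until it is raised. A minimal sketch of setting a 200 us window from userspace, assuming that conventional path:

```c
#include <stdio.h>

/*
 * Set halt_poll_ns to 200000 ns (200 us). The sysfs path follows the
 * standard /sys/module/<mod>/parameters/<param> layout and is an
 * assumption here; S_IWUSR means this needs root.
 */
int main(void)
{
        FILE *f = fopen("/sys/module/kvm/parameters/halt_poll_ns", "w");

        if (!f) {
                perror("fopen");
                return 1;
        }
        fprintf(f, "%u\n", 200000u);
        return fclose(f) ? 1 : 0;
}
```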
@@ -89,7 +92,7 @@ struct dentry *kvm_debugfs_dir;
 
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
                            unsigned long arg);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_KVM_COMPAT
 static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
                                   unsigned long arg);
 #endif
@@ -176,6 +179,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
        return called;
 }
 
+#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
        long dirty_count = kvm->tlbs_dirty;
@@ -186,6 +190,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
        cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
+#endif
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
@@ -673,6 +678,7 @@ static void update_memslots(struct kvm_memslots *slots,
        if (!new->npages) {
                WARN_ON(!mslots[i].npages);
                new->base_gfn = 0;
+               new->flags = 0;
                if (mslots[i].npages)
                        slots->used_slots--;
        } else {
@@ -993,6 +999,86 @@ out:
 }
 EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
 
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+/**
+ * kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages
+ *     are dirty write protect them for next write.
+ * @kvm:       pointer to kvm instance
+ * @log:       slot id and address to which we copy the log
+ * @is_dirty:  flag set if any page is dirty
+ *
+ * We need to keep it in mind that VCPU threads can write to the bitmap
+ * concurrently. So, to avoid losing track of dirty pages we keep the
+ * following order:
+ *
+ *    1. Take a snapshot of the bit and clear it if needed.
+ *    2. Write protect the corresponding page.
+ *    3. Copy the snapshot to the userspace.
+ *    4. Upon return caller flushes TLB's if needed.
+ *
+ * Between 2 and 4, the guest may write to the page using the remaining TLB
+ * entry. This is not a problem because the page is reported dirty using
+ * the snapshot taken before and step 4 ensures that writes done after
+ * exiting to userspace will be logged for the next call.
+ *
+ */
+int kvm_get_dirty_log_protect(struct kvm *kvm,
+                       struct kvm_dirty_log *log, bool *is_dirty)
+{
+       struct kvm_memory_slot *memslot;
+       int r, i;
+       unsigned long n;
+       unsigned long *dirty_bitmap;
+       unsigned long *dirty_bitmap_buffer;
+
+       r = -EINVAL;
+       if (log->slot >= KVM_USER_MEM_SLOTS)
+               goto out;
+
+       memslot = id_to_memslot(kvm->memslots, log->slot);
+
+       dirty_bitmap = memslot->dirty_bitmap;
+       r = -ENOENT;
+       if (!dirty_bitmap)
+               goto out;
+
+       n = kvm_dirty_bitmap_bytes(memslot);
+
+       dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+       memset(dirty_bitmap_buffer, 0, n);
+
+       spin_lock(&kvm->mmu_lock);
+       *is_dirty = false;
+       for (i = 0; i < n / sizeof(long); i++) {
+               unsigned long mask;
+               gfn_t offset;
+
+               if (!dirty_bitmap[i])
+                       continue;
+
+               *is_dirty = true;
+
+               mask = xchg(&dirty_bitmap[i], 0);
+               dirty_bitmap_buffer[i] = mask;
+
+               offset = i * BITS_PER_LONG;
+               kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset,
+                                                       mask);
+       }
+
+       spin_unlock(&kvm->mmu_lock);
+
+       r = -EFAULT;
+       if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
+               goto out;
+
+       r = 0;
+out:
+       return r;
+}
+EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
+#endif
+
 bool kvm_largepages_enabled(void)
 {
        return largepages_enabled;
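Userspace reaches this path through the existing KVM_GET_DIRTY_LOG vm ioctl: the kernel snapshots the per-slot bitmap, write-protects the dirty pages, and copies the snapshot back. A minimal caller sketch, assuming a VM fd and a memslot of npages pages set up elsewhere (get_dirty_bitmap is a hypothetical helper name, and the buffer size mirrors the kernel's ALIGN(npages, BITS_PER_LONG) / 8 on a 64-bit host):

```c
#include <linux/kvm.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>

/* Hypothetical helper: fetch one slot's dirty bitmap; caller frees it. */
static unsigned long *get_dirty_bitmap(int vm_fd, uint32_t slot,
                                       uint64_t npages)
{
        /* Matches kvm_dirty_bitmap_bytes() when BITS_PER_LONG == 64. */
        size_t len = ((npages + 63) / 64) * 8;
        unsigned long *bitmap = calloc(1, len);
        struct kvm_dirty_log log = { .slot = slot };

        if (!bitmap)
                return NULL;
        log.dirty_bitmap = bitmap;
        /* Receives the snapshot taken (and write-protected) by the kernel. */
        if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0) {
                free(bitmap);
                return NULL;
        }
        return bitmap;
}
```

Bit N of the result covers guest frame base_gfn + N of the slot; a set bit means the page was written since the previous call, per the ordering described in the comment above.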
@@ -1551,6 +1637,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
        }
        return 0;
 }
+EXPORT_SYMBOL_GPL(kvm_write_guest);
 
 int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
                              gpa_t gpa, unsigned long len)
@@ -1687,29 +1774,60 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(mark_page_dirty);
 
+static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
+{
+       if (kvm_arch_vcpu_runnable(vcpu)) {
+               kvm_make_request(KVM_REQ_UNHALT, vcpu);
+               return -EINTR;
+       }
+       if (kvm_cpu_has_pending_timer(vcpu))
+               return -EINTR;
+       if (signal_pending(current))
+               return -EINTR;
+
+       return 0;
+}
+
 /*
  * The vCPU has executed a HLT instruction with in-kernel mode enabled.
  */
 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 {
+       ktime_t start, cur;
        DEFINE_WAIT(wait);
+       bool waited = false;
+
+       start = cur = ktime_get();
+       if (halt_poll_ns) {
+               ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);
+               do {
+                       /*
+                        * This sets KVM_REQ_UNHALT if an interrupt
+                        * arrives.
+                        */
+                       if (kvm_vcpu_check_block(vcpu) < 0) {
+                               ++vcpu->stat.halt_successful_poll;
+                               goto out;
+                       }
+                       cur = ktime_get();
+               } while (single_task_running() && ktime_before(cur, stop));
+       }
 
        for (;;) {
                prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
-               if (kvm_arch_vcpu_runnable(vcpu)) {
-                       kvm_make_request(KVM_REQ_UNHALT, vcpu);
-                       break;
-               }
-               if (kvm_cpu_has_pending_timer(vcpu))
-                       break;
-               if (signal_pending(current))
+               if (kvm_vcpu_check_block(vcpu) < 0)
                        break;
 
+               waited = true;
                schedule();
        }
 
        finish_wait(&vcpu->wq, &wait);
+       cur = ktime_get();
+
+out:
+       trace_kvm_vcpu_wakeup(ktime_to_ns(cur) - ktime_to_ns(start), waited);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_block);
 
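The poll loop only spins while single_task_running() holds, so a pCPU with other runnable work falls straight through to the wait-queue path and nothing is starved; when the poll does catch a wakeup within halt_poll_ns it skips prepare_to_wait()/schedule() entirely, which is where the halt latency win comes from. One way to judge whether a given halt_poll_ns is paying off is the new halt_successful_poll counter; a minimal sketch, assuming the stat is exposed the usual way under /sys/kernel/debug/kvm/ (true for x86 in this series, arch-dependent elsewhere):

```c
#include <stdio.h>

/*
 * Read the aggregate counter fed by ++vcpu->stat.halt_successful_poll
 * above. The debugfs path is an assumption; it needs debugfs mounted
 * and root privileges.
 */
int main(void)
{
        unsigned long long polls;
        FILE *f = fopen("/sys/kernel/debug/kvm/halt_successful_poll", "r");

        if (!f) {
                perror("fopen");
                return 1;
        }
        if (fscanf(f, "%llu", &polls) != 1) {
                fclose(f);
                return 1;
        }
        fclose(f);
        printf("halts satisfied by polling: %llu\n", polls);
        return 0;
}
```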
@@ -1892,7 +2010,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
 static struct file_operations kvm_vcpu_fops = {
        .release        = kvm_vcpu_release,
        .unlocked_ioctl = kvm_vcpu_ioctl,
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_KVM_COMPAT
        .compat_ioctl   = kvm_vcpu_compat_ioctl,
 #endif
        .mmap           = kvm_vcpu_mmap,
@@ -2182,7 +2300,7 @@ out:
        return r;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_KVM_COMPAT
 static long kvm_vcpu_compat_ioctl(struct file *filp,
                                  unsigned int ioctl, unsigned long arg)
 {
@@ -2274,7 +2392,7 @@ static int kvm_device_release(struct inode *inode, struct file *filp)
 
 static const struct file_operations kvm_device_fops = {
        .unlocked_ioctl = kvm_device_ioctl,
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_KVM_COMPAT
        .compat_ioctl = kvm_device_ioctl,
 #endif
        .release = kvm_device_release,
@@ -2561,7 +2679,7 @@ out:
        return r;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_KVM_COMPAT
 struct compat_kvm_dirty_log {
        __u32 slot;
        __u32 padding1;
@@ -2608,7 +2726,7 @@ out:
 static struct file_operations kvm_vm_fops = {
        .release        = kvm_vm_release,
        .unlocked_ioctl = kvm_vm_ioctl,
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_KVM_COMPAT
        .compat_ioctl   = kvm_vm_compat_ioctl,
 #endif
        .llseek         = noop_llseek,