author     Anthony Liguori <aliguori@us.ibm.com>    2007-09-17 15:57:50 -0400
committer  Avi Kivity <avi@qumranet.com>            2008-01-30 10:52:46 -0500
commit     7aa81cc04781b5b99a0647ec04533599d78cd219
tree       6ac8854faf3db2bc499e2c105fdfdab95df52170  /drivers/kvm/kvm_main.c
parent     aca7f96600b170e470b3056aba0ed8d7df8d330d
KVM: Refactor hypercall infrastructure (v3)
This patch refactors the current hypercall infrastructure to better
support live migration and SMP. It eliminates the hypercall page by
trapping the #UD exception that occurs when a guest uses the wrong
hypercall instruction for the underlying architecture, and lazily
replacing that instruction with the correct one.
A fallout of this patch is that unhandled hypercalls no longer trap to
userspace. There is very little reason, though, to use a hypercall to
communicate with userspace, since PIO or MMIO can be used instead. There
is no code in the tree that uses userspace hypercalls.
[avi: fix #ud injection on vmx]
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
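
The register convention read by the new kvm_emulate_hypercall() below (hypercall number in RAX, arguments in RBX, RCX, RDX and RSI, truncated to 32 bits outside long mode) is what the guest-side wrappers emit. A minimal sketch of such a wrapper follows; it is illustrative only, since the actual wrappers live in the kvm_para headers added elsewhere in this series rather than in the kvm_main.c hunks shown here, and the names are reproduced from memory, not from this diff.

/*
 * Illustrative sketch, not part of this diff.  KVM_HYPERCALL is assumed to
 * expand to the Intel vmcall encoding (0f 01 c1); on an AMD host the first
 * execution raises #UD and kvm_fix_hypercall() rewrites those three bytes
 * to vmmcall in place, so the wrapper itself never needs to care.
 */
#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"

static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
{
	long ret;
	asm volatile(KVM_HYPERCALL
		     : "=a"(ret)		/* return value comes back in RAX */
		     : "a"(nr), "b"(p1)		/* nr in RAX, first argument in RBX */
		     : "memory");
	return ret;
}

An unknown nr simply gets -KVM_ENOSYS back in RAX, per the default case of kvm_emulate_hypercall() in the diff below.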
Diffstat (limited to 'drivers/kvm/kvm_main.c')
-rw-r--r--  drivers/kvm/kvm_main.c | 156
 1 file changed, 46 insertions(+), 110 deletions(-)
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index c0f372f1d761..1c662f63b7a9 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -39,6 +39,7 @@
 #include <linux/smp.h>
 #include <linux/anon_inodes.h>
 #include <linux/profile.h>
+#include <linux/kvm_para.h>
 
 #include <asm/processor.h>
 #include <asm/msr.h>
@@ -1362,51 +1363,61 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
-int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
+int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
-	unsigned long nr, a0, a1, a2, a3, a4, a5, ret;
+	unsigned long nr, a0, a1, a2, a3, ret;
 
 	kvm_x86_ops->cache_regs(vcpu);
-	ret = -KVM_EINVAL;
-#ifdef CONFIG_X86_64
-	if (is_long_mode(vcpu)) {
-		nr = vcpu->regs[VCPU_REGS_RAX];
-		a0 = vcpu->regs[VCPU_REGS_RDI];
-		a1 = vcpu->regs[VCPU_REGS_RSI];
-		a2 = vcpu->regs[VCPU_REGS_RDX];
-		a3 = vcpu->regs[VCPU_REGS_RCX];
-		a4 = vcpu->regs[VCPU_REGS_R8];
-		a5 = vcpu->regs[VCPU_REGS_R9];
-	} else
-#endif
-	{
-		nr = vcpu->regs[VCPU_REGS_RBX] & -1u;
-		a0 = vcpu->regs[VCPU_REGS_RAX] & -1u;
-		a1 = vcpu->regs[VCPU_REGS_RCX] & -1u;
-		a2 = vcpu->regs[VCPU_REGS_RDX] & -1u;
-		a3 = vcpu->regs[VCPU_REGS_RSI] & -1u;
-		a4 = vcpu->regs[VCPU_REGS_RDI] & -1u;
-		a5 = vcpu->regs[VCPU_REGS_RBP] & -1u;
+
+	nr = vcpu->regs[VCPU_REGS_RAX];
+	a0 = vcpu->regs[VCPU_REGS_RBX];
+	a1 = vcpu->regs[VCPU_REGS_RCX];
+	a2 = vcpu->regs[VCPU_REGS_RDX];
+	a3 = vcpu->regs[VCPU_REGS_RSI];
+
+	if (!is_long_mode(vcpu)) {
+		nr &= 0xFFFFFFFF;
+		a0 &= 0xFFFFFFFF;
+		a1 &= 0xFFFFFFFF;
+		a2 &= 0xFFFFFFFF;
+		a3 &= 0xFFFFFFFF;
 	}
+
 	switch (nr) {
 	default:
-		run->hypercall.nr = nr;
-		run->hypercall.args[0] = a0;
-		run->hypercall.args[1] = a1;
-		run->hypercall.args[2] = a2;
-		run->hypercall.args[3] = a3;
-		run->hypercall.args[4] = a4;
-		run->hypercall.args[5] = a5;
-		run->hypercall.ret = ret;
-		run->hypercall.longmode = is_long_mode(vcpu);
-		kvm_x86_ops->decache_regs(vcpu);
-		return 0;
+		ret = -KVM_ENOSYS;
+		break;
 	}
 	vcpu->regs[VCPU_REGS_RAX] = ret;
 	kvm_x86_ops->decache_regs(vcpu);
-	return 1;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
+
+int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
+{
+	char instruction[3];
+	int ret = 0;
+
+	mutex_lock(&vcpu->kvm->lock);
+
+	/*
+	 * Blow out the MMU to ensure that no other VCPU has an active mapping
+	 * to ensure that the updated hypercall appears atomically across all
+	 * VCPUs.
+	 */
+	kvm_mmu_zap_all(vcpu->kvm);
+
+	kvm_x86_ops->cache_regs(vcpu);
+	kvm_x86_ops->patch_hypercall(vcpu, instruction);
+	if (emulator_write_emulated(vcpu->rip, instruction, 3, vcpu)
+	    != X86EMUL_CONTINUE)
+		ret = -EFAULT;
+
+	mutex_unlock(&vcpu->kvm->lock);
+
+	return ret;
 }
-EXPORT_SYMBOL_GPL(kvm_hypercall);
 
 static u64 mk_cr_64(u64 curr_cr, u32 new_val)
 {
@@ -1474,75 +1485,6 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
 	}
 }
 
-/*
- * Register the para guest with the host:
- */
-static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
-{
-	struct kvm_vcpu_para_state *para_state;
-	hpa_t para_state_hpa, hypercall_hpa;
-	struct page *para_state_page;
-	unsigned char *hypercall;
-	gpa_t hypercall_gpa;
-
-	printk(KERN_DEBUG "kvm: guest trying to enter paravirtual mode\n");
-	printk(KERN_DEBUG ".... para_state_gpa: %08Lx\n", para_state_gpa);
-
-	/*
-	 * Needs to be page aligned:
-	 */
-	if (para_state_gpa != PAGE_ALIGN(para_state_gpa))
-		goto err_gp;
-
-	para_state_hpa = gpa_to_hpa(vcpu, para_state_gpa);
-	printk(KERN_DEBUG ".... para_state_hpa: %08Lx\n", para_state_hpa);
-	if (is_error_hpa(para_state_hpa))
-		goto err_gp;
-
-	mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT);
-	para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT);
-	para_state = kmap(para_state_page);
-
-	printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version);
-	printk(KERN_DEBUG ".... size: %d\n", para_state->size);
-
-	para_state->host_version = KVM_PARA_API_VERSION;
-	/*
-	 * We cannot support guests that try to register themselves
-	 * with a newer API version than the host supports:
-	 */
-	if (para_state->guest_version > KVM_PARA_API_VERSION) {
-		para_state->ret = -KVM_EINVAL;
-		goto err_kunmap_skip;
-	}
-
-	hypercall_gpa = para_state->hypercall_gpa;
-	hypercall_hpa = gpa_to_hpa(vcpu, hypercall_gpa);
-	printk(KERN_DEBUG ".... hypercall_hpa: %08Lx\n", hypercall_hpa);
-	if (is_error_hpa(hypercall_hpa)) {
-		para_state->ret = -KVM_EINVAL;
-		goto err_kunmap_skip;
-	}
-
-	printk(KERN_DEBUG "kvm: para guest successfully registered.\n");
-	vcpu->para_state_page = para_state_page;
-	vcpu->para_state_gpa = para_state_gpa;
-	vcpu->hypercall_gpa = hypercall_gpa;
-
-	mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT);
-	hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT),
-				KM_USER1) + (hypercall_hpa & ~PAGE_MASK);
-	kvm_x86_ops->patch_hypercall(vcpu, hypercall);
-	kunmap_atomic(hypercall, KM_USER1);
-
-	para_state->ret = 0;
-err_kunmap_skip:
-	kunmap(para_state_page);
-	return 0;
-err_gp:
-	return 1;
-}
-
 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 {
 	u64 data;
@@ -1656,12 +1598,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	case MSR_IA32_MISC_ENABLE:
 		vcpu->ia32_misc_enable_msr = data;
 		break;
-	/*
-	 * This is the 'probe whether the host is KVM' logic:
-	 */
-	case MSR_KVM_API_MAGIC:
-		return vcpu_register_para(vcpu, data);
-
 	default:
 		pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr);
 		return 1;
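
The exit-path wiring that consumes these helpers lives in vmx.c/svm.c and the x86 instruction emulator, so it is outside this diffstat-limited view. Below is a hedged sketch of what a VMCALL/VMMCALL exit handler looks like under the new API; handle_vmcall() and skip_emulated_instruction() are assumed from the existing VMX code and are not shown by this patch.

/*
 * Hedged sketch, not part of this diff: a native VMCALL/VMMCALL vmexit is
 * now handled entirely in the kernel -- no exit to userspace.
 */
static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	skip_emulated_instruction(vcpu);	/* advance rip past the 3-byte hypercall */
	kvm_emulate_hypercall(vcpu);
	return 1;				/* keep running the guest */
}

The #UD path is the complement: when a guest executes the other vendor's instruction, the emulator recognizes it, calls kvm_fix_hypercall() to rewrite the three bytes at vcpu->rip via kvm_x86_ops->patch_hypercall(), and the retried instruction then takes the normal exit above.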