aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author: Anthony Liguori <aliguori@us.ibm.com> 2007-09-17 15:57:50 -0400
committer: Avi Kivity <avi@qumranet.com> 2008-01-30 10:52:46 -0500
commit: 7aa81cc04781b5b99a0647ec04533599d78cd219 (patch)
tree: 6ac8854faf3db2bc499e2c105fdfdab95df52170 /drivers
parent: aca7f96600b170e470b3056aba0ed8d7df8d330d (diff)
KVM: Refactor hypercall infrastructure (v3)
This patch refactors the current hypercall infrastructure to better support live migration and SMP. It eliminates the hypercall page by trapping the UD exception that would occur if you used the wrong hypercall instruction for the underlying architecture and replacing it with the right one lazily.

A fall-out of this patch is that the unhandled hypercalls no longer trap to userspace. There is very little reason though to use a hypercall to communicate with userspace as PIO or MMIO can be used. There is no code in tree that uses userspace hypercalls.

[avi: fix #ud injection on vmx]

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/kvm/kvm.h | 8
-rw-r--r-- drivers/kvm/kvm_main.c | 156
-rw-r--r-- drivers/kvm/svm.c | 19
-rw-r--r-- drivers/kvm/vmx.c | 29
-rw-r--r--drivers/kvm/x86_emulate.c11
5 files changed, 101 insertions, 122 deletions
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 3b0bc4bda5f2..da9c3aa1c08c 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -46,6 +46,7 @@
46#define KVM_MAX_CPUID_ENTRIES 40 46#define KVM_MAX_CPUID_ENTRIES 40
47 47
48#define DE_VECTOR 0 48#define DE_VECTOR 0
49#define UD_VECTOR 6
49#define NM_VECTOR 7 50#define NM_VECTOR 7
50#define DF_VECTOR 8 51#define DF_VECTOR 8
51#define TS_VECTOR 10 52#define TS_VECTOR 10
@@ -317,9 +318,6 @@ struct kvm_vcpu {
317 unsigned long cr0; 318 unsigned long cr0;
318 unsigned long cr2; 319 unsigned long cr2;
319 unsigned long cr3; 320 unsigned long cr3;
320 gpa_t para_state_gpa;
321 struct page *para_state_page;
322 gpa_t hypercall_gpa;
323 unsigned long cr4; 321 unsigned long cr4;
324 unsigned long cr8; 322 unsigned long cr8;
325 u64 pdptrs[4]; /* pae */ 323 u64 pdptrs[4]; /* pae */
@@ -622,7 +620,9 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
622int kvm_mmu_load(struct kvm_vcpu *vcpu); 620int kvm_mmu_load(struct kvm_vcpu *vcpu);
623void kvm_mmu_unload(struct kvm_vcpu *vcpu); 621void kvm_mmu_unload(struct kvm_vcpu *vcpu);
624 622
625int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run); 623int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
624
625int kvm_fix_hypercall(struct kvm_vcpu *vcpu);
626 626
627static inline void kvm_guest_enter(void) 627static inline void kvm_guest_enter(void)
628{ 628{
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index c0f372f1d761..1c662f63b7a9 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -39,6 +39,7 @@
39#include <linux/smp.h> 39#include <linux/smp.h>
40#include <linux/anon_inodes.h> 40#include <linux/anon_inodes.h>
41#include <linux/profile.h> 41#include <linux/profile.h>
42#include <linux/kvm_para.h>
42 43
43#include <asm/processor.h> 44#include <asm/processor.h>
44#include <asm/msr.h> 45#include <asm/msr.h>
@@ -1362,51 +1363,61 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
1362} 1363}
1363EXPORT_SYMBOL_GPL(kvm_emulate_halt); 1364EXPORT_SYMBOL_GPL(kvm_emulate_halt);
1364 1365
1365int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) 1366int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
1366{ 1367{
1367 unsigned long nr, a0, a1, a2, a3, a4, a5, ret; 1368 unsigned long nr, a0, a1, a2, a3, ret;
1368 1369
1369 kvm_x86_ops->cache_regs(vcpu); 1370 kvm_x86_ops->cache_regs(vcpu);
1370 ret = -KVM_EINVAL; 1371
1371#ifdef CONFIG_X86_64 1372 nr = vcpu->regs[VCPU_REGS_RAX];
1372 if (is_long_mode(vcpu)) { 1373 a0 = vcpu->regs[VCPU_REGS_RBX];
1373 nr = vcpu->regs[VCPU_REGS_RAX]; 1374 a1 = vcpu->regs[VCPU_REGS_RCX];
1374 a0 = vcpu->regs[VCPU_REGS_RDI]; 1375 a2 = vcpu->regs[VCPU_REGS_RDX];
1375 a1 = vcpu->regs[VCPU_REGS_RSI]; 1376 a3 = vcpu->regs[VCPU_REGS_RSI];
1376 a2 = vcpu->regs[VCPU_REGS_RDX]; 1377
1377 a3 = vcpu->regs[VCPU_REGS_RCX]; 1378 if (!is_long_mode(vcpu)) {
1378 a4 = vcpu->regs[VCPU_REGS_R8]; 1379 nr &= 0xFFFFFFFF;
1379 a5 = vcpu->regs[VCPU_REGS_R9]; 1380 a0 &= 0xFFFFFFFF;
1380 } else 1381 a1 &= 0xFFFFFFFF;
1381#endif 1382 a2 &= 0xFFFFFFFF;
1382 { 1383 a3 &= 0xFFFFFFFF;
1383 nr = vcpu->regs[VCPU_REGS_RBX] & -1u;
1384 a0 = vcpu->regs[VCPU_REGS_RAX] & -1u;
1385 a1 = vcpu->regs[VCPU_REGS_RCX] & -1u;
1386 a2 = vcpu->regs[VCPU_REGS_RDX] & -1u;
1387 a3 = vcpu->regs[VCPU_REGS_RSI] & -1u;
1388 a4 = vcpu->regs[VCPU_REGS_RDI] & -1u;
1389 a5 = vcpu->regs[VCPU_REGS_RBP] & -1u;
1390 } 1384 }
1385
1391 switch (nr) { 1386 switch (nr) {
1392 default: 1387 default:
1393 run->hypercall.nr = nr; 1388 ret = -KVM_ENOSYS;
1394 run->hypercall.args[0] = a0; 1389 break;
1395 run->hypercall.args[1] = a1;
1396 run->hypercall.args[2] = a2;
1397 run->hypercall.args[3] = a3;
1398 run->hypercall.args[4] = a4;
1399 run->hypercall.args[5] = a5;
1400 run->hypercall.ret = ret;
1401 run->hypercall.longmode = is_long_mode(vcpu);
1402 kvm_x86_ops->decache_regs(vcpu);
1403 return 0;
1404 } 1390 }
1405 vcpu->regs[VCPU_REGS_RAX] = ret; 1391 vcpu->regs[VCPU_REGS_RAX] = ret;
1406 kvm_x86_ops->decache_regs(vcpu); 1392 kvm_x86_ops->decache_regs(vcpu);
1407 return 1; 1393 return 0;
1394}
1395EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
1396
1397int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
1398{
1399 char instruction[3];
1400 int ret = 0;
1401
1402 mutex_lock(&vcpu->kvm->lock);
1403
1404 /*
1405 * Blow out the MMU to ensure that no other VCPU has an active mapping
1406 * to ensure that the updated hypercall appears atomically across all
1407 * VCPUs.
1408 */
1409 kvm_mmu_zap_all(vcpu->kvm);
1410
1411 kvm_x86_ops->cache_regs(vcpu);
1412 kvm_x86_ops->patch_hypercall(vcpu, instruction);
1413 if (emulator_write_emulated(vcpu->rip, instruction, 3, vcpu)
1414 != X86EMUL_CONTINUE)
1415 ret = -EFAULT;
1416
1417 mutex_unlock(&vcpu->kvm->lock);
1418
1419 return ret;
1408} 1420}
1409EXPORT_SYMBOL_GPL(kvm_hypercall);
1410 1421
1411static u64 mk_cr_64(u64 curr_cr, u32 new_val) 1422static u64 mk_cr_64(u64 curr_cr, u32 new_val)
1412{ 1423{
@@ -1474,75 +1485,6 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
1474 } 1485 }
1475} 1486}
1476 1487
1477/*
1478 * Register the para guest with the host:
1479 */
1480static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
1481{
1482 struct kvm_vcpu_para_state *para_state;
1483 hpa_t para_state_hpa, hypercall_hpa;
1484 struct page *para_state_page;
1485 unsigned char *hypercall;
1486 gpa_t hypercall_gpa;
1487
1488 printk(KERN_DEBUG "kvm: guest trying to enter paravirtual mode\n");
1489 printk(KERN_DEBUG ".... para_state_gpa: %08Lx\n", para_state_gpa);
1490
1491 /*
1492 * Needs to be page aligned:
1493 */
1494 if (para_state_gpa != PAGE_ALIGN(para_state_gpa))
1495 goto err_gp;
1496
1497 para_state_hpa = gpa_to_hpa(vcpu, para_state_gpa);
1498 printk(KERN_DEBUG ".... para_state_hpa: %08Lx\n", para_state_hpa);
1499 if (is_error_hpa(para_state_hpa))
1500 goto err_gp;
1501
1502 mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT);
1503 para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT);
1504 para_state = kmap(para_state_page);
1505
1506 printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version);
1507 printk(KERN_DEBUG ".... size: %d\n", para_state->size);
1508
1509 para_state->host_version = KVM_PARA_API_VERSION;
1510 /*
1511 * We cannot support guests that try to register themselves
1512 * with a newer API version than the host supports:
1513 */
1514 if (para_state->guest_version > KVM_PARA_API_VERSION) {
1515 para_state->ret = -KVM_EINVAL;
1516 goto err_kunmap_skip;
1517 }
1518
1519 hypercall_gpa = para_state->hypercall_gpa;
1520 hypercall_hpa = gpa_to_hpa(vcpu, hypercall_gpa);
1521 printk(KERN_DEBUG ".... hypercall_hpa: %08Lx\n", hypercall_hpa);
1522 if (is_error_hpa(hypercall_hpa)) {
1523 para_state->ret = -KVM_EINVAL;
1524 goto err_kunmap_skip;
1525 }
1526
1527 printk(KERN_DEBUG "kvm: para guest successfully registered.\n");
1528 vcpu->para_state_page = para_state_page;
1529 vcpu->para_state_gpa = para_state_gpa;
1530 vcpu->hypercall_gpa = hypercall_gpa;
1531
1532 mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT);
1533 hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT),
1534 KM_USER1) + (hypercall_hpa & ~PAGE_MASK);
1535 kvm_x86_ops->patch_hypercall(vcpu, hypercall);
1536 kunmap_atomic(hypercall, KM_USER1);
1537
1538 para_state->ret = 0;
1539err_kunmap_skip:
1540 kunmap(para_state_page);
1541 return 0;
1542err_gp:
1543 return 1;
1544}
1545
1546int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1488int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1547{ 1489{
1548 u64 data; 1490 u64 data;
@@ -1656,12 +1598,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1656 case MSR_IA32_MISC_ENABLE: 1598 case MSR_IA32_MISC_ENABLE:
1657 vcpu->ia32_misc_enable_msr = data; 1599 vcpu->ia32_misc_enable_msr = data;
1658 break; 1600 break;
1659 /*
1660 * This is the 'probe whether the host is KVM' logic:
1661 */
1662 case MSR_KVM_API_MAGIC:
1663 return vcpu_register_para(vcpu, data);
1664
1665 default: 1601 default:
1666 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr); 1602 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr);
1667 return 1; 1603 return 1;
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index ced4ac1955db..794d95416f7b 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -476,7 +476,8 @@ static void init_vmcb(struct vmcb *vmcb)
476 INTERCEPT_DR5_MASK | 476 INTERCEPT_DR5_MASK |
477 INTERCEPT_DR7_MASK; 477 INTERCEPT_DR7_MASK;
478 478
479 control->intercept_exceptions = 1 << PF_VECTOR; 479 control->intercept_exceptions = (1 << PF_VECTOR) |
480 (1 << UD_VECTOR);
480 481
481 482
482 control->intercept = (1ULL << INTERCEPT_INTR) | 483 control->intercept = (1ULL << INTERCEPT_INTR) |
@@ -979,6 +980,17 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
979 return 0; 980 return 0;
980} 981}
981 982
983static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
984{
985 int er;
986
987 er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0);
988 if (er != EMULATE_DONE)
989 inject_ud(&svm->vcpu);
990
991 return 1;
992}
993
982static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 994static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
983{ 995{
984 svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); 996 svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
@@ -1045,7 +1057,8 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1045{ 1057{
1046 svm->next_rip = svm->vmcb->save.rip + 3; 1058 svm->next_rip = svm->vmcb->save.rip + 3;
1047 skip_emulated_instruction(&svm->vcpu); 1059 skip_emulated_instruction(&svm->vcpu);
1048 return kvm_hypercall(&svm->vcpu, kvm_run); 1060 kvm_emulate_hypercall(&svm->vcpu);
1061 return 1;
1049} 1062}
1050 1063
1051static int invalid_op_interception(struct vcpu_svm *svm, 1064static int invalid_op_interception(struct vcpu_svm *svm,
@@ -1241,6 +1254,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
1241 [SVM_EXIT_WRITE_DR3] = emulate_on_interception, 1254 [SVM_EXIT_WRITE_DR3] = emulate_on_interception,
1242 [SVM_EXIT_WRITE_DR5] = emulate_on_interception, 1255 [SVM_EXIT_WRITE_DR5] = emulate_on_interception,
1243 [SVM_EXIT_WRITE_DR7] = emulate_on_interception, 1256 [SVM_EXIT_WRITE_DR7] = emulate_on_interception,
1257 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
1244 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, 1258 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
1245 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, 1259 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
1246 [SVM_EXIT_INTR] = nop_on_interception, 1260 [SVM_EXIT_INTR] = nop_on_interception,
@@ -1675,7 +1689,6 @@ svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
1675 hypercall[0] = 0x0f; 1689 hypercall[0] = 0x0f;
1676 hypercall[1] = 0x01; 1690 hypercall[1] = 0x01;
1677 hypercall[2] = 0xd9; 1691 hypercall[2] = 0xd9;
1678 hypercall[3] = 0xc3;
1679} 1692}
1680 1693
1681static void svm_check_processor_compat(void *rtn) 1694static void svm_check_processor_compat(void *rtn)
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 5b397b6c9f93..47c827d3007c 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -164,6 +164,13 @@ static inline int is_no_device(u32 intr_info)
164 (INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); 164 (INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
165} 165}
166 166
167static inline int is_invalid_opcode(u32 intr_info)
168{
169 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
170 INTR_INFO_VALID_MASK)) ==
171 (INTR_TYPE_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
172}
173
167static inline int is_external_interrupt(u32 intr_info) 174static inline int is_external_interrupt(u32 intr_info)
168{ 175{
169 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) 176 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -315,7 +322,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
315{ 322{
316 u32 eb; 323 u32 eb;
317 324
318 eb = 1u << PF_VECTOR; 325 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR);
319 if (!vcpu->fpu_active) 326 if (!vcpu->fpu_active)
320 eb |= 1u << NM_VECTOR; 327 eb |= 1u << NM_VECTOR;
321 if (vcpu->guest_debug.enabled) 328 if (vcpu->guest_debug.enabled)
@@ -560,6 +567,14 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
560 INTR_INFO_VALID_MASK); 567 INTR_INFO_VALID_MASK);
561} 568}
562 569
570static void vmx_inject_ud(struct kvm_vcpu *vcpu)
571{
572 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
573 UD_VECTOR |
574 INTR_TYPE_EXCEPTION |
575 INTR_INFO_VALID_MASK);
576}
577
563/* 578/*
564 * Swap MSR entry in host/guest MSR entry array. 579 * Swap MSR entry in host/guest MSR entry array.
565 */ 580 */
@@ -1771,6 +1786,14 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1771 return 1; 1786 return 1;
1772 } 1787 }
1773 1788
1789 if (is_invalid_opcode(intr_info)) {
1790 er = emulate_instruction(vcpu, kvm_run, 0, 0);
1791 if (er != EMULATE_DONE)
1792 vmx_inject_ud(vcpu);
1793
1794 return 1;
1795 }
1796
1774 error_code = 0; 1797 error_code = 0;
1775 rip = vmcs_readl(GUEST_RIP); 1798 rip = vmcs_readl(GUEST_RIP);
1776 if (intr_info & INTR_INFO_DELIEVER_CODE_MASK) 1799 if (intr_info & INTR_INFO_DELIEVER_CODE_MASK)
@@ -1873,7 +1896,6 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
1873 hypercall[0] = 0x0f; 1896 hypercall[0] = 0x0f;
1874 hypercall[1] = 0x01; 1897 hypercall[1] = 0x01;
1875 hypercall[2] = 0xc1; 1898 hypercall[2] = 0xc1;
1876 hypercall[3] = 0xc3;
1877} 1899}
1878 1900
1879static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1901static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -2059,7 +2081,8 @@ static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2059static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2081static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2060{ 2082{
2061 skip_emulated_instruction(vcpu); 2083 skip_emulated_instruction(vcpu);
2062 return kvm_hypercall(vcpu, kvm_run); 2084 kvm_emulate_hypercall(vcpu);
2085 return 1;
2063} 2086}
2064 2087
2065/* 2088/*
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 84af9cc737fa..f12bc2c74040 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -1384,7 +1384,11 @@ twobyte_insn:
1384 if (modrm_mod != 3 || modrm_rm != 1) 1384 if (modrm_mod != 3 || modrm_rm != 1)
1385 goto cannot_emulate; 1385 goto cannot_emulate;
1386 1386
1387 /* nop */ 1387 rc = kvm_fix_hypercall(ctxt->vcpu);
1388 if (rc)
1389 goto done;
1390
1391 kvm_emulate_hypercall(ctxt->vcpu);
1388 break; 1392 break;
1389 case 2: /* lgdt */ 1393 case 2: /* lgdt */
1390 rc = read_descriptor(ctxt, ops, src.ptr, 1394 rc = read_descriptor(ctxt, ops, src.ptr,
@@ -1395,7 +1399,10 @@ twobyte_insn:
1395 break; 1399 break;
1396 case 3: /* lidt/vmmcall */ 1400 case 3: /* lidt/vmmcall */
1397 if (modrm_mod == 3 && modrm_rm == 1) { 1401 if (modrm_mod == 3 && modrm_rm == 1) {
1398 /* nop */ 1402 rc = kvm_fix_hypercall(ctxt->vcpu);
1403 if (rc)
1404 goto done;
1405 kvm_emulate_hypercall(ctxt->vcpu);
1399 } else { 1406 } else {
1400 rc = read_descriptor(ctxt, ops, src.ptr, 1407 rc = read_descriptor(ctxt, ops, src.ptr,
1401 &size, &address, 1408 &size, &address,