diff options
author | Alexander Graf <agraf@suse.de> | 2008-11-25 14:17:08 -0500 |
---|---|---|
committer | Avi Kivity <avi@redhat.com> | 2009-03-24 05:02:47 -0400 |
commit | cf74a78b229d07f77416d2fe1f029f183a8a31df (patch) | |
tree | 599fc27129e035c606814e91d4f88d142cf8bea0 /arch/x86/kvm/svm.c | |
parent | 3d6368ef580a4dff012960834bba4e28d3c1430c (diff) |
KVM: SVM: Add VMEXIT handler and intercepts
This adds the #VMEXIT handler and the intercept checks, so that control
returns to the level 1 guest whenever something happens in the level 2
guest that the level 1 guest has asked to intercept.
v2 implements HIF handling and cleans up exception interception
v3 adds support for V_INTR_MASKING_MASK
v4 uses the host page hsave
v5 removes IOPM merging code
v6 moves mmu code out of the atomic section
Acked-by: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch/x86/kvm/svm.c')
-rw-r--r-- | arch/x86/kvm/svm.c | 293 |
1 files changed, 293 insertions, 0 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index fad187cbfabe..4cb2920b1527 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -72,6 +72,13 @@ module_param(npt, int, S_IRUGO); | |||
72 | static void kvm_reput_irq(struct vcpu_svm *svm); | 72 | static void kvm_reput_irq(struct vcpu_svm *svm); |
73 | static void svm_flush_tlb(struct kvm_vcpu *vcpu); | 73 | static void svm_flush_tlb(struct kvm_vcpu *vcpu); |
74 | 74 | ||
75 | static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override); | ||
76 | static int nested_svm_vmexit(struct vcpu_svm *svm); | ||
77 | static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb, | ||
78 | void *arg2, void *opaque); | ||
79 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | ||
80 | bool has_error_code, u32 error_code); | ||
81 | |||
75 | static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) | 82 | static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) |
76 | { | 83 | { |
77 | return container_of(vcpu, struct vcpu_svm, vcpu); | 84 | return container_of(vcpu, struct vcpu_svm, vcpu); |
@@ -221,6 +228,11 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
221 | { | 228 | { |
222 | struct vcpu_svm *svm = to_svm(vcpu); | 229 | struct vcpu_svm *svm = to_svm(vcpu); |
223 | 230 | ||
231 | /* If we are within a nested VM we'd better #VMEXIT and let the | ||
232 | guest handle the exception */ | ||
233 | if (nested_svm_check_exception(svm, nr, has_error_code, error_code)) | ||
234 | return; | ||
235 | |||
224 | svm->vmcb->control.event_inj = nr | 236 | svm->vmcb->control.event_inj = nr |
225 | | SVM_EVTINJ_VALID | 237 | | SVM_EVTINJ_VALID |
226 | | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) | 238 | | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) |
@@ -1198,6 +1210,46 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm) | |||
1198 | return 0; | 1210 | return 0; |
1199 | } | 1211 | } |
1200 | 1212 | ||
1213 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | ||
1214 | bool has_error_code, u32 error_code) | ||
1215 | { | ||
1216 | if (is_nested(svm)) { | ||
1217 | svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; | ||
1218 | svm->vmcb->control.exit_code_hi = 0; | ||
1219 | svm->vmcb->control.exit_info_1 = error_code; | ||
1220 | svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; | ||
1221 | if (nested_svm_exit_handled(svm, false)) { | ||
1222 | nsvm_printk("VMexit -> EXCP 0x%x\n", nr); | ||
1223 | |||
1224 | nested_svm_vmexit(svm); | ||
1225 | return 1; | ||
1226 | } | ||
1227 | } | ||
1228 | |||
1229 | return 0; | ||
1230 | } | ||
1231 | |||
1232 | static inline int nested_svm_intr(struct vcpu_svm *svm) | ||
1233 | { | ||
1234 | if (is_nested(svm)) { | ||
1235 | if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) | ||
1236 | return 0; | ||
1237 | |||
1238 | if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) | ||
1239 | return 0; | ||
1240 | |||
1241 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; | ||
1242 | |||
1243 | if (nested_svm_exit_handled(svm, false)) { | ||
1244 | nsvm_printk("VMexit -> INTR\n"); | ||
1245 | nested_svm_vmexit(svm); | ||
1246 | return 1; | ||
1247 | } | ||
1248 | } | ||
1249 | |||
1250 | return 0; | ||
1251 | } | ||
1252 | |||
1201 | static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa) | 1253 | static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa) |
1202 | { | 1254 | { |
1203 | struct page *page; | 1255 | struct page *page; |
@@ -1258,6 +1310,228 @@ static int nested_svm_do(struct vcpu_svm *svm, | |||
1258 | return retval; | 1310 | return retval; |
1259 | } | 1311 | } |
1260 | 1312 | ||
1313 | static int nested_svm_exit_handled_real(struct vcpu_svm *svm, | ||
1314 | void *arg1, | ||
1315 | void *arg2, | ||
1316 | void *opaque) | ||
1317 | { | ||
1318 | struct vmcb *nested_vmcb = (struct vmcb *)arg1; | ||
1319 | bool kvm_overrides = *(bool *)opaque; | ||
1320 | u32 exit_code = svm->vmcb->control.exit_code; | ||
1321 | |||
1322 | if (kvm_overrides) { | ||
1323 | switch (exit_code) { | ||
1324 | case SVM_EXIT_INTR: | ||
1325 | case SVM_EXIT_NMI: | ||
1326 | return 0; | ||
1327 | /* For now we are always handling NPFs when using them */ | ||
1328 | case SVM_EXIT_NPF: | ||
1329 | if (npt_enabled) | ||
1330 | return 0; | ||
1331 | break; | ||
1332 | /* When we're shadowing, trap PFs */ | ||
1333 | case SVM_EXIT_EXCP_BASE + PF_VECTOR: | ||
1334 | if (!npt_enabled) | ||
1335 | return 0; | ||
1336 | break; | ||
1337 | default: | ||
1338 | break; | ||
1339 | } | ||
1340 | } | ||
1341 | |||
1342 | switch (exit_code) { | ||
1343 | case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { | ||
1344 | u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); | ||
1345 | if (nested_vmcb->control.intercept_cr_read & cr_bits) | ||
1346 | return 1; | ||
1347 | break; | ||
1348 | } | ||
1349 | case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: { | ||
1350 | u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0); | ||
1351 | if (nested_vmcb->control.intercept_cr_write & cr_bits) | ||
1352 | return 1; | ||
1353 | break; | ||
1354 | } | ||
1355 | case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: { | ||
1356 | u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0); | ||
1357 | if (nested_vmcb->control.intercept_dr_read & dr_bits) | ||
1358 | return 1; | ||
1359 | break; | ||
1360 | } | ||
1361 | case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: { | ||
1362 | u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0); | ||
1363 | if (nested_vmcb->control.intercept_dr_write & dr_bits) | ||
1364 | return 1; | ||
1365 | break; | ||
1366 | } | ||
1367 | case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { | ||
1368 | u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); | ||
1369 | if (nested_vmcb->control.intercept_exceptions & excp_bits) | ||
1370 | return 1; | ||
1371 | break; | ||
1372 | } | ||
1373 | default: { | ||
1374 | u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); | ||
1375 | nsvm_printk("exit code: 0x%x\n", exit_code); | ||
1376 | if (nested_vmcb->control.intercept & exit_bits) | ||
1377 | return 1; | ||
1378 | } | ||
1379 | } | ||
1380 | |||
1381 | return 0; | ||
1382 | } | ||
1383 | |||
1384 | static int nested_svm_exit_handled_msr(struct vcpu_svm *svm, | ||
1385 | void *arg1, void *arg2, | ||
1386 | void *opaque) | ||
1387 | { | ||
1388 | struct vmcb *nested_vmcb = (struct vmcb *)arg1; | ||
1389 | u8 *msrpm = (u8 *)arg2; | ||
1390 | u32 t0, t1; | ||
1391 | u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | ||
1392 | u32 param = svm->vmcb->control.exit_info_1 & 1; | ||
1393 | |||
1394 | if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_MSR_PROT))) | ||
1395 | return 0; | ||
1396 | |||
1397 | switch(msr) { | ||
1398 | case 0 ... 0x1fff: | ||
1399 | t0 = (msr * 2) % 8; | ||
1400 | t1 = msr / 8; | ||
1401 | break; | ||
1402 | case 0xc0000000 ... 0xc0001fff: | ||
1403 | t0 = (8192 + msr - 0xc0000000) * 2; | ||
1404 | t1 = (t0 / 8); | ||
1405 | t0 %= 8; | ||
1406 | break; | ||
1407 | case 0xc0010000 ... 0xc0011fff: | ||
1408 | t0 = (16384 + msr - 0xc0010000) * 2; | ||
1409 | t1 = (t0 / 8); | ||
1410 | t0 %= 8; | ||
1411 | break; | ||
1412 | default: | ||
1413 | return 1; | ||
1414 | break; | ||
1415 | } | ||
1416 | if (msrpm[t1] & ((1 << param) << t0)) | ||
1417 | return 1; | ||
1418 | |||
1419 | return 0; | ||
1420 | } | ||
1421 | |||
1422 | static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override) | ||
1423 | { | ||
1424 | bool k = kvm_override; | ||
1425 | |||
1426 | switch (svm->vmcb->control.exit_code) { | ||
1427 | case SVM_EXIT_MSR: | ||
1428 | return nested_svm_do(svm, svm->nested_vmcb, | ||
1429 | svm->nested_vmcb_msrpm, NULL, | ||
1430 | nested_svm_exit_handled_msr); | ||
1431 | default: break; | ||
1432 | } | ||
1433 | |||
1434 | return nested_svm_do(svm, svm->nested_vmcb, 0, &k, | ||
1435 | nested_svm_exit_handled_real); | ||
1436 | } | ||
1437 | |||
1438 | static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1, | ||
1439 | void *arg2, void *opaque) | ||
1440 | { | ||
1441 | struct vmcb *nested_vmcb = (struct vmcb *)arg1; | ||
1442 | struct vmcb *hsave = svm->hsave; | ||
1443 | u64 nested_save[] = { nested_vmcb->save.cr0, | ||
1444 | nested_vmcb->save.cr3, | ||
1445 | nested_vmcb->save.cr4, | ||
1446 | nested_vmcb->save.efer, | ||
1447 | nested_vmcb->control.intercept_cr_read, | ||
1448 | nested_vmcb->control.intercept_cr_write, | ||
1449 | nested_vmcb->control.intercept_dr_read, | ||
1450 | nested_vmcb->control.intercept_dr_write, | ||
1451 | nested_vmcb->control.intercept_exceptions, | ||
1452 | nested_vmcb->control.intercept, | ||
1453 | nested_vmcb->control.msrpm_base_pa, | ||
1454 | nested_vmcb->control.iopm_base_pa, | ||
1455 | nested_vmcb->control.tsc_offset }; | ||
1456 | |||
1457 | /* Give the current vmcb to the guest */ | ||
1458 | memcpy(nested_vmcb, svm->vmcb, sizeof(struct vmcb)); | ||
1459 | nested_vmcb->save.cr0 = nested_save[0]; | ||
1460 | if (!npt_enabled) | ||
1461 | nested_vmcb->save.cr3 = nested_save[1]; | ||
1462 | nested_vmcb->save.cr4 = nested_save[2]; | ||
1463 | nested_vmcb->save.efer = nested_save[3]; | ||
1464 | nested_vmcb->control.intercept_cr_read = nested_save[4]; | ||
1465 | nested_vmcb->control.intercept_cr_write = nested_save[5]; | ||
1466 | nested_vmcb->control.intercept_dr_read = nested_save[6]; | ||
1467 | nested_vmcb->control.intercept_dr_write = nested_save[7]; | ||
1468 | nested_vmcb->control.intercept_exceptions = nested_save[8]; | ||
1469 | nested_vmcb->control.intercept = nested_save[9]; | ||
1470 | nested_vmcb->control.msrpm_base_pa = nested_save[10]; | ||
1471 | nested_vmcb->control.iopm_base_pa = nested_save[11]; | ||
1472 | nested_vmcb->control.tsc_offset = nested_save[12]; | ||
1473 | |||
1474 | /* We always set V_INTR_MASKING and remember the old value in hflags */ | ||
1475 | if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) | ||
1476 | nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; | ||
1477 | |||
1478 | if ((nested_vmcb->control.int_ctl & V_IRQ_MASK) && | ||
1479 | (nested_vmcb->control.int_vector)) { | ||
1480 | nsvm_printk("WARNING: IRQ 0x%x still enabled on #VMEXIT\n", | ||
1481 | nested_vmcb->control.int_vector); | ||
1482 | } | ||
1483 | |||
1484 | /* Restore the original control entries */ | ||
1485 | svm->vmcb->control = hsave->control; | ||
1486 | |||
1487 | /* Kill any pending exceptions */ | ||
1488 | if (svm->vcpu.arch.exception.pending == true) | ||
1489 | nsvm_printk("WARNING: Pending Exception\n"); | ||
1490 | svm->vcpu.arch.exception.pending = false; | ||
1491 | |||
1492 | /* Restore selected save entries */ | ||
1493 | svm->vmcb->save.es = hsave->save.es; | ||
1494 | svm->vmcb->save.cs = hsave->save.cs; | ||
1495 | svm->vmcb->save.ss = hsave->save.ss; | ||
1496 | svm->vmcb->save.ds = hsave->save.ds; | ||
1497 | svm->vmcb->save.gdtr = hsave->save.gdtr; | ||
1498 | svm->vmcb->save.idtr = hsave->save.idtr; | ||
1499 | svm->vmcb->save.rflags = hsave->save.rflags; | ||
1500 | svm_set_efer(&svm->vcpu, hsave->save.efer); | ||
1501 | svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); | ||
1502 | svm_set_cr4(&svm->vcpu, hsave->save.cr4); | ||
1503 | if (npt_enabled) { | ||
1504 | svm->vmcb->save.cr3 = hsave->save.cr3; | ||
1505 | svm->vcpu.arch.cr3 = hsave->save.cr3; | ||
1506 | } else { | ||
1507 | kvm_set_cr3(&svm->vcpu, hsave->save.cr3); | ||
1508 | } | ||
1509 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax); | ||
1510 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); | ||
1511 | kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip); | ||
1512 | svm->vmcb->save.dr7 = 0; | ||
1513 | svm->vmcb->save.cpl = 0; | ||
1514 | svm->vmcb->control.exit_int_info = 0; | ||
1515 | |||
1516 | svm->vcpu.arch.hflags &= ~HF_GIF_MASK; | ||
1517 | /* Exit nested SVM mode */ | ||
1518 | svm->nested_vmcb = 0; | ||
1519 | |||
1520 | return 0; | ||
1521 | } | ||
1522 | |||
1523 | static int nested_svm_vmexit(struct vcpu_svm *svm) | ||
1524 | { | ||
1525 | nsvm_printk("VMexit\n"); | ||
1526 | if (nested_svm_do(svm, svm->nested_vmcb, 0, | ||
1527 | NULL, nested_svm_vmexit_real)) | ||
1528 | return 1; | ||
1529 | |||
1530 | kvm_mmu_reset_context(&svm->vcpu); | ||
1531 | kvm_mmu_load(&svm->vcpu); | ||
1532 | |||
1533 | return 0; | ||
1534 | } | ||
1261 | 1535 | ||
1262 | static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1, | 1536 | static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1, |
1263 | void *arg2, void *opaque) | 1537 | void *arg2, void *opaque) |
@@ -1805,6 +2079,17 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
1805 | KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip, | 2079 | KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip, |
1806 | (u32)((u64)svm->vmcb->save.rip >> 32), entryexit); | 2080 | (u32)((u64)svm->vmcb->save.rip >> 32), entryexit); |
1807 | 2081 | ||
2082 | if (is_nested(svm)) { | ||
2083 | nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n", | ||
2084 | exit_code, svm->vmcb->control.exit_info_1, | ||
2085 | svm->vmcb->control.exit_info_2, svm->vmcb->save.rip); | ||
2086 | if (nested_svm_exit_handled(svm, true)) { | ||
2087 | nested_svm_vmexit(svm); | ||
2088 | nsvm_printk("-> #VMEXIT\n"); | ||
2089 | return 1; | ||
2090 | } | ||
2091 | } | ||
2092 | |||
1808 | if (npt_enabled) { | 2093 | if (npt_enabled) { |
1809 | int mmu_reload = 0; | 2094 | int mmu_reload = 0; |
1810 | if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) { | 2095 | if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) { |
@@ -1892,6 +2177,8 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq) | |||
1892 | { | 2177 | { |
1893 | struct vcpu_svm *svm = to_svm(vcpu); | 2178 | struct vcpu_svm *svm = to_svm(vcpu); |
1894 | 2179 | ||
2180 | nested_svm_intr(svm); | ||
2181 | |||
1895 | svm_inject_irq(svm, irq); | 2182 | svm_inject_irq(svm, irq); |
1896 | } | 2183 | } |
1897 | 2184 | ||
@@ -1937,6 +2224,9 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu) | |||
1937 | if (!kvm_cpu_has_interrupt(vcpu)) | 2224 | if (!kvm_cpu_has_interrupt(vcpu)) |
1938 | goto out; | 2225 | goto out; |
1939 | 2226 | ||
2227 | if (nested_svm_intr(svm)) | ||
2228 | goto out; | ||
2229 | |||
1940 | if (!(svm->vcpu.arch.hflags & HF_GIF_MASK)) | 2230 | if (!(svm->vcpu.arch.hflags & HF_GIF_MASK)) |
1941 | goto out; | 2231 | goto out; |
1942 | 2232 | ||
@@ -1989,6 +2279,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu, | |||
1989 | struct vcpu_svm *svm = to_svm(vcpu); | 2279 | struct vcpu_svm *svm = to_svm(vcpu); |
1990 | struct vmcb_control_area *control = &svm->vmcb->control; | 2280 | struct vmcb_control_area *control = &svm->vmcb->control; |
1991 | 2281 | ||
2282 | if (nested_svm_intr(svm)) | ||
2283 | return; | ||
2284 | |||
1992 | svm->vcpu.arch.interrupt_window_open = | 2285 | svm->vcpu.arch.interrupt_window_open = |
1993 | (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && | 2286 | (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && |
1994 | (svm->vmcb->save.rflags & X86_EFLAGS_IF) && | 2287 | (svm->vmcb->save.rflags & X86_EFLAGS_IF) && |