diff options
author | Michael S. Tsirkin <mst@redhat.com> | 2012-06-24 12:25:07 -0400 |
---|---|---|
committer | Avi Kivity <avi@redhat.com> | 2012-06-25 05:40:55 -0400 |
commit | ae7a2a3fb6f8b784c2752863f4f1f20c656f76fb (patch) | |
tree | 5f4053265d244bb12ccff88494d70124b6d603c3 | |
parent | d905c0693514e6f713b207377b67c9972c5d7d49 (diff) |
KVM: host side for eoi optimization
Implementation of PV EOI using shared memory.
This reduces the number of exits an interrupt
causes by as much as half.
The idea is simple: there's a bit, per APIC, in guest memory,
that tells the guest that it does not need EOI.
We set it before injecting an interrupt and clear it
before injecting a nested one. The guest tests it using
a test-and-clear operation - this is necessary
so that the host can detect interrupt nesting -
and if set, it can skip the EOI MSR.
There's a new MSR to set the address of said register
in guest memory. Otherwise not much changed:
- Guest EOI is not required
- Register is tested & ISR is automatically cleared on exit
For test results, see the description of the previous patch,
'kvm_para: guest side for eoi avoidance'.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 12 | ||||
-rw-r--r-- | arch/x86/kvm/cpuid.c | 1 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 141 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.h | 2 | ||||
-rw-r--r-- | arch/x86/kvm/trace.h | 34 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 7 |
6 files changed, 193 insertions, 4 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index db7c1f2709a2..24b76474d9de 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -175,6 +175,13 @@ enum { | |||
175 | 175 | ||
176 | /* apic attention bits */ | 176 | /* apic attention bits */ |
177 | #define KVM_APIC_CHECK_VAPIC 0 | 177 | #define KVM_APIC_CHECK_VAPIC 0 |
178 | /* | ||
179 | * The following bit is set with PV-EOI, unset on EOI. | ||
180 | * We detect PV-EOI changes by guest by comparing | ||
181 | * this bit with PV-EOI in guest memory. | ||
182 | * See the implementation in apic_update_pv_eoi. | ||
183 | */ | ||
184 | #define KVM_APIC_PV_EOI_PENDING 1 | ||
178 | 185 | ||
179 | /* | 186 | /* |
180 | * We don't want allocation failures within the mmu code, so we preallocate | 187 | * We don't want allocation failures within the mmu code, so we preallocate |
@@ -484,6 +491,11 @@ struct kvm_vcpu_arch { | |||
484 | u64 length; | 491 | u64 length; |
485 | u64 status; | 492 | u64 status; |
486 | } osvw; | 493 | } osvw; |
494 | |||
495 | struct { | ||
496 | u64 msr_val; | ||
497 | struct gfn_to_hva_cache data; | ||
498 | } pv_eoi; | ||
487 | }; | 499 | }; |
488 | 500 | ||
489 | struct kvm_lpage_info { | 501 | struct kvm_lpage_info { |
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 7df1c6d839fb..61ccbdf3d0ac 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -409,6 +409,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
409 | (1 << KVM_FEATURE_NOP_IO_DELAY) | | 409 | (1 << KVM_FEATURE_NOP_IO_DELAY) | |
410 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | 410 | (1 << KVM_FEATURE_CLOCKSOURCE2) | |
411 | (1 << KVM_FEATURE_ASYNC_PF) | | 411 | (1 << KVM_FEATURE_ASYNC_PF) | |
412 | (1 << KVM_FEATURE_PV_EOI) | | ||
412 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | 413 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); |
413 | 414 | ||
414 | if (sched_info_on()) | 415 | if (sched_info_on()) |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 805d887784f6..ce878788a39f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -311,6 +311,54 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) | |||
311 | irq->level, irq->trig_mode); | 311 | irq->level, irq->trig_mode); |
312 | } | 312 | } |
313 | 313 | ||
314 | static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) | ||
315 | { | ||
316 | |||
317 | return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val, | ||
318 | sizeof(val)); | ||
319 | } | ||
320 | |||
321 | static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val) | ||
322 | { | ||
323 | |||
324 | return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val, | ||
325 | sizeof(*val)); | ||
326 | } | ||
327 | |||
328 | static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) | ||
329 | { | ||
330 | return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED; | ||
331 | } | ||
332 | |||
333 | static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) | ||
334 | { | ||
335 | u8 val; | ||
336 | if (pv_eoi_get_user(vcpu, &val) < 0) | ||
337 | apic_debug("Can't read EOI MSR value: 0x%llx\n", | ||
338 | (unsigned long long)vcpi->arch.pv_eoi.msr_val); | ||
339 | return val & 0x1; | ||
340 | } | ||
341 | |||
342 | static void pv_eoi_set_pending(struct kvm_vcpu *vcpu) | ||
343 | { | ||
344 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { | ||
345 | apic_debug("Can't set EOI MSR value: 0x%llx\n", | ||
346 | (unsigned long long)vcpi->arch.pv_eoi.msr_val); | ||
347 | return; | ||
348 | } | ||
349 | __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); | ||
350 | } | ||
351 | |||
352 | static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) | ||
353 | { | ||
354 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { | ||
355 | apic_debug("Can't clear EOI MSR value: 0x%llx\n", | ||
356 | (unsigned long long)vcpi->arch.pv_eoi.msr_val); | ||
357 | return; | ||
358 | } | ||
359 | __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); | ||
360 | } | ||
361 | |||
314 | static inline int apic_find_highest_isr(struct kvm_lapic *apic) | 362 | static inline int apic_find_highest_isr(struct kvm_lapic *apic) |
315 | { | 363 | { |
316 | int result; | 364 | int result; |
@@ -527,15 +575,18 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) | |||
527 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; | 575 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; |
528 | } | 576 | } |
529 | 577 | ||
530 | static void apic_set_eoi(struct kvm_lapic *apic) | 578 | static int apic_set_eoi(struct kvm_lapic *apic) |
531 | { | 579 | { |
532 | int vector = apic_find_highest_isr(apic); | 580 | int vector = apic_find_highest_isr(apic); |
581 | |||
582 | trace_kvm_eoi(apic, vector); | ||
583 | |||
533 | /* | 584 | /* |
534 | * Not every write EOI will has corresponding ISR, | 585 | * Not every write EOI will has corresponding ISR, |
535 | * one example is when Kernel check timer on setup_IO_APIC | 586 | * one example is when Kernel check timer on setup_IO_APIC |
536 | */ | 587 | */ |
537 | if (vector == -1) | 588 | if (vector == -1) |
538 | return; | 589 | return vector; |
539 | 590 | ||
540 | apic_clear_isr(vector, apic); | 591 | apic_clear_isr(vector, apic); |
541 | apic_update_ppr(apic); | 592 | apic_update_ppr(apic); |
@@ -550,6 +601,7 @@ static void apic_set_eoi(struct kvm_lapic *apic) | |||
550 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); | 601 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); |
551 | } | 602 | } |
552 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); | 603 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); |
604 | return vector; | ||
553 | } | 605 | } |
554 | 606 | ||
555 | static void apic_send_ipi(struct kvm_lapic *apic) | 607 | static void apic_send_ipi(struct kvm_lapic *apic) |
@@ -1132,6 +1184,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
1132 | atomic_set(&apic->lapic_timer.pending, 0); | 1184 | atomic_set(&apic->lapic_timer.pending, 0); |
1133 | if (kvm_vcpu_is_bsp(vcpu)) | 1185 | if (kvm_vcpu_is_bsp(vcpu)) |
1134 | vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; | 1186 | vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; |
1187 | vcpu->arch.pv_eoi.msr_val = 0; | ||
1135 | apic_update_ppr(apic); | 1188 | apic_update_ppr(apic); |
1136 | 1189 | ||
1137 | vcpu->arch.apic_arb_prio = 0; | 1190 | vcpu->arch.apic_arb_prio = 0; |
@@ -1332,11 +1385,51 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) | |||
1332 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | 1385 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); |
1333 | } | 1386 | } |
1334 | 1387 | ||
1388 | /* | ||
1389 | * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt | ||
1390 | * | ||
1391 | * Detect whether guest triggered PV EOI since the | ||
1392 | * last entry. If yes, set EOI on guests's behalf. | ||
1393 | * Clear PV EOI in guest memory in any case. | ||
1394 | */ | ||
1395 | static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu, | ||
1396 | struct kvm_lapic *apic) | ||
1397 | { | ||
1398 | bool pending; | ||
1399 | int vector; | ||
1400 | /* | ||
1401 | * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host | ||
1402 | * and KVM_PV_EOI_ENABLED in guest memory as follows: | ||
1403 | * | ||
1404 | * KVM_APIC_PV_EOI_PENDING is unset: | ||
1405 | * -> host disabled PV EOI. | ||
1406 | * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set: | ||
1407 | * -> host enabled PV EOI, guest did not execute EOI yet. | ||
1408 | * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset: | ||
1409 | * -> host enabled PV EOI, guest executed EOI. | ||
1410 | */ | ||
1411 | BUG_ON(!pv_eoi_enabled(vcpu)); | ||
1412 | pending = pv_eoi_get_pending(vcpu); | ||
1413 | /* | ||
1414 | * Clear pending bit in any case: it will be set again on vmentry. | ||
1415 | * While this might not be ideal from performance point of view, | ||
1416 | * this makes sure pv eoi is only enabled when we know it's safe. | ||
1417 | */ | ||
1418 | pv_eoi_clr_pending(vcpu); | ||
1419 | if (pending) | ||
1420 | return; | ||
1421 | vector = apic_set_eoi(apic); | ||
1422 | trace_kvm_pv_eoi(apic, vector); | ||
1423 | } | ||
1424 | |||
1335 | void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) | 1425 | void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) |
1336 | { | 1426 | { |
1337 | u32 data; | 1427 | u32 data; |
1338 | void *vapic; | 1428 | void *vapic; |
1339 | 1429 | ||
1430 | if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention)) | ||
1431 | apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic); | ||
1432 | |||
1340 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) | 1433 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) |
1341 | return; | 1434 | return; |
1342 | 1435 | ||
@@ -1347,17 +1440,44 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) | |||
1347 | apic_set_tpr(vcpu->arch.apic, data & 0xff); | 1440 | apic_set_tpr(vcpu->arch.apic, data & 0xff); |
1348 | } | 1441 | } |
1349 | 1442 | ||
1443 | /* | ||
1444 | * apic_sync_pv_eoi_to_guest - called before vmentry | ||
1445 | * | ||
1446 | * Detect whether it's safe to enable PV EOI and | ||
1447 | * if yes do so. | ||
1448 | */ | ||
1449 | static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu, | ||
1450 | struct kvm_lapic *apic) | ||
1451 | { | ||
1452 | if (!pv_eoi_enabled(vcpu) || | ||
1453 | /* IRR set or many bits in ISR: could be nested. */ | ||
1454 | apic->irr_pending || | ||
1455 | /* Cache not set: could be safe but we don't bother. */ | ||
1456 | apic->highest_isr_cache == -1 || | ||
1457 | /* Need EOI to update ioapic. */ | ||
1458 | kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) { | ||
1459 | /* | ||
1460 | * PV EOI was disabled by apic_sync_pv_eoi_from_guest | ||
1461 | * so we need not do anything here. | ||
1462 | */ | ||
1463 | return; | ||
1464 | } | ||
1465 | |||
1466 | pv_eoi_set_pending(apic->vcpu); | ||
1467 | } | ||
1468 | |||
1350 | void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) | 1469 | void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) |
1351 | { | 1470 | { |
1352 | u32 data, tpr; | 1471 | u32 data, tpr; |
1353 | int max_irr, max_isr; | 1472 | int max_irr, max_isr; |
1354 | struct kvm_lapic *apic; | 1473 | struct kvm_lapic *apic = vcpu->arch.apic; |
1355 | void *vapic; | 1474 | void *vapic; |
1356 | 1475 | ||
1476 | apic_sync_pv_eoi_to_guest(vcpu, apic); | ||
1477 | |||
1357 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) | 1478 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) |
1358 | return; | 1479 | return; |
1359 | 1480 | ||
1360 | apic = vcpu->arch.apic; | ||
1361 | tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff; | 1481 | tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff; |
1362 | max_irr = apic_find_highest_irr(apic); | 1482 | max_irr = apic_find_highest_irr(apic); |
1363 | if (max_irr < 0) | 1483 | if (max_irr < 0) |
@@ -1443,3 +1563,16 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) | |||
1443 | 1563 | ||
1444 | return 0; | 1564 | return 0; |
1445 | } | 1565 | } |
1566 | |||
1567 | int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) | ||
1568 | { | ||
1569 | u64 addr = data & ~KVM_MSR_ENABLED; | ||
1570 | if (!IS_ALIGNED(addr, 4)) | ||
1571 | return 1; | ||
1572 | |||
1573 | vcpu->arch.pv_eoi.msr_val = data; | ||
1574 | if (!pv_eoi_enabled(vcpu)) | ||
1575 | return 0; | ||
1576 | return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, | ||
1577 | addr); | ||
1578 | } | ||
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 5ac9e5e2fedd..4af5405ae1e2 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -69,4 +69,6 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) | |||
69 | { | 69 | { |
70 | return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; | 70 | return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; |
71 | } | 71 | } |
72 | |||
73 | int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); | ||
72 | #endif | 74 | #endif |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 911d2641f14c..851914e207fc 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -517,6 +517,40 @@ TRACE_EVENT(kvm_apic_accept_irq, | |||
517 | __entry->coalesced ? " (coalesced)" : "") | 517 | __entry->coalesced ? " (coalesced)" : "") |
518 | ); | 518 | ); |
519 | 519 | ||
520 | TRACE_EVENT(kvm_eoi, | ||
521 | TP_PROTO(struct kvm_lapic *apic, int vector), | ||
522 | TP_ARGS(apic, vector), | ||
523 | |||
524 | TP_STRUCT__entry( | ||
525 | __field( __u32, apicid ) | ||
526 | __field( int, vector ) | ||
527 | ), | ||
528 | |||
529 | TP_fast_assign( | ||
530 | __entry->apicid = apic->vcpu->vcpu_id; | ||
531 | __entry->vector = vector; | ||
532 | ), | ||
533 | |||
534 | TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector) | ||
535 | ); | ||
536 | |||
537 | TRACE_EVENT(kvm_pv_eoi, | ||
538 | TP_PROTO(struct kvm_lapic *apic, int vector), | ||
539 | TP_ARGS(apic, vector), | ||
540 | |||
541 | TP_STRUCT__entry( | ||
542 | __field( __u32, apicid ) | ||
543 | __field( int, vector ) | ||
544 | ), | ||
545 | |||
546 | TP_fast_assign( | ||
547 | __entry->apicid = apic->vcpu->vcpu_id; | ||
548 | __entry->vector = vector; | ||
549 | ), | ||
550 | |||
551 | TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector) | ||
552 | ); | ||
553 | |||
520 | /* | 554 | /* |
521 | * Tracepoint for nested VMRUN | 555 | * Tracepoint for nested VMRUN |
522 | */ | 556 | */ |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7ea0f611bc89..8eacb2e64560 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -795,6 +795,7 @@ static u32 msrs_to_save[] = { | |||
795 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | 795 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, |
796 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 796 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
797 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, | 797 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, |
798 | MSR_KVM_PV_EOI_EN, | ||
798 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 799 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
799 | MSR_STAR, | 800 | MSR_STAR, |
800 | #ifdef CONFIG_X86_64 | 801 | #ifdef CONFIG_X86_64 |
@@ -1653,6 +1654,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1653 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); | 1654 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); |
1654 | 1655 | ||
1655 | break; | 1656 | break; |
1657 | case MSR_KVM_PV_EOI_EN: | ||
1658 | if (kvm_lapic_enable_pv_eoi(vcpu, data)) | ||
1659 | return 1; | ||
1660 | break; | ||
1656 | 1661 | ||
1657 | case MSR_IA32_MCG_CTL: | 1662 | case MSR_IA32_MCG_CTL: |
1658 | case MSR_IA32_MCG_STATUS: | 1663 | case MSR_IA32_MCG_STATUS: |
@@ -5394,6 +5399,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5394 | 5399 | ||
5395 | cancel_injection: | 5400 | cancel_injection: |
5396 | kvm_x86_ops->cancel_injection(vcpu); | 5401 | kvm_x86_ops->cancel_injection(vcpu); |
5402 | if (unlikely(vcpu->arch.apic_attention)) | ||
5403 | kvm_lapic_sync_from_vapic(vcpu); | ||
5397 | out: | 5404 | out: |
5398 | return r; | 5405 | return r; |
5399 | } | 5406 | } |