aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael S. Tsirkin <mst@redhat.com>2012-06-24 12:25:07 -0400
committerAvi Kivity <avi@redhat.com>2012-06-25 05:40:55 -0400
commitae7a2a3fb6f8b784c2752863f4f1f20c656f76fb (patch)
tree5f4053265d244bb12ccff88494d70124b6d603c3
parentd905c0693514e6f713b207377b67c9972c5d7d49 (diff)
KVM: host side for eoi optimization
Implementation of PV EOI using shared memory. This reduces the number of exits an interrupt causes as much as by half. The idea is simple: there's a bit, per APIC, in guest memory, that tells the guest that it does not need EOI. We set it before injecting an interrupt and clear before injecting a nested one. Guest tests it using a test and clear operation - this is necessary so that host can detect interrupt nesting - and if set, it can skip the EOI MSR. There's a new MSR to set the address of said register in guest memory. Otherwise not much changed: - Guest EOI is not required - Register is tested & ISR is automatically cleared on exit For testing results see description of previous patch 'kvm_para: guest side for eoi avoidance'. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Avi Kivity <avi@redhat.com>
-rw-r--r--arch/x86/include/asm/kvm_host.h12
-rw-r--r--arch/x86/kvm/cpuid.c1
-rw-r--r--arch/x86/kvm/lapic.c141
-rw-r--r--arch/x86/kvm/lapic.h2
-rw-r--r--arch/x86/kvm/trace.h34
-rw-r--r--arch/x86/kvm/x86.c7
6 files changed, 193 insertions, 4 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index db7c1f2709a2..24b76474d9de 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -175,6 +175,13 @@ enum {
175 175
176/* apic attention bits */ 176/* apic attention bits */
177#define KVM_APIC_CHECK_VAPIC 0 177#define KVM_APIC_CHECK_VAPIC 0
178/*
179 * The following bit is set with PV-EOI, unset on EOI.
180 * We detect PV-EOI changes by guest by comparing
181 * this bit with PV-EOI in guest memory.
182 * See the implementation in apic_update_pv_eoi.
183 */
184#define KVM_APIC_PV_EOI_PENDING 1
178 185
179/* 186/*
180 * We don't want allocation failures within the mmu code, so we preallocate 187 * We don't want allocation failures within the mmu code, so we preallocate
@@ -484,6 +491,11 @@ struct kvm_vcpu_arch {
484 u64 length; 491 u64 length;
485 u64 status; 492 u64 status;
486 } osvw; 493 } osvw;
494
495 struct {
496 u64 msr_val;
497 struct gfn_to_hva_cache data;
498 } pv_eoi;
487}; 499};
488 500
489struct kvm_lpage_info { 501struct kvm_lpage_info {
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7df1c6d839fb..61ccbdf3d0ac 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -409,6 +409,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
409 (1 << KVM_FEATURE_NOP_IO_DELAY) | 409 (1 << KVM_FEATURE_NOP_IO_DELAY) |
410 (1 << KVM_FEATURE_CLOCKSOURCE2) | 410 (1 << KVM_FEATURE_CLOCKSOURCE2) |
411 (1 << KVM_FEATURE_ASYNC_PF) | 411 (1 << KVM_FEATURE_ASYNC_PF) |
412 (1 << KVM_FEATURE_PV_EOI) |
412 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); 413 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
413 414
414 if (sched_info_on()) 415 if (sched_info_on())
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 805d887784f6..ce878788a39f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -311,6 +311,54 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
311 irq->level, irq->trig_mode); 311 irq->level, irq->trig_mode);
312} 312}
313 313
314static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
315{
316
317 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
318 sizeof(val));
319}
320
321static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
322{
323
324 return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
325 sizeof(*val));
326}
327
328static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
329{
330 return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
331}
332
333static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
334{
335 u8 val;
336 if (pv_eoi_get_user(vcpu, &val) < 0)
337 apic_debug("Can't read EOI MSR value: 0x%llx\n",
338 (unsigned long long)vcpi->arch.pv_eoi.msr_val);
339 return val & 0x1;
340}
341
342static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
343{
344 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
345 apic_debug("Can't set EOI MSR value: 0x%llx\n",
346 (unsigned long long)vcpi->arch.pv_eoi.msr_val);
347 return;
348 }
349 __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
350}
351
352static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
353{
354 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
355 apic_debug("Can't clear EOI MSR value: 0x%llx\n",
356 (unsigned long long)vcpi->arch.pv_eoi.msr_val);
357 return;
358 }
359 __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
360}
361
314static inline int apic_find_highest_isr(struct kvm_lapic *apic) 362static inline int apic_find_highest_isr(struct kvm_lapic *apic)
315{ 363{
316 int result; 364 int result;
@@ -527,15 +575,18 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
527 return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; 575 return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
528} 576}
529 577
530static void apic_set_eoi(struct kvm_lapic *apic) 578static int apic_set_eoi(struct kvm_lapic *apic)
531{ 579{
532 int vector = apic_find_highest_isr(apic); 580 int vector = apic_find_highest_isr(apic);
581
582 trace_kvm_eoi(apic, vector);
583
533 /* 584 /*
534 * Not every write EOI will has corresponding ISR, 585 * Not every write EOI will has corresponding ISR,
535 * one example is when Kernel check timer on setup_IO_APIC 586 * one example is when Kernel check timer on setup_IO_APIC
536 */ 587 */
537 if (vector == -1) 588 if (vector == -1)
538 return; 589 return vector;
539 590
540 apic_clear_isr(vector, apic); 591 apic_clear_isr(vector, apic);
541 apic_update_ppr(apic); 592 apic_update_ppr(apic);
@@ -550,6 +601,7 @@ static void apic_set_eoi(struct kvm_lapic *apic)
550 kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); 601 kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
551 } 602 }
552 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 603 kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
604 return vector;
553} 605}
554 606
555static void apic_send_ipi(struct kvm_lapic *apic) 607static void apic_send_ipi(struct kvm_lapic *apic)
@@ -1132,6 +1184,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
1132 atomic_set(&apic->lapic_timer.pending, 0); 1184 atomic_set(&apic->lapic_timer.pending, 0);
1133 if (kvm_vcpu_is_bsp(vcpu)) 1185 if (kvm_vcpu_is_bsp(vcpu))
1134 vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; 1186 vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
1187 vcpu->arch.pv_eoi.msr_val = 0;
1135 apic_update_ppr(apic); 1188 apic_update_ppr(apic);
1136 1189
1137 vcpu->arch.apic_arb_prio = 0; 1190 vcpu->arch.apic_arb_prio = 0;
@@ -1332,11 +1385,51 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
1332 hrtimer_start_expires(timer, HRTIMER_MODE_ABS); 1385 hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
1333} 1386}
1334 1387
1388/*
1389 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
1390 *
1391 * Detect whether guest triggered PV EOI since the
1392 * last entry. If yes, set EOI on guests's behalf.
1393 * Clear PV EOI in guest memory in any case.
1394 */
1395static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
1396 struct kvm_lapic *apic)
1397{
1398 bool pending;
1399 int vector;
1400 /*
1401 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
1402 * and KVM_PV_EOI_ENABLED in guest memory as follows:
1403 *
1404 * KVM_APIC_PV_EOI_PENDING is unset:
1405 * -> host disabled PV EOI.
1406 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
1407 * -> host enabled PV EOI, guest did not execute EOI yet.
1408 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
1409 * -> host enabled PV EOI, guest executed EOI.
1410 */
1411 BUG_ON(!pv_eoi_enabled(vcpu));
1412 pending = pv_eoi_get_pending(vcpu);
1413 /*
1414 * Clear pending bit in any case: it will be set again on vmentry.
1415 * While this might not be ideal from performance point of view,
1416 * this makes sure pv eoi is only enabled when we know it's safe.
1417 */
1418 pv_eoi_clr_pending(vcpu);
1419 if (pending)
1420 return;
1421 vector = apic_set_eoi(apic);
1422 trace_kvm_pv_eoi(apic, vector);
1423}
1424
1335void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) 1425void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
1336{ 1426{
1337 u32 data; 1427 u32 data;
1338 void *vapic; 1428 void *vapic;
1339 1429
1430 if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
1431 apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
1432
1340 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) 1433 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
1341 return; 1434 return;
1342 1435
@@ -1347,17 +1440,44 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
1347 apic_set_tpr(vcpu->arch.apic, data & 0xff); 1440 apic_set_tpr(vcpu->arch.apic, data & 0xff);
1348} 1441}
1349 1442
1443/*
1444 * apic_sync_pv_eoi_to_guest - called before vmentry
1445 *
1446 * Detect whether it's safe to enable PV EOI and
1447 * if yes do so.
1448 */
1449static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
1450 struct kvm_lapic *apic)
1451{
1452 if (!pv_eoi_enabled(vcpu) ||
1453 /* IRR set or many bits in ISR: could be nested. */
1454 apic->irr_pending ||
1455 /* Cache not set: could be safe but we don't bother. */
1456 apic->highest_isr_cache == -1 ||
1457 /* Need EOI to update ioapic. */
1458 kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
1459 /*
1460 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
1461 * so we need not do anything here.
1462 */
1463 return;
1464 }
1465
1466 pv_eoi_set_pending(apic->vcpu);
1467}
1468
1350void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) 1469void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
1351{ 1470{
1352 u32 data, tpr; 1471 u32 data, tpr;
1353 int max_irr, max_isr; 1472 int max_irr, max_isr;
1354 struct kvm_lapic *apic; 1473 struct kvm_lapic *apic = vcpu->arch.apic;
1355 void *vapic; 1474 void *vapic;
1356 1475
1476 apic_sync_pv_eoi_to_guest(vcpu, apic);
1477
1357 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) 1478 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
1358 return; 1479 return;
1359 1480
1360 apic = vcpu->arch.apic;
1361 tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff; 1481 tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff;
1362 max_irr = apic_find_highest_irr(apic); 1482 max_irr = apic_find_highest_irr(apic);
1363 if (max_irr < 0) 1483 if (max_irr < 0)
@@ -1443,3 +1563,16 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
1443 1563
1444 return 0; 1564 return 0;
1445} 1565}
1566
1567int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
1568{
1569 u64 addr = data & ~KVM_MSR_ENABLED;
1570 if (!IS_ALIGNED(addr, 4))
1571 return 1;
1572
1573 vcpu->arch.pv_eoi.msr_val = data;
1574 if (!pv_eoi_enabled(vcpu))
1575 return 0;
1576 return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
1577 addr);
1578}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 5ac9e5e2fedd..4af5405ae1e2 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -69,4 +69,6 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
69{ 69{
70 return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; 70 return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
71} 71}
72
73int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
72#endif 74#endif
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 911d2641f14c..851914e207fc 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -517,6 +517,40 @@ TRACE_EVENT(kvm_apic_accept_irq,
517 __entry->coalesced ? " (coalesced)" : "") 517 __entry->coalesced ? " (coalesced)" : "")
518); 518);
519 519
520TRACE_EVENT(kvm_eoi,
521 TP_PROTO(struct kvm_lapic *apic, int vector),
522 TP_ARGS(apic, vector),
523
524 TP_STRUCT__entry(
525 __field( __u32, apicid )
526 __field( int, vector )
527 ),
528
529 TP_fast_assign(
530 __entry->apicid = apic->vcpu->vcpu_id;
531 __entry->vector = vector;
532 ),
533
534 TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector)
535);
536
537TRACE_EVENT(kvm_pv_eoi,
538 TP_PROTO(struct kvm_lapic *apic, int vector),
539 TP_ARGS(apic, vector),
540
541 TP_STRUCT__entry(
542 __field( __u32, apicid )
543 __field( int, vector )
544 ),
545
546 TP_fast_assign(
547 __entry->apicid = apic->vcpu->vcpu_id;
548 __entry->vector = vector;
549 ),
550
551 TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector)
552);
553
520/* 554/*
521 * Tracepoint for nested VMRUN 555 * Tracepoint for nested VMRUN
522 */ 556 */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7ea0f611bc89..8eacb2e64560 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -795,6 +795,7 @@ static u32 msrs_to_save[] = {
795 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, 795 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
796 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, 796 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
797 HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, 797 HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
798 MSR_KVM_PV_EOI_EN,
798 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, 799 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
799 MSR_STAR, 800 MSR_STAR,
800#ifdef CONFIG_X86_64 801#ifdef CONFIG_X86_64
@@ -1653,6 +1654,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1653 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); 1654 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
1654 1655
1655 break; 1656 break;
1657 case MSR_KVM_PV_EOI_EN:
1658 if (kvm_lapic_enable_pv_eoi(vcpu, data))
1659 return 1;
1660 break;
1656 1661
1657 case MSR_IA32_MCG_CTL: 1662 case MSR_IA32_MCG_CTL:
1658 case MSR_IA32_MCG_STATUS: 1663 case MSR_IA32_MCG_STATUS:
@@ -5394,6 +5399,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5394 5399
5395cancel_injection: 5400cancel_injection:
5396 kvm_x86_ops->cancel_injection(vcpu); 5401 kvm_x86_ops->cancel_injection(vcpu);
5402 if (unlikely(vcpu->arch.apic_attention))
5403 kvm_lapic_sync_from_vapic(vcpu);
5397out: 5404out:
5398 return r; 5405 return r;
5399} 5406}