diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-24 15:01:20 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-24 15:01:20 -0400 |
commit | 5fecc9d8f59e765c2a48379dd7c6f5cf88c7d75a (patch) | |
tree | d1fc25d9650d3ac24591bba6f5e2e7a1afc54796 /arch/x86/kvm/lapic.c | |
parent | 3c4cfadef6a1665d9cd02a543782d03d3e6740c6 (diff) | |
parent | 1a577b72475d161b6677c05abe57301362023bb2 (diff) |
Merge tag 'kvm-3.6-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Avi Kivity:
"Highlights include
- full big real mode emulation on pre-Westmere Intel hosts (can be
disabled with emulate_invalid_guest_state=0)
- relatively small ppc and s390 updates
- PCID/INVPCID support in guests
- EOI avoidance (3.6 guests should perform better on 3.6 hosts on
interrupt-intensive workloads)
- Lockless write faults during live migration
- EPT accessed/dirty bits support for new Intel processors"
Fix up conflicts in:
- Documentation/virtual/kvm/api.txt:
Stupid subchapter numbering, added next to each other.
- arch/powerpc/kvm/booke_interrupts.S:
PPC asm changes clashing with the KVM fixes
- arch/s390/include/asm/sigp.h, arch/s390/kvm/sigp.c:
Duplicated commits through the kvm tree and the s390 tree, with
subsequent edits in the KVM tree.
* tag 'kvm-3.6-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (93 commits)
KVM: fix race with level interrupts
x86, hyper: fix build with !CONFIG_KVM_GUEST
Revert "apic: fix kvm build on UP without IOAPIC"
KVM guest: switch to apic_set_eoi_write, apic_write
apic: add apic_set_eoi_write for PV use
KVM: VMX: Implement PCID/INVPCID for guests with EPT
KVM: Add x86_hyper_kvm to complete detect_hypervisor_platform check
KVM: PPC: Critical interrupt emulation support
KVM: PPC: e500mc: Fix tlbilx emulation for 64-bit guests
KVM: PPC64: booke: Set interrupt computation mode for 64-bit host
KVM: PPC: bookehv: Add ESR flag to Data Storage Interrupt
KVM: PPC: bookehv64: Add support for std/ld emulation.
booke: Added crit/mc exception handler for e500v2
booke/bookehv: Add host crit-watchdog exception support
KVM: MMU: document mmu-lock and fast page fault
KVM: MMU: fix kvm_mmu_pagetable_walk tracepoint
KVM: MMU: trace fast page fault
KVM: MMU: fast path of handling guest page fault
KVM: MMU: introduce SPTE_MMU_WRITEABLE bit
KVM: MMU: fold tlb flush judgement into mmu_spte_update
...
Diffstat (limited to 'arch/x86/kvm/lapic.c')
-rw-r--r-- | arch/x86/kvm/lapic.c | 194 |
1 files changed, 188 insertions, 6 deletions
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 93c15743f1ee..ce878788a39f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -107,6 +107,16 @@ static inline void apic_clear_vector(int vec, void *bitmap) | |||
107 | clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | 107 | clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); |
108 | } | 108 | } |
109 | 109 | ||
/*
 * Set the bit for vector @vec in the APIC register bitmap at @bitmap and
 * return the bit's previous value (non-zero if it was already set).
 *
 * Built on __test_and_set_bit(); by kernel convention the double-underscore
 * bitops are the non-atomic variants, so callers presumably serialize
 * access themselves (TODO confirm against the bitops documentation).
 */
static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
	void *reg = bitmap + REG_POS(vec);

	return __test_and_set_bit(VEC_POS(vec), reg);
}
114 | |||
/*
 * Clear the bit for vector @vec in the APIC register bitmap at @bitmap and
 * return the bit's previous value (non-zero if it was set).
 *
 * Built on __test_and_clear_bit(); by kernel convention the
 * double-underscore bitops are the non-atomic variants, so callers
 * presumably serialize access themselves (TODO confirm).
 */
static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
{
	void *reg = bitmap + REG_POS(vec);

	return __test_and_clear_bit(VEC_POS(vec), reg);
}
119 | |||
110 | static inline int apic_hw_enabled(struct kvm_lapic *apic) | 120 | static inline int apic_hw_enabled(struct kvm_lapic *apic) |
111 | { | 121 | { |
112 | return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE; | 122 | return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE; |
@@ -210,6 +220,16 @@ static int find_highest_vector(void *bitmap) | |||
210 | return fls(word[word_offset << 2]) - 1 + (word_offset << 5); | 220 | return fls(word[word_offset << 2]) - 1 + (word_offset << 5); |
211 | } | 221 | } |
212 | 222 | ||
223 | static u8 count_vectors(void *bitmap) | ||
224 | { | ||
225 | u32 *word = bitmap; | ||
226 | int word_offset; | ||
227 | u8 count = 0; | ||
228 | for (word_offset = 0; word_offset < MAX_APIC_VECTOR >> 5; ++word_offset) | ||
229 | count += hweight32(word[word_offset << 2]); | ||
230 | return count; | ||
231 | } | ||
232 | |||
213 | static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) | 233 | static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) |
214 | { | 234 | { |
215 | apic->irr_pending = true; | 235 | apic->irr_pending = true; |
@@ -242,6 +262,27 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) | |||
242 | apic->irr_pending = true; | 262 | apic->irr_pending = true; |
243 | } | 263 | } |
244 | 264 | ||
265 | static inline void apic_set_isr(int vec, struct kvm_lapic *apic) | ||
266 | { | ||
267 | if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR)) | ||
268 | ++apic->isr_count; | ||
269 | BUG_ON(apic->isr_count > MAX_APIC_VECTOR); | ||
270 | /* | ||
271 | * ISR (in service register) bit is set when injecting an interrupt. | ||
272 | * The highest vector is injected. Thus the latest bit set matches | ||
273 | * the highest bit in ISR. | ||
274 | */ | ||
275 | apic->highest_isr_cache = vec; | ||
276 | } | ||
277 | |||
278 | static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) | ||
279 | { | ||
280 | if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR)) | ||
281 | --apic->isr_count; | ||
282 | BUG_ON(apic->isr_count < 0); | ||
283 | apic->highest_isr_cache = -1; | ||
284 | } | ||
285 | |||
245 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) | 286 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) |
246 | { | 287 | { |
247 | struct kvm_lapic *apic = vcpu->arch.apic; | 288 | struct kvm_lapic *apic = vcpu->arch.apic; |
@@ -270,9 +311,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) | |||
270 | irq->level, irq->trig_mode); | 311 | irq->level, irq->trig_mode); |
271 | } | 312 | } |
272 | 313 | ||
314 | static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) | ||
315 | { | ||
316 | |||
317 | return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val, | ||
318 | sizeof(val)); | ||
319 | } | ||
320 | |||
321 | static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val) | ||
322 | { | ||
323 | |||
324 | return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val, | ||
325 | sizeof(*val)); | ||
326 | } | ||
327 | |||
328 | static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) | ||
329 | { | ||
330 | return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED; | ||
331 | } | ||
332 | |||
333 | static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) | ||
334 | { | ||
335 | u8 val; | ||
336 | if (pv_eoi_get_user(vcpu, &val) < 0) | ||
337 | apic_debug("Can't read EOI MSR value: 0x%llx\n", | ||
338 | (unsigned long long)vcpi->arch.pv_eoi.msr_val); | ||
339 | return val & 0x1; | ||
340 | } | ||
341 | |||
342 | static void pv_eoi_set_pending(struct kvm_vcpu *vcpu) | ||
343 | { | ||
344 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { | ||
345 | apic_debug("Can't set EOI MSR value: 0x%llx\n", | ||
346 | (unsigned long long)vcpi->arch.pv_eoi.msr_val); | ||
347 | return; | ||
348 | } | ||
349 | __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); | ||
350 | } | ||
351 | |||
352 | static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) | ||
353 | { | ||
354 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { | ||
355 | apic_debug("Can't clear EOI MSR value: 0x%llx\n", | ||
356 | (unsigned long long)vcpi->arch.pv_eoi.msr_val); | ||
357 | return; | ||
358 | } | ||
359 | __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); | ||
360 | } | ||
361 | |||
273 | static inline int apic_find_highest_isr(struct kvm_lapic *apic) | 362 | static inline int apic_find_highest_isr(struct kvm_lapic *apic) |
274 | { | 363 | { |
275 | int result; | 364 | int result; |
365 | if (!apic->isr_count) | ||
366 | return -1; | ||
367 | if (likely(apic->highest_isr_cache != -1)) | ||
368 | return apic->highest_isr_cache; | ||
276 | 369 | ||
277 | result = find_highest_vector(apic->regs + APIC_ISR); | 370 | result = find_highest_vector(apic->regs + APIC_ISR); |
278 | ASSERT(result == -1 || result >= 16); | 371 | ASSERT(result == -1 || result >= 16); |
@@ -482,17 +575,20 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) | |||
482 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; | 575 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; |
483 | } | 576 | } |
484 | 577 | ||
485 | static void apic_set_eoi(struct kvm_lapic *apic) | 578 | static int apic_set_eoi(struct kvm_lapic *apic) |
486 | { | 579 | { |
487 | int vector = apic_find_highest_isr(apic); | 580 | int vector = apic_find_highest_isr(apic); |
581 | |||
582 | trace_kvm_eoi(apic, vector); | ||
583 | |||
488 | /* | 584 | /* |
489 | * Not every write EOI will has corresponding ISR, | 585 | * Not every write EOI will has corresponding ISR, |
490 | * one example is when Kernel check timer on setup_IO_APIC | 586 | * one example is when Kernel check timer on setup_IO_APIC |
491 | */ | 587 | */ |
492 | if (vector == -1) | 588 | if (vector == -1) |
493 | return; | 589 | return vector; |
494 | 590 | ||
495 | apic_clear_vector(vector, apic->regs + APIC_ISR); | 591 | apic_clear_isr(vector, apic); |
496 | apic_update_ppr(apic); | 592 | apic_update_ppr(apic); |
497 | 593 | ||
498 | if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && | 594 | if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && |
@@ -505,6 +601,7 @@ static void apic_set_eoi(struct kvm_lapic *apic) | |||
505 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); | 601 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); |
506 | } | 602 | } |
507 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); | 603 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); |
604 | return vector; | ||
508 | } | 605 | } |
509 | 606 | ||
510 | static void apic_send_ipi(struct kvm_lapic *apic) | 607 | static void apic_send_ipi(struct kvm_lapic *apic) |
@@ -1081,10 +1178,13 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
1081 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); | 1178 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); |
1082 | } | 1179 | } |
1083 | apic->irr_pending = false; | 1180 | apic->irr_pending = false; |
1181 | apic->isr_count = 0; | ||
1182 | apic->highest_isr_cache = -1; | ||
1084 | update_divide_count(apic); | 1183 | update_divide_count(apic); |
1085 | atomic_set(&apic->lapic_timer.pending, 0); | 1184 | atomic_set(&apic->lapic_timer.pending, 0); |
1086 | if (kvm_vcpu_is_bsp(vcpu)) | 1185 | if (kvm_vcpu_is_bsp(vcpu)) |
1087 | vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; | 1186 | vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; |
1187 | vcpu->arch.pv_eoi.msr_val = 0; | ||
1088 | apic_update_ppr(apic); | 1188 | apic_update_ppr(apic); |
1089 | 1189 | ||
1090 | vcpu->arch.apic_arb_prio = 0; | 1190 | vcpu->arch.apic_arb_prio = 0; |
@@ -1248,7 +1348,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) | |||
1248 | if (vector == -1) | 1348 | if (vector == -1) |
1249 | return -1; | 1349 | return -1; |
1250 | 1350 | ||
1251 | apic_set_vector(vector, apic->regs + APIC_ISR); | 1351 | apic_set_isr(vector, apic); |
1252 | apic_update_ppr(apic); | 1352 | apic_update_ppr(apic); |
1253 | apic_clear_irr(vector, apic); | 1353 | apic_clear_irr(vector, apic); |
1254 | return vector; | 1354 | return vector; |
@@ -1267,6 +1367,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) | |||
1267 | update_divide_count(apic); | 1367 | update_divide_count(apic); |
1268 | start_apic_timer(apic); | 1368 | start_apic_timer(apic); |
1269 | apic->irr_pending = true; | 1369 | apic->irr_pending = true; |
1370 | apic->isr_count = count_vectors(apic->regs + APIC_ISR); | ||
1371 | apic->highest_isr_cache = -1; | ||
1270 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 1372 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
1271 | } | 1373 | } |
1272 | 1374 | ||
@@ -1283,11 +1385,51 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) | |||
1283 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | 1385 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); |
1284 | } | 1386 | } |
1285 | 1387 | ||
1388 | /* | ||
1389 | * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt | ||
1390 | * | ||
1391 | * Detect whether guest triggered PV EOI since the | ||
1392 | * last entry. If yes, set EOI on guests's behalf. | ||
1393 | * Clear PV EOI in guest memory in any case. | ||
1394 | */ | ||
1395 | static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu, | ||
1396 | struct kvm_lapic *apic) | ||
1397 | { | ||
1398 | bool pending; | ||
1399 | int vector; | ||
1400 | /* | ||
1401 | * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host | ||
1402 | * and KVM_PV_EOI_ENABLED in guest memory as follows: | ||
1403 | * | ||
1404 | * KVM_APIC_PV_EOI_PENDING is unset: | ||
1405 | * -> host disabled PV EOI. | ||
1406 | * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set: | ||
1407 | * -> host enabled PV EOI, guest did not execute EOI yet. | ||
1408 | * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset: | ||
1409 | * -> host enabled PV EOI, guest executed EOI. | ||
1410 | */ | ||
1411 | BUG_ON(!pv_eoi_enabled(vcpu)); | ||
1412 | pending = pv_eoi_get_pending(vcpu); | ||
1413 | /* | ||
1414 | * Clear pending bit in any case: it will be set again on vmentry. | ||
1415 | * While this might not be ideal from performance point of view, | ||
1416 | * this makes sure pv eoi is only enabled when we know it's safe. | ||
1417 | */ | ||
1418 | pv_eoi_clr_pending(vcpu); | ||
1419 | if (pending) | ||
1420 | return; | ||
1421 | vector = apic_set_eoi(apic); | ||
1422 | trace_kvm_pv_eoi(apic, vector); | ||
1423 | } | ||
1424 | |||
1286 | void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) | 1425 | void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) |
1287 | { | 1426 | { |
1288 | u32 data; | 1427 | u32 data; |
1289 | void *vapic; | 1428 | void *vapic; |
1290 | 1429 | ||
1430 | if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention)) | ||
1431 | apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic); | ||
1432 | |||
1291 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) | 1433 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) |
1292 | return; | 1434 | return; |
1293 | 1435 | ||
@@ -1298,17 +1440,44 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) | |||
1298 | apic_set_tpr(vcpu->arch.apic, data & 0xff); | 1440 | apic_set_tpr(vcpu->arch.apic, data & 0xff); |
1299 | } | 1441 | } |
1300 | 1442 | ||
1443 | /* | ||
1444 | * apic_sync_pv_eoi_to_guest - called before vmentry | ||
1445 | * | ||
1446 | * Detect whether it's safe to enable PV EOI and | ||
1447 | * if yes do so. | ||
1448 | */ | ||
1449 | static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu, | ||
1450 | struct kvm_lapic *apic) | ||
1451 | { | ||
1452 | if (!pv_eoi_enabled(vcpu) || | ||
1453 | /* IRR set or many bits in ISR: could be nested. */ | ||
1454 | apic->irr_pending || | ||
1455 | /* Cache not set: could be safe but we don't bother. */ | ||
1456 | apic->highest_isr_cache == -1 || | ||
1457 | /* Need EOI to update ioapic. */ | ||
1458 | kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) { | ||
1459 | /* | ||
1460 | * PV EOI was disabled by apic_sync_pv_eoi_from_guest | ||
1461 | * so we need not do anything here. | ||
1462 | */ | ||
1463 | return; | ||
1464 | } | ||
1465 | |||
1466 | pv_eoi_set_pending(apic->vcpu); | ||
1467 | } | ||
1468 | |||
1301 | void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) | 1469 | void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) |
1302 | { | 1470 | { |
1303 | u32 data, tpr; | 1471 | u32 data, tpr; |
1304 | int max_irr, max_isr; | 1472 | int max_irr, max_isr; |
1305 | struct kvm_lapic *apic; | 1473 | struct kvm_lapic *apic = vcpu->arch.apic; |
1306 | void *vapic; | 1474 | void *vapic; |
1307 | 1475 | ||
1476 | apic_sync_pv_eoi_to_guest(vcpu, apic); | ||
1477 | |||
1308 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) | 1478 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) |
1309 | return; | 1479 | return; |
1310 | 1480 | ||
1311 | apic = vcpu->arch.apic; | ||
1312 | tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff; | 1481 | tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff; |
1313 | max_irr = apic_find_highest_irr(apic); | 1482 | max_irr = apic_find_highest_irr(apic); |
1314 | if (max_irr < 0) | 1483 | if (max_irr < 0) |
@@ -1394,3 +1563,16 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) | |||
1394 | 1563 | ||
1395 | return 0; | 1564 | return 0; |
1396 | } | 1565 | } |
1566 | |||
1567 | int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) | ||
1568 | { | ||
1569 | u64 addr = data & ~KVM_MSR_ENABLED; | ||
1570 | if (!IS_ALIGNED(addr, 4)) | ||
1571 | return 1; | ||
1572 | |||
1573 | vcpu->arch.pv_eoi.msr_val = data; | ||
1574 | if (!pv_eoi_enabled(vcpu)) | ||
1575 | return 0; | ||
1576 | return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, | ||
1577 | addr); | ||
1578 | } | ||