diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-03 19:49:46 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-03 19:49:46 -0400 |
commit | aa1be08f52585fe36ecfaf5bddfdc784eb4c94cf (patch) | |
tree | bb8a647ba51f6990c880234c32c7ffe0cc8ec826 | |
parent | 82463436a7fa40345c6febf0baa4c954af506ca6 (diff) | |
parent | e8ab8d24b488632d07ce5ddb261f1d454114415b (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Paolo Bonzini:
- PPC and ARM bugfixes from submaintainers
- Fix old Windows versions on AMD (recent regression)
- Fix old Linux versions on processors without EPT
- Fixes for LAPIC timer optimizations
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (21 commits)
KVM: nVMX: Fix size checks in vmx_set_nested_state
KVM: selftests: make hyperv_cpuid test pass on AMD
KVM: lapic: Check for in-kernel LAPIC before deferencing apic pointer
KVM: fix KVM_CLEAR_DIRTY_LOG for memory slots of unaligned size
x86/kvm/mmu: reset MMU context when 32-bit guest switches PAE
KVM: x86: Whitelist port 0x7e for pre-incrementing %rip
Documentation: kvm: fix dirty log ioctl arch lists
KVM: VMX: Move RSB stuffing to before the first RET after VM-Exit
KVM: arm/arm64: Don't emulate virtual timers on userspace ioctls
kvm: arm: Skip stage2 huge mappings for unaligned ipa backed by THP
KVM: arm/arm64: Ensure vcpu target is unset on reset failure
KVM: lapic: Convert guest TSC to host time domain if necessary
KVM: lapic: Allow user to disable adaptive tuning of timer advancement
KVM: lapic: Track lapic timer advance per vCPU
KVM: lapic: Disable timer advancement if adaptive tuning goes haywire
x86: kvm: hyper-v: deal with buggy TLB flush requests from WS2012
KVM: x86: Consider LAPIC TSC-Deadline timer expired if deadline too short
KVM: PPC: Book3S: Protect memslots while validating user address
KVM: PPC: Book3S HV: Perserve PSSCR FAKE_SUSPEND bit on guest exit
KVM: arm/arm64: vgic-v3: Retire pending interrupts on disabling LPIs
...
-rw-r--r-- | Documentation/virtual/kvm/api.txt | 11 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_vio.c | 6 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 1 | ||||
-rw-r--r-- | arch/x86/include/uapi/asm/kvm.h | 1 | ||||
-rw-r--r-- | arch/x86/kvm/hyperv.c | 11 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 73 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.h | 4 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 1 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/nested.c | 4 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmenter.S | 12 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 7 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 36 | ||||
-rw-r--r-- | arch/x86/kvm/x86.h | 2 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/dirty_log_test.c | 9 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c | 9 | ||||
-rw-r--r-- | virt/kvm/arm/arch_timer.c | 17 | ||||
-rw-r--r-- | virt/kvm/arm/arm.c | 11 | ||||
-rw-r--r-- | virt/kvm/arm/mmu.c | 6 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio-v3.c | 3 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic.c | 21 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic.h | 1 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 7 |
23 files changed, 192 insertions, 65 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 67068c47c591..64b38dfcc243 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt | |||
@@ -321,7 +321,7 @@ cpu's hardware control block. | |||
321 | 4.8 KVM_GET_DIRTY_LOG (vm ioctl) | 321 | 4.8 KVM_GET_DIRTY_LOG (vm ioctl) |
322 | 322 | ||
323 | Capability: basic | 323 | Capability: basic |
324 | Architectures: x86 | 324 | Architectures: all |
325 | Type: vm ioctl | 325 | Type: vm ioctl |
326 | Parameters: struct kvm_dirty_log (in/out) | 326 | Parameters: struct kvm_dirty_log (in/out) |
327 | Returns: 0 on success, -1 on error | 327 | Returns: 0 on success, -1 on error |
@@ -3810,7 +3810,7 @@ to I/O ports. | |||
3810 | 4.117 KVM_CLEAR_DIRTY_LOG (vm ioctl) | 3810 | 4.117 KVM_CLEAR_DIRTY_LOG (vm ioctl) |
3811 | 3811 | ||
3812 | Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT | 3812 | Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT |
3813 | Architectures: x86 | 3813 | Architectures: x86, arm, arm64, mips |
3814 | Type: vm ioctl | 3814 | Type: vm ioctl |
3815 | Parameters: struct kvm_dirty_log (in) | 3815 | Parameters: struct kvm_dirty_log (in) |
3816 | Returns: 0 on success, -1 on error | 3816 | Returns: 0 on success, -1 on error |
@@ -3830,8 +3830,9 @@ The ioctl clears the dirty status of pages in a memory slot, according to | |||
3830 | the bitmap that is passed in struct kvm_clear_dirty_log's dirty_bitmap | 3830 | the bitmap that is passed in struct kvm_clear_dirty_log's dirty_bitmap |
3831 | field. Bit 0 of the bitmap corresponds to page "first_page" in the | 3831 | field. Bit 0 of the bitmap corresponds to page "first_page" in the |
3832 | memory slot, and num_pages is the size in bits of the input bitmap. | 3832 | memory slot, and num_pages is the size in bits of the input bitmap. |
3833 | Both first_page and num_pages must be a multiple of 64. For each bit | 3833 | first_page must be a multiple of 64; num_pages must also be a multiple of |
3834 | that is set in the input bitmap, the corresponding page is marked "clean" | 3834 | 64 unless first_page + num_pages is the size of the memory slot. For each |
3835 | bit that is set in the input bitmap, the corresponding page is marked "clean" | ||
3835 | in KVM's dirty bitmap, and dirty tracking is re-enabled for that page | 3836 | in KVM's dirty bitmap, and dirty tracking is re-enabled for that page |
3836 | (for example via write-protection, or by clearing the dirty bit in | 3837 | (for example via write-protection, or by clearing the dirty bit in |
3837 | a page table entry). | 3838 | a page table entry). |
@@ -4799,7 +4800,7 @@ and injected exceptions. | |||
4799 | 4800 | ||
4800 | 7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT | 4801 | 7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT |
4801 | 4802 | ||
4802 | Architectures: all | 4803 | Architectures: x86, arm, arm64, mips |
4803 | Parameters: args[0] whether feature should be enabled or not | 4804 | Parameters: args[0] whether feature should be enabled or not |
4804 | 4805 | ||
4805 | With this capability enabled, KVM_GET_DIRTY_LOG will not automatically | 4806 | With this capability enabled, KVM_GET_DIRTY_LOG will not automatically |
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index f02b04973710..f100e331e69b 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c | |||
@@ -543,14 +543,14 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | |||
543 | if (ret != H_SUCCESS) | 543 | if (ret != H_SUCCESS) |
544 | return ret; | 544 | return ret; |
545 | 545 | ||
546 | idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
547 | |||
546 | ret = kvmppc_tce_validate(stt, tce); | 548 | ret = kvmppc_tce_validate(stt, tce); |
547 | if (ret != H_SUCCESS) | 549 | if (ret != H_SUCCESS) |
548 | return ret; | 550 | goto unlock_exit; |
549 | 551 | ||
550 | dir = iommu_tce_direction(tce); | 552 | dir = iommu_tce_direction(tce); |
551 | 553 | ||
552 | idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
553 | |||
554 | if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { | 554 | if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { |
555 | ret = H_PARAMETER; | 555 | ret = H_PARAMETER; |
556 | goto unlock_exit; | 556 | goto unlock_exit; |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 06964350b97a..b2b29d4f9842 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
@@ -3423,7 +3423,9 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, | |||
3423 | vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2); | 3423 | vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2); |
3424 | vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3); | 3424 | vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3); |
3425 | 3425 | ||
3426 | mtspr(SPRN_PSSCR, host_psscr); | 3426 | /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */ |
3427 | mtspr(SPRN_PSSCR, host_psscr | | ||
3428 | (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); | ||
3427 | mtspr(SPRN_HFSCR, host_hfscr); | 3429 | mtspr(SPRN_HFSCR, host_hfscr); |
3428 | mtspr(SPRN_CIABR, host_ciabr); | 3430 | mtspr(SPRN_CIABR, host_ciabr); |
3429 | mtspr(SPRN_DAWR, host_dawr); | 3431 | mtspr(SPRN_DAWR, host_dawr); |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a9d03af34030..c79abe7ca093 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -295,6 +295,7 @@ union kvm_mmu_extended_role { | |||
295 | unsigned int valid:1; | 295 | unsigned int valid:1; |
296 | unsigned int execonly:1; | 296 | unsigned int execonly:1; |
297 | unsigned int cr0_pg:1; | 297 | unsigned int cr0_pg:1; |
298 | unsigned int cr4_pae:1; | ||
298 | unsigned int cr4_pse:1; | 299 | unsigned int cr4_pse:1; |
299 | unsigned int cr4_pke:1; | 300 | unsigned int cr4_pke:1; |
300 | unsigned int cr4_smap:1; | 301 | unsigned int cr4_smap:1; |
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index dabfcf7c3941..7a0e64ccd6ff 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h | |||
@@ -381,6 +381,7 @@ struct kvm_sync_regs { | |||
381 | #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) | 381 | #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) |
382 | #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) | 382 | #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) |
383 | #define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) | 383 | #define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) |
384 | #define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3) | ||
384 | 385 | ||
385 | #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 | 386 | #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 |
386 | #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 | 387 | #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 |
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 421899f6ad7b..cc24b3a32c44 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c | |||
@@ -1371,7 +1371,16 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, | |||
1371 | 1371 | ||
1372 | valid_bank_mask = BIT_ULL(0); | 1372 | valid_bank_mask = BIT_ULL(0); |
1373 | sparse_banks[0] = flush.processor_mask; | 1373 | sparse_banks[0] = flush.processor_mask; |
1374 | all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS; | 1374 | |
1375 | /* | ||
1376 | * Work around possible WS2012 bug: it sends hypercalls | ||
1377 | * with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear, | ||
1378 | * while also expecting us to flush something and crashing if | ||
1379 | * we don't. Let's treat processor_mask == 0 same as | ||
1380 | * HV_FLUSH_ALL_PROCESSORS. | ||
1381 | */ | ||
1382 | all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) || | ||
1383 | flush.processor_mask == 0; | ||
1375 | } else { | 1384 | } else { |
1376 | if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex, | 1385 | if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex, |
1377 | sizeof(flush_ex)))) | 1386 | sizeof(flush_ex)))) |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9bf70cf84564..bd13fdddbdc4 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -70,7 +70,6 @@ | |||
70 | #define APIC_BROADCAST 0xFF | 70 | #define APIC_BROADCAST 0xFF |
71 | #define X2APIC_BROADCAST 0xFFFFFFFFul | 71 | #define X2APIC_BROADCAST 0xFFFFFFFFul |
72 | 72 | ||
73 | static bool lapic_timer_advance_adjust_done = false; | ||
74 | #define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100 | 73 | #define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100 |
75 | /* step-by-step approximation to mitigate fluctuation */ | 74 | /* step-by-step approximation to mitigate fluctuation */ |
76 | #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 | 75 | #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 |
@@ -1482,14 +1481,32 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) | |||
1482 | return false; | 1481 | return false; |
1483 | } | 1482 | } |
1484 | 1483 | ||
1484 | static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles) | ||
1485 | { | ||
1486 | u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns; | ||
1487 | |||
1488 | /* | ||
1489 | * If the guest TSC is running at a different ratio than the host, then | ||
1490 | * convert the delay to nanoseconds to achieve an accurate delay. Note | ||
1491 | * that __delay() uses delay_tsc whenever the hardware has TSC, thus | ||
1492 | * always for VMX enabled hardware. | ||
1493 | */ | ||
1494 | if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) { | ||
1495 | __delay(min(guest_cycles, | ||
1496 | nsec_to_cycles(vcpu, timer_advance_ns))); | ||
1497 | } else { | ||
1498 | u64 delay_ns = guest_cycles * 1000000ULL; | ||
1499 | do_div(delay_ns, vcpu->arch.virtual_tsc_khz); | ||
1500 | ndelay(min_t(u32, delay_ns, timer_advance_ns)); | ||
1501 | } | ||
1502 | } | ||
1503 | |||
1485 | void wait_lapic_expire(struct kvm_vcpu *vcpu) | 1504 | void wait_lapic_expire(struct kvm_vcpu *vcpu) |
1486 | { | 1505 | { |
1487 | struct kvm_lapic *apic = vcpu->arch.apic; | 1506 | struct kvm_lapic *apic = vcpu->arch.apic; |
1507 | u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns; | ||
1488 | u64 guest_tsc, tsc_deadline, ns; | 1508 | u64 guest_tsc, tsc_deadline, ns; |
1489 | 1509 | ||
1490 | if (!lapic_in_kernel(vcpu)) | ||
1491 | return; | ||
1492 | |||
1493 | if (apic->lapic_timer.expired_tscdeadline == 0) | 1510 | if (apic->lapic_timer.expired_tscdeadline == 0) |
1494 | return; | 1511 | return; |
1495 | 1512 | ||
@@ -1501,33 +1518,37 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) | |||
1501 | guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); | 1518 | guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); |
1502 | trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); | 1519 | trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); |
1503 | 1520 | ||
1504 | /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ | ||
1505 | if (guest_tsc < tsc_deadline) | 1521 | if (guest_tsc < tsc_deadline) |
1506 | __delay(min(tsc_deadline - guest_tsc, | 1522 | __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); |
1507 | nsec_to_cycles(vcpu, lapic_timer_advance_ns))); | ||
1508 | 1523 | ||
1509 | if (!lapic_timer_advance_adjust_done) { | 1524 | if (!apic->lapic_timer.timer_advance_adjust_done) { |
1510 | /* too early */ | 1525 | /* too early */ |
1511 | if (guest_tsc < tsc_deadline) { | 1526 | if (guest_tsc < tsc_deadline) { |
1512 | ns = (tsc_deadline - guest_tsc) * 1000000ULL; | 1527 | ns = (tsc_deadline - guest_tsc) * 1000000ULL; |
1513 | do_div(ns, vcpu->arch.virtual_tsc_khz); | 1528 | do_div(ns, vcpu->arch.virtual_tsc_khz); |
1514 | lapic_timer_advance_ns -= min((unsigned int)ns, | 1529 | timer_advance_ns -= min((u32)ns, |
1515 | lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); | 1530 | timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); |
1516 | } else { | 1531 | } else { |
1517 | /* too late */ | 1532 | /* too late */ |
1518 | ns = (guest_tsc - tsc_deadline) * 1000000ULL; | 1533 | ns = (guest_tsc - tsc_deadline) * 1000000ULL; |
1519 | do_div(ns, vcpu->arch.virtual_tsc_khz); | 1534 | do_div(ns, vcpu->arch.virtual_tsc_khz); |
1520 | lapic_timer_advance_ns += min((unsigned int)ns, | 1535 | timer_advance_ns += min((u32)ns, |
1521 | lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); | 1536 | timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); |
1522 | } | 1537 | } |
1523 | if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) | 1538 | if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) |
1524 | lapic_timer_advance_adjust_done = true; | 1539 | apic->lapic_timer.timer_advance_adjust_done = true; |
1540 | if (unlikely(timer_advance_ns > 5000)) { | ||
1541 | timer_advance_ns = 0; | ||
1542 | apic->lapic_timer.timer_advance_adjust_done = true; | ||
1543 | } | ||
1544 | apic->lapic_timer.timer_advance_ns = timer_advance_ns; | ||
1525 | } | 1545 | } |
1526 | } | 1546 | } |
1527 | 1547 | ||
1528 | static void start_sw_tscdeadline(struct kvm_lapic *apic) | 1548 | static void start_sw_tscdeadline(struct kvm_lapic *apic) |
1529 | { | 1549 | { |
1530 | u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; | 1550 | struct kvm_timer *ktimer = &apic->lapic_timer; |
1551 | u64 guest_tsc, tscdeadline = ktimer->tscdeadline; | ||
1531 | u64 ns = 0; | 1552 | u64 ns = 0; |
1532 | ktime_t expire; | 1553 | ktime_t expire; |
1533 | struct kvm_vcpu *vcpu = apic->vcpu; | 1554 | struct kvm_vcpu *vcpu = apic->vcpu; |
@@ -1542,13 +1563,15 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) | |||
1542 | 1563 | ||
1543 | now = ktime_get(); | 1564 | now = ktime_get(); |
1544 | guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); | 1565 | guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); |
1545 | if (likely(tscdeadline > guest_tsc)) { | 1566 | |
1546 | ns = (tscdeadline - guest_tsc) * 1000000ULL; | 1567 | ns = (tscdeadline - guest_tsc) * 1000000ULL; |
1547 | do_div(ns, this_tsc_khz); | 1568 | do_div(ns, this_tsc_khz); |
1569 | |||
1570 | if (likely(tscdeadline > guest_tsc) && | ||
1571 | likely(ns > apic->lapic_timer.timer_advance_ns)) { | ||
1548 | expire = ktime_add_ns(now, ns); | 1572 | expire = ktime_add_ns(now, ns); |
1549 | expire = ktime_sub_ns(expire, lapic_timer_advance_ns); | 1573 | expire = ktime_sub_ns(expire, ktimer->timer_advance_ns); |
1550 | hrtimer_start(&apic->lapic_timer.timer, | 1574 | hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_PINNED); |
1551 | expire, HRTIMER_MODE_ABS_PINNED); | ||
1552 | } else | 1575 | } else |
1553 | apic_timer_expired(apic); | 1576 | apic_timer_expired(apic); |
1554 | 1577 | ||
@@ -2255,7 +2278,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) | |||
2255 | return HRTIMER_NORESTART; | 2278 | return HRTIMER_NORESTART; |
2256 | } | 2279 | } |
2257 | 2280 | ||
2258 | int kvm_create_lapic(struct kvm_vcpu *vcpu) | 2281 | int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) |
2259 | { | 2282 | { |
2260 | struct kvm_lapic *apic; | 2283 | struct kvm_lapic *apic; |
2261 | 2284 | ||
@@ -2279,6 +2302,14 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) | |||
2279 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, | 2302 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, |
2280 | HRTIMER_MODE_ABS_PINNED); | 2303 | HRTIMER_MODE_ABS_PINNED); |
2281 | apic->lapic_timer.timer.function = apic_timer_fn; | 2304 | apic->lapic_timer.timer.function = apic_timer_fn; |
2305 | if (timer_advance_ns == -1) { | ||
2306 | apic->lapic_timer.timer_advance_ns = 1000; | ||
2307 | apic->lapic_timer.timer_advance_adjust_done = false; | ||
2308 | } else { | ||
2309 | apic->lapic_timer.timer_advance_ns = timer_advance_ns; | ||
2310 | apic->lapic_timer.timer_advance_adjust_done = true; | ||
2311 | } | ||
2312 | |||
2282 | 2313 | ||
2283 | /* | 2314 | /* |
2284 | * APIC is created enabled. This will prevent kvm_lapic_set_base from | 2315 | * APIC is created enabled. This will prevent kvm_lapic_set_base from |
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index ff6ef9c3d760..d6d049ba3045 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -31,8 +31,10 @@ struct kvm_timer { | |||
31 | u32 timer_mode_mask; | 31 | u32 timer_mode_mask; |
32 | u64 tscdeadline; | 32 | u64 tscdeadline; |
33 | u64 expired_tscdeadline; | 33 | u64 expired_tscdeadline; |
34 | u32 timer_advance_ns; | ||
34 | atomic_t pending; /* accumulated triggered timers */ | 35 | atomic_t pending; /* accumulated triggered timers */ |
35 | bool hv_timer_in_use; | 36 | bool hv_timer_in_use; |
37 | bool timer_advance_adjust_done; | ||
36 | }; | 38 | }; |
37 | 39 | ||
38 | struct kvm_lapic { | 40 | struct kvm_lapic { |
@@ -62,7 +64,7 @@ struct kvm_lapic { | |||
62 | 64 | ||
63 | struct dest_map; | 65 | struct dest_map; |
64 | 66 | ||
65 | int kvm_create_lapic(struct kvm_vcpu *vcpu); | 67 | int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns); |
66 | void kvm_free_lapic(struct kvm_vcpu *vcpu); | 68 | void kvm_free_lapic(struct kvm_vcpu *vcpu); |
67 | 69 | ||
68 | int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); | 70 | int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e10962dfc203..d9c7b45d231f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -4781,6 +4781,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu) | |||
4781 | union kvm_mmu_extended_role ext = {0}; | 4781 | union kvm_mmu_extended_role ext = {0}; |
4782 | 4782 | ||
4783 | ext.cr0_pg = !!is_paging(vcpu); | 4783 | ext.cr0_pg = !!is_paging(vcpu); |
4784 | ext.cr4_pae = !!is_pae(vcpu); | ||
4784 | ext.cr4_smep = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); | 4785 | ext.cr4_smep = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); |
4785 | ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); | 4786 | ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); |
4786 | ext.cr4_pse = !!is_pse(vcpu); | 4787 | ext.cr4_pse = !!is_pse(vcpu); |
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 6401eb7ef19c..0c601d079cd2 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c | |||
@@ -5423,7 +5423,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, | |||
5423 | return ret; | 5423 | return ret; |
5424 | 5424 | ||
5425 | /* Empty 'VMXON' state is permitted */ | 5425 | /* Empty 'VMXON' state is permitted */ |
5426 | if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12)) | 5426 | if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12)) |
5427 | return 0; | 5427 | return 0; |
5428 | 5428 | ||
5429 | if (kvm_state->vmx.vmcs_pa != -1ull) { | 5429 | if (kvm_state->vmx.vmcs_pa != -1ull) { |
@@ -5467,7 +5467,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, | |||
5467 | vmcs12->vmcs_link_pointer != -1ull) { | 5467 | vmcs12->vmcs_link_pointer != -1ull) { |
5468 | struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); | 5468 | struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); |
5469 | 5469 | ||
5470 | if (kvm_state->size < sizeof(kvm_state) + 2 * sizeof(*vmcs12)) | 5470 | if (kvm_state->size < sizeof(*kvm_state) + 2 * sizeof(*vmcs12)) |
5471 | return -EINVAL; | 5471 | return -EINVAL; |
5472 | 5472 | ||
5473 | if (copy_from_user(shadow_vmcs12, | 5473 | if (copy_from_user(shadow_vmcs12, |
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 7b272738c576..d4cb1945b2e3 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <asm/asm.h> | 3 | #include <asm/asm.h> |
4 | #include <asm/bitsperlong.h> | 4 | #include <asm/bitsperlong.h> |
5 | #include <asm/kvm_vcpu_regs.h> | 5 | #include <asm/kvm_vcpu_regs.h> |
6 | #include <asm/nospec-branch.h> | ||
6 | 7 | ||
7 | #define WORD_SIZE (BITS_PER_LONG / 8) | 8 | #define WORD_SIZE (BITS_PER_LONG / 8) |
8 | 9 | ||
@@ -77,6 +78,17 @@ ENDPROC(vmx_vmenter) | |||
77 | * referred to by VMCS.HOST_RIP. | 78 | * referred to by VMCS.HOST_RIP. |
78 | */ | 79 | */ |
79 | ENTRY(vmx_vmexit) | 80 | ENTRY(vmx_vmexit) |
81 | #ifdef CONFIG_RETPOLINE | ||
82 | ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE | ||
83 | /* Preserve guest's RAX, it's used to stuff the RSB. */ | ||
84 | push %_ASM_AX | ||
85 | |||
86 | /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ | ||
87 | FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE | ||
88 | |||
89 | pop %_ASM_AX | ||
90 | .Lvmexit_skip_rsb: | ||
91 | #endif | ||
80 | ret | 92 | ret |
81 | ENDPROC(vmx_vmexit) | 93 | ENDPROC(vmx_vmexit) |
82 | 94 | ||
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b4e7d645275a..0c955bb286ff 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c | |||
@@ -6462,9 +6462,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6462 | 6462 | ||
6463 | x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); | 6463 | x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); |
6464 | 6464 | ||
6465 | /* Eliminate branch target predictions from guest mode */ | ||
6466 | vmexit_fill_RSB(); | ||
6467 | |||
6468 | /* All fields are clean at this point */ | 6465 | /* All fields are clean at this point */ |
6469 | if (static_branch_unlikely(&enable_evmcs)) | 6466 | if (static_branch_unlikely(&enable_evmcs)) |
6470 | current_evmcs->hv_clean_fields |= | 6467 | current_evmcs->hv_clean_fields |= |
@@ -7032,6 +7029,7 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc) | |||
7032 | { | 7029 | { |
7033 | struct vcpu_vmx *vmx; | 7030 | struct vcpu_vmx *vmx; |
7034 | u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles; | 7031 | u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles; |
7032 | struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer; | ||
7035 | 7033 | ||
7036 | if (kvm_mwait_in_guest(vcpu->kvm)) | 7034 | if (kvm_mwait_in_guest(vcpu->kvm)) |
7037 | return -EOPNOTSUPP; | 7035 | return -EOPNOTSUPP; |
@@ -7040,7 +7038,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc) | |||
7040 | tscl = rdtsc(); | 7038 | tscl = rdtsc(); |
7041 | guest_tscl = kvm_read_l1_tsc(vcpu, tscl); | 7039 | guest_tscl = kvm_read_l1_tsc(vcpu, tscl); |
7042 | delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; | 7040 | delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; |
7043 | lapic_timer_advance_cycles = nsec_to_cycles(vcpu, lapic_timer_advance_ns); | 7041 | lapic_timer_advance_cycles = nsec_to_cycles(vcpu, |
7042 | ktimer->timer_advance_ns); | ||
7044 | 7043 | ||
7045 | if (delta_tsc > lapic_timer_advance_cycles) | 7044 | if (delta_tsc > lapic_timer_advance_cycles) |
7046 | delta_tsc -= lapic_timer_advance_cycles; | 7045 | delta_tsc -= lapic_timer_advance_cycles; |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a0d1fc80ac5a..b5edc8e3ce1d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -136,10 +136,14 @@ EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio); | |||
136 | static u32 __read_mostly tsc_tolerance_ppm = 250; | 136 | static u32 __read_mostly tsc_tolerance_ppm = 250; |
137 | module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); | 137 | module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); |
138 | 138 | ||
139 | /* lapic timer advance (tscdeadline mode only) in nanoseconds */ | 139 | /* |
140 | unsigned int __read_mostly lapic_timer_advance_ns = 1000; | 140 | * lapic timer advance (tscdeadline mode only) in nanoseconds. '-1' enables |
141 | * adaptive tuning starting from default advancement of 1000ns. '0' disables | ||
142 | * advancement entirely. Any other value is used as-is and disables adaptive | ||
143 | * tuning, i.e. allows privileged userspace to set an exact advancement time. | ||
144 | */ | ||
145 | static int __read_mostly lapic_timer_advance_ns = -1; | ||
141 | module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); | 146 | module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); |
142 | EXPORT_SYMBOL_GPL(lapic_timer_advance_ns); | ||
143 | 147 | ||
144 | static bool __read_mostly vector_hashing = true; | 148 | static bool __read_mostly vector_hashing = true; |
145 | module_param(vector_hashing, bool, S_IRUGO); | 149 | module_param(vector_hashing, bool, S_IRUGO); |
@@ -6535,6 +6539,12 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, | |||
6535 | } | 6539 | } |
6536 | EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer); | 6540 | EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer); |
6537 | 6541 | ||
6542 | static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu) | ||
6543 | { | ||
6544 | vcpu->arch.pio.count = 0; | ||
6545 | return 1; | ||
6546 | } | ||
6547 | |||
6538 | static int complete_fast_pio_out(struct kvm_vcpu *vcpu) | 6548 | static int complete_fast_pio_out(struct kvm_vcpu *vcpu) |
6539 | { | 6549 | { |
6540 | vcpu->arch.pio.count = 0; | 6550 | vcpu->arch.pio.count = 0; |
@@ -6551,12 +6561,23 @@ static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, | |||
6551 | unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); | 6561 | unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
6552 | int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, | 6562 | int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, |
6553 | size, port, &val, 1); | 6563 | size, port, &val, 1); |
6564 | if (ret) | ||
6565 | return ret; | ||
6554 | 6566 | ||
6555 | if (!ret) { | 6567 | /* |
6568 | * Workaround userspace that relies on old KVM behavior of %rip being | ||
6569 | * incremented prior to exiting to userspace to handle "OUT 0x7e". | ||
6570 | */ | ||
6571 | if (port == 0x7e && | ||
6572 | kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) { | ||
6573 | vcpu->arch.complete_userspace_io = | ||
6574 | complete_fast_pio_out_port_0x7e; | ||
6575 | kvm_skip_emulated_instruction(vcpu); | ||
6576 | } else { | ||
6556 | vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu); | 6577 | vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu); |
6557 | vcpu->arch.complete_userspace_io = complete_fast_pio_out; | 6578 | vcpu->arch.complete_userspace_io = complete_fast_pio_out; |
6558 | } | 6579 | } |
6559 | return ret; | 6580 | return 0; |
6560 | } | 6581 | } |
6561 | 6582 | ||
6562 | static int complete_fast_pio_in(struct kvm_vcpu *vcpu) | 6583 | static int complete_fast_pio_in(struct kvm_vcpu *vcpu) |
@@ -7873,7 +7894,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
7873 | } | 7894 | } |
7874 | 7895 | ||
7875 | trace_kvm_entry(vcpu->vcpu_id); | 7896 | trace_kvm_entry(vcpu->vcpu_id); |
7876 | if (lapic_timer_advance_ns) | 7897 | if (lapic_in_kernel(vcpu) && |
7898 | vcpu->arch.apic->lapic_timer.timer_advance_ns) | ||
7877 | wait_lapic_expire(vcpu); | 7899 | wait_lapic_expire(vcpu); |
7878 | guest_enter_irqoff(); | 7900 | guest_enter_irqoff(); |
7879 | 7901 | ||
@@ -9061,7 +9083,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
9061 | 9083 | ||
9062 | if (irqchip_in_kernel(vcpu->kvm)) { | 9084 | if (irqchip_in_kernel(vcpu->kvm)) { |
9063 | vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu); | 9085 | vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu); |
9064 | r = kvm_create_lapic(vcpu); | 9086 | r = kvm_create_lapic(vcpu, lapic_timer_advance_ns); |
9065 | if (r < 0) | 9087 | if (r < 0) |
9066 | goto fail_mmu_destroy; | 9088 | goto fail_mmu_destroy; |
9067 | } else | 9089 | } else |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index aedc5d0d4989..534d3f28bb01 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -294,8 +294,6 @@ extern u64 kvm_supported_xcr0(void); | |||
294 | 294 | ||
295 | extern unsigned int min_timer_period_us; | 295 | extern unsigned int min_timer_period_us; |
296 | 296 | ||
297 | extern unsigned int lapic_timer_advance_ns; | ||
298 | |||
299 | extern bool enable_vmware_backdoor; | 297 | extern bool enable_vmware_backdoor; |
300 | 298 | ||
301 | extern struct static_key kvm_no_apic_vcpu; | 299 | extern struct static_key kvm_no_apic_vcpu; |
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 4715cfba20dc..93f99c6b7d79 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c | |||
@@ -288,8 +288,11 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, | |||
288 | #endif | 288 | #endif |
289 | max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1; | 289 | max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1; |
290 | guest_page_size = (1ul << guest_page_shift); | 290 | guest_page_size = (1ul << guest_page_shift); |
291 | /* 1G of guest page sized pages */ | 291 | /* |
292 | guest_num_pages = (1ul << (30 - guest_page_shift)); | 292 | * A little more than 1G of guest page sized pages. Cover the |
293 | * case where the size is not aligned to 64 pages. | ||
294 | */ | ||
295 | guest_num_pages = (1ul << (30 - guest_page_shift)) + 3; | ||
293 | host_page_size = getpagesize(); | 296 | host_page_size = getpagesize(); |
294 | host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + | 297 | host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + |
295 | !!((guest_num_pages * guest_page_size) % host_page_size); | 298 | !!((guest_num_pages * guest_page_size) % host_page_size); |
@@ -359,7 +362,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, | |||
359 | kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); | 362 | kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); |
360 | #ifdef USE_CLEAR_DIRTY_LOG | 363 | #ifdef USE_CLEAR_DIRTY_LOG |
361 | kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, | 364 | kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, |
362 | DIV_ROUND_UP(host_num_pages, 64) * 64); | 365 | host_num_pages); |
363 | #endif | 366 | #endif |
364 | vm_dirty_log_verify(bmap); | 367 | vm_dirty_log_verify(bmap); |
365 | iteration++; | 368 | iteration++; |
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index 264425f75806..9a21e912097c 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c | |||
@@ -141,7 +141,13 @@ int main(int argc, char *argv[]) | |||
141 | 141 | ||
142 | free(hv_cpuid_entries); | 142 | free(hv_cpuid_entries); |
143 | 143 | ||
144 | vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap); | 144 | rv = _vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap); |
145 | |||
146 | if (rv) { | ||
147 | fprintf(stderr, | ||
148 | "Enlightened VMCS is unsupported, skip related test\n"); | ||
149 | goto vm_free; | ||
150 | } | ||
145 | 151 | ||
146 | hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm); | 152 | hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm); |
147 | if (!hv_cpuid_entries) | 153 | if (!hv_cpuid_entries) |
@@ -151,6 +157,7 @@ int main(int argc, char *argv[]) | |||
151 | 157 | ||
152 | free(hv_cpuid_entries); | 158 | free(hv_cpuid_entries); |
153 | 159 | ||
160 | vm_free: | ||
154 | kvm_vm_free(vm); | 161 | kvm_vm_free(vm); |
155 | 162 | ||
156 | return 0; | 163 | return 0; |
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 3417f2dbc366..7fc272ecae16 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c | |||
@@ -508,6 +508,14 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) | |||
508 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 508 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); |
509 | 509 | ||
510 | /* | 510 | /* |
511 | * Update the timer output so that it is likely to match the | ||
512 | * state we're about to restore. If the timer expires between | ||
513 | * this point and the register restoration, we'll take the | ||
514 | * interrupt anyway. | ||
515 | */ | ||
516 | kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer); | ||
517 | |||
518 | /* | ||
511 | * When using a userspace irqchip with the architected timers and a | 519 | * When using a userspace irqchip with the architected timers and a |
512 | * host interrupt controller that doesn't support an active state, we | 520 | * host interrupt controller that doesn't support an active state, we |
513 | * must still prevent continuously exiting from the guest, and | 521 | * must still prevent continuously exiting from the guest, and |
@@ -730,7 +738,6 @@ static void kvm_timer_init_interrupt(void *info) | |||
730 | int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) | 738 | int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) |
731 | { | 739 | { |
732 | struct arch_timer_context *timer; | 740 | struct arch_timer_context *timer; |
733 | bool level; | ||
734 | 741 | ||
735 | switch (regid) { | 742 | switch (regid) { |
736 | case KVM_REG_ARM_TIMER_CTL: | 743 | case KVM_REG_ARM_TIMER_CTL: |
@@ -758,10 +765,6 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) | |||
758 | return -1; | 765 | return -1; |
759 | } | 766 | } |
760 | 767 | ||
761 | level = kvm_timer_should_fire(timer); | ||
762 | kvm_timer_update_irq(vcpu, level, timer); | ||
763 | timer_emulate(timer); | ||
764 | |||
765 | return 0; | 768 | return 0; |
766 | } | 769 | } |
767 | 770 | ||
@@ -812,7 +815,7 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, | |||
812 | 815 | ||
813 | switch (treg) { | 816 | switch (treg) { |
814 | case TIMER_REG_TVAL: | 817 | case TIMER_REG_TVAL: |
815 | val = kvm_phys_timer_read() - timer->cntvoff - timer->cnt_cval; | 818 | val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff; |
816 | break; | 819 | break; |
817 | 820 | ||
818 | case TIMER_REG_CTL: | 821 | case TIMER_REG_CTL: |
@@ -858,7 +861,7 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, | |||
858 | { | 861 | { |
859 | switch (treg) { | 862 | switch (treg) { |
860 | case TIMER_REG_TVAL: | 863 | case TIMER_REG_TVAL: |
861 | timer->cnt_cval = val - kvm_phys_timer_read() - timer->cntvoff; | 864 | timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + val; |
862 | break; | 865 | break; |
863 | 866 | ||
864 | case TIMER_REG_CTL: | 867 | case TIMER_REG_CTL: |
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 99c37384ba7b..f412ebc90610 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c | |||
@@ -934,7 +934,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, | |||
934 | static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, | 934 | static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, |
935 | const struct kvm_vcpu_init *init) | 935 | const struct kvm_vcpu_init *init) |
936 | { | 936 | { |
937 | unsigned int i; | 937 | unsigned int i, ret; |
938 | int phys_target = kvm_target_cpu(); | 938 | int phys_target = kvm_target_cpu(); |
939 | 939 | ||
940 | if (init->target != phys_target) | 940 | if (init->target != phys_target) |
@@ -969,9 +969,14 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, | |||
969 | vcpu->arch.target = phys_target; | 969 | vcpu->arch.target = phys_target; |
970 | 970 | ||
971 | /* Now we know what it is, we can reset it. */ | 971 | /* Now we know what it is, we can reset it. */ |
972 | return kvm_reset_vcpu(vcpu); | 972 | ret = kvm_reset_vcpu(vcpu); |
973 | } | 973 | if (ret) { |
974 | vcpu->arch.target = -1; | ||
975 | bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); | ||
976 | } | ||
974 | 977 | ||
978 | return ret; | ||
979 | } | ||
975 | 980 | ||
976 | static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, | 981 | static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, |
977 | struct kvm_vcpu_init *init) | 982 | struct kvm_vcpu_init *init) |
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 27c958306449..a39dcfdbcc65 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c | |||
@@ -1781,8 +1781,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | |||
1781 | * Only PMD_SIZE transparent hugepages(THP) are | 1781 | * Only PMD_SIZE transparent hugepages(THP) are |
1782 | * currently supported. This code will need to be | 1782 | * currently supported. This code will need to be |
1783 | * updated to support other THP sizes. | 1783 | * updated to support other THP sizes. |
1784 | * | ||
1785 | * Make sure the host VA and the guest IPA are sufficiently | ||
1786 | * aligned and that the block is contained within the memslot. | ||
1784 | */ | 1787 | */ |
1785 | if (transparent_hugepage_adjust(&pfn, &fault_ipa)) | 1788 | if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) && |
1789 | transparent_hugepage_adjust(&pfn, &fault_ipa)) | ||
1786 | vma_pagesize = PMD_SIZE; | 1790 | vma_pagesize = PMD_SIZE; |
1787 | } | 1791 | } |
1788 | 1792 | ||
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 4a12322bf7df..9f4843fe9cda 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c | |||
@@ -200,6 +200,9 @@ static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu, | |||
200 | 200 | ||
201 | vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS; | 201 | vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS; |
202 | 202 | ||
203 | if (was_enabled && !vgic_cpu->lpis_enabled) | ||
204 | vgic_flush_pending_lpis(vcpu); | ||
205 | |||
203 | if (!was_enabled && vgic_cpu->lpis_enabled) | 206 | if (!was_enabled && vgic_cpu->lpis_enabled) |
204 | vgic_enable_lpis(vcpu); | 207 | vgic_enable_lpis(vcpu); |
205 | } | 208 | } |
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 3af69f2a3866..191deccf60bf 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c | |||
@@ -151,6 +151,27 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) | |||
151 | kfree(irq); | 151 | kfree(irq); |
152 | } | 152 | } |
153 | 153 | ||
154 | void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu) | ||
155 | { | ||
156 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
157 | struct vgic_irq *irq, *tmp; | ||
158 | unsigned long flags; | ||
159 | |||
160 | raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); | ||
161 | |||
162 | list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) { | ||
163 | if (irq->intid >= VGIC_MIN_LPI) { | ||
164 | raw_spin_lock(&irq->irq_lock); | ||
165 | list_del(&irq->ap_list); | ||
166 | irq->vcpu = NULL; | ||
167 | raw_spin_unlock(&irq->irq_lock); | ||
168 | vgic_put_irq(vcpu->kvm, irq); | ||
169 | } | ||
170 | } | ||
171 | |||
172 | raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); | ||
173 | } | ||
174 | |||
154 | void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending) | 175 | void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending) |
155 | { | 176 | { |
156 | WARN_ON(irq_set_irqchip_state(irq->host_irq, | 177 | WARN_ON(irq_set_irqchip_state(irq->host_irq, |
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index a90024718ca4..abeeffabc456 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h | |||
@@ -238,6 +238,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu); | |||
238 | bool vgic_has_its(struct kvm *kvm); | 238 | bool vgic_has_its(struct kvm *kvm); |
239 | int kvm_vgic_register_its_device(void); | 239 | int kvm_vgic_register_its_device(void); |
240 | void vgic_enable_lpis(struct kvm_vcpu *vcpu); | 240 | void vgic_enable_lpis(struct kvm_vcpu *vcpu); |
241 | void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu); | ||
241 | int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi); | 242 | int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi); |
242 | int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); | 243 | int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); |
243 | int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, | 244 | int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index dc8edc97ba85..a704d1f9bd96 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -1240,7 +1240,7 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, | |||
1240 | if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) | 1240 | if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) |
1241 | return -EINVAL; | 1241 | return -EINVAL; |
1242 | 1242 | ||
1243 | if ((log->first_page & 63) || (log->num_pages & 63)) | 1243 | if (log->first_page & 63) |
1244 | return -EINVAL; | 1244 | return -EINVAL; |
1245 | 1245 | ||
1246 | slots = __kvm_memslots(kvm, as_id); | 1246 | slots = __kvm_memslots(kvm, as_id); |
@@ -1253,8 +1253,9 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, | |||
1253 | n = kvm_dirty_bitmap_bytes(memslot); | 1253 | n = kvm_dirty_bitmap_bytes(memslot); |
1254 | 1254 | ||
1255 | if (log->first_page > memslot->npages || | 1255 | if (log->first_page > memslot->npages || |
1256 | log->num_pages > memslot->npages - log->first_page) | 1256 | log->num_pages > memslot->npages - log->first_page || |
1257 | return -EINVAL; | 1257 | (log->num_pages < memslot->npages - log->first_page && (log->num_pages & 63))) |
1258 | return -EINVAL; | ||
1258 | 1259 | ||
1259 | *flush = false; | 1260 | *flush = false; |
1260 | dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot); | 1261 | dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot); |