aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-05-03 19:49:46 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2019-05-03 19:49:46 -0400
commitaa1be08f52585fe36ecfaf5bddfdc784eb4c94cf (patch)
treebb8a647ba51f6990c880234c32c7ffe0cc8ec826
parent82463436a7fa40345c6febf0baa4c954af506ca6 (diff)
parente8ab8d24b488632d07ce5ddb261f1d454114415b (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Paolo Bonzini: - PPC and ARM bugfixes from submaintainers - Fix old Windows versions on AMD (recent regression) - Fix old Linux versions on processors without EPT - Fixes for LAPIC timer optimizations * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (21 commits) KVM: nVMX: Fix size checks in vmx_set_nested_state KVM: selftests: make hyperv_cpuid test pass on AMD KVM: lapic: Check for in-kernel LAPIC before deferencing apic pointer KVM: fix KVM_CLEAR_DIRTY_LOG for memory slots of unaligned size x86/kvm/mmu: reset MMU context when 32-bit guest switches PAE KVM: x86: Whitelist port 0x7e for pre-incrementing %rip Documentation: kvm: fix dirty log ioctl arch lists KVM: VMX: Move RSB stuffing to before the first RET after VM-Exit KVM: arm/arm64: Don't emulate virtual timers on userspace ioctls kvm: arm: Skip stage2 huge mappings for unaligned ipa backed by THP KVM: arm/arm64: Ensure vcpu target is unset on reset failure KVM: lapic: Convert guest TSC to host time domain if necessary KVM: lapic: Allow user to disable adaptive tuning of timer advancement KVM: lapic: Track lapic timer advance per vCPU KVM: lapic: Disable timer advancement if adaptive tuning goes haywire x86: kvm: hyper-v: deal with buggy TLB flush requests from WS2012 KVM: x86: Consider LAPIC TSC-Deadline timer expired if deadline too short KVM: PPC: Book3S: Protect memslots while validating user address KVM: PPC: Book3S HV: Perserve PSSCR FAKE_SUSPEND bit on guest exit KVM: arm/arm64: vgic-v3: Retire pending interrupts on disabling LPIs ...
-rw-r--r--Documentation/virtual/kvm/api.txt11
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c6
-rw-r--r--arch/powerpc/kvm/book3s_hv.c4
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/uapi/asm/kvm.h1
-rw-r--r--arch/x86/kvm/hyperv.c11
-rw-r--r--arch/x86/kvm/lapic.c73
-rw-r--r--arch/x86/kvm/lapic.h4
-rw-r--r--arch/x86/kvm/mmu.c1
-rw-r--r--arch/x86/kvm/vmx/nested.c4
-rw-r--r--arch/x86/kvm/vmx/vmenter.S12
-rw-r--r--arch/x86/kvm/vmx/vmx.c7
-rw-r--r--arch/x86/kvm/x86.c36
-rw-r--r--arch/x86/kvm/x86.h2
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c9
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c9
-rw-r--r--virt/kvm/arm/arch_timer.c17
-rw-r--r--virt/kvm/arm/arm.c11
-rw-r--r--virt/kvm/arm/mmu.c6
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v3.c3
-rw-r--r--virt/kvm/arm/vgic/vgic.c21
-rw-r--r--virt/kvm/arm/vgic/vgic.h1
-rw-r--r--virt/kvm/kvm_main.c7
23 files changed, 192 insertions, 65 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 67068c47c591..64b38dfcc243 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -321,7 +321,7 @@ cpu's hardware control block.
3214.8 KVM_GET_DIRTY_LOG (vm ioctl) 3214.8 KVM_GET_DIRTY_LOG (vm ioctl)
322 322
323Capability: basic 323Capability: basic
324Architectures: x86 324Architectures: all
325Type: vm ioctl 325Type: vm ioctl
326Parameters: struct kvm_dirty_log (in/out) 326Parameters: struct kvm_dirty_log (in/out)
327Returns: 0 on success, -1 on error 327Returns: 0 on success, -1 on error
@@ -3810,7 +3810,7 @@ to I/O ports.
38104.117 KVM_CLEAR_DIRTY_LOG (vm ioctl) 38104.117 KVM_CLEAR_DIRTY_LOG (vm ioctl)
3811 3811
3812Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 3812Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
3813Architectures: x86 3813Architectures: x86, arm, arm64, mips
3814Type: vm ioctl 3814Type: vm ioctl
3815Parameters: struct kvm_dirty_log (in) 3815Parameters: struct kvm_dirty_log (in)
3816Returns: 0 on success, -1 on error 3816Returns: 0 on success, -1 on error
@@ -3830,8 +3830,9 @@ The ioctl clears the dirty status of pages in a memory slot, according to
3830the bitmap that is passed in struct kvm_clear_dirty_log's dirty_bitmap 3830the bitmap that is passed in struct kvm_clear_dirty_log's dirty_bitmap
3831field. Bit 0 of the bitmap corresponds to page "first_page" in the 3831field. Bit 0 of the bitmap corresponds to page "first_page" in the
3832memory slot, and num_pages is the size in bits of the input bitmap. 3832memory slot, and num_pages is the size in bits of the input bitmap.
3833Both first_page and num_pages must be a multiple of 64. For each bit 3833first_page must be a multiple of 64; num_pages must also be a multiple of
3834that is set in the input bitmap, the corresponding page is marked "clean" 383464 unless first_page + num_pages is the size of the memory slot. For each
3835bit that is set in the input bitmap, the corresponding page is marked "clean"
3835in KVM's dirty bitmap, and dirty tracking is re-enabled for that page 3836in KVM's dirty bitmap, and dirty tracking is re-enabled for that page
3836(for example via write-protection, or by clearing the dirty bit in 3837(for example via write-protection, or by clearing the dirty bit in
3837a page table entry). 3838a page table entry).
@@ -4799,7 +4800,7 @@ and injected exceptions.
4799 4800
48007.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 48017.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
4801 4802
4802Architectures: all 4803Architectures: x86, arm, arm64, mips
4803Parameters: args[0] whether feature should be enabled or not 4804Parameters: args[0] whether feature should be enabled or not
4804 4805
4805With this capability enabled, KVM_GET_DIRTY_LOG will not automatically 4806With this capability enabled, KVM_GET_DIRTY_LOG will not automatically
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index f02b04973710..f100e331e69b 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -543,14 +543,14 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
543 if (ret != H_SUCCESS) 543 if (ret != H_SUCCESS)
544 return ret; 544 return ret;
545 545
546 idx = srcu_read_lock(&vcpu->kvm->srcu);
547
546 ret = kvmppc_tce_validate(stt, tce); 548 ret = kvmppc_tce_validate(stt, tce);
547 if (ret != H_SUCCESS) 549 if (ret != H_SUCCESS)
548 return ret; 550 goto unlock_exit;
549 551
550 dir = iommu_tce_direction(tce); 552 dir = iommu_tce_direction(tce);
551 553
552 idx = srcu_read_lock(&vcpu->kvm->srcu);
553
554 if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { 554 if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) {
555 ret = H_PARAMETER; 555 ret = H_PARAMETER;
556 goto unlock_exit; 556 goto unlock_exit;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 06964350b97a..b2b29d4f9842 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3423,7 +3423,9 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
3423 vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2); 3423 vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
3424 vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3); 3424 vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
3425 3425
3426 mtspr(SPRN_PSSCR, host_psscr); 3426 /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
3427 mtspr(SPRN_PSSCR, host_psscr |
3428 (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
3427 mtspr(SPRN_HFSCR, host_hfscr); 3429 mtspr(SPRN_HFSCR, host_hfscr);
3428 mtspr(SPRN_CIABR, host_ciabr); 3430 mtspr(SPRN_CIABR, host_ciabr);
3429 mtspr(SPRN_DAWR, host_dawr); 3431 mtspr(SPRN_DAWR, host_dawr);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a9d03af34030..c79abe7ca093 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -295,6 +295,7 @@ union kvm_mmu_extended_role {
295 unsigned int valid:1; 295 unsigned int valid:1;
296 unsigned int execonly:1; 296 unsigned int execonly:1;
297 unsigned int cr0_pg:1; 297 unsigned int cr0_pg:1;
298 unsigned int cr4_pae:1;
298 unsigned int cr4_pse:1; 299 unsigned int cr4_pse:1;
299 unsigned int cr4_pke:1; 300 unsigned int cr4_pke:1;
300 unsigned int cr4_smap:1; 301 unsigned int cr4_smap:1;
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index dabfcf7c3941..7a0e64ccd6ff 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -381,6 +381,7 @@ struct kvm_sync_regs {
381#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) 381#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
382#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) 382#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
383#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) 383#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
384#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
384 385
385#define KVM_STATE_NESTED_GUEST_MODE 0x00000001 386#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
386#define KVM_STATE_NESTED_RUN_PENDING 0x00000002 387#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 421899f6ad7b..cc24b3a32c44 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1371,7 +1371,16 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
1371 1371
1372 valid_bank_mask = BIT_ULL(0); 1372 valid_bank_mask = BIT_ULL(0);
1373 sparse_banks[0] = flush.processor_mask; 1373 sparse_banks[0] = flush.processor_mask;
1374 all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS; 1374
1375 /*
1376 * Work around possible WS2012 bug: it sends hypercalls
1377 * with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear,
1378 * while also expecting us to flush something and crashing if
1379 * we don't. Let's treat processor_mask == 0 same as
1380 * HV_FLUSH_ALL_PROCESSORS.
1381 */
1382 all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
1383 flush.processor_mask == 0;
1375 } else { 1384 } else {
1376 if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex, 1385 if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
1377 sizeof(flush_ex)))) 1386 sizeof(flush_ex))))
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9bf70cf84564..bd13fdddbdc4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -70,7 +70,6 @@
70#define APIC_BROADCAST 0xFF 70#define APIC_BROADCAST 0xFF
71#define X2APIC_BROADCAST 0xFFFFFFFFul 71#define X2APIC_BROADCAST 0xFFFFFFFFul
72 72
73static bool lapic_timer_advance_adjust_done = false;
74#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100 73#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
75/* step-by-step approximation to mitigate fluctuation */ 74/* step-by-step approximation to mitigate fluctuation */
76#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 75#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
@@ -1482,14 +1481,32 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
1482 return false; 1481 return false;
1483} 1482}
1484 1483
1484static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
1485{
1486 u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
1487
1488 /*
1489 * If the guest TSC is running at a different ratio than the host, then
1490 * convert the delay to nanoseconds to achieve an accurate delay. Note
1491 * that __delay() uses delay_tsc whenever the hardware has TSC, thus
1492 * always for VMX enabled hardware.
1493 */
1494 if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
1495 __delay(min(guest_cycles,
1496 nsec_to_cycles(vcpu, timer_advance_ns)));
1497 } else {
1498 u64 delay_ns = guest_cycles * 1000000ULL;
1499 do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
1500 ndelay(min_t(u32, delay_ns, timer_advance_ns));
1501 }
1502}
1503
1485void wait_lapic_expire(struct kvm_vcpu *vcpu) 1504void wait_lapic_expire(struct kvm_vcpu *vcpu)
1486{ 1505{
1487 struct kvm_lapic *apic = vcpu->arch.apic; 1506 struct kvm_lapic *apic = vcpu->arch.apic;
1507 u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
1488 u64 guest_tsc, tsc_deadline, ns; 1508 u64 guest_tsc, tsc_deadline, ns;
1489 1509
1490 if (!lapic_in_kernel(vcpu))
1491 return;
1492
1493 if (apic->lapic_timer.expired_tscdeadline == 0) 1510 if (apic->lapic_timer.expired_tscdeadline == 0)
1494 return; 1511 return;
1495 1512
@@ -1501,33 +1518,37 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
1501 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); 1518 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1502 trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); 1519 trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
1503 1520
1504 /* __delay is delay_tsc whenever the hardware has TSC, thus always. */
1505 if (guest_tsc < tsc_deadline) 1521 if (guest_tsc < tsc_deadline)
1506 __delay(min(tsc_deadline - guest_tsc, 1522 __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
1507 nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
1508 1523
1509 if (!lapic_timer_advance_adjust_done) { 1524 if (!apic->lapic_timer.timer_advance_adjust_done) {
1510 /* too early */ 1525 /* too early */
1511 if (guest_tsc < tsc_deadline) { 1526 if (guest_tsc < tsc_deadline) {
1512 ns = (tsc_deadline - guest_tsc) * 1000000ULL; 1527 ns = (tsc_deadline - guest_tsc) * 1000000ULL;
1513 do_div(ns, vcpu->arch.virtual_tsc_khz); 1528 do_div(ns, vcpu->arch.virtual_tsc_khz);
1514 lapic_timer_advance_ns -= min((unsigned int)ns, 1529 timer_advance_ns -= min((u32)ns,
1515 lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); 1530 timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
1516 } else { 1531 } else {
1517 /* too late */ 1532 /* too late */
1518 ns = (guest_tsc - tsc_deadline) * 1000000ULL; 1533 ns = (guest_tsc - tsc_deadline) * 1000000ULL;
1519 do_div(ns, vcpu->arch.virtual_tsc_khz); 1534 do_div(ns, vcpu->arch.virtual_tsc_khz);
1520 lapic_timer_advance_ns += min((unsigned int)ns, 1535 timer_advance_ns += min((u32)ns,
1521 lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); 1536 timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
1522 } 1537 }
1523 if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) 1538 if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
1524 lapic_timer_advance_adjust_done = true; 1539 apic->lapic_timer.timer_advance_adjust_done = true;
1540 if (unlikely(timer_advance_ns > 5000)) {
1541 timer_advance_ns = 0;
1542 apic->lapic_timer.timer_advance_adjust_done = true;
1543 }
1544 apic->lapic_timer.timer_advance_ns = timer_advance_ns;
1525 } 1545 }
1526} 1546}
1527 1547
1528static void start_sw_tscdeadline(struct kvm_lapic *apic) 1548static void start_sw_tscdeadline(struct kvm_lapic *apic)
1529{ 1549{
1530 u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; 1550 struct kvm_timer *ktimer = &apic->lapic_timer;
1551 u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
1531 u64 ns = 0; 1552 u64 ns = 0;
1532 ktime_t expire; 1553 ktime_t expire;
1533 struct kvm_vcpu *vcpu = apic->vcpu; 1554 struct kvm_vcpu *vcpu = apic->vcpu;
@@ -1542,13 +1563,15 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
1542 1563
1543 now = ktime_get(); 1564 now = ktime_get();
1544 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); 1565 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1545 if (likely(tscdeadline > guest_tsc)) { 1566
1546 ns = (tscdeadline - guest_tsc) * 1000000ULL; 1567 ns = (tscdeadline - guest_tsc) * 1000000ULL;
1547 do_div(ns, this_tsc_khz); 1568 do_div(ns, this_tsc_khz);
1569
1570 if (likely(tscdeadline > guest_tsc) &&
1571 likely(ns > apic->lapic_timer.timer_advance_ns)) {
1548 expire = ktime_add_ns(now, ns); 1572 expire = ktime_add_ns(now, ns);
1549 expire = ktime_sub_ns(expire, lapic_timer_advance_ns); 1573 expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
1550 hrtimer_start(&apic->lapic_timer.timer, 1574 hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_PINNED);
1551 expire, HRTIMER_MODE_ABS_PINNED);
1552 } else 1575 } else
1553 apic_timer_expired(apic); 1576 apic_timer_expired(apic);
1554 1577
@@ -2255,7 +2278,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
2255 return HRTIMER_NORESTART; 2278 return HRTIMER_NORESTART;
2256} 2279}
2257 2280
2258int kvm_create_lapic(struct kvm_vcpu *vcpu) 2281int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
2259{ 2282{
2260 struct kvm_lapic *apic; 2283 struct kvm_lapic *apic;
2261 2284
@@ -2279,6 +2302,14 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
2279 hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, 2302 hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
2280 HRTIMER_MODE_ABS_PINNED); 2303 HRTIMER_MODE_ABS_PINNED);
2281 apic->lapic_timer.timer.function = apic_timer_fn; 2304 apic->lapic_timer.timer.function = apic_timer_fn;
2305 if (timer_advance_ns == -1) {
2306 apic->lapic_timer.timer_advance_ns = 1000;
2307 apic->lapic_timer.timer_advance_adjust_done = false;
2308 } else {
2309 apic->lapic_timer.timer_advance_ns = timer_advance_ns;
2310 apic->lapic_timer.timer_advance_adjust_done = true;
2311 }
2312
2282 2313
2283 /* 2314 /*
2284 * APIC is created enabled. This will prevent kvm_lapic_set_base from 2315 * APIC is created enabled. This will prevent kvm_lapic_set_base from
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index ff6ef9c3d760..d6d049ba3045 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -31,8 +31,10 @@ struct kvm_timer {
31 u32 timer_mode_mask; 31 u32 timer_mode_mask;
32 u64 tscdeadline; 32 u64 tscdeadline;
33 u64 expired_tscdeadline; 33 u64 expired_tscdeadline;
34 u32 timer_advance_ns;
34 atomic_t pending; /* accumulated triggered timers */ 35 atomic_t pending; /* accumulated triggered timers */
35 bool hv_timer_in_use; 36 bool hv_timer_in_use;
37 bool timer_advance_adjust_done;
36}; 38};
37 39
38struct kvm_lapic { 40struct kvm_lapic {
@@ -62,7 +64,7 @@ struct kvm_lapic {
62 64
63struct dest_map; 65struct dest_map;
64 66
65int kvm_create_lapic(struct kvm_vcpu *vcpu); 67int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns);
66void kvm_free_lapic(struct kvm_vcpu *vcpu); 68void kvm_free_lapic(struct kvm_vcpu *vcpu);
67 69
68int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); 70int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e10962dfc203..d9c7b45d231f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4781,6 +4781,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu)
4781 union kvm_mmu_extended_role ext = {0}; 4781 union kvm_mmu_extended_role ext = {0};
4782 4782
4783 ext.cr0_pg = !!is_paging(vcpu); 4783 ext.cr0_pg = !!is_paging(vcpu);
4784 ext.cr4_pae = !!is_pae(vcpu);
4784 ext.cr4_smep = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); 4785 ext.cr4_smep = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
4785 ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); 4786 ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
4786 ext.cr4_pse = !!is_pse(vcpu); 4787 ext.cr4_pse = !!is_pse(vcpu);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 6401eb7ef19c..0c601d079cd2 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5423,7 +5423,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
5423 return ret; 5423 return ret;
5424 5424
5425 /* Empty 'VMXON' state is permitted */ 5425 /* Empty 'VMXON' state is permitted */
5426 if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12)) 5426 if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12))
5427 return 0; 5427 return 0;
5428 5428
5429 if (kvm_state->vmx.vmcs_pa != -1ull) { 5429 if (kvm_state->vmx.vmcs_pa != -1ull) {
@@ -5467,7 +5467,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
5467 vmcs12->vmcs_link_pointer != -1ull) { 5467 vmcs12->vmcs_link_pointer != -1ull) {
5468 struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); 5468 struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
5469 5469
5470 if (kvm_state->size < sizeof(kvm_state) + 2 * sizeof(*vmcs12)) 5470 if (kvm_state->size < sizeof(*kvm_state) + 2 * sizeof(*vmcs12))
5471 return -EINVAL; 5471 return -EINVAL;
5472 5472
5473 if (copy_from_user(shadow_vmcs12, 5473 if (copy_from_user(shadow_vmcs12,
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 7b272738c576..d4cb1945b2e3 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -3,6 +3,7 @@
3#include <asm/asm.h> 3#include <asm/asm.h>
4#include <asm/bitsperlong.h> 4#include <asm/bitsperlong.h>
5#include <asm/kvm_vcpu_regs.h> 5#include <asm/kvm_vcpu_regs.h>
6#include <asm/nospec-branch.h>
6 7
7#define WORD_SIZE (BITS_PER_LONG / 8) 8#define WORD_SIZE (BITS_PER_LONG / 8)
8 9
@@ -77,6 +78,17 @@ ENDPROC(vmx_vmenter)
77 * referred to by VMCS.HOST_RIP. 78 * referred to by VMCS.HOST_RIP.
78 */ 79 */
79ENTRY(vmx_vmexit) 80ENTRY(vmx_vmexit)
81#ifdef CONFIG_RETPOLINE
82 ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE
83 /* Preserve guest's RAX, it's used to stuff the RSB. */
84 push %_ASM_AX
85
86 /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
87 FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
88
89 pop %_ASM_AX
90.Lvmexit_skip_rsb:
91#endif
80 ret 92 ret
81ENDPROC(vmx_vmexit) 93ENDPROC(vmx_vmexit)
82 94
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b4e7d645275a..0c955bb286ff 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6462,9 +6462,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
6462 6462
6463 x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); 6463 x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
6464 6464
6465 /* Eliminate branch target predictions from guest mode */
6466 vmexit_fill_RSB();
6467
6468 /* All fields are clean at this point */ 6465 /* All fields are clean at this point */
6469 if (static_branch_unlikely(&enable_evmcs)) 6466 if (static_branch_unlikely(&enable_evmcs))
6470 current_evmcs->hv_clean_fields |= 6467 current_evmcs->hv_clean_fields |=
@@ -7032,6 +7029,7 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
7032{ 7029{
7033 struct vcpu_vmx *vmx; 7030 struct vcpu_vmx *vmx;
7034 u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles; 7031 u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
7032 struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
7035 7033
7036 if (kvm_mwait_in_guest(vcpu->kvm)) 7034 if (kvm_mwait_in_guest(vcpu->kvm))
7037 return -EOPNOTSUPP; 7035 return -EOPNOTSUPP;
@@ -7040,7 +7038,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
7040 tscl = rdtsc(); 7038 tscl = rdtsc();
7041 guest_tscl = kvm_read_l1_tsc(vcpu, tscl); 7039 guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
7042 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; 7040 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
7043 lapic_timer_advance_cycles = nsec_to_cycles(vcpu, lapic_timer_advance_ns); 7041 lapic_timer_advance_cycles = nsec_to_cycles(vcpu,
7042 ktimer->timer_advance_ns);
7044 7043
7045 if (delta_tsc > lapic_timer_advance_cycles) 7044 if (delta_tsc > lapic_timer_advance_cycles)
7046 delta_tsc -= lapic_timer_advance_cycles; 7045 delta_tsc -= lapic_timer_advance_cycles;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a0d1fc80ac5a..b5edc8e3ce1d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -136,10 +136,14 @@ EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
136static u32 __read_mostly tsc_tolerance_ppm = 250; 136static u32 __read_mostly tsc_tolerance_ppm = 250;
137module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); 137module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
138 138
139/* lapic timer advance (tscdeadline mode only) in nanoseconds */ 139/*
140unsigned int __read_mostly lapic_timer_advance_ns = 1000; 140 * lapic timer advance (tscdeadline mode only) in nanoseconds. '-1' enables
141 * adaptive tuning starting from default advancement of 1000ns. '0' disables
142 * advancement entirely. Any other value is used as-is and disables adaptive
143 * tuning, i.e. allows privileged userspace to set an exact advancement time.
144 */
145static int __read_mostly lapic_timer_advance_ns = -1;
141module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); 146module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
142EXPORT_SYMBOL_GPL(lapic_timer_advance_ns);
143 147
144static bool __read_mostly vector_hashing = true; 148static bool __read_mostly vector_hashing = true;
145module_param(vector_hashing, bool, S_IRUGO); 149module_param(vector_hashing, bool, S_IRUGO);
@@ -6535,6 +6539,12 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
6535} 6539}
6536EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer); 6540EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
6537 6541
6542static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
6543{
6544 vcpu->arch.pio.count = 0;
6545 return 1;
6546}
6547
6538static int complete_fast_pio_out(struct kvm_vcpu *vcpu) 6548static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
6539{ 6549{
6540 vcpu->arch.pio.count = 0; 6550 vcpu->arch.pio.count = 0;
@@ -6551,12 +6561,23 @@ static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
6551 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); 6561 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
6552 int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, 6562 int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
6553 size, port, &val, 1); 6563 size, port, &val, 1);
6564 if (ret)
6565 return ret;
6554 6566
6555 if (!ret) { 6567 /*
6568 * Work around userspace that relies on old KVM behavior of %rip being
6569 * incremented prior to exiting to userspace to handle "OUT 0x7e".
6570 */
6571 if (port == 0x7e &&
6572 kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) {
6573 vcpu->arch.complete_userspace_io =
6574 complete_fast_pio_out_port_0x7e;
6575 kvm_skip_emulated_instruction(vcpu);
6576 } else {
6556 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu); 6577 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
6557 vcpu->arch.complete_userspace_io = complete_fast_pio_out; 6578 vcpu->arch.complete_userspace_io = complete_fast_pio_out;
6558 } 6579 }
6559 return ret; 6580 return 0;
6560} 6581}
6561 6582
6562static int complete_fast_pio_in(struct kvm_vcpu *vcpu) 6583static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
@@ -7873,7 +7894,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
7873 } 7894 }
7874 7895
7875 trace_kvm_entry(vcpu->vcpu_id); 7896 trace_kvm_entry(vcpu->vcpu_id);
7876 if (lapic_timer_advance_ns) 7897 if (lapic_in_kernel(vcpu) &&
7898 vcpu->arch.apic->lapic_timer.timer_advance_ns)
7877 wait_lapic_expire(vcpu); 7899 wait_lapic_expire(vcpu);
7878 guest_enter_irqoff(); 7900 guest_enter_irqoff();
7879 7901
@@ -9061,7 +9083,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
9061 9083
9062 if (irqchip_in_kernel(vcpu->kvm)) { 9084 if (irqchip_in_kernel(vcpu->kvm)) {
9063 vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu); 9085 vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
9064 r = kvm_create_lapic(vcpu); 9086 r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
9065 if (r < 0) 9087 if (r < 0)
9066 goto fail_mmu_destroy; 9088 goto fail_mmu_destroy;
9067 } else 9089 } else
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index aedc5d0d4989..534d3f28bb01 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -294,8 +294,6 @@ extern u64 kvm_supported_xcr0(void);
294 294
295extern unsigned int min_timer_period_us; 295extern unsigned int min_timer_period_us;
296 296
297extern unsigned int lapic_timer_advance_ns;
298
299extern bool enable_vmware_backdoor; 297extern bool enable_vmware_backdoor;
300 298
301extern struct static_key kvm_no_apic_vcpu; 299extern struct static_key kvm_no_apic_vcpu;
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 4715cfba20dc..93f99c6b7d79 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -288,8 +288,11 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
288#endif 288#endif
289 max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1; 289 max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1;
290 guest_page_size = (1ul << guest_page_shift); 290 guest_page_size = (1ul << guest_page_shift);
291 /* 1G of guest page sized pages */ 291 /*
292 guest_num_pages = (1ul << (30 - guest_page_shift)); 292 * A little more than 1G of guest page sized pages. Cover the
293 * case where the size is not aligned to 64 pages.
294 */
295 guest_num_pages = (1ul << (30 - guest_page_shift)) + 3;
293 host_page_size = getpagesize(); 296 host_page_size = getpagesize();
294 host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + 297 host_num_pages = (guest_num_pages * guest_page_size) / host_page_size +
295 !!((guest_num_pages * guest_page_size) % host_page_size); 298 !!((guest_num_pages * guest_page_size) % host_page_size);
@@ -359,7 +362,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
359 kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); 362 kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
360#ifdef USE_CLEAR_DIRTY_LOG 363#ifdef USE_CLEAR_DIRTY_LOG
361 kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, 364 kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
362 DIV_ROUND_UP(host_num_pages, 64) * 64); 365 host_num_pages);
363#endif 366#endif
364 vm_dirty_log_verify(bmap); 367 vm_dirty_log_verify(bmap);
365 iteration++; 368 iteration++;
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
index 264425f75806..9a21e912097c 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -141,7 +141,13 @@ int main(int argc, char *argv[])
141 141
142 free(hv_cpuid_entries); 142 free(hv_cpuid_entries);
143 143
144 vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap); 144 rv = _vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap);
145
146 if (rv) {
147 fprintf(stderr,
148 "Enlightened VMCS is unsupported, skip related test\n");
149 goto vm_free;
150 }
145 151
146 hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm); 152 hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
147 if (!hv_cpuid_entries) 153 if (!hv_cpuid_entries)
@@ -151,6 +157,7 @@ int main(int argc, char *argv[])
151 157
152 free(hv_cpuid_entries); 158 free(hv_cpuid_entries);
153 159
160vm_free:
154 kvm_vm_free(vm); 161 kvm_vm_free(vm);
155 162
156 return 0; 163 return 0;
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 3417f2dbc366..7fc272ecae16 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -508,6 +508,14 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
508 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 508 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
509 509
510 /* 510 /*
511 * Update the timer output so that it is likely to match the
512 * state we're about to restore. If the timer expires between
513 * this point and the register restoration, we'll take the
514 * interrupt anyway.
515 */
516 kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);
517
518 /*
511 * When using a userspace irqchip with the architected timers and a 519 * When using a userspace irqchip with the architected timers and a
512 * host interrupt controller that doesn't support an active state, we 520 * host interrupt controller that doesn't support an active state, we
513 * must still prevent continuously exiting from the guest, and 521 * must still prevent continuously exiting from the guest, and
@@ -730,7 +738,6 @@ static void kvm_timer_init_interrupt(void *info)
730int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) 738int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
731{ 739{
732 struct arch_timer_context *timer; 740 struct arch_timer_context *timer;
733 bool level;
734 741
735 switch (regid) { 742 switch (regid) {
736 case KVM_REG_ARM_TIMER_CTL: 743 case KVM_REG_ARM_TIMER_CTL:
@@ -758,10 +765,6 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
758 return -1; 765 return -1;
759 } 766 }
760 767
761 level = kvm_timer_should_fire(timer);
762 kvm_timer_update_irq(vcpu, level, timer);
763 timer_emulate(timer);
764
765 return 0; 768 return 0;
766} 769}
767 770
@@ -812,7 +815,7 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
812 815
813 switch (treg) { 816 switch (treg) {
814 case TIMER_REG_TVAL: 817 case TIMER_REG_TVAL:
815 val = kvm_phys_timer_read() - timer->cntvoff - timer->cnt_cval; 818 val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff;
816 break; 819 break;
817 820
818 case TIMER_REG_CTL: 821 case TIMER_REG_CTL:
@@ -858,7 +861,7 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
858{ 861{
859 switch (treg) { 862 switch (treg) {
860 case TIMER_REG_TVAL: 863 case TIMER_REG_TVAL:
861 timer->cnt_cval = val - kvm_phys_timer_read() - timer->cntvoff; 864 timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + val;
862 break; 865 break;
863 866
864 case TIMER_REG_CTL: 867 case TIMER_REG_CTL:
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 99c37384ba7b..f412ebc90610 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -934,7 +934,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
934static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, 934static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
935 const struct kvm_vcpu_init *init) 935 const struct kvm_vcpu_init *init)
936{ 936{
937 unsigned int i; 937 unsigned int i, ret;
938 int phys_target = kvm_target_cpu(); 938 int phys_target = kvm_target_cpu();
939 939
940 if (init->target != phys_target) 940 if (init->target != phys_target)
@@ -969,9 +969,14 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
969 vcpu->arch.target = phys_target; 969 vcpu->arch.target = phys_target;
970 970
971 /* Now we know what it is, we can reset it. */ 971 /* Now we know what it is, we can reset it. */
972 return kvm_reset_vcpu(vcpu); 972 ret = kvm_reset_vcpu(vcpu);
973} 973 if (ret) {
974 vcpu->arch.target = -1;
975 bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
976 }
974 977
978 return ret;
979}
975 980
976static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, 981static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
977 struct kvm_vcpu_init *init) 982 struct kvm_vcpu_init *init)
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 27c958306449..a39dcfdbcc65 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1781,8 +1781,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
1781 * Only PMD_SIZE transparent hugepages(THP) are 1781 * Only PMD_SIZE transparent hugepages(THP) are
1782 * currently supported. This code will need to be 1782 * currently supported. This code will need to be
1783 * updated to support other THP sizes. 1783 * updated to support other THP sizes.
1784 *
1785 * Make sure the host VA and the guest IPA are sufficiently
1786 * aligned and that the block is contained within the memslot.
1784 */ 1787 */
1785 if (transparent_hugepage_adjust(&pfn, &fault_ipa)) 1788 if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
1789 transparent_hugepage_adjust(&pfn, &fault_ipa))
1786 vma_pagesize = PMD_SIZE; 1790 vma_pagesize = PMD_SIZE;
1787 } 1791 }
1788 1792
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 4a12322bf7df..9f4843fe9cda 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -200,6 +200,9 @@ static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu,
200 200
201 vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS; 201 vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS;
202 202
203 if (was_enabled && !vgic_cpu->lpis_enabled)
204 vgic_flush_pending_lpis(vcpu);
205
203 if (!was_enabled && vgic_cpu->lpis_enabled) 206 if (!was_enabled && vgic_cpu->lpis_enabled)
204 vgic_enable_lpis(vcpu); 207 vgic_enable_lpis(vcpu);
205} 208}
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 3af69f2a3866..191deccf60bf 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -151,6 +151,27 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
151 kfree(irq); 151 kfree(irq);
152} 152}
153 153
154void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
155{
156 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
157 struct vgic_irq *irq, *tmp;
158 unsigned long flags;
159
160 raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
161
162 list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
163 if (irq->intid >= VGIC_MIN_LPI) {
164 raw_spin_lock(&irq->irq_lock);
165 list_del(&irq->ap_list);
166 irq->vcpu = NULL;
167 raw_spin_unlock(&irq->irq_lock);
168 vgic_put_irq(vcpu->kvm, irq);
169 }
170 }
171
172 raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
173}
174
154void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending) 175void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
155{ 176{
156 WARN_ON(irq_set_irqchip_state(irq->host_irq, 177 WARN_ON(irq_set_irqchip_state(irq->host_irq,
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index a90024718ca4..abeeffabc456 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -238,6 +238,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu);
238bool vgic_has_its(struct kvm *kvm); 238bool vgic_has_its(struct kvm *kvm);
239int kvm_vgic_register_its_device(void); 239int kvm_vgic_register_its_device(void);
240void vgic_enable_lpis(struct kvm_vcpu *vcpu); 240void vgic_enable_lpis(struct kvm_vcpu *vcpu);
241void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu);
241int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi); 242int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi);
242int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); 243int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
243int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, 244int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index dc8edc97ba85..a704d1f9bd96 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1240,7 +1240,7 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,
1240 if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) 1240 if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
1241 return -EINVAL; 1241 return -EINVAL;
1242 1242
1243 if ((log->first_page & 63) || (log->num_pages & 63)) 1243 if (log->first_page & 63)
1244 return -EINVAL; 1244 return -EINVAL;
1245 1245
1246 slots = __kvm_memslots(kvm, as_id); 1246 slots = __kvm_memslots(kvm, as_id);
@@ -1253,8 +1253,9 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,
1253 n = kvm_dirty_bitmap_bytes(memslot); 1253 n = kvm_dirty_bitmap_bytes(memslot);
1254 1254
1255 if (log->first_page > memslot->npages || 1255 if (log->first_page > memslot->npages ||
1256 log->num_pages > memslot->npages - log->first_page) 1256 log->num_pages > memslot->npages - log->first_page ||
1257 return -EINVAL; 1257 (log->num_pages < memslot->npages - log->first_page && (log->num_pages & 63)))
1258 return -EINVAL;
1258 1259
1259 *flush = false; 1260 *flush = false;
1260 dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot); 1261 dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);