author    Linus Torvalds <torvalds@linux-foundation.org>  2019-03-15 18:00:28 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2019-03-15 18:00:28 -0400
commit    636deed6c0bc137a7c4f4a97ae1fcf0ad75323da (patch)
tree      7bd27189b8e30e3c1466f7730831a08db65f8646 /virt
parent    aa2e3ac64ace127f403be85aa4d6015b859385f2 (diff)
parent    4a605bc08e98381d8df61c30a4acb2eac15eb7da (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
 "ARM:
   - some cleanups
   - direct physical timer assignment
   - cache sanitization for 32-bit guests

  s390:
   - interrupt cleanup
   - introduction of the Guest Information Block
   - preparation for processor subfunctions in cpu models

  PPC:
   - bug fixes and improvements, especially related to machine checks
     and protection keys

  x86:
   - many, many cleanups, including removing a bunch of MMU code for
     unnecessary optimizations
   - AVIC fixes

  Generic:
   - memcg accounting"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (147 commits)
  kvm: vmx: fix formatting of a comment
  KVM: doc: Document the life cycle of a VM and its resources
  MAINTAINERS: Add KVM selftests to existing KVM entry
  Revert "KVM/MMU: Flush tlb directly in the kvm_zap_gfn_range()"
  KVM: PPC: Book3S: Add count cache flush parameters to kvmppc_get_cpu_char()
  KVM: PPC: Fix compilation when KVM is not enabled
  KVM: Minor cleanups for kvm_main.c
  KVM: s390: add debug logging for cpu model subfunctions
  KVM: s390: implement subfunction processor calls
  arm64: KVM: Fix architecturally invalid reset value for FPEXC32_EL2
  KVM: arm/arm64: Remove unused timer variable
  KVM: PPC: Book3S: Improve KVM reference counting
  KVM: PPC: Book3S HV: Fix build failure without IOMMU support
  Revert "KVM: Eliminate extra function calls in kvm_get_dirty_log_protect()"
  x86: kvmguest: use TSC clocksource if invariant TSC is exposed
  KVM: Never start grow vCPU halt_poll_ns from value below halt_poll_ns_grow_start
  KVM: Expose the initial start value in grow_halt_poll_ns() as a module parameter
  KVM: grow_halt_poll_ns() should never shrink vCPU halt_poll_ns
  KVM: x86/mmu: Consolidate kvm_mmu_zap_all() and kvm_mmu_zap_mmio_sptes()
  KVM: x86/mmu: WARN if zapping a MMIO spte results in zapping children
  ...
Diffstat (limited to 'virt')
-rw-r--r--  virt/kvm/arm/arch_timer.c      | 608
-rw-r--r--  virt/kvm/arm/arm.c             |  64
-rw-r--r--  virt/kvm/arm/hyp/vgic-v3-sr.c  |   2
-rw-r--r--  virt/kvm/arm/mmu.c             |  20
-rw-r--r--  virt/kvm/arm/trace.h           | 107
-rw-r--r--  virt/kvm/arm/vgic/vgic-v3.c    |   4
-rw-r--r--  virt/kvm/coalesced_mmio.c      |   3
-rw-r--r--  virt/kvm/eventfd.c             |   7
-rw-r--r--  virt/kvm/irqchip.c             |   4
-rw-r--r--  virt/kvm/kvm_main.c            | 103
-rw-r--r--  virt/kvm/vfio.c                |   4
11 files changed, 631 insertions, 295 deletions
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index b07ac4614e1c..3417f2dbc366 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -25,6 +25,7 @@
25 25
26#include <clocksource/arm_arch_timer.h> 26#include <clocksource/arm_arch_timer.h>
27#include <asm/arch_timer.h> 27#include <asm/arch_timer.h>
28#include <asm/kvm_emulate.h>
28#include <asm/kvm_hyp.h> 29#include <asm/kvm_hyp.h>
29 30
30#include <kvm/arm_vgic.h> 31#include <kvm/arm_vgic.h>
@@ -34,7 +35,9 @@
34 35
35static struct timecounter *timecounter; 36static struct timecounter *timecounter;
36static unsigned int host_vtimer_irq; 37static unsigned int host_vtimer_irq;
38static unsigned int host_ptimer_irq;
37static u32 host_vtimer_irq_flags; 39static u32 host_vtimer_irq_flags;
40static u32 host_ptimer_irq_flags;
38 41
39static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); 42static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
40 43
@@ -52,12 +55,34 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
52static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, 55static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
53 struct arch_timer_context *timer_ctx); 56 struct arch_timer_context *timer_ctx);
54static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); 57static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
58static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
59 struct arch_timer_context *timer,
60 enum kvm_arch_timer_regs treg,
61 u64 val);
62static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
63 struct arch_timer_context *timer,
64 enum kvm_arch_timer_regs treg);
55 65
56u64 kvm_phys_timer_read(void) 66u64 kvm_phys_timer_read(void)
57{ 67{
58 return timecounter->cc->read(timecounter->cc); 68 return timecounter->cc->read(timecounter->cc);
59} 69}
60 70
71static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
72{
73 if (has_vhe()) {
74 map->direct_vtimer = vcpu_vtimer(vcpu);
75 map->direct_ptimer = vcpu_ptimer(vcpu);
76 map->emul_ptimer = NULL;
77 } else {
78 map->direct_vtimer = vcpu_vtimer(vcpu);
79 map->direct_ptimer = NULL;
80 map->emul_ptimer = vcpu_ptimer(vcpu);
81 }
82
83 trace_kvm_get_timer_map(vcpu->vcpu_id, map);
84}
85
61static inline bool userspace_irqchip(struct kvm *kvm) 86static inline bool userspace_irqchip(struct kvm *kvm)
62{ 87{
63 return static_branch_unlikely(&userspace_irqchip_in_use) && 88 return static_branch_unlikely(&userspace_irqchip_in_use) &&
@@ -78,20 +103,27 @@ static void soft_timer_cancel(struct hrtimer *hrt)
78static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) 103static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
79{ 104{
80 struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; 105 struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
81 struct arch_timer_context *vtimer; 106 struct arch_timer_context *ctx;
107 struct timer_map map;
82 108
83 /* 109 /*
84 * We may see a timer interrupt after vcpu_put() has been called which 110 * We may see a timer interrupt after vcpu_put() has been called which
85 * sets the CPU's vcpu pointer to NULL, because even though the timer 111 * sets the CPU's vcpu pointer to NULL, because even though the timer
86 * has been disabled in vtimer_save_state(), the hardware interrupt 112 * has been disabled in timer_save_state(), the hardware interrupt
87 * signal may not have been retired from the interrupt controller yet. 113 * signal may not have been retired from the interrupt controller yet.
88 */ 114 */
89 if (!vcpu) 115 if (!vcpu)
90 return IRQ_HANDLED; 116 return IRQ_HANDLED;
91 117
92 vtimer = vcpu_vtimer(vcpu); 118 get_timer_map(vcpu, &map);
93 if (kvm_timer_should_fire(vtimer)) 119
94 kvm_timer_update_irq(vcpu, true, vtimer); 120 if (irq == host_vtimer_irq)
121 ctx = map.direct_vtimer;
122 else
123 ctx = map.direct_ptimer;
124
125 if (kvm_timer_should_fire(ctx))
126 kvm_timer_update_irq(vcpu, true, ctx);
95 127
96 if (userspace_irqchip(vcpu->kvm) && 128 if (userspace_irqchip(vcpu->kvm) &&
97 !static_branch_unlikely(&has_gic_active_state)) 129 !static_branch_unlikely(&has_gic_active_state))
@@ -122,7 +154,9 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
122 154
123static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) 155static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
124{ 156{
125 return !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && 157 WARN_ON(timer_ctx && timer_ctx->loaded);
158 return timer_ctx &&
159 !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
126 (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE); 160 (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE);
127} 161}
128 162
@@ -132,21 +166,22 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
132 */ 166 */
133static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) 167static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
134{ 168{
135 u64 min_virt = ULLONG_MAX, min_phys = ULLONG_MAX; 169 u64 min_delta = ULLONG_MAX;
136 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 170 int i;
137 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
138 171
139 if (kvm_timer_irq_can_fire(vtimer)) 172 for (i = 0; i < NR_KVM_TIMERS; i++) {
140 min_virt = kvm_timer_compute_delta(vtimer); 173 struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];
141 174
142 if (kvm_timer_irq_can_fire(ptimer)) 175 WARN(ctx->loaded, "timer %d loaded\n", i);
143 min_phys = kvm_timer_compute_delta(ptimer); 176 if (kvm_timer_irq_can_fire(ctx))
177 min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
178 }
144 179
145 /* If none of timers can fire, then return 0 */ 180 /* If none of timers can fire, then return 0 */
146 if ((min_virt == ULLONG_MAX) && (min_phys == ULLONG_MAX)) 181 if (min_delta == ULLONG_MAX)
147 return 0; 182 return 0;
148 183
149 return min(min_virt, min_phys); 184 return min_delta;
150} 185}
151 186
152static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt) 187static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
@@ -173,41 +208,58 @@ static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
173 return HRTIMER_NORESTART; 208 return HRTIMER_NORESTART;
174} 209}
175 210
176static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt) 211static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
177{ 212{
178 struct arch_timer_context *ptimer; 213 struct arch_timer_context *ctx;
179 struct arch_timer_cpu *timer;
180 struct kvm_vcpu *vcpu; 214 struct kvm_vcpu *vcpu;
181 u64 ns; 215 u64 ns;
182 216
183 timer = container_of(hrt, struct arch_timer_cpu, phys_timer); 217 ctx = container_of(hrt, struct arch_timer_context, hrtimer);
184 vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); 218 vcpu = ctx->vcpu;
185 ptimer = vcpu_ptimer(vcpu); 219
220 trace_kvm_timer_hrtimer_expire(ctx);
186 221
187 /* 222 /*
188 * Check that the timer has really expired from the guest's 223 * Check that the timer has really expired from the guest's
189 * PoV (NTP on the host may have forced it to expire 224 * PoV (NTP on the host may have forced it to expire
190 * early). If not ready, schedule for a later time. 225 * early). If not ready, schedule for a later time.
191 */ 226 */
192 ns = kvm_timer_compute_delta(ptimer); 227 ns = kvm_timer_compute_delta(ctx);
193 if (unlikely(ns)) { 228 if (unlikely(ns)) {
194 hrtimer_forward_now(hrt, ns_to_ktime(ns)); 229 hrtimer_forward_now(hrt, ns_to_ktime(ns));
195 return HRTIMER_RESTART; 230 return HRTIMER_RESTART;
196 } 231 }
197 232
198 kvm_timer_update_irq(vcpu, true, ptimer); 233 kvm_timer_update_irq(vcpu, true, ctx);
199 return HRTIMER_NORESTART; 234 return HRTIMER_NORESTART;
200} 235}
201 236
202static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) 237static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
203{ 238{
239 enum kvm_arch_timers index;
204 u64 cval, now; 240 u64 cval, now;
205 241
242 if (!timer_ctx)
243 return false;
244
245 index = arch_timer_ctx_index(timer_ctx);
246
206 if (timer_ctx->loaded) { 247 if (timer_ctx->loaded) {
207 u32 cnt_ctl; 248 u32 cnt_ctl = 0;
249
250 switch (index) {
251 case TIMER_VTIMER:
252 cnt_ctl = read_sysreg_el0(cntv_ctl);
253 break;
254 case TIMER_PTIMER:
255 cnt_ctl = read_sysreg_el0(cntp_ctl);
256 break;
257 case NR_KVM_TIMERS:
258 /* GCC is braindead */
259 cnt_ctl = 0;
260 break;
261 }
208 262
209 /* Only the virtual timer can be loaded so far */
210 cnt_ctl = read_sysreg_el0(cntv_ctl);
211 return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) && 263 return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
212 (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) && 264 (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
213 !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK); 265 !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
@@ -224,13 +276,13 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
224 276
225bool kvm_timer_is_pending(struct kvm_vcpu *vcpu) 277bool kvm_timer_is_pending(struct kvm_vcpu *vcpu)
226{ 278{
227 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 279 struct timer_map map;
228 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
229 280
230 if (kvm_timer_should_fire(vtimer)) 281 get_timer_map(vcpu, &map);
231 return true;
232 282
233 return kvm_timer_should_fire(ptimer); 283 return kvm_timer_should_fire(map.direct_vtimer) ||
284 kvm_timer_should_fire(map.direct_ptimer) ||
285 kvm_timer_should_fire(map.emul_ptimer);
234} 286}
235 287
236/* 288/*
@@ -269,77 +321,70 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
269 } 321 }
270} 322}
271 323
272/* Schedule the background timer for the emulated timer. */ 324static void timer_emulate(struct arch_timer_context *ctx)
273static void phys_timer_emulate(struct kvm_vcpu *vcpu)
274{ 325{
275 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 326 bool should_fire = kvm_timer_should_fire(ctx);
276 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); 327
328 trace_kvm_timer_emulate(ctx, should_fire);
329
330 if (should_fire) {
331 kvm_timer_update_irq(ctx->vcpu, true, ctx);
332 return;
333 }
277 334
278 /* 335 /*
279 * If the timer can fire now, we don't need to have a soft timer 336 * If the timer can fire now, we don't need to have a soft timer
280 * scheduled for the future. If the timer cannot fire at all, 337 * scheduled for the future. If the timer cannot fire at all,
281 * then we also don't need a soft timer. 338 * then we also don't need a soft timer.
282 */ 339 */
283 if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) { 340 if (!kvm_timer_irq_can_fire(ctx)) {
284 soft_timer_cancel(&timer->phys_timer); 341 soft_timer_cancel(&ctx->hrtimer);
285 return; 342 return;
286 } 343 }
287 344
288 soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(ptimer)); 345 soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
289} 346}
290 347
291/* 348static void timer_save_state(struct arch_timer_context *ctx)
292 * Check if there was a change in the timer state, so that we should either
293 * raise or lower the line level to the GIC or schedule a background timer to
294 * emulate the physical timer.
295 */
296static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
297{ 349{
298 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 350 struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
299 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 351 enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
300 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); 352 unsigned long flags;
301 bool level;
302 353
303 if (unlikely(!timer->enabled)) 354 if (!timer->enabled)
304 return; 355 return;
305 356
306 /* 357 local_irq_save(flags);
307 * The vtimer virtual interrupt is a 'mapped' interrupt, meaning part
308 * of its lifecycle is offloaded to the hardware, and we therefore may
309 * not have lowered the irq.level value before having to signal a new
310 * interrupt, but have to signal an interrupt every time the level is
311 * asserted.
312 */
313 level = kvm_timer_should_fire(vtimer);
314 kvm_timer_update_irq(vcpu, level, vtimer);
315 358
316 phys_timer_emulate(vcpu); 359 if (!ctx->loaded)
360 goto out;
317 361
318 if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) 362 switch (index) {
319 kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); 363 case TIMER_VTIMER:
320} 364 ctx->cnt_ctl = read_sysreg_el0(cntv_ctl);
365 ctx->cnt_cval = read_sysreg_el0(cntv_cval);
321 366
322static void vtimer_save_state(struct kvm_vcpu *vcpu) 367 /* Disable the timer */
323{ 368 write_sysreg_el0(0, cntv_ctl);
324 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 369 isb();
325 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
326 unsigned long flags;
327 370
328 local_irq_save(flags); 371 break;
372 case TIMER_PTIMER:
373 ctx->cnt_ctl = read_sysreg_el0(cntp_ctl);
374 ctx->cnt_cval = read_sysreg_el0(cntp_cval);
329 375
330 if (!vtimer->loaded) 376 /* Disable the timer */
331 goto out; 377 write_sysreg_el0(0, cntp_ctl);
378 isb();
332 379
333 if (timer->enabled) { 380 break;
334 vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); 381 case NR_KVM_TIMERS:
335 vtimer->cnt_cval = read_sysreg_el0(cntv_cval); 382 BUG();
336 } 383 }
337 384
338 /* Disable the virtual timer */ 385 trace_kvm_timer_save_state(ctx);
339 write_sysreg_el0(0, cntv_ctl);
340 isb();
341 386
342 vtimer->loaded = false; 387 ctx->loaded = false;
343out: 388out:
344 local_irq_restore(flags); 389 local_irq_restore(flags);
345} 390}
@@ -349,67 +394,72 @@ out:
349 * thread is removed from its waitqueue and made runnable when there's a timer 394 * thread is removed from its waitqueue and made runnable when there's a timer
350 * interrupt to handle. 395 * interrupt to handle.
351 */ 396 */
352void kvm_timer_schedule(struct kvm_vcpu *vcpu) 397static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
353{ 398{
354 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 399 struct arch_timer_cpu *timer = vcpu_timer(vcpu);
355 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 400 struct timer_map map;
356 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
357
358 vtimer_save_state(vcpu);
359 401
360 /* 402 get_timer_map(vcpu, &map);
361 * No need to schedule a background timer if any guest timer has
362 * already expired, because kvm_vcpu_block will return before putting
363 * the thread to sleep.
364 */
365 if (kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer))
366 return;
367 403
368 /* 404 /*
369 * If both timers are not capable of raising interrupts (disabled or 405 * If no timers are capable of raising interrupts (disabled or
370 * masked), then there's no more work for us to do. 406 * masked), then there's no more work for us to do.
371 */ 407 */
372 if (!kvm_timer_irq_can_fire(vtimer) && !kvm_timer_irq_can_fire(ptimer)) 408 if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
409 !kvm_timer_irq_can_fire(map.direct_ptimer) &&
410 !kvm_timer_irq_can_fire(map.emul_ptimer))
373 return; 411 return;
374 412
375 /* 413 /*
376 * The guest timers have not yet expired, schedule a background timer. 414 * At least one guest time will expire. Schedule a background timer.
377 * Set the earliest expiration time among the guest timers. 415 * Set the earliest expiration time among the guest timers.
378 */ 416 */
379 soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu)); 417 soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
380} 418}
381 419
382static void vtimer_restore_state(struct kvm_vcpu *vcpu) 420static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
383{ 421{
384 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 422 struct arch_timer_cpu *timer = vcpu_timer(vcpu);
385 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 423
424 soft_timer_cancel(&timer->bg_timer);
425}
426
427static void timer_restore_state(struct arch_timer_context *ctx)
428{
429 struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
430 enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
386 unsigned long flags; 431 unsigned long flags;
387 432
433 if (!timer->enabled)
434 return;
435
388 local_irq_save(flags); 436 local_irq_save(flags);
389 437
390 if (vtimer->loaded) 438 if (ctx->loaded)
391 goto out; 439 goto out;
392 440
393 if (timer->enabled) { 441 switch (index) {
394 write_sysreg_el0(vtimer->cnt_cval, cntv_cval); 442 case TIMER_VTIMER:
443 write_sysreg_el0(ctx->cnt_cval, cntv_cval);
395 isb(); 444 isb();
396 write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl); 445 write_sysreg_el0(ctx->cnt_ctl, cntv_ctl);
446 break;
447 case TIMER_PTIMER:
448 write_sysreg_el0(ctx->cnt_cval, cntp_cval);
449 isb();
450 write_sysreg_el0(ctx->cnt_ctl, cntp_ctl);
451 break;
452 case NR_KVM_TIMERS:
453 BUG();
397 } 454 }
398 455
399 vtimer->loaded = true; 456 trace_kvm_timer_restore_state(ctx);
457
458 ctx->loaded = true;
400out: 459out:
401 local_irq_restore(flags); 460 local_irq_restore(flags);
402} 461}
403 462
404void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
405{
406 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
407
408 vtimer_restore_state(vcpu);
409
410 soft_timer_cancel(&timer->bg_timer);
411}
412
413static void set_cntvoff(u64 cntvoff) 463static void set_cntvoff(u64 cntvoff)
414{ 464{
415 u32 low = lower_32_bits(cntvoff); 465 u32 low = lower_32_bits(cntvoff);
@@ -425,23 +475,32 @@ static void set_cntvoff(u64 cntvoff)
425 kvm_call_hyp(__kvm_timer_set_cntvoff, low, high); 475 kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
426} 476}
427 477
428static inline void set_vtimer_irq_phys_active(struct kvm_vcpu *vcpu, bool active) 478static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
429{ 479{
430 int r; 480 int r;
431 r = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, active); 481 r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active);
432 WARN_ON(r); 482 WARN_ON(r);
433} 483}
434 484
435static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu) 485static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
436{ 486{
437 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 487 struct kvm_vcpu *vcpu = ctx->vcpu;
438 bool phys_active; 488 bool phys_active = false;
489
490 /*
491 * Update the timer output so that it is likely to match the
492 * state we're about to restore. If the timer expires between
493 * this point and the register restoration, we'll take the
494 * interrupt anyway.
495 */
496 kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);
439 497
440 if (irqchip_in_kernel(vcpu->kvm)) 498 if (irqchip_in_kernel(vcpu->kvm))
441 phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); 499 phys_active = kvm_vgic_map_is_active(vcpu, ctx->irq.irq);
442 else 500
443 phys_active = vtimer->irq.level; 501 phys_active |= ctx->irq.level;
444 set_vtimer_irq_phys_active(vcpu, phys_active); 502
503 set_timer_irq_phys_active(ctx, phys_active);
445} 504}
446 505
447static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) 506static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
@@ -466,28 +525,32 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
466 525
467void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) 526void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
468{ 527{
469 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 528 struct arch_timer_cpu *timer = vcpu_timer(vcpu);
470 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 529 struct timer_map map;
471 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
472 530
473 if (unlikely(!timer->enabled)) 531 if (unlikely(!timer->enabled))
474 return; 532 return;
475 533
476 if (static_branch_likely(&has_gic_active_state)) 534 get_timer_map(vcpu, &map);
477 kvm_timer_vcpu_load_gic(vcpu); 535
478 else 536 if (static_branch_likely(&has_gic_active_state)) {
537 kvm_timer_vcpu_load_gic(map.direct_vtimer);
538 if (map.direct_ptimer)
539 kvm_timer_vcpu_load_gic(map.direct_ptimer);
540 } else {
479 kvm_timer_vcpu_load_nogic(vcpu); 541 kvm_timer_vcpu_load_nogic(vcpu);
542 }
480 543
481 set_cntvoff(vtimer->cntvoff); 544 set_cntvoff(map.direct_vtimer->cntvoff);
482 545
483 vtimer_restore_state(vcpu); 546 kvm_timer_unblocking(vcpu);
484 547
485 /* Set the background timer for the physical timer emulation. */ 548 timer_restore_state(map.direct_vtimer);
486 phys_timer_emulate(vcpu); 549 if (map.direct_ptimer)
550 timer_restore_state(map.direct_ptimer);
487 551
488 /* If the timer fired while we weren't running, inject it now */ 552 if (map.emul_ptimer)
489 if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) 553 timer_emulate(map.emul_ptimer);
490 kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
491} 554}
492 555
493bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) 556bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
@@ -509,15 +572,20 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
509 572
510void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) 573void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
511{ 574{
512 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 575 struct arch_timer_cpu *timer = vcpu_timer(vcpu);
576 struct timer_map map;
513 577
514 if (unlikely(!timer->enabled)) 578 if (unlikely(!timer->enabled))
515 return; 579 return;
516 580
517 vtimer_save_state(vcpu); 581 get_timer_map(vcpu, &map);
582
583 timer_save_state(map.direct_vtimer);
584 if (map.direct_ptimer)
585 timer_save_state(map.direct_ptimer);
518 586
519 /* 587 /*
520 * Cancel the physical timer emulation, because the only case where we 588 * Cancel soft timer emulation, because the only case where we
521 * need it after a vcpu_put is in the context of a sleeping VCPU, and 589 * need it after a vcpu_put is in the context of a sleeping VCPU, and
522 * in that case we already factor in the deadline for the physical 590 * in that case we already factor in the deadline for the physical
523 * timer when scheduling the bg_timer. 591 * timer when scheduling the bg_timer.
@@ -525,7 +593,11 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
525 * In any case, we re-schedule the hrtimer for the physical timer when 593 * In any case, we re-schedule the hrtimer for the physical timer when
526 * coming back to the VCPU thread in kvm_timer_vcpu_load(). 594 * coming back to the VCPU thread in kvm_timer_vcpu_load().
527 */ 595 */
528 soft_timer_cancel(&timer->phys_timer); 596 if (map.emul_ptimer)
597 soft_timer_cancel(&map.emul_ptimer->hrtimer);
598
599 if (swait_active(kvm_arch_vcpu_wq(vcpu)))
600 kvm_timer_blocking(vcpu);
529 601
530 /* 602 /*
531 * The kernel may decide to run userspace after calling vcpu_put, so 603 * The kernel may decide to run userspace after calling vcpu_put, so
@@ -534,8 +606,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
534 * counter of non-VHE case. For VHE, the virtual counter uses a fixed 606 * counter of non-VHE case. For VHE, the virtual counter uses a fixed
535 * virtual offset of zero, so no need to zero CNTVOFF_EL2 register. 607 * virtual offset of zero, so no need to zero CNTVOFF_EL2 register.
536 */ 608 */
537 if (!has_vhe()) 609 set_cntvoff(0);
538 set_cntvoff(0);
539} 610}
540 611
541/* 612/*
@@ -550,7 +621,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
550 if (!kvm_timer_should_fire(vtimer)) { 621 if (!kvm_timer_should_fire(vtimer)) {
551 kvm_timer_update_irq(vcpu, false, vtimer); 622 kvm_timer_update_irq(vcpu, false, vtimer);
552 if (static_branch_likely(&has_gic_active_state)) 623 if (static_branch_likely(&has_gic_active_state))
553 set_vtimer_irq_phys_active(vcpu, false); 624 set_timer_irq_phys_active(vtimer, false);
554 else 625 else
555 enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); 626 enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
556 } 627 }
@@ -558,7 +629,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
558 629
559void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) 630void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
560{ 631{
561 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 632 struct arch_timer_cpu *timer = vcpu_timer(vcpu);
562 633
563 if (unlikely(!timer->enabled)) 634 if (unlikely(!timer->enabled))
564 return; 635 return;
@@ -569,9 +640,10 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
569 640
570int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) 641int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
571{ 642{
572 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 643 struct arch_timer_cpu *timer = vcpu_timer(vcpu);
573 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 644 struct timer_map map;
574 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); 645
646 get_timer_map(vcpu, &map);
575 647
576 /* 648 /*
577 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 649 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
@@ -579,12 +651,22 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
579 * resets the timer to be disabled and unmasked and is compliant with 651 * resets the timer to be disabled and unmasked and is compliant with
580 * the ARMv7 architecture. 652 * the ARMv7 architecture.
581 */ 653 */
582 vtimer->cnt_ctl = 0; 654 vcpu_vtimer(vcpu)->cnt_ctl = 0;
583 ptimer->cnt_ctl = 0; 655 vcpu_ptimer(vcpu)->cnt_ctl = 0;
584 kvm_timer_update_state(vcpu);
585 656
586 if (timer->enabled && irqchip_in_kernel(vcpu->kvm)) 657 if (timer->enabled) {
587 kvm_vgic_reset_mapped_irq(vcpu, vtimer->irq.irq); 658 kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu));
659 kvm_timer_update_irq(vcpu, false, vcpu_ptimer(vcpu));
660
661 if (irqchip_in_kernel(vcpu->kvm)) {
662 kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq);
663 if (map.direct_ptimer)
664 kvm_vgic_reset_mapped_irq(vcpu, map.direct_ptimer->irq.irq);
665 }
666 }
667
668 if (map.emul_ptimer)
669 soft_timer_cancel(&map.emul_ptimer->hrtimer);
588 670
589 return 0; 671 return 0;
590} 672}
@@ -610,56 +692,76 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
610 692
611void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) 693void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
612{ 694{
613 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 695 struct arch_timer_cpu *timer = vcpu_timer(vcpu);
614 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 696 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
615 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); 697 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
616 698
617 /* Synchronize cntvoff across all vtimers of a VM. */ 699 /* Synchronize cntvoff across all vtimers of a VM. */
618 update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); 700 update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
619 vcpu_ptimer(vcpu)->cntvoff = 0; 701 ptimer->cntvoff = 0;
620 702
621 hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 703 hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
622 timer->bg_timer.function = kvm_bg_timer_expire; 704 timer->bg_timer.function = kvm_bg_timer_expire;
623 705
624 hrtimer_init(&timer->phys_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 706 hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
625 timer->phys_timer.function = kvm_phys_timer_expire; 707 hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
708 vtimer->hrtimer.function = kvm_hrtimer_expire;
709 ptimer->hrtimer.function = kvm_hrtimer_expire;
626 710
627 vtimer->irq.irq = default_vtimer_irq.irq; 711 vtimer->irq.irq = default_vtimer_irq.irq;
628 ptimer->irq.irq = default_ptimer_irq.irq; 712 ptimer->irq.irq = default_ptimer_irq.irq;
713
714 vtimer->host_timer_irq = host_vtimer_irq;
715 ptimer->host_timer_irq = host_ptimer_irq;
716
717 vtimer->host_timer_irq_flags = host_vtimer_irq_flags;
718 ptimer->host_timer_irq_flags = host_ptimer_irq_flags;
719
720 vtimer->vcpu = vcpu;
721 ptimer->vcpu = vcpu;
629} 722}
630 723
631static void kvm_timer_init_interrupt(void *info) 724static void kvm_timer_init_interrupt(void *info)
632{ 725{
633 enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); 726 enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
727 enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
634} 728}
635 729
636int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) 730int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
637{ 731{
638 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 732 struct arch_timer_context *timer;
639 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); 733 bool level;
640 734
641 switch (regid) { 735 switch (regid) {
642 case KVM_REG_ARM_TIMER_CTL: 736 case KVM_REG_ARM_TIMER_CTL:
643 vtimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT; 737 timer = vcpu_vtimer(vcpu);
738 kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
644 break; 739 break;
645 case KVM_REG_ARM_TIMER_CNT: 740 case KVM_REG_ARM_TIMER_CNT:
741 timer = vcpu_vtimer(vcpu);
646 update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value); 742 update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value);
647 break; 743 break;
648 case KVM_REG_ARM_TIMER_CVAL: 744 case KVM_REG_ARM_TIMER_CVAL:
649 vtimer->cnt_cval = value; 745 timer = vcpu_vtimer(vcpu);
746 kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
650 break; 747 break;
651 case KVM_REG_ARM_PTIMER_CTL: 748 case KVM_REG_ARM_PTIMER_CTL:
652 ptimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT; 749 timer = vcpu_ptimer(vcpu);
750 kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
653 break; 751 break;
654 case KVM_REG_ARM_PTIMER_CVAL: 752 case KVM_REG_ARM_PTIMER_CVAL:
655 ptimer->cnt_cval = value; 753 timer = vcpu_ptimer(vcpu);
754 kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
656 break; 755 break;
657 756
658 default: 757 default:
659 return -1; 758 return -1;
660 } 759 }
661 760
662 kvm_timer_update_state(vcpu); 761 level = kvm_timer_should_fire(timer);
762 kvm_timer_update_irq(vcpu, level, timer);
763 timer_emulate(timer);
764
663 return 0; 765 return 0;
664} 766}
665 767
@@ -679,26 +781,113 @@ static u64 read_timer_ctl(struct arch_timer_context *timer)
679 781
680u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) 782u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
681{ 783{
682 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
683 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
684
685 switch (regid) { 784 switch (regid) {
686 case KVM_REG_ARM_TIMER_CTL: 785 case KVM_REG_ARM_TIMER_CTL:
687 return read_timer_ctl(vtimer); 786 return kvm_arm_timer_read(vcpu,
787 vcpu_vtimer(vcpu), TIMER_REG_CTL);
688 case KVM_REG_ARM_TIMER_CNT: 788 case KVM_REG_ARM_TIMER_CNT:
689 return kvm_phys_timer_read() - vtimer->cntvoff; 789 return kvm_arm_timer_read(vcpu,
790 vcpu_vtimer(vcpu), TIMER_REG_CNT);
690 case KVM_REG_ARM_TIMER_CVAL: 791 case KVM_REG_ARM_TIMER_CVAL:
691 return vtimer->cnt_cval; 792 return kvm_arm_timer_read(vcpu,
793 vcpu_vtimer(vcpu), TIMER_REG_CVAL);
692 case KVM_REG_ARM_PTIMER_CTL: 794 case KVM_REG_ARM_PTIMER_CTL:
693 return read_timer_ctl(ptimer); 795 return kvm_arm_timer_read(vcpu,
694 case KVM_REG_ARM_PTIMER_CVAL: 796 vcpu_ptimer(vcpu), TIMER_REG_CTL);
695 return ptimer->cnt_cval;
696 case KVM_REG_ARM_PTIMER_CNT: 797 case KVM_REG_ARM_PTIMER_CNT:
697 return kvm_phys_timer_read(); 798 return kvm_arm_timer_read(vcpu,
799 vcpu_vtimer(vcpu), TIMER_REG_CNT);
800 case KVM_REG_ARM_PTIMER_CVAL:
801 return kvm_arm_timer_read(vcpu,
802 vcpu_ptimer(vcpu), TIMER_REG_CVAL);
698 } 803 }
699 return (u64)-1; 804 return (u64)-1;
700} 805}
701 806
807static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
808 struct arch_timer_context *timer,
809 enum kvm_arch_timer_regs treg)
810{
811 u64 val;
812
813 switch (treg) {
814 case TIMER_REG_TVAL:
815 val = kvm_phys_timer_read() - timer->cntvoff - timer->cnt_cval;
816 break;
817
818 case TIMER_REG_CTL:
819 val = read_timer_ctl(timer);
820 break;
821
822 case TIMER_REG_CVAL:
823 val = timer->cnt_cval;
824 break;
825
826 case TIMER_REG_CNT:
827 val = kvm_phys_timer_read() - timer->cntvoff;
828 break;
829
830 default:
831 BUG();
832 }
833
834 return val;
835}
836
837u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
838 enum kvm_arch_timers tmr,
839 enum kvm_arch_timer_regs treg)
840{
841 u64 val;
842
843 preempt_disable();
844 kvm_timer_vcpu_put(vcpu);
845
846 val = kvm_arm_timer_read(vcpu, vcpu_get_timer(vcpu, tmr), treg);
847
848 kvm_timer_vcpu_load(vcpu);
849 preempt_enable();
850
851 return val;
852}
853
854static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
855 struct arch_timer_context *timer,
856 enum kvm_arch_timer_regs treg,
857 u64 val)
858{
859 switch (treg) {
860 case TIMER_REG_TVAL:
861 timer->cnt_cval = val - kvm_phys_timer_read() - timer->cntvoff;
862 break;
863
864 case TIMER_REG_CTL:
865 timer->cnt_ctl = val & ~ARCH_TIMER_CTRL_IT_STAT;
866 break;
867
868 case TIMER_REG_CVAL:
869 timer->cnt_cval = val;
870 break;
871
872 default:
873 BUG();
874 }
875}
876
877void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
878 enum kvm_arch_timers tmr,
879 enum kvm_arch_timer_regs treg,
880 u64 val)
881{
882 preempt_disable();
883 kvm_timer_vcpu_put(vcpu);
884
885 kvm_arm_timer_write(vcpu, vcpu_get_timer(vcpu, tmr), treg, val);
886
887 kvm_timer_vcpu_load(vcpu);
888 preempt_enable();
889}
890
702static int kvm_timer_starting_cpu(unsigned int cpu) 891static int kvm_timer_starting_cpu(unsigned int cpu)
703{ 892{
704 kvm_timer_init_interrupt(NULL); 893 kvm_timer_init_interrupt(NULL);
@@ -724,6 +913,8 @@ int kvm_timer_hyp_init(bool has_gic)
724 return -ENODEV; 913 return -ENODEV;
725 } 914 }
726 915
916 /* First, do the virtual EL1 timer irq */
917
727 if (info->virtual_irq <= 0) { 918 if (info->virtual_irq <= 0) {
728 kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", 919 kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
729 info->virtual_irq); 920 info->virtual_irq);
@@ -734,15 +925,15 @@ int kvm_timer_hyp_init(bool has_gic)
734 host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq); 925 host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
735 if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH && 926 if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
736 host_vtimer_irq_flags != IRQF_TRIGGER_LOW) { 927 host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
737 kvm_err("Invalid trigger for IRQ%d, assuming level low\n", 928 kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n",
738 host_vtimer_irq); 929 host_vtimer_irq);
739 host_vtimer_irq_flags = IRQF_TRIGGER_LOW; 930 host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
740 } 931 }
741 932
742 err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, 933 err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
743 "kvm guest timer", kvm_get_running_vcpus()); 934 "kvm guest vtimer", kvm_get_running_vcpus());
744 if (err) { 935 if (err) {
745 kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n", 936 kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
746 host_vtimer_irq, err); 937 host_vtimer_irq, err);
747 return err; 938 return err;
748 } 939 }
@@ -760,6 +951,43 @@ int kvm_timer_hyp_init(bool has_gic)
760 951
761 kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq); 952 kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);
762 953
954 /* Now let's do the physical EL1 timer irq */
955
956 if (info->physical_irq > 0) {
957 host_ptimer_irq = info->physical_irq;
958 host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq);
959 if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH &&
960 host_ptimer_irq_flags != IRQF_TRIGGER_LOW) {
961 kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n",
962 host_ptimer_irq);
963 host_ptimer_irq_flags = IRQF_TRIGGER_LOW;
964 }
965
966 err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
967 "kvm guest ptimer", kvm_get_running_vcpus());
968 if (err) {
969 kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
970 host_ptimer_irq, err);
971 return err;
972 }
973
974 if (has_gic) {
975 err = irq_set_vcpu_affinity(host_ptimer_irq,
976 kvm_get_running_vcpus());
977 if (err) {
978 kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
979 goto out_free_irq;
980 }
981 }
982
983 kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
984 } else if (has_vhe()) {
985 kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
986 info->physical_irq);
987 err = -ENODEV;
988 goto out_free_irq;
989 }
990
763 cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, 991 cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
764 "kvm/arm/timer:starting", kvm_timer_starting_cpu, 992 "kvm/arm/timer:starting", kvm_timer_starting_cpu,
765 kvm_timer_dying_cpu); 993 kvm_timer_dying_cpu);
@@ -771,7 +999,7 @@ out_free_irq:
771 999
772void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) 1000void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
773{ 1001{
774 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 1002 struct arch_timer_cpu *timer = vcpu_timer(vcpu);
775 1003
776 soft_timer_cancel(&timer->bg_timer); 1004 soft_timer_cancel(&timer->bg_timer);
777} 1005}
@@ -807,16 +1035,18 @@ bool kvm_arch_timer_get_input_level(int vintid)
807 1035
808 if (vintid == vcpu_vtimer(vcpu)->irq.irq) 1036 if (vintid == vcpu_vtimer(vcpu)->irq.irq)
809 timer = vcpu_vtimer(vcpu); 1037 timer = vcpu_vtimer(vcpu);
1038 else if (vintid == vcpu_ptimer(vcpu)->irq.irq)
1039 timer = vcpu_ptimer(vcpu);
810 else 1040 else
811 BUG(); /* We only map the vtimer so far */ 1041 BUG();
812 1042
813 return kvm_timer_should_fire(timer); 1043 return kvm_timer_should_fire(timer);
814} 1044}
815 1045
816int kvm_timer_enable(struct kvm_vcpu *vcpu) 1046int kvm_timer_enable(struct kvm_vcpu *vcpu)
817{ 1047{
818 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 1048 struct arch_timer_cpu *timer = vcpu_timer(vcpu);
819 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 1049 struct timer_map map;
820 int ret; 1050 int ret;
821 1051
822 if (timer->enabled) 1052 if (timer->enabled)
@@ -834,19 +1064,33 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
834 return -EINVAL; 1064 return -EINVAL;
835 } 1065 }
836 1066
837 ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq, 1067 get_timer_map(vcpu, &map);
1068
1069 ret = kvm_vgic_map_phys_irq(vcpu,
1070 map.direct_vtimer->host_timer_irq,
1071 map.direct_vtimer->irq.irq,
838 kvm_arch_timer_get_input_level); 1072 kvm_arch_timer_get_input_level);
839 if (ret) 1073 if (ret)
840 return ret; 1074 return ret;
841 1075
1076 if (map.direct_ptimer) {
1077 ret = kvm_vgic_map_phys_irq(vcpu,
1078 map.direct_ptimer->host_timer_irq,
1079 map.direct_ptimer->irq.irq,
1080 kvm_arch_timer_get_input_level);
1081 }
1082
1083 if (ret)
1084 return ret;
1085
842no_vgic: 1086no_vgic:
843 timer->enabled = 1; 1087 timer->enabled = 1;
844 return 0; 1088 return 0;
845} 1089}
846 1090
847/* 1091/*
848 * On VHE system, we only need to configure trap on physical timer and counter 1092 * On VHE system, we only need to configure the EL2 timer trap register once,
849 * accesses in EL0 and EL1 once, not for every world switch. 1093 * not for every world switch.
850 * The host kernel runs at EL2 with HCR_EL2.TGE == 1, 1094 * The host kernel runs at EL2 with HCR_EL2.TGE == 1,
851 * and this makes those bits have no effect for the host kernel execution. 1095 * and this makes those bits have no effect for the host kernel execution.
852 */ 1096 */
@@ -857,11 +1101,11 @@ void kvm_timer_init_vhe(void)
857 u64 val; 1101 u64 val;
858 1102
859 /* 1103 /*
860 * Disallow physical timer access for the guest. 1104 * VHE systems allow the guest direct access to the EL1 physical
861 * Physical counter access is allowed. 1105 * timer/counter.
862 */ 1106 */
863 val = read_sysreg(cnthctl_el2); 1107 val = read_sysreg(cnthctl_el2);
864 val &= ~(CNTHCTL_EL1PCEN << cnthctl_shift); 1108 val |= (CNTHCTL_EL1PCEN << cnthctl_shift);
865 val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); 1109 val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);
866 write_sysreg(val, cnthctl_el2); 1110 write_sysreg(val, cnthctl_el2);
867} 1111}
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 9c486fad3f9f..99c37384ba7b 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -65,7 +65,6 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
65/* The VMID used in the VTTBR */ 65/* The VMID used in the VTTBR */
66static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); 66static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
67static u32 kvm_next_vmid; 67static u32 kvm_next_vmid;
68static unsigned int kvm_vmid_bits __read_mostly;
69static DEFINE_SPINLOCK(kvm_vmid_lock); 68static DEFINE_SPINLOCK(kvm_vmid_lock);
70 69
71static bool vgic_present; 70static bool vgic_present;
@@ -142,7 +141,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
142 kvm_vgic_early_init(kvm); 141 kvm_vgic_early_init(kvm);
143 142
144 /* Mark the initial VMID generation invalid */ 143 /* Mark the initial VMID generation invalid */
145 kvm->arch.vmid_gen = 0; 144 kvm->arch.vmid.vmid_gen = 0;
146 145
147 /* The maximum number of VCPUs is limited by the host's GIC model */ 146 /* The maximum number of VCPUs is limited by the host's GIC model */
148 kvm->arch.max_vcpus = vgic_present ? 147 kvm->arch.max_vcpus = vgic_present ?
@@ -336,13 +335,11 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
336 335
337void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) 336void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
338{ 337{
339 kvm_timer_schedule(vcpu);
340 kvm_vgic_v4_enable_doorbell(vcpu); 338 kvm_vgic_v4_enable_doorbell(vcpu);
341} 339}
342 340
343void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) 341void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
344{ 342{
345 kvm_timer_unschedule(vcpu);
346 kvm_vgic_v4_disable_doorbell(vcpu); 343 kvm_vgic_v4_disable_doorbell(vcpu);
347} 344}
348 345
@@ -472,37 +469,31 @@ void force_vm_exit(const cpumask_t *mask)
472 469
473/** 470/**
474 * need_new_vmid_gen - check that the VMID is still valid 471 * need_new_vmid_gen - check that the VMID is still valid
475 * @kvm: The VM's VMID to check 472 * @vmid: The VMID to check
476 * 473 *
477 * return true if there is a new generation of VMIDs being used 474 * return true if there is a new generation of VMIDs being used
478 * 475 *
479 * The hardware supports only 256 values with the value zero reserved for the 476 * The hardware supports a limited set of values with the value zero reserved
480 * host, so we check if an assigned value belongs to a previous generation, 477 * for the host, so we check if an assigned value belongs to a previous
481 * which which requires us to assign a new value. If we're the first to use a 478 * generation, which which requires us to assign a new value. If we're the
482 * VMID for the new generation, we must flush necessary caches and TLBs on all 479 * first to use a VMID for the new generation, we must flush necessary caches
483 * CPUs. 480 * and TLBs on all CPUs.
484 */ 481 */
485static bool need_new_vmid_gen(struct kvm *kvm) 482static bool need_new_vmid_gen(struct kvm_vmid *vmid)
486{ 483{
487 u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen); 484 u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
488 smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */ 485 smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
489 return unlikely(READ_ONCE(kvm->arch.vmid_gen) != current_vmid_gen); 486 return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen);
490} 487}
491 488
492/** 489/**
493 * update_vttbr - Update the VTTBR with a valid VMID before the guest runs 490 * update_vmid - Update the vmid with a valid VMID for the current generation
494 * @kvm The guest that we are about to run 491 * @kvm: The guest that struct vmid belongs to
495 * 492 * @vmid: The stage-2 VMID information struct
496 * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure the
497 * VM has a valid VMID, otherwise assigns a new one and flushes corresponding
498 * caches and TLBs.
499 */ 493 */
500static void update_vttbr(struct kvm *kvm) 494static void update_vmid(struct kvm_vmid *vmid)
501{ 495{
502 phys_addr_t pgd_phys; 496 if (!need_new_vmid_gen(vmid))
503 u64 vmid, cnp = kvm_cpu_has_cnp() ? VTTBR_CNP_BIT : 0;
504
505 if (!need_new_vmid_gen(kvm))
506 return; 497 return;
507 498
508 spin_lock(&kvm_vmid_lock); 499 spin_lock(&kvm_vmid_lock);
@@ -512,7 +503,7 @@ static void update_vttbr(struct kvm *kvm)
512 * already allocated a valid vmid for this vm, then this vcpu should 503 * already allocated a valid vmid for this vm, then this vcpu should
513 * use the same vmid. 504 * use the same vmid.
514 */ 505 */
515 if (!need_new_vmid_gen(kvm)) { 506 if (!need_new_vmid_gen(vmid)) {
516 spin_unlock(&kvm_vmid_lock); 507 spin_unlock(&kvm_vmid_lock);
517 return; 508 return;
518 } 509 }
@@ -536,18 +527,12 @@ static void update_vttbr(struct kvm *kvm)
536 kvm_call_hyp(__kvm_flush_vm_context); 527 kvm_call_hyp(__kvm_flush_vm_context);
537 } 528 }
538 529
539 kvm->arch.vmid = kvm_next_vmid; 530 vmid->vmid = kvm_next_vmid;
540 kvm_next_vmid++; 531 kvm_next_vmid++;
541 kvm_next_vmid &= (1 << kvm_vmid_bits) - 1; 532 kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
542
543 /* update vttbr to be used with the new vmid */
544 pgd_phys = virt_to_phys(kvm->arch.pgd);
545 BUG_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm));
546 vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
547 kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid | cnp;
548 533
549 smp_wmb(); 534 smp_wmb();
550 WRITE_ONCE(kvm->arch.vmid_gen, atomic64_read(&kvm_vmid_gen)); 535 WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen));
551 536
552 spin_unlock(&kvm_vmid_lock); 537 spin_unlock(&kvm_vmid_lock);
553} 538}
@@ -700,7 +685,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
700 */ 685 */
701 cond_resched(); 686 cond_resched();
702 687
703 update_vttbr(vcpu->kvm); 688 update_vmid(&vcpu->kvm->arch.vmid);
704 689
705 check_vcpu_requests(vcpu); 690 check_vcpu_requests(vcpu);
706 691
@@ -749,7 +734,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
749 */ 734 */
750 smp_store_mb(vcpu->mode, IN_GUEST_MODE); 735 smp_store_mb(vcpu->mode, IN_GUEST_MODE);
751 736
752 if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || 737 if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) ||
753 kvm_request_pending(vcpu)) { 738 kvm_request_pending(vcpu)) {
754 vcpu->mode = OUTSIDE_GUEST_MODE; 739 vcpu->mode = OUTSIDE_GUEST_MODE;
755 isb(); /* Ensure work in x_flush_hwstate is committed */ 740 isb(); /* Ensure work in x_flush_hwstate is committed */
@@ -775,7 +760,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
775 ret = kvm_vcpu_run_vhe(vcpu); 760 ret = kvm_vcpu_run_vhe(vcpu);
776 kvm_arm_vhe_guest_exit(); 761 kvm_arm_vhe_guest_exit();
777 } else { 762 } else {
778 ret = kvm_call_hyp(__kvm_vcpu_run_nvhe, vcpu); 763 ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu);
779 } 764 }
780 765
781 vcpu->mode = OUTSIDE_GUEST_MODE; 766 vcpu->mode = OUTSIDE_GUEST_MODE;
@@ -1427,10 +1412,6 @@ static inline void hyp_cpu_pm_exit(void)
1427 1412
1428static int init_common_resources(void) 1413static int init_common_resources(void)
1429{ 1414{
1430 /* set size of VMID supported by CPU */
1431 kvm_vmid_bits = kvm_get_vmid_bits();
1432 kvm_info("%d-bit VMID\n", kvm_vmid_bits);
1433
1434 kvm_set_ipa_limit(); 1415 kvm_set_ipa_limit();
1435 1416
1436 return 0; 1417 return 0;
@@ -1571,6 +1552,7 @@ static int init_hyp_mode(void)
1571 kvm_cpu_context_t *cpu_ctxt; 1552 kvm_cpu_context_t *cpu_ctxt;
1572 1553
1573 cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu); 1554 cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu);
1555 kvm_init_host_cpu_context(cpu_ctxt, cpu);
1574 err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP); 1556 err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP);
1575 1557
1576 if (err) { 1558 if (err) {
@@ -1581,7 +1563,7 @@ static int init_hyp_mode(void)
1581 1563
1582 err = hyp_map_aux_data(); 1564 err = hyp_map_aux_data();
1583 if (err) 1565 if (err)
1584 kvm_err("Cannot map host auxilary data: %d\n", err); 1566 kvm_err("Cannot map host auxiliary data: %d\n", err);
1585 1567
1586 return 0; 1568 return 0;
1587 1569
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index 9652c453480f..264d92da3240 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -226,7 +226,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
226 int i; 226 int i;
227 u32 elrsr; 227 u32 elrsr;
228 228
229 elrsr = read_gicreg(ICH_ELSR_EL2); 229 elrsr = read_gicreg(ICH_ELRSR_EL2);
230 230
231 write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2); 231 write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2);
232 232
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index e9d28a7ca673..ffd7acdceac7 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -908,6 +908,7 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
908 */ 908 */
909int kvm_alloc_stage2_pgd(struct kvm *kvm) 909int kvm_alloc_stage2_pgd(struct kvm *kvm)
910{ 910{
911 phys_addr_t pgd_phys;
911 pgd_t *pgd; 912 pgd_t *pgd;
912 913
913 if (kvm->arch.pgd != NULL) { 914 if (kvm->arch.pgd != NULL) {
@@ -920,7 +921,12 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
920 if (!pgd) 921 if (!pgd)
921 return -ENOMEM; 922 return -ENOMEM;
922 923
924 pgd_phys = virt_to_phys(pgd);
925 if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm)))
926 return -EINVAL;
927
923 kvm->arch.pgd = pgd; 928 kvm->arch.pgd = pgd;
929 kvm->arch.pgd_phys = pgd_phys;
924 return 0; 930 return 0;
925} 931}
926 932
@@ -1008,6 +1014,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
1008 unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); 1014 unmap_stage2_range(kvm, 0, kvm_phys_size(kvm));
1009 pgd = READ_ONCE(kvm->arch.pgd); 1015 pgd = READ_ONCE(kvm->arch.pgd);
1010 kvm->arch.pgd = NULL; 1016 kvm->arch.pgd = NULL;
1017 kvm->arch.pgd_phys = 0;
1011 } 1018 }
1012 spin_unlock(&kvm->mmu_lock); 1019 spin_unlock(&kvm->mmu_lock);
1013 1020
@@ -1396,14 +1403,6 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
1396 return false; 1403 return false;
1397} 1404}
1398 1405
1399static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
1400{
1401 if (kvm_vcpu_trap_is_iabt(vcpu))
1402 return false;
1403
1404 return kvm_vcpu_dabt_iswrite(vcpu);
1405}
1406
1407/** 1406/**
1408 * stage2_wp_ptes - write protect PMD range 1407 * stage2_wp_ptes - write protect PMD range
1409 * @pmd: pointer to pmd entry 1408 * @pmd: pointer to pmd entry
@@ -1598,14 +1597,13 @@ static void kvm_send_hwpoison_signal(unsigned long address,
1598static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot, 1597static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot,
1599 unsigned long hva) 1598 unsigned long hva)
1600{ 1599{
1601 gpa_t gpa_start, gpa_end; 1600 gpa_t gpa_start;
1602 hva_t uaddr_start, uaddr_end; 1601 hva_t uaddr_start, uaddr_end;
1603 size_t size; 1602 size_t size;
1604 1603
1605 size = memslot->npages * PAGE_SIZE; 1604 size = memslot->npages * PAGE_SIZE;
1606 1605
1607 gpa_start = memslot->base_gfn << PAGE_SHIFT; 1606 gpa_start = memslot->base_gfn << PAGE_SHIFT;
1608 gpa_end = gpa_start + size;
1609 1607
1610 uaddr_start = memslot->userspace_addr; 1608 uaddr_start = memslot->userspace_addr;
1611 uaddr_end = uaddr_start + size; 1609 uaddr_end = uaddr_start + size;
@@ -2353,7 +2351,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2353 return 0; 2351 return 0;
2354} 2352}
2355 2353
2356void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) 2354void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
2357{ 2355{
2358} 2356}
2359 2357
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h
index 3828beab93f2..204d210d01c2 100644
--- a/virt/kvm/arm/trace.h
+++ b/virt/kvm/arm/trace.h
@@ -2,6 +2,7 @@
2#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) 2#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
3#define _TRACE_KVM_H 3#define _TRACE_KVM_H
4 4
5#include <kvm/arm_arch_timer.h>
5#include <linux/tracepoint.h> 6#include <linux/tracepoint.h>
6 7
7#undef TRACE_SYSTEM 8#undef TRACE_SYSTEM
@@ -262,10 +263,114 @@ TRACE_EVENT(kvm_timer_update_irq,
262 __entry->vcpu_id, __entry->irq, __entry->level) 263 __entry->vcpu_id, __entry->irq, __entry->level)
263); 264);
264 265
266TRACE_EVENT(kvm_get_timer_map,
267 TP_PROTO(unsigned long vcpu_id, struct timer_map *map),
268 TP_ARGS(vcpu_id, map),
269
270 TP_STRUCT__entry(
271 __field( unsigned long, vcpu_id )
272 __field( int, direct_vtimer )
273 __field( int, direct_ptimer )
274 __field( int, emul_ptimer )
275 ),
276
277 TP_fast_assign(
278 __entry->vcpu_id = vcpu_id;
279 __entry->direct_vtimer = arch_timer_ctx_index(map->direct_vtimer);
280 __entry->direct_ptimer =
281 (map->direct_ptimer) ? arch_timer_ctx_index(map->direct_ptimer) : -1;
282 __entry->emul_ptimer =
283 (map->emul_ptimer) ? arch_timer_ctx_index(map->emul_ptimer) : -1;
284 ),
285
286 TP_printk("VCPU: %ld, dv: %d, dp: %d, ep: %d",
287 __entry->vcpu_id,
288 __entry->direct_vtimer,
289 __entry->direct_ptimer,
290 __entry->emul_ptimer)
291);
292
293TRACE_EVENT(kvm_timer_save_state,
294 TP_PROTO(struct arch_timer_context *ctx),
295 TP_ARGS(ctx),
296
297 TP_STRUCT__entry(
298 __field( unsigned long, ctl )
299 __field( unsigned long long, cval )
300 __field( int, timer_idx )
301 ),
302
303 TP_fast_assign(
304 __entry->ctl = ctx->cnt_ctl;
305 __entry->cval = ctx->cnt_cval;
306 __entry->timer_idx = arch_timer_ctx_index(ctx);
307 ),
308
309 TP_printk(" CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d",
310 __entry->ctl,
311 __entry->cval,
312 __entry->timer_idx)
313);
314
315TRACE_EVENT(kvm_timer_restore_state,
316 TP_PROTO(struct arch_timer_context *ctx),
317 TP_ARGS(ctx),
318
319 TP_STRUCT__entry(
320 __field( unsigned long, ctl )
321 __field( unsigned long long, cval )
322 __field( int, timer_idx )
323 ),
324
325 TP_fast_assign(
326 __entry->ctl = ctx->cnt_ctl;
327 __entry->cval = ctx->cnt_cval;
328 __entry->timer_idx = arch_timer_ctx_index(ctx);
329 ),
330
331 TP_printk("CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d",
332 __entry->ctl,
333 __entry->cval,
334 __entry->timer_idx)
335);
336
337TRACE_EVENT(kvm_timer_hrtimer_expire,
338 TP_PROTO(struct arch_timer_context *ctx),
339 TP_ARGS(ctx),
340
341 TP_STRUCT__entry(
342 __field( int, timer_idx )
343 ),
344
345 TP_fast_assign(
346 __entry->timer_idx = arch_timer_ctx_index(ctx);
347 ),
348
349 TP_printk("arch_timer_ctx_index: %d", __entry->timer_idx)
350);
351
352TRACE_EVENT(kvm_timer_emulate,
353 TP_PROTO(struct arch_timer_context *ctx, bool should_fire),
354 TP_ARGS(ctx, should_fire),
355
356 TP_STRUCT__entry(
357 __field( int, timer_idx )
358 __field( bool, should_fire )
359 ),
360
361 TP_fast_assign(
362 __entry->timer_idx = arch_timer_ctx_index(ctx);
363 __entry->should_fire = should_fire;
364 ),
365
366 TP_printk("arch_timer_ctx_index: %d (should_fire: %d)",
367 __entry->timer_idx, __entry->should_fire)
368);
369
265#endif /* _TRACE_KVM_H */ 370#endif /* _TRACE_KVM_H */
266 371
267#undef TRACE_INCLUDE_PATH 372#undef TRACE_INCLUDE_PATH
268#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm 373#define TRACE_INCLUDE_PATH ../../virt/kvm/arm
269#undef TRACE_INCLUDE_FILE 374#undef TRACE_INCLUDE_FILE
270#define TRACE_INCLUDE_FILE trace 375#define TRACE_INCLUDE_FILE trace
271 376
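
Each TRACE_EVENT() above expands to a trace_<name>() helper callable from the timer code, and its output follows the TP_printk() format. Below is a standalone user-space sketch, not part of the patch and with illustrative struct and helper names, of how the kvm_get_timer_map event renders absent physical-timer contexts as -1:

/*
 * User-space sketch (not part of the patch) mirroring the kvm_get_timer_map
 * TP_fast_assign/TP_printk pair above: absent direct/emulated physical timer
 * contexts are reported as -1.  Struct names here are illustrative only.
 */
#include <stdio.h>

struct timer_ctx { int index; };

struct timer_map {
	struct timer_ctx *direct_vtimer;	/* always present */
	struct timer_ctx *direct_ptimer;	/* may be NULL */
	struct timer_ctx *emul_ptimer;		/* may be NULL */
};

static void trace_get_timer_map(unsigned long vcpu_id, const struct timer_map *map)
{
	int dv = map->direct_vtimer->index;
	int dp = map->direct_ptimer ? map->direct_ptimer->index : -1;
	int ep = map->emul_ptimer ? map->emul_ptimer->index : -1;

	printf("VCPU: %ld, dv: %d, dp: %d, ep: %d\n", (long)vcpu_id, dv, dp, ep);
}

int main(void)
{
	struct timer_ctx vtimer = { 0 }, ptimer = { 1 };
	struct timer_map map_a = { &vtimer, &ptimer, NULL };
	struct timer_map map_b = { &vtimer, NULL, &ptimer };

	trace_get_timer_map(0, &map_a);	/* VCPU: 0, dv: 0, dp: 1, ep: -1 */
	trace_get_timer_map(1, &map_b);	/* VCPU: 1, dv: 0, dp: -1, ep: 1 */
	return 0;
}
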
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 4ee0aeb9a905..408a78eb6a97 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -589,7 +589,7 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
589 */ 589 */
590int vgic_v3_probe(const struct gic_kvm_info *info) 590int vgic_v3_probe(const struct gic_kvm_info *info)
591{ 591{
592 u32 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2); 592 u32 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_ich_vtr_el2);
593 int ret; 593 int ret;
594 594
595 /* 595 /*
@@ -679,7 +679,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
679 struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; 679 struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
680 680
681 if (likely(cpu_if->vgic_sre)) 681 if (likely(cpu_if->vgic_sre))
682 cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr); 682 cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr);
683 683
684 kvm_call_hyp(__vgic_v3_save_aprs, vcpu); 684 kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
685 685
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 6855cce3e528..5294abb3f178 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -144,7 +144,8 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
144 if (zone->pio != 1 && zone->pio != 0) 144 if (zone->pio != 1 && zone->pio != 0)
145 return -EINVAL; 145 return -EINVAL;
146 146
147 dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); 147 dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev),
148 GFP_KERNEL_ACCOUNT);
148 if (!dev) 149 if (!dev)
149 return -ENOMEM; 150 return -ENOMEM;
150 151
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index b20b751286fc..4325250afd72 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -297,7 +297,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
297 if (!kvm_arch_intc_initialized(kvm)) 297 if (!kvm_arch_intc_initialized(kvm))
298 return -EAGAIN; 298 return -EAGAIN;
299 299
300 irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); 300 irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL_ACCOUNT);
301 if (!irqfd) 301 if (!irqfd)
302 return -ENOMEM; 302 return -ENOMEM;
303 303
@@ -345,7 +345,8 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
345 } 345 }
346 346
347 if (!irqfd->resampler) { 347 if (!irqfd->resampler) {
348 resampler = kzalloc(sizeof(*resampler), GFP_KERNEL); 348 resampler = kzalloc(sizeof(*resampler),
349 GFP_KERNEL_ACCOUNT);
349 if (!resampler) { 350 if (!resampler) {
350 ret = -ENOMEM; 351 ret = -ENOMEM;
351 mutex_unlock(&kvm->irqfds.resampler_lock); 352 mutex_unlock(&kvm->irqfds.resampler_lock);
@@ -797,7 +798,7 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
797 if (IS_ERR(eventfd)) 798 if (IS_ERR(eventfd))
798 return PTR_ERR(eventfd); 799 return PTR_ERR(eventfd);
799 800
800 p = kzalloc(sizeof(*p), GFP_KERNEL); 801 p = kzalloc(sizeof(*p), GFP_KERNEL_ACCOUNT);
801 if (!p) { 802 if (!p) {
802 ret = -ENOMEM; 803 ret = -ENOMEM;
803 goto fail; 804 goto fail;
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index b1286c4e0712..3547b0d8c91e 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -196,7 +196,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
196 nr_rt_entries += 1; 196 nr_rt_entries += 1;
197 197
198 new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)), 198 new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)),
199 GFP_KERNEL); 199 GFP_KERNEL_ACCOUNT);
200 200
201 if (!new) 201 if (!new)
202 return -ENOMEM; 202 return -ENOMEM;
@@ -208,7 +208,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
208 208
209 for (i = 0; i < nr; ++i) { 209 for (i = 0; i < nr; ++i) {
210 r = -ENOMEM; 210 r = -ENOMEM;
211 e = kzalloc(sizeof(*e), GFP_KERNEL); 211 e = kzalloc(sizeof(*e), GFP_KERNEL_ACCOUNT);
212 if (!e) 212 if (!e)
213 goto out; 213 goto out;
214 214
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d237d3350a99..f25aa98a94df 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -81,6 +81,11 @@ unsigned int halt_poll_ns_grow = 2;
81module_param(halt_poll_ns_grow, uint, 0644); 81module_param(halt_poll_ns_grow, uint, 0644);
82EXPORT_SYMBOL_GPL(halt_poll_ns_grow); 82EXPORT_SYMBOL_GPL(halt_poll_ns_grow);
83 83
84/* The start value to grow halt_poll_ns from */
85unsigned int halt_poll_ns_grow_start = 10000; /* 10us */
86module_param(halt_poll_ns_grow_start, uint, 0644);
87EXPORT_SYMBOL_GPL(halt_poll_ns_grow_start);
88
84/* Default resets per-vcpu halt_poll_ns . */ 89/* Default resets per-vcpu halt_poll_ns . */
85unsigned int halt_poll_ns_shrink; 90unsigned int halt_poll_ns_shrink;
86module_param(halt_poll_ns_shrink, uint, 0644); 91module_param(halt_poll_ns_shrink, uint, 0644);
@@ -525,7 +530,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void)
525 int i; 530 int i;
526 struct kvm_memslots *slots; 531 struct kvm_memslots *slots;
527 532
528 slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 533 slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT);
529 if (!slots) 534 if (!slots)
530 return NULL; 535 return NULL;
531 536
@@ -601,12 +606,12 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
601 606
602 kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, 607 kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries,
603 sizeof(*kvm->debugfs_stat_data), 608 sizeof(*kvm->debugfs_stat_data),
604 GFP_KERNEL); 609 GFP_KERNEL_ACCOUNT);
605 if (!kvm->debugfs_stat_data) 610 if (!kvm->debugfs_stat_data)
606 return -ENOMEM; 611 return -ENOMEM;
607 612
608 for (p = debugfs_entries; p->name; p++) { 613 for (p = debugfs_entries; p->name; p++) {
609 stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL); 614 stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT);
610 if (!stat_data) 615 if (!stat_data)
611 return -ENOMEM; 616 return -ENOMEM;
612 617
@@ -656,12 +661,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
656 struct kvm_memslots *slots = kvm_alloc_memslots(); 661 struct kvm_memslots *slots = kvm_alloc_memslots();
657 if (!slots) 662 if (!slots)
658 goto out_err_no_srcu; 663 goto out_err_no_srcu;
659 /* 664 /* Generations must be different for each address space. */
660 * Generations must be different for each address space. 665 slots->generation = i;
661 * Init kvm generation close to the maximum to easily test the
662 * code of handling generation number wrap-around.
663 */
664 slots->generation = i * 2 - 150;
665 rcu_assign_pointer(kvm->memslots[i], slots); 666 rcu_assign_pointer(kvm->memslots[i], slots);
666 } 667 }
667 668
@@ -671,7 +672,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
671 goto out_err_no_irq_srcu; 672 goto out_err_no_irq_srcu;
672 for (i = 0; i < KVM_NR_BUSES; i++) { 673 for (i = 0; i < KVM_NR_BUSES; i++) {
673 rcu_assign_pointer(kvm->buses[i], 674 rcu_assign_pointer(kvm->buses[i],
674 kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL)); 675 kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT));
675 if (!kvm->buses[i]) 676 if (!kvm->buses[i])
676 goto out_err; 677 goto out_err;
677 } 678 }
@@ -789,7 +790,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
789{ 790{
790 unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); 791 unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
791 792
792 memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL); 793 memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL_ACCOUNT);
793 if (!memslot->dirty_bitmap) 794 if (!memslot->dirty_bitmap)
794 return -ENOMEM; 795 return -ENOMEM;
795 796
@@ -874,31 +875,34 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
874 int as_id, struct kvm_memslots *slots) 875 int as_id, struct kvm_memslots *slots)
875{ 876{
876 struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id); 877 struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id);
878 u64 gen = old_memslots->generation;
877 879
878 /* 880 WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
879 * Set the low bit in the generation, which disables SPTE caching 881 slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
880 * until the end of synchronize_srcu_expedited.
881 */
882 WARN_ON(old_memslots->generation & 1);
883 slots->generation = old_memslots->generation + 1;
884 882
885 rcu_assign_pointer(kvm->memslots[as_id], slots); 883 rcu_assign_pointer(kvm->memslots[as_id], slots);
886 synchronize_srcu_expedited(&kvm->srcu); 884 synchronize_srcu_expedited(&kvm->srcu);
887 885
888 /* 886 /*
889 * Increment the new memslot generation a second time. This prevents 887 * Increment the new memslot generation a second time, dropping the
890 * vm exits that race with memslot updates from caching a memslot 888 * update in-progress flag and incrementing the generation based on
891 * generation that will (potentially) be valid forever. 889 * the number of address spaces. This provides a unique and easily
892 * 890 * identifiable generation number while the memslots are in flux.
891 */
892 gen = slots->generation & ~KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
893
894 /*
893 * Generations must be unique even across address spaces. We do not need 895 * Generations must be unique even across address spaces. We do not need
894 * a global counter for that, instead the generation space is evenly split 896 * a global counter for that, instead the generation space is evenly split
895 * across address spaces. For example, with two address spaces, address 897 * across address spaces. For example, with two address spaces, address
896 * space 0 will use generations 0, 4, 8, ... while * address space 1 will 898 * space 0 will use generations 0, 2, 4, ... while address space 1 will
897 * use generations 2, 6, 10, 14, ... 899 * use generations 1, 3, 5, ...
898 */ 900 */
899 slots->generation += KVM_ADDRESS_SPACE_NUM * 2 - 1; 901 gen += KVM_ADDRESS_SPACE_NUM;
902
903 kvm_arch_memslots_updated(kvm, gen);
900 904
901 kvm_arch_memslots_updated(kvm, slots); 905 slots->generation = gen;
902 906
903 return old_memslots; 907 return old_memslots;
904} 908}
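
To make the new numbering concrete, here is a minimal user-space sketch, not part of the patch, that models the flow in install_new_memslots() above: publish with an update-in-progress flag set, then drop the flag and step the generation by the number of address spaces. The flag is modelled here as the top bit of a u64; the real KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS definition lives elsewhere in this series.

/*
 * Minimal user-space sketch (not part of the patch) modelling the new
 * generation scheme.  The in-progress flag is modelled as the top bit of a
 * 64-bit generation; the starting values 0 and 1 mirror the per-address-space
 * initialization (slots->generation = i) shown earlier in this diff.
 */
#include <stdint.h>
#include <stdio.h>

#define NR_ADDRESS_SPACES	2			/* e.g. two memslot address spaces */
#define GEN_IN_PROGRESS		(1ULL << 63)		/* assumed flag bit for the demo */

static uint64_t update_generation(uint64_t old_gen)
{
	uint64_t gen;

	/* Publish the new memslots with the in-progress flag set. */
	gen = old_gen | GEN_IN_PROGRESS;

	/* ... synchronize_srcu_expedited() would run here ... */

	/* Drop the flag and bump by the number of address spaces. */
	gen &= ~GEN_IN_PROGRESS;
	gen += NR_ADDRESS_SPACES;
	return gen;
}

int main(void)
{
	/* Address space 0 starts at 0, address space 1 starts at 1. */
	uint64_t gen[NR_ADDRESS_SPACES] = { 0, 1 };

	for (int round = 0; round < 3; round++)
		for (int as = 0; as < NR_ADDRESS_SPACES; as++) {
			gen[as] = update_generation(gen[as]);
			printf("as %d -> gen %llu\n", as,
			       (unsigned long long)gen[as]);
		}
	/* Prints 2, 3, 4, 5, 6, 7: even generations for as 0, odd for as 1. */
	return 0;
}
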
@@ -1018,7 +1022,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
1018 goto out_free; 1022 goto out_free;
1019 } 1023 }
1020 1024
1021 slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 1025 slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT);
1022 if (!slots) 1026 if (!slots)
1023 goto out_free; 1027 goto out_free;
1024 memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots)); 1028 memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots));
@@ -1201,11 +1205,9 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
1201 mask = xchg(&dirty_bitmap[i], 0); 1205 mask = xchg(&dirty_bitmap[i], 0);
1202 dirty_bitmap_buffer[i] = mask; 1206 dirty_bitmap_buffer[i] = mask;
1203 1207
1204 if (mask) { 1208 offset = i * BITS_PER_LONG;
1205 offset = i * BITS_PER_LONG; 1209 kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
1206 kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, 1210 offset, mask);
1207 offset, mask);
1208 }
1209 } 1211 }
1210 spin_unlock(&kvm->mmu_lock); 1212 spin_unlock(&kvm->mmu_lock);
1211 } 1213 }
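
After this hunk, the per-word arch hook runs for every word of the dirty bitmap, not only for words that have dirty bits set. A standalone sketch of the resulting loop shape follows; the handler name and sizes are illustrative, not the kernel's.

/*
 * Standalone sketch (not part of the patch) of the loop shape after the
 * change above: the per-word hook runs unconditionally, even for words
 * whose dirty mask is zero.
 */
#include <stdio.h>

#define BITS_PER_LONG	(8 * sizeof(unsigned long))
#define NR_WORDS	4

static void enable_log_dirty_pt_masked(unsigned long offset, unsigned long mask)
{
	/* Stand-in for kvm_arch_mmu_enable_log_dirty_pt_masked(). */
	printf("offset %lu mask %#lx\n", offset, mask);
}

int main(void)
{
	unsigned long dirty_bitmap[NR_WORDS] = { 0x5, 0x0, 0x80, 0x0 };
	unsigned long dirty_bitmap_buffer[NR_WORDS];

	for (unsigned long i = 0; i < NR_WORDS; i++) {
		/* xchg() in the kernel: grab and clear the word atomically. */
		unsigned long mask = __atomic_exchange_n(&dirty_bitmap[i], 0,
							 __ATOMIC_SEQ_CST);

		dirty_bitmap_buffer[i] = mask;
		enable_log_dirty_pt_masked(i * BITS_PER_LONG, mask);
	}
	(void)dirty_bitmap_buffer;
	return 0;
}
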
@@ -2185,20 +2187,23 @@ void kvm_sigset_deactivate(struct kvm_vcpu *vcpu)
2185 2187
2186static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) 2188static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
2187{ 2189{
2188 unsigned int old, val, grow; 2190 unsigned int old, val, grow, grow_start;
2189 2191
2190 old = val = vcpu->halt_poll_ns; 2192 old = val = vcpu->halt_poll_ns;
2193 grow_start = READ_ONCE(halt_poll_ns_grow_start);
2191 grow = READ_ONCE(halt_poll_ns_grow); 2194 grow = READ_ONCE(halt_poll_ns_grow);
2192 /* 10us base */ 2195 if (!grow)
2193 if (val == 0 && grow) 2196 goto out;
2194 val = 10000; 2197
2195 else 2198 val *= grow;
2196 val *= grow; 2199 if (val < grow_start)
2200 val = grow_start;
2197 2201
2198 if (val > halt_poll_ns) 2202 if (val > halt_poll_ns)
2199 val = halt_poll_ns; 2203 val = halt_poll_ns;
2200 2204
2201 vcpu->halt_poll_ns = val; 2205 vcpu->halt_poll_ns = val;
2206out:
2202 trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); 2207 trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
2203} 2208}
2204 2209
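
The rewritten grow_halt_poll_ns() above multiplies the current value by halt_poll_ns_grow, raises it to at least halt_poll_ns_grow_start, and caps it at halt_poll_ns. A standalone user-space sketch of that policy; the cap value is chosen only for the demo.

/*
 * User-space sketch (not part of the patch) of the reworked growth policy:
 * start growing from halt_poll_ns_grow_start, cap at halt_poll_ns, and do
 * nothing when growing is disabled.  Defaults mirror the module parameters
 * shown above; the cap is an assumption for this demo.
 */
#include <stdio.h>

static unsigned int halt_poll_ns = 200000;		/* assumed cap for the demo */
static unsigned int halt_poll_ns_grow = 2;
static unsigned int halt_poll_ns_grow_start = 10000;	/* 10us */

static unsigned int grow(unsigned int val)
{
	if (!halt_poll_ns_grow)
		return val;		/* growing disabled */

	val *= halt_poll_ns_grow;
	if (val < halt_poll_ns_grow_start)
		val = halt_poll_ns_grow_start;
	if (val > halt_poll_ns)
		val = halt_poll_ns;
	return val;
}

int main(void)
{
	unsigned int ns = 0;

	for (int i = 0; i < 6; i++) {
		ns = grow(ns);
		/* 10000, 20000, 40000, 80000, 160000, 200000 */
		printf("halt_poll_ns = %u\n", ns);
	}
	return 0;
}
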
@@ -2683,7 +2688,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
2683 struct kvm_regs *kvm_regs; 2688 struct kvm_regs *kvm_regs;
2684 2689
2685 r = -ENOMEM; 2690 r = -ENOMEM;
2686 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); 2691 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL_ACCOUNT);
2687 if (!kvm_regs) 2692 if (!kvm_regs)
2688 goto out; 2693 goto out;
2689 r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); 2694 r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
@@ -2711,7 +2716,8 @@ out_free1:
2711 break; 2716 break;
2712 } 2717 }
2713 case KVM_GET_SREGS: { 2718 case KVM_GET_SREGS: {
2714 kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); 2719 kvm_sregs = kzalloc(sizeof(struct kvm_sregs),
2720 GFP_KERNEL_ACCOUNT);
2715 r = -ENOMEM; 2721 r = -ENOMEM;
2716 if (!kvm_sregs) 2722 if (!kvm_sregs)
2717 goto out; 2723 goto out;
@@ -2803,7 +2809,7 @@ out_free1:
2803 break; 2809 break;
2804 } 2810 }
2805 case KVM_GET_FPU: { 2811 case KVM_GET_FPU: {
2806 fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); 2812 fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL_ACCOUNT);
2807 r = -ENOMEM; 2813 r = -ENOMEM;
2808 if (!fpu) 2814 if (!fpu)
2809 goto out; 2815 goto out;
@@ -2980,7 +2986,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
2980 if (test) 2986 if (test)
2981 return 0; 2987 return 0;
2982 2988
2983 dev = kzalloc(sizeof(*dev), GFP_KERNEL); 2989 dev = kzalloc(sizeof(*dev), GFP_KERNEL_ACCOUNT);
2984 if (!dev) 2990 if (!dev)
2985 return -ENOMEM; 2991 return -ENOMEM;
2986 2992
@@ -3625,6 +3631,7 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
3625 r = __kvm_io_bus_write(vcpu, bus, &range, val); 3631 r = __kvm_io_bus_write(vcpu, bus, &range, val);
3626 return r < 0 ? r : 0; 3632 return r < 0 ? r : 0;
3627} 3633}
3634EXPORT_SYMBOL_GPL(kvm_io_bus_write);
3628 3635
3629/* kvm_io_bus_write_cookie - called under kvm->slots_lock */ 3636/* kvm_io_bus_write_cookie - called under kvm->slots_lock */
3630int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, 3637int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
@@ -3675,7 +3682,6 @@ static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
3675 3682
3676 return -EOPNOTSUPP; 3683 return -EOPNOTSUPP;
3677} 3684}
3678EXPORT_SYMBOL_GPL(kvm_io_bus_write);
3679 3685
3680/* kvm_io_bus_read - called under kvm->slots_lock */ 3686/* kvm_io_bus_read - called under kvm->slots_lock */
3681int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, 3687int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
@@ -3697,7 +3703,6 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
3697 return r < 0 ? r : 0; 3703 return r < 0 ? r : 0;
3698} 3704}
3699 3705
3700
3701/* Caller must hold slots_lock. */ 3706/* Caller must hold slots_lock. */
3702int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 3707int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
3703 int len, struct kvm_io_device *dev) 3708 int len, struct kvm_io_device *dev)
@@ -3714,8 +3719,8 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
3714 if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) 3719 if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
3715 return -ENOSPC; 3720 return -ENOSPC;
3716 3721
3717 new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count + 1) * 3722 new_bus = kmalloc(struct_size(bus, range, bus->dev_count + 1),
3718 sizeof(struct kvm_io_range)), GFP_KERNEL); 3723 GFP_KERNEL_ACCOUNT);
3719 if (!new_bus) 3724 if (!new_bus)
3720 return -ENOMEM; 3725 return -ENOMEM;
3721 3726
@@ -3760,8 +3765,8 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
3760 if (i == bus->dev_count) 3765 if (i == bus->dev_count)
3761 return; 3766 return;
3762 3767
3763 new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * 3768 new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1),
3764 sizeof(struct kvm_io_range)), GFP_KERNEL); 3769 GFP_KERNEL_ACCOUNT);
3765 if (!new_bus) { 3770 if (!new_bus) {
3766 pr_err("kvm: failed to shrink bus, removing it completely\n"); 3771 pr_err("kvm: failed to shrink bus, removing it completely\n");
3767 goto broken; 3772 goto broken;
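
Both bus-resize paths now size the allocation with struct_size(), i.e. the bus header plus a flexible array of ranges, with overflow checking handled by the helper. Below is a user-space sketch of the equivalent open-coded arithmetic; the struct layout is simplified and the sketch does not saturate on overflow the way the kernel helper does.

/*
 * User-space sketch (not part of the patch) of what struct_size(bus, range, n)
 * computes for a struct ending in a flexible array member.  The kernel helper
 * additionally saturates on arithmetic overflow; this open-coded version does not.
 */
#include <stdio.h>
#include <stdlib.h>

struct io_range { unsigned long addr; int len; };

struct io_bus {
	int dev_count;
	struct io_range range[];	/* flexible array member */
};

int main(void)
{
	int n = 3;
	size_t sz = sizeof(struct io_bus) + n * sizeof(struct io_range);
	struct io_bus *bus = calloc(1, sz);

	if (!bus)
		return 1;
	bus->dev_count = n;
	printf("allocated %zu bytes for %d ranges\n", sz, bus->dev_count);
	free(bus);
	return 0;
}
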
@@ -4029,7 +4034,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
4029 active = kvm_active_vms; 4034 active = kvm_active_vms;
4030 spin_unlock(&kvm_lock); 4035 spin_unlock(&kvm_lock);
4031 4036
4032 env = kzalloc(sizeof(*env), GFP_KERNEL); 4037 env = kzalloc(sizeof(*env), GFP_KERNEL_ACCOUNT);
4033 if (!env) 4038 if (!env)
4034 return; 4039 return;
4035 4040
@@ -4045,7 +4050,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
4045 add_uevent_var(env, "PID=%d", kvm->userspace_pid); 4050 add_uevent_var(env, "PID=%d", kvm->userspace_pid);
4046 4051
4047 if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) { 4052 if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) {
4048 char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL); 4053 char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT);
4049 4054
4050 if (p) { 4055 if (p) {
4051 tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); 4056 tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX);
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index d99850c462a1..524cbd20379f 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -219,7 +219,7 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
219 } 219 }
220 } 220 }
221 221
222 kvg = kzalloc(sizeof(*kvg), GFP_KERNEL); 222 kvg = kzalloc(sizeof(*kvg), GFP_KERNEL_ACCOUNT);
223 if (!kvg) { 223 if (!kvg) {
224 mutex_unlock(&kv->lock); 224 mutex_unlock(&kv->lock);
225 kvm_vfio_group_put_external_user(vfio_group); 225 kvm_vfio_group_put_external_user(vfio_group);
@@ -405,7 +405,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
405 if (tmp->ops == &kvm_vfio_ops) 405 if (tmp->ops == &kvm_vfio_ops)
406 return -EBUSY; 406 return -EBUSY;
407 407
408 kv = kzalloc(sizeof(*kv), GFP_KERNEL); 408 kv = kzalloc(sizeof(*kv), GFP_KERNEL_ACCOUNT);
409 if (!kv) 409 if (!kv)
410 return -ENOMEM; 410 return -ENOMEM;
411 411