author    Linus Torvalds <torvalds@linux-foundation.org>  2019-03-15 18:00:28 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2019-03-15 18:00:28 -0400
commit    636deed6c0bc137a7c4f4a97ae1fcf0ad75323da (patch)
tree      7bd27189b8e30e3c1466f7730831a08db65f8646 /virt
parent    aa2e3ac64ace127f403be85aa4d6015b859385f2 (diff)
parent    4a605bc08e98381d8df61c30a4acb2eac15eb7da (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
 "ARM:
   - some cleanups
   - direct physical timer assignment
   - cache sanitization for 32-bit guests

  s390:
   - interrupt cleanup
   - introduction of the Guest Information Block
   - preparation for processor subfunctions in cpu models

  PPC:
   - bug fixes and improvements, especially related to machine checks
     and protection keys

  x86:
   - many, many cleanups, including removing a bunch of MMU code for
     unnecessary optimizations
   - AVIC fixes

  Generic:
   - memcg accounting"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (147 commits)
  kvm: vmx: fix formatting of a comment
  KVM: doc: Document the life cycle of a VM and its resources
  MAINTAINERS: Add KVM selftests to existing KVM entry
  Revert "KVM/MMU: Flush tlb directly in the kvm_zap_gfn_range()"
  KVM: PPC: Book3S: Add count cache flush parameters to kvmppc_get_cpu_char()
  KVM: PPC: Fix compilation when KVM is not enabled
  KVM: Minor cleanups for kvm_main.c
  KVM: s390: add debug logging for cpu model subfunctions
  KVM: s390: implement subfunction processor calls
  arm64: KVM: Fix architecturally invalid reset value for FPEXC32_EL2
  KVM: arm/arm64: Remove unused timer variable
  KVM: PPC: Book3S: Improve KVM reference counting
  KVM: PPC: Book3S HV: Fix build failure without IOMMU support
  Revert "KVM: Eliminate extra function calls in kvm_get_dirty_log_protect()"
  x86: kvmguest: use TSC clocksource if invariant TSC is exposed
  KVM: Never start grow vCPU halt_poll_ns from value below halt_poll_ns_grow_start
  KVM: Expose the initial start value in grow_halt_poll_ns() as a module parameter
  KVM: grow_halt_poll_ns() should never shrink vCPU halt_poll_ns
  KVM: x86/mmu: Consolidate kvm_mmu_zap_all() and kvm_mmu_zap_mmio_sptes()
  KVM: x86/mmu: WARN if zapping a MMIO spte results in zapping children
  ...
Diffstat (limited to 'virt')
-rw-r--r--  virt/kvm/arm/arch_timer.c      | 608
-rw-r--r--  virt/kvm/arm/arm.c             |  64
-rw-r--r--  virt/kvm/arm/hyp/vgic-v3-sr.c  |   2
-rw-r--r--  virt/kvm/arm/mmu.c             |  20
-rw-r--r--  virt/kvm/arm/trace.h           | 107
-rw-r--r--  virt/kvm/arm/vgic/vgic-v3.c    |   4
-rw-r--r--  virt/kvm/coalesced_mmio.c      |   3
-rw-r--r--  virt/kvm/eventfd.c             |   7
-rw-r--r--  virt/kvm/irqchip.c             |   4
-rw-r--r--  virt/kvm/kvm_main.c            | 103
-rw-r--r--  virt/kvm/vfio.c                |   4
11 files changed, 631 insertions, 295 deletions
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index b07ac4614e1c..3417f2dbc366 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -25,6 +25,7 @@
25 25
26#include <clocksource/arm_arch_timer.h> 26#include <clocksource/arm_arch_timer.h>
27#include <asm/arch_timer.h> 27#include <asm/arch_timer.h>
28#include <asm/kvm_emulate.h>
28#include <asm/kvm_hyp.h> 29#include <asm/kvm_hyp.h>
29 30
30#include <kvm/arm_vgic.h> 31#include <kvm/arm_vgic.h>
@@ -34,7 +35,9 @@
34 35
35static struct timecounter *timecounter; 36static struct timecounter *timecounter;
36static unsigned int host_vtimer_irq; 37static unsigned int host_vtimer_irq;
38static unsigned int host_ptimer_irq;
37static u32 host_vtimer_irq_flags; 39static u32 host_vtimer_irq_flags;
40static u32 host_ptimer_irq_flags;
38 41
39static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); 42static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
40 43
@@ -52,12 +55,34 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
52static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, 55static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
53 struct arch_timer_context *timer_ctx); 56 struct arch_timer_context *timer_ctx);
54static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); 57static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
58static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
59 struct arch_timer_context *timer,
60 enum kvm_arch_timer_regs treg,
61 u64 val);
62static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
63 struct arch_timer_context *timer,
64 enum kvm_arch_timer_regs treg);
55 65
56u64 kvm_phys_timer_read(void) 66u64 kvm_phys_timer_read(void)
57{ 67{
58 return timecounter->cc->read(timecounter->cc); 68 return timecounter->cc->read(timecounter->cc);
59} 69}
60 70
71static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
72{
73 if (has_vhe()) {
74 map->direct_vtimer = vcpu_vtimer(vcpu);
75 map->direct_ptimer = vcpu_ptimer(vcpu);
76 map->emul_ptimer = NULL;
77 } else {
78 map->direct_vtimer = vcpu_vtimer(vcpu);
79 map->direct_ptimer = NULL;
80 map->emul_ptimer = vcpu_ptimer(vcpu);
81 }
82
83 trace_kvm_get_timer_map(vcpu->vcpu_id, map);
84}
85
61static inline bool userspace_irqchip(struct kvm *kvm) 86static inline bool userspace_irqchip(struct kvm *kvm)
62{ 87{
63 return static_branch_unlikely(&userspace_irqchip_in_use) && 88 return static_branch_unlikely(&userspace_irqchip_in_use) &&
@@ -78,20 +103,27 @@ static void soft_timer_cancel(struct hrtimer *hrt)
78static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) 103static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
79{ 104{
80 struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; 105 struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
81 struct arch_timer_context *vtimer; 106 struct arch_timer_context *ctx;
107 struct timer_map map;
82 108
83 /* 109 /*
84 * We may see a timer interrupt after vcpu_put() has been called which 110 * We may see a timer interrupt after vcpu_put() has been called which
85 * sets the CPU's vcpu pointer to NULL, because even though the timer 111 * sets the CPU's vcpu pointer to NULL, because even though the timer
86 * has been disabled in vtimer_save_state(), the hardware interrupt 112 * has been disabled in timer_save_state(), the hardware interrupt
87 * signal may not have been retired from the interrupt controller yet. 113 * signal may not have been retired from the interrupt controller yet.
88 */ 114 */
89 if (!vcpu) 115 if (!vcpu)
90 return IRQ_HANDLED; 116 return IRQ_HANDLED;
91 117
92 vtimer = vcpu_vtimer(vcpu); 118 get_timer_map(vcpu, &map);
93 if (kvm_timer_should_fire(vtimer)) 119
94 kvm_timer_update_irq(vcpu, true, vtimer); 120 if (irq == host_vtimer_irq)
121 ctx = map.direct_vtimer;
122 else
123 ctx = map.direct_ptimer;
124
125 if (kvm_timer_should_fire(ctx))
126 kvm_timer_update_irq(vcpu, true, ctx);
95 127
96 if (userspace_irqchip(vcpu->kvm) && 128 if (userspace_irqchip(vcpu->kvm) &&
97 !static_branch_unlikely(&has_gic_active_state)) 129 !static_branch_unlikely(&has_gic_active_state))
@@ -122,7 +154,9 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
122 154
123static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) 155static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
124{ 156{
125 return !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && 157 WARN_ON(timer_ctx && timer_ctx->loaded);
158 return timer_ctx &&
159 !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
126 (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE); 160 (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE);
127} 161}
128 162
@@ -132,21 +166,22 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
132 */ 166 */
133static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) 167static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
134{ 168{
135 u64 min_virt = ULLONG_MAX, min_phys = ULLONG_MAX; 169 u64 min_delta = ULLONG_MAX;
136 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 170 int i;
137 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
138 171
139 if (kvm_timer_irq_can_fire(vtimer)) 172 for (i = 0; i < NR_KVM_TIMERS; i++) {
140 min_virt = kvm_timer_compute_delta(vtimer); 173 struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];
141 174
142 if (kvm_timer_irq_can_fire(ptimer)) 175 WARN(ctx->loaded, "timer %d loaded\n", i);
143 min_phys = kvm_timer_compute_delta(ptimer); 176 if (kvm_timer_irq_can_fire(ctx))
177 min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
178 }
144 179
145 /* If none of timers can fire, then return 0 */ 180 /* If none of timers can fire, then return 0 */
146 if ((min_virt == ULLONG_MAX) && (min_phys == ULLONG_MAX)) 181 if (min_delta == ULLONG_MAX)
147 return 0; 182 return 0;
148 183
149 return min(min_virt, min_phys); 184 return min_delta;
150} 185}
151 186
152static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt) 187static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
@@ -173,41 +208,58 @@ static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
173 return HRTIMER_NORESTART; 208 return HRTIMER_NORESTART;
174} 209}
175 210
176static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt) 211static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
177{ 212{
178 struct arch_timer_context *ptimer; 213 struct arch_timer_context *ctx;
179 struct arch_timer_cpu *timer;
180 struct kvm_vcpu *vcpu; 214 struct kvm_vcpu *vcpu;
181 u64 ns; 215 u64 ns;
182 216
183 timer = container_of(hrt, struct arch_timer_cpu, phys_timer); 217 ctx = container_of(hrt, struct arch_timer_context, hrtimer);
184 vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); 218 vcpu = ctx->vcpu;
185 ptimer = vcpu_ptimer(vcpu); 219
220 trace_kvm_timer_hrtimer_expire(ctx);
186 221
187 /* 222 /*
188 * Check that the timer has really expired from the guest's 223 * Check that the timer has really expired from the guest's
189 * PoV (NTP on the host may have forced it to expire 224 * PoV (NTP on the host may have forced it to expire
190 * early). If not ready, schedule for a later time. 225 * early). If not ready, schedule for a later time.
191 */ 226 */
192 ns = kvm_timer_compute_delta(ptimer); 227 ns = kvm_timer_compute_delta(ctx);
193 if (unlikely(ns)) { 228 if (unlikely(ns)) {
194 hrtimer_forward_now(hrt, ns_to_ktime(ns)); 229 hrtimer_forward_now(hrt, ns_to_ktime(ns));
195 return HRTIMER_RESTART; 230 return HRTIMER_RESTART;
196 } 231 }
197 232
198 kvm_timer_update_irq(vcpu, true, ptimer); 233 kvm_timer_update_irq(vcpu, true, ctx);
199 return HRTIMER_NORESTART; 234 return HRTIMER_NORESTART;
200} 235}
201 236
202static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) 237static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
203{ 238{
239 enum kvm_arch_timers index;
204 u64 cval, now; 240 u64 cval, now;
205 241
242 if (!timer_ctx)
243 return false;
244
245 index = arch_timer_ctx_index(timer_ctx);
246
206 if (timer_ctx->loaded) { 247 if (timer_ctx->loaded) {
207 u32 cnt_ctl; 248 u32 cnt_ctl = 0;
249
250 switch (index) {
251 case TIMER_VTIMER:
252 cnt_ctl = read_sysreg_el0(cntv_ctl);
253 break;
254 case TIMER_PTIMER:
255 cnt_ctl = read_sysreg_el0(cntp_ctl);
256 break;
257 case NR_KVM_TIMERS:
258 /* GCC is braindead */
259 cnt_ctl = 0;
260 break;
261 }
208 262
209 /* Only the virtual timer can be loaded so far */
210 cnt_ctl = read_sysreg_el0(cntv_ctl);
211 return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) && 263 return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
212 (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) && 264 (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
213 !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK); 265 !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
@@ -224,13 +276,13 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
224 276
225bool kvm_timer_is_pending(struct kvm_vcpu *vcpu) 277bool kvm_timer_is_pending(struct kvm_vcpu *vcpu)
226{ 278{
227 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 279 struct timer_map map;
228 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
229 280
230 if (kvm_timer_should_fire(vtimer)) 281 get_timer_map(vcpu, &map);
231 return true;
232 282
233 return kvm_timer_should_fire(ptimer); 283 return kvm_timer_should_fire(map.direct_vtimer) ||
284 kvm_timer_should_fire(map.direct_ptimer) ||
285 kvm_timer_should_fire(map.emul_ptimer);
234} 286}
235 287
236/* 288/*
@@ -269,77 +321,70 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
269 } 321 }
270} 322}
271 323
272/* Schedule the background timer for the emulated timer. */ 324static void timer_emulate(struct arch_timer_context *ctx)
273static void phys_timer_emulate(struct kvm_vcpu *vcpu)
274{ 325{
275 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 326 bool should_fire = kvm_timer_should_fire(ctx);
276 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); 327
328 trace_kvm_timer_emulate(ctx, should_fire);
329
330 if (should_fire) {
331 kvm_timer_update_irq(ctx->vcpu, true, ctx);
332 return;
333 }
277 334
278 /* 335 /*
279 * If the timer can fire now, we don't need to have a soft timer 336 * If the timer can fire now, we don't need to have a soft timer
280 * scheduled for the future. If the timer cannot fire at all, 337 * scheduled for the future. If the timer cannot fire at all,
281 * then we also don't need a soft timer. 338 * then we also don't need a soft timer.
282 */ 339 */
283 if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) { 340 if (!kvm_timer_irq_can_fire(ctx)) {
284 soft_timer_cancel(&timer->phys_timer); 341 soft_timer_cancel(&ctx->hrtimer);
285 return; 342 return;
286 } 343 }
287 344
288 soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(ptimer)); 345 soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
289} 346}
290 347
291/* 348static void timer_save_state(struct arch_timer_context *ctx)
292 * Check if there was a change in the timer state, so that we should either
293 * raise or lower the line level to the GIC or schedule a background timer to
294 * emulate the physical timer.
295 */
296static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
297{ 349{
298 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 350 struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
299 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 351 enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
300 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); 352 unsigned long flags;
301 bool level;
302 353
303 if (unlikely(!timer->enabled)) 354 if (!timer->enabled)
304 return; 355 return;
305 356
306 /* 357 local_irq_save(flags);
307 * The vtimer virtual interrupt is a 'mapped' interrupt, meaning part
308 * of its lifecycle is offloaded to the hardware, and we therefore may
309 * not have lowered the irq.level value before having to signal a new
310 * interrupt, but have to signal an interrupt every time the level is
311 * asserted.
312 */
313 level = kvm_timer_should_fire(vtimer);
314 kvm_timer_update_irq(vcpu, level, vtimer);
315 358
316 phys_timer_emulate(vcpu); 359 if (!ctx->loaded)
360 goto out;
317 361
318 if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) 362 switch (index) {
319 kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); 363 case TIMER_VTIMER:
320} 364 ctx->cnt_ctl = read_sysreg_el0(cntv_ctl);
365 ctx->cnt_cval = read_sysreg_el0(cntv_cval);
321 366
322static void vtimer_save_state(struct kvm_vcpu *vcpu) 367 /* Disable the timer */
323{ 368 write_sysreg_el0(0, cntv_ctl);
324 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 369 isb();
325 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
326 unsigned long flags;
327 370
328 local_irq_save(flags); 371 break;
372 case TIMER_PTIMER:
373 ctx->cnt_ctl = read_sysreg_el0(cntp_ctl);
374 ctx->cnt_cval = read_sysreg_el0(cntp_cval);
329 375
330 if (!vtimer->loaded) 376 /* Disable the timer */
331 goto out; 377 write_sysreg_el0(0, cntp_ctl);
378 isb();
332 379
333 if (timer->enabled) { 380 break;
334 vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); 381 case NR_KVM_TIMERS:
335 vtimer->cnt_cval = read_sysreg_el0(cntv_cval); 382 BUG();
336 } 383 }
337 384
338 /* Disable the virtual timer */ 385 trace_kvm_timer_save_state(ctx);
339 write_sysreg_el0(0, cntv_ctl);
340 isb();
341 386
342 vtimer->loaded = false; 387 ctx->loaded = false;
343out: 388out:
344 local_irq_restore(flags); 389 local_irq_restore(flags);
345} 390}
@@ -349,67 +394,72 @@ out:
349 * thread is removed from its waitqueue and made runnable when there's a timer 394 * thread is removed from its waitqueue and made runnable when there's a timer
350 * interrupt to handle. 395 * interrupt to handle.
351 */ 396 */
352void kvm_timer_schedule(struct kvm_vcpu *vcpu) 397static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
353{ 398{
354 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 399 struct arch_timer_cpu *timer = vcpu_timer(vcpu);
355 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 400 struct timer_map map;
356 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
357
358 vtimer_save_state(vcpu);
359 401
360 /* 402 get_timer_map(vcpu, &map);
361 * No need to schedule a background timer if any guest timer has
362 * already expired, because kvm_vcpu_block will return before putting
363 * the thread to sleep.
364 */
365 if (kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer))
366 return;
367 403
368 /* 404 /*
369 * If both timers are not capable of raising interrupts (disabled or 405 * If no timers are capable of raising interrupts (disabled or
370 * masked), then there's no more work for us to do. 406 * masked), then there's no more work for us to do.
371 */ 407 */
372 if (!kvm_timer_irq_can_fire(vtimer) && !kvm_timer_irq_can_fire(ptimer)) 408 if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
409 !kvm_timer_irq_can_fire(map.direct_ptimer) &&
410 !kvm_timer_irq_can_fire(map.emul_ptimer))
373 return; 411 return;
374 412
375 /* 413 /*
376 * The guest timers have not yet expired, schedule a background timer. 414 * At least one guest time will expire. Schedule a background timer.
377 * Set the earliest expiration time among the guest timers. 415 * Set the earliest expiration time among the guest timers.
378 */ 416 */
379 soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu)); 417 soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
380} 418}
381 419
382static void vtimer_restore_state(struct kvm_vcpu *vcpu) 420static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
383{ 421{
384 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 422 struct arch_timer_cpu *timer = vcpu_timer(vcpu);
385 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 423
424 soft_timer_cancel(&timer->bg_timer);
425}
426
427static void timer_restore_state(struct arch_timer_context *ctx)
428{
429 struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
430 enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
386 unsigned long flags; 431 unsigned long flags;
387 432
433 if (!timer->enabled)
434 return;
435
388 local_irq_save(flags); 436 local_irq_save(flags);
389 437
390 if (vtimer->loaded) 438 if (ctx->loaded)
391 goto out; 439 goto out;
392 440
393 if (timer->enabled) { 441 switch (index) {
394 write_sysreg_el0(vtimer->cnt_cval, cntv_cval); 442 case TIMER_VTIMER:
443 write_sysreg_el0(ctx->cnt_cval, cntv_cval);
395 isb(); 444 isb();
396 write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl); 445 write_sysreg_el0(ctx->cnt_ctl, cntv_ctl);
446 break;
447 case TIMER_PTIMER:
448 write_sysreg_el0(ctx->cnt_cval, cntp_cval);
449 isb();
450 write_sysreg_el0(ctx->cnt_ctl, cntp_ctl);
451 break;
452 case NR_KVM_TIMERS:
453 BUG();
397 } 454 }
398 455
399 vtimer->loaded = true; 456 trace_kvm_timer_restore_state(ctx);
457
458 ctx->loaded = true;
400out: 459out:
401 local_irq_restore(flags); 460 local_irq_restore(flags);
402} 461}
403 462
404void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
405{
406 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
407
408 vtimer_restore_state(vcpu);
409
410 soft_timer_cancel(&timer->bg_timer);
411}
412
413static void set_cntvoff(u64 cntvoff) 463static void set_cntvoff(u64 cntvoff)
414{ 464{
415 u32 low = lower_32_bits(cntvoff); 465 u32 low = lower_32_bits(cntvoff);
@@ -425,23 +475,32 @@ static void set_cntvoff(u64 cntvoff)
425 kvm_call_hyp(__kvm_timer_set_cntvoff, low, high); 475 kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
426} 476}
427 477
428static inline void set_vtimer_irq_phys_active(struct kvm_vcpu *vcpu, bool active) 478static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
429{ 479{
430 int r; 480 int r;
431 r = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, active); 481 r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active);
432 WARN_ON(r); 482 WARN_ON(r);
433} 483}
434 484
435static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu) 485static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
436{ 486{
437 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 487 struct kvm_vcpu *vcpu = ctx->vcpu;
438 bool phys_active; 488 bool phys_active = false;
489
490 /*
491 * Update the timer output so that it is likely to match the
492 * state we're about to restore. If the timer expires between
493 * this point and the register restoration, we'll take the
494 * interrupt anyway.
495 */
496 kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);
439 497
440 if (irqchip_in_kernel(vcpu->kvm)) 498 if (irqchip_in_kernel(vcpu->kvm))
441 phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); 499 phys_active = kvm_vgic_map_is_active(vcpu, ctx->irq.irq);
442 else 500
443 phys_active = vtimer->irq.level; 501 phys_active |= ctx->irq.level;
444 set_vtimer_irq_phys_active(vcpu, phys_active); 502
503 set_timer_irq_phys_active(ctx, phys_active);
445} 504}
446 505
447static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) 506static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
@@ -466,28 +525,32 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
466 525
467void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) 526void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
468{ 527{
469 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 528 struct arch_timer_cpu *timer = vcpu_timer(vcpu);
470 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 529 struct timer_map map;
471 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
472 530
473 if (unlikely(!timer->enabled)) 531 if (unlikely(!timer->enabled))
474 return; 532 return;
475 533
476 if (static_branch_likely(&has_gic_active_state)) 534 get_timer_map(vcpu, &map);
477 kvm_timer_vcpu_load_gic(vcpu); 535
478 else 536 if (static_branch_likely(&has_gic_active_state)) {
537 kvm_timer_vcpu_load_gic(map.direct_vtimer);
538 if (map.direct_ptimer)
539 kvm_timer_vcpu_load_gic(map.direct_ptimer);
540 } else {
479 kvm_timer_vcpu_load_nogic(vcpu); 541 kvm_timer_vcpu_load_nogic(vcpu);
542 }
480 543
481 set_cntvoff(vtimer->cntvoff); 544 set_cntvoff(map.direct_vtimer->cntvoff);
482 545
483 vtimer_restore_state(vcpu); 546 kvm_timer_unblocking(vcpu);
484 547
485 /* Set the background timer for the physical timer emulation. */ 548 timer_restore_state(map.direct_vtimer);
486 phys_timer_emulate(vcpu); 549 if (map.direct_ptimer)
550 timer_restore_state(map.direct_ptimer);
487 551
488 /* If the timer fired while we weren't running, inject it now */ 552 if (map.emul_ptimer)
489 if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) 553 timer_emulate(map.emul_ptimer);
490 kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
491} 554}
492 555
493bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) 556bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
@@ -509,15 +572,20 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
509 572
510void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) 573void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
511{ 574{
512 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 575 struct arch_timer_cpu *timer = vcpu_timer(vcpu);
576 struct timer_map map;
513 577
514 if (unlikely(!timer->enabled)) 578 if (unlikely(!timer->enabled))
515 return; 579 return;
516 580
517 vtimer_save_state(vcpu); 581 get_timer_map(vcpu, &map);
582
583 timer_save_state(map.direct_vtimer);
584 if (map.direct_ptimer)
585 timer_save_state(map.direct_ptimer);
518 586
519 /* 587 /*
520 * Cancel the physical timer emulation, because the only case where we 588 * Cancel soft timer emulation, because the only case where we
521 * need it after a vcpu_put is in the context of a sleeping VCPU, and 589 * need it after a vcpu_put is in the context of a sleeping VCPU, and
522 * in that case we already factor in the deadline for the physical 590 * in that case we already factor in the deadline for the physical
523 * timer when scheduling the bg_timer. 591 * timer when scheduling the bg_timer.
@@ -525,7 +593,11 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
525 * In any case, we re-schedule the hrtimer for the physical timer when 593 * In any case, we re-schedule the hrtimer for the physical timer when
526 * coming back to the VCPU thread in kvm_timer_vcpu_load(). 594 * coming back to the VCPU thread in kvm_timer_vcpu_load().
527 */ 595 */
528 soft_timer_cancel(&timer->phys_timer); 596 if (map.emul_ptimer)
597 soft_timer_cancel(&map.emul_ptimer->hrtimer);
598
599 if (swait_active(kvm_arch_vcpu_wq(vcpu)))
600 kvm_timer_blocking(vcpu);
529 601
530 /* 602 /*
531 * The kernel may decide to run userspace after calling vcpu_put, so 603 * The kernel may decide to run userspace after calling vcpu_put, so
@@ -534,8 +606,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
534 * counter of non-VHE case. For VHE, the virtual counter uses a fixed 606 * counter of non-VHE case. For VHE, the virtual counter uses a fixed
535 * virtual offset of zero, so no need to zero CNTVOFF_EL2 register. 607 * virtual offset of zero, so no need to zero CNTVOFF_EL2 register.
536 */ 608 */
537 if (!has_vhe()) 609 set_cntvoff(0);
538 set_cntvoff(0);
539} 610}
540 611
541/* 612/*
@@ -550,7 +621,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
550 if (!kvm_timer_should_fire(vtimer)) { 621 if (!kvm_timer_should_fire(vtimer)) {
551 kvm_timer_update_irq(vcpu, false, vtimer); 622 kvm_timer_update_irq(vcpu, false, vtimer);
552 if (static_branch_likely(&has_gic_active_state)) 623 if (static_branch_likely(&has_gic_active_state))
553 set_vtimer_irq_phys_active(vcpu, false); 624 set_timer_irq_phys_active(vtimer, false);
554 else 625 else
555 enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); 626 enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
556 } 627 }
@@ -558,7 +629,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
558 629
559void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) 630void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
560{ 631{
561 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 632 struct arch_timer_cpu *timer = vcpu_timer(vcpu);
562 633
563 if (unlikely(!timer->enabled)) 634 if (unlikely(!timer->enabled))
564 return; 635 return;
@@ -569,9 +640,10 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
569 640
570int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) 641int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
571{ 642{
572 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 643 struct arch_timer_cpu *timer = vcpu_timer(vcpu);
573 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 644 struct timer_map map;
574 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); 645
646 get_timer_map(vcpu, &map);
575 647
576 /* 648 /*
577 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 649 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
@@ -579,12 +651,22 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
579 * resets the timer to be disabled and unmasked and is compliant with 651 * resets the timer to be disabled and unmasked and is compliant with
580 * the ARMv7 architecture. 652 * the ARMv7 architecture.
581 */ 653 */
582 vtimer->cnt_ctl = 0; 654 vcpu_vtimer(vcpu)->cnt_ctl = 0;
583 ptimer->cnt_ctl = 0; 655 vcpu_ptimer(vcpu)->cnt_ctl = 0;
584 kvm_timer_update_state(vcpu);
585 656
586 if (timer->enabled && irqchip_in_kernel(vcpu->kvm)) 657 if (timer->enabled) {
587 kvm_vgic_reset_mapped_irq(vcpu, vtimer->irq.irq); 658 kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu));
659 kvm_timer_update_irq(vcpu, false, vcpu_ptimer(vcpu));
660
661 if (irqchip_in_kernel(vcpu->kvm)) {
662 kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq);
663 if (map.direct_ptimer)
664 kvm_vgic_reset_mapped_irq(vcpu, map.direct_ptimer->irq.irq);
665 }
666 }
667
668 if (map.emul_ptimer)
669 soft_timer_cancel(&map.emul_ptimer->hrtimer);
588 670
589 return 0; 671 return 0;
590} 672}
@@ -610,56 +692,76 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
610 692
611void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) 693void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
612{ 694{
613 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 695 struct arch_timer_cpu *timer = vcpu_timer(vcpu);
614 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 696 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
615 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); 697 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
616 698
617 /* Synchronize cntvoff across all vtimers of a VM. */ 699 /* Synchronize cntvoff across all vtimers of a VM. */
618 update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); 700 update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
619 vcpu_ptimer(vcpu)->cntvoff = 0; 701 ptimer->cntvoff = 0;
620 702
621 hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 703 hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
622 timer->bg_timer.function = kvm_bg_timer_expire; 704 timer->bg_timer.function = kvm_bg_timer_expire;
623 705
624 hrtimer_init(&timer->phys_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 706 hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
625 timer->phys_timer.function = kvm_phys_timer_expire; 707 hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
708 vtimer->hrtimer.function = kvm_hrtimer_expire;
709 ptimer->hrtimer.function = kvm_hrtimer_expire;
626 710
627 vtimer->irq.irq = default_vtimer_irq.irq; 711 vtimer->irq.irq = default_vtimer_irq.irq;
628 ptimer->irq.irq = default_ptimer_irq.irq; 712 ptimer->irq.irq = default_ptimer_irq.irq;
713
714 vtimer->host_timer_irq = host_vtimer_irq;
715 ptimer->host_timer_irq = host_ptimer_irq;
716
717 vtimer->host_timer_irq_flags = host_vtimer_irq_flags;
718 ptimer->host_timer_irq_flags = host_ptimer_irq_flags;
719
720 vtimer->vcpu = vcpu;
721 ptimer->vcpu = vcpu;
629} 722}
630 723
631static void kvm_timer_init_interrupt(void *info) 724static void kvm_timer_init_interrupt(void *info)
632{ 725{
633 enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); 726 enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
727 enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
634} 728}
635 729
636int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) 730int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
637{ 731{
638 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 732 struct arch_timer_context *timer;
639 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); 733 bool level;
640 734
641 switch (regid) { 735 switch (regid) {
642 case KVM_REG_ARM_TIMER_CTL: 736 case KVM_REG_ARM_TIMER_CTL:
643 vtimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT; 737 timer = vcpu_vtimer(vcpu);
738 kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
644 break; 739 break;
645 case KVM_REG_ARM_TIMER_CNT: 740 case KVM_REG_ARM_TIMER_CNT:
741 timer = vcpu_vtimer(vcpu);
646 update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value); 742 update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value);
647 break; 743 break;
648 case KVM_REG_ARM_TIMER_CVAL: 744 case KVM_REG_ARM_TIMER_CVAL:
649 vtimer->cnt_cval = value; 745 timer = vcpu_vtimer(vcpu);
746 kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
650 break; 747 break;
651 case KVM_REG_ARM_PTIMER_CTL: 748 case KVM_REG_ARM_PTIMER_CTL:
652 ptimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT; 749 timer = vcpu_ptimer(vcpu);
750 kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
653 break; 751 break;
654 case KVM_REG_ARM_PTIMER_CVAL: 752 case KVM_REG_ARM_PTIMER_CVAL:
655 ptimer->cnt_cval = value; 753 timer = vcpu_ptimer(vcpu);
754 kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
656 break; 755 break;
657 756
658 default: 757 default:
659 return -1; 758 return -1;
660 } 759 }
661 760
662 kvm_timer_update_state(vcpu); 761 level = kvm_timer_should_fire(timer);
762 kvm_timer_update_irq(vcpu, level, timer);
763 timer_emulate(timer);
764
663 return 0; 765 return 0;
664} 766}
665 767
@@ -679,26 +781,113 @@ static u64 read_timer_ctl(struct arch_timer_context *timer)
679 781
680u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) 782u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
681{ 783{
682 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
683 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
684
685 switch (regid) { 784 switch (regid) {
686 case KVM_REG_ARM_TIMER_CTL: 785 case KVM_REG_ARM_TIMER_CTL:
687 return read_timer_ctl(vtimer); 786 return kvm_arm_timer_read(vcpu,
787 vcpu_vtimer(vcpu), TIMER_REG_CTL);
688 case KVM_REG_ARM_TIMER_CNT: 788 case KVM_REG_ARM_TIMER_CNT:
689 return kvm_phys_timer_read() - vtimer->cntvoff; 789 return kvm_arm_timer_read(vcpu,
790 vcpu_vtimer(vcpu), TIMER_REG_CNT);
690 case KVM_REG_ARM_TIMER_CVAL: 791 case KVM_REG_ARM_TIMER_CVAL:
691 return vtimer->cnt_cval; 792 return kvm_arm_timer_read(vcpu,
793 vcpu_vtimer(vcpu), TIMER_REG_CVAL);
692 case KVM_REG_ARM_PTIMER_CTL: 794 case KVM_REG_ARM_PTIMER_CTL:
693 return read_timer_ctl(ptimer); 795 return kvm_arm_timer_read(vcpu,
694 case KVM_REG_ARM_PTIMER_CVAL: 796 vcpu_ptimer(vcpu), TIMER_REG_CTL);
695 return ptimer->cnt_cval;
696 case KVM_REG_ARM_PTIMER_CNT: 797 case KVM_REG_ARM_PTIMER_CNT:
697 return kvm_phys_timer_read(); 798 return kvm_arm_timer_read(vcpu,
799 vcpu_vtimer(vcpu), TIMER_REG_CNT);
800 case KVM_REG_ARM_PTIMER_CVAL:
801 return kvm_arm_timer_read(vcpu,
802 vcpu_ptimer(vcpu), TIMER_REG_CVAL);
698 } 803 }
699 return (u64)-1; 804 return (u64)-1;
700} 805}
701 806
807static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
808 struct arch_timer_context *timer,
809 enum kvm_arch_timer_regs treg)
810{
811 u64 val;
812
813 switch (treg) {
814 case TIMER_REG_TVAL:
815 val = kvm_phys_timer_read() - timer->cntvoff - timer->cnt_cval;
816 break;
817
818 case TIMER_REG_CTL:
819 val = read_timer_ctl(timer);
820 break;
821
822 case TIMER_REG_CVAL:
823 val = timer->cnt_cval;
824 break;
825
826 case TIMER_REG_CNT:
827 val = kvm_phys_timer_read() - timer->cntvoff;
828 break;
829
830 default:
831 BUG();
832 }
833
834 return val;
835}
836
837u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
838 enum kvm_arch_timers tmr,
839 enum kvm_arch_timer_regs treg)
840{
841 u64 val;
842
843 preempt_disable();
844 kvm_timer_vcpu_put(vcpu);
845
846 val = kvm_arm_timer_read(vcpu, vcpu_get_timer(vcpu, tmr), treg);
847
848 kvm_timer_vcpu_load(vcpu);
849 preempt_enable();
850
851 return val;
852}
853
854static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
855 struct arch_timer_context *timer,
856 enum kvm_arch_timer_regs treg,
857 u64 val)
858{
859 switch (treg) {
860 case TIMER_REG_TVAL:
861 timer->cnt_cval = val - kvm_phys_timer_read() - timer->cntvoff;
862 break;
863
864 case TIMER_REG_CTL:
865 timer->cnt_ctl = val & ~ARCH_TIMER_CTRL_IT_STAT;
866 break;
867
868 case TIMER_REG_CVAL:
869 timer->cnt_cval = val;
870 break;
871
872 default:
873 BUG();
874 }
875}
876
877void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
878 enum kvm_arch_timers tmr,
879 enum kvm_arch_timer_regs treg,
880 u64 val)
881{
882 preempt_disable();
883 kvm_timer_vcpu_put(vcpu);
884
885 kvm_arm_timer_write(vcpu, vcpu_get_timer(vcpu, tmr), treg, val);
886
887 kvm_timer_vcpu_load(vcpu);
888 preempt_enable();
889}
890
702static int kvm_timer_starting_cpu(unsigned int cpu) 891static int kvm_timer_starting_cpu(unsigned int cpu)
703{ 892{
704 kvm_timer_init_interrupt(NULL); 893 kvm_timer_init_interrupt(NULL);
@@ -724,6 +913,8 @@ int kvm_timer_hyp_init(bool has_gic)
724 return -ENODEV; 913 return -ENODEV;
725 } 914 }
726 915
916 /* First, do the virtual EL1 timer irq */
917
727 if (info->virtual_irq <= 0) { 918 if (info->virtual_irq <= 0) {
728 kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", 919 kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
729 info->virtual_irq); 920 info->virtual_irq);
@@ -734,15 +925,15 @@ int kvm_timer_hyp_init(bool has_gic)
734 host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq); 925 host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
735 if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH && 926 if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
736 host_vtimer_irq_flags != IRQF_TRIGGER_LOW) { 927 host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
737 kvm_err("Invalid trigger for IRQ%d, assuming level low\n", 928 kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n",
738 host_vtimer_irq); 929 host_vtimer_irq);
739 host_vtimer_irq_flags = IRQF_TRIGGER_LOW; 930 host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
740 } 931 }
741 932
742 err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, 933 err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
743 "kvm guest timer", kvm_get_running_vcpus()); 934 "kvm guest vtimer", kvm_get_running_vcpus());
744 if (err) { 935 if (err) {
745 kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n", 936 kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
746 host_vtimer_irq, err); 937 host_vtimer_irq, err);
747 return err; 938 return err;
748 } 939 }
@@ -760,6 +951,43 @@ int kvm_timer_hyp_init(bool has_gic)
760 951
761 kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq); 952 kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);
762 953
954 /* Now let's do the physical EL1 timer irq */
955
956 if (info->physical_irq > 0) {
957 host_ptimer_irq = info->physical_irq;
958 host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq);
959 if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH &&
960 host_ptimer_irq_flags != IRQF_TRIGGER_LOW) {
961 kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n",
962 host_ptimer_irq);
963 host_ptimer_irq_flags = IRQF_TRIGGER_LOW;
964 }
965
966 err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
967 "kvm guest ptimer", kvm_get_running_vcpus());
968 if (err) {
969 kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
970 host_ptimer_irq, err);
971 return err;
972 }
973
974 if (has_gic) {
975 err = irq_set_vcpu_affinity(host_ptimer_irq,
976 kvm_get_running_vcpus());
977 if (err) {
978 kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
979 goto out_free_irq;
980 }
981 }
982
983 kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
984 } else if (has_vhe()) {
985 kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
986 info->physical_irq);
987 err = -ENODEV;
988 goto out_free_irq;
989 }
990
763 cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, 991 cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
764 "kvm/arm/timer:starting", kvm_timer_starting_cpu, 992 "kvm/arm/timer:starting", kvm_timer_starting_cpu,
765 kvm_timer_dying_cpu); 993 kvm_timer_dying_cpu);
@@ -771,7 +999,7 @@ out_free_irq:
771 999
772void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) 1000void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
773{ 1001{
774 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 1002 struct arch_timer_cpu *timer = vcpu_timer(vcpu);
775 1003
776 soft_timer_cancel(&timer->bg_timer); 1004 soft_timer_cancel(&timer->bg_timer);
777} 1005}
@@ -807,16 +1035,18 @@ bool kvm_arch_timer_get_input_level(int vintid)
807 1035
808 if (vintid == vcpu_vtimer(vcpu)->irq.irq) 1036 if (vintid == vcpu_vtimer(vcpu)->irq.irq)
809 timer = vcpu_vtimer(vcpu); 1037 timer = vcpu_vtimer(vcpu);
1038 else if (vintid == vcpu_ptimer(vcpu)->irq.irq)
1039 timer = vcpu_ptimer(vcpu);
810 else 1040 else
811 BUG(); /* We only map the vtimer so far */ 1041 BUG();
812 1042
813 return kvm_timer_should_fire(timer); 1043 return kvm_timer_should_fire(timer);
814} 1044}
815 1045
816int kvm_timer_enable(struct kvm_vcpu *vcpu) 1046int kvm_timer_enable(struct kvm_vcpu *vcpu)
817{ 1047{
818 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 1048 struct arch_timer_cpu *timer = vcpu_timer(vcpu);
819 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 1049 struct timer_map map;
820 int ret; 1050 int ret;
821 1051
822 if (timer->enabled) 1052 if (timer->enabled)
@@ -834,19 +1064,33 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
834 return -EINVAL; 1064 return -EINVAL;
835 } 1065 }
836 1066
837 ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq, 1067 get_timer_map(vcpu, &map);
1068
1069 ret = kvm_vgic_map_phys_irq(vcpu,
1070 map.direct_vtimer->host_timer_irq,
1071 map.direct_vtimer->irq.irq,
838 kvm_arch_timer_get_input_level); 1072 kvm_arch_timer_get_input_level);
839 if (ret) 1073 if (ret)
840 return ret; 1074 return ret;
841 1075
1076 if (map.direct_ptimer) {
1077 ret = kvm_vgic_map_phys_irq(vcpu,
1078 map.direct_ptimer->host_timer_irq,
1079 map.direct_ptimer->irq.irq,
1080 kvm_arch_timer_get_input_level);
1081 }
1082
1083 if (ret)
1084 return ret;
1085
842no_vgic: 1086no_vgic:
843 timer->enabled = 1; 1087 timer->enabled = 1;
844 return 0; 1088 return 0;
845} 1089}
846 1090
847/* 1091/*
848 * On VHE system, we only need to configure trap on physical timer and counter 1092 * On VHE system, we only need to configure the EL2 timer trap register once,
849 * accesses in EL0 and EL1 once, not for every world switch. 1093 * not for every world switch.
850 * The host kernel runs at EL2 with HCR_EL2.TGE == 1, 1094 * The host kernel runs at EL2 with HCR_EL2.TGE == 1,
851 * and this makes those bits have no effect for the host kernel execution. 1095 * and this makes those bits have no effect for the host kernel execution.
852 */ 1096 */
@@ -857,11 +1101,11 @@ void kvm_timer_init_vhe(void)
857 u64 val; 1101 u64 val;
858 1102
859 /* 1103 /*
860 * Disallow physical timer access for the guest. 1104 * VHE systems allow the guest direct access to the EL1 physical
861 * Physical counter access is allowed. 1105 * timer/counter.
862 */ 1106 */
863 val = read_sysreg(cnthctl_el2); 1107 val = read_sysreg(cnthctl_el2);
864 val &= ~(CNTHCTL_EL1PCEN << cnthctl_shift); 1108 val |= (CNTHCTL_EL1PCEN << cnthctl_shift);
865 val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); 1109 val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);
866 write_sysreg(val, cnthctl_el2); 1110 write_sysreg(val, cnthctl_el2);
867} 1111}
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 9c486fad3f9f..99c37384ba7b 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -65,7 +65,6 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
65/* The VMID used in the VTTBR */ 65/* The VMID used in the VTTBR */
66static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); 66static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
67static u32 kvm_next_vmid; 67static u32 kvm_next_vmid;
68static unsigned int kvm_vmid_bits __read_mostly;
69static DEFINE_SPINLOCK(kvm_vmid_lock); 68static DEFINE_SPINLOCK(kvm_vmid_lock);
70 69
71static bool vgic_present; 70static bool vgic_present;
@@ -142,7 +141,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
142 kvm_vgic_early_init(kvm); 141 kvm_vgic_early_init(kvm);
143 142
144 /* Mark the initial VMID generation invalid */ 143 /* Mark the initial VMID generation invalid */
145 kvm->arch.vmid_gen = 0; 144 kvm->arch.vmid.vmid_gen = 0;
146 145
147 /* The maximum number of VCPUs is limited by the host's GIC model */ 146 /* The maximum number of VCPUs is limited by the host's GIC model */
148 kvm->arch.max_vcpus = vgic_present ? 147 kvm->arch.max_vcpus = vgic_present ?
@@ -336,13 +335,11 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
336 335
337void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) 336void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
338{ 337{
339 kvm_timer_schedule(vcpu);
340 kvm_vgic_v4_enable_doorbell(vcpu); 338 kvm_vgic_v4_enable_doorbell(vcpu);
341} 339}
342 340
343void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) 341void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
344{ 342{
345 kvm_timer_unschedule(vcpu);
346 kvm_vgic_v4_disable_doorbell(vcpu); 343 kvm_vgic_v4_disable_doorbell(vcpu);
347} 344}
348 345
@@ -472,37 +469,31 @@ void force_vm_exit(const cpumask_t *mask)
472 469
473/** 470/**
474 * need_new_vmid_gen - check that the VMID is still valid 471 * need_new_vmid_gen - check that the VMID is still valid
475 * @kvm: The VM's VMID to check 472 * @vmid: The VMID to check
476 * 473 *
477 * return true if there is a new generation of VMIDs being used 474 * return true if there is a new generation of VMIDs being used
478 * 475 *
479 * The hardware supports only 256 values with the value zero reserved for the 476 * The hardware supports a limited set of values with the value zero reserved
480 * host, so we check if an assigned value belongs to a previous generation, 477 * for the host, so we check if an assigned value belongs to a previous
481 * which which requires us to assign a new value. If we're the first to use a 478 * generation, which which requires us to assign a new value. If we're the
482 * VMID for the new generation, we must flush necessary caches and TLBs on all 479 * first to use a VMID for the new generation, we must flush necessary caches
483 * CPUs. 480 * and TLBs on all CPUs.
484 */ 481 */
485static bool need_new_vmid_gen(struct kvm *kvm) 482static bool need_new_vmid_gen(struct kvm_vmid *vmid)
486{ 483{
487 u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen); 484 u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
488 smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */ 485 smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
489 return unlikely(READ_ONCE(kvm->arch.vmid_gen) != current_vmid_gen); 486 return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen);
490} 487}
491 488
492/** 489/**
493 * update_vttbr - Update the VTTBR with a valid VMID before the guest runs 490 * update_vmid - Update the vmid with a valid VMID for the current generation
494 * @kvm The guest that we are about to run 491 * @kvm: The guest that struct vmid belongs to
495 * 492 * @vmid: The stage-2 VMID information struct
496 * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure the
497 * VM has a valid VMID, otherwise assigns a new one and flushes corresponding
498 * caches and TLBs.
499 */ 493 */
500static void update_vttbr(struct kvm *kvm) 494static void update_vmid(struct kvm_vmid *vmid)
501{ 495{
502 phys_addr_t pgd_phys; 496 if (!need_new_vmid_gen(vmid))
503 u64 vmid, cnp = kvm_cpu_has_cnp() ? VTTBR_CNP_BIT : 0;
504
505 if (!need_new_vmid_gen(kvm))
506 return; 497 return;
507 498
508 spin_lock(&kvm_vmid_lock); 499 spin_lock(&kvm_vmid_lock);
@@ -512,7 +503,7 @@ static void update_vttbr(struct kvm *kvm)
512 * already allocated a valid vmid for this vm, then this vcpu should 503 * already allocated a valid vmid for this vm, then this vcpu should
513 * use the same vmid. 504 * use the same vmid.
514 */ 505 */
515 if (!need_new_vmid_gen(kvm)) { 506 if (!need_new_vmid_gen(vmid)) {
516 spin_unlock(&kvm_vmid_lock); 507 spin_unlock(&kvm_vmid_lock);
517 return; 508 return;
518 } 509 }
@@ -536,18 +527,12 @@ static void update_vttbr(struct kvm *kvm)
536 kvm_call_hyp(__kvm_flush_vm_context); 527 kvm_call_hyp(__kvm_flush_vm_context);
537 } 528 }
538 529
539 kvm->arch.vmid = kvm_next_vmid; 530 vmid->vmid = kvm_next_vmid;
540 kvm_next_vmid++; 531 kvm_next_vmid++;
541 kvm_next_vmid &= (1 << kvm_vmid_bits) - 1; 532 kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
542
543 /* update vttbr to be used with the new vmid */
544 pgd_phys = virt_to_phys(kvm->arch.pgd);
545 BUG_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm));
546 vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
547 kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid | cnp;
548 533
549 smp_wmb(); 534 smp_wmb();
550 WRITE_ONCE(kvm->arch.vmid_gen, atomic64_read(&kvm_vmid_gen)); 535 WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen));
551 536
552 spin_unlock(&kvm_vmid_lock); 537 spin_unlock(&kvm_vmid_lock);
553} 538}
@@ -700,7 +685,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
700 */ 685 */
701 cond_resched(); 686 cond_resched();
702 687
703 update_vttbr(vcpu->kvm); 688 update_vmid(&vcpu->kvm->arch.vmid);
704 689
705 check_vcpu_requests(vcpu); 690 check_vcpu_requests(vcpu);
706 691
@@ -749,7 +734,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
749 */ 734 */
750 smp_store_mb(vcpu->mode, IN_GUEST_MODE); 735 smp_store_mb(vcpu->mode, IN_GUEST_MODE);
751 736
752 if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || 737 if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) ||
753 kvm_request_pending(vcpu)) { 738 kvm_request_pending(vcpu)) {
754 vcpu->mode = OUTSIDE_GUEST_MODE; 739 vcpu->mode = OUTSIDE_GUEST_MODE;
755 isb(); /* Ensure work in x_flush_hwstate is committed */ 740 isb(); /* Ensure work in x_flush_hwstate is committed */
@@ -775,7 +760,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
775 ret = kvm_vcpu_run_vhe(vcpu); 760 ret = kvm_vcpu_run_vhe(vcpu);
776 kvm_arm_vhe_guest_exit(); 761 kvm_arm_vhe_guest_exit();
777 } else { 762 } else {
778 ret = kvm_call_hyp(__kvm_vcpu_run_nvhe, vcpu); 763 ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu);
779 } 764 }
780 765
781 vcpu->mode = OUTSIDE_GUEST_MODE; 766 vcpu->mode = OUTSIDE_GUEST_MODE;
@@ -1427,10 +1412,6 @@ static inline void hyp_cpu_pm_exit(void)
1427 1412
1428static int init_common_resources(void) 1413static int init_common_resources(void)
1429{ 1414{
1430 /* set size of VMID supported by CPU */
1431 kvm_vmid_bits = kvm_get_vmid_bits();
1432 kvm_info("%d-bit VMID\n", kvm_vmid_bits);
1433
1434 kvm_set_ipa_limit(); 1415 kvm_set_ipa_limit();
1435 1416
1436 return 0; 1417 return 0;
@@ -1571,6 +1552,7 @@ static int init_hyp_mode(void)
1571 kvm_cpu_context_t *cpu_ctxt; 1552 kvm_cpu_context_t *cpu_ctxt;
1572 1553
1573 cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu); 1554 cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu);
1555 kvm_init_host_cpu_context(cpu_ctxt, cpu);
1574 err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP); 1556 err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP);
1575 1557
1576 if (err) { 1558 if (err) {
@@ -1581,7 +1563,7 @@ static int init_hyp_mode(void)
1581 1563
1582 err = hyp_map_aux_data(); 1564 err = hyp_map_aux_data();
1583 if (err) 1565 if (err)
1584 kvm_err("Cannot map host auxilary data: %d\n", err); 1566 kvm_err("Cannot map host auxiliary data: %d\n", err);
1585 1567
1586 return 0; 1568 return 0;
1587 1569
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index 9652c453480f..264d92da3240 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -226,7 +226,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
226 int i; 226 int i;
227 u32 elrsr; 227 u32 elrsr;
228 228
229 elrsr = read_gicreg(ICH_ELSR_EL2); 229 elrsr = read_gicreg(ICH_ELRSR_EL2);
230 230
231 write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2); 231 write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2);
232 232
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index e9d28a7ca673..ffd7acdceac7 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -908,6 +908,7 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
908 */ 908 */
909int kvm_alloc_stage2_pgd(struct kvm *kvm) 909int kvm_alloc_stage2_pgd(struct kvm *kvm)
910{ 910{
911 phys_addr_t pgd_phys;
911 pgd_t *pgd; 912 pgd_t *pgd;
912 913
913 if (kvm->arch.pgd != NULL) { 914 if (kvm->arch.pgd != NULL) {
@@ -920,7 +921,12 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
920 if (!pgd) 921 if (!pgd)
921 return -ENOMEM; 922 return -ENOMEM;
922 923
924 pgd_phys = virt_to_phys(pgd);
925 if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm)))
926 return -EINVAL;
927
923 kvm->arch.pgd = pgd; 928 kvm->arch.pgd = pgd;
929 kvm->arch.pgd_phys = pgd_phys;
924 return 0; 930 return 0;
925} 931}
926 932
@@ -1008,6 +1014,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
1008 unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); 1014 unmap_stage2_range(kvm, 0, kvm_phys_size(kvm));
1009 pgd = READ_ONCE(kvm->arch.pgd); 1015 pgd = READ_ONCE(kvm->arch.pgd);
1010 kvm->arch.pgd = NULL; 1016 kvm->arch.pgd = NULL;
1017 kvm->arch.pgd_phys = 0;
1011 } 1018 }
1012 spin_unlock(&kvm->mmu_lock); 1019 spin_unlock(&kvm->mmu_lock);
1013 1020
@@ -1396,14 +1403,6 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
1396 return false; 1403 return false;
1397} 1404}
1398 1405
1399static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
1400{
1401 if (kvm_vcpu_trap_is_iabt(vcpu))
1402 return false;
1403
1404 return kvm_vcpu_dabt_iswrite(vcpu);
1405}
1406
1407/** 1406/**
1408 * stage2_wp_ptes - write protect PMD range 1407 * stage2_wp_ptes - write protect PMD range
1409 * @pmd: pointer to pmd entry 1408 * @pmd: pointer to pmd entry
@@ -1598,14 +1597,13 @@ static void kvm_send_hwpoison_signal(unsigned long address,
1598static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot, 1597static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot,
1599 unsigned long hva) 1598 unsigned long hva)
1600{ 1599{
1601 gpa_t gpa_start, gpa_end; 1600 gpa_t gpa_start;
1602 hva_t uaddr_start, uaddr_end; 1601 hva_t uaddr_start, uaddr_end;
1603 size_t size; 1602 size_t size;
1604 1603
1605 size = memslot->npages * PAGE_SIZE; 1604 size = memslot->npages * PAGE_SIZE;
1606 1605
1607 gpa_start = memslot->base_gfn << PAGE_SHIFT; 1606 gpa_start = memslot->base_gfn << PAGE_SHIFT;
1608 gpa_end = gpa_start + size;
1609 1607
1610 uaddr_start = memslot->userspace_addr; 1608 uaddr_start = memslot->userspace_addr;
1611 uaddr_end = uaddr_start + size; 1609 uaddr_end = uaddr_start + size;
@@ -2353,7 +2351,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2353 return 0; 2351 return 0;
2354} 2352}
2355 2353
2356void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) 2354void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
2357{ 2355{
2358} 2356}
2359 2357
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h
index 3828beab93f2..204d210d01c2 100644
--- a/virt/kvm/arm/trace.h
+++ b/virt/kvm/arm/trace.h
@@ -2,6 +2,7 @@
2#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) 2#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
3#define _TRACE_KVM_H 3#define _TRACE_KVM_H
4 4
5#include <kvm/arm_arch_timer.h>
5#include <linux/tracepoint.h> 6#include <linux/tracepoint.h>
6 7
7#undef TRACE_SYSTEM 8#undef TRACE_SYSTEM
@@ -262,10 +263,114 @@ TRACE_EVENT(kvm_timer_update_irq,
262 __entry->vcpu_id, __entry->irq, __entry->level) 263 __entry->vcpu_id, __entry->irq, __entry->level)
263); 264);
264 265
266TRACE_EVENT(kvm_get_timer_map,
267 TP_PROTO(unsigned long vcpu_id, struct timer_map *map),
268 TP_ARGS(vcpu_id, map),
269
270 TP_STRUCT__entry(
271 __field( unsigned long, vcpu_id )
272 __field( int, direct_vtimer )
273 __field( int, direct_ptimer )
274 __field( int, emul_ptimer )
275 ),
276
277 TP_fast_assign(
278 __entry->vcpu_id = vcpu_id;
279 __entry->direct_vtimer = arch_timer_ctx_index(map->direct_vtimer);
280 __entry->direct_ptimer =
281 (map->direct_ptimer) ? arch_timer_ctx_index(map->direct_ptimer) : -1;
282 __entry->emul_ptimer =
283 (map->emul_ptimer) ? arch_timer_ctx_index(map->emul_ptimer) : -1;
284 ),
285
286 TP_printk("VCPU: %ld, dv: %d, dp: %d, ep: %d",
287 __entry->vcpu_id,
288 __entry->direct_vtimer,
289 __entry->direct_ptimer,
290 __entry->emul_ptimer)
291);
292
293TRACE_EVENT(kvm_timer_save_state,
294 TP_PROTO(struct arch_timer_context *ctx),
295 TP_ARGS(ctx),
296
297 TP_STRUCT__entry(
298 __field( unsigned long, ctl )
299 __field( unsigned long long, cval )
300 __field( int, timer_idx )
301 ),
302
303 TP_fast_assign(
304 __entry->ctl = ctx->cnt_ctl;
305 __entry->cval = ctx->cnt_cval;
306 __entry->timer_idx = arch_timer_ctx_index(ctx);
307 ),
308
309 TP_printk(" CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d",
310 __entry->ctl,
311 __entry->cval,
312 __entry->timer_idx)
313);
314
315TRACE_EVENT(kvm_timer_restore_state,
316 TP_PROTO(struct arch_timer_context *ctx),
317 TP_ARGS(ctx),
318
319 TP_STRUCT__entry(
320 __field( unsigned long, ctl )
321 __field( unsigned long long, cval )
322 __field( int, timer_idx )
323 ),
324
325 TP_fast_assign(
326 __entry->ctl = ctx->cnt_ctl;
327 __entry->cval = ctx->cnt_cval;
328 __entry->timer_idx = arch_timer_ctx_index(ctx);
329 ),
330
331 TP_printk("CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d",
332 __entry->ctl,
333 __entry->cval,
334 __entry->timer_idx)
335);
336
337TRACE_EVENT(kvm_timer_hrtimer_expire,
338 TP_PROTO(struct arch_timer_context *ctx),
339 TP_ARGS(ctx),
340
341 TP_STRUCT__entry(
342 __field( int, timer_idx )
343 ),
344
345 TP_fast_assign(
346 __entry->timer_idx = arch_timer_ctx_index(ctx);
347 ),
348
349 TP_printk("arch_timer_ctx_index: %d", __entry->timer_idx)
350);
351
352TRACE_EVENT(kvm_timer_emulate,
353 TP_PROTO(struct arch_timer_context *ctx, bool should_fire),
354 TP_ARGS(ctx, should_fire),
355
356 TP_STRUCT__entry(
357 __field( int, timer_idx )
358 __field( bool, should_fire )
359 ),
360
361 TP_fast_assign(
362 __entry->timer_idx = arch_timer_ctx_index(ctx);
363 __entry->should_fire = should_fire;
364 ),
365
366 TP_printk("arch_timer_ctx_index: %d (should_fire: %d)",
367 __entry->timer_idx, __entry->should_fire)
368);
369
265#endif /* _TRACE_KVM_H */ 370#endif /* _TRACE_KVM_H */
266 371
267#undef TRACE_INCLUDE_PATH 372#undef TRACE_INCLUDE_PATH
268#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm 373#define TRACE_INCLUDE_PATH ../../virt/kvm/arm
269#undef TRACE_INCLUDE_FILE 374#undef TRACE_INCLUDE_FILE
270#define TRACE_INCLUDE_FILE trace 375#define TRACE_INCLUDE_FILE trace
271 376
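
Each TRACE_EVENT() above expands to a trace_<name>() helper callable from the timer code, and its output follows the TP_printk() format. Below is a standalone user-space sketch, not part of the patch and with illustrative struct and helper names, of how the kvm_get_timer_map event renders absent physical-timer contexts as -1:

/*
 * User-space sketch (not part of the patch) mirroring the kvm_get_timer_map
 * TP_fast_assign/TP_printk pair above: absent direct/emulated physical timer
 * contexts are reported as -1.  Struct names here are illustrative only.
 */
#include <stdio.h>

struct timer_ctx { int index; };

struct timer_map {
	struct timer_ctx *direct_vtimer;	/* always present */
	struct timer_ctx *direct_ptimer;	/* may be NULL */
	struct timer_ctx *emul_ptimer;		/* may be NULL */
};

static void trace_get_timer_map(unsigned long vcpu_id, const struct timer_map *map)
{
	int dv = map->direct_vtimer->index;
	int dp = map->direct_ptimer ? map->direct_ptimer->index : -1;
	int ep = map->emul_ptimer ? map->emul_ptimer->index : -1;

	printf("VCPU: %ld, dv: %d, dp: %d, ep: %d\n", (long)vcpu_id, dv, dp, ep);
}

int main(void)
{
	struct timer_ctx vtimer = { 0 }, ptimer = { 1 };
	struct timer_map map_a = { &vtimer, &ptimer, NULL };
	struct timer_map map_b = { &vtimer, NULL, &ptimer };

	trace_get_timer_map(0, &map_a);	/* VCPU: 0, dv: 0, dp: 1, ep: -1 */
	trace_get_timer_map(1, &map_b);	/* VCPU: 1, dv: 0, dp: -1, ep: 1 */
	return 0;
}
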
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 4ee0aeb9a905..408a78eb6a97 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -589,7 +589,7 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
589 */ 589 */
590int vgic_v3_probe(const struct gic_kvm_info *info) 590int vgic_v3_probe(const struct gic_kvm_info *info)
591{ 591{
592 u32 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2); 592 u32 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_ich_vtr_el2);
593 int ret; 593 int ret;
594 594
595 /* 595 /*
@@ -679,7 +679,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
679 struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; 679 struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
680 680
681 if (likely(cpu_if->vgic_sre)) 681 if (likely(cpu_if->vgic_sre))
682 cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr); 682 cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr);
683 683
684 kvm_call_hyp(__vgic_v3_save_aprs, vcpu); 684 kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
685 685
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 6855cce3e528..5294abb3f178 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -144,7 +144,8 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
144 if (zone->pio != 1 && zone->pio != 0) 144 if (zone->pio != 1 && zone->pio != 0)
145 return -EINVAL; 145 return -EINVAL;
146 146
147 dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); 147 dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev),
148 GFP_KERNEL_ACCOUNT);
148 if (!dev) 149 if (!dev)
149 return -ENOMEM; 150 return -ENOMEM;
150 151
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index b20b751286fc..4325250afd72 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -297,7 +297,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
297 if (!kvm_arch_intc_initialized(kvm)) 297 if (!kvm_arch_intc_initialized(kvm))
298 return -EAGAIN; 298 return -EAGAIN;
299 299
300 irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); 300 irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL_ACCOUNT);
301 if (!irqfd) 301 if (!irqfd)
302 return -ENOMEM; 302 return -ENOMEM;
303 303
@@ -345,7 +345,8 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
345 } 345 }
346 346
347 if (!irqfd->resampler) { 347 if (!irqfd->resampler) {
348 resampler = kzalloc(sizeof(*resampler), GFP_KERNEL); 348 resampler = kzalloc(sizeof(*resampler),
349 GFP_KERNEL_ACCOUNT);
349 if (!resampler) { 350 if (!resampler) {
350 ret = -ENOMEM; 351 ret = -ENOMEM;
351 mutex_unlock(&kvm->irqfds.resampler_lock); 352 mutex_unlock(&kvm->irqfds.resampler_lock);
@@ -797,7 +798,7 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
797 if (IS_ERR(eventfd)) 798 if (IS_ERR(eventfd))
798 return PTR_ERR(eventfd); 799 return PTR_ERR(eventfd);
799 800
800 p = kzalloc(sizeof(*p), GFP_KERNEL); 801 p = kzalloc(sizeof(*p), GFP_KERNEL_ACCOUNT);
801 if (!p) { 802 if (!p) {
802 ret = -ENOMEM; 803 ret = -ENOMEM;
803 goto fail; 804 goto fail;
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index b1286c4e0712..3547b0d8c91e 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -196,7 +196,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
196 nr_rt_entries += 1; 196 nr_rt_entries += 1;
197 197
198 new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)), 198 new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)),
199 GFP_KERNEL); 199 GFP_KERNEL_ACCOUNT);
200 200
201 if (!new) 201 if (!new)
202 return -ENOMEM; 202 return -ENOMEM;
@@ -208,7 +208,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
208 208
209 for (i = 0; i < nr; ++i) { 209 for (i = 0; i < nr; ++i) {
210 r = -ENOMEM; 210 r = -ENOMEM;
211 e = kzalloc(sizeof(*e), GFP_KERNEL); 211 e = kzalloc(sizeof(*e), GFP_KERNEL_ACCOUNT);
212 if (!e) 212 if (!e)
213 goto out; 213 goto out;
214 214
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d237d3350a99..f25aa98a94df 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -81,6 +81,11 @@ unsigned int halt_poll_ns_grow = 2;
81module_param(halt_poll_ns_grow, uint, 0644); 81module_param(halt_poll_ns_grow, uint, 0644);
82EXPORT_SYMBOL_GPL(halt_poll_ns_grow); 82EXPORT_SYMBOL_GPL(halt_poll_ns_grow);
83 83
84/* The start value to grow halt_poll_ns from */
85unsigned int halt_poll_ns_grow_start = 10000; /* 10us */
86module_param(halt_poll_ns_grow_start, uint, 0644);
87EXPORT_SYMBOL_GPL(halt_poll_ns_grow_start);
88
84/* Default resets per-vcpu halt_poll_ns . */ 89/* Default resets per-vcpu halt_poll_ns . */
85unsigned int halt_poll_ns_shrink; 90unsigned int halt_poll_ns_shrink;
86module_param(halt_poll_ns_shrink, uint, 0644); 91module_param(halt_poll_ns_shrink, uint, 0644);
@@ -525,7 +530,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void)
525 int i; 530 int i;
526 struct kvm_memslots *slots; 531 struct kvm_memslots *slots;
527 532
528 slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 533 slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT);
529 if (!slots) 534 if (!slots)
530 return NULL; 535 return NULL;
531 536
@@ -601,12 +606,12 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
601 606
602 kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, 607 kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries,
603 sizeof(*kvm->debugfs_stat_data), 608 sizeof(*kvm->debugfs_stat_data),
604 GFP_KERNEL); 609 GFP_KERNEL_ACCOUNT);
605 if (!kvm->debugfs_stat_data) 610 if (!kvm->debugfs_stat_data)
606 return -ENOMEM; 611 return -ENOMEM;
607 612
608 for (p = debugfs_entries; p->name; p++) { 613 for (p = debugfs_entries; p->name; p++) {
609 stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL); 614 stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT);
610 if (!stat_data) 615 if (!stat_data)
611 return -ENOMEM; 616 return -ENOMEM;
612 617
@@ -656,12 +661,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
656 struct kvm_memslots *slots = kvm_alloc_memslots(); 661 struct kvm_memslots *slots = kvm_alloc_memslots();
657 if (!slots) 662 if (!slots)
658 goto out_err_no_srcu; 663 goto out_err_no_srcu;
659 /* 664 /* Generations must be different for each address space. */
660 * Generations must be different for each address space. 665 slots->generation = i;
661 * Init kvm generation close to the maximum to easily test the
662 * code of handling generation number wrap-around.
663 */
664 slots->generation = i * 2 - 150;
665 rcu_assign_pointer(kvm->memslots[i], slots); 666 rcu_assign_pointer(kvm->memslots[i], slots);
666 } 667 }
667 668
@@ -671,7 +672,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
671 goto out_err_no_irq_srcu; 672 goto out_err_no_irq_srcu;
672 for (i = 0; i < KVM_NR_BUSES; i++) { 673 for (i = 0; i < KVM_NR_BUSES; i++) {
673 rcu_assign_pointer(kvm->buses[i], 674 rcu_assign_pointer(kvm->buses[i],
674 kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL)); 675 kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT));
675 if (!kvm->buses[i]) 676 if (!kvm->buses[i])
676 goto out_err; 677 goto out_err;
677 } 678 }
@@ -789,7 +790,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
789{ 790{
790 unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); 791 unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
791 792
792 memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL); 793 memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL_ACCOUNT);
793 if (!memslot->dirty_bitmap) 794 if (!memslot->dirty_bitmap)
794 return -ENOMEM; 795 return -ENOMEM;
795 796
@@ -874,31 +875,34 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
874 int as_id, struct kvm_memslots *slots) 875 int as_id, struct kvm_memslots *slots)
875{ 876{
876 struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id); 877 struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id);
878 u64 gen = old_memslots->generation;
877 879
878 /* 880 WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
879 * Set the low bit in the generation, which disables SPTE caching 881 slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
880 * until the end of synchronize_srcu_expedited.
881 */
882 WARN_ON(old_memslots->generation & 1);
883 slots->generation = old_memslots->generation + 1;
884 882
885 rcu_assign_pointer(kvm->memslots[as_id], slots); 883 rcu_assign_pointer(kvm->memslots[as_id], slots);
886 synchronize_srcu_expedited(&kvm->srcu); 884 synchronize_srcu_expedited(&kvm->srcu);
887 885
888 /* 886 /*
889 * Increment the new memslot generation a second time. This prevents 887 * Increment the new memslot generation a second time, dropping the
890 * vm exits that race with memslot updates from caching a memslot 888 * update in-progress flag and incrementing the generation based on
891 * generation that will (potentially) be valid forever. 889 * the number of address spaces. This provides a unique and easily
892 * 890 * identifiable generation number while the memslots are in flux.
891 */
892 gen = slots->generation & ~KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
893
894 /*
893 * Generations must be unique even across address spaces. We do not need 895 * Generations must be unique even across address spaces. We do not need
894 * a global counter for that, instead the generation space is evenly split 896 * a global counter for that, instead the generation space is evenly split
895 * across address spaces. For example, with two address spaces, address 897 * across address spaces. For example, with two address spaces, address
896 * space 0 will use generations 0, 4, 8, ... while * address space 1 will 898 * space 0 will use generations 0, 2, 4, ... while address space 1 will
897 * use generations 2, 6, 10, 14, ... 899 * use generations 1, 3, 5, ...
898 */ 900 */
899 slots->generation += KVM_ADDRESS_SPACE_NUM * 2 - 1; 901 gen += KVM_ADDRESS_SPACE_NUM;
902
903 kvm_arch_memslots_updated(kvm, gen);
900 904
901 kvm_arch_memslots_updated(kvm, slots); 905 slots->generation = gen;
902 906
903 return old_memslots; 907 return old_memslots;
904} 908}
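
To make the new numbering concrete, here is a minimal user-space sketch, not part of the patch, that models the flow in install_new_memslots() above: publish with an update-in-progress flag set, then drop the flag and step the generation by the number of address spaces. The flag is modelled here as the top bit of a u64; the real KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS definition lives elsewhere in this series.

/*
 * Minimal user-space sketch (not part of the patch) modelling the new
 * generation scheme.  The in-progress flag is modelled as the top bit of a
 * 64-bit generation; the starting values 0 and 1 mirror the per-address-space
 * initialization (slots->generation = i) shown earlier in this diff.
 */
#include <stdint.h>
#include <stdio.h>

#define NR_ADDRESS_SPACES	2			/* e.g. two memslot address spaces */
#define GEN_IN_PROGRESS		(1ULL << 63)		/* assumed flag bit for the demo */

static uint64_t update_generation(uint64_t old_gen)
{
	uint64_t gen;

	/* Publish the new memslots with the in-progress flag set. */
	gen = old_gen | GEN_IN_PROGRESS;

	/* ... synchronize_srcu_expedited() would run here ... */

	/* Drop the flag and bump by the number of address spaces. */
	gen &= ~GEN_IN_PROGRESS;
	gen += NR_ADDRESS_SPACES;
	return gen;
}

int main(void)
{
	/* Address space 0 starts at 0, address space 1 starts at 1. */
	uint64_t gen[NR_ADDRESS_SPACES] = { 0, 1 };

	for (int round = 0; round < 3; round++)
		for (int as = 0; as < NR_ADDRESS_SPACES; as++) {
			gen[as] = update_generation(gen[as]);
			printf("as %d -> gen %llu\n", as,
			       (unsigned long long)gen[as]);
		}
	/* Prints 2, 3, 4, 5, 6, 7: even generations for as 0, odd for as 1. */
	return 0;
}
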
@@ -1018,7 +1022,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
1018 goto out_free; 1022 goto out_free;
1019 } 1023 }
1020 1024
1021 slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 1025 slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT);
1022 if (!slots) 1026 if (!slots)
1023 goto out_free; 1027 goto out_free;
1024 memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots)); 1028 memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots));
@@ -1201,11 +1205,9 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
1201 mask = xchg(&dirty_bitmap[i], 0); 1205 mask = xchg(&dirty_bitmap[i], 0);
1202 dirty_bitmap_buffer[i] = mask; 1206 dirty_bitmap_buffer[i] = mask;
1203 1207
1204 if (mask) { 1208 offset = i * BITS_PER_LONG;
1205 offset = i * BITS_PER_LONG; 1209 kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
1206 kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, 1210 offset, mask);
1207 offset, mask);
1208 }
1209 } 1211 }
1210 spin_unlock(&kvm->mmu_lock); 1212 spin_unlock(&kvm->mmu_lock);
1211 } 1213 }
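
After this hunk, the per-word arch hook runs for every word of the dirty bitmap, not only for words that have dirty bits set. A standalone sketch of the resulting loop shape follows; the handler name and sizes are illustrative, not the kernel's.

/*
 * Standalone sketch (not part of the patch) of the loop shape after the
 * change above: the per-word hook runs unconditionally, even for words
 * whose dirty mask is zero.
 */
#include <stdio.h>

#define BITS_PER_LONG	(8 * sizeof(unsigned long))
#define NR_WORDS	4

static void enable_log_dirty_pt_masked(unsigned long offset, unsigned long mask)
{
	/* Stand-in for kvm_arch_mmu_enable_log_dirty_pt_masked(). */
	printf("offset %lu mask %#lx\n", offset, mask);
}

int main(void)
{
	unsigned long dirty_bitmap[NR_WORDS] = { 0x5, 0x0, 0x80, 0x0 };
	unsigned long dirty_bitmap_buffer[NR_WORDS];

	for (unsigned long i = 0; i < NR_WORDS; i++) {
		/* xchg() in the kernel: grab and clear the word atomically. */
		unsigned long mask = __atomic_exchange_n(&dirty_bitmap[i], 0,
							 __ATOMIC_SEQ_CST);

		dirty_bitmap_buffer[i] = mask;
		enable_log_dirty_pt_masked(i * BITS_PER_LONG, mask);
	}
	(void)dirty_bitmap_buffer;
	return 0;
}
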
@@ -2185,20 +2187,23 @@ void kvm_sigset_deactivate(struct kvm_vcpu *vcpu)
2185 2187
2186static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) 2188static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
2187{ 2189{
2188 unsigned int old, val, grow; 2190 unsigned int old, val, grow, grow_start;
2189 2191
2190 old = val = vcpu->halt_poll_ns; 2192 old = val = vcpu->halt_poll_ns;
2193 grow_start = READ_ONCE(halt_poll_ns_grow_start);
2191 grow = READ_ONCE(halt_poll_ns_grow); 2194 grow = READ_ONCE(halt_poll_ns_grow);
2192 /* 10us base */ 2195 if (!grow)
2193 if (val == 0 && grow) 2196 goto out;
2194 val = 10000; 2197
2195 else 2198 val *= grow;
2196 val *= grow; 2199 if (val < grow_start)
2200 val = grow_start;
2197 2201
2198 if (val > halt_poll_ns) 2202 if (val > halt_poll_ns)
2199 val = halt_poll_ns; 2203 val = halt_poll_ns;
2200 2204
2201 vcpu->halt_poll_ns = val; 2205 vcpu->halt_poll_ns = val;
2206out:
2202 trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); 2207 trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
2203} 2208}
2204 2209
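
The rewritten grow_halt_poll_ns() above multiplies the current value by halt_poll_ns_grow, raises it to at least halt_poll_ns_grow_start, and caps it at halt_poll_ns. A standalone user-space sketch of that policy; the cap value is chosen only for the demo.

/*
 * User-space sketch (not part of the patch) of the reworked growth policy:
 * start growing from halt_poll_ns_grow_start, cap at halt_poll_ns, and do
 * nothing when growing is disabled.  Defaults mirror the module parameters
 * shown above; the cap is an assumption for this demo.
 */
#include <stdio.h>

static unsigned int halt_poll_ns = 200000;		/* assumed cap for the demo */
static unsigned int halt_poll_ns_grow = 2;
static unsigned int halt_poll_ns_grow_start = 10000;	/* 10us */

static unsigned int grow(unsigned int val)
{
	if (!halt_poll_ns_grow)
		return val;		/* growing disabled */

	val *= halt_poll_ns_grow;
	if (val < halt_poll_ns_grow_start)
		val = halt_poll_ns_grow_start;
	if (val > halt_poll_ns)
		val = halt_poll_ns;
	return val;
}

int main(void)
{
	unsigned int ns = 0;

	for (int i = 0; i < 6; i++) {
		ns = grow(ns);
		/* 10000, 20000, 40000, 80000, 160000, 200000 */
		printf("halt_poll_ns = %u\n", ns);
	}
	return 0;
}
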
@@ -2683,7 +2688,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
2683 struct kvm_regs *kvm_regs; 2688 struct kvm_regs *kvm_regs;
2684 2689
2685 r = -ENOMEM; 2690 r = -ENOMEM;
2686 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); 2691 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL_ACCOUNT);
2687 if (!kvm_regs) 2692 if (!kvm_regs)
2688 goto out; 2693 goto out;
2689 r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); 2694 r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
@@ -2711,7 +2716,8 @@ out_free1:
2711 break; 2716 break;
2712 } 2717 }
2713 case KVM_GET_SREGS: { 2718 case KVM_GET_SREGS: {
2714 kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); 2719 kvm_sregs = kzalloc(sizeof(struct kvm_sregs),
2720 GFP_KERNEL_ACCOUNT);
2715 r = -ENOMEM; 2721 r = -ENOMEM;
2716 if (!kvm_sregs) 2722 if (!kvm_sregs)
2717 goto out; 2723 goto out;
@@ -2803,7 +2809,7 @@ out_free1:
2803 break; 2809 break;
2804 } 2810 }
2805 case KVM_GET_FPU: { 2811 case KVM_GET_FPU: {
2806 fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); 2812 fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL_ACCOUNT);
2807 r = -ENOMEM; 2813 r = -ENOMEM;
2808 if (!fpu) 2814 if (!fpu)
2809 goto out; 2815 goto out;
@@ -2980,7 +2986,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
2980 if (test) 2986 if (test)
2981 return 0; 2987 return 0;
2982 2988
2983 dev = kzalloc(sizeof(*dev), GFP_KERNEL); 2989 dev = kzalloc(sizeof(*dev), GFP_KERNEL_ACCOUNT);
2984 if (!dev) 2990 if (!dev)
2985 return -ENOMEM; 2991 return -ENOMEM;
2986 2992
@@ -3625,6 +3631,7 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
3625 r = __kvm_io_bus_write(vcpu, bus, &range, val); 3631 r = __kvm_io_bus_write(vcpu, bus, &range, val);
3626 return r < 0 ? r : 0; 3632 return r < 0 ? r : 0;
3627} 3633}
3634EXPORT_SYMBOL_GPL(kvm_io_bus_write);
3628 3635
3629/* kvm_io_bus_write_cookie - called under kvm->slots_lock */ 3636/* kvm_io_bus_write_cookie - called under kvm->slots_lock */
3630int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, 3637int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
@@ -3675,7 +3682,6 @@ static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
3675 3682
3676 return -EOPNOTSUPP; 3683 return -EOPNOTSUPP;
3677} 3684}
3678EXPORT_SYMBOL_GPL(kvm_io_bus_write);
3679 3685
3680/* kvm_io_bus_read - called under kvm->slots_lock */ 3686/* kvm_io_bus_read - called under kvm->slots_lock */
3681int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, 3687int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
@@ -3697,7 +3703,6 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
3697 return r < 0 ? r : 0; 3703 return r < 0 ? r : 0;
3698} 3704}
3699 3705
3700
3701/* Caller must hold slots_lock. */ 3706/* Caller must hold slots_lock. */
3702int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 3707int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
3703 int len, struct kvm_io_device *dev) 3708 int len, struct kvm_io_device *dev)
@@ -3714,8 +3719,8 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
3714 if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) 3719 if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
3715 return -ENOSPC; 3720 return -ENOSPC;
3716 3721
3717 new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count + 1) * 3722 new_bus = kmalloc(struct_size(bus, range, bus->dev_count + 1),
3718 sizeof(struct kvm_io_range)), GFP_KERNEL); 3723 GFP_KERNEL_ACCOUNT);
3719 if (!new_bus) 3724 if (!new_bus)
3720 return -ENOMEM; 3725 return -ENOMEM;
3721 3726
@@ -3760,8 +3765,8 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
3760 if (i == bus->dev_count) 3765 if (i == bus->dev_count)
3761 return; 3766 return;
3762 3767
3763 new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * 3768 new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1),
3764 sizeof(struct kvm_io_range)), GFP_KERNEL); 3769 GFP_KERNEL_ACCOUNT);
3765 if (!new_bus) { 3770 if (!new_bus) {
3766 pr_err("kvm: failed to shrink bus, removing it completely\n"); 3771 pr_err("kvm: failed to shrink bus, removing it completely\n");
3767 goto broken; 3772 goto broken;
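
Both bus-resize paths now size the allocation with struct_size(), i.e. the bus header plus a flexible array of ranges, with overflow checking handled by the helper. Below is a user-space sketch of the equivalent open-coded arithmetic; the struct layout is simplified and the sketch does not saturate on overflow the way the kernel helper does.

/*
 * User-space sketch (not part of the patch) of what struct_size(bus, range, n)
 * computes for a struct ending in a flexible array member.  The kernel helper
 * additionally saturates on arithmetic overflow; this open-coded version does not.
 */
#include <stdio.h>
#include <stdlib.h>

struct io_range { unsigned long addr; int len; };

struct io_bus {
	int dev_count;
	struct io_range range[];	/* flexible array member */
};

int main(void)
{
	int n = 3;
	size_t sz = sizeof(struct io_bus) + n * sizeof(struct io_range);
	struct io_bus *bus = calloc(1, sz);

	if (!bus)
		return 1;
	bus->dev_count = n;
	printf("allocated %zu bytes for %d ranges\n", sz, bus->dev_count);
	free(bus);
	return 0;
}
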
@@ -4029,7 +4034,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
4029 active = kvm_active_vms; 4034 active = kvm_active_vms;
4030 spin_unlock(&kvm_lock); 4035 spin_unlock(&kvm_lock);
4031 4036
4032 env = kzalloc(sizeof(*env), GFP_KERNEL); 4037 env = kzalloc(sizeof(*env), GFP_KERNEL_ACCOUNT);
4033 if (!env) 4038 if (!env)
4034 return; 4039 return;
4035 4040
@@ -4045,7 +4050,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
4045 add_uevent_var(env, "PID=%d", kvm->userspace_pid); 4050 add_uevent_var(env, "PID=%d", kvm->userspace_pid);
4046 4051
4047 if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) { 4052 if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) {
4048 char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL); 4053 char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT);
4049 4054
4050 if (p) { 4055 if (p) {
4051 tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); 4056 tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX);
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index d99850c462a1..524cbd20379f 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -219,7 +219,7 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
219 } 219 }
220 } 220 }
221 221
222 kvg = kzalloc(sizeof(*kvg), GFP_KERNEL); 222 kvg = kzalloc(sizeof(*kvg), GFP_KERNEL_ACCOUNT);
223 if (!kvg) { 223 if (!kvg) {
224 mutex_unlock(&kv->lock); 224 mutex_unlock(&kv->lock);
225 kvm_vfio_group_put_external_user(vfio_group); 225 kvm_vfio_group_put_external_user(vfio_group);
@@ -405,7 +405,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
405 if (tmp->ops == &kvm_vfio_ops) 405 if (tmp->ops == &kvm_vfio_ops)
406 return -EBUSY; 406 return -EBUSY;
407 407
408 kv = kzalloc(sizeof(*kv), GFP_KERNEL); 408 kv = kzalloc(sizeof(*kv), GFP_KERNEL_ACCOUNT);
409 if (!kv) 409 if (!kv)
410 return -ENOMEM; 410 return -ENOMEM;
411 411