author		Linus Torvalds <torvalds@linux-foundation.org>	2019-03-15 18:00:28 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-03-15 18:00:28 -0400
commit		636deed6c0bc137a7c4f4a97ae1fcf0ad75323da (patch)
tree		7bd27189b8e30e3c1466f7730831a08db65f8646 /virt
parent		aa2e3ac64ace127f403be85aa4d6015b859385f2 (diff)
parent		4a605bc08e98381d8df61c30a4acb2eac15eb7da (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"ARM:
- some cleanups
- direct physical timer assignment
- cache sanitization for 32-bit guests
s390:
- interrupt cleanup
- introduction of the Guest Information Block
- preparation for processor subfunctions in cpu models
PPC:
- bug fixes and improvements, especially related to machine checks
and protection keys
x86:
- many, many cleanups, including removing a bunch of MMU code for
unnecessary optimizations
- AVIC fixes
Generic:
- memcg accounting"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (147 commits)
kvm: vmx: fix formatting of a comment
KVM: doc: Document the life cycle of a VM and its resources
MAINTAINERS: Add KVM selftests to existing KVM entry
Revert "KVM/MMU: Flush tlb directly in the kvm_zap_gfn_range()"
KVM: PPC: Book3S: Add count cache flush parameters to kvmppc_get_cpu_char()
KVM: PPC: Fix compilation when KVM is not enabled
KVM: Minor cleanups for kvm_main.c
KVM: s390: add debug logging for cpu model subfunctions
KVM: s390: implement subfunction processor calls
arm64: KVM: Fix architecturally invalid reset value for FPEXC32_EL2
KVM: arm/arm64: Remove unused timer variable
KVM: PPC: Book3S: Improve KVM reference counting
KVM: PPC: Book3S HV: Fix build failure without IOMMU support
Revert "KVM: Eliminate extra function calls in kvm_get_dirty_log_protect()"
x86: kvmguest: use TSC clocksource if invariant TSC is exposed
KVM: Never start grow vCPU halt_poll_ns from value below halt_poll_ns_grow_start
KVM: Expose the initial start value in grow_halt_poll_ns() as a module parameter
KVM: grow_halt_poll_ns() should never shrink vCPU halt_poll_ns
KVM: x86/mmu: Consolidate kvm_mmu_zap_all() and kvm_mmu_zap_mmio_sptes()
KVM: x86/mmu: WARN if zapping a MMIO spte results in zapping children
...
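Several of the generic/x86 commits in the shortlog above adjust the vCPU halt-polling growth logic. As a rough sketch of the behaviour the three grow_halt_poll_ns() commits describe (this is not the exact upstream code; the clamping order and local variable names are assumptions drawn only from the commit subjects):

	/* Sketch of the clamped growth the commit subjects describe:
	 * never start growing from below halt_poll_ns_grow_start, and
	 * never shrink the current value. */
	static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
	{
		unsigned int old, grow;

		old = vcpu->halt_poll_ns;
		grow = old * halt_poll_ns_grow;

		/* never start below the configured start value */
		if (grow < halt_poll_ns_grow_start)
			grow = halt_poll_ns_grow_start;

		/* grow_halt_poll_ns() should never shrink the value */
		if (grow < old)
			grow = old;

		vcpu->halt_poll_ns = grow;
	}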
Diffstat (limited to 'virt')
-rw-r--r--	virt/kvm/arm/arch_timer.c	608
-rw-r--r--	virt/kvm/arm/arm.c		 64
-rw-r--r--	virt/kvm/arm/hyp/vgic-v3-sr.c	  2
-rw-r--r--	virt/kvm/arm/mmu.c		 20
-rw-r--r--	virt/kvm/arm/trace.h		107
-rw-r--r--	virt/kvm/arm/vgic/vgic-v3.c	  4
-rw-r--r--	virt/kvm/coalesced_mmio.c	  3
-rw-r--r--	virt/kvm/eventfd.c		  7
-rw-r--r--	virt/kvm/irqchip.c		  4
-rw-r--r--	virt/kvm/kvm_main.c		103
-rw-r--r--	virt/kvm/vfio.c			  4
11 files changed, 631 insertions, 295 deletions
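The bulk of the arch_timer.c diff below revolves around one new idea: a per-vcpu "timer map" recording which of the guest's EL1 timers are backed directly by hardware and which must be emulated with a hrtimer. On VHE hosts both the virtual and the physical timer can be assigned to the guest; on non-VHE hosts only the virtual timer can, so the physical timer stays emulated. A condensed sketch of the structure and how it is populated, as introduced by this diff (the struct definition itself lives outside the 'virt' directory shown here, presumably in include/kvm/arm_arch_timer.h):

	struct timer_map {
		struct arch_timer_context *direct_vtimer; /* hardware-backed vtimer */
		struct arch_timer_context *direct_ptimer; /* hardware-backed ptimer (VHE only) */
		struct arch_timer_context *emul_ptimer;   /* hrtimer-emulated ptimer (non-VHE) */
	};

	static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
	{
		if (has_vhe()) {
			/* both EL1 timers run directly on the hardware */
			map->direct_vtimer = vcpu_vtimer(vcpu);
			map->direct_ptimer = vcpu_ptimer(vcpu);
			map->emul_ptimer = NULL;
		} else {
			/* only the vtimer is direct; emulate the ptimer */
			map->direct_vtimer = vcpu_vtimer(vcpu);
			map->direct_ptimer = NULL;
			map->emul_ptimer = vcpu_ptimer(vcpu);
		}
	}

Save/restore, interrupt handling, and vGIC mapping below all branch on these three slots instead of hard-coding "vtimer is direct, ptimer is emulated".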
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index b07ac4614e1c..3417f2dbc366 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -25,6 +25,7 @@
 
 #include <clocksource/arm_arch_timer.h>
 #include <asm/arch_timer.h>
+#include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
 
 #include <kvm/arm_vgic.h>
@@ -34,7 +35,9 @@
 
 static struct timecounter *timecounter;
 static unsigned int host_vtimer_irq;
+static unsigned int host_ptimer_irq;
 static u32 host_vtimer_irq_flags;
+static u32 host_ptimer_irq_flags;
 
 static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
 
@@ -52,12 +55,34 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
 static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx);
 static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
+static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
+				struct arch_timer_context *timer,
+				enum kvm_arch_timer_regs treg,
+				u64 val);
+static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
+			      struct arch_timer_context *timer,
+			      enum kvm_arch_timer_regs treg);
 
 u64 kvm_phys_timer_read(void)
 {
	return timecounter->cc->read(timecounter->cc);
 }
 
+static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
+{
+	if (has_vhe()) {
+		map->direct_vtimer = vcpu_vtimer(vcpu);
+		map->direct_ptimer = vcpu_ptimer(vcpu);
+		map->emul_ptimer = NULL;
+	} else {
+		map->direct_vtimer = vcpu_vtimer(vcpu);
+		map->direct_ptimer = NULL;
+		map->emul_ptimer = vcpu_ptimer(vcpu);
+	}
+
+	trace_kvm_get_timer_map(vcpu->vcpu_id, map);
+}
+
 static inline bool userspace_irqchip(struct kvm *kvm)
 {
	return static_branch_unlikely(&userspace_irqchip_in_use) &&
@@ -78,20 +103,27 @@ static void soft_timer_cancel(struct hrtimer *hrt)
 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 {
	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
-	struct arch_timer_context *vtimer;
+	struct arch_timer_context *ctx;
+	struct timer_map map;
 
	/*
	 * We may see a timer interrupt after vcpu_put() has been called which
	 * sets the CPU's vcpu pointer to NULL, because even though the timer
-	 * has been disabled in vtimer_save_state(), the hardware interrupt
+	 * has been disabled in timer_save_state(), the hardware interrupt
	 * signal may not have been retired from the interrupt controller yet.
	 */
	if (!vcpu)
		return IRQ_HANDLED;
 
-	vtimer = vcpu_vtimer(vcpu);
-	if (kvm_timer_should_fire(vtimer))
-		kvm_timer_update_irq(vcpu, true, vtimer);
+	get_timer_map(vcpu, &map);
+
+	if (irq == host_vtimer_irq)
+		ctx = map.direct_vtimer;
+	else
+		ctx = map.direct_ptimer;
+
+	if (kvm_timer_should_fire(ctx))
+		kvm_timer_update_irq(vcpu, true, ctx);
 
	if (userspace_irqchip(vcpu->kvm) &&
	    !static_branch_unlikely(&has_gic_active_state))
@@ -122,7 +154,9 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
 
 static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
 {
-	return !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
+	WARN_ON(timer_ctx && timer_ctx->loaded);
+	return timer_ctx &&
+	       !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
		(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE);
 }
 
@@ -132,21 +166,22 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
  */
 static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
 {
-	u64 min_virt = ULLONG_MAX, min_phys = ULLONG_MAX;
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+	u64 min_delta = ULLONG_MAX;
+	int i;
 
-	if (kvm_timer_irq_can_fire(vtimer))
-		min_virt = kvm_timer_compute_delta(vtimer);
+	for (i = 0; i < NR_KVM_TIMERS; i++) {
+		struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];
 
-	if (kvm_timer_irq_can_fire(ptimer))
-		min_phys = kvm_timer_compute_delta(ptimer);
+		WARN(ctx->loaded, "timer %d loaded\n", i);
+		if (kvm_timer_irq_can_fire(ctx))
+			min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
+	}
 
	/* If none of timers can fire, then return 0 */
-	if ((min_virt == ULLONG_MAX) && (min_phys == ULLONG_MAX))
+	if (min_delta == ULLONG_MAX)
		return 0;
 
-	return min(min_virt, min_phys);
+	return min_delta;
 }
 
 static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
@@ -173,41 +208,58 @@ static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
	return HRTIMER_NORESTART;
 }
 
-static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt)
+static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
 {
-	struct arch_timer_context *ptimer;
-	struct arch_timer_cpu *timer;
+	struct arch_timer_context *ctx;
	struct kvm_vcpu *vcpu;
	u64 ns;
 
-	timer = container_of(hrt, struct arch_timer_cpu, phys_timer);
-	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
-	ptimer = vcpu_ptimer(vcpu);
+	ctx = container_of(hrt, struct arch_timer_context, hrtimer);
+	vcpu = ctx->vcpu;
+
+	trace_kvm_timer_hrtimer_expire(ctx);
 
	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If not ready, schedule for a later time.
	 */
-	ns = kvm_timer_compute_delta(ptimer);
+	ns = kvm_timer_compute_delta(ctx);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}
 
-	kvm_timer_update_irq(vcpu, true, ptimer);
+	kvm_timer_update_irq(vcpu, true, ctx);
	return HRTIMER_NORESTART;
 }
 
 static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
 {
+	enum kvm_arch_timers index;
	u64 cval, now;
 
+	if (!timer_ctx)
+		return false;
+
+	index = arch_timer_ctx_index(timer_ctx);
+
	if (timer_ctx->loaded) {
-		u32 cnt_ctl;
+		u32 cnt_ctl = 0;
+
+		switch (index) {
+		case TIMER_VTIMER:
+			cnt_ctl = read_sysreg_el0(cntv_ctl);
+			break;
+		case TIMER_PTIMER:
+			cnt_ctl = read_sysreg_el0(cntp_ctl);
+			break;
+		case NR_KVM_TIMERS:
+			/* GCC is braindead */
+			cnt_ctl = 0;
+			break;
+		}
 
-		/* Only the virtual timer can be loaded so far */
-		cnt_ctl = read_sysreg_el0(cntv_ctl);
		return  (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
			(cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
		       !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
@@ -224,13 +276,13 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
 
 bool kvm_timer_is_pending(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+	struct timer_map map;
 
-	if (kvm_timer_should_fire(vtimer))
-		return true;
+	get_timer_map(vcpu, &map);
 
-	return kvm_timer_should_fire(ptimer);
+	return kvm_timer_should_fire(map.direct_vtimer) ||
+	       kvm_timer_should_fire(map.direct_ptimer) ||
+	       kvm_timer_should_fire(map.emul_ptimer);
 }
 
 /*
@@ -269,77 +321,70 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
	}
 }
 
-/* Schedule the background timer for the emulated timer. */
-static void phys_timer_emulate(struct kvm_vcpu *vcpu)
+static void timer_emulate(struct arch_timer_context *ctx)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+	bool should_fire = kvm_timer_should_fire(ctx);
+
+	trace_kvm_timer_emulate(ctx, should_fire);
+
+	if (should_fire) {
+		kvm_timer_update_irq(ctx->vcpu, true, ctx);
+		return;
+	}
 
	/*
	 * If the timer can fire now, we don't need to have a soft timer
	 * scheduled for the future. If the timer cannot fire at all,
	 * then we also don't need a soft timer.
	 */
-	if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) {
-		soft_timer_cancel(&timer->phys_timer);
+	if (!kvm_timer_irq_can_fire(ctx)) {
+		soft_timer_cancel(&ctx->hrtimer);
		return;
	}
 
-	soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(ptimer));
+	soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
 }
 
-/*
- * Check if there was a change in the timer state, so that we should either
- * raise or lower the line level to the GIC or schedule a background timer to
- * emulate the physical timer.
- */
-static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
+static void timer_save_state(struct arch_timer_context *ctx)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
-	bool level;
+	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
+	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
+	unsigned long flags;
 
-	if (unlikely(!timer->enabled))
+	if (!timer->enabled)
		return;
 
-	/*
-	 * The vtimer virtual interrupt is a 'mapped' interrupt, meaning part
-	 * of its lifecycle is offloaded to the hardware, and we therefore may
-	 * not have lowered the irq.level value before having to signal a new
-	 * interrupt, but have to signal an interrupt every time the level is
-	 * asserted.
-	 */
-	level = kvm_timer_should_fire(vtimer);
-	kvm_timer_update_irq(vcpu, level, vtimer);
+	local_irq_save(flags);
 
-	phys_timer_emulate(vcpu);
+	if (!ctx->loaded)
+		goto out;
 
-	if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
-		kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
-}
+	switch (index) {
+	case TIMER_VTIMER:
+		ctx->cnt_ctl = read_sysreg_el0(cntv_ctl);
+		ctx->cnt_cval = read_sysreg_el0(cntv_cval);
 
-static void vtimer_save_state(struct kvm_vcpu *vcpu)
-{
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	unsigned long flags;
+		/* Disable the timer */
+		write_sysreg_el0(0, cntv_ctl);
+		isb();
 
-	local_irq_save(flags);
+		break;
+	case TIMER_PTIMER:
+		ctx->cnt_ctl = read_sysreg_el0(cntp_ctl);
+		ctx->cnt_cval = read_sysreg_el0(cntp_cval);
 
-	if (!vtimer->loaded)
-		goto out;
+		/* Disable the timer */
+		write_sysreg_el0(0, cntp_ctl);
+		isb();
 
-	if (timer->enabled) {
-		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
-		vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
-	}
+		break;
+	case NR_KVM_TIMERS:
+		BUG();
+	}
 
-	/* Disable the virtual timer */
-	write_sysreg_el0(0, cntv_ctl);
-	isb();
+	trace_kvm_timer_save_state(ctx);
 
-	vtimer->loaded = false;
+	ctx->loaded = false;
 out:
	local_irq_restore(flags);
 }
@@ -349,67 +394,72 @@
  * thread is removed from its waitqueue and made runnable when there's a timer
  * interrupt to handle.
  */
-void kvm_timer_schedule(struct kvm_vcpu *vcpu)
+static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
-
-	vtimer_save_state(vcpu);
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
+	struct timer_map map;
 
-	/*
-	 * No need to schedule a background timer if any guest timer has
-	 * already expired, because kvm_vcpu_block will return before putting
-	 * the thread to sleep.
-	 */
-	if (kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer))
-		return;
+	get_timer_map(vcpu, &map);
 
	/*
-	 * If both timers are not capable of raising interrupts (disabled or
+	 * If no timers are capable of raising interrupts (disabled or
	 * masked), then there's no more work for us to do.
	 */
-	if (!kvm_timer_irq_can_fire(vtimer) && !kvm_timer_irq_can_fire(ptimer))
+	if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
+	    !kvm_timer_irq_can_fire(map.direct_ptimer) &&
+	    !kvm_timer_irq_can_fire(map.emul_ptimer))
		return;
 
	/*
-	 * The guest timers have not yet expired, schedule a background timer.
+	 * At least one guest time will expire. Schedule a background timer.
	 * Set the earliest expiration time among the guest timers.
	 */
	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
 }
 
-static void vtimer_restore_state(struct kvm_vcpu *vcpu)
+static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
+
+	soft_timer_cancel(&timer->bg_timer);
+}
+
+static void timer_restore_state(struct arch_timer_context *ctx)
+{
+	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
+	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;
 
+	if (!timer->enabled)
+		return;
+
	local_irq_save(flags);
 
-	if (vtimer->loaded)
+	if (ctx->loaded)
		goto out;
 
-	if (timer->enabled) {
-		write_sysreg_el0(vtimer->cnt_cval, cntv_cval);
+	switch (index) {
+	case TIMER_VTIMER:
+		write_sysreg_el0(ctx->cnt_cval, cntv_cval);
		isb();
-		write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl);
+		write_sysreg_el0(ctx->cnt_ctl, cntv_ctl);
+		break;
+	case TIMER_PTIMER:
+		write_sysreg_el0(ctx->cnt_cval, cntp_cval);
+		isb();
+		write_sysreg_el0(ctx->cnt_ctl, cntp_ctl);
+		break;
+	case NR_KVM_TIMERS:
+		BUG();
	}
 
-	vtimer->loaded = true;
+	trace_kvm_timer_restore_state(ctx);
+
+	ctx->loaded = true;
 out:
	local_irq_restore(flags);
 }
 
-void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
-{
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
-	vtimer_restore_state(vcpu);
-
-	soft_timer_cancel(&timer->bg_timer);
-}
-
 static void set_cntvoff(u64 cntvoff)
 {
	u32 low = lower_32_bits(cntvoff);
@@ -425,23 +475,32 @@ static void set_cntvoff(u64 cntvoff)
	kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
 }
 
-static inline void set_vtimer_irq_phys_active(struct kvm_vcpu *vcpu, bool active)
+static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
 {
	int r;
-	r = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, active);
+	r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active);
	WARN_ON(r);
 }
 
-static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu)
+static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
 {
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	bool phys_active;
+	struct kvm_vcpu *vcpu = ctx->vcpu;
+	bool phys_active = false;
+
+	/*
+	 * Update the timer output so that it is likely to match the
+	 * state we're about to restore. If the timer expires between
+	 * this point and the register restoration, we'll take the
+	 * interrupt anyway.
+	 */
+	kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);
 
	if (irqchip_in_kernel(vcpu->kvm))
-		phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
-	else
-		phys_active = vtimer->irq.level;
-	set_vtimer_irq_phys_active(vcpu, phys_active);
+		phys_active = kvm_vgic_map_is_active(vcpu, ctx->irq.irq);
+
+	phys_active |= ctx->irq.level;
+
+	set_timer_irq_phys_active(ctx, phys_active);
 }
 
 static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
@@ -466,28 +525,32 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
 
 void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
+	struct timer_map map;
 
	if (unlikely(!timer->enabled))
		return;
 
-	if (static_branch_likely(&has_gic_active_state))
-		kvm_timer_vcpu_load_gic(vcpu);
-	else
+	get_timer_map(vcpu, &map);
+
+	if (static_branch_likely(&has_gic_active_state)) {
+		kvm_timer_vcpu_load_gic(map.direct_vtimer);
+		if (map.direct_ptimer)
+			kvm_timer_vcpu_load_gic(map.direct_ptimer);
+	} else {
		kvm_timer_vcpu_load_nogic(vcpu);
+	}
 
-	set_cntvoff(vtimer->cntvoff);
+	set_cntvoff(map.direct_vtimer->cntvoff);
 
-	vtimer_restore_state(vcpu);
+	kvm_timer_unblocking(vcpu);
 
-	/* Set the background timer for the physical timer emulation. */
-	phys_timer_emulate(vcpu);
+	timer_restore_state(map.direct_vtimer);
+	if (map.direct_ptimer)
+		timer_restore_state(map.direct_ptimer);
 
-	/* If the timer fired while we weren't running, inject it now */
-	if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
-		kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
+	if (map.emul_ptimer)
+		timer_emulate(map.emul_ptimer);
 }
 
 bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
@@ -509,15 +572,20 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
 
 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
+	struct timer_map map;
 
	if (unlikely(!timer->enabled))
		return;
 
-	vtimer_save_state(vcpu);
+	get_timer_map(vcpu, &map);
+
+	timer_save_state(map.direct_vtimer);
+	if (map.direct_ptimer)
+		timer_save_state(map.direct_ptimer);
 
	/*
-	 * Cancel the physical timer emulation, because the only case where we
+	 * Cancel soft timer emulation, because the only case where we
	 * need it after a vcpu_put is in the context of a sleeping VCPU, and
	 * in that case we already factor in the deadline for the physical
	 * timer when scheduling the bg_timer.
@@ -525,7 +593,11 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
	 * In any case, we re-schedule the hrtimer for the physical timer when
	 * coming back to the VCPU thread in kvm_timer_vcpu_load().
	 */
-	soft_timer_cancel(&timer->phys_timer);
+	if (map.emul_ptimer)
+		soft_timer_cancel(&map.emul_ptimer->hrtimer);
+
+	if (swait_active(kvm_arch_vcpu_wq(vcpu)))
+		kvm_timer_blocking(vcpu);
 
	/*
	 * The kernel may decide to run userspace after calling vcpu_put, so
@@ -534,8 +606,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
	 * counter of non-VHE case. For VHE, the virtual counter uses a fixed
	 * virtual offset of zero, so no need to zero CNTVOFF_EL2 register.
	 */
-	if (!has_vhe())
-		set_cntvoff(0);
+	set_cntvoff(0);
 }
 
 /*
@@ -550,7 +621,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
	if (!kvm_timer_should_fire(vtimer)) {
		kvm_timer_update_irq(vcpu, false, vtimer);
		if (static_branch_likely(&has_gic_active_state))
-			set_vtimer_irq_phys_active(vcpu, false);
+			set_timer_irq_phys_active(vtimer, false);
		else
			enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	}
@@ -558,7 +629,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
 
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 
	if (unlikely(!timer->enabled))
		return;
@@ -569,9 +640,10 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
+	struct timer_map map;
+
+	get_timer_map(vcpu, &map);
 
	/*
	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
@@ -579,12 +651,22 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
	 * resets the timer to be disabled and unmasked and is compliant with
	 * the ARMv7 architecture.
	 */
-	vtimer->cnt_ctl = 0;
-	ptimer->cnt_ctl = 0;
-	kvm_timer_update_state(vcpu);
+	vcpu_vtimer(vcpu)->cnt_ctl = 0;
+	vcpu_ptimer(vcpu)->cnt_ctl = 0;
 
-	if (timer->enabled && irqchip_in_kernel(vcpu->kvm))
-		kvm_vgic_reset_mapped_irq(vcpu, vtimer->irq.irq);
+	if (timer->enabled) {
+		kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu));
+		kvm_timer_update_irq(vcpu, false, vcpu_ptimer(vcpu));
+
+		if (irqchip_in_kernel(vcpu->kvm)) {
+			kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq);
+			if (map.direct_ptimer)
+				kvm_vgic_reset_mapped_irq(vcpu, map.direct_ptimer->irq.irq);
+		}
+	}
+
+	if (map.emul_ptimer)
+		soft_timer_cancel(&map.emul_ptimer->hrtimer);
 
	return 0;
 }
@@ -610,56 +692,76 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
 
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
	/* Synchronize cntvoff across all vtimers of a VM. */
	update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
-	vcpu_ptimer(vcpu)->cntvoff = 0;
+	ptimer->cntvoff = 0;
 
	hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	timer->bg_timer.function = kvm_bg_timer_expire;
 
-	hrtimer_init(&timer->phys_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-	timer->phys_timer.function = kvm_phys_timer_expire;
+	hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	vtimer->hrtimer.function = kvm_hrtimer_expire;
+	ptimer->hrtimer.function = kvm_hrtimer_expire;
 
	vtimer->irq.irq = default_vtimer_irq.irq;
	ptimer->irq.irq = default_ptimer_irq.irq;
+
+	vtimer->host_timer_irq = host_vtimer_irq;
+	ptimer->host_timer_irq = host_ptimer_irq;
+
+	vtimer->host_timer_irq_flags = host_vtimer_irq_flags;
+	ptimer->host_timer_irq_flags = host_ptimer_irq_flags;
+
+	vtimer->vcpu = vcpu;
+	ptimer->vcpu = vcpu;
 }
 
 static void kvm_timer_init_interrupt(void *info)
 {
	enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
+	enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
 }
 
 int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
 {
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+	struct arch_timer_context *timer;
+	bool level;
 
	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
-		vtimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT;
+		timer = vcpu_vtimer(vcpu);
+		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
		break;
	case KVM_REG_ARM_TIMER_CNT:
+		timer = vcpu_vtimer(vcpu);
		update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value);
		break;
	case KVM_REG_ARM_TIMER_CVAL:
-		vtimer->cnt_cval = value;
+		timer = vcpu_vtimer(vcpu);
+		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
		break;
	case KVM_REG_ARM_PTIMER_CTL:
-		ptimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT;
+		timer = vcpu_ptimer(vcpu);
+		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
		break;
	case KVM_REG_ARM_PTIMER_CVAL:
-		ptimer->cnt_cval = value;
+		timer = vcpu_ptimer(vcpu);
+		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
		break;
 
	default:
		return -1;
	}
 
-	kvm_timer_update_state(vcpu);
+	level = kvm_timer_should_fire(timer);
+	kvm_timer_update_irq(vcpu, level, timer);
+	timer_emulate(timer);
+
	return 0;
 }
 
@@ -679,26 +781,113 @@ static u64 read_timer_ctl(struct arch_timer_context *timer)
 
 u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
 {
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-
	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
-		return read_timer_ctl(vtimer);
+		return kvm_arm_timer_read(vcpu,
+					  vcpu_vtimer(vcpu), TIMER_REG_CTL);
	case KVM_REG_ARM_TIMER_CNT:
-		return kvm_phys_timer_read() - vtimer->cntvoff;
+		return kvm_arm_timer_read(vcpu,
+					  vcpu_vtimer(vcpu), TIMER_REG_CNT);
	case KVM_REG_ARM_TIMER_CVAL:
-		return vtimer->cnt_cval;
+		return kvm_arm_timer_read(vcpu,
+					  vcpu_vtimer(vcpu), TIMER_REG_CVAL);
	case KVM_REG_ARM_PTIMER_CTL:
-		return read_timer_ctl(ptimer);
-	case KVM_REG_ARM_PTIMER_CVAL:
-		return ptimer->cnt_cval;
+		return kvm_arm_timer_read(vcpu,
+					  vcpu_ptimer(vcpu), TIMER_REG_CTL);
	case KVM_REG_ARM_PTIMER_CNT:
-		return kvm_phys_timer_read();
+		return kvm_arm_timer_read(vcpu,
+					  vcpu_vtimer(vcpu), TIMER_REG_CNT);
+	case KVM_REG_ARM_PTIMER_CVAL:
+		return kvm_arm_timer_read(vcpu,
+					  vcpu_ptimer(vcpu), TIMER_REG_CVAL);
	}
	return (u64)-1;
 }
 
+static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
+			      struct arch_timer_context *timer,
+			      enum kvm_arch_timer_regs treg)
+{
+	u64 val;
+
+	switch (treg) {
+	case TIMER_REG_TVAL:
+		val = kvm_phys_timer_read() - timer->cntvoff - timer->cnt_cval;
+		break;
+
+	case TIMER_REG_CTL:
+		val = read_timer_ctl(timer);
+		break;
+
+	case TIMER_REG_CVAL:
+		val = timer->cnt_cval;
+		break;
+
+	case TIMER_REG_CNT:
+		val = kvm_phys_timer_read() - timer->cntvoff;
+		break;
+
+	default:
+		BUG();
+	}
+
+	return val;
+}
+
+u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
+			      enum kvm_arch_timers tmr,
+			      enum kvm_arch_timer_regs treg)
+{
+	u64 val;
+
+	preempt_disable();
+	kvm_timer_vcpu_put(vcpu);
+
+	val = kvm_arm_timer_read(vcpu, vcpu_get_timer(vcpu, tmr), treg);
+
+	kvm_timer_vcpu_load(vcpu);
+	preempt_enable();
+
+	return val;
+}
+
+static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
+				struct arch_timer_context *timer,
+				enum kvm_arch_timer_regs treg,
+				u64 val)
+{
+	switch (treg) {
+	case TIMER_REG_TVAL:
+		timer->cnt_cval = val - kvm_phys_timer_read() - timer->cntvoff;
+		break;
+
+	case TIMER_REG_CTL:
+		timer->cnt_ctl = val & ~ARCH_TIMER_CTRL_IT_STAT;
+		break;
+
+	case TIMER_REG_CVAL:
+		timer->cnt_cval = val;
+		break;
+
+	default:
+		BUG();
+	}
+}
+
+void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
+				enum kvm_arch_timers tmr,
+				enum kvm_arch_timer_regs treg,
+				u64 val)
+{
+	preempt_disable();
+	kvm_timer_vcpu_put(vcpu);
+
+	kvm_arm_timer_write(vcpu, vcpu_get_timer(vcpu, tmr), treg, val);
+
+	kvm_timer_vcpu_load(vcpu);
+	preempt_enable();
+}
+
 static int kvm_timer_starting_cpu(unsigned int cpu)
 {
	kvm_timer_init_interrupt(NULL);
@@ -724,6 +913,8 @@ int kvm_timer_hyp_init(bool has_gic)
		return -ENODEV;
	}
 
+	/* First, do the virtual EL1 timer irq */
+
	if (info->virtual_irq <= 0) {
		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
			info->virtual_irq);
@@ -734,15 +925,15 @@
	host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
	if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
	    host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
-		kvm_err("Invalid trigger for IRQ%d, assuming level low\n",
+		kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n",
			host_vtimer_irq);
		host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
	}
 
	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
-				 "kvm guest timer", kvm_get_running_vcpus());
+				 "kvm guest vtimer", kvm_get_running_vcpus());
	if (err) {
-		kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
+		kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
			host_vtimer_irq, err);
		return err;
	}
@@ -760,6 +951,43 @@ int kvm_timer_hyp_init(bool has_gic)
 
	kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);
 
+	/* Now let's do the physical EL1 timer irq */
+
+	if (info->physical_irq > 0) {
+		host_ptimer_irq = info->physical_irq;
+		host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq);
+		if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH &&
+		    host_ptimer_irq_flags != IRQF_TRIGGER_LOW) {
+			kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n",
+				host_ptimer_irq);
+			host_ptimer_irq_flags = IRQF_TRIGGER_LOW;
+		}
+
+		err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
+					 "kvm guest ptimer", kvm_get_running_vcpus());
+		if (err) {
+			kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
+				host_ptimer_irq, err);
+			return err;
+		}
+
+		if (has_gic) {
+			err = irq_set_vcpu_affinity(host_ptimer_irq,
+						    kvm_get_running_vcpus());
+			if (err) {
+				kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
+				goto out_free_irq;
+			}
+		}
+
+		kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
+	} else if (has_vhe()) {
+		kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
+			info->physical_irq);
+		err = -ENODEV;
+		goto out_free_irq;
+	}
+
	cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
			  "kvm/arm/timer:starting", kvm_timer_starting_cpu,
			  kvm_timer_dying_cpu);
@@ -771,7 +999,7 @@ out_free_irq:
 
 void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 
	soft_timer_cancel(&timer->bg_timer);
 }
@@ -807,16 +1035,18 @@ bool kvm_arch_timer_get_input_level(int vintid)
 
	if (vintid == vcpu_vtimer(vcpu)->irq.irq)
		timer = vcpu_vtimer(vcpu);
+	else if (vintid == vcpu_ptimer(vcpu)->irq.irq)
+		timer = vcpu_ptimer(vcpu);
	else
-		BUG(); /* We only map the vtimer so far */
+		BUG();
 
	return kvm_timer_should_fire(timer);
 }
 
 int kvm_timer_enable(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
+	struct timer_map map;
	int ret;
 
	if (timer->enabled)
@@ -834,19 +1064,33 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
		return -EINVAL;
	}
 
-	ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq,
+	get_timer_map(vcpu, &map);
+
+	ret = kvm_vgic_map_phys_irq(vcpu,
+				    map.direct_vtimer->host_timer_irq,
+				    map.direct_vtimer->irq.irq,
				    kvm_arch_timer_get_input_level);
	if (ret)
		return ret;
 
+	if (map.direct_ptimer) {
+		ret = kvm_vgic_map_phys_irq(vcpu,
+					    map.direct_ptimer->host_timer_irq,
+					    map.direct_ptimer->irq.irq,
+					    kvm_arch_timer_get_input_level);
+	}
+
+	if (ret)
+		return ret;
+
 no_vgic:
	timer->enabled = 1;
	return 0;
 }
 
 /*
- * On VHE system, we only need to configure trap on physical timer and counter
- * accesses in EL0 and EL1 once, not for every world switch.
+ * On VHE system, we only need to configure the EL2 timer trap register once,
+ * not for every world switch.
  * The host kernel runs at EL2 with HCR_EL2.TGE == 1,
  * and this makes those bits have no effect for the host kernel execution.
  */
@@ -857,11 +1101,11 @@ void kvm_timer_init_vhe(void)
	u64 val;
 
	/*
-	 * Disallow physical timer access for the guest.
-	 * Physical counter access is allowed.
+	 * VHE systems allow the guest direct access to the EL1 physical
+	 * timer/counter.
	 */
	val = read_sysreg(cnthctl_el2);
-	val &= ~(CNTHCTL_EL1PCEN << cnthctl_shift);
+	val |= (CNTHCTL_EL1PCEN << cnthctl_shift);
	val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);
	write_sysreg(val, cnthctl_el2);
 }
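The final hunk above is the behavioural core of the VHE change: with the physical timer now assigned to the guest, kvm_timer_init_vhe() sets the EL1PCEN bit instead of clearing it. A minimal sketch of the resulting trap configuration (bit positions via cnthctl_shift exactly as in the code above; the comments on what each bit gates reflect the ARMv8 CNTHCTL_EL2 definition and are not part of this diff):

	/* Sketch: VHE host granting the guest EL1 physical timer/counter */
	u64 val = read_sysreg(cnthctl_el2);
	val |= (CNTHCTL_EL1PCEN  << cnthctl_shift);	/* don't trap CNTP_* timer registers */
	val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);	/* don't trap CNTPCT counter reads */
	write_sysreg(val, cnthctl_el2);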
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 9c486fad3f9f..99c37384ba7b 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -65,7 +65,6 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
 /* The VMID used in the VTTBR */
 static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u32 kvm_next_vmid;
-static unsigned int kvm_vmid_bits __read_mostly;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
 static bool vgic_present;
@@ -142,7 +141,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
	kvm_vgic_early_init(kvm);
 
	/* Mark the initial VMID generation invalid */
-	kvm->arch.vmid_gen = 0;
+	kvm->arch.vmid.vmid_gen = 0;
 
	/* The maximum number of VCPUs is limited by the host's GIC model */
	kvm->arch.max_vcpus = vgic_present ?
@@ -336,13 +335,11 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
 {
-	kvm_timer_schedule(vcpu);
	kvm_vgic_v4_enable_doorbell(vcpu);
 }
 
 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
 {
-	kvm_timer_unschedule(vcpu);
	kvm_vgic_v4_disable_doorbell(vcpu);
 }
 
@@ -472,37 +469,31 @@ void force_vm_exit(const cpumask_t *mask)
 
 /**
  * need_new_vmid_gen - check that the VMID is still valid
- * @kvm: The VM's VMID to check
+ * @vmid: The VMID to check
  *
  * return true if there is a new generation of VMIDs being used
  *
- * The hardware supports only 256 values with the value zero reserved for the
- * host, so we check if an assigned value belongs to a previous generation,
- * which which requires us to assign a new value. If we're the first to use a
- * VMID for the new generation, we must flush necessary caches and TLBs on all
- * CPUs.
+ * The hardware supports a limited set of values with the value zero reserved
+ * for the host, so we check if an assigned value belongs to a previous
+ * generation, which which requires us to assign a new value. If we're the
+ * first to use a VMID for the new generation, we must flush necessary caches
+ * and TLBs on all CPUs.
  */
-static bool need_new_vmid_gen(struct kvm *kvm)
+static bool need_new_vmid_gen(struct kvm_vmid *vmid)
 {
	u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
	smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
-	return unlikely(READ_ONCE(kvm->arch.vmid_gen) != current_vmid_gen);
+	return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen);
 }
 
 /**
- * update_vttbr - Update the VTTBR with a valid VMID before the guest runs
- * @kvm The guest that we are about to run
- *
- * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure the
- * VM has a valid VMID, otherwise assigns a new one and flushes corresponding
- * caches and TLBs.
+ * update_vmid - Update the vmid with a valid VMID for the current generation
+ * @kvm: The guest that struct vmid belongs to
+ * @vmid: The stage-2 VMID information struct
  */
-static void update_vttbr(struct kvm *kvm)
+static void update_vmid(struct kvm_vmid *vmid)
 {
-	phys_addr_t pgd_phys;
-	u64 vmid, cnp = kvm_cpu_has_cnp() ? VTTBR_CNP_BIT : 0;
-
-	if (!need_new_vmid_gen(kvm))
+	if (!need_new_vmid_gen(vmid))
		return;
 
	spin_lock(&kvm_vmid_lock);
@@ -512,7 +503,7 @@ static void update_vttbr(struct kvm *kvm)
	 * already allocated a valid vmid for this vm, then this vcpu should
	 * use the same vmid.
	 */
-	if (!need_new_vmid_gen(kvm)) {
+	if (!need_new_vmid_gen(vmid)) {
		spin_unlock(&kvm_vmid_lock);
		return;
	}
@@ -536,18 +527,12 @@ static void update_vttbr(struct kvm *kvm)
		kvm_call_hyp(__kvm_flush_vm_context);
	}
 
-	kvm->arch.vmid = kvm_next_vmid;
+	vmid->vmid = kvm_next_vmid;
	kvm_next_vmid++;
-	kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
-
-	/* update vttbr to be used with the new vmid */
-	pgd_phys = virt_to_phys(kvm->arch.pgd);
-	BUG_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm));
-	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
-	kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid | cnp;
+	kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
 
	smp_wmb();
-	WRITE_ONCE(kvm->arch.vmid_gen, atomic64_read(&kvm_vmid_gen));
+	WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen));
 
	spin_unlock(&kvm_vmid_lock);
 }
@@ -700,7 +685,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
700 | */ | 685 | */ |
701 | cond_resched(); | 686 | cond_resched(); |
702 | 687 | ||
703 | update_vttbr(vcpu->kvm); | 688 | update_vmid(&vcpu->kvm->arch.vmid); |
704 | 689 | ||
705 | check_vcpu_requests(vcpu); | 690 | check_vcpu_requests(vcpu); |
706 | 691 | ||
@@ -749,7 +734,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
749 | */ | 734 | */ |
750 | smp_store_mb(vcpu->mode, IN_GUEST_MODE); | 735 | smp_store_mb(vcpu->mode, IN_GUEST_MODE); |
751 | 736 | ||
752 | if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || | 737 | if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) || |
753 | kvm_request_pending(vcpu)) { | 738 | kvm_request_pending(vcpu)) { |
754 | vcpu->mode = OUTSIDE_GUEST_MODE; | 739 | vcpu->mode = OUTSIDE_GUEST_MODE; |
755 | isb(); /* Ensure work in x_flush_hwstate is committed */ | 740 | isb(); /* Ensure work in x_flush_hwstate is committed */ |
@@ -775,7 +760,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
775 | ret = kvm_vcpu_run_vhe(vcpu); | 760 | ret = kvm_vcpu_run_vhe(vcpu); |
776 | kvm_arm_vhe_guest_exit(); | 761 | kvm_arm_vhe_guest_exit(); |
777 | } else { | 762 | } else { |
778 | ret = kvm_call_hyp(__kvm_vcpu_run_nvhe, vcpu); | 763 | ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu); |
779 | } | 764 | } |
780 | 765 | ||
781 | vcpu->mode = OUTSIDE_GUEST_MODE; | 766 | vcpu->mode = OUTSIDE_GUEST_MODE; |
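kvm_call_hyp_ret() is new in this series: it is the variant for hyp calls that return a value, letting VHE builds turn both flavours into ordinary function calls instead of an HVC. Usage, as in the hunks above:

	kvm_call_hyp(__kvm_flush_vm_context);              /* void hyp call */
	ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu); /* value-returning */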
@@ -1427,10 +1412,6 @@ static inline void hyp_cpu_pm_exit(void) | |||
1427 | 1412 | ||
1428 | static int init_common_resources(void) | 1413 | static int init_common_resources(void) |
1429 | { | 1414 | { |
1430 | /* set size of VMID supported by CPU */ | ||
1431 | kvm_vmid_bits = kvm_get_vmid_bits(); | ||
1432 | kvm_info("%d-bit VMID\n", kvm_vmid_bits); | ||
1433 | |||
1434 | kvm_set_ipa_limit(); | 1415 | kvm_set_ipa_limit(); |
1435 | 1416 | ||
1436 | return 0; | 1417 | return 0; |
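With the cached kvm_vmid_bits variable gone, the VMID width is read from the sanitised ID registers on demand via kvm_get_vmid_bits(). A simplified sketch of the arm64 flavour (assumed shape, from asm/kvm_mmu.h):

	static inline unsigned int kvm_get_vmid_bits(void)
	{
		int reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);

		return (cpuid_feature_extract_unsigned_field(reg,
				ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
	}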
@@ -1571,6 +1552,7 @@ static int init_hyp_mode(void) | |||
1571 | kvm_cpu_context_t *cpu_ctxt; | 1552 | kvm_cpu_context_t *cpu_ctxt; |
1572 | 1553 | ||
1573 | cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu); | 1554 | cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu); |
1555 | kvm_init_host_cpu_context(cpu_ctxt, cpu); | ||
1574 | err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP); | 1556 | err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP); |
1575 | 1557 | ||
1576 | if (err) { | 1558 | if (err) { |
@@ -1581,7 +1563,7 @@ static int init_hyp_mode(void) | |||
1581 | 1563 | ||
1582 | err = hyp_map_aux_data(); | 1564 | err = hyp_map_aux_data(); |
1583 | if (err) | 1565 | if (err) |
1584 | kvm_err("Cannot map host auxilary data: %d\n", err); | 1566 | kvm_err("Cannot map host auxiliary data: %d\n", err); |
1585 | 1567 | ||
1586 | return 0; | 1568 | return 0; |
1587 | 1569 | ||
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 9652c453480f..264d92da3240 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c | |||
@@ -226,7 +226,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) | |||
226 | int i; | 226 | int i; |
227 | u32 elrsr; | 227 | u32 elrsr; |
228 | 228 | ||
229 | elrsr = read_gicreg(ICH_ELSR_EL2); | 229 | elrsr = read_gicreg(ICH_ELRSR_EL2); |
230 | 230 | ||
231 | write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2); | 231 | write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2); |
232 | 232 | ||
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index e9d28a7ca673..ffd7acdceac7 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c | |||
@@ -908,6 +908,7 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, | |||
908 | */ | 908 | */ |
909 | int kvm_alloc_stage2_pgd(struct kvm *kvm) | 909 | int kvm_alloc_stage2_pgd(struct kvm *kvm) |
910 | { | 910 | { |
911 | phys_addr_t pgd_phys; | ||
911 | pgd_t *pgd; | 912 | pgd_t *pgd; |
912 | 913 | ||
913 | if (kvm->arch.pgd != NULL) { | 914 | if (kvm->arch.pgd != NULL) { |
@@ -920,7 +921,12 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) | |||
920 | if (!pgd) | 921 | if (!pgd) |
921 | return -ENOMEM; | 922 | return -ENOMEM; |
922 | 923 | ||
924 | pgd_phys = virt_to_phys(pgd); | ||
925 | if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm))) | ||
926 | return -EINVAL; | ||
927 | |||
923 | kvm->arch.pgd = pgd; | 928 | kvm->arch.pgd = pgd; |
929 | kvm->arch.pgd_phys = pgd_phys; | ||
924 | return 0; | 930 | return 0; |
925 | } | 931 | } |
926 | 932 | ||
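Caching pgd_phys here pairs with the vttbr removal in arm.c above: the VTTBR is now assembled on demand at guest entry from the cached PGD address and the per-VM VMID. A sketch of the arm64 helper this series adds (kvm_get_vttbr(), simplified):

	static __always_inline u64 kvm_get_vttbr(struct kvm *kvm)
	{
		struct kvm_vmid *vmid = &kvm->arch.vmid;
		u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0;
		u64 vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT;

		return kvm_phys_to_vttbr(kvm->arch.pgd_phys) | vmid_field | cnp;
	}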
@@ -1008,6 +1014,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm) | |||
1008 | unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); | 1014 | unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); |
1009 | pgd = READ_ONCE(kvm->arch.pgd); | 1015 | pgd = READ_ONCE(kvm->arch.pgd); |
1010 | kvm->arch.pgd = NULL; | 1016 | kvm->arch.pgd = NULL; |
1017 | kvm->arch.pgd_phys = 0; | ||
1011 | } | 1018 | } |
1012 | spin_unlock(&kvm->mmu_lock); | 1019 | spin_unlock(&kvm->mmu_lock); |
1013 | 1020 | ||
@@ -1396,14 +1403,6 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap) | |||
1396 | return false; | 1403 | return false; |
1397 | } | 1404 | } |
1398 | 1405 | ||
1399 | static bool kvm_is_write_fault(struct kvm_vcpu *vcpu) | ||
1400 | { | ||
1401 | if (kvm_vcpu_trap_is_iabt(vcpu)) | ||
1402 | return false; | ||
1403 | |||
1404 | return kvm_vcpu_dabt_iswrite(vcpu); | ||
1405 | } | ||
1406 | |||
1407 | /** | 1406 | /** |
1408 | * stage2_wp_ptes - write protect PMD range | 1407 | * stage2_wp_ptes - write protect PMD range |
1409 | * @pmd: pointer to pmd entry | 1408 | * @pmd: pointer to pmd entry |
@@ -1598,14 +1597,13 @@ static void kvm_send_hwpoison_signal(unsigned long address, | |||
1598 | static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot, | 1597 | static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot, |
1599 | unsigned long hva) | 1598 | unsigned long hva) |
1600 | { | 1599 | { |
1601 | gpa_t gpa_start, gpa_end; | 1600 | gpa_t gpa_start; |
1602 | hva_t uaddr_start, uaddr_end; | 1601 | hva_t uaddr_start, uaddr_end; |
1603 | size_t size; | 1602 | size_t size; |
1604 | 1603 | ||
1605 | size = memslot->npages * PAGE_SIZE; | 1604 | size = memslot->npages * PAGE_SIZE; |
1606 | 1605 | ||
1607 | gpa_start = memslot->base_gfn << PAGE_SHIFT; | 1606 | gpa_start = memslot->base_gfn << PAGE_SHIFT; |
1608 | gpa_end = gpa_start + size; | ||
1609 | 1607 | ||
1610 | uaddr_start = memslot->userspace_addr; | 1608 | uaddr_start = memslot->userspace_addr; |
1611 | uaddr_end = uaddr_start + size; | 1609 | uaddr_end = uaddr_start + size; |
@@ -2353,7 +2351,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
2353 | return 0; | 2351 | return 0; |
2354 | } | 2352 | } |
2355 | 2353 | ||
2356 | void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) | 2354 | void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) |
2357 | { | 2355 | { |
2358 | } | 2356 | } |
2359 | 2357 | ||
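The kvm_is_write_fault() deletion above is a move rather than a removal: the direct-ptimer work needs it outside mmu.c, so it becomes a static inline in asm/kvm_emulate.h (note the new include in arch_timer.c). Same body, sketched:

	static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
	{
		if (kvm_vcpu_trap_is_iabt(vcpu))
			return false;

		return kvm_vcpu_dabt_iswrite(vcpu);
	}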
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h index 3828beab93f2..204d210d01c2 100644 --- a/virt/kvm/arm/trace.h +++ b/virt/kvm/arm/trace.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) | 2 | #if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) |
3 | #define _TRACE_KVM_H | 3 | #define _TRACE_KVM_H |
4 | 4 | ||
5 | #include <kvm/arm_arch_timer.h> | ||
5 | #include <linux/tracepoint.h> | 6 | #include <linux/tracepoint.h> |
6 | 7 | ||
7 | #undef TRACE_SYSTEM | 8 | #undef TRACE_SYSTEM |
@@ -262,10 +263,114 @@ TRACE_EVENT(kvm_timer_update_irq, | |||
262 | __entry->vcpu_id, __entry->irq, __entry->level) | 263 | __entry->vcpu_id, __entry->irq, __entry->level) |
263 | ); | 264 | ); |
264 | 265 | ||
266 | TRACE_EVENT(kvm_get_timer_map, | ||
267 | TP_PROTO(unsigned long vcpu_id, struct timer_map *map), | ||
268 | TP_ARGS(vcpu_id, map), | ||
269 | |||
270 | TP_STRUCT__entry( | ||
271 | __field( unsigned long, vcpu_id ) | ||
272 | __field( int, direct_vtimer ) | ||
273 | __field( int, direct_ptimer ) | ||
274 | __field( int, emul_ptimer ) | ||
275 | ), | ||
276 | |||
277 | TP_fast_assign( | ||
278 | __entry->vcpu_id = vcpu_id; | ||
279 | __entry->direct_vtimer = arch_timer_ctx_index(map->direct_vtimer); | ||
280 | __entry->direct_ptimer = | ||
281 | (map->direct_ptimer) ? arch_timer_ctx_index(map->direct_ptimer) : -1; | ||
282 | __entry->emul_ptimer = | ||
283 | (map->emul_ptimer) ? arch_timer_ctx_index(map->emul_ptimer) : -1; | ||
284 | ), | ||
285 | |||
286 | TP_printk("VCPU: %ld, dv: %d, dp: %d, ep: %d", | ||
287 | __entry->vcpu_id, | ||
288 | __entry->direct_vtimer, | ||
289 | __entry->direct_ptimer, | ||
290 | __entry->emul_ptimer) | ||
291 | ); | ||
292 | |||
293 | TRACE_EVENT(kvm_timer_save_state, | ||
294 | TP_PROTO(struct arch_timer_context *ctx), | ||
295 | TP_ARGS(ctx), | ||
296 | |||
297 | TP_STRUCT__entry( | ||
298 | __field( unsigned long, ctl ) | ||
299 | __field( unsigned long long, cval ) | ||
300 | __field( int, timer_idx ) | ||
301 | ), | ||
302 | |||
303 | TP_fast_assign( | ||
304 | __entry->ctl = ctx->cnt_ctl; | ||
305 | __entry->cval = ctx->cnt_cval; | ||
306 | __entry->timer_idx = arch_timer_ctx_index(ctx); | ||
307 | ), | ||
308 | |||
309 | TP_printk(" CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d", | ||
310 | __entry->ctl, | ||
311 | __entry->cval, | ||
312 | __entry->timer_idx) | ||
313 | ); | ||
314 | |||
315 | TRACE_EVENT(kvm_timer_restore_state, | ||
316 | TP_PROTO(struct arch_timer_context *ctx), | ||
317 | TP_ARGS(ctx), | ||
318 | |||
319 | TP_STRUCT__entry( | ||
320 | __field( unsigned long, ctl ) | ||
321 | __field( unsigned long long, cval ) | ||
322 | __field( int, timer_idx ) | ||
323 | ), | ||
324 | |||
325 | TP_fast_assign( | ||
326 | __entry->ctl = ctx->cnt_ctl; | ||
327 | __entry->cval = ctx->cnt_cval; | ||
328 | __entry->timer_idx = arch_timer_ctx_index(ctx); | ||
329 | ), | ||
330 | |||
331 | TP_printk("CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d", | ||
332 | __entry->ctl, | ||
333 | __entry->cval, | ||
334 | __entry->timer_idx) | ||
335 | ); | ||
336 | |||
337 | TRACE_EVENT(kvm_timer_hrtimer_expire, | ||
338 | TP_PROTO(struct arch_timer_context *ctx), | ||
339 | TP_ARGS(ctx), | ||
340 | |||
341 | TP_STRUCT__entry( | ||
342 | __field( int, timer_idx ) | ||
343 | ), | ||
344 | |||
345 | TP_fast_assign( | ||
346 | __entry->timer_idx = arch_timer_ctx_index(ctx); | ||
347 | ), | ||
348 | |||
349 | TP_printk("arch_timer_ctx_index: %d", __entry->timer_idx) | ||
350 | ); | ||
351 | |||
352 | TRACE_EVENT(kvm_timer_emulate, | ||
353 | TP_PROTO(struct arch_timer_context *ctx, bool should_fire), | ||
354 | TP_ARGS(ctx, should_fire), | ||
355 | |||
356 | TP_STRUCT__entry( | ||
357 | __field( int, timer_idx ) | ||
358 | __field( bool, should_fire ) | ||
359 | ), | ||
360 | |||
361 | TP_fast_assign( | ||
362 | __entry->timer_idx = arch_timer_ctx_index(ctx); | ||
363 | __entry->should_fire = should_fire; | ||
364 | ), | ||
365 | |||
366 | TP_printk("arch_timer_ctx_index: %d (should_fire: %d)", | ||
367 | __entry->timer_idx, __entry->should_fire) | ||
368 | ); | ||
369 | |||
265 | #endif /* _TRACE_KVM_H */ | 370 | #endif /* _TRACE_KVM_H */ |
266 | 371 | ||
267 | #undef TRACE_INCLUDE_PATH | 372 | #undef TRACE_INCLUDE_PATH |
268 | #define TRACE_INCLUDE_PATH ../../../virt/kvm/arm | 373 | #define TRACE_INCLUDE_PATH ../../virt/kvm/arm |
269 | #undef TRACE_INCLUDE_FILE | 374 | #undef TRACE_INCLUDE_FILE |
270 | #define TRACE_INCLUDE_FILE trace | 375 | #define TRACE_INCLUDE_FILE trace |
271 | 376 | ||
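Each TRACE_EVENT above generates a trace_<name>() helper; the call shapes follow the TP_PROTOs, e.g. (illustrative call sites as they would appear in arch_timer.c):

	trace_kvm_get_timer_map(vcpu->vcpu_id, &map);
	trace_kvm_timer_save_state(ctx);
	trace_kvm_timer_emulate(ctx, should_fire);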
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 4ee0aeb9a905..408a78eb6a97 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c | |||
@@ -589,7 +589,7 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); | |||
589 | */ | 589 | */ |
590 | int vgic_v3_probe(const struct gic_kvm_info *info) | 590 | int vgic_v3_probe(const struct gic_kvm_info *info) |
591 | { | 591 | { |
592 | u32 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2); | 592 | u32 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_ich_vtr_el2); |
593 | int ret; | 593 | int ret; |
594 | 594 | ||
595 | /* | 595 | /* |
@@ -679,7 +679,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu) | |||
679 | struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; | 679 | struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; |
680 | 680 | ||
681 | if (likely(cpu_if->vgic_sre)) | 681 | if (likely(cpu_if->vgic_sre)) |
682 | cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr); | 682 | cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); |
683 | 683 | ||
684 | kvm_call_hyp(__vgic_v3_save_aprs, vcpu); | 684 | kvm_call_hyp(__vgic_v3_save_aprs, vcpu); |
685 | 685 | ||
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 6855cce3e528..5294abb3f178 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c | |||
@@ -144,7 +144,8 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, | |||
144 | if (zone->pio != 1 && zone->pio != 0) | 144 | if (zone->pio != 1 && zone->pio != 0) |
145 | return -EINVAL; | 145 | return -EINVAL; |
146 | 146 | ||
147 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); | 147 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), |
148 | GFP_KERNEL_ACCOUNT); | ||
148 | if (!dev) | 149 | if (!dev) |
149 | return -ENOMEM; | 150 | return -ENOMEM; |
150 | 151 | ||
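GFP_KERNEL_ACCOUNT is the theme of the generic memcg-accounting patches in this pull: allocations tied to a VM's lifetime are charged to the caller's memory cgroup. Its definition in <linux/gfp.h>:

	#define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT)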
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index b20b751286fc..4325250afd72 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c | |||
@@ -297,7 +297,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
297 | if (!kvm_arch_intc_initialized(kvm)) | 297 | if (!kvm_arch_intc_initialized(kvm)) |
298 | return -EAGAIN; | 298 | return -EAGAIN; |
299 | 299 | ||
300 | irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); | 300 | irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL_ACCOUNT); |
301 | if (!irqfd) | 301 | if (!irqfd) |
302 | return -ENOMEM; | 302 | return -ENOMEM; |
303 | 303 | ||
@@ -345,7 +345,8 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
345 | } | 345 | } |
346 | 346 | ||
347 | if (!irqfd->resampler) { | 347 | if (!irqfd->resampler) { |
348 | resampler = kzalloc(sizeof(*resampler), GFP_KERNEL); | 348 | resampler = kzalloc(sizeof(*resampler), |
349 | GFP_KERNEL_ACCOUNT); | ||
349 | if (!resampler) { | 350 | if (!resampler) { |
350 | ret = -ENOMEM; | 351 | ret = -ENOMEM; |
351 | mutex_unlock(&kvm->irqfds.resampler_lock); | 352 | mutex_unlock(&kvm->irqfds.resampler_lock); |
@@ -797,7 +798,7 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm, | |||
797 | if (IS_ERR(eventfd)) | 798 | if (IS_ERR(eventfd)) |
798 | return PTR_ERR(eventfd); | 799 | return PTR_ERR(eventfd); |
799 | 800 | ||
800 | p = kzalloc(sizeof(*p), GFP_KERNEL); | 801 | p = kzalloc(sizeof(*p), GFP_KERNEL_ACCOUNT); |
801 | if (!p) { | 802 | if (!p) { |
802 | ret = -ENOMEM; | 803 | ret = -ENOMEM; |
803 | goto fail; | 804 | goto fail; |
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index b1286c4e0712..3547b0d8c91e 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c | |||
@@ -196,7 +196,7 @@ int kvm_set_irq_routing(struct kvm *kvm, | |||
196 | nr_rt_entries += 1; | 196 | nr_rt_entries += 1; |
197 | 197 | ||
198 | new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)), | 198 | new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)), |
199 | GFP_KERNEL); | 199 | GFP_KERNEL_ACCOUNT); |
200 | 200 | ||
201 | if (!new) | 201 | if (!new) |
202 | return -ENOMEM; | 202 | return -ENOMEM; |
@@ -208,7 +208,7 @@ int kvm_set_irq_routing(struct kvm *kvm, | |||
208 | 208 | ||
209 | for (i = 0; i < nr; ++i) { | 209 | for (i = 0; i < nr; ++i) { |
210 | r = -ENOMEM; | 210 | r = -ENOMEM; |
211 | e = kzalloc(sizeof(*e), GFP_KERNEL); | 211 | e = kzalloc(sizeof(*e), GFP_KERNEL_ACCOUNT); |
212 | if (!e) | 212 | if (!e) |
213 | goto out; | 213 | goto out; |
214 | 214 | ||
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d237d3350a99..f25aa98a94df 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -81,6 +81,11 @@ unsigned int halt_poll_ns_grow = 2; | |||
81 | module_param(halt_poll_ns_grow, uint, 0644); | 81 | module_param(halt_poll_ns_grow, uint, 0644); |
82 | EXPORT_SYMBOL_GPL(halt_poll_ns_grow); | 82 | EXPORT_SYMBOL_GPL(halt_poll_ns_grow); |
83 | 83 | ||
84 | /* The start value to grow halt_poll_ns from */ | ||
85 | unsigned int halt_poll_ns_grow_start = 10000; /* 10us */ | ||
86 | module_param(halt_poll_ns_grow_start, uint, 0644); | ||
87 | EXPORT_SYMBOL_GPL(halt_poll_ns_grow_start); | ||
88 | |||
84 | /* Default resets per-vcpu halt_poll_ns . */ | 89 | /* Default resets per-vcpu halt_poll_ns . */ |
85 | unsigned int halt_poll_ns_shrink; | 90 | unsigned int halt_poll_ns_shrink; |
86 | module_param(halt_poll_ns_shrink, uint, 0644); | 91 | module_param(halt_poll_ns_shrink, uint, 0644); |
@@ -525,7 +530,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void) | |||
525 | int i; | 530 | int i; |
526 | struct kvm_memslots *slots; | 531 | struct kvm_memslots *slots; |
527 | 532 | ||
528 | slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 533 | slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT); |
529 | if (!slots) | 534 | if (!slots) |
530 | return NULL; | 535 | return NULL; |
531 | 536 | ||
@@ -601,12 +606,12 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) | |||
601 | 606 | ||
602 | kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, | 607 | kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, |
603 | sizeof(*kvm->debugfs_stat_data), | 608 | sizeof(*kvm->debugfs_stat_data), |
604 | GFP_KERNEL); | 609 | GFP_KERNEL_ACCOUNT); |
605 | if (!kvm->debugfs_stat_data) | 610 | if (!kvm->debugfs_stat_data) |
606 | return -ENOMEM; | 611 | return -ENOMEM; |
607 | 612 | ||
608 | for (p = debugfs_entries; p->name; p++) { | 613 | for (p = debugfs_entries; p->name; p++) { |
609 | stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL); | 614 | stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT); |
610 | if (!stat_data) | 615 | if (!stat_data) |
611 | return -ENOMEM; | 616 | return -ENOMEM; |
612 | 617 | ||
@@ -656,12 +661,8 @@ static struct kvm *kvm_create_vm(unsigned long type) | |||
656 | struct kvm_memslots *slots = kvm_alloc_memslots(); | 661 | struct kvm_memslots *slots = kvm_alloc_memslots(); |
657 | if (!slots) | 662 | if (!slots) |
658 | goto out_err_no_srcu; | 663 | goto out_err_no_srcu; |
659 | /* | 664 | /* Generations must be different for each address space. */ |
660 | * Generations must be different for each address space. | 665 | slots->generation = i; |
661 | * Init kvm generation close to the maximum to easily test the | ||
662 | * code of handling generation number wrap-around. | ||
663 | */ | ||
664 | slots->generation = i * 2 - 150; | ||
665 | rcu_assign_pointer(kvm->memslots[i], slots); | 666 | rcu_assign_pointer(kvm->memslots[i], slots); |
666 | } | 667 | } |
667 | 668 | ||
@@ -671,7 +672,7 @@ static struct kvm *kvm_create_vm(unsigned long type) | |||
671 | goto out_err_no_irq_srcu; | 672 | goto out_err_no_irq_srcu; |
672 | for (i = 0; i < KVM_NR_BUSES; i++) { | 673 | for (i = 0; i < KVM_NR_BUSES; i++) { |
673 | rcu_assign_pointer(kvm->buses[i], | 674 | rcu_assign_pointer(kvm->buses[i], |
674 | kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL)); | 675 | kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT)); |
675 | if (!kvm->buses[i]) | 676 | if (!kvm->buses[i]) |
676 | goto out_err; | 677 | goto out_err; |
677 | } | 678 | } |
@@ -789,7 +790,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) | |||
789 | { | 790 | { |
790 | unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); | 791 | unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); |
791 | 792 | ||
792 | memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL); | 793 | memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL_ACCOUNT); |
793 | if (!memslot->dirty_bitmap) | 794 | if (!memslot->dirty_bitmap) |
794 | return -ENOMEM; | 795 | return -ENOMEM; |
795 | 796 | ||
@@ -874,31 +875,34 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, | |||
874 | int as_id, struct kvm_memslots *slots) | 875 | int as_id, struct kvm_memslots *slots) |
875 | { | 876 | { |
876 | struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id); | 877 | struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id); |
878 | u64 gen = old_memslots->generation; | ||
877 | 879 | ||
878 | /* | 880 | WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS); |
879 | * Set the low bit in the generation, which disables SPTE caching | 881 | slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; |
880 | * until the end of synchronize_srcu_expedited. | ||
881 | */ | ||
882 | WARN_ON(old_memslots->generation & 1); | ||
883 | slots->generation = old_memslots->generation + 1; | ||
884 | 882 | ||
885 | rcu_assign_pointer(kvm->memslots[as_id], slots); | 883 | rcu_assign_pointer(kvm->memslots[as_id], slots); |
886 | synchronize_srcu_expedited(&kvm->srcu); | 884 | synchronize_srcu_expedited(&kvm->srcu); |
887 | 885 | ||
888 | /* | 886 | /* |
889 | * Increment the new memslot generation a second time. This prevents | 887 | * Increment the new memslot generation a second time, dropping the |
890 | * vm exits that race with memslot updates from caching a memslot | 888 | * update in-progress flag and incrementing then generation based on |
891 | * generation that will (potentially) be valid forever. | 889 | * the number of address spaces. This provides a unique and easily |
892 | * | 890 | * identifiable generation number while the memslots are in flux. |
891 | */ | ||
892 | gen = slots->generation & ~KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; | ||
893 | |||
894 | /* | ||
893 | * Generations must be unique even across address spaces. We do not need | 895 | * Generations must be unique even across address spaces. We do not need |
894 | * a global counter for that, instead the generation space is evenly split | 896 | * a global counter for that, instead the generation space is evenly split |
895 | * across address spaces. For example, with two address spaces, address | 897 | * across address spaces. For example, with two address spaces, address |
896 | * space 0 will use generations 0, 4, 8, ... while * address space 1 will | 898 | * space 0 will use generations 0, 2, 4, ... while address space 1 will |
897 | * use generations 2, 6, 10, 14, ... | 899 | * use generations 1, 3, 5, ... |
898 | */ | 900 | */ |
899 | slots->generation += KVM_ADDRESS_SPACE_NUM * 2 - 1; | 901 | gen += KVM_ADDRESS_SPACE_NUM; |
902 | |||
903 | kvm_arch_memslots_updated(kvm, gen); | ||
900 | 904 | ||
901 | kvm_arch_memslots_updated(kvm, slots); | 905 | slots->generation = gen; |
902 | 906 | ||
903 | return old_memslots; | 907 | return old_memslots; |
904 | } | 908 | } |
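With the low bit freed up, memslot generations count in steps of one per address space (see the kvm_create_vm hunk above), and the in-flight marker moves to the top bit. Assumed 5.1 definition from include/linux/kvm_host.h:

	#define KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS	BIT_ULL(63)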
@@ -1018,7 +1022,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
1018 | goto out_free; | 1022 | goto out_free; |
1019 | } | 1023 | } |
1020 | 1024 | ||
1021 | slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 1025 | slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT); |
1022 | if (!slots) | 1026 | if (!slots) |
1023 | goto out_free; | 1027 | goto out_free; |
1024 | memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots)); | 1028 | memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots)); |
@@ -1201,11 +1205,9 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, | |||
1201 | mask = xchg(&dirty_bitmap[i], 0); | 1205 | mask = xchg(&dirty_bitmap[i], 0); |
1202 | dirty_bitmap_buffer[i] = mask; | 1206 | dirty_bitmap_buffer[i] = mask; |
1203 | 1207 | ||
1204 | if (mask) { | 1208 | offset = i * BITS_PER_LONG; |
1205 | offset = i * BITS_PER_LONG; | 1209 | kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, |
1206 | kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, | 1210 | offset, mask); |
1207 | offset, mask); | ||
1208 | } | ||
1209 | } | 1211 | } |
1210 | spin_unlock(&kvm->mmu_lock); | 1212 | spin_unlock(&kvm->mmu_lock); |
1211 | } | 1213 | } |
@@ -2185,20 +2187,23 @@ void kvm_sigset_deactivate(struct kvm_vcpu *vcpu) | |||
2185 | 2187 | ||
2186 | static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) | 2188 | static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) |
2187 | { | 2189 | { |
2188 | unsigned int old, val, grow; | 2190 | unsigned int old, val, grow, grow_start; |
2189 | 2191 | ||
2190 | old = val = vcpu->halt_poll_ns; | 2192 | old = val = vcpu->halt_poll_ns; |
2193 | grow_start = READ_ONCE(halt_poll_ns_grow_start); | ||
2191 | grow = READ_ONCE(halt_poll_ns_grow); | 2194 | grow = READ_ONCE(halt_poll_ns_grow); |
2192 | /* 10us base */ | 2195 | if (!grow) |
2193 | if (val == 0 && grow) | 2196 | goto out; |
2194 | val = 10000; | 2197 | |
2195 | else | 2198 | val *= grow; |
2196 | val *= grow; | 2199 | if (val < grow_start) |
2200 | val = grow_start; | ||
2197 | 2201 | ||
2198 | if (val > halt_poll_ns) | 2202 | if (val > halt_poll_ns) |
2199 | val = halt_poll_ns; | 2203 | val = halt_poll_ns; |
2200 | 2204 | ||
2201 | vcpu->halt_poll_ns = val; | 2205 | vcpu->halt_poll_ns = val; |
2206 | out: | ||
2202 | trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); | 2207 | trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); |
2203 | } | 2208 | } |
2204 | 2209 | ||
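The rewritten grow logic never starts polling below halt_poll_ns_grow_start and never shrinks the window; as a standalone sketch of the same rule (illustration only, not kernel code):

	static unsigned int grow_once(unsigned int val, unsigned int grow,
				      unsigned int grow_start, unsigned int cap)
	{
		if (!grow)
			return val;		/* growth disabled */

		val *= grow;
		if (val < grow_start)
			val = grow_start;	/* never start below grow_start */

		return val > cap ? cap : val;
	}

With the defaults (grow = 2, grow_start = 10000, halt_poll_ns = 500000) a window grows 0 -> 10000 -> 20000 -> ... and saturates at the cap; setting grow to 0 disables growth entirely.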
@@ -2683,7 +2688,7 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
2683 | struct kvm_regs *kvm_regs; | 2688 | struct kvm_regs *kvm_regs; |
2684 | 2689 | ||
2685 | r = -ENOMEM; | 2690 | r = -ENOMEM; |
2686 | kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); | 2691 | kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL_ACCOUNT); |
2687 | if (!kvm_regs) | 2692 | if (!kvm_regs) |
2688 | goto out; | 2693 | goto out; |
2689 | r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); | 2694 | r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); |
@@ -2711,7 +2716,8 @@ out_free1: | |||
2711 | break; | 2716 | break; |
2712 | } | 2717 | } |
2713 | case KVM_GET_SREGS: { | 2718 | case KVM_GET_SREGS: { |
2714 | kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); | 2719 | kvm_sregs = kzalloc(sizeof(struct kvm_sregs), |
2720 | GFP_KERNEL_ACCOUNT); | ||
2715 | r = -ENOMEM; | 2721 | r = -ENOMEM; |
2716 | if (!kvm_sregs) | 2722 | if (!kvm_sregs) |
2717 | goto out; | 2723 | goto out; |
@@ -2803,7 +2809,7 @@ out_free1: | |||
2803 | break; | 2809 | break; |
2804 | } | 2810 | } |
2805 | case KVM_GET_FPU: { | 2811 | case KVM_GET_FPU: { |
2806 | fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); | 2812 | fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL_ACCOUNT); |
2807 | r = -ENOMEM; | 2813 | r = -ENOMEM; |
2808 | if (!fpu) | 2814 | if (!fpu) |
2809 | goto out; | 2815 | goto out; |
@@ -2980,7 +2986,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, | |||
2980 | if (test) | 2986 | if (test) |
2981 | return 0; | 2987 | return 0; |
2982 | 2988 | ||
2983 | dev = kzalloc(sizeof(*dev), GFP_KERNEL); | 2989 | dev = kzalloc(sizeof(*dev), GFP_KERNEL_ACCOUNT); |
2984 | if (!dev) | 2990 | if (!dev) |
2985 | return -ENOMEM; | 2991 | return -ENOMEM; |
2986 | 2992 | ||
@@ -3625,6 +3631,7 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, | |||
3625 | r = __kvm_io_bus_write(vcpu, bus, &range, val); | 3631 | r = __kvm_io_bus_write(vcpu, bus, &range, val); |
3626 | return r < 0 ? r : 0; | 3632 | return r < 0 ? r : 0; |
3627 | } | 3633 | } |
3634 | EXPORT_SYMBOL_GPL(kvm_io_bus_write); | ||
3628 | 3635 | ||
3629 | /* kvm_io_bus_write_cookie - called under kvm->slots_lock */ | 3636 | /* kvm_io_bus_write_cookie - called under kvm->slots_lock */ |
3630 | int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, | 3637 | int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, |
@@ -3675,7 +3682,6 @@ static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus, | |||
3675 | 3682 | ||
3676 | return -EOPNOTSUPP; | 3683 | return -EOPNOTSUPP; |
3677 | } | 3684 | } |
3678 | EXPORT_SYMBOL_GPL(kvm_io_bus_write); | ||
3679 | 3685 | ||
3680 | /* kvm_io_bus_read - called under kvm->slots_lock */ | 3686 | /* kvm_io_bus_read - called under kvm->slots_lock */ |
3681 | int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, | 3687 | int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, |
@@ -3697,7 +3703,6 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, | |||
3697 | return r < 0 ? r : 0; | 3703 | return r < 0 ? r : 0; |
3698 | } | 3704 | } |
3699 | 3705 | ||
3700 | |||
3701 | /* Caller must hold slots_lock. */ | 3706 | /* Caller must hold slots_lock. */ |
3702 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | 3707 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
3703 | int len, struct kvm_io_device *dev) | 3708 | int len, struct kvm_io_device *dev) |
@@ -3714,8 +3719,8 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
3714 | if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) | 3719 | if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) |
3715 | return -ENOSPC; | 3720 | return -ENOSPC; |
3716 | 3721 | ||
3717 | new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count + 1) * | 3722 | new_bus = kmalloc(struct_size(bus, range, bus->dev_count + 1), |
3718 | sizeof(struct kvm_io_range)), GFP_KERNEL); | 3723 | GFP_KERNEL_ACCOUNT); |
3719 | if (!new_bus) | 3724 | if (!new_bus) |
3720 | return -ENOMEM; | 3725 | return -ENOMEM; |
3721 | 3726 | ||
@@ -3760,8 +3765,8 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | |||
3760 | if (i == bus->dev_count) | 3765 | if (i == bus->dev_count) |
3761 | return; | 3766 | return; |
3762 | 3767 | ||
3763 | new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * | 3768 | new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1), |
3764 | sizeof(struct kvm_io_range)), GFP_KERNEL); | 3769 | GFP_KERNEL_ACCOUNT); |
3765 | if (!new_bus) { | 3770 | if (!new_bus) { |
3766 | pr_err("kvm: failed to shrink bus, removing it completely\n"); | 3771 | pr_err("kvm: failed to shrink bus, removing it completely\n"); |
3767 | goto broken; | 3772 | goto broken; |
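struct_size() is from <linux/overflow.h>: it computes sizeof(*p) + n * sizeof(p->member[0]) for a flexible-array member and saturates to SIZE_MAX on overflow, so kmalloc() simply fails instead of under-allocating. Both bus reallocation sites now take the form:

	new_bus = kmalloc(struct_size(bus, range, bus->dev_count + 1),
			  GFP_KERNEL_ACCOUNT);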
@@ -4029,7 +4034,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) | |||
4029 | active = kvm_active_vms; | 4034 | active = kvm_active_vms; |
4030 | spin_unlock(&kvm_lock); | 4035 | spin_unlock(&kvm_lock); |
4031 | 4036 | ||
4032 | env = kzalloc(sizeof(*env), GFP_KERNEL); | 4037 | env = kzalloc(sizeof(*env), GFP_KERNEL_ACCOUNT); |
4033 | if (!env) | 4038 | if (!env) |
4034 | return; | 4039 | return; |
4035 | 4040 | ||
@@ -4045,7 +4050,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) | |||
4045 | add_uevent_var(env, "PID=%d", kvm->userspace_pid); | 4050 | add_uevent_var(env, "PID=%d", kvm->userspace_pid); |
4046 | 4051 | ||
4047 | if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) { | 4052 | if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) { |
4048 | char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL); | 4053 | char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT); |
4049 | 4054 | ||
4050 | if (p) { | 4055 | if (p) { |
4051 | tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); | 4056 | tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); |
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index d99850c462a1..524cbd20379f 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c | |||
@@ -219,7 +219,7 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) | |||
219 | } | 219 | } |
220 | } | 220 | } |
221 | 221 | ||
222 | kvg = kzalloc(sizeof(*kvg), GFP_KERNEL); | 222 | kvg = kzalloc(sizeof(*kvg), GFP_KERNEL_ACCOUNT); |
223 | if (!kvg) { | 223 | if (!kvg) { |
224 | mutex_unlock(&kv->lock); | 224 | mutex_unlock(&kv->lock); |
225 | kvm_vfio_group_put_external_user(vfio_group); | 225 | kvm_vfio_group_put_external_user(vfio_group); |
@@ -405,7 +405,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type) | |||
405 | if (tmp->ops == &kvm_vfio_ops) | 405 | if (tmp->ops == &kvm_vfio_ops) |
406 | return -EBUSY; | 406 | return -EBUSY; |
407 | 407 | ||
408 | kv = kzalloc(sizeof(*kv), GFP_KERNEL); | 408 | kv = kzalloc(sizeof(*kv), GFP_KERNEL_ACCOUNT); |
409 | if (!kv) | 409 | if (!kv) |
410 | return -ENOMEM; | 410 | return -ENOMEM; |
411 | 411 | ||