author    Linus Torvalds <torvalds@linux-foundation.org>  2015-09-10 19:42:49 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2015-09-10 19:42:49 -0400
commit    519f526d391b0ef775aeb04c4b6f632ea6b3ee50 (patch)
tree      36985d7882734c136fc3c9a48e9d9abf9e97c1f1
parent    06ab838c2024db468855118087db16d8fa905ddc (diff)
parent    ba60c41ae392b473a1897faa0b8739fcb8759d69 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull more kvm updates from Paolo Bonzini:
 "ARM:
   - Full debug support for arm64
   - Active state switching for timer interrupts
   - Lazy FP/SIMD save/restore for arm64
   - Generic ARMv8 target

  PPC:
   - Book3S: A few bug fixes
   - Book3S: Allow micro-threading on POWER8

  x86:
   - Compiler warnings

  Generic:
   - Adaptive polling for guest halt"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (49 commits)
  kvm: irqchip: fix memory leak
  kvm: move new trace event outside #ifdef CONFIG_KVM_ASYNC_PF
  KVM: trace kvm_halt_poll_ns grow/shrink
  KVM: dynamic halt-polling
  KVM: make halt_poll_ns per-vCPU
  Silence compiler warning in arch/x86/kvm/emulate.c
  kvm: compile process_smi_save_seg_64() only for x86_64
  KVM: x86: avoid uninitialized variable warning
  KVM: PPC: Book3S: Fix typo in top comment about locking
  KVM: PPC: Book3S: Fix size of the PSPB register
  KVM: PPC: Book3S HV: Exit on H_DOORBELL if HOST_IPI is set
  KVM: PPC: Book3S HV: Fix race in starting secondary threads
  KVM: PPC: Book3S: correct width in XER handling
  KVM: PPC: Book3S HV: Fix preempted vcore stolen time calculation
  KVM: PPC: Book3S HV: Fix preempted vcore list locking
  KVM: PPC: Book3S HV: Implement H_CLEAR_REF and H_CLEAR_MOD
  KVM: PPC: Book3S HV: Fix bug in dirty page tracking
  KVM: PPC: Book3S HV: Fix race in reading change bit when removing HPTE
  KVM: PPC: Book3S HV: Implement dynamic micro-threading on POWER8
  KVM: PPC: Book3S HV: Make use of unused threads when running guests
  ...
-rw-r--r--  Documentation/virtual/kvm/api.txt | 15
-rw-r--r--  arch/arm/include/asm/kvm_host.h | 5
-rw-r--r--  arch/arm/kvm/arm.c | 36
-rw-r--r--  arch/arm/kvm/guest.c | 6
-rw-r--r--  arch/arm/kvm/interrupts.S | 14
-rw-r--r--  arch/arm/kvm/reset.c | 4
-rw-r--r--  arch/arm64/include/asm/hw_breakpoint.h | 14
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h | 5
-rw-r--r--  arch/arm64/include/asm/kvm_asm.h | 26
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 42
-rw-r--r--  arch/arm64/include/uapi/asm/kvm.h | 37
-rw-r--r--  arch/arm64/kernel/asm-offsets.c | 9
-rw-r--r--  arch/arm64/kernel/hw_breakpoint.c | 12
-rw-r--r--  arch/arm64/kvm/Makefile | 2
-rw-r--r--  arch/arm64/kvm/debug.c | 217
-rw-r--r--  arch/arm64/kvm/guest.c | 43
-rw-r--r--  arch/arm64/kvm/handle_exit.c | 44
-rw-r--r--  arch/arm64/kvm/hyp.S | 617
-rw-r--r--  arch/arm64/kvm/reset.c | 20
-rw-r--r--  arch/arm64/kvm/sys_regs.c | 291
-rw-r--r--  arch/arm64/kvm/sys_regs.h | 6
-rw-r--r--  arch/arm64/kvm/sys_regs_generic_v8.c | 2
-rw-r--r--  arch/arm64/kvm/trace.h | 123
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s.h | 5
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_asm.h | 22
-rw-r--r--  arch/powerpc/include/asm/kvm_booke.h | 4
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 26
-rw-r--r--  arch/powerpc/include/asm/ppc-opcode.h | 2
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 9
-rw-r--r--  arch/powerpc/kvm/Kconfig | 8
-rw-r--r--  arch/powerpc/kvm/book3s.c | 3
-rw-r--r--  arch/powerpc/kvm/book3s_32_mmu_host.c | 1
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_host.c | 1
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 8
-rw-r--r--  arch/powerpc/kvm/book3s_emulate.c | 1
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 664
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c | 32
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c | 161
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_xics.c | 4
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 137
-rw-r--r--  arch/powerpc/kvm/book3s_paired_singles.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_segment.S | 4
-rw-r--r--  arch/powerpc/kvm/book3s_xics.c | 2
-rw-r--r--  arch/powerpc/kvm/booke.c | 1
-rw-r--r--  arch/powerpc/kvm/e500_mmu.c | 2
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 2
-rw-r--r--  arch/x86/kvm/emulate.c | 2
-rw-r--r--  arch/x86/kvm/mmu.c | 7
-rw-r--r--  arch/x86/kvm/x86.c | 2
-rw-r--r--  include/kvm/arm_arch_timer.h | 7
-rw-r--r--  include/kvm/arm_vgic.h | 39
-rw-r--r--  include/linux/irqchip/arm-gic-v3.h | 3
-rw-r--r--  include/linux/irqchip/arm-gic.h | 3
-rw-r--r--  include/linux/kvm_host.h | 1
-rw-r--r--  include/trace/events/kvm.h | 30
-rw-r--r--  include/uapi/linux/kvm.h | 5
-rw-r--r--  virt/kvm/arm/arch_timer.c | 29
-rw-r--r--  virt/kvm/arm/vgic-v2.c | 16
-rw-r--r--  virt/kvm/arm/vgic-v3.c | 21
-rw-r--r--  virt/kvm/arm/vgic.c | 427
-rw-r--r--  virt/kvm/irqchip.c | 8
-rw-r--r--  virt/kvm/kvm_main.c | 62
62 files changed, 2653 insertions(+), 700 deletions(-)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index a4ebcb712375..d9ecceea5a02 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2671,7 +2671,7 @@ handled.
26714.87 KVM_SET_GUEST_DEBUG 26714.87 KVM_SET_GUEST_DEBUG
2672 2672
2673Capability: KVM_CAP_SET_GUEST_DEBUG 2673Capability: KVM_CAP_SET_GUEST_DEBUG
2674Architectures: x86, s390, ppc 2674Architectures: x86, s390, ppc, arm64
2675Type: vcpu ioctl 2675Type: vcpu ioctl
2676Parameters: struct kvm_guest_debug (in) 2676Parameters: struct kvm_guest_debug (in)
2677Returns: 0 on success; -1 on error 2677Returns: 0 on success; -1 on error
@@ -2693,8 +2693,8 @@ when running. Common control bits are:
2693The top 16 bits of the control field are architecture specific control 2693The top 16 bits of the control field are architecture specific control
2694flags which can include the following: 2694flags which can include the following:
2695 2695
2696 - KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86] 2696 - KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86, arm64]
2697 - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390] 2697 - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390, arm64]
2698 - KVM_GUESTDBG_INJECT_DB: inject DB type exception [x86] 2698 - KVM_GUESTDBG_INJECT_DB: inject DB type exception [x86]
2699 - KVM_GUESTDBG_INJECT_BP: inject BP type exception [x86] 2699 - KVM_GUESTDBG_INJECT_BP: inject BP type exception [x86]
2700 - KVM_GUESTDBG_EXIT_PENDING: trigger an immediate guest exit [s390] 2700 - KVM_GUESTDBG_EXIT_PENDING: trigger an immediate guest exit [s390]
@@ -2709,6 +2709,11 @@ updated to the correct (supplied) values.
2709The second part of the structure is architecture specific and 2709The second part of the structure is architecture specific and
2710typically contains a set of debug registers. 2710typically contains a set of debug registers.
2711 2711
2712For arm64 the number of debug registers is implementation defined and
2713can be determined by querying the KVM_CAP_GUEST_DEBUG_HW_BPS and
2714KVM_CAP_GUEST_DEBUG_HW_WPS capabilities which return a positive number
2715indicating the number of supported registers.
2716
2712When debug events exit the main run loop with the reason 2717When debug events exit the main run loop with the reason
2713KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run 2718KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
2714structure containing architecture specific debug information. 2719structure containing architecture specific debug information.
@@ -3111,11 +3116,13 @@ data_offset describes where the data is located (KVM_EXIT_IO_OUT) or
3111where kvm expects application code to place the data for the next 3116where kvm expects application code to place the data for the next
3112KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a packed array. 3117KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a packed array.
3113 3118
3119 /* KVM_EXIT_DEBUG */
3114 struct { 3120 struct {
3115 struct kvm_debug_exit_arch arch; 3121 struct kvm_debug_exit_arch arch;
3116 } debug; 3122 } debug;
3117 3123
3118Unused. 3124If the exit_reason is KVM_EXIT_DEBUG, then a vcpu is processing a debug event
3125for which architecture specific information is returned.
3119 3126
3120 /* KVM_EXIT_MMIO */ 3127 /* KVM_EXIT_MMIO */
3121 struct { 3128 struct {
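
[Editorial sketch, not part of this commit] A minimal userspace illustration of how the interfaces documented above fit together: query the new KVM_CAP_GUEST_DEBUG_HW_BPS/KVM_CAP_GUEST_DEBUG_HW_WPS capabilities, then enable hardware-assisted debug on a vcpu with KVM_SET_GUEST_DEBUG. VM/vcpu creation, the kvm_run mmap and error handling are assumed to exist elsewhere.

/*
 * Hypothetical helper: enable hardware-assisted guest debug on an arm64
 * vcpu using the interfaces documented in the api.txt change above.
 */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int enable_hw_guest_debug(int kvm_fd, int vcpu_fd)
{
	struct kvm_guest_debug dbg;
	int nbps, nwps;

	/* Both capabilities return a positive count of supported registers. */
	nbps = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_GUEST_DEBUG_HW_BPS);
	nwps = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_GUEST_DEBUG_HW_WPS);
	if (nbps <= 0 || nwps <= 0)
		return -1;	/* no hardware debug support advertised */

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW;
	/* dbg.arch.dbg_bcr[]/dbg_bvr[] would be filled in before this call. */
	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}

Once a programmed breakpoint or watchpoint fires, KVM_RUN returns with exit_reason set to KVM_EXIT_DEBUG and the architecture-specific details in run->debug.arch, exactly as the documentation change above describes.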
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index e896d2c196e6..dcba0fa5176e 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -231,4 +231,9 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
231static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} 231static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
232static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} 232static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
233 233
234static inline void kvm_arm_init_debug(void) {}
235static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
236static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
237static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
238
234#endif /* __ARM_KVM_HOST_H__ */ 239#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index bc738d2b8392..ce404a5c3062 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -125,6 +125,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
125 if (ret) 125 if (ret)
126 goto out_free_stage2_pgd; 126 goto out_free_stage2_pgd;
127 127
128 kvm_vgic_early_init(kvm);
128 kvm_timer_init(kvm); 129 kvm_timer_init(kvm);
129 130
130 /* Mark the initial VMID generation invalid */ 131 /* Mark the initial VMID generation invalid */
@@ -249,6 +250,7 @@ out:
249 250
250void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 251void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
251{ 252{
253 kvm_vgic_vcpu_early_init(vcpu);
252} 254}
253 255
254void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) 256void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
@@ -278,6 +280,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
278 /* Set up the timer */ 280 /* Set up the timer */
279 kvm_timer_vcpu_init(vcpu); 281 kvm_timer_vcpu_init(vcpu);
280 282
283 kvm_arm_reset_debug_ptr(vcpu);
284
281 return 0; 285 return 0;
282} 286}
283 287
@@ -301,13 +305,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
301 kvm_arm_set_running_vcpu(NULL); 305 kvm_arm_set_running_vcpu(NULL);
302} 306}
303 307
304int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
305 struct kvm_guest_debug *dbg)
306{
307 return -EINVAL;
308}
309
310
311int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 308int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
312 struct kvm_mp_state *mp_state) 309 struct kvm_mp_state *mp_state)
313{ 310{
@@ -528,10 +525,20 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
528 if (vcpu->arch.pause) 525 if (vcpu->arch.pause)
529 vcpu_pause(vcpu); 526 vcpu_pause(vcpu);
530 527
531 kvm_vgic_flush_hwstate(vcpu); 528 /*
529 * Disarming the background timer must be done in a
530 * preemptible context, as this call may sleep.
531 */
532 kvm_timer_flush_hwstate(vcpu); 532 kvm_timer_flush_hwstate(vcpu);
533 533
534 /*
535 * Preparing the interrupts to be injected also
536 * involves poking the GIC, which must be done in a
537 * non-preemptible context.
538 */
534 preempt_disable(); 539 preempt_disable();
540 kvm_vgic_flush_hwstate(vcpu);
541
535 local_irq_disable(); 542 local_irq_disable();
536 543
537 /* 544 /*
@@ -544,12 +551,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
544 551
545 if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) { 552 if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
546 local_irq_enable(); 553 local_irq_enable();
554 kvm_vgic_sync_hwstate(vcpu);
547 preempt_enable(); 555 preempt_enable();
548 kvm_timer_sync_hwstate(vcpu); 556 kvm_timer_sync_hwstate(vcpu);
549 kvm_vgic_sync_hwstate(vcpu);
550 continue; 557 continue;
551 } 558 }
552 559
560 kvm_arm_setup_debug(vcpu);
561
553 /************************************************************** 562 /**************************************************************
554 * Enter the guest 563 * Enter the guest
555 */ 564 */
@@ -564,6 +573,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
564 * Back from guest 573 * Back from guest
565 *************************************************************/ 574 *************************************************************/
566 575
576 kvm_arm_clear_debug(vcpu);
577
567 /* 578 /*
568 * We may have taken a host interrupt in HYP mode (ie 579 * We may have taken a host interrupt in HYP mode (ie
569 * while executing the guest). This interrupt is still 580 * while executing the guest). This interrupt is still
@@ -586,11 +597,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
586 */ 597 */
587 kvm_guest_exit(); 598 kvm_guest_exit();
588 trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); 599 trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
589 preempt_enable();
590 600
601 kvm_vgic_sync_hwstate(vcpu);
602
603 preempt_enable();
591 604
592 kvm_timer_sync_hwstate(vcpu); 605 kvm_timer_sync_hwstate(vcpu);
593 kvm_vgic_sync_hwstate(vcpu);
594 606
595 ret = handle_exit(vcpu, run, ret); 607 ret = handle_exit(vcpu, run, ret);
596 } 608 }
@@ -921,6 +933,8 @@ static void cpu_init_hyp_mode(void *dummy)
921 vector_ptr = (unsigned long)__kvm_hyp_vector; 933 vector_ptr = (unsigned long)__kvm_hyp_vector;
922 934
923 __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr); 935 __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
936
937 kvm_arm_init_debug();
924} 938}
925 939
926static int hyp_init_cpu_notify(struct notifier_block *self, 940static int hyp_init_cpu_notify(struct notifier_block *self,
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index d503fbb787d3..96e935bbc38c 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -290,3 +290,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
290{ 290{
291 return -EINVAL; 291 return -EINVAL;
292} 292}
293
294int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
295 struct kvm_guest_debug *dbg)
296{
297 return -EINVAL;
298}
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 568494dbbbb5..900ef6dd8f72 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -361,10 +361,6 @@ hyp_hvc:
361 @ Check syndrome register 361 @ Check syndrome register
362 mrc p15, 4, r1, c5, c2, 0 @ HSR 362 mrc p15, 4, r1, c5, c2, 0 @ HSR
363 lsr r0, r1, #HSR_EC_SHIFT 363 lsr r0, r1, #HSR_EC_SHIFT
364#ifdef CONFIG_VFPv3
365 cmp r0, #HSR_EC_CP_0_13
366 beq switch_to_guest_vfp
367#endif
368 cmp r0, #HSR_EC_HVC 364 cmp r0, #HSR_EC_HVC
369 bne guest_trap @ Not HVC instr. 365 bne guest_trap @ Not HVC instr.
370 366
@@ -378,7 +374,10 @@ hyp_hvc:
378 cmp r2, #0 374 cmp r2, #0
379 bne guest_trap @ Guest called HVC 375 bne guest_trap @ Guest called HVC
380 376
381host_switch_to_hyp: 377 /*
378 * Getting here means host called HVC, we shift parameters and branch
379 * to Hyp function.
380 */
382 pop {r0, r1, r2} 381 pop {r0, r1, r2}
383 382
384 /* Check for __hyp_get_vectors */ 383 /* Check for __hyp_get_vectors */
@@ -409,6 +408,10 @@ guest_trap:
409 408
410 @ Check if we need the fault information 409 @ Check if we need the fault information
411 lsr r1, r1, #HSR_EC_SHIFT 410 lsr r1, r1, #HSR_EC_SHIFT
411#ifdef CONFIG_VFPv3
412 cmp r1, #HSR_EC_CP_0_13
413 beq switch_to_guest_vfp
414#endif
412 cmp r1, #HSR_EC_IABT 415 cmp r1, #HSR_EC_IABT
413 mrceq p15, 4, r2, c6, c0, 2 @ HIFAR 416 mrceq p15, 4, r2, c6, c0, 2 @ HIFAR
414 beq 2f 417 beq 2f
@@ -477,7 +480,6 @@ guest_trap:
477 */ 480 */
478#ifdef CONFIG_VFPv3 481#ifdef CONFIG_VFPv3
479switch_to_guest_vfp: 482switch_to_guest_vfp:
480 load_vcpu @ Load VCPU pointer to r0
481 push {r3-r7} 483 push {r3-r7}
482 484
483 @ NEON/VFP used. Turn on VFP access. 485 @ NEON/VFP used. Turn on VFP access.
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index f558c073c023..eeb85858d6bb 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -77,7 +77,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
77 kvm_reset_coprocs(vcpu); 77 kvm_reset_coprocs(vcpu);
78 78
79 /* Reset arch_timer context */ 79 /* Reset arch_timer context */
80 kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); 80 return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
81
82 return 0;
83} 81}
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index 52b484b6aa1a..4c47cb2fbb52 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -16,6 +16,8 @@
16#ifndef __ASM_HW_BREAKPOINT_H 16#ifndef __ASM_HW_BREAKPOINT_H
17#define __ASM_HW_BREAKPOINT_H 17#define __ASM_HW_BREAKPOINT_H
18 18
19#include <asm/cputype.h>
20
19#ifdef __KERNEL__ 21#ifdef __KERNEL__
20 22
21struct arch_hw_breakpoint_ctrl { 23struct arch_hw_breakpoint_ctrl {
@@ -132,5 +134,17 @@ static inline void ptrace_hw_copy_thread(struct task_struct *task)
132 134
133extern struct pmu perf_ops_bp; 135extern struct pmu perf_ops_bp;
134 136
137/* Determine number of BRP registers available. */
138static inline int get_num_brps(void)
139{
140 return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
141}
142
143/* Determine number of WRP registers available. */
144static inline int get_num_wrps(void)
145{
146 return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
147}
148
135#endif /* __KERNEL__ */ 149#endif /* __KERNEL__ */
136#endif /* __ASM_BREAKPOINT_H */ 150#endif /* __ASM_BREAKPOINT_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index ac6fafb95fe7..7605e095217f 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -171,10 +171,13 @@
171#define HSTR_EL2_TTEE (1 << 16) 171#define HSTR_EL2_TTEE (1 << 16)
172#define HSTR_EL2_T(x) (1 << x) 172#define HSTR_EL2_T(x) (1 << x)
173 173
174/* Hyp Coproccessor Trap Register Shifts */
175#define CPTR_EL2_TFP_SHIFT 10
176
174/* Hyp Coprocessor Trap Register */ 177/* Hyp Coprocessor Trap Register */
175#define CPTR_EL2_TCPAC (1 << 31) 178#define CPTR_EL2_TCPAC (1 << 31)
176#define CPTR_EL2_TTA (1 << 20) 179#define CPTR_EL2_TTA (1 << 20)
177#define CPTR_EL2_TFP (1 << 10) 180#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
178 181
179/* Hyp Debug Configuration Register bits */ 182/* Hyp Debug Configuration Register bits */
180#define MDCR_EL2_TDRA (1 << 11) 183#define MDCR_EL2_TDRA (1 << 11)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 3c5fe685a2d6..67fa0de3d483 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -46,24 +46,16 @@
46#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ 46#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */
47#define PAR_EL1 21 /* Physical Address Register */ 47#define PAR_EL1 21 /* Physical Address Register */
48#define MDSCR_EL1 22 /* Monitor Debug System Control Register */ 48#define MDSCR_EL1 22 /* Monitor Debug System Control Register */
49#define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */ 49#define MDCCINT_EL1 23 /* Monitor Debug Comms Channel Interrupt Enable Reg */
50#define DBGBCR15_EL1 38
51#define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */
52#define DBGBVR15_EL1 54
53#define DBGWCR0_EL1 55 /* Debug Watchpoint Control Registers (0-15) */
54#define DBGWCR15_EL1 70
55#define DBGWVR0_EL1 71 /* Debug Watchpoint Value Registers (0-15) */
56#define DBGWVR15_EL1 86
57#define MDCCINT_EL1 87 /* Monitor Debug Comms Channel Interrupt Enable Reg */
58 50
59/* 32bit specific registers. Keep them at the end of the range */ 51/* 32bit specific registers. Keep them at the end of the range */
60#define DACR32_EL2 88 /* Domain Access Control Register */ 52#define DACR32_EL2 24 /* Domain Access Control Register */
61#define IFSR32_EL2 89 /* Instruction Fault Status Register */ 53#define IFSR32_EL2 25 /* Instruction Fault Status Register */
62#define FPEXC32_EL2 90 /* Floating-Point Exception Control Register */ 54#define FPEXC32_EL2 26 /* Floating-Point Exception Control Register */
63#define DBGVCR32_EL2 91 /* Debug Vector Catch Register */ 55#define DBGVCR32_EL2 27 /* Debug Vector Catch Register */
64#define TEECR32_EL1 92 /* ThumbEE Configuration Register */ 56#define TEECR32_EL1 28 /* ThumbEE Configuration Register */
65#define TEEHBR32_EL1 93 /* ThumbEE Handler Base Register */ 57#define TEEHBR32_EL1 29 /* ThumbEE Handler Base Register */
66#define NR_SYS_REGS 94 58#define NR_SYS_REGS 30
67 59
68/* 32bit mapping */ 60/* 32bit mapping */
69#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ 61#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
@@ -132,6 +124,8 @@ extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
132 124
133extern u64 __vgic_v3_get_ich_vtr_el2(void); 125extern u64 __vgic_v3_get_ich_vtr_el2(void);
134 126
127extern u32 __kvm_get_mdcr_el2(void);
128
135#endif 129#endif
136 130
137#endif /* __ARM_KVM_ASM_H__ */ 131#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2709db2a7eac..415938dc45cf 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -103,15 +103,34 @@ struct kvm_vcpu_arch {
103 103
104 /* HYP configuration */ 104 /* HYP configuration */
105 u64 hcr_el2; 105 u64 hcr_el2;
106 u32 mdcr_el2;
106 107
107 /* Exception Information */ 108 /* Exception Information */
108 struct kvm_vcpu_fault_info fault; 109 struct kvm_vcpu_fault_info fault;
109 110
110 /* Debug state */ 111 /* Guest debug state */
111 u64 debug_flags; 112 u64 debug_flags;
112 113
114 /*
115 * We maintain more than a single set of debug registers to support
116 * debugging the guest from the host and to maintain separate host and
117 * guest state during world switches. vcpu_debug_state are the debug
118 * registers of the vcpu as the guest sees them. host_debug_state are
119 * the host registers which are saved and restored during
120 * world switches. external_debug_state contains the debug
121 * values we want to debug the guest. This is set via the
122 * KVM_SET_GUEST_DEBUG ioctl.
123 *
124 * debug_ptr points to the set of debug registers that should be loaded
125 * onto the hardware when running the guest.
126 */
127 struct kvm_guest_debug_arch *debug_ptr;
128 struct kvm_guest_debug_arch vcpu_debug_state;
129 struct kvm_guest_debug_arch external_debug_state;
130
113 /* Pointer to host CPU context */ 131 /* Pointer to host CPU context */
114 kvm_cpu_context_t *host_cpu_context; 132 kvm_cpu_context_t *host_cpu_context;
133 struct kvm_guest_debug_arch host_debug_state;
115 134
116 /* VGIC state */ 135 /* VGIC state */
117 struct vgic_cpu vgic_cpu; 136 struct vgic_cpu vgic_cpu;
@@ -122,6 +141,17 @@ struct kvm_vcpu_arch {
122 * here. 141 * here.
123 */ 142 */
124 143
144 /*
145 * Guest registers we preserve during guest debugging.
146 *
147 * These shadow registers are updated by the kvm_handle_sys_reg
148 * trap handler if the guest accesses or updates them while we
149 * are using guest debug.
150 */
151 struct {
152 u32 mdscr_el1;
153 } guest_debug_preserved;
154
125 /* Don't run the guest */ 155 /* Don't run the guest */
126 bool pause; 156 bool pause;
127 157
@@ -216,15 +246,15 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
216 hyp_stack_ptr, vector_ptr); 246 hyp_stack_ptr, vector_ptr);
217} 247}
218 248
219struct vgic_sr_vectors {
220 void *save_vgic;
221 void *restore_vgic;
222};
223
224static inline void kvm_arch_hardware_disable(void) {} 249static inline void kvm_arch_hardware_disable(void) {}
225static inline void kvm_arch_hardware_unsetup(void) {} 250static inline void kvm_arch_hardware_unsetup(void) {}
226static inline void kvm_arch_sync_events(struct kvm *kvm) {} 251static inline void kvm_arch_sync_events(struct kvm *kvm) {}
227static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} 252static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
228static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} 253static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
229 254
255void kvm_arm_init_debug(void);
256void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
257void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
258void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
259
230#endif /* __ARM64_KVM_HOST_H__ */ 260#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index d26832022127..0cd7b5947dfc 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -53,14 +53,20 @@ struct kvm_regs {
53 struct user_fpsimd_state fp_regs; 53 struct user_fpsimd_state fp_regs;
54}; 54};
55 55
56/* Supported Processor Types */ 56/*
57 * Supported CPU Targets - Adding a new target type is not recommended,
58 * unless there are some special registers not supported by the
59 * genericv8 syreg table.
60 */
57#define KVM_ARM_TARGET_AEM_V8 0 61#define KVM_ARM_TARGET_AEM_V8 0
58#define KVM_ARM_TARGET_FOUNDATION_V8 1 62#define KVM_ARM_TARGET_FOUNDATION_V8 1
59#define KVM_ARM_TARGET_CORTEX_A57 2 63#define KVM_ARM_TARGET_CORTEX_A57 2
60#define KVM_ARM_TARGET_XGENE_POTENZA 3 64#define KVM_ARM_TARGET_XGENE_POTENZA 3
61#define KVM_ARM_TARGET_CORTEX_A53 4 65#define KVM_ARM_TARGET_CORTEX_A53 4
66/* Generic ARM v8 target */
67#define KVM_ARM_TARGET_GENERIC_V8 5
62 68
63#define KVM_ARM_NUM_TARGETS 5 69#define KVM_ARM_NUM_TARGETS 6
64 70
65/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ 71/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
66#define KVM_ARM_DEVICE_TYPE_SHIFT 0 72#define KVM_ARM_DEVICE_TYPE_SHIFT 0
@@ -100,12 +106,39 @@ struct kvm_sregs {
100struct kvm_fpu { 106struct kvm_fpu {
101}; 107};
102 108
109/*
110 * See v8 ARM ARM D7.3: Debug Registers
111 *
112 * The architectural limit is 16 debug registers of each type although
113 * in practice there are usually less (see ID_AA64DFR0_EL1).
114 *
115 * Although the control registers are architecturally defined as 32
116 * bits wide we use a 64 bit structure here to keep parity with
117 * KVM_GET/SET_ONE_REG behaviour which treats all system registers as
118 * 64 bit values. It also allows for the possibility of the
119 * architecture expanding the control registers without having to
120 * change the userspace ABI.
121 */
122#define KVM_ARM_MAX_DBG_REGS 16
103struct kvm_guest_debug_arch { 123struct kvm_guest_debug_arch {
124 __u64 dbg_bcr[KVM_ARM_MAX_DBG_REGS];
125 __u64 dbg_bvr[KVM_ARM_MAX_DBG_REGS];
126 __u64 dbg_wcr[KVM_ARM_MAX_DBG_REGS];
127 __u64 dbg_wvr[KVM_ARM_MAX_DBG_REGS];
104}; 128};
105 129
106struct kvm_debug_exit_arch { 130struct kvm_debug_exit_arch {
131 __u32 hsr;
132 __u64 far; /* used for watchpoints */
107}; 133};
108 134
135/*
136 * Architecture specific defines for kvm_guest_debug->control
137 */
138
139#define KVM_GUESTDBG_USE_SW_BP (1 << 16)
140#define KVM_GUESTDBG_USE_HW (1 << 17)
141
109struct kvm_sync_regs { 142struct kvm_sync_regs {
110}; 143};
111 144
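
[Editorial sketch, not part of this commit] A rough illustration of how userspace might fill in the new kvm_guest_debug_arch layout before calling KVM_SET_GUEST_DEBUG: program one unlinked instruction-address breakpoint. The DBGBCR encoding used below (enable bit, EL1&0 privilege, byte-address-select covering a 4-byte instruction) is an assumption drawn from the ARM ARM D7.3 register description referenced above, not something defined by this commit.

#include <linux/kvm.h>

/*
 * Hypothetical helper: program breakpoint slot 'n' to fire at guest
 * address 'addr'.  The control value is an assumed DBGBCR_EL1 encoding;
 * consult ARM ARM D7.3 before relying on it.
 */
static void set_hw_breakpoint(struct kvm_guest_debug *dbg, int n, __u64 addr)
{
	dbg->arch.dbg_bvr[n] = addr;
	dbg->arch.dbg_bcr[n] = (0xf << 5) |	/* BAS: all four bytes of the insn */
			       (0x3 << 1) |	/* PMC: match at EL1 and EL0       */
			       0x1;		/* E:   breakpoint enabled         */
}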
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index c99701a34d7b..8d89cf8dae55 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -116,17 +116,22 @@ int main(void)
116 DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); 116 DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2));
117 DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); 117 DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
118 DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags)); 118 DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags));
119 DEFINE(VCPU_DEBUG_PTR, offsetof(struct kvm_vcpu, arch.debug_ptr));
120 DEFINE(DEBUG_BCR, offsetof(struct kvm_guest_debug_arch, dbg_bcr));
121 DEFINE(DEBUG_BVR, offsetof(struct kvm_guest_debug_arch, dbg_bvr));
122 DEFINE(DEBUG_WCR, offsetof(struct kvm_guest_debug_arch, dbg_wcr));
123 DEFINE(DEBUG_WVR, offsetof(struct kvm_guest_debug_arch, dbg_wvr));
119 DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); 124 DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
125 DEFINE(VCPU_MDCR_EL2, offsetof(struct kvm_vcpu, arch.mdcr_el2));
120 DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); 126 DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
121 DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); 127 DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
128 DEFINE(VCPU_HOST_DEBUG_STATE, offsetof(struct kvm_vcpu, arch.host_debug_state));
122 DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); 129 DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
123 DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); 130 DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
124 DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); 131 DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff));
125 DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); 132 DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled));
126 DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); 133 DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
127 DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); 134 DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu));
128 DEFINE(VGIC_SAVE_FN, offsetof(struct vgic_sr_vectors, save_vgic));
129 DEFINE(VGIC_RESTORE_FN, offsetof(struct vgic_sr_vectors, restore_vgic));
130 DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); 135 DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
131 DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); 136 DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
132 DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); 137 DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 003bc3d50636..c97040ecf838 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -48,18 +48,6 @@ static DEFINE_PER_CPU(int, stepping_kernel_bp);
48static int core_num_brps; 48static int core_num_brps;
49static int core_num_wrps; 49static int core_num_wrps;
50 50
51/* Determine number of BRP registers available. */
52static int get_num_brps(void)
53{
54 return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
55}
56
57/* Determine number of WRP registers available. */
58static int get_num_wrps(void)
59{
60 return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
61}
62
63int hw_breakpoint_slots(int type) 51int hw_breakpoint_slots(int type)
64{ 52{
65 /* 53 /*
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index f90f4aa7f88d..1949fe5f5424 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -17,7 +17,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
17 17
18kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o 18kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
19kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o 19kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
20kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o 20kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
21 21
22kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o 22kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o
23kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o 23kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
new file mode 100644
index 000000000000..47e5f0feaee8
--- /dev/null
+++ b/arch/arm64/kvm/debug.c
@@ -0,0 +1,217 @@
1/*
2 * Debug and Guest Debug support
3 *
4 * Copyright (C) 2015 - Linaro Ltd
5 * Author: Alex Bennée <alex.bennee@linaro.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include <linux/kvm_host.h>
21#include <linux/hw_breakpoint.h>
22
23#include <asm/debug-monitors.h>
24#include <asm/kvm_asm.h>
25#include <asm/kvm_arm.h>
26#include <asm/kvm_emulate.h>
27
28#include "trace.h"
29
30/* These are the bits of MDSCR_EL1 we may manipulate */
31#define MDSCR_EL1_DEBUG_MASK (DBG_MDSCR_SS | \
32 DBG_MDSCR_KDE | \
33 DBG_MDSCR_MDE)
34
35static DEFINE_PER_CPU(u32, mdcr_el2);
36
37/**
38 * save/restore_guest_debug_regs
39 *
40 * For some debug operations we need to tweak some guest registers. As
41 * a result we need to save the state of those registers before we
42 * make those modifications.
43 *
44 * Guest access to MDSCR_EL1 is trapped by the hypervisor and handled
45 * after we have restored the preserved value to the main context.
46 */
47static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
48{
49 vcpu->arch.guest_debug_preserved.mdscr_el1 = vcpu_sys_reg(vcpu, MDSCR_EL1);
50
51 trace_kvm_arm_set_dreg32("Saved MDSCR_EL1",
52 vcpu->arch.guest_debug_preserved.mdscr_el1);
53}
54
55static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
56{
57 vcpu_sys_reg(vcpu, MDSCR_EL1) = vcpu->arch.guest_debug_preserved.mdscr_el1;
58
59 trace_kvm_arm_set_dreg32("Restored MDSCR_EL1",
60 vcpu_sys_reg(vcpu, MDSCR_EL1));
61}
62
63/**
64 * kvm_arm_init_debug - grab what we need for debug
65 *
66 * Currently the sole task of this function is to retrieve the initial
67 * value of mdcr_el2 so we can preserve MDCR_EL2.HPMN which has
68 * presumably been set-up by some knowledgeable bootcode.
69 *
70 * It is called once per-cpu during CPU hyp initialisation.
71 */
72
73void kvm_arm_init_debug(void)
74{
75 __this_cpu_write(mdcr_el2, kvm_call_hyp(__kvm_get_mdcr_el2));
76}
77
78/**
79 * kvm_arm_reset_debug_ptr - reset the debug ptr to point to the vcpu state
80 */
81
82void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
83{
84 vcpu->arch.debug_ptr = &vcpu->arch.vcpu_debug_state;
85}
86
87/**
88 * kvm_arm_setup_debug - set up debug related stuff
89 *
90 * @vcpu: the vcpu pointer
91 *
92 * This is called before each entry into the hypervisor to setup any
93 * debug related registers. Currently this just ensures we will trap
94 * access to:
95 * - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR)
96 * - Debug ROM Address (MDCR_EL2_TDRA)
97 * - OS related registers (MDCR_EL2_TDOSA)
98 *
99 * Additionally, KVM only traps guest accesses to the debug registers if
100 * the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY
101 * flag on vcpu->arch.debug_flags). Since the guest must not interfere
102 * with the hardware state when debugging the guest, we must ensure that
103 * trapping is enabled whenever we are debugging the guest using the
104 * debug registers.
105 */
106
107void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
108{
109 bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY);
110
111 trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
112
113 vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK;
114 vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM |
115 MDCR_EL2_TPMCR |
116 MDCR_EL2_TDRA |
117 MDCR_EL2_TDOSA);
118
119 /* Is Guest debugging in effect? */
120 if (vcpu->guest_debug) {
121 /* Route all software debug exceptions to EL2 */
122 vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE;
123
124 /* Save guest debug state */
125 save_guest_debug_regs(vcpu);
126
127 /*
128 * Single Step (ARM ARM D2.12.3 The software step state
129 * machine)
130 *
131 * If we are doing Single Step we need to manipulate
132 * the guest's MDSCR_EL1.SS and PSTATE.SS. Once the
133 * step has occurred the hypervisor will trap the
134 * debug exception and we return to userspace.
135 *
136 * If the guest attempts to single step its userspace
137 * we would have to deal with a trapped exception
138 * while in the guest kernel. Because this would be
139 * hard to unwind we suppress the guest's ability to
140 * do so by masking MDSCR_EL.SS.
141 *
142 * This confuses guest debuggers which use
143 * single-step behind the scenes but everything
144 * returns to normal once the host is no longer
145 * debugging the system.
146 */
147 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
148 *vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
149 vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_SS;
150 } else {
151 vcpu_sys_reg(vcpu, MDSCR_EL1) &= ~DBG_MDSCR_SS;
152 }
153
154 trace_kvm_arm_set_dreg32("SPSR_EL2", *vcpu_cpsr(vcpu));
155
156 /*
157 * HW Breakpoints and watchpoints
158 *
159 * We simply switch the debug_ptr to point to our new
160 * external_debug_state which has been populated by the
161 * debug ioctl. The existing KVM_ARM64_DEBUG_DIRTY
162 * mechanism ensures the registers are updated on the
163 * world switch.
164 */
165 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
166 /* Enable breakpoints/watchpoints */
167 vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_MDE;
168
169 vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
170 vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
171 trap_debug = true;
172
173 trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
174 &vcpu->arch.debug_ptr->dbg_bcr[0],
175 &vcpu->arch.debug_ptr->dbg_bvr[0]);
176
177 trace_kvm_arm_set_regset("WAPTS", get_num_wrps(),
178 &vcpu->arch.debug_ptr->dbg_wcr[0],
179 &vcpu->arch.debug_ptr->dbg_wvr[0]);
180 }
181 }
182
183 BUG_ON(!vcpu->guest_debug &&
184 vcpu->arch.debug_ptr != &vcpu->arch.vcpu_debug_state);
185
186 /* Trap debug register access */
187 if (trap_debug)
188 vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
189
190 trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
191 trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_sys_reg(vcpu, MDSCR_EL1));
192}
193
194void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
195{
196 trace_kvm_arm_clear_debug(vcpu->guest_debug);
197
198 if (vcpu->guest_debug) {
199 restore_guest_debug_regs(vcpu);
200
201 /*
202 * If we were using HW debug we need to restore the
203 * debug_ptr to the guest debug state.
204 */
205 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
206 kvm_arm_reset_debug_ptr(vcpu);
207
208 trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
209 &vcpu->arch.debug_ptr->dbg_bcr[0],
210 &vcpu->arch.debug_ptr->dbg_bvr[0]);
211
212 trace_kvm_arm_set_regset("WAPTS", get_num_wrps(),
213 &vcpu->arch.debug_ptr->dbg_wcr[0],
214 &vcpu->arch.debug_ptr->dbg_wvr[0]);
215 }
216 }
217}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 9535bd555d1d..d250160d32bc 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -32,6 +32,8 @@
32#include <asm/kvm_emulate.h> 32#include <asm/kvm_emulate.h>
33#include <asm/kvm_coproc.h> 33#include <asm/kvm_coproc.h>
34 34
35#include "trace.h"
36
35struct kvm_stats_debugfs_item debugfs_entries[] = { 37struct kvm_stats_debugfs_item debugfs_entries[] = {
36 { NULL } 38 { NULL }
37}; 39};
@@ -293,7 +295,8 @@ int __attribute_const__ kvm_target_cpu(void)
293 break; 295 break;
294 }; 296 };
295 297
296 return -EINVAL; 298 /* Return a default generic target */
299 return KVM_ARM_TARGET_GENERIC_V8;
297} 300}
298 301
299int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) 302int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
@@ -331,3 +334,41 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
331{ 334{
332 return -EINVAL; 335 return -EINVAL;
333} 336}
337
338#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
339 KVM_GUESTDBG_USE_SW_BP | \
340 KVM_GUESTDBG_USE_HW | \
341 KVM_GUESTDBG_SINGLESTEP)
342
343/**
344 * kvm_arch_vcpu_ioctl_set_guest_debug - set up guest debugging
345 * @kvm: pointer to the KVM struct
346 * @kvm_guest_debug: the ioctl data buffer
347 *
348 * This sets up and enables the VM for guest debugging. Userspace
349 * passes in a control flag to enable different debug types and
350 * potentially other architecture specific information in the rest of
351 * the structure.
352 */
353int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
354 struct kvm_guest_debug *dbg)
355{
356 trace_kvm_set_guest_debug(vcpu, dbg->control);
357
358 if (dbg->control & ~KVM_GUESTDBG_VALID_MASK)
359 return -EINVAL;
360
361 if (dbg->control & KVM_GUESTDBG_ENABLE) {
362 vcpu->guest_debug = dbg->control;
363
364 /* Hardware assisted Break and Watch points */
365 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
366 vcpu->arch.external_debug_state = dbg->arch;
367 }
368
369 } else {
370 /* If not enabled clear all flags */
371 vcpu->guest_debug = 0;
372 }
373 return 0;
374}
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 524fa25671fc..68a0759b1375 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -82,6 +82,45 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
82 return 1; 82 return 1;
83} 83}
84 84
85/**
86 * kvm_handle_guest_debug - handle a debug exception instruction
87 *
88 * @vcpu: the vcpu pointer
89 * @run: access to the kvm_run structure for results
90 *
91 * We route all debug exceptions through the same handler. If both the
92 * guest and host are using the same debug facilities it will be up to
93 * userspace to re-inject the correct exception for guest delivery.
94 *
95 * @return: 0 (while setting run->exit_reason), -1 for error
96 */
97static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
98{
99 u32 hsr = kvm_vcpu_get_hsr(vcpu);
100 int ret = 0;
101
102 run->exit_reason = KVM_EXIT_DEBUG;
103 run->debug.arch.hsr = hsr;
104
105 switch (hsr >> ESR_ELx_EC_SHIFT) {
106 case ESR_ELx_EC_WATCHPT_LOW:
107 run->debug.arch.far = vcpu->arch.fault.far_el2;
108 /* fall through */
109 case ESR_ELx_EC_SOFTSTP_LOW:
110 case ESR_ELx_EC_BREAKPT_LOW:
111 case ESR_ELx_EC_BKPT32:
112 case ESR_ELx_EC_BRK64:
113 break;
114 default:
115 kvm_err("%s: un-handled case hsr: %#08x\n",
116 __func__, (unsigned int) hsr);
117 ret = -1;
118 break;
119 }
120
121 return ret;
122}
123
85static exit_handle_fn arm_exit_handlers[] = { 124static exit_handle_fn arm_exit_handlers[] = {
86 [ESR_ELx_EC_WFx] = kvm_handle_wfx, 125 [ESR_ELx_EC_WFx] = kvm_handle_wfx,
87 [ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32, 126 [ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32,
@@ -96,6 +135,11 @@ static exit_handle_fn arm_exit_handlers[] = {
96 [ESR_ELx_EC_SYS64] = kvm_handle_sys_reg, 135 [ESR_ELx_EC_SYS64] = kvm_handle_sys_reg,
97 [ESR_ELx_EC_IABT_LOW] = kvm_handle_guest_abort, 136 [ESR_ELx_EC_IABT_LOW] = kvm_handle_guest_abort,
98 [ESR_ELx_EC_DABT_LOW] = kvm_handle_guest_abort, 137 [ESR_ELx_EC_DABT_LOW] = kvm_handle_guest_abort,
138 [ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,
139 [ESR_ELx_EC_WATCHPT_LOW]= kvm_handle_guest_debug,
140 [ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug,
141 [ESR_ELx_EC_BKPT32] = kvm_handle_guest_debug,
142 [ESR_ELx_EC_BRK64] = kvm_handle_guest_debug,
99}; 143};
100 144
101static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) 145static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
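
[Editorial sketch, not part of this commit] The exit handlers added above funnel every guest debug exception out to userspace as KVM_EXIT_DEBUG; the sketch below shows how a VMM might classify such exits by the exception class carried in run->debug.arch.hsr. The EC constants restate the ARMv8 ESR_ELx encoding as assumptions rather than coming from a uapi header.

#include <linux/kvm.h>
#include <stdio.h>

/* ESR_ELx exception-class values (assumed, per the ARMv8 ARM). */
#define EC_SHIFT	26
#define EC_BREAKPT_LOW	0x30
#define EC_SOFTSTP_LOW	0x32
#define EC_WATCHPT_LOW	0x34
#define EC_BRK64	0x3c

static void handle_debug_exit(struct kvm_run *run)
{
	unsigned int ec = run->debug.arch.hsr >> EC_SHIFT;

	switch (ec) {
	case EC_WATCHPT_LOW:
		/* far is only populated for watchpoint exits. */
		printf("watchpoint hit, data address %#llx\n",
		       (unsigned long long)run->debug.arch.far);
		break;
	case EC_BREAKPT_LOW:
	case EC_BRK64:
		printf("breakpoint hit\n");
		break;
	case EC_SOFTSTP_LOW:
		printf("single-step completed\n");
		break;
	default:
		printf("unexpected debug exit, hsr=%#x\n", run->debug.arch.hsr);
		break;
	}
}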
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 10915aaf0b01..37c89ea2c572 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -230,199 +230,52 @@
230 stp x24, x25, [x3, #160] 230 stp x24, x25, [x3, #160]
231.endm 231.endm
232 232
233.macro save_debug 233.macro save_debug type
234 // x2: base address for cpu context 234 // x4: pointer to register set
235 // x3: tmp register 235 // x5: number of registers to skip
236 236 // x6..x22 trashed
237 mrs x26, id_aa64dfr0_el1 237
238 ubfx x24, x26, #12, #4 // Extract BRPs 238 adr x22, 1f
239 ubfx x25, x26, #20, #4 // Extract WRPs 239 add x22, x22, x5, lsl #2
240 mov w26, #15 240 br x22
241 sub w24, w26, w24 // How many BPs to skip
242 sub w25, w26, w25 // How many WPs to skip
243
244 add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
245
246 adr x26, 1f
247 add x26, x26, x24, lsl #2
248 br x26
2491:
250 mrs x20, dbgbcr15_el1
251 mrs x19, dbgbcr14_el1
252 mrs x18, dbgbcr13_el1
253 mrs x17, dbgbcr12_el1
254 mrs x16, dbgbcr11_el1
255 mrs x15, dbgbcr10_el1
256 mrs x14, dbgbcr9_el1
257 mrs x13, dbgbcr8_el1
258 mrs x12, dbgbcr7_el1
259 mrs x11, dbgbcr6_el1
260 mrs x10, dbgbcr5_el1
261 mrs x9, dbgbcr4_el1
262 mrs x8, dbgbcr3_el1
263 mrs x7, dbgbcr2_el1
264 mrs x6, dbgbcr1_el1
265 mrs x5, dbgbcr0_el1
266
267 adr x26, 1f
268 add x26, x26, x24, lsl #2
269 br x26
270
2711:
272 str x20, [x3, #(15 * 8)]
273 str x19, [x3, #(14 * 8)]
274 str x18, [x3, #(13 * 8)]
275 str x17, [x3, #(12 * 8)]
276 str x16, [x3, #(11 * 8)]
277 str x15, [x3, #(10 * 8)]
278 str x14, [x3, #(9 * 8)]
279 str x13, [x3, #(8 * 8)]
280 str x12, [x3, #(7 * 8)]
281 str x11, [x3, #(6 * 8)]
282 str x10, [x3, #(5 * 8)]
283 str x9, [x3, #(4 * 8)]
284 str x8, [x3, #(3 * 8)]
285 str x7, [x3, #(2 * 8)]
286 str x6, [x3, #(1 * 8)]
287 str x5, [x3, #(0 * 8)]
288
289 add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
290
291 adr x26, 1f
292 add x26, x26, x24, lsl #2
293 br x26
2941: 2411:
295 mrs x20, dbgbvr15_el1 242 mrs x21, \type\()15_el1
296 mrs x19, dbgbvr14_el1 243 mrs x20, \type\()14_el1
297 mrs x18, dbgbvr13_el1 244 mrs x19, \type\()13_el1
298 mrs x17, dbgbvr12_el1 245 mrs x18, \type\()12_el1
299 mrs x16, dbgbvr11_el1 246 mrs x17, \type\()11_el1
300 mrs x15, dbgbvr10_el1 247 mrs x16, \type\()10_el1
301 mrs x14, dbgbvr9_el1 248 mrs x15, \type\()9_el1
302 mrs x13, dbgbvr8_el1 249 mrs x14, \type\()8_el1
303 mrs x12, dbgbvr7_el1 250 mrs x13, \type\()7_el1
304 mrs x11, dbgbvr6_el1 251 mrs x12, \type\()6_el1
305 mrs x10, dbgbvr5_el1 252 mrs x11, \type\()5_el1
306 mrs x9, dbgbvr4_el1 253 mrs x10, \type\()4_el1
307 mrs x8, dbgbvr3_el1 254 mrs x9, \type\()3_el1
308 mrs x7, dbgbvr2_el1 255 mrs x8, \type\()2_el1
309 mrs x6, dbgbvr1_el1 256 mrs x7, \type\()1_el1
310 mrs x5, dbgbvr0_el1 257 mrs x6, \type\()0_el1
311 258
312 adr x26, 1f 259 adr x22, 1f
313 add x26, x26, x24, lsl #2 260 add x22, x22, x5, lsl #2
314 br x26 261 br x22
315
3161:
317 str x20, [x3, #(15 * 8)]
318 str x19, [x3, #(14 * 8)]
319 str x18, [x3, #(13 * 8)]
320 str x17, [x3, #(12 * 8)]
321 str x16, [x3, #(11 * 8)]
322 str x15, [x3, #(10 * 8)]
323 str x14, [x3, #(9 * 8)]
324 str x13, [x3, #(8 * 8)]
325 str x12, [x3, #(7 * 8)]
326 str x11, [x3, #(6 * 8)]
327 str x10, [x3, #(5 * 8)]
328 str x9, [x3, #(4 * 8)]
329 str x8, [x3, #(3 * 8)]
330 str x7, [x3, #(2 * 8)]
331 str x6, [x3, #(1 * 8)]
332 str x5, [x3, #(0 * 8)]
333
334 add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
335
336 adr x26, 1f
337 add x26, x26, x25, lsl #2
338 br x26
3391:
340 mrs x20, dbgwcr15_el1
341 mrs x19, dbgwcr14_el1
342 mrs x18, dbgwcr13_el1
343 mrs x17, dbgwcr12_el1
344 mrs x16, dbgwcr11_el1
345 mrs x15, dbgwcr10_el1
346 mrs x14, dbgwcr9_el1
347 mrs x13, dbgwcr8_el1
348 mrs x12, dbgwcr7_el1
349 mrs x11, dbgwcr6_el1
350 mrs x10, dbgwcr5_el1
351 mrs x9, dbgwcr4_el1
352 mrs x8, dbgwcr3_el1
353 mrs x7, dbgwcr2_el1
354 mrs x6, dbgwcr1_el1
355 mrs x5, dbgwcr0_el1
356
357 adr x26, 1f
358 add x26, x26, x25, lsl #2
359 br x26
360
3611:
362 str x20, [x3, #(15 * 8)]
363 str x19, [x3, #(14 * 8)]
364 str x18, [x3, #(13 * 8)]
365 str x17, [x3, #(12 * 8)]
366 str x16, [x3, #(11 * 8)]
367 str x15, [x3, #(10 * 8)]
368 str x14, [x3, #(9 * 8)]
369 str x13, [x3, #(8 * 8)]
370 str x12, [x3, #(7 * 8)]
371 str x11, [x3, #(6 * 8)]
372 str x10, [x3, #(5 * 8)]
373 str x9, [x3, #(4 * 8)]
374 str x8, [x3, #(3 * 8)]
375 str x7, [x3, #(2 * 8)]
376 str x6, [x3, #(1 * 8)]
377 str x5, [x3, #(0 * 8)]
378
379 add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
380
381 adr x26, 1f
382 add x26, x26, x25, lsl #2
383 br x26
3841:
385 mrs x20, dbgwvr15_el1
386 mrs x19, dbgwvr14_el1
387 mrs x18, dbgwvr13_el1
388 mrs x17, dbgwvr12_el1
389 mrs x16, dbgwvr11_el1
390 mrs x15, dbgwvr10_el1
391 mrs x14, dbgwvr9_el1
392 mrs x13, dbgwvr8_el1
393 mrs x12, dbgwvr7_el1
394 mrs x11, dbgwvr6_el1
395 mrs x10, dbgwvr5_el1
396 mrs x9, dbgwvr4_el1
397 mrs x8, dbgwvr3_el1
398 mrs x7, dbgwvr2_el1
399 mrs x6, dbgwvr1_el1
400 mrs x5, dbgwvr0_el1
401
402 adr x26, 1f
403 add x26, x26, x25, lsl #2
404 br x26
405
4061: 2621:
407 str x20, [x3, #(15 * 8)] 263 str x21, [x4, #(15 * 8)]
408 str x19, [x3, #(14 * 8)] 264 str x20, [x4, #(14 * 8)]
409 str x18, [x3, #(13 * 8)] 265 str x19, [x4, #(13 * 8)]
410 str x17, [x3, #(12 * 8)] 266 str x18, [x4, #(12 * 8)]
411 str x16, [x3, #(11 * 8)] 267 str x17, [x4, #(11 * 8)]
412 str x15, [x3, #(10 * 8)] 268 str x16, [x4, #(10 * 8)]
413 str x14, [x3, #(9 * 8)] 269 str x15, [x4, #(9 * 8)]
414 str x13, [x3, #(8 * 8)] 270 str x14, [x4, #(8 * 8)]
415 str x12, [x3, #(7 * 8)] 271 str x13, [x4, #(7 * 8)]
416 str x11, [x3, #(6 * 8)] 272 str x12, [x4, #(6 * 8)]
417 str x10, [x3, #(5 * 8)] 273 str x11, [x4, #(5 * 8)]
418 str x9, [x3, #(4 * 8)] 274 str x10, [x4, #(4 * 8)]
419 str x8, [x3, #(3 * 8)] 275 str x9, [x4, #(3 * 8)]
420 str x7, [x3, #(2 * 8)] 276 str x8, [x4, #(2 * 8)]
421 str x6, [x3, #(1 * 8)] 277 str x7, [x4, #(1 * 8)]
422 str x5, [x3, #(0 * 8)] 278 str x6, [x4, #(0 * 8)]
423
424 mrs x21, mdccint_el1
425 str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
426.endm 279.endm
427 280
428.macro restore_sysregs 281.macro restore_sysregs
@@ -467,195 +320,52 @@
467 msr mdscr_el1, x25 320 msr mdscr_el1, x25
468.endm 321.endm
469 322
470.macro restore_debug 323.macro restore_debug type
471 // x2: base address for cpu context 324 // x4: pointer to register set
472 // x3: tmp register 325 // x5: number of registers to skip
473 326 // x6..x22 trashed
474 mrs x26, id_aa64dfr0_el1
475 ubfx x24, x26, #12, #4 // Extract BRPs
476 ubfx x25, x26, #20, #4 // Extract WRPs
477 mov w26, #15
478 sub w24, w26, w24 // How many BPs to skip
479 sub w25, w26, w25 // How many WPs to skip
480
481 add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
482 327
483 adr x26, 1f 328 adr x22, 1f
484 add x26, x26, x24, lsl #2 329 add x22, x22, x5, lsl #2
485 br x26 330 br x22
4861:
487 ldr x20, [x3, #(15 * 8)]
488 ldr x19, [x3, #(14 * 8)]
489 ldr x18, [x3, #(13 * 8)]
490 ldr x17, [x3, #(12 * 8)]
491 ldr x16, [x3, #(11 * 8)]
492 ldr x15, [x3, #(10 * 8)]
493 ldr x14, [x3, #(9 * 8)]
494 ldr x13, [x3, #(8 * 8)]
495 ldr x12, [x3, #(7 * 8)]
496 ldr x11, [x3, #(6 * 8)]
497 ldr x10, [x3, #(5 * 8)]
498 ldr x9, [x3, #(4 * 8)]
499 ldr x8, [x3, #(3 * 8)]
500 ldr x7, [x3, #(2 * 8)]
501 ldr x6, [x3, #(1 * 8)]
502 ldr x5, [x3, #(0 * 8)]
503
504 adr x26, 1f
505 add x26, x26, x24, lsl #2
506 br x26
5071: 3311:
508 msr dbgbcr15_el1, x20 332 ldr x21, [x4, #(15 * 8)]
509 msr dbgbcr14_el1, x19 333 ldr x20, [x4, #(14 * 8)]
510 msr dbgbcr13_el1, x18 334 ldr x19, [x4, #(13 * 8)]
511 msr dbgbcr12_el1, x17 335 ldr x18, [x4, #(12 * 8)]
512 msr dbgbcr11_el1, x16 336 ldr x17, [x4, #(11 * 8)]
513 msr dbgbcr10_el1, x15 337 ldr x16, [x4, #(10 * 8)]
514 msr dbgbcr9_el1, x14 338 ldr x15, [x4, #(9 * 8)]
515 msr dbgbcr8_el1, x13 339 ldr x14, [x4, #(8 * 8)]
516 msr dbgbcr7_el1, x12 340 ldr x13, [x4, #(7 * 8)]
517 msr dbgbcr6_el1, x11 341 ldr x12, [x4, #(6 * 8)]
518 msr dbgbcr5_el1, x10 342 ldr x11, [x4, #(5 * 8)]
519 msr dbgbcr4_el1, x9 343 ldr x10, [x4, #(4 * 8)]
520 msr dbgbcr3_el1, x8 344 ldr x9, [x4, #(3 * 8)]
521 msr dbgbcr2_el1, x7 345 ldr x8, [x4, #(2 * 8)]
522 msr dbgbcr1_el1, x6 346 ldr x7, [x4, #(1 * 8)]
523 msr dbgbcr0_el1, x5 347 ldr x6, [x4, #(0 * 8)]
524 348
525 add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1) 349 adr x22, 1f
526 350 add x22, x22, x5, lsl #2
527 adr x26, 1f 351 br x22
528 add x26, x26, x24, lsl #2
529 br x26
5301: 3521:
531 ldr x20, [x3, #(15 * 8)] 353 msr \type\()15_el1, x21
532 ldr x19, [x3, #(14 * 8)] 354 msr \type\()14_el1, x20
533 ldr x18, [x3, #(13 * 8)] 355 msr \type\()13_el1, x19
534 ldr x17, [x3, #(12 * 8)] 356 msr \type\()12_el1, x18
535 ldr x16, [x3, #(11 * 8)] 357 msr \type\()11_el1, x17
536 ldr x15, [x3, #(10 * 8)] 358 msr \type\()10_el1, x16
537 ldr x14, [x3, #(9 * 8)] 359 msr \type\()9_el1, x15
538 ldr x13, [x3, #(8 * 8)] 360 msr \type\()8_el1, x14
539 ldr x12, [x3, #(7 * 8)] 361 msr \type\()7_el1, x13
540 ldr x11, [x3, #(6 * 8)] 362 msr \type\()6_el1, x12
541 ldr x10, [x3, #(5 * 8)] 363 msr \type\()5_el1, x11
542 ldr x9, [x3, #(4 * 8)] 364 msr \type\()4_el1, x10
543 ldr x8, [x3, #(3 * 8)] 365 msr \type\()3_el1, x9
544 ldr x7, [x3, #(2 * 8)] 366 msr \type\()2_el1, x8
545 ldr x6, [x3, #(1 * 8)] 367 msr \type\()1_el1, x7
546 ldr x5, [x3, #(0 * 8)] 368 msr \type\()0_el1, x6
547
548 adr x26, 1f
549 add x26, x26, x24, lsl #2
550 br x26
5511:
552 msr dbgbvr15_el1, x20
553 msr dbgbvr14_el1, x19
554 msr dbgbvr13_el1, x18
555 msr dbgbvr12_el1, x17
556 msr dbgbvr11_el1, x16
557 msr dbgbvr10_el1, x15
558 msr dbgbvr9_el1, x14
559 msr dbgbvr8_el1, x13
560 msr dbgbvr7_el1, x12
561 msr dbgbvr6_el1, x11
562 msr dbgbvr5_el1, x10
563 msr dbgbvr4_el1, x9
564 msr dbgbvr3_el1, x8
565 msr dbgbvr2_el1, x7
566 msr dbgbvr1_el1, x6
567 msr dbgbvr0_el1, x5
568
569 add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
570
571 adr x26, 1f
572 add x26, x26, x25, lsl #2
573 br x26
5741:
575 ldr x20, [x3, #(15 * 8)]
576 ldr x19, [x3, #(14 * 8)]
577 ldr x18, [x3, #(13 * 8)]
578 ldr x17, [x3, #(12 * 8)]
579 ldr x16, [x3, #(11 * 8)]
580 ldr x15, [x3, #(10 * 8)]
581 ldr x14, [x3, #(9 * 8)]
582 ldr x13, [x3, #(8 * 8)]
583 ldr x12, [x3, #(7 * 8)]
584 ldr x11, [x3, #(6 * 8)]
585 ldr x10, [x3, #(5 * 8)]
586 ldr x9, [x3, #(4 * 8)]
587 ldr x8, [x3, #(3 * 8)]
588 ldr x7, [x3, #(2 * 8)]
589 ldr x6, [x3, #(1 * 8)]
590 ldr x5, [x3, #(0 * 8)]
591
592 adr x26, 1f
593 add x26, x26, x25, lsl #2
594 br x26
5951:
596 msr dbgwcr15_el1, x20
597 msr dbgwcr14_el1, x19
598 msr dbgwcr13_el1, x18
599 msr dbgwcr12_el1, x17
600 msr dbgwcr11_el1, x16
601 msr dbgwcr10_el1, x15
602 msr dbgwcr9_el1, x14
603 msr dbgwcr8_el1, x13
604 msr dbgwcr7_el1, x12
605 msr dbgwcr6_el1, x11
606 msr dbgwcr5_el1, x10
607 msr dbgwcr4_el1, x9
608 msr dbgwcr3_el1, x8
609 msr dbgwcr2_el1, x7
610 msr dbgwcr1_el1, x6
611 msr dbgwcr0_el1, x5
612
613 add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
614
615 adr x26, 1f
616 add x26, x26, x25, lsl #2
617 br x26
6181:
619 ldr x20, [x3, #(15 * 8)]
620 ldr x19, [x3, #(14 * 8)]
621 ldr x18, [x3, #(13 * 8)]
622 ldr x17, [x3, #(12 * 8)]
623 ldr x16, [x3, #(11 * 8)]
624 ldr x15, [x3, #(10 * 8)]
625 ldr x14, [x3, #(9 * 8)]
626 ldr x13, [x3, #(8 * 8)]
627 ldr x12, [x3, #(7 * 8)]
628 ldr x11, [x3, #(6 * 8)]
629 ldr x10, [x3, #(5 * 8)]
630 ldr x9, [x3, #(4 * 8)]
631 ldr x8, [x3, #(3 * 8)]
632 ldr x7, [x3, #(2 * 8)]
633 ldr x6, [x3, #(1 * 8)]
634 ldr x5, [x3, #(0 * 8)]
635
636 adr x26, 1f
637 add x26, x26, x25, lsl #2
638 br x26
6391:
640 msr dbgwvr15_el1, x20
641 msr dbgwvr14_el1, x19
642 msr dbgwvr13_el1, x18
643 msr dbgwvr12_el1, x17
644 msr dbgwvr11_el1, x16
645 msr dbgwvr10_el1, x15
646 msr dbgwvr9_el1, x14
647 msr dbgwvr8_el1, x13
648 msr dbgwvr7_el1, x12
649 msr dbgwvr6_el1, x11
650 msr dbgwvr5_el1, x10
651 msr dbgwvr4_el1, x9
652 msr dbgwvr3_el1, x8
653 msr dbgwvr2_el1, x7
654 msr dbgwvr1_el1, x6
655 msr dbgwvr0_el1, x5
656
657 ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
658 msr mdccint_el1, x21
659.endm 369.endm
660 370
661.macro skip_32bit_state tmp, target 371.macro skip_32bit_state tmp, target
@@ -675,6 +385,14 @@
675 tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target 385 tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target
676.endm 386.endm
677 387
388/*
389 * Branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping enabled)
390 */
391.macro skip_fpsimd_state tmp, target
392 mrs \tmp, cptr_el2
393 tbnz \tmp, #CPTR_EL2_TFP_SHIFT, \target
394.endm
395
678.macro compute_debug_state target 396.macro compute_debug_state target
679 // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY 397 // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
680 // is set, we do a full save/restore cycle and disable trapping. 398 // is set, we do a full save/restore cycle and disable trapping.
@@ -713,10 +431,12 @@
713 add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) 431 add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
714 mrs x4, dacr32_el2 432 mrs x4, dacr32_el2
715 mrs x5, ifsr32_el2 433 mrs x5, ifsr32_el2
716 mrs x6, fpexc32_el2
717 stp x4, x5, [x3] 434 stp x4, x5, [x3]
718 str x6, [x3, #16]
719 435
436 skip_fpsimd_state x8, 3f
437 mrs x6, fpexc32_el2
438 str x6, [x3, #16]
4393:
720 skip_debug_state x8, 2f 440 skip_debug_state x8, 2f
721 mrs x7, dbgvcr32_el2 441 mrs x7, dbgvcr32_el2
722 str x7, [x3, #24] 442 str x7, [x3, #24]
@@ -743,10 +463,8 @@
743 463
744 add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) 464 add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
745 ldp x4, x5, [x3] 465 ldp x4, x5, [x3]
746 ldr x6, [x3, #16]
747 msr dacr32_el2, x4 466 msr dacr32_el2, x4
748 msr ifsr32_el2, x5 467 msr ifsr32_el2, x5
749 msr fpexc32_el2, x6
750 468
751 skip_debug_state x8, 2f 469 skip_debug_state x8, 2f
752 ldr x7, [x3, #24] 470 ldr x7, [x3, #24]
@@ -763,31 +481,35 @@
763 481
764.macro activate_traps 482.macro activate_traps
765 ldr x2, [x0, #VCPU_HCR_EL2] 483 ldr x2, [x0, #VCPU_HCR_EL2]
484
485 /*
486 * We are about to set CPTR_EL2.TFP to trap all floating point
487 * register accesses to EL2, however, the ARM ARM clearly states that
488 * traps are only taken to EL2 if the operation would not otherwise
489 * trap to EL1. Therefore, always make sure that for 32-bit guests,
490 * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
491 */
492 tbnz x2, #HCR_RW_SHIFT, 99f // open code skip_32bit_state
493 mov x3, #(1 << 30)
494 msr fpexc32_el2, x3
495 isb
49699:
766 msr hcr_el2, x2 497 msr hcr_el2, x2
767 mov x2, #CPTR_EL2_TTA 498 mov x2, #CPTR_EL2_TTA
499 orr x2, x2, #CPTR_EL2_TFP
768 msr cptr_el2, x2 500 msr cptr_el2, x2
769 501
770 mov x2, #(1 << 15) // Trap CP15 Cr=15 502 mov x2, #(1 << 15) // Trap CP15 Cr=15
771 msr hstr_el2, x2 503 msr hstr_el2, x2
772 504
773 mrs x2, mdcr_el2 505 // Monitor Debug Config - see kvm_arm_setup_debug()
774 and x2, x2, #MDCR_EL2_HPMN_MASK 506 ldr x2, [x0, #VCPU_MDCR_EL2]
775 orr x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR)
776 orr x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA)
777
778 // Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap
779 // if not dirty.
780 ldr x3, [x0, #VCPU_DEBUG_FLAGS]
781 tbnz x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f
782 orr x2, x2, #MDCR_EL2_TDA
7831:
784 msr mdcr_el2, x2 507 msr mdcr_el2, x2
785.endm 508.endm
786 509
787.macro deactivate_traps 510.macro deactivate_traps
788 mov x2, #HCR_RW 511 mov x2, #HCR_RW
789 msr hcr_el2, x2 512 msr hcr_el2, x2
790 msr cptr_el2, xzr
791 msr hstr_el2, xzr 513 msr hstr_el2, xzr
792 514
793 mrs x2, mdcr_el2 515 mrs x2, mdcr_el2
@@ -900,21 +622,101 @@ __restore_sysregs:
900 restore_sysregs 622 restore_sysregs
901 ret 623 ret
902 624
625/* Save debug state */
903__save_debug: 626__save_debug:
904 save_debug 627 // x2: ptr to CPU context
628 // x3: ptr to debug reg struct
629 // x4/x5/x6-22/x24-26: trashed
630
631 mrs x26, id_aa64dfr0_el1
632 ubfx x24, x26, #12, #4 // Extract BRPs
633 ubfx x25, x26, #20, #4 // Extract WRPs
634 mov w26, #15
635 sub w24, w26, w24 // How many BPs to skip
636 sub w25, w26, w25 // How many WPs to skip
637
638 mov x5, x24
639 add x4, x3, #DEBUG_BCR
640 save_debug dbgbcr
641 add x4, x3, #DEBUG_BVR
642 save_debug dbgbvr
643
644 mov x5, x25
645 add x4, x3, #DEBUG_WCR
646 save_debug dbgwcr
647 add x4, x3, #DEBUG_WVR
648 save_debug dbgwvr
649
650 mrs x21, mdccint_el1
651 str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
905 ret 652 ret
906 653
654/* Restore debug state */
907__restore_debug: 655__restore_debug:
908 restore_debug 656 // x2: ptr to CPU context
657 // x3: ptr to debug reg struct
658 // x4/x5/x6-22/x24-26: trashed
659
660 mrs x26, id_aa64dfr0_el1
661 ubfx x24, x26, #12, #4 // Extract BRPs
662 ubfx x25, x26, #20, #4 // Extract WRPs
663 mov w26, #15
664 sub w24, w26, w24 // How many BPs to skip
665 sub w25, w26, w25 // How many WPs to skip
666
667 mov x5, x24
668 add x4, x3, #DEBUG_BCR
669 restore_debug dbgbcr
670 add x4, x3, #DEBUG_BVR
671 restore_debug dbgbvr
672
673 mov x5, x25
674 add x4, x3, #DEBUG_WCR
675 restore_debug dbgwcr
676 add x4, x3, #DEBUG_WVR
677 restore_debug dbgwvr
678
679 ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
680 msr mdccint_el1, x21
681
909 ret 682 ret
910 683
911__save_fpsimd: 684__save_fpsimd:
685 skip_fpsimd_state x3, 1f
912 save_fpsimd 686 save_fpsimd
913 ret 6871: ret
914 688
915__restore_fpsimd: 689__restore_fpsimd:
690 skip_fpsimd_state x3, 1f
916 restore_fpsimd 691 restore_fpsimd
917 ret 6921: ret
693
694switch_to_guest_fpsimd:
695 push x4, lr
696
697 mrs x2, cptr_el2
698 bic x2, x2, #CPTR_EL2_TFP
699 msr cptr_el2, x2
700 isb
701
702 mrs x0, tpidr_el2
703
704 ldr x2, [x0, #VCPU_HOST_CONTEXT]
705 kern_hyp_va x2
706 bl __save_fpsimd
707
708 add x2, x0, #VCPU_CONTEXT
709 bl __restore_fpsimd
710
711 skip_32bit_state x3, 1f
712 ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)]
713 msr fpexc32_el2, x4
7141:
715 pop x4, lr
716 pop x2, x3
717 pop x0, x1
718
719 eret
918 720
919/* 721/*
920 * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu); 722 * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
@@ -936,10 +738,10 @@ ENTRY(__kvm_vcpu_run)
936 kern_hyp_va x2 738 kern_hyp_va x2
937 739
938 save_host_regs 740 save_host_regs
939 bl __save_fpsimd
940 bl __save_sysregs 741 bl __save_sysregs
941 742
942 compute_debug_state 1f 743 compute_debug_state 1f
744 add x3, x0, #VCPU_HOST_DEBUG_STATE
943 bl __save_debug 745 bl __save_debug
9441: 7461:
945 activate_traps 747 activate_traps
@@ -952,9 +754,10 @@ ENTRY(__kvm_vcpu_run)
952 add x2, x0, #VCPU_CONTEXT 754 add x2, x0, #VCPU_CONTEXT
953 755
954 bl __restore_sysregs 756 bl __restore_sysregs
955 bl __restore_fpsimd
956 757
957 skip_debug_state x3, 1f 758 skip_debug_state x3, 1f
759 ldr x3, [x0, #VCPU_DEBUG_PTR]
760 kern_hyp_va x3
958 bl __restore_debug 761 bl __restore_debug
9591: 7621:
960 restore_guest_32bit_state 763 restore_guest_32bit_state
@@ -975,6 +778,8 @@ __kvm_vcpu_return:
975 bl __save_sysregs 778 bl __save_sysregs
976 779
977 skip_debug_state x3, 1f 780 skip_debug_state x3, 1f
781 ldr x3, [x0, #VCPU_DEBUG_PTR]
782 kern_hyp_va x3
978 bl __save_debug 783 bl __save_debug
9791: 7841:
980 save_guest_32bit_state 785 save_guest_32bit_state
@@ -991,12 +796,15 @@ __kvm_vcpu_return:
991 796
992 bl __restore_sysregs 797 bl __restore_sysregs
993 bl __restore_fpsimd 798 bl __restore_fpsimd
799 /* Clear FPSIMD and Trace trapping */
800 msr cptr_el2, xzr
994 801
995 skip_debug_state x3, 1f 802 skip_debug_state x3, 1f
996 // Clear the dirty flag for the next run, as all the state has 803 // Clear the dirty flag for the next run, as all the state has
997 // already been saved. Note that we nuke the whole 64bit word. 804 // already been saved. Note that we nuke the whole 64bit word.
998 // If we ever add more flags, we'll have to be more careful... 805 // If we ever add more flags, we'll have to be more careful...
999 str xzr, [x0, #VCPU_DEBUG_FLAGS] 806 str xzr, [x0, #VCPU_DEBUG_FLAGS]
807 add x3, x0, #VCPU_HOST_DEBUG_STATE
1000 bl __restore_debug 808 bl __restore_debug
10011: 8091:
1002 restore_host_regs 810 restore_host_regs
@@ -1199,6 +1007,11 @@ el1_trap:
1199 * x1: ESR 1007 * x1: ESR
1200 * x2: ESR_EC 1008 * x2: ESR_EC
1201 */ 1009 */
1010
1011 /* Guest accessed VFP/SIMD registers, save host, restore Guest */
1012 cmp x2, #ESR_ELx_EC_FP_ASIMD
1013 b.eq switch_to_guest_fpsimd
1014
1202 cmp x2, #ESR_ELx_EC_DABT_LOW 1015 cmp x2, #ESR_ELx_EC_DABT_LOW
1203 mov x0, #ESR_ELx_EC_IABT_LOW 1016 mov x0, #ESR_ELx_EC_IABT_LOW
1204 ccmp x2, x0, #4, ne 1017 ccmp x2, x0, #4, ne
@@ -1293,4 +1106,10 @@ ENTRY(__kvm_hyp_vector)
1293 ventry el1_error_invalid // Error 32-bit EL1 1106 ventry el1_error_invalid // Error 32-bit EL1
1294ENDPROC(__kvm_hyp_vector) 1107ENDPROC(__kvm_hyp_vector)
1295 1108
1109
1110ENTRY(__kvm_get_mdcr_el2)
1111 mrs x0, mdcr_el2
1112 ret
1113ENDPROC(__kvm_get_mdcr_el2)
1114
1296 .popsection 1115 .popsection
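
The hyp.S rework above makes FP/SIMD switching lazy: entering the guest only sets CPTR_EL2.TFP, and the first guest FP/SIMD access traps (ESR_ELx_EC_FP_ASIMD) into switch_to_guest_fpsimd, which saves the host state and restores the guest's. The sketch below is a minimal userspace model of that flow, not kernel code; every name in it (fp_trapped, host_fp, guest_fp, live) is illustrative.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct fp_state { double regs[32]; };

static struct fp_state host_fp, guest_fp, live;   /* "live" models the CPU FP registers */
static bool fp_trapped;                            /* models CPTR_EL2.TFP being set      */

static void enter_guest(void)
{
	fp_trapped = true;        /* trap the first FP access instead of switching eagerly */
}

/* models the ESR_ELx_EC_FP_ASIMD -> switch_to_guest_fpsimd path */
static void guest_uses_fp(void)
{
	if (fp_trapped) {
		memcpy(&host_fp, &live, sizeof(live));   /* __save_fpsimd on host context     */
		memcpy(&live, &guest_fp, sizeof(live));  /* __restore_fpsimd on guest context */
		fp_trapped = false;                      /* clear CPTR_EL2.TFP                */
	}
	live.regs[0] += 1.0;      /* the guest's actual FP work */
}

static void exit_guest(void)
{
	if (!fp_trapped) {                               /* guest really touched FP        */
		memcpy(&guest_fp, &live, sizeof(live));
		memcpy(&live, &host_fp, sizeof(live));
	}
	fp_trapped = false;                              /* msr cptr_el2, xzr on return    */
}

int main(void)
{
	enter_guest();
	guest_uses_fp();          /* only this first use pays the save/restore cost */
	guest_uses_fp();
	exit_guest();
	printf("guest FP accumulator: %f\n", guest_fp.regs[0]);
	return 0;
}

If the guest never touches FP/SIMD during a run, neither the save nor the restore happens, which is the point of the change.
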
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 0b4326578985..91cf5350b328 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -22,6 +22,7 @@
22#include <linux/errno.h> 22#include <linux/errno.h>
23#include <linux/kvm_host.h> 23#include <linux/kvm_host.h>
24#include <linux/kvm.h> 24#include <linux/kvm.h>
25#include <linux/hw_breakpoint.h>
25 26
26#include <kvm/arm_arch_timer.h> 27#include <kvm/arm_arch_timer.h>
27 28
@@ -56,6 +57,12 @@ static bool cpu_has_32bit_el1(void)
56 return !!(pfr0 & 0x20); 57 return !!(pfr0 & 0x20);
57} 58}
58 59
60/**
61 * kvm_arch_dev_ioctl_check_extension
62 *
63 * We currently assume that the number of HW registers is uniform
64 * across all CPUs (see cpuinfo_sanity_check).
65 */
59int kvm_arch_dev_ioctl_check_extension(long ext) 66int kvm_arch_dev_ioctl_check_extension(long ext)
60{ 67{
61 int r; 68 int r;
@@ -64,6 +71,15 @@ int kvm_arch_dev_ioctl_check_extension(long ext)
64 case KVM_CAP_ARM_EL1_32BIT: 71 case KVM_CAP_ARM_EL1_32BIT:
65 r = cpu_has_32bit_el1(); 72 r = cpu_has_32bit_el1();
66 break; 73 break;
74 case KVM_CAP_GUEST_DEBUG_HW_BPS:
75 r = get_num_brps();
76 break;
77 case KVM_CAP_GUEST_DEBUG_HW_WPS:
78 r = get_num_wrps();
79 break;
80 case KVM_CAP_SET_GUEST_DEBUG:
81 r = 1;
82 break;
67 default: 83 default:
68 r = 0; 84 r = 0;
69 } 85 }
@@ -105,7 +121,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
105 kvm_reset_sys_regs(vcpu); 121 kvm_reset_sys_regs(vcpu);
106 122
107 /* Reset timer */ 123 /* Reset timer */
108 kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); 124 return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
109
110 return 0;
111} 125}
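
With the reset.c hunk above, userspace can discover how many hardware breakpoints and watchpoints are available for guest debugging through KVM_CHECK_EXTENSION. A small query program, assuming kernel headers recent enough to define KVM_CAP_GUEST_DEBUG_HW_BPS and KVM_CAP_GUEST_DEBUG_HW_WPS:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	/* KVM_CHECK_EXTENSION returns the capability's value, not just 0/1 */
	int bps = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_GUEST_DEBUG_HW_BPS);
	int wps = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_GUEST_DEBUG_HW_WPS);
	int dbg = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_SET_GUEST_DEBUG);

	printf("HW breakpoints: %d, HW watchpoints: %d, KVM_SET_GUEST_DEBUG: %s\n",
	       bps, wps, dbg > 0 ? "yes" : "no");

	close(kvm);
	return 0;
}

On arm64 the two counts ultimately come from ID_AA64DFR0_EL1, the same register the new __save_debug/__restore_debug paths parse before deciding how many break/watchpoint registers to touch.
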
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index c370b4014799..b41607d270ac 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -38,6 +38,8 @@
38 38
39#include "sys_regs.h" 39#include "sys_regs.h"
40 40
41#include "trace.h"
42
41/* 43/*
42 * All of this file is extremly similar to the ARM coproc.c, but the 44 * All of this file is extremly similar to the ARM coproc.c, but the
43 * types are different. My gut feeling is that it should be pretty 45 * types are different. My gut feeling is that it should be pretty
@@ -208,9 +210,217 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
208 *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg); 210 *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
209 } 211 }
210 212
213 trace_trap_reg(__func__, r->reg, p->is_write, *vcpu_reg(vcpu, p->Rt));
214
215 return true;
216}
217
218/*
219 * reg_to_dbg/dbg_to_reg
220 *
221 * A 32 bit write to a debug register leave top bits alone
222 * A 32 bit read from a debug register only returns the bottom bits
223 *
224 * All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the
225 * hyp.S code switches between host and guest values in future.
226 */
227static inline void reg_to_dbg(struct kvm_vcpu *vcpu,
228 const struct sys_reg_params *p,
229 u64 *dbg_reg)
230{
231 u64 val = *vcpu_reg(vcpu, p->Rt);
232
233 if (p->is_32bit) {
234 val &= 0xffffffffUL;
235 val |= ((*dbg_reg >> 32) << 32);
236 }
237
238 *dbg_reg = val;
239 vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
240}
241
242static inline void dbg_to_reg(struct kvm_vcpu *vcpu,
243 const struct sys_reg_params *p,
244 u64 *dbg_reg)
245{
246 u64 val = *dbg_reg;
247
248 if (p->is_32bit)
249 val &= 0xffffffffUL;
250
251 *vcpu_reg(vcpu, p->Rt) = val;
252}
253
254static inline bool trap_bvr(struct kvm_vcpu *vcpu,
255 const struct sys_reg_params *p,
256 const struct sys_reg_desc *rd)
257{
258 u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
259
260 if (p->is_write)
261 reg_to_dbg(vcpu, p, dbg_reg);
262 else
263 dbg_to_reg(vcpu, p, dbg_reg);
264
265 trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
266
267 return true;
268}
269
270static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
271 const struct kvm_one_reg *reg, void __user *uaddr)
272{
273 __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
274
275 if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
276 return -EFAULT;
277 return 0;
278}
279
280static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
281 const struct kvm_one_reg *reg, void __user *uaddr)
282{
283 __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
284
285 if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
286 return -EFAULT;
287 return 0;
288}
289
290static inline void reset_bvr(struct kvm_vcpu *vcpu,
291 const struct sys_reg_desc *rd)
292{
293 vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val;
294}
295
296static inline bool trap_bcr(struct kvm_vcpu *vcpu,
297 const struct sys_reg_params *p,
298 const struct sys_reg_desc *rd)
299{
300 u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
301
302 if (p->is_write)
303 reg_to_dbg(vcpu, p, dbg_reg);
304 else
305 dbg_to_reg(vcpu, p, dbg_reg);
306
307 trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
308
309 return true;
310}
311
312static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
313 const struct kvm_one_reg *reg, void __user *uaddr)
314{
315 __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
316
317 if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
318 return -EFAULT;
319
320 return 0;
321}
322
323static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
324 const struct kvm_one_reg *reg, void __user *uaddr)
325{
326 __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
327
328 if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
329 return -EFAULT;
330 return 0;
331}
332
333static inline void reset_bcr(struct kvm_vcpu *vcpu,
334 const struct sys_reg_desc *rd)
335{
336 vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val;
337}
338
339static inline bool trap_wvr(struct kvm_vcpu *vcpu,
340 const struct sys_reg_params *p,
341 const struct sys_reg_desc *rd)
342{
343 u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
344
345 if (p->is_write)
346 reg_to_dbg(vcpu, p, dbg_reg);
347 else
348 dbg_to_reg(vcpu, p, dbg_reg);
349
350 trace_trap_reg(__func__, rd->reg, p->is_write,
351 vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]);
352
211 return true; 353 return true;
212} 354}
213 355
356static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
357 const struct kvm_one_reg *reg, void __user *uaddr)
358{
359 __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
360
361 if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
362 return -EFAULT;
363 return 0;
364}
365
366static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
367 const struct kvm_one_reg *reg, void __user *uaddr)
368{
369 __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
370
371 if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
372 return -EFAULT;
373 return 0;
374}
375
376static inline void reset_wvr(struct kvm_vcpu *vcpu,
377 const struct sys_reg_desc *rd)
378{
379 vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val;
380}
381
382static inline bool trap_wcr(struct kvm_vcpu *vcpu,
383 const struct sys_reg_params *p,
384 const struct sys_reg_desc *rd)
385{
386 u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
387
388 if (p->is_write)
389 reg_to_dbg(vcpu, p, dbg_reg);
390 else
391 dbg_to_reg(vcpu, p, dbg_reg);
392
393 trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
394
395 return true;
396}
397
398static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
399 const struct kvm_one_reg *reg, void __user *uaddr)
400{
401 __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
402
403 if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
404 return -EFAULT;
405 return 0;
406}
407
408static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
409 const struct kvm_one_reg *reg, void __user *uaddr)
410{
411 __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
412
413 if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
414 return -EFAULT;
415 return 0;
416}
417
418static inline void reset_wcr(struct kvm_vcpu *vcpu,
419 const struct sys_reg_desc *rd)
420{
421 vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val;
422}
423
214static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) 424static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
215{ 425{
216 u64 amair; 426 u64 amair;
@@ -240,16 +450,16 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
240#define DBG_BCR_BVR_WCR_WVR_EL1(n) \ 450#define DBG_BCR_BVR_WCR_WVR_EL1(n) \
241 /* DBGBVRn_EL1 */ \ 451 /* DBGBVRn_EL1 */ \
242 { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \ 452 { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \
243 trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 }, \ 453 trap_bvr, reset_bvr, n, 0, get_bvr, set_bvr }, \
244 /* DBGBCRn_EL1 */ \ 454 /* DBGBCRn_EL1 */ \
245 { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \ 455 { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \
246 trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 }, \ 456 trap_bcr, reset_bcr, n, 0, get_bcr, set_bcr }, \
247 /* DBGWVRn_EL1 */ \ 457 /* DBGWVRn_EL1 */ \
248 { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \ 458 { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \
249 trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 }, \ 459 trap_wvr, reset_wvr, n, 0, get_wvr, set_wvr }, \
250 /* DBGWCRn_EL1 */ \ 460 /* DBGWCRn_EL1 */ \
251 { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \ 461 { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \
252 trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 } 462 trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr }
253 463
254/* 464/*
255 * Architected system registers. 465 * Architected system registers.
@@ -516,28 +726,57 @@ static bool trap_debug32(struct kvm_vcpu *vcpu,
516 return true; 726 return true;
517} 727}
518 728
519#define DBG_BCR_BVR_WCR_WVR(n) \ 729/* AArch32 debug register mappings
520 /* DBGBVRn */ \ 730 *
521 { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32, \ 731 * AArch32 DBGBVRn is mapped to DBGBVRn_EL1[31:0]
522 NULL, (cp14_DBGBVR0 + (n) * 2) }, \ 732 * AArch32 DBGBXVRn is mapped to DBGBVRn_EL1[63:32]
523 /* DBGBCRn */ \ 733 *
524 { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32, \ 734 * All control registers and watchpoint value registers are mapped to
525 NULL, (cp14_DBGBCR0 + (n) * 2) }, \ 735 * the lower 32 bits of their AArch64 equivalents. We share the trap
526 /* DBGWVRn */ \ 736 * handlers with the above AArch64 code which checks what mode the
527 { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32, \ 737 * system is in.
528 NULL, (cp14_DBGWVR0 + (n) * 2) }, \ 738 */
529 /* DBGWCRn */ \ 739
530 { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32, \ 740static inline bool trap_xvr(struct kvm_vcpu *vcpu,
531 NULL, (cp14_DBGWCR0 + (n) * 2) } 741 const struct sys_reg_params *p,
532 742 const struct sys_reg_desc *rd)
533#define DBGBXVR(n) \ 743{
534 { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32, \ 744 u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
535 NULL, cp14_DBGBXVR0 + n * 2 } 745
746 if (p->is_write) {
747 u64 val = *dbg_reg;
748
749 val &= 0xffffffffUL;
750 val |= *vcpu_reg(vcpu, p->Rt) << 32;
751 *dbg_reg = val;
752
753 vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
754 } else {
755 *vcpu_reg(vcpu, p->Rt) = *dbg_reg >> 32;
756 }
757
758 trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
759
760 return true;
761}
762
763#define DBG_BCR_BVR_WCR_WVR(n) \
764 /* DBGBVRn */ \
765 { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \
766 /* DBGBCRn */ \
767 { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \
768 /* DBGWVRn */ \
769 { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \
770 /* DBGWCRn */ \
771 { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_wcr, NULL, n }
772
773#define DBGBXVR(n) \
774 { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_xvr, NULL, n }
536 775
537/* 776/*
538 * Trapped cp14 registers. We generally ignore most of the external 777 * Trapped cp14 registers. We generally ignore most of the external
539 * debug, on the principle that they don't really make sense to a 778 * debug, on the principle that they don't really make sense to a
540 * guest. Revisit this one day, whould this principle change. 779 * guest. Revisit this one day, would this principle change.
541 */ 780 */
542static const struct sys_reg_desc cp14_regs[] = { 781static const struct sys_reg_desc cp14_regs[] = {
543 /* DBGIDR */ 782 /* DBGIDR */
@@ -999,6 +1238,8 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
999 struct sys_reg_params params; 1238 struct sys_reg_params params;
1000 unsigned long esr = kvm_vcpu_get_hsr(vcpu); 1239 unsigned long esr = kvm_vcpu_get_hsr(vcpu);
1001 1240
1241 trace_kvm_handle_sys_reg(esr);
1242
1002 params.is_aarch32 = false; 1243 params.is_aarch32 = false;
1003 params.is_32bit = false; 1244 params.is_32bit = false;
1004 params.Op0 = (esr >> 20) & 3; 1245 params.Op0 = (esr >> 20) & 3;
@@ -1303,6 +1544,9 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
1303 if (!r) 1544 if (!r)
1304 return get_invariant_sys_reg(reg->id, uaddr); 1545 return get_invariant_sys_reg(reg->id, uaddr);
1305 1546
1547 if (r->get_user)
1548 return (r->get_user)(vcpu, r, reg, uaddr);
1549
1306 return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id); 1550 return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id);
1307} 1551}
1308 1552
@@ -1321,6 +1565,9 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
1321 if (!r) 1565 if (!r)
1322 return set_invariant_sys_reg(reg->id, uaddr); 1566 return set_invariant_sys_reg(reg->id, uaddr);
1323 1567
1568 if (r->set_user)
1569 return (r->set_user)(vcpu, r, reg, uaddr);
1570
1324 return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id); 1571 return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
1325} 1572}
1326 1573
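
The reg_to_dbg()/dbg_to_reg() helpers added above implement the width rule spelled out in their comment: a 32-bit write replaces only the low word of the 64-bit shadow register and leaves the top bits alone, while a 32-bit read returns only the low word. A standalone illustration of just that arithmetic (not the kernel helpers themselves):

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <inttypes.h>

static void write_dbg(uint64_t *dbg_reg, uint64_t val, bool is_32bit)
{
	if (is_32bit) {
		val &= 0xffffffffULL;
		val |= (*dbg_reg >> 32) << 32;   /* keep the existing top bits */
	}
	*dbg_reg = val;
}

static uint64_t read_dbg(const uint64_t *dbg_reg, bool is_32bit)
{
	return is_32bit ? (*dbg_reg & 0xffffffffULL) : *dbg_reg;
}

int main(void)
{
	uint64_t bvr = 0x1122334455667788ULL;

	write_dbg(&bvr, 0xdeadbeef, true);                      /* AArch32 DBGBVRn write */
	printf("after 32-bit write: 0x%016" PRIx64 "\n", bvr);  /* 0x11223344deadbeef    */
	printf("32-bit read:        0x%08"  PRIx64 "\n", read_dbg(&bvr, true));
	return 0;
}

The AArch32 DBGBXVRn trap (trap_xvr above) is the mirror image: it reads and writes only the top 32 bits of the same 64-bit shadow.
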
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index d411e251412c..eaa324e4db4d 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -55,6 +55,12 @@ struct sys_reg_desc {
55 55
56 /* Value (usually reset value) */ 56 /* Value (usually reset value) */
57 u64 val; 57 u64 val;
58
59 /* Custom get/set_user functions, fallback to generic if NULL */
60 int (*get_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
61 const struct kvm_one_reg *reg, void __user *uaddr);
62 int (*set_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
63 const struct kvm_one_reg *reg, void __user *uaddr);
58}; 64};
59 65
60static inline void print_sys_reg_instr(const struct sys_reg_params *p) 66static inline void print_sys_reg_instr(const struct sys_reg_params *p)
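
The two new sys_reg_desc callbacks let individual registers supply their own KVM_GET_ONE_REG/KVM_SET_ONE_REG handlers while everything else keeps the generic path. A simplified model of that dispatch, with stand-in types rather than the kernel structures:

#include <stdio.h>

struct reg_desc;
typedef int (*get_user_fn)(const struct reg_desc *rd, unsigned long long *val);

struct reg_desc {
	const char *name;
	unsigned long long shadow;   /* stands in for vcpu_sys_reg(vcpu, r->reg)    */
	get_user_fn get_user;        /* optional per-register override, may be NULL */
};

static int generic_get(const struct reg_desc *rd, unsigned long long *val)
{
	*val = rd->shadow;
	return 0;
}

/* debug registers live in a separate per-vcpu debug store, so their override
 * reads from there instead of the generic shadow, much like get_bvr() above */
static unsigned long long vcpu_debug_state_bvr0 = 0x00000000dead0000ULL;

static int dbg_get(const struct reg_desc *rd, unsigned long long *val)
{
	(void)rd;
	*val = vcpu_debug_state_bvr0;
	return 0;
}

static int get_one_reg(const struct reg_desc *rd, unsigned long long *val)
{
	if (rd->get_user)
		return rd->get_user(rd, val);   /* custom path, as in kvm_arm_sys_reg_get_reg() */
	return generic_get(rd, val);            /* otherwise the pre-existing generic behaviour */
}

int main(void)
{
	struct reg_desc plain = { "AMAIR_EL1",   0xaaULL, NULL };
	struct reg_desc dbg   = { "DBGBVR0_EL1", 0,       dbg_get };
	unsigned long long v;

	get_one_reg(&plain, &v);
	printf("%s = %#llx\n", plain.name, v);
	get_one_reg(&dbg, &v);
	printf("%s = %#llx\n", dbg.name, v);
	return 0;
}
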
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index 475fd2929310..1e4576824165 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -94,6 +94,8 @@ static int __init sys_reg_genericv8_init(void)
94 &genericv8_target_table); 94 &genericv8_target_table);
95 kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA, 95 kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,
96 &genericv8_target_table); 96 &genericv8_target_table);
97 kvm_register_target_sys_reg_table(KVM_ARM_TARGET_GENERIC_V8,
98 &genericv8_target_table);
97 99
98 return 0; 100 return 0;
99} 101}
diff --git a/arch/arm64/kvm/trace.h b/arch/arm64/kvm/trace.h
index 157416e963f2..7fb0008c4fa3 100644
--- a/arch/arm64/kvm/trace.h
+++ b/arch/arm64/kvm/trace.h
@@ -44,6 +44,129 @@ TRACE_EVENT(kvm_hvc_arm64,
44 __entry->vcpu_pc, __entry->r0, __entry->imm) 44 __entry->vcpu_pc, __entry->r0, __entry->imm)
45); 45);
46 46
47TRACE_EVENT(kvm_arm_setup_debug,
48 TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
49 TP_ARGS(vcpu, guest_debug),
50
51 TP_STRUCT__entry(
52 __field(struct kvm_vcpu *, vcpu)
53 __field(__u32, guest_debug)
54 ),
55
56 TP_fast_assign(
57 __entry->vcpu = vcpu;
58 __entry->guest_debug = guest_debug;
59 ),
60
61 TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
62);
63
64TRACE_EVENT(kvm_arm_clear_debug,
65 TP_PROTO(__u32 guest_debug),
66 TP_ARGS(guest_debug),
67
68 TP_STRUCT__entry(
69 __field(__u32, guest_debug)
70 ),
71
72 TP_fast_assign(
73 __entry->guest_debug = guest_debug;
74 ),
75
76 TP_printk("flags: 0x%08x", __entry->guest_debug)
77);
78
79TRACE_EVENT(kvm_arm_set_dreg32,
80 TP_PROTO(const char *name, __u32 value),
81 TP_ARGS(name, value),
82
83 TP_STRUCT__entry(
84 __field(const char *, name)
85 __field(__u32, value)
86 ),
87
88 TP_fast_assign(
89 __entry->name = name;
90 __entry->value = value;
91 ),
92
93 TP_printk("%s: 0x%08x", __entry->name, __entry->value)
94);
95
96TRACE_EVENT(kvm_arm_set_regset,
97 TP_PROTO(const char *type, int len, __u64 *control, __u64 *value),
98 TP_ARGS(type, len, control, value),
99 TP_STRUCT__entry(
100 __field(const char *, name)
101 __field(int, len)
102 __array(u64, ctrls, 16)
103 __array(u64, values, 16)
104 ),
105 TP_fast_assign(
106 __entry->name = type;
107 __entry->len = len;
108 memcpy(__entry->ctrls, control, len << 3);
109 memcpy(__entry->values, value, len << 3);
110 ),
111 TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name,
112 __print_array(__entry->ctrls, __entry->len, sizeof(__u64)),
113 __print_array(__entry->values, __entry->len, sizeof(__u64)))
114);
115
116TRACE_EVENT(trap_reg,
117 TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value),
118 TP_ARGS(fn, reg, is_write, write_value),
119
120 TP_STRUCT__entry(
121 __field(const char *, fn)
122 __field(int, reg)
123 __field(bool, is_write)
124 __field(u64, write_value)
125 ),
126
127 TP_fast_assign(
128 __entry->fn = fn;
129 __entry->reg = reg;
130 __entry->is_write = is_write;
131 __entry->write_value = write_value;
132 ),
133
134 TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value)
135);
136
137TRACE_EVENT(kvm_handle_sys_reg,
138 TP_PROTO(unsigned long hsr),
139 TP_ARGS(hsr),
140
141 TP_STRUCT__entry(
142 __field(unsigned long, hsr)
143 ),
144
145 TP_fast_assign(
146 __entry->hsr = hsr;
147 ),
148
149 TP_printk("HSR 0x%08lx", __entry->hsr)
150);
151
152TRACE_EVENT(kvm_set_guest_debug,
153 TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
154 TP_ARGS(vcpu, guest_debug),
155
156 TP_STRUCT__entry(
157 __field(struct kvm_vcpu *, vcpu)
158 __field(__u32, guest_debug)
159 ),
160
161 TP_fast_assign(
162 __entry->vcpu = vcpu;
163 __entry->guest_debug = guest_debug;
164 ),
165
166 TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
167);
168
169
47#endif /* _TRACE_ARM64_KVM_H */ 170#endif /* _TRACE_ARM64_KVM_H */
48 171
49#undef TRACE_INCLUDE_PATH 172#undef TRACE_INCLUDE_PATH
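
The tracepoints added above can be consumed from userspace through tracefs once the kernel is built with tracing enabled. The sketch below enables kvm_handle_sys_reg and reads the trace pipe; the mount point (often /sys/kernel/tracing or /sys/kernel/debug/tracing) and the "kvm" event group name are assumptions that may need adjusting for a given system.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define TRACEFS "/sys/kernel/debug/tracing"   /* assumed mount point */

int main(void)
{
	int fd = open(TRACEFS "/events/kvm/kvm_handle_sys_reg/enable", O_WRONLY);
	if (fd < 0) {
		perror("enable kvm_handle_sys_reg");
		return 1;
	}
	if (write(fd, "1", 1) != 1)
		perror("write enable");
	close(fd);

	/* trace_pipe blocks until events arrive, e.g. while a guest is running */
	int pipe_fd = open(TRACEFS "/trace_pipe", O_RDONLY);
	if (pipe_fd < 0) {
		perror("open trace_pipe");
		return 1;
	}
	char buf[4096];
	ssize_t n = read(pipe_fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);   /* lines end with "HSR 0x...", per TP_printk above */
	}
	close(pipe_fd);
	return 0;
}
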
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index b91e74a817d8..9fac01cb89c1 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -158,6 +158,7 @@ extern pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing,
158 bool *writable); 158 bool *writable);
159extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, 159extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
160 unsigned long *rmap, long pte_index, int realmode); 160 unsigned long *rmap, long pte_index, int realmode);
161extern void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize);
161extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, 162extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
162 unsigned long pte_index); 163 unsigned long pte_index);
163void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, 164void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
@@ -225,12 +226,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
225 return vcpu->arch.cr; 226 return vcpu->arch.cr;
226} 227}
227 228
228static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) 229static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
229{ 230{
230 vcpu->arch.xer = val; 231 vcpu->arch.xer = val;
231} 232}
232 233
233static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) 234static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
234{ 235{
235 return vcpu->arch.xer; 236 return vcpu->arch.xer;
236} 237}
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 5bdfb5dd3400..72b6225aca73 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -25,6 +25,12 @@
25#define XICS_MFRR 0xc 25#define XICS_MFRR 0xc
26#define XICS_IPI 2 /* interrupt source # for IPIs */ 26#define XICS_IPI 2 /* interrupt source # for IPIs */
27 27
28/* Maximum number of threads per physical core */
29#define MAX_SMT_THREADS 8
30
31/* Maximum number of subcores per physical core */
32#define MAX_SUBCORES 4
33
28#ifdef __ASSEMBLY__ 34#ifdef __ASSEMBLY__
29 35
30#ifdef CONFIG_KVM_BOOK3S_HANDLER 36#ifdef CONFIG_KVM_BOOK3S_HANDLER
@@ -65,6 +71,19 @@ kvmppc_resume_\intno:
65 71
66#else /*__ASSEMBLY__ */ 72#else /*__ASSEMBLY__ */
67 73
74struct kvmppc_vcore;
75
76/* Struct used for coordinating micro-threading (split-core) mode changes */
77struct kvm_split_mode {
78 unsigned long rpr;
79 unsigned long pmmar;
80 unsigned long ldbar;
81 u8 subcore_size;
82 u8 do_nap;
83 u8 napped[MAX_SMT_THREADS];
84 struct kvmppc_vcore *master_vcs[MAX_SUBCORES];
85};
86
68/* 87/*
69 * This struct goes in the PACA on 64-bit processors. It is used 88 * This struct goes in the PACA on 64-bit processors. It is used
70 * to store host state that needs to be saved when we enter a guest 89 * to store host state that needs to be saved when we enter a guest
@@ -100,6 +119,7 @@ struct kvmppc_host_state {
100 u64 host_spurr; 119 u64 host_spurr;
101 u64 host_dscr; 120 u64 host_dscr;
102 u64 dec_expires; 121 u64 dec_expires;
122 struct kvm_split_mode *kvm_split_mode;
103#endif 123#endif
104#ifdef CONFIG_PPC_BOOK3S_64 124#ifdef CONFIG_PPC_BOOK3S_64
105 u64 cfar; 125 u64 cfar;
@@ -112,7 +132,7 @@ struct kvmppc_book3s_shadow_vcpu {
112 bool in_use; 132 bool in_use;
113 ulong gpr[14]; 133 ulong gpr[14];
114 u32 cr; 134 u32 cr;
115 u32 xer; 135 ulong xer;
116 ulong ctr; 136 ulong ctr;
117 ulong lr; 137 ulong lr;
118 ulong pc; 138 ulong pc;
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index 3286f0d6a86c..bc6e29e4dfd4 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -54,12 +54,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
54 return vcpu->arch.cr; 54 return vcpu->arch.cr;
55} 55}
56 56
57static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) 57static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
58{ 58{
59 vcpu->arch.xer = val; 59 vcpu->arch.xer = val;
60} 60}
61 61
62static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) 62static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
63{ 63{
64 return vcpu->arch.xer; 64 return vcpu->arch.xer;
65} 65}
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d91f65b28e32..98eebbf66340 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -205,8 +205,10 @@ struct revmap_entry {
205 */ 205 */
206#define KVMPPC_RMAP_LOCK_BIT 63 206#define KVMPPC_RMAP_LOCK_BIT 63
207#define KVMPPC_RMAP_RC_SHIFT 32 207#define KVMPPC_RMAP_RC_SHIFT 32
208#define KVMPPC_RMAP_CHG_SHIFT 48
208#define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT) 209#define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
209#define KVMPPC_RMAP_CHANGED (HPTE_R_C << KVMPPC_RMAP_RC_SHIFT) 210#define KVMPPC_RMAP_CHANGED (HPTE_R_C << KVMPPC_RMAP_RC_SHIFT)
211#define KVMPPC_RMAP_CHG_ORDER (0x3ful << KVMPPC_RMAP_CHG_SHIFT)
210#define KVMPPC_RMAP_PRESENT 0x100000000ul 212#define KVMPPC_RMAP_PRESENT 0x100000000ul
211#define KVMPPC_RMAP_INDEX 0xfffffffful 213#define KVMPPC_RMAP_INDEX 0xfffffffful
212 214
@@ -278,7 +280,9 @@ struct kvmppc_vcore {
278 u16 last_cpu; 280 u16 last_cpu;
279 u8 vcore_state; 281 u8 vcore_state;
280 u8 in_guest; 282 u8 in_guest;
283 struct kvmppc_vcore *master_vcore;
281 struct list_head runnable_threads; 284 struct list_head runnable_threads;
285 struct list_head preempt_list;
282 spinlock_t lock; 286 spinlock_t lock;
283 wait_queue_head_t wq; 287 wait_queue_head_t wq;
284 spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */ 288 spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
@@ -300,12 +304,21 @@ struct kvmppc_vcore {
300#define VCORE_EXIT_MAP(vc) ((vc)->entry_exit_map >> 8) 304#define VCORE_EXIT_MAP(vc) ((vc)->entry_exit_map >> 8)
301#define VCORE_IS_EXITING(vc) (VCORE_EXIT_MAP(vc) != 0) 305#define VCORE_IS_EXITING(vc) (VCORE_EXIT_MAP(vc) != 0)
302 306
303/* Values for vcore_state */ 307/* This bit is used when a vcore exit is triggered from outside the vcore */
308#define VCORE_EXIT_REQ 0x10000
309
310/*
311 * Values for vcore_state.
312 * Note that these are arranged such that lower values
313 * (< VCORE_SLEEPING) don't require stolen time accounting
314 * on load/unload, and higher values do.
315 */
304#define VCORE_INACTIVE 0 316#define VCORE_INACTIVE 0
305#define VCORE_SLEEPING 1 317#define VCORE_PREEMPT 1
306#define VCORE_PREEMPT 2 318#define VCORE_PIGGYBACK 2
307#define VCORE_RUNNING 3 319#define VCORE_SLEEPING 3
308#define VCORE_EXITING 4 320#define VCORE_RUNNING 4
321#define VCORE_EXITING 5
309 322
310/* 323/*
311 * Struct used to manage memory for a virtual processor area 324 * Struct used to manage memory for a virtual processor area
@@ -473,7 +486,7 @@ struct kvm_vcpu_arch {
473 ulong ciabr; 486 ulong ciabr;
474 ulong cfar; 487 ulong cfar;
475 ulong ppr; 488 ulong ppr;
476 ulong pspb; 489 u32 pspb;
477 ulong fscr; 490 ulong fscr;
478 ulong shadow_fscr; 491 ulong shadow_fscr;
479 ulong ebbhr; 492 ulong ebbhr;
@@ -619,6 +632,7 @@ struct kvm_vcpu_arch {
619 int trap; 632 int trap;
620 int state; 633 int state;
621 int ptid; 634 int ptid;
635 int thread_cpu;
622 bool timer_running; 636 bool timer_running;
623 wait_queue_head_t cpu_run; 637 wait_queue_head_t cpu_run;
624 638
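
The vcore_state renumbering above is arranged so that "does this state need stolen-time accounting on load/unload?" becomes a single comparison against VCORE_SLEEPING, which is how the later book3s_hv.c hunks use it. A trivial standalone illustration:

#include <stdbool.h>
#include <stdio.h>

enum vcore_state {
	VCORE_INACTIVE,
	VCORE_PREEMPT,
	VCORE_PIGGYBACK,
	VCORE_SLEEPING,
	VCORE_RUNNING,
	VCORE_EXITING,
};

/* mirrors the comment above: values below VCORE_SLEEPING don't require
 * stolen-time accounting when the runner task is loaded or unloaded */
static bool needs_stolen_time_accounting(enum vcore_state s)
{
	return s >= VCORE_SLEEPING;
}

int main(void)
{
	printf("PIGGYBACK: %s\n", needs_stolen_time_accounting(VCORE_PIGGYBACK) ? "account" : "skip");
	printf("RUNNING:   %s\n", needs_stolen_time_accounting(VCORE_RUNNING)   ? "account" : "skip");
	return 0;
}
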
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 8452335661a5..790f5d1d9a46 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -287,7 +287,7 @@
287 287
288/* POWER8 Micro Partition Prefetch (MPP) parameters */ 288/* POWER8 Micro Partition Prefetch (MPP) parameters */
289/* Address mask is common for LOGMPP instruction and MPPR SPR */ 289/* Address mask is common for LOGMPP instruction and MPPR SPR */
290#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000 290#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000ULL
291 291
292/* Bits 60 and 61 of MPP SPR should be set to one of the following */ 292/* Bits 60 and 61 of MPP SPR should be set to one of the following */
293/* Aborting the fetch is indeed setting 00 in the table size bits */ 293/* Aborting the fetch is indeed setting 00 in the table size bits */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 810f433731dc..221d584d089f 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -511,6 +511,8 @@ int main(void)
511 DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); 511 DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
512 DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); 512 DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
513 DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst)); 513 DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst));
514 DEFINE(VCPU_CPU, offsetof(struct kvm_vcpu, cpu));
515 DEFINE(VCPU_THREAD_CPU, offsetof(struct kvm_vcpu, arch.thread_cpu));
514#endif 516#endif
515#ifdef CONFIG_PPC_BOOK3S 517#ifdef CONFIG_PPC_BOOK3S
516 DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); 518 DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
@@ -673,7 +675,14 @@ int main(void)
673 HSTATE_FIELD(HSTATE_DSCR, host_dscr); 675 HSTATE_FIELD(HSTATE_DSCR, host_dscr);
674 HSTATE_FIELD(HSTATE_DABR, dabr); 676 HSTATE_FIELD(HSTATE_DABR, dabr);
675 HSTATE_FIELD(HSTATE_DECEXP, dec_expires); 677 HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
678 HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode);
676 DEFINE(IPI_PRIORITY, IPI_PRIORITY); 679 DEFINE(IPI_PRIORITY, IPI_PRIORITY);
680 DEFINE(KVM_SPLIT_RPR, offsetof(struct kvm_split_mode, rpr));
681 DEFINE(KVM_SPLIT_PMMAR, offsetof(struct kvm_split_mode, pmmar));
682 DEFINE(KVM_SPLIT_LDBAR, offsetof(struct kvm_split_mode, ldbar));
683 DEFINE(KVM_SPLIT_SIZE, offsetof(struct kvm_split_mode, subcore_size));
684 DEFINE(KVM_SPLIT_DO_NAP, offsetof(struct kvm_split_mode, do_nap));
685 DEFINE(KVM_SPLIT_NAPPED, offsetof(struct kvm_split_mode, napped));
677#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 686#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
678 687
679#ifdef CONFIG_PPC_BOOK3S_64 688#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 3caec2c42105..c2024ac9d4e8 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -74,14 +74,14 @@ config KVM_BOOK3S_64
74 If unsure, say N. 74 If unsure, say N.
75 75
76config KVM_BOOK3S_64_HV 76config KVM_BOOK3S_64_HV
77 tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host" 77 tristate "KVM for POWER7 and later using hypervisor mode in host"
78 depends on KVM_BOOK3S_64 && PPC_POWERNV 78 depends on KVM_BOOK3S_64 && PPC_POWERNV
79 select KVM_BOOK3S_HV_POSSIBLE 79 select KVM_BOOK3S_HV_POSSIBLE
80 select MMU_NOTIFIER 80 select MMU_NOTIFIER
81 select CMA 81 select CMA
82 ---help--- 82 ---help---
83 Support running unmodified book3s_64 guest kernels in 83 Support running unmodified book3s_64 guest kernels in
84 virtual machines on POWER7 and PPC970 processors that have 84 virtual machines on POWER7 and newer processors that have
85 hypervisor mode available to the host. 85 hypervisor mode available to the host.
86 86
87 If you say Y here, KVM will use the hardware virtualization 87 If you say Y here, KVM will use the hardware virtualization
@@ -89,8 +89,8 @@ config KVM_BOOK3S_64_HV
89 guest operating systems will run at full hardware speed 89 guest operating systems will run at full hardware speed
90 using supervisor and user modes. However, this also means 90 using supervisor and user modes. However, this also means
91 that KVM is not usable under PowerVM (pHyp), is only usable 91 that KVM is not usable under PowerVM (pHyp), is only usable
92 on POWER7 (or later) processors and PPC970-family processors, 92 on POWER7 or later processors, and cannot emulate a
93 and cannot emulate a different processor from the host processor. 93 different processor from the host processor.
94 94
95 If unsure, say N. 95 If unsure, say N.
96 96
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 6d6398f4d632..d75bf325f54a 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -240,7 +240,8 @@ void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags)
240 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); 240 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
241} 241}
242 242
243int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) 243static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu,
244 unsigned int priority)
244{ 245{
245 int deliver = 1; 246 int deliver = 1;
246 int vec = 0; 247 int vec = 0;
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 2035d16a9262..d5c9bfeb0c9c 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -26,6 +26,7 @@
26#include <asm/machdep.h> 26#include <asm/machdep.h>
27#include <asm/mmu_context.h> 27#include <asm/mmu_context.h>
28#include <asm/hw_irq.h> 28#include <asm/hw_irq.h>
29#include "book3s.h"
29 30
30/* #define DEBUG_MMU */ 31/* #define DEBUG_MMU */
31/* #define DEBUG_SR */ 32/* #define DEBUG_SR */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index b982d925c710..79ad35abd196 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -28,6 +28,7 @@
28#include <asm/mmu_context.h> 28#include <asm/mmu_context.h>
29#include <asm/hw_irq.h> 29#include <asm/hw_irq.h>
30#include "trace_pr.h" 30#include "trace_pr.h"
31#include "book3s.h"
31 32
32#define PTE_SIZE 12 33#define PTE_SIZE 12
33 34
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index dab68b7af3f2..1f9c0a17f445 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -761,6 +761,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
761 /* Harvest R and C */ 761 /* Harvest R and C */
762 rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); 762 rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
763 *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; 763 *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
764 if (rcbits & HPTE_R_C)
765 kvmppc_update_rmap_change(rmapp, psize);
764 if (rcbits & ~rev[i].guest_rpte) { 766 if (rcbits & ~rev[i].guest_rpte) {
765 rev[i].guest_rpte = ptel | rcbits; 767 rev[i].guest_rpte = ptel | rcbits;
766 note_hpte_modification(kvm, &rev[i]); 768 note_hpte_modification(kvm, &rev[i]);
@@ -927,8 +929,12 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
927 retry: 929 retry:
928 lock_rmap(rmapp); 930 lock_rmap(rmapp);
929 if (*rmapp & KVMPPC_RMAP_CHANGED) { 931 if (*rmapp & KVMPPC_RMAP_CHANGED) {
930 *rmapp &= ~KVMPPC_RMAP_CHANGED; 932 long change_order = (*rmapp & KVMPPC_RMAP_CHG_ORDER)
933 >> KVMPPC_RMAP_CHG_SHIFT;
934 *rmapp &= ~(KVMPPC_RMAP_CHANGED | KVMPPC_RMAP_CHG_ORDER);
931 npages_dirty = 1; 935 npages_dirty = 1;
936 if (change_order > PAGE_SHIFT)
937 npages_dirty = 1ul << (change_order - PAGE_SHIFT);
932 } 938 }
933 if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { 939 if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
934 unlock_rmap(rmapp); 940 unlock_rmap(rmapp);
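
The dirty-tracking fix above records the page-size order of a changed mapping in the rmap entry so that kvm_test_clear_dirty_npages() can report more than one dirty page for a large-page HPTE. The arithmetic, isolated into a small program that assumes 4K base pages (PAGE_SHIFT = 12) purely for the example:

#include <stdio.h>

#define EXAMPLE_PAGE_SHIFT 12

static unsigned long npages_dirty_for_order(long change_order)
{
	if (change_order > EXAMPLE_PAGE_SHIFT)
		return 1ul << (change_order - EXAMPLE_PAGE_SHIFT);
	return 1;                    /* base page, or no recorded order */
}

int main(void)
{
	printf("4K HPTE dirty  -> %lu page(s)\n", npages_dirty_for_order(12));
	printf("64K HPTE dirty -> %lu page(s)\n", npages_dirty_for_order(16));
	printf("16M HPTE dirty -> %lu page(s)\n", npages_dirty_for_order(24));
	return 0;
}
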
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 5a2bc4b0dfe5..2afdb9c0937d 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -23,6 +23,7 @@
23#include <asm/reg.h> 23#include <asm/reg.h>
24#include <asm/switch_to.h> 24#include <asm/switch_to.h>
25#include <asm/time.h> 25#include <asm/time.h>
26#include "book3s.h"
26 27
27#define OP_19_XOP_RFID 18 28#define OP_19_XOP_RFID 18
28#define OP_19_XOP_RFI 50 29#define OP_19_XOP_RFI 50
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index a9f753fb73a8..9754e6815e52 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -81,6 +81,12 @@ static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
81#define MPP_BUFFER_ORDER 3 81#define MPP_BUFFER_ORDER 3
82#endif 82#endif
83 83
84static int dynamic_mt_modes = 6;
85module_param(dynamic_mt_modes, int, S_IRUGO | S_IWUSR);
86MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)");
87static int target_smt_mode;
88module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
89MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
84 90
85static void kvmppc_end_cede(struct kvm_vcpu *vcpu); 91static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
86static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); 92static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
@@ -114,7 +120,7 @@ static bool kvmppc_ipi_thread(int cpu)
114 120
115static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) 121static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
116{ 122{
117 int cpu = vcpu->cpu; 123 int cpu;
118 wait_queue_head_t *wqp; 124 wait_queue_head_t *wqp;
119 125
120 wqp = kvm_arch_vcpu_wq(vcpu); 126 wqp = kvm_arch_vcpu_wq(vcpu);
@@ -123,10 +129,11 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
123 ++vcpu->stat.halt_wakeup; 129 ++vcpu->stat.halt_wakeup;
124 } 130 }
125 131
126 if (kvmppc_ipi_thread(cpu + vcpu->arch.ptid)) 132 if (kvmppc_ipi_thread(vcpu->arch.thread_cpu))
127 return; 133 return;
128 134
129 /* CPU points to the first thread of the core */ 135 /* CPU points to the first thread of the core */
136 cpu = vcpu->cpu;
130 if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu)) 137 if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu))
131 smp_send_reschedule(cpu); 138 smp_send_reschedule(cpu);
132} 139}
@@ -164,6 +171,27 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
164 * they should never fail.) 171 * they should never fail.)
165 */ 172 */
166 173
174static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc)
175{
176 unsigned long flags;
177
178 spin_lock_irqsave(&vc->stoltb_lock, flags);
179 vc->preempt_tb = mftb();
180 spin_unlock_irqrestore(&vc->stoltb_lock, flags);
181}
182
183static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc)
184{
185 unsigned long flags;
186
187 spin_lock_irqsave(&vc->stoltb_lock, flags);
188 if (vc->preempt_tb != TB_NIL) {
189 vc->stolen_tb += mftb() - vc->preempt_tb;
190 vc->preempt_tb = TB_NIL;
191 }
192 spin_unlock_irqrestore(&vc->stoltb_lock, flags);
193}
194
167static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) 195static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
168{ 196{
169 struct kvmppc_vcore *vc = vcpu->arch.vcore; 197 struct kvmppc_vcore *vc = vcpu->arch.vcore;
@@ -175,14 +203,9 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
175 * vcpu, and once it is set to this vcpu, only this task 203 * vcpu, and once it is set to this vcpu, only this task
176 * ever sets it to NULL. 204 * ever sets it to NULL.
177 */ 205 */
178 if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) { 206 if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
179 spin_lock_irqsave(&vc->stoltb_lock, flags); 207 kvmppc_core_end_stolen(vc);
180 if (vc->preempt_tb != TB_NIL) { 208
181 vc->stolen_tb += mftb() - vc->preempt_tb;
182 vc->preempt_tb = TB_NIL;
183 }
184 spin_unlock_irqrestore(&vc->stoltb_lock, flags);
185 }
186 spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); 209 spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
187 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST && 210 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
188 vcpu->arch.busy_preempt != TB_NIL) { 211 vcpu->arch.busy_preempt != TB_NIL) {
@@ -197,11 +220,9 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
197 struct kvmppc_vcore *vc = vcpu->arch.vcore; 220 struct kvmppc_vcore *vc = vcpu->arch.vcore;
198 unsigned long flags; 221 unsigned long flags;
199 222
200 if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) { 223 if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
201 spin_lock_irqsave(&vc->stoltb_lock, flags); 224 kvmppc_core_start_stolen(vc);
202 vc->preempt_tb = mftb(); 225
203 spin_unlock_irqrestore(&vc->stoltb_lock, flags);
204 }
205 spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); 226 spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
206 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST) 227 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
207 vcpu->arch.busy_preempt = mftb(); 228 vcpu->arch.busy_preempt = mftb();
@@ -214,12 +235,12 @@ static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
214 kvmppc_end_cede(vcpu); 235 kvmppc_end_cede(vcpu);
215} 236}
216 237
217void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) 238static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
218{ 239{
219 vcpu->arch.pvr = pvr; 240 vcpu->arch.pvr = pvr;
220} 241}
221 242
222int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) 243static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
223{ 244{
224 unsigned long pcr = 0; 245 unsigned long pcr = 0;
225 struct kvmppc_vcore *vc = vcpu->arch.vcore; 246 struct kvmppc_vcore *vc = vcpu->arch.vcore;
@@ -259,7 +280,7 @@ int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
259 return 0; 280 return 0;
260} 281}
261 282
262void kvmppc_dump_regs(struct kvm_vcpu *vcpu) 283static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
263{ 284{
264 int r; 285 int r;
265 286
@@ -292,7 +313,7 @@ void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
292 vcpu->arch.last_inst); 313 vcpu->arch.last_inst);
293} 314}
294 315
295struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) 316static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
296{ 317{
297 int r; 318 int r;
298 struct kvm_vcpu *v, *ret = NULL; 319 struct kvm_vcpu *v, *ret = NULL;
@@ -641,7 +662,8 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
641 662
642 spin_lock(&vcore->lock); 663 spin_lock(&vcore->lock);
643 if (target->arch.state == KVMPPC_VCPU_RUNNABLE && 664 if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
644 vcore->vcore_state != VCORE_INACTIVE) 665 vcore->vcore_state != VCORE_INACTIVE &&
666 vcore->runner)
645 target = vcore->runner; 667 target = vcore->runner;
646 spin_unlock(&vcore->lock); 668 spin_unlock(&vcore->lock);
647 669
@@ -1431,6 +1453,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
1431 vcore->lpcr = kvm->arch.lpcr; 1453 vcore->lpcr = kvm->arch.lpcr;
1432 vcore->first_vcpuid = core * threads_per_subcore; 1454 vcore->first_vcpuid = core * threads_per_subcore;
1433 vcore->kvm = kvm; 1455 vcore->kvm = kvm;
1456 INIT_LIST_HEAD(&vcore->preempt_list);
1434 1457
1435 vcore->mpp_buffer_is_valid = false; 1458 vcore->mpp_buffer_is_valid = false;
1436 1459
@@ -1655,6 +1678,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
1655 spin_unlock(&vcore->lock); 1678 spin_unlock(&vcore->lock);
1656 vcpu->arch.vcore = vcore; 1679 vcpu->arch.vcore = vcore;
1657 vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid; 1680 vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
1681 vcpu->arch.thread_cpu = -1;
1658 1682
1659 vcpu->arch.cpu_type = KVM_CPU_3S_64; 1683 vcpu->arch.cpu_type = KVM_CPU_3S_64;
1660 kvmppc_sanity_check(vcpu); 1684 kvmppc_sanity_check(vcpu);
@@ -1749,6 +1773,7 @@ static int kvmppc_grab_hwthread(int cpu)
1749 1773
1750 /* Ensure the thread won't go into the kernel if it wakes */ 1774 /* Ensure the thread won't go into the kernel if it wakes */
1751 tpaca->kvm_hstate.kvm_vcpu = NULL; 1775 tpaca->kvm_hstate.kvm_vcpu = NULL;
1776 tpaca->kvm_hstate.kvm_vcore = NULL;
1752 tpaca->kvm_hstate.napping = 0; 1777 tpaca->kvm_hstate.napping = 0;
1753 smp_wmb(); 1778 smp_wmb();
1754 tpaca->kvm_hstate.hwthread_req = 1; 1779 tpaca->kvm_hstate.hwthread_req = 1;
@@ -1780,26 +1805,32 @@ static void kvmppc_release_hwthread(int cpu)
1780 tpaca = &paca[cpu]; 1805 tpaca = &paca[cpu];
1781 tpaca->kvm_hstate.hwthread_req = 0; 1806 tpaca->kvm_hstate.hwthread_req = 0;
1782 tpaca->kvm_hstate.kvm_vcpu = NULL; 1807 tpaca->kvm_hstate.kvm_vcpu = NULL;
1808 tpaca->kvm_hstate.kvm_vcore = NULL;
1809 tpaca->kvm_hstate.kvm_split_mode = NULL;
1783} 1810}
1784 1811
1785static void kvmppc_start_thread(struct kvm_vcpu *vcpu) 1812static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
1786{ 1813{
1787 int cpu; 1814 int cpu;
1788 struct paca_struct *tpaca; 1815 struct paca_struct *tpaca;
1789 struct kvmppc_vcore *vc = vcpu->arch.vcore; 1816 struct kvmppc_vcore *mvc = vc->master_vcore;
1790 1817
1791 if (vcpu->arch.timer_running) { 1818 cpu = vc->pcpu;
1792 hrtimer_try_to_cancel(&vcpu->arch.dec_timer); 1819 if (vcpu) {
1793 vcpu->arch.timer_running = 0; 1820 if (vcpu->arch.timer_running) {
1821 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
1822 vcpu->arch.timer_running = 0;
1823 }
1824 cpu += vcpu->arch.ptid;
1825 vcpu->cpu = mvc->pcpu;
1826 vcpu->arch.thread_cpu = cpu;
1794 } 1827 }
1795 cpu = vc->pcpu + vcpu->arch.ptid;
1796 tpaca = &paca[cpu]; 1828 tpaca = &paca[cpu];
1797 tpaca->kvm_hstate.kvm_vcore = vc;
1798 tpaca->kvm_hstate.ptid = vcpu->arch.ptid;
1799 vcpu->cpu = vc->pcpu;
1800 /* Order stores to hstate.kvm_vcore etc. before store to kvm_vcpu */
1801 smp_wmb();
1802 tpaca->kvm_hstate.kvm_vcpu = vcpu; 1829 tpaca->kvm_hstate.kvm_vcpu = vcpu;
1830 tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
1831 /* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
1832 smp_wmb();
1833 tpaca->kvm_hstate.kvm_vcore = mvc;
1803 if (cpu != smp_processor_id()) 1834 if (cpu != smp_processor_id())
1804 kvmppc_ipi_thread(cpu); 1835 kvmppc_ipi_thread(cpu);
1805} 1836}
@@ -1812,12 +1843,12 @@ static void kvmppc_wait_for_nap(void)
1812 for (loops = 0; loops < 1000000; ++loops) { 1843 for (loops = 0; loops < 1000000; ++loops) {
1813 /* 1844 /*
1814 * Check if all threads are finished. 1845 * Check if all threads are finished.
1815 * We set the vcpu pointer when starting a thread 1846 * We set the vcore pointer when starting a thread
1816 * and the thread clears it when finished, so we look 1847 * and the thread clears it when finished, so we look
1817 * for any threads that still have a non-NULL vcpu ptr. 1848 * for any threads that still have a non-NULL vcore ptr.
1818 */ 1849 */
1819 for (i = 1; i < threads_per_subcore; ++i) 1850 for (i = 1; i < threads_per_subcore; ++i)
1820 if (paca[cpu + i].kvm_hstate.kvm_vcpu) 1851 if (paca[cpu + i].kvm_hstate.kvm_vcore)
1821 break; 1852 break;
1822 if (i == threads_per_subcore) { 1853 if (i == threads_per_subcore) {
1823 HMT_medium(); 1854 HMT_medium();
@@ -1827,7 +1858,7 @@ static void kvmppc_wait_for_nap(void)
1827 } 1858 }
1828 HMT_medium(); 1859 HMT_medium();
1829 for (i = 1; i < threads_per_subcore; ++i) 1860 for (i = 1; i < threads_per_subcore; ++i)
1830 if (paca[cpu + i].kvm_hstate.kvm_vcpu) 1861 if (paca[cpu + i].kvm_hstate.kvm_vcore)
1831 pr_err("KVM: CPU %d seems to be stuck\n", cpu + i); 1862 pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
1832} 1863}
1833 1864
@@ -1890,6 +1921,278 @@ static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc)
1890 mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE); 1921 mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE);
1891} 1922}
1892 1923
1924/*
1925 * A list of virtual cores for each physical CPU.
1926 * These are vcores that could run but their runner VCPU tasks are
1927 * (or may be) preempted.
1928 */
1929struct preempted_vcore_list {
1930 struct list_head list;
1931 spinlock_t lock;
1932};
1933
1934static DEFINE_PER_CPU(struct preempted_vcore_list, preempted_vcores);
1935
1936static void init_vcore_lists(void)
1937{
1938 int cpu;
1939
1940 for_each_possible_cpu(cpu) {
1941 struct preempted_vcore_list *lp = &per_cpu(preempted_vcores, cpu);
1942 spin_lock_init(&lp->lock);
1943 INIT_LIST_HEAD(&lp->list);
1944 }
1945}
1946
1947static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
1948{
1949 struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
1950
1951 vc->vcore_state = VCORE_PREEMPT;
1952 vc->pcpu = smp_processor_id();
1953 if (vc->num_threads < threads_per_subcore) {
1954 spin_lock(&lp->lock);
1955 list_add_tail(&vc->preempt_list, &lp->list);
1956 spin_unlock(&lp->lock);
1957 }
1958
1959 /* Start accumulating stolen time */
1960 kvmppc_core_start_stolen(vc);
1961}
1962
1963static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc)
1964{
1965 struct preempted_vcore_list *lp;
1966
1967 kvmppc_core_end_stolen(vc);
1968 if (!list_empty(&vc->preempt_list)) {
1969 lp = &per_cpu(preempted_vcores, vc->pcpu);
1970 spin_lock(&lp->lock);
1971 list_del_init(&vc->preempt_list);
1972 spin_unlock(&lp->lock);
1973 }
1974 vc->vcore_state = VCORE_INACTIVE;
1975}
1976
1977/*
1978 * This stores information about the virtual cores currently
1979 * assigned to a physical core.
1980 */
1981struct core_info {
1982 int n_subcores;
1983 int max_subcore_threads;
1984 int total_threads;
1985 int subcore_threads[MAX_SUBCORES];
1986 struct kvm *subcore_vm[MAX_SUBCORES];
1987 struct list_head vcs[MAX_SUBCORES];
1988};
1989
1990/*
1991 * This mapping means subcores 0 and 1 can use threads 0-3 and 4-7
1992 * respectively in 2-way micro-threading (split-core) mode.
1993 */
1994static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
1995
1996static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
1997{
1998 int sub;
1999
2000 memset(cip, 0, sizeof(*cip));
2001 cip->n_subcores = 1;
2002 cip->max_subcore_threads = vc->num_threads;
2003 cip->total_threads = vc->num_threads;
2004 cip->subcore_threads[0] = vc->num_threads;
2005 cip->subcore_vm[0] = vc->kvm;
2006 for (sub = 0; sub < MAX_SUBCORES; ++sub)
2007 INIT_LIST_HEAD(&cip->vcs[sub]);
2008 list_add_tail(&vc->preempt_list, &cip->vcs[0]);
2009}
2010
2011static bool subcore_config_ok(int n_subcores, int n_threads)
2012{
2013 /* Can only dynamically split if unsplit to begin with */
2014 if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS)
2015 return false;
2016 if (n_subcores > MAX_SUBCORES)
2017 return false;
2018 if (n_subcores > 1) {
2019 if (!(dynamic_mt_modes & 2))
2020 n_subcores = 4;
2021 if (n_subcores > 2 && !(dynamic_mt_modes & 4))
2022 return false;
2023 }
2024
2025 return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
2026}
2027
2028static void init_master_vcore(struct kvmppc_vcore *vc)
2029{
2030 vc->master_vcore = vc;
2031 vc->entry_exit_map = 0;
2032 vc->in_guest = 0;
2033 vc->napping_threads = 0;
2034 vc->conferring_threads = 0;
2035}
2036
2037/*
2038 * See if the existing subcores can be split into 3 (or fewer) subcores
2039 * of at most two threads each, so we can fit in another vcore. This
2040 * assumes there are at most two subcores and at most 6 threads in total.
2041 */
2042static bool can_split_piggybacked_subcores(struct core_info *cip)
2043{
2044 int sub, new_sub;
2045 int large_sub = -1;
2046 int thr;
2047 int n_subcores = cip->n_subcores;
2048 struct kvmppc_vcore *vc, *vcnext;
2049 struct kvmppc_vcore *master_vc = NULL;
2050
2051 for (sub = 0; sub < cip->n_subcores; ++sub) {
2052 if (cip->subcore_threads[sub] <= 2)
2053 continue;
2054 if (large_sub >= 0)
2055 return false;
2056 large_sub = sub;
2057 vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
2058 preempt_list);
2059 if (vc->num_threads > 2)
2060 return false;
2061 n_subcores += (cip->subcore_threads[sub] - 1) >> 1;
2062 }
2063 if (n_subcores > 3 || large_sub < 0)
2064 return false;
2065
2066 /*
2067 * Seems feasible, so go through and move vcores to new subcores.
2068 * Note that when we have two or more vcores in one subcore,
2069 * all those vcores must have only one thread each.
2070 */
2071 new_sub = cip->n_subcores;
2072 thr = 0;
2073 sub = large_sub;
2074 list_for_each_entry_safe(vc, vcnext, &cip->vcs[sub], preempt_list) {
2075 if (thr >= 2) {
2076 list_del(&vc->preempt_list);
2077 list_add_tail(&vc->preempt_list, &cip->vcs[new_sub]);
2078 /* vc->num_threads must be 1 */
2079 if (++cip->subcore_threads[new_sub] == 1) {
2080 cip->subcore_vm[new_sub] = vc->kvm;
2081 init_master_vcore(vc);
2082 master_vc = vc;
2083 ++cip->n_subcores;
2084 } else {
2085 vc->master_vcore = master_vc;
2086 ++new_sub;
2087 }
2088 }
2089 thr += vc->num_threads;
2090 }
2091 cip->subcore_threads[large_sub] = 2;
2092 cip->max_subcore_threads = 2;
2093
2094 return true;
2095}
2096
2097static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
2098{
2099 int n_threads = vc->num_threads;
2100 int sub;
2101
2102 if (!cpu_has_feature(CPU_FTR_ARCH_207S))
2103 return false;
2104
2105 if (n_threads < cip->max_subcore_threads)
2106 n_threads = cip->max_subcore_threads;
2107 if (subcore_config_ok(cip->n_subcores + 1, n_threads)) {
2108 cip->max_subcore_threads = n_threads;
2109 } else if (cip->n_subcores <= 2 && cip->total_threads <= 6 &&
2110 vc->num_threads <= 2) {
2111 /*
2112 * We may be able to fit another subcore in by
2113 * splitting an existing subcore with 3 or 4
2114 * threads into two 2-thread subcores, or one
2115 * with 5 or 6 threads into three subcores.
2116 * We can only do this if those subcores have
2117 * piggybacked virtual cores.
2118 */
2119 if (!can_split_piggybacked_subcores(cip))
2120 return false;
2121 } else {
2122 return false;
2123 }
2124
2125 sub = cip->n_subcores;
2126 ++cip->n_subcores;
2127 cip->total_threads += vc->num_threads;
2128 cip->subcore_threads[sub] = vc->num_threads;
2129 cip->subcore_vm[sub] = vc->kvm;
2130 init_master_vcore(vc);
2131 list_del(&vc->preempt_list);
2132 list_add_tail(&vc->preempt_list, &cip->vcs[sub]);
2133
2134 return true;
2135}
2136
2137static bool can_piggyback_subcore(struct kvmppc_vcore *pvc,
2138 struct core_info *cip, int sub)
2139{
2140 struct kvmppc_vcore *vc;
2141 int n_thr;
2142
2143 vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
2144 preempt_list);
2145
2146 /* require same VM and same per-core reg values */
2147 if (pvc->kvm != vc->kvm ||
2148 pvc->tb_offset != vc->tb_offset ||
2149 pvc->pcr != vc->pcr ||
2150 pvc->lpcr != vc->lpcr)
2151 return false;
2152
2153 /* P8 guest with > 1 thread per core would see wrong TIR value */
2154 if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
2155 (vc->num_threads > 1 || pvc->num_threads > 1))
2156 return false;
2157
2158 n_thr = cip->subcore_threads[sub] + pvc->num_threads;
2159 if (n_thr > cip->max_subcore_threads) {
2160 if (!subcore_config_ok(cip->n_subcores, n_thr))
2161 return false;
2162 cip->max_subcore_threads = n_thr;
2163 }
2164
2165 cip->total_threads += pvc->num_threads;
2166 cip->subcore_threads[sub] = n_thr;
2167 pvc->master_vcore = vc;
2168 list_del(&pvc->preempt_list);
2169 list_add_tail(&pvc->preempt_list, &cip->vcs[sub]);
2170
2171 return true;
2172}
2173
2174/*
2175 * Work out whether it is possible to piggyback the execution of
2176 * vcore *pvc onto the execution of the other vcores described in *cip.
2177 */
2178static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
2179 int target_threads)
2180{
2181 int sub;
2182
2183 if (cip->total_threads + pvc->num_threads > target_threads)
2184 return false;
2185 for (sub = 0; sub < cip->n_subcores; ++sub)
2186 if (cip->subcore_threads[sub] &&
2187 can_piggyback_subcore(pvc, cip, sub))
2188 return true;
2189
2190 if (can_dynamic_split(pvc, cip))
2191 return true;
2192
2193 return false;
2194}
2195
1893static void prepare_threads(struct kvmppc_vcore *vc) 2196static void prepare_threads(struct kvmppc_vcore *vc)
1894{ 2197{
1895 struct kvm_vcpu *vcpu, *vnext; 2198 struct kvm_vcpu *vcpu, *vnext;
@@ -1909,12 +2212,45 @@ static void prepare_threads(struct kvmppc_vcore *vc)
1909 } 2212 }
1910} 2213}
1911 2214
1912static void post_guest_process(struct kvmppc_vcore *vc) 2215static void collect_piggybacks(struct core_info *cip, int target_threads)
2216{
2217 struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
2218 struct kvmppc_vcore *pvc, *vcnext;
2219
2220 spin_lock(&lp->lock);
2221 list_for_each_entry_safe(pvc, vcnext, &lp->list, preempt_list) {
2222 if (!spin_trylock(&pvc->lock))
2223 continue;
2224 prepare_threads(pvc);
2225 if (!pvc->n_runnable) {
2226 list_del_init(&pvc->preempt_list);
2227 if (pvc->runner == NULL) {
2228 pvc->vcore_state = VCORE_INACTIVE;
2229 kvmppc_core_end_stolen(pvc);
2230 }
2231 spin_unlock(&pvc->lock);
2232 continue;
2233 }
2234 if (!can_piggyback(pvc, cip, target_threads)) {
2235 spin_unlock(&pvc->lock);
2236 continue;
2237 }
2238 kvmppc_core_end_stolen(pvc);
2239 pvc->vcore_state = VCORE_PIGGYBACK;
2240 if (cip->total_threads >= target_threads)
2241 break;
2242 }
2243 spin_unlock(&lp->lock);
2244}
2245
2246static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
1913{ 2247{
2248 int still_running = 0;
1914 u64 now; 2249 u64 now;
1915 long ret; 2250 long ret;
1916 struct kvm_vcpu *vcpu, *vnext; 2251 struct kvm_vcpu *vcpu, *vnext;
1917 2252
2253 spin_lock(&vc->lock);
1918 now = get_tb(); 2254 now = get_tb();
1919 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, 2255 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
1920 arch.run_list) { 2256 arch.run_list) {
@@ -1933,17 +2269,36 @@ static void post_guest_process(struct kvmppc_vcore *vc)
1933 vcpu->arch.ret = ret; 2269 vcpu->arch.ret = ret;
1934 vcpu->arch.trap = 0; 2270 vcpu->arch.trap = 0;
1935 2271
1936 if (vcpu->arch.ceded) { 2272 if (is_kvmppc_resume_guest(vcpu->arch.ret)) {
1937 if (!is_kvmppc_resume_guest(ret)) 2273 if (vcpu->arch.pending_exceptions)
1938 kvmppc_end_cede(vcpu); 2274 kvmppc_core_prepare_to_enter(vcpu);
1939 else 2275 if (vcpu->arch.ceded)
1940 kvmppc_set_timer(vcpu); 2276 kvmppc_set_timer(vcpu);
1941 } 2277 else
1942 if (!is_kvmppc_resume_guest(vcpu->arch.ret)) { 2278 ++still_running;
2279 } else {
1943 kvmppc_remove_runnable(vc, vcpu); 2280 kvmppc_remove_runnable(vc, vcpu);
1944 wake_up(&vcpu->arch.cpu_run); 2281 wake_up(&vcpu->arch.cpu_run);
1945 } 2282 }
1946 } 2283 }
2284 list_del_init(&vc->preempt_list);
2285 if (!is_master) {
2286 if (still_running > 0) {
2287 kvmppc_vcore_preempt(vc);
2288 } else if (vc->runner) {
2289 vc->vcore_state = VCORE_PREEMPT;
2290 kvmppc_core_start_stolen(vc);
2291 } else {
2292 vc->vcore_state = VCORE_INACTIVE;
2293 }
2294 if (vc->n_runnable > 0 && vc->runner == NULL) {
2295 /* make sure there's a candidate runner awake */
2296 vcpu = list_first_entry(&vc->runnable_threads,
2297 struct kvm_vcpu, arch.run_list);
2298 wake_up(&vcpu->arch.cpu_run);
2299 }
2300 }
2301 spin_unlock(&vc->lock);
1947} 2302}
1948 2303
1949/* 2304/*
@@ -1955,6 +2310,15 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
1955 struct kvm_vcpu *vcpu, *vnext; 2310 struct kvm_vcpu *vcpu, *vnext;
1956 int i; 2311 int i;
1957 int srcu_idx; 2312 int srcu_idx;
2313 struct core_info core_info;
2314 struct kvmppc_vcore *pvc, *vcnext;
2315 struct kvm_split_mode split_info, *sip;
2316 int split, subcore_size, active;
2317 int sub;
2318 bool thr0_done;
2319 unsigned long cmd_bit, stat_bit;
2320 int pcpu, thr;
2321 int target_threads;
1958 2322
1959 /* 2323 /*
1960 * Remove from the list any threads that have a signal pending 2324 * Remove from the list any threads that have a signal pending
@@ -1969,11 +2333,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
1969 /* 2333 /*
1970 * Initialize *vc. 2334 * Initialize *vc.
1971 */ 2335 */
1972 vc->entry_exit_map = 0; 2336 init_master_vcore(vc);
1973 vc->preempt_tb = TB_NIL; 2337 vc->preempt_tb = TB_NIL;
1974 vc->in_guest = 0;
1975 vc->napping_threads = 0;
1976 vc->conferring_threads = 0;
1977 2338
1978 /* 2339 /*
1979 * Make sure we are running on primary threads, and that secondary 2340 * Make sure we are running on primary threads, and that secondary
@@ -1991,24 +2352,120 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
1991 goto out; 2352 goto out;
1992 } 2353 }
1993 2354
2355 /*
2356 * See if we could run any other vcores on the physical core
2357 * along with this one.
2358 */
2359 init_core_info(&core_info, vc);
2360 pcpu = smp_processor_id();
2361 target_threads = threads_per_subcore;
2362 if (target_smt_mode && target_smt_mode < target_threads)
2363 target_threads = target_smt_mode;
2364 if (vc->num_threads < target_threads)
2365 collect_piggybacks(&core_info, target_threads);
2366
2367 /* Decide on micro-threading (split-core) mode */
2368 subcore_size = threads_per_subcore;
2369 cmd_bit = stat_bit = 0;
2370 split = core_info.n_subcores;
2371 sip = NULL;
2372 if (split > 1) {
2373 /* threads_per_subcore must be MAX_SMT_THREADS (8) here */
2374 if (split == 2 && (dynamic_mt_modes & 2)) {
2375 cmd_bit = HID0_POWER8_1TO2LPAR;
2376 stat_bit = HID0_POWER8_2LPARMODE;
2377 } else {
2378 split = 4;
2379 cmd_bit = HID0_POWER8_1TO4LPAR;
2380 stat_bit = HID0_POWER8_4LPARMODE;
2381 }
2382 subcore_size = MAX_SMT_THREADS / split;
2383 sip = &split_info;
2384 memset(&split_info, 0, sizeof(split_info));
2385 split_info.rpr = mfspr(SPRN_RPR);
2386 split_info.pmmar = mfspr(SPRN_PMMAR);
2387 split_info.ldbar = mfspr(SPRN_LDBAR);
2388 split_info.subcore_size = subcore_size;
2389 for (sub = 0; sub < core_info.n_subcores; ++sub)
2390 split_info.master_vcs[sub] =
2391 list_first_entry(&core_info.vcs[sub],
2392 struct kvmppc_vcore, preempt_list);
2393 /* order writes to split_info before kvm_split_mode pointer */
2394 smp_wmb();
2395 }
2396 pcpu = smp_processor_id();
2397 for (thr = 0; thr < threads_per_subcore; ++thr)
2398 paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
2399
2400 /* Initiate micro-threading (split-core) if required */
2401 if (cmd_bit) {
2402 unsigned long hid0 = mfspr(SPRN_HID0);
2403
2404 hid0 |= cmd_bit | HID0_POWER8_DYNLPARDIS;
2405 mb();
2406 mtspr(SPRN_HID0, hid0);
2407 isync();
2408 for (;;) {
2409 hid0 = mfspr(SPRN_HID0);
2410 if (hid0 & stat_bit)
2411 break;
2412 cpu_relax();
2413 }
2414 }
1994 2415
1995 vc->pcpu = smp_processor_id(); 2416 /* Start all the threads */
1996 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { 2417 active = 0;
1997 kvmppc_start_thread(vcpu); 2418 for (sub = 0; sub < core_info.n_subcores; ++sub) {
1998 kvmppc_create_dtl_entry(vcpu, vc); 2419 thr = subcore_thread_map[sub];
1999 trace_kvm_guest_enter(vcpu); 2420 thr0_done = false;
2421 active |= 1 << thr;
2422 list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
2423 pvc->pcpu = pcpu + thr;
2424 list_for_each_entry(vcpu, &pvc->runnable_threads,
2425 arch.run_list) {
2426 kvmppc_start_thread(vcpu, pvc);
2427 kvmppc_create_dtl_entry(vcpu, pvc);
2428 trace_kvm_guest_enter(vcpu);
2429 if (!vcpu->arch.ptid)
2430 thr0_done = true;
2431 active |= 1 << (thr + vcpu->arch.ptid);
2432 }
2433 /*
2434 * We need to start the first thread of each subcore
2435 * even if it doesn't have a vcpu.
2436 */
2437 if (pvc->master_vcore == pvc && !thr0_done)
2438 kvmppc_start_thread(NULL, pvc);
2439 thr += pvc->num_threads;
2440 }
2000 } 2441 }
2001 2442
2002 /* Set this explicitly in case thread 0 doesn't have a vcpu */ 2443 /*
2003 get_paca()->kvm_hstate.kvm_vcore = vc; 2444 * Ensure that split_info.do_nap is set after setting
2004 get_paca()->kvm_hstate.ptid = 0; 2445 * the vcore pointer in the PACA of the secondaries.
2446 */
2447 smp_mb();
2448 if (cmd_bit)
2449 split_info.do_nap = 1; /* ask secondaries to nap when done */
2450
2451 /*
2452 * When doing micro-threading, poke the inactive threads as well.
2453 * This gets them to the nap instruction after kvm_do_nap,
2454 * which reduces the time taken to unsplit later.
2455 */
2456 if (split > 1)
2457 for (thr = 1; thr < threads_per_subcore; ++thr)
2458 if (!(active & (1 << thr)))
2459 kvmppc_ipi_thread(pcpu + thr);
2005 2460
2006 vc->vcore_state = VCORE_RUNNING; 2461 vc->vcore_state = VCORE_RUNNING;
2007 preempt_disable(); 2462 preempt_disable();
2008 2463
2009 trace_kvmppc_run_core(vc, 0); 2464 trace_kvmppc_run_core(vc, 0);
2010 2465
2011 spin_unlock(&vc->lock); 2466 for (sub = 0; sub < core_info.n_subcores; ++sub)
2467 list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
2468 spin_unlock(&pvc->lock);
2012 2469
2013 kvm_guest_enter(); 2470 kvm_guest_enter();
2014 2471
@@ -2019,32 +2476,58 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2019 2476
2020 __kvmppc_vcore_entry(); 2477 __kvmppc_vcore_entry();
2021 2478
2022 spin_lock(&vc->lock);
2023
2024 if (vc->mpp_buffer) 2479 if (vc->mpp_buffer)
2025 kvmppc_start_saving_l2_cache(vc); 2480 kvmppc_start_saving_l2_cache(vc);
2026 2481
2027 /* disable sending of IPIs on virtual external irqs */ 2482 srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
2028 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) 2483
2029 vcpu->cpu = -1; 2484 spin_lock(&vc->lock);
2030 /* wait for secondary threads to finish writing their state to memory */
2031 kvmppc_wait_for_nap();
2032 for (i = 0; i < threads_per_subcore; ++i)
2033 kvmppc_release_hwthread(vc->pcpu + i);
2034 /* prevent other vcpu threads from doing kvmppc_start_thread() now */ 2485 /* prevent other vcpu threads from doing kvmppc_start_thread() now */
2035 vc->vcore_state = VCORE_EXITING; 2486 vc->vcore_state = VCORE_EXITING;
2036 spin_unlock(&vc->lock);
2037 2487
2038 srcu_read_unlock(&vc->kvm->srcu, srcu_idx); 2488 /* wait for secondary threads to finish writing their state to memory */
2489 kvmppc_wait_for_nap();
2490
2491 /* Return to whole-core mode if we split the core earlier */
2492 if (split > 1) {
2493 unsigned long hid0 = mfspr(SPRN_HID0);
2494 unsigned long loops = 0;
2495
2496 hid0 &= ~HID0_POWER8_DYNLPARDIS;
2497 stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
2498 mb();
2499 mtspr(SPRN_HID0, hid0);
2500 isync();
2501 for (;;) {
2502 hid0 = mfspr(SPRN_HID0);
2503 if (!(hid0 & stat_bit))
2504 break;
2505 cpu_relax();
2506 ++loops;
2507 }
2508 split_info.do_nap = 0;
2509 }
2510
2511 /* Let secondaries go back to the offline loop */
2512 for (i = 0; i < threads_per_subcore; ++i) {
2513 kvmppc_release_hwthread(pcpu + i);
2514 if (sip && sip->napped[i])
2515 kvmppc_ipi_thread(pcpu + i);
2516 }
2517
2518 spin_unlock(&vc->lock);
2039 2519
2040 /* make sure updates to secondary vcpu structs are visible now */ 2520 /* make sure updates to secondary vcpu structs are visible now */
2041 smp_mb(); 2521 smp_mb();
2042 kvm_guest_exit(); 2522 kvm_guest_exit();
2043 2523
2044 preempt_enable(); 2524 for (sub = 0; sub < core_info.n_subcores; ++sub)
2525 list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
2526 preempt_list)
2527 post_guest_process(pvc, pvc == vc);
2045 2528
2046 spin_lock(&vc->lock); 2529 spin_lock(&vc->lock);
2047 post_guest_process(vc); 2530 preempt_enable();
2048 2531
2049 out: 2532 out:
2050 vc->vcore_state = VCORE_INACTIVE; 2533 vc->vcore_state = VCORE_INACTIVE;
@@ -2055,13 +2538,17 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2055 * Wait for some other vcpu thread to execute us, and 2538 * Wait for some other vcpu thread to execute us, and
2056 * wake us up when we need to handle something in the host. 2539 * wake us up when we need to handle something in the host.
2057 */ 2540 */
2058static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state) 2541static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
2542 struct kvm_vcpu *vcpu, int wait_state)
2059{ 2543{
2060 DEFINE_WAIT(wait); 2544 DEFINE_WAIT(wait);
2061 2545
2062 prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state); 2546 prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
2063 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) 2547 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
2548 spin_unlock(&vc->lock);
2064 schedule(); 2549 schedule();
2550 spin_lock(&vc->lock);
2551 }
2065 finish_wait(&vcpu->arch.cpu_run, &wait); 2552 finish_wait(&vcpu->arch.cpu_run, &wait);
2066} 2553}
2067 2554
@@ -2137,9 +2624,21 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2137 * this thread straight away and have it join in. 2624 * this thread straight away and have it join in.
2138 */ 2625 */
2139 if (!signal_pending(current)) { 2626 if (!signal_pending(current)) {
2140 if (vc->vcore_state == VCORE_RUNNING && !VCORE_IS_EXITING(vc)) { 2627 if (vc->vcore_state == VCORE_PIGGYBACK) {
2628 struct kvmppc_vcore *mvc = vc->master_vcore;
2629 if (spin_trylock(&mvc->lock)) {
2630 if (mvc->vcore_state == VCORE_RUNNING &&
2631 !VCORE_IS_EXITING(mvc)) {
2632 kvmppc_create_dtl_entry(vcpu, vc);
2633 kvmppc_start_thread(vcpu, vc);
2634 trace_kvm_guest_enter(vcpu);
2635 }
2636 spin_unlock(&mvc->lock);
2637 }
2638 } else if (vc->vcore_state == VCORE_RUNNING &&
2639 !VCORE_IS_EXITING(vc)) {
2141 kvmppc_create_dtl_entry(vcpu, vc); 2640 kvmppc_create_dtl_entry(vcpu, vc);
2142 kvmppc_start_thread(vcpu); 2641 kvmppc_start_thread(vcpu, vc);
2143 trace_kvm_guest_enter(vcpu); 2642 trace_kvm_guest_enter(vcpu);
2144 } else if (vc->vcore_state == VCORE_SLEEPING) { 2643 } else if (vc->vcore_state == VCORE_SLEEPING) {
2145 wake_up(&vc->wq); 2644 wake_up(&vc->wq);
@@ -2149,10 +2648,11 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2149 2648
2150 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && 2649 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
2151 !signal_pending(current)) { 2650 !signal_pending(current)) {
2651 if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
2652 kvmppc_vcore_end_preempt(vc);
2653
2152 if (vc->vcore_state != VCORE_INACTIVE) { 2654 if (vc->vcore_state != VCORE_INACTIVE) {
2153 spin_unlock(&vc->lock); 2655 kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
2154 kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
2155 spin_lock(&vc->lock);
2156 continue; 2656 continue;
2157 } 2657 }
2158 list_for_each_entry_safe(v, vn, &vc->runnable_threads, 2658 list_for_each_entry_safe(v, vn, &vc->runnable_threads,
@@ -2179,10 +2679,11 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2179 if (n_ceded == vc->n_runnable) { 2679 if (n_ceded == vc->n_runnable) {
2180 kvmppc_vcore_blocked(vc); 2680 kvmppc_vcore_blocked(vc);
2181 } else if (need_resched()) { 2681 } else if (need_resched()) {
2182 vc->vcore_state = VCORE_PREEMPT; 2682 kvmppc_vcore_preempt(vc);
2183 /* Let something else run */ 2683 /* Let something else run */
2184 cond_resched_lock(&vc->lock); 2684 cond_resched_lock(&vc->lock);
2185 vc->vcore_state = VCORE_INACTIVE; 2685 if (vc->vcore_state == VCORE_PREEMPT)
2686 kvmppc_vcore_end_preempt(vc);
2186 } else { 2687 } else {
2187 kvmppc_run_core(vc); 2688 kvmppc_run_core(vc);
2188 } 2689 }
@@ -2191,11 +2692,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2191 2692
2192 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && 2693 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
2193 (vc->vcore_state == VCORE_RUNNING || 2694 (vc->vcore_state == VCORE_RUNNING ||
2194 vc->vcore_state == VCORE_EXITING)) { 2695 vc->vcore_state == VCORE_EXITING))
2195 spin_unlock(&vc->lock); 2696 kvmppc_wait_for_exec(vc, vcpu, TASK_UNINTERRUPTIBLE);
2196 kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
2197 spin_lock(&vc->lock);
2198 }
2199 2697
2200 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { 2698 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
2201 kvmppc_remove_runnable(vc, vcpu); 2699 kvmppc_remove_runnable(vc, vcpu);
@@ -2755,6 +3253,8 @@ static int kvmppc_book3s_init_hv(void)
2755 3253
2756 init_default_hcalls(); 3254 init_default_hcalls();
2757 3255
3256 init_vcore_lists();
3257
2758 r = kvmppc_mmu_hv_init(); 3258 r = kvmppc_mmu_hv_init();
2759 return r; 3259 return r;
2760} 3260}
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index ed2589d4593f..fd7006bf6b1a 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -110,14 +110,15 @@ void __init kvm_cma_reserve(void)
110long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, 110long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
111 unsigned int yield_count) 111 unsigned int yield_count)
112{ 112{
113 struct kvmppc_vcore *vc = vcpu->arch.vcore; 113 struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
114 int ptid = local_paca->kvm_hstate.ptid;
114 int threads_running; 115 int threads_running;
115 int threads_ceded; 116 int threads_ceded;
116 int threads_conferring; 117 int threads_conferring;
117 u64 stop = get_tb() + 10 * tb_ticks_per_usec; 118 u64 stop = get_tb() + 10 * tb_ticks_per_usec;
118 int rv = H_SUCCESS; /* => don't yield */ 119 int rv = H_SUCCESS; /* => don't yield */
119 120
120 set_bit(vcpu->arch.ptid, &vc->conferring_threads); 121 set_bit(ptid, &vc->conferring_threads);
121 while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) { 122 while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) {
122 threads_running = VCORE_ENTRY_MAP(vc); 123 threads_running = VCORE_ENTRY_MAP(vc);
123 threads_ceded = vc->napping_threads; 124 threads_ceded = vc->napping_threads;
@@ -127,7 +128,7 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
127 break; 128 break;
128 } 129 }
129 } 130 }
130 clear_bit(vcpu->arch.ptid, &vc->conferring_threads); 131 clear_bit(ptid, &vc->conferring_threads);
131 return rv; 132 return rv;
132} 133}
133 134
@@ -238,7 +239,8 @@ void kvmhv_commence_exit(int trap)
238{ 239{
239 struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore; 240 struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
240 int ptid = local_paca->kvm_hstate.ptid; 241 int ptid = local_paca->kvm_hstate.ptid;
241 int me, ee; 242 struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode;
243 int me, ee, i;
242 244
243 /* Set our bit in the threads-exiting-guest map in the 0xff00 245 /* Set our bit in the threads-exiting-guest map in the 0xff00
244 bits of vcore->entry_exit_map */ 246 bits of vcore->entry_exit_map */
@@ -258,4 +260,26 @@ void kvmhv_commence_exit(int trap)
258 */ 260 */
259 if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER) 261 if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER)
260 kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid)); 262 kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid));
263
264 /*
265 * If we are doing dynamic micro-threading, interrupt the other
266 * subcores to pull them out of their guests too.
267 */
268 if (!sip)
269 return;
270
271 for (i = 0; i < MAX_SUBCORES; ++i) {
272 vc = sip->master_vcs[i];
273 if (!vc)
274 break;
275 do {
276 ee = vc->entry_exit_map;
277 /* Already asked to exit? */
278 if ((ee >> 8) != 0)
279 break;
280 } while (cmpxchg(&vc->entry_exit_map, ee,
281 ee | VCORE_EXIT_REQ) != ee);
282 if ((ee >> 8) == 0)
283 kvmhv_interrupt_vcore(vc, ee);
284 }
261} 285}
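
The loop above uses cmpxchg() to publish an exit request exactly once per subcore: it gives up as soon as any bit of the threads-exiting field (the 0xff00 bits) is already set, and otherwise retries until the request bit lands atomically. A stand-alone sketch of the same pattern with a C11 compare-exchange; the request-bit value and field layout are illustrative rather than the kernel's exact constants:

#include <stdatomic.h>
#include <stdio.h>

#define EXIT_MAP        0xff00u         /* threads already exiting */
#define EXIT_REQ        0x10000u        /* illustrative request bit */

static _Atomic unsigned int entry_exit_map;

/* Returns 1 if we raised the request (no thread had started exiting). */
static int request_exit(void)
{
        unsigned int old = atomic_load(&entry_exit_map);

        do {
                if (old & (EXIT_MAP | EXIT_REQ))
                        return 0;       /* someone is already on the way out */
        } while (!atomic_compare_exchange_weak(&entry_exit_map, &old,
                                               old | EXIT_REQ));
        return 1;
}

int main(void)
{
        atomic_store(&entry_exit_map, 0x3);     /* two threads in the guest */
        printf("requested=%d map=%#x\n", request_exit(),
               atomic_load(&entry_exit_map));
        return 0;
}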
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index b027a89737b6..c1df9bb1e413 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -12,6 +12,7 @@
12#include <linux/kvm_host.h> 12#include <linux/kvm_host.h>
13#include <linux/hugetlb.h> 13#include <linux/hugetlb.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/log2.h>
15 16
16#include <asm/tlbflush.h> 17#include <asm/tlbflush.h>
17#include <asm/kvm_ppc.h> 18#include <asm/kvm_ppc.h>
@@ -97,25 +98,52 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
97} 98}
98EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); 99EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
99 100
101/* Update the changed page order field of an rmap entry */
102void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize)
103{
104 unsigned long order;
105
106 if (!psize)
107 return;
108 order = ilog2(psize);
109 order <<= KVMPPC_RMAP_CHG_SHIFT;
110 if (order > (*rmap & KVMPPC_RMAP_CHG_ORDER))
111 *rmap = (*rmap & ~KVMPPC_RMAP_CHG_ORDER) | order;
112}
113EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change);
114
115/* Returns a pointer to the revmap entry for the page mapped by a HPTE */
116static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
117 unsigned long hpte_gr)
118{
119 struct kvm_memory_slot *memslot;
120 unsigned long *rmap;
121 unsigned long gfn;
122
123 gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr));
124 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
125 if (!memslot)
126 return NULL;
127
128 rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
129 return rmap;
130}
131
100/* Remove this HPTE from the chain for a real page */ 132/* Remove this HPTE from the chain for a real page */
101static void remove_revmap_chain(struct kvm *kvm, long pte_index, 133static void remove_revmap_chain(struct kvm *kvm, long pte_index,
102 struct revmap_entry *rev, 134 struct revmap_entry *rev,
103 unsigned long hpte_v, unsigned long hpte_r) 135 unsigned long hpte_v, unsigned long hpte_r)
104{ 136{
105 struct revmap_entry *next, *prev; 137 struct revmap_entry *next, *prev;
106 unsigned long gfn, ptel, head; 138 unsigned long ptel, head;
107 struct kvm_memory_slot *memslot;
108 unsigned long *rmap; 139 unsigned long *rmap;
109 unsigned long rcbits; 140 unsigned long rcbits;
110 141
111 rcbits = hpte_r & (HPTE_R_R | HPTE_R_C); 142 rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
112 ptel = rev->guest_rpte |= rcbits; 143 ptel = rev->guest_rpte |= rcbits;
113 gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); 144 rmap = revmap_for_hpte(kvm, hpte_v, ptel);
114 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 145 if (!rmap)
115 if (!memslot)
116 return; 146 return;
117
118 rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
119 lock_rmap(rmap); 147 lock_rmap(rmap);
120 148
121 head = *rmap & KVMPPC_RMAP_INDEX; 149 head = *rmap & KVMPPC_RMAP_INDEX;
@@ -131,6 +159,8 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
131 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head; 159 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
132 } 160 }
133 *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT; 161 *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
162 if (rcbits & HPTE_R_C)
163 kvmppc_update_rmap_change(rmap, hpte_page_size(hpte_v, hpte_r));
134 unlock_rmap(rmap); 164 unlock_rmap(rmap);
135} 165}
136 166
@@ -421,14 +451,20 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
421 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); 451 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
422 v = pte & ~HPTE_V_HVLOCK; 452 v = pte & ~HPTE_V_HVLOCK;
423 if (v & HPTE_V_VALID) { 453 if (v & HPTE_V_VALID) {
424 u64 pte1;
425
426 pte1 = be64_to_cpu(hpte[1]);
427 hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); 454 hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
428 rb = compute_tlbie_rb(v, pte1, pte_index); 455 rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index);
429 do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); 456 do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
430 /* Read PTE low word after tlbie to get final R/C values */ 457 /*
431 remove_revmap_chain(kvm, pte_index, rev, v, pte1); 458 * The reference (R) and change (C) bits in a HPT
459 * entry can be set by hardware at any time up until
460 * the HPTE is invalidated and the TLB invalidation
461 * sequence has completed. This means that when
462 * removing a HPTE, we need to re-read the HPTE after
463 * the invalidation sequence has completed in order to
464 * obtain reliable values of R and C.
465 */
466 remove_revmap_chain(kvm, pte_index, rev, v,
467 be64_to_cpu(hpte[1]));
432 } 468 }
433 r = rev->guest_rpte & ~HPTE_GR_RESERVED; 469 r = rev->guest_rpte & ~HPTE_GR_RESERVED;
434 note_hpte_modification(kvm, rev); 470 note_hpte_modification(kvm, rev);
@@ -655,6 +691,105 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
655 return H_SUCCESS; 691 return H_SUCCESS;
656} 692}
657 693
694long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
695 unsigned long pte_index)
696{
697 struct kvm *kvm = vcpu->kvm;
698 __be64 *hpte;
699 unsigned long v, r, gr;
700 struct revmap_entry *rev;
701 unsigned long *rmap;
702 long ret = H_NOT_FOUND;
703
704 if (pte_index >= kvm->arch.hpt_npte)
705 return H_PARAMETER;
706
707 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
708 hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
709 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
710 cpu_relax();
711 v = be64_to_cpu(hpte[0]);
712 r = be64_to_cpu(hpte[1]);
713 if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
714 goto out;
715
716 gr = rev->guest_rpte;
717 if (rev->guest_rpte & HPTE_R_R) {
718 rev->guest_rpte &= ~HPTE_R_R;
719 note_hpte_modification(kvm, rev);
720 }
721 if (v & HPTE_V_VALID) {
722 gr |= r & (HPTE_R_R | HPTE_R_C);
723 if (r & HPTE_R_R) {
724 kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
725 rmap = revmap_for_hpte(kvm, v, gr);
726 if (rmap) {
727 lock_rmap(rmap);
728 *rmap |= KVMPPC_RMAP_REFERENCED;
729 unlock_rmap(rmap);
730 }
731 }
732 }
733 vcpu->arch.gpr[4] = gr;
734 ret = H_SUCCESS;
735 out:
736 unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
737 return ret;
738}
739
740long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
741 unsigned long pte_index)
742{
743 struct kvm *kvm = vcpu->kvm;
744 __be64 *hpte;
745 unsigned long v, r, gr;
746 struct revmap_entry *rev;
747 unsigned long *rmap;
748 long ret = H_NOT_FOUND;
749
750 if (pte_index >= kvm->arch.hpt_npte)
751 return H_PARAMETER;
752
753 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
754 hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
755 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
756 cpu_relax();
757 v = be64_to_cpu(hpte[0]);
758 r = be64_to_cpu(hpte[1]);
759 if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
760 goto out;
761
762 gr = rev->guest_rpte;
763 if (gr & HPTE_R_C) {
764 rev->guest_rpte &= ~HPTE_R_C;
765 note_hpte_modification(kvm, rev);
766 }
767 if (v & HPTE_V_VALID) {
768 /* need to make it temporarily absent so C is stable */
769 hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
770 kvmppc_invalidate_hpte(kvm, hpte, pte_index);
771 r = be64_to_cpu(hpte[1]);
772 gr |= r & (HPTE_R_R | HPTE_R_C);
773 if (r & HPTE_R_C) {
774 unsigned long psize = hpte_page_size(v, r);
775 hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
776 eieio();
777 rmap = revmap_for_hpte(kvm, v, gr);
778 if (rmap) {
779 lock_rmap(rmap);
780 *rmap |= KVMPPC_RMAP_CHANGED;
781 kvmppc_update_rmap_change(rmap, psize);
782 unlock_rmap(rmap);
783 }
784 }
785 }
786 vcpu->arch.gpr[4] = gr;
787 ret = H_SUCCESS;
788 out:
789 unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
790 return ret;
791}
792
658void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, 793void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
659 unsigned long pte_index) 794 unsigned long pte_index)
660{ 795{
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 00e45b6d4f24..24f58076d49e 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -67,14 +67,12 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
67 } 67 }
68 68
69 /* Check if the core is loaded, if not, too hard */ 69 /* Check if the core is loaded, if not, too hard */
70 cpu = vcpu->cpu; 70 cpu = vcpu->arch.thread_cpu;
71 if (cpu < 0 || cpu >= nr_cpu_ids) { 71 if (cpu < 0 || cpu >= nr_cpu_ids) {
72 this_icp->rm_action |= XICS_RM_KICK_VCPU; 72 this_icp->rm_action |= XICS_RM_KICK_VCPU;
73 this_icp->rm_kick_target = vcpu; 73 this_icp->rm_kick_target = vcpu;
74 return; 74 return;
75 } 75 }
76 /* In SMT cpu will always point to thread 0, we adjust it */
77 cpu += vcpu->arch.ptid;
78 76
79 smp_mb(); 77 smp_mb();
80 kvmhv_rm_send_ipi(cpu); 78 kvmhv_rm_send_ipi(cpu);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index faa86e9c0551..2273dcacef39 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -128,6 +128,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
128 subf r4, r4, r3 128 subf r4, r4, r3
129 mtspr SPRN_DEC, r4 129 mtspr SPRN_DEC, r4
130 130
131 /* hwthread_req may have got set by cede or no vcpu, so clear it */
132 li r0, 0
133 stb r0, HSTATE_HWTHREAD_REQ(r13)
134
131 /* 135 /*
132 * For external and machine check interrupts, we need 136 * For external and machine check interrupts, we need
133 * to call the Linux handler to process the interrupt. 137 * to call the Linux handler to process the interrupt.
@@ -215,7 +219,6 @@ kvm_novcpu_wakeup:
215 ld r5, HSTATE_KVM_VCORE(r13) 219 ld r5, HSTATE_KVM_VCORE(r13)
216 li r0, 0 220 li r0, 0
217 stb r0, HSTATE_NAPPING(r13) 221 stb r0, HSTATE_NAPPING(r13)
218 stb r0, HSTATE_HWTHREAD_REQ(r13)
219 222
220 /* check the wake reason */ 223 /* check the wake reason */
221 bl kvmppc_check_wake_reason 224 bl kvmppc_check_wake_reason
@@ -315,10 +318,10 @@ kvm_start_guest:
315 cmpdi r3, 0 318 cmpdi r3, 0
316 bge kvm_no_guest 319 bge kvm_no_guest
317 320
318 /* get vcpu pointer, NULL if we have no vcpu to run */ 321 /* get vcore pointer, NULL if we have nothing to run */
319 ld r4,HSTATE_KVM_VCPU(r13) 322 ld r5,HSTATE_KVM_VCORE(r13)
320 cmpdi r4,0 323 cmpdi r5,0
321 /* if we have no vcpu to run, go back to sleep */ 324 /* if we have no vcore to run, go back to sleep */
322 beq kvm_no_guest 325 beq kvm_no_guest
323 326
324kvm_secondary_got_guest: 327kvm_secondary_got_guest:
@@ -327,21 +330,42 @@ kvm_secondary_got_guest:
327 ld r6, PACA_DSCR_DEFAULT(r13) 330 ld r6, PACA_DSCR_DEFAULT(r13)
328 std r6, HSTATE_DSCR(r13) 331 std r6, HSTATE_DSCR(r13)
329 332
330 /* Order load of vcore, ptid etc. after load of vcpu */ 333 /* On thread 0 of a subcore, set HDEC to max */
334 lbz r4, HSTATE_PTID(r13)
335 cmpwi r4, 0
336 bne 63f
337 lis r6, 0x7fff
338 ori r6, r6, 0xffff
339 mtspr SPRN_HDEC, r6
340 /* and set per-LPAR registers, if doing dynamic micro-threading */
341 ld r6, HSTATE_SPLIT_MODE(r13)
342 cmpdi r6, 0
343 beq 63f
344 ld r0, KVM_SPLIT_RPR(r6)
345 mtspr SPRN_RPR, r0
346 ld r0, KVM_SPLIT_PMMAR(r6)
347 mtspr SPRN_PMMAR, r0
348 ld r0, KVM_SPLIT_LDBAR(r6)
349 mtspr SPRN_LDBAR, r0
350 isync
35163:
352 /* Order load of vcpu after load of vcore */
331 lwsync 353 lwsync
354 ld r4, HSTATE_KVM_VCPU(r13)
332 bl kvmppc_hv_entry 355 bl kvmppc_hv_entry
333 356
334 /* Back from the guest, go back to nap */ 357 /* Back from the guest, go back to nap */
335 /* Clear our vcpu pointer so we don't come back in early */ 358 /* Clear our vcpu and vcore pointers so we don't come back in early */
336 li r0, 0 359 li r0, 0
360 std r0, HSTATE_KVM_VCPU(r13)
337 /* 361 /*
338 * Once we clear HSTATE_KVM_VCPU(r13), the code in 362 * Once we clear HSTATE_KVM_VCORE(r13), the code in
339 * kvmppc_run_core() is going to assume that all our vcpu 363 * kvmppc_run_core() is going to assume that all our vcpu
340 * state is visible in memory. This lwsync makes sure 364 * state is visible in memory. This lwsync makes sure
341 * that that is true. 365 * that that is true.
342 */ 366 */
343 lwsync 367 lwsync
344 std r0, HSTATE_KVM_VCPU(r13) 368 std r0, HSTATE_KVM_VCORE(r13)
345 369
346/* 370/*
347 * At this point we have finished executing in the guest. 371 * At this point we have finished executing in the guest.
@@ -374,16 +398,71 @@ kvm_no_guest:
374 b power7_wakeup_loss 398 b power7_wakeup_loss
375 399
37653: HMT_LOW 40053: HMT_LOW
377 ld r4, HSTATE_KVM_VCPU(r13) 401 ld r5, HSTATE_KVM_VCORE(r13)
378 cmpdi r4, 0 402 cmpdi r5, 0
403 bne 60f
404 ld r3, HSTATE_SPLIT_MODE(r13)
405 cmpdi r3, 0
406 beq kvm_no_guest
407 lbz r0, KVM_SPLIT_DO_NAP(r3)
408 cmpwi r0, 0
379 beq kvm_no_guest 409 beq kvm_no_guest
380 HMT_MEDIUM 410 HMT_MEDIUM
411 b kvm_unsplit_nap
41260: HMT_MEDIUM
381 b kvm_secondary_got_guest 413 b kvm_secondary_got_guest
382 414
38354: li r0, KVM_HWTHREAD_IN_KVM 41554: li r0, KVM_HWTHREAD_IN_KVM
384 stb r0, HSTATE_HWTHREAD_STATE(r13) 416 stb r0, HSTATE_HWTHREAD_STATE(r13)
385 b kvm_no_guest 417 b kvm_no_guest
386 418
419/*
420 * Here the primary thread is trying to return the core to
421 * whole-core mode, so we need to nap.
422 */
423kvm_unsplit_nap:
424 /*
425 * Ensure that secondary doesn't nap when it has
426 * its vcore pointer set.
427 */
428 sync /* matches smp_mb() before setting split_info.do_nap */
429 ld r0, HSTATE_KVM_VCORE(r13)
430 cmpdi r0, 0
431 bne kvm_no_guest
432 /* clear any pending message */
433BEGIN_FTR_SECTION
434 lis r6, (PPC_DBELL_SERVER << (63-36))@h
435 PPC_MSGCLR(6)
436END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
437 /* Set kvm_split_mode.napped[tid] = 1 */
438 ld r3, HSTATE_SPLIT_MODE(r13)
439 li r0, 1
440 lhz r4, PACAPACAINDEX(r13)
441 clrldi r4, r4, 61 /* micro-threading => P8 => 8 threads/core */
442 addi r4, r4, KVM_SPLIT_NAPPED
443 stbx r0, r3, r4
444 /* Check the do_nap flag again after setting napped[] */
445 sync
446 lbz r0, KVM_SPLIT_DO_NAP(r3)
447 cmpwi r0, 0
448 beq 57f
449 li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
450 mfspr r4, SPRN_LPCR
451 rlwimi r4, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
452 mtspr SPRN_LPCR, r4
453 isync
454 std r0, HSTATE_SCRATCH0(r13)
455 ptesync
456 ld r0, HSTATE_SCRATCH0(r13)
4571: cmpd r0, r0
458 bne 1b
459 nap
460 b .
461
46257: li r0, 0
463 stbx r0, r3, r4
464 b kvm_no_guest
465
387/****************************************************************************** 466/******************************************************************************
388 * * 467 * *
389 * Entry code * 468 * Entry code *
@@ -854,7 +933,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
854 cmpwi r0, 0 933 cmpwi r0, 0
855 bne 21f 934 bne 21f
856 HMT_LOW 935 HMT_LOW
85720: lbz r0, VCORE_IN_GUEST(r5) 93620: lwz r3, VCORE_ENTRY_EXIT(r5)
937 cmpwi r3, 0x100
938 bge no_switch_exit
939 lbz r0, VCORE_IN_GUEST(r5)
858 cmpwi r0, 0 940 cmpwi r0, 0
859 beq 20b 941 beq 20b
860 HMT_MEDIUM 942 HMT_MEDIUM
@@ -870,7 +952,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
870 blt hdec_soon 952 blt hdec_soon
871 953
872 ld r6, VCPU_CTR(r4) 954 ld r6, VCPU_CTR(r4)
873 lwz r7, VCPU_XER(r4) 955 ld r7, VCPU_XER(r4)
874 956
875 mtctr r6 957 mtctr r6
876 mtxer r7 958 mtxer r7
@@ -985,9 +1067,13 @@ secondary_too_late:
985#endif 1067#endif
98611: b kvmhv_switch_to_host 106811: b kvmhv_switch_to_host
987 1069
1070no_switch_exit:
1071 HMT_MEDIUM
1072 li r12, 0
1073 b 12f
988hdec_soon: 1074hdec_soon:
989 li r12, BOOK3S_INTERRUPT_HV_DECREMENTER 1075 li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
990 stw r12, VCPU_TRAP(r4) 107612: stw r12, VCPU_TRAP(r4)
991 mr r9, r4 1077 mr r9, r4
992#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 1078#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
993 addi r3, r4, VCPU_TB_RMEXIT 1079 addi r3, r4, VCPU_TB_RMEXIT
@@ -1103,7 +1189,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1103 mfctr r3 1189 mfctr r3
1104 mfxer r4 1190 mfxer r4
1105 std r3, VCPU_CTR(r9) 1191 std r3, VCPU_CTR(r9)
1106 stw r4, VCPU_XER(r9) 1192 std r4, VCPU_XER(r9)
1107 1193
1108 /* If this is a page table miss then see if it's theirs or ours */ 1194 /* If this is a page table miss then see if it's theirs or ours */
1109 cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE 1195 cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
@@ -1127,6 +1213,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1127 cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL 1213 cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
1128 bne 3f 1214 bne 3f
1129 lbz r0, HSTATE_HOST_IPI(r13) 1215 lbz r0, HSTATE_HOST_IPI(r13)
1216 cmpwi r0, 0
1130 beq 4f 1217 beq 4f
1131 b guest_exit_cont 1218 b guest_exit_cont
11323: 12193:
@@ -1176,6 +1263,11 @@ mc_cont:
1176 ld r9, HSTATE_KVM_VCPU(r13) 1263 ld r9, HSTATE_KVM_VCPU(r13)
1177 lwz r12, VCPU_TRAP(r9) 1264 lwz r12, VCPU_TRAP(r9)
1178 1265
1266 /* Stop others sending VCPU interrupts to this physical CPU */
1267 li r0, -1
1268 stw r0, VCPU_CPU(r9)
1269 stw r0, VCPU_THREAD_CPU(r9)
1270
1179 /* Save guest CTRL register, set runlatch to 1 */ 1271 /* Save guest CTRL register, set runlatch to 1 */
1180 mfspr r6,SPRN_CTRLF 1272 mfspr r6,SPRN_CTRLF
1181 stw r6,VCPU_CTRL(r9) 1273 stw r6,VCPU_CTRL(r9)
@@ -1540,12 +1632,17 @@ kvmhv_switch_to_host:
1540 1632
1541 /* Primary thread waits for all the secondaries to exit guest */ 1633 /* Primary thread waits for all the secondaries to exit guest */
154215: lwz r3,VCORE_ENTRY_EXIT(r5) 163415: lwz r3,VCORE_ENTRY_EXIT(r5)
1543 srwi r0,r3,8 1635 rlwinm r0,r3,32-8,0xff
1544 clrldi r3,r3,56 1636 clrldi r3,r3,56
1545 cmpw r3,r0 1637 cmpw r3,r0
1546 bne 15b 1638 bne 15b
1547 isync 1639 isync
1548 1640
1641 /* Did we actually switch to the guest at all? */
1642 lbz r6, VCORE_IN_GUEST(r5)
1643 cmpwi r6, 0
1644 beq 19f
1645
1549 /* Primary thread switches back to host partition */ 1646 /* Primary thread switches back to host partition */
1550 ld r6,KVM_HOST_SDR1(r4) 1647 ld r6,KVM_HOST_SDR1(r4)
1551 lwz r7,KVM_HOST_LPID(r4) 1648 lwz r7,KVM_HOST_LPID(r4)
@@ -1589,7 +1686,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
158918: 168618:
1590 /* Signal secondary CPUs to continue */ 1687 /* Signal secondary CPUs to continue */
1591 stb r0,VCORE_IN_GUEST(r5) 1688 stb r0,VCORE_IN_GUEST(r5)
1592 lis r8,0x7fff /* MAX_INT@h */ 168919: lis r8,0x7fff /* MAX_INT@h */
1593 mtspr SPRN_HDEC,r8 1690 mtspr SPRN_HDEC,r8
1594 1691
159516: ld r8,KVM_HOST_LPCR(r4) 169216: ld r8,KVM_HOST_LPCR(r4)
@@ -1675,7 +1772,7 @@ kvmppc_hdsi:
1675 bl kvmppc_msr_interrupt 1772 bl kvmppc_msr_interrupt
1676fast_interrupt_c_return: 1773fast_interrupt_c_return:
16776: ld r7, VCPU_CTR(r9) 17746: ld r7, VCPU_CTR(r9)
1678 lwz r8, VCPU_XER(r9) 1775 ld r8, VCPU_XER(r9)
1679 mtctr r7 1776 mtctr r7
1680 mtxer r8 1777 mtxer r8
1681 mr r4, r9 1778 mr r4, r9
@@ -1816,8 +1913,8 @@ hcall_real_table:
1816 .long DOTSYM(kvmppc_h_remove) - hcall_real_table 1913 .long DOTSYM(kvmppc_h_remove) - hcall_real_table
1817 .long DOTSYM(kvmppc_h_enter) - hcall_real_table 1914 .long DOTSYM(kvmppc_h_enter) - hcall_real_table
1818 .long DOTSYM(kvmppc_h_read) - hcall_real_table 1915 .long DOTSYM(kvmppc_h_read) - hcall_real_table
1819 .long 0 /* 0x10 - H_CLEAR_MOD */ 1916 .long DOTSYM(kvmppc_h_clear_mod) - hcall_real_table
1820 .long 0 /* 0x14 - H_CLEAR_REF */ 1917 .long DOTSYM(kvmppc_h_clear_ref) - hcall_real_table
1821 .long DOTSYM(kvmppc_h_protect) - hcall_real_table 1918 .long DOTSYM(kvmppc_h_protect) - hcall_real_table
1822 .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table 1919 .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table
1823 .long DOTSYM(kvmppc_h_put_tce) - hcall_real_table 1920 .long DOTSYM(kvmppc_h_put_tce) - hcall_real_table
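
The table above holds one 32-bit entry per hypercall number (the numbers advance in steps of four), so enabling H_CLEAR_MOD (0x10) and H_CLEAR_REF (0x14) amounts to replacing two placeholder slots with real handlers. A condensed C sketch of that dispatch shape; the handler bodies, return codes and table length are illustrative:

#include <stdio.h>

typedef long (*hcall_fn)(unsigned long arg);

static long h_clear_mod(unsigned long arg) { (void)arg; return 0; }
static long h_clear_ref(unsigned long arg) { (void)arg; return 0; }

/* one slot per hypercall number / 4; empty slots fall back to the host */
static hcall_fn hcall_table[] = {
        [0x10 / 4] = h_clear_mod,
        [0x14 / 4] = h_clear_ref,
};

#define NR_SLOTS (sizeof(hcall_table) / sizeof(hcall_table[0]))

static long do_hcall(unsigned long nr, unsigned long arg)
{
        if (nr % 4 || nr / 4 >= NR_SLOTS || !hcall_table[nr / 4])
                return -1;              /* let the host handle it */
        return hcall_table[nr / 4](arg);
}

int main(void)
{
        printf("H_CLEAR_MOD -> %ld, unknown 0x1c -> %ld\n",
               do_hcall(0x10, 0), do_hcall(0x1c, 0));
        return 0;
}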
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index bd6ab1672ae6..a759d9adb0b6 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -352,7 +352,7 @@ static inline u32 inst_get_field(u32 inst, int msb, int lsb)
352 return kvmppc_get_field(inst, msb + 32, lsb + 32); 352 return kvmppc_get_field(inst, msb + 32, lsb + 32);
353} 353}
354 354
355bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst) 355static bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
356{ 356{
357 if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) 357 if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
358 return false; 358 return false;
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index acee37cde840..ca8f174289bb 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -123,7 +123,7 @@ no_dcbz32_on:
123 PPC_LL r8, SVCPU_CTR(r3) 123 PPC_LL r8, SVCPU_CTR(r3)
124 PPC_LL r9, SVCPU_LR(r3) 124 PPC_LL r9, SVCPU_LR(r3)
125 lwz r10, SVCPU_CR(r3) 125 lwz r10, SVCPU_CR(r3)
126 lwz r11, SVCPU_XER(r3) 126 PPC_LL r11, SVCPU_XER(r3)
127 127
128 mtctr r8 128 mtctr r8
129 mtlr r9 129 mtlr r9
@@ -237,7 +237,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
237 mfctr r8 237 mfctr r8
238 mflr r9 238 mflr r9
239 239
240 stw r5, SVCPU_XER(r13) 240 PPC_STL r5, SVCPU_XER(r13)
241 PPC_STL r6, SVCPU_FAULT_DAR(r13) 241 PPC_STL r6, SVCPU_FAULT_DAR(r13)
242 stw r7, SVCPU_FAULT_DSISR(r13) 242 stw r7, SVCPU_FAULT_DSISR(r13)
243 PPC_STL r8, SVCPU_CTR(r13) 243 PPC_STL r8, SVCPU_CTR(r13)
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index c6ca7db64673..905e94a1370f 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -41,7 +41,7 @@
41 * ======= 41 * =======
42 * 42 *
43 * Each ICS has a spin lock protecting the information about the IRQ 43 * Each ICS has a spin lock protecting the information about the IRQ
44 * sources and avoiding simultaneous deliveries if the same interrupt. 44 * sources and avoiding simultaneous deliveries of the same interrupt.
45 * 45 *
46 * ICP operations are done via a single compare & swap transaction 46 * ICP operations are done via a single compare & swap transaction
47 * (most ICP state fits in the union kvmppc_icp_state) 47 * (most ICP state fits in the union kvmppc_icp_state)
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index cc5842657161..ae458f0fd061 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -933,6 +933,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
933#endif 933#endif
934 break; 934 break;
935 case BOOKE_INTERRUPT_CRITICAL: 935 case BOOKE_INTERRUPT_CRITICAL:
936 kvmppc_fill_pt_regs(&regs);
936 unknown_exception(&regs); 937 unknown_exception(&regs);
937 break; 938 break;
938 case BOOKE_INTERRUPT_DEBUG: 939 case BOOKE_INTERRUPT_DEBUG:
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 50860e919cb8..29911a07bcdb 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -377,7 +377,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea)
377 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); 377 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
378 vcpu->arch.shared->mas1 = 378 vcpu->arch.shared->mas1 =
379 (vcpu->arch.shared->mas6 & MAS6_SPID0) 379 (vcpu->arch.shared->mas6 & MAS6_SPID0)
380 | (vcpu->arch.shared->mas6 & (MAS6_SAS ? MAS1_TS : 0)) 380 | ((vcpu->arch.shared->mas6 & MAS6_SAS) ? MAS1_TS : 0)
381 | (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0)); 381 | (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0));
382 vcpu->arch.shared->mas2 &= MAS2_EPN; 382 vcpu->arch.shared->mas2 &= MAS2_EPN;
383 vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 & 383 vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 &
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index e5dde32fe71f..2e51289610e4 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -660,7 +660,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
660 return kvmppc_core_pending_dec(vcpu); 660 return kvmppc_core_pending_dec(vcpu);
661} 661}
662 662
663enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) 663static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
664{ 664{
665 struct kvm_vcpu *vcpu; 665 struct kvm_vcpu *vcpu;
666 666
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e7a4fde5d631..b372a7557c16 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -650,6 +650,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
650 u16 sel; 650 u16 sel;
651 651
652 la = seg_base(ctxt, addr.seg) + addr.ea; 652 la = seg_base(ctxt, addr.seg) + addr.ea;
653 *linear = la;
653 *max_size = 0; 654 *max_size = 0;
654 switch (mode) { 655 switch (mode) {
655 case X86EMUL_MODE_PROT64: 656 case X86EMUL_MODE_PROT64:
@@ -693,7 +694,6 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
693 } 694 }
694 if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0)) 695 if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
695 return emulate_gp(ctxt, 0); 696 return emulate_gp(ctxt, 0);
696 *linear = la;
697 return X86EMUL_CONTINUE; 697 return X86EMUL_CONTINUE;
698bad: 698bad:
699 if (addr.seg == VCPU_SREG_SS) 699 if (addr.seg == VCPU_SREG_SS)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index fb16a8ea3dee..69088a1ba509 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3309,13 +3309,14 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
3309 3309
3310 walk_shadow_page_lockless_begin(vcpu); 3310 walk_shadow_page_lockless_begin(vcpu);
3311 3311
3312 for (shadow_walk_init(&iterator, vcpu, addr), root = iterator.level; 3312 for (shadow_walk_init(&iterator, vcpu, addr),
3313 leaf = root = iterator.level;
3313 shadow_walk_okay(&iterator); 3314 shadow_walk_okay(&iterator);
3314 __shadow_walk_next(&iterator, spte)) { 3315 __shadow_walk_next(&iterator, spte)) {
3315 leaf = iterator.level;
3316 spte = mmu_spte_get_lockless(iterator.sptep); 3316 spte = mmu_spte_get_lockless(iterator.sptep);
3317 3317
3318 sptes[leaf - 1] = spte; 3318 sptes[leaf - 1] = spte;
3319 leaf--;
3319 3320
3320 if (!is_shadow_present_pte(spte)) 3321 if (!is_shadow_present_pte(spte))
3321 break; 3322 break;
@@ -3329,7 +3330,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
3329 if (reserved) { 3330 if (reserved) {
3330 pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n", 3331 pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
3331 __func__, addr); 3332 __func__, addr);
3332 while (root >= leaf) { 3333 while (root > leaf) {
3333 pr_err("------ spte 0x%llx level %d.\n", 3334 pr_err("------ spte 0x%llx level %d.\n",
3334 sptes[root - 1], root); 3335 sptes[root - 1], root);
3335 root--; 3336 root--;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1e7e76e14e89..a60bdbccff51 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5943,6 +5943,7 @@ static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
5943 put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg)); 5943 put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg));
5944} 5944}
5945 5945
5946#ifdef CONFIG_X86_64
5946static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n) 5947static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
5947{ 5948{
5948 struct kvm_segment seg; 5949 struct kvm_segment seg;
@@ -5958,6 +5959,7 @@ static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
5958 put_smstate(u32, buf, offset + 4, seg.limit); 5959 put_smstate(u32, buf, offset + 4, seg.limit);
5959 put_smstate(u64, buf, offset + 8, seg.base); 5960 put_smstate(u64, buf, offset + 8, seg.base);
5960} 5961}
5962#endif
5961 5963
5962static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf) 5964static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
5963{ 5965{
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index e5966758c093..e1e4d7c38dda 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -52,13 +52,16 @@ struct arch_timer_cpu {
52 52
53 /* Timer IRQ */ 53 /* Timer IRQ */
54 const struct kvm_irq_level *irq; 54 const struct kvm_irq_level *irq;
55
56 /* VGIC mapping */
57 struct irq_phys_map *map;
55}; 58};
56 59
57int kvm_timer_hyp_init(void); 60int kvm_timer_hyp_init(void);
58void kvm_timer_enable(struct kvm *kvm); 61void kvm_timer_enable(struct kvm *kvm);
59void kvm_timer_init(struct kvm *kvm); 62void kvm_timer_init(struct kvm *kvm);
60void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, 63int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
61 const struct kvm_irq_level *irq); 64 const struct kvm_irq_level *irq);
62void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); 65void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
63void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); 66void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu);
64void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); 67void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 133ea00aa83b..d901f1a47be6 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -95,11 +95,15 @@ enum vgic_type {
95#define LR_STATE_ACTIVE (1 << 1) 95#define LR_STATE_ACTIVE (1 << 1)
96#define LR_STATE_MASK (3 << 0) 96#define LR_STATE_MASK (3 << 0)
97#define LR_EOI_INT (1 << 2) 97#define LR_EOI_INT (1 << 2)
98#define LR_HW (1 << 3)
98 99
99struct vgic_lr { 100struct vgic_lr {
100 u16 irq; 101 unsigned irq:10;
101 u8 source; 102 union {
102 u8 state; 103 unsigned hwirq:10;
104 unsigned source:3;
105 };
106 unsigned state:4;
103}; 107};
104 108
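
The reworked struct vgic_lr packs the descriptor into bitfields so the new ten-bit hwirq can share storage with the three-bit SGI source; which union member is meaningful depends on whether the state field carries the new LR_HW flag. A stand-alone copy that can be compiled to check the packing (the interrupt numbers are arbitrary examples):

#include <stdio.h>

#define LR_HW   (1 << 3)        /* mirrors the new state flag above */

struct vgic_lr {
        unsigned irq:10;
        union {
                unsigned hwirq:10;      /* used when state includes LR_HW */
                unsigned source:3;      /* used for software-raised SGIs */
        };
        unsigned state:4;
};

int main(void)
{
        struct vgic_lr hw_lr  = { .irq = 27, .hwirq = 27,
                                  .state = LR_HW | 1 };        /* HW + a state bit */
        struct vgic_lr sgi_lr = { .irq = 1,  .source = 3, .state = 1 };

        printf("sizeof(struct vgic_lr) = %zu\n", sizeof(struct vgic_lr));
        printf("hw  LR: virt %u phys %u\n", hw_lr.irq, hw_lr.hwirq);
        printf("SGI LR: irq %u source %u\n", sgi_lr.irq, sgi_lr.source);
        return 0;
}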
105struct vgic_vmcr { 109struct vgic_vmcr {
@@ -155,6 +159,19 @@ struct vgic_io_device {
155 struct kvm_io_device dev; 159 struct kvm_io_device dev;
156}; 160};
157 161
162struct irq_phys_map {
163 u32 virt_irq;
164 u32 phys_irq;
165 u32 irq;
166 bool active;
167};
168
169struct irq_phys_map_entry {
170 struct list_head entry;
171 struct rcu_head rcu;
172 struct irq_phys_map map;
173};
174
158struct vgic_dist { 175struct vgic_dist {
159 spinlock_t lock; 176 spinlock_t lock;
160 bool in_kernel; 177 bool in_kernel;
@@ -252,6 +269,10 @@ struct vgic_dist {
252 struct vgic_vm_ops vm_ops; 269 struct vgic_vm_ops vm_ops;
253 struct vgic_io_device dist_iodev; 270 struct vgic_io_device dist_iodev;
254 struct vgic_io_device *redist_iodevs; 271 struct vgic_io_device *redist_iodevs;
272
273 /* Virtual irq to hwirq mapping */
274 spinlock_t irq_phys_map_lock;
275 struct list_head irq_phys_map_list;
255}; 276};
256 277
257struct vgic_v2_cpu_if { 278struct vgic_v2_cpu_if {
@@ -303,6 +324,9 @@ struct vgic_cpu {
303 struct vgic_v2_cpu_if vgic_v2; 324 struct vgic_v2_cpu_if vgic_v2;
304 struct vgic_v3_cpu_if vgic_v3; 325 struct vgic_v3_cpu_if vgic_v3;
305 }; 326 };
327
328 /* Protected by the distributor's irq_phys_map_lock */
329 struct list_head irq_phys_map_list;
306}; 330};
307 331
308#define LR_EMPTY 0xff 332#define LR_EMPTY 0xff
@@ -317,16 +341,25 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
317int kvm_vgic_hyp_init(void); 341int kvm_vgic_hyp_init(void);
318int kvm_vgic_map_resources(struct kvm *kvm); 342int kvm_vgic_map_resources(struct kvm *kvm);
319int kvm_vgic_get_max_vcpus(void); 343int kvm_vgic_get_max_vcpus(void);
344void kvm_vgic_early_init(struct kvm *kvm);
320int kvm_vgic_create(struct kvm *kvm, u32 type); 345int kvm_vgic_create(struct kvm *kvm, u32 type);
321void kvm_vgic_destroy(struct kvm *kvm); 346void kvm_vgic_destroy(struct kvm *kvm);
347void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu);
322void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); 348void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
323void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); 349void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
324void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); 350void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
325int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, 351int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
326 bool level); 352 bool level);
353int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
354 struct irq_phys_map *map, bool level);
327void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); 355void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
328int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); 356int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
329int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); 357int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
358struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
359 int virt_irq, int irq);
360int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
361bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map);
362void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active);
330 363
331#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) 364#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
332#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) 365#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus))
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 71e4faf33091..9eeeb9589acf 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -270,9 +270,12 @@
270 270
271#define ICH_LR_EOI (1UL << 41) 271#define ICH_LR_EOI (1UL << 41)
272#define ICH_LR_GROUP (1UL << 60) 272#define ICH_LR_GROUP (1UL << 60)
273#define ICH_LR_HW (1UL << 61)
273#define ICH_LR_STATE (3UL << 62) 274#define ICH_LR_STATE (3UL << 62)
274#define ICH_LR_PENDING_BIT (1UL << 62) 275#define ICH_LR_PENDING_BIT (1UL << 62)
275#define ICH_LR_ACTIVE_BIT (1UL << 63) 276#define ICH_LR_ACTIVE_BIT (1UL << 63)
277#define ICH_LR_PHYS_ID_SHIFT 32
278#define ICH_LR_PHYS_ID_MASK (0x3ffUL << ICH_LR_PHYS_ID_SHIFT)
276 279
277#define ICH_MISR_EOI (1 << 0) 280#define ICH_MISR_EOI (1 << 0)
278#define ICH_MISR_U (1 << 1) 281#define ICH_MISR_U (1 << 1)
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index af3d29f70781..b8901dfd9e95 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -75,11 +75,12 @@
75 75
76#define GICH_LR_VIRTUALID (0x3ff << 0) 76#define GICH_LR_VIRTUALID (0x3ff << 0)
77#define GICH_LR_PHYSID_CPUID_SHIFT (10) 77#define GICH_LR_PHYSID_CPUID_SHIFT (10)
78#define GICH_LR_PHYSID_CPUID (7 << GICH_LR_PHYSID_CPUID_SHIFT) 78#define GICH_LR_PHYSID_CPUID (0x3ff << GICH_LR_PHYSID_CPUID_SHIFT)
79#define GICH_LR_STATE (3 << 28) 79#define GICH_LR_STATE (3 << 28)
80#define GICH_LR_PENDING_BIT (1 << 28) 80#define GICH_LR_PENDING_BIT (1 << 28)
81#define GICH_LR_ACTIVE_BIT (1 << 29) 81#define GICH_LR_ACTIVE_BIT (1 << 29)
82#define GICH_LR_EOI (1 << 19) 82#define GICH_LR_EOI (1 << 19)
83#define GICH_LR_HW (1 << 31)
83 84
84#define GICH_VMCR_CTRL_SHIFT 0 85#define GICH_VMCR_CTRL_SHIFT 0
85#define GICH_VMCR_CTRL_MASK (0x21f << GICH_VMCR_CTRL_SHIFT) 86#define GICH_VMCR_CTRL_MASK (0x21f << GICH_VMCR_CTRL_SHIFT)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 81089cf1f0c1..1bef9e21e725 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -242,6 +242,7 @@ struct kvm_vcpu {
242 int sigset_active; 242 int sigset_active;
243 sigset_t sigset; 243 sigset_t sigset;
244 struct kvm_vcpu_stat stat; 244 struct kvm_vcpu_stat stat;
245 unsigned int halt_poll_ns;
245 246
246#ifdef CONFIG_HAS_IOMEM 247#ifdef CONFIG_HAS_IOMEM
247 int mmio_needed; 248 int mmio_needed;
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index a44062da684b..d6f83222a6a1 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -358,6 +358,36 @@ TRACE_EVENT(
358 358
359#endif 359#endif
360 360
361TRACE_EVENT(kvm_halt_poll_ns,
362 TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
363 TP_ARGS(grow, vcpu_id, new, old),
364
365 TP_STRUCT__entry(
366 __field(bool, grow)
367 __field(unsigned int, vcpu_id)
368 __field(int, new)
369 __field(int, old)
370 ),
371
372 TP_fast_assign(
373 __entry->grow = grow;
374 __entry->vcpu_id = vcpu_id;
375 __entry->new = new;
376 __entry->old = old;
377 ),
378
379 TP_printk("vcpu %u: halt_poll_ns %d (%s %d)",
380 __entry->vcpu_id,
381 __entry->new,
382 __entry->grow ? "grow" : "shrink",
383 __entry->old)
384);
385
386#define trace_kvm_halt_poll_ns_grow(vcpu_id, new, old) \
387 trace_kvm_halt_poll_ns(true, vcpu_id, new, old)
388#define trace_kvm_halt_poll_ns_shrink(vcpu_id, new, old) \
389 trace_kvm_halt_poll_ns(false, vcpu_id, new, old)
390
361#endif /* _TRACE_KVM_MAIN_H */ 391#endif /* _TRACE_KVM_MAIN_H */
362 392
363/* This part must be outside protection */ 393/* This part must be outside protection */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0d831f94f8a8..a9256f0331ae 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -237,6 +237,7 @@ struct kvm_run {
237 __u32 count; 237 __u32 count;
238 __u64 data_offset; /* relative to kvm_run start */ 238 __u64 data_offset; /* relative to kvm_run start */
239 } io; 239 } io;
240 /* KVM_EXIT_DEBUG */
240 struct { 241 struct {
241 struct kvm_debug_exit_arch arch; 242 struct kvm_debug_exit_arch arch;
242 } debug; 243 } debug;
@@ -285,6 +286,7 @@ struct kvm_run {
285 __u32 data; 286 __u32 data;
286 __u8 is_write; 287 __u8 is_write;
287 } dcr; 288 } dcr;
289 /* KVM_EXIT_INTERNAL_ERROR */
288 struct { 290 struct {
289 __u32 suberror; 291 __u32 suberror;
290 /* Available with KVM_CAP_INTERNAL_ERROR_DATA: */ 292 /* Available with KVM_CAP_INTERNAL_ERROR_DATA: */
@@ -295,6 +297,7 @@ struct kvm_run {
295 struct { 297 struct {
296 __u64 gprs[32]; 298 __u64 gprs[32];
297 } osi; 299 } osi;
300 /* KVM_EXIT_PAPR_HCALL */
298 struct { 301 struct {
299 __u64 nr; 302 __u64 nr;
300 __u64 ret; 303 __u64 ret;
@@ -819,6 +822,8 @@ struct kvm_ppc_smmu_info {
819#define KVM_CAP_DISABLE_QUIRKS 116 822#define KVM_CAP_DISABLE_QUIRKS 116
820#define KVM_CAP_X86_SMM 117 823#define KVM_CAP_X86_SMM 117
821#define KVM_CAP_MULTI_ADDRESS_SPACE 118 824#define KVM_CAP_MULTI_ADDRESS_SPACE 118
825#define KVM_CAP_GUEST_DEBUG_HW_BPS 119
826#define KVM_CAP_GUEST_DEBUG_HW_WPS 120
822 827
823#ifdef KVM_CAP_IRQ_ROUTING 828#ifdef KVM_CAP_IRQ_ROUTING
824 829
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 98c95f2fcba4..76e38d231e99 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -64,10 +64,10 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
64 int ret; 64 int ret;
65 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 65 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
66 66
67 timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; 67 kvm_vgic_set_phys_irq_active(timer->map, true);
68 ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, 68 ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
69 timer->irq->irq, 69 timer->map,
70 timer->irq->level); 70 timer->irq->level);
71 WARN_ON(ret); 71 WARN_ON(ret);
72} 72}
73 73
@@ -117,7 +117,8 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
117 cycle_t cval, now; 117 cycle_t cval, now;
118 118
119 if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || 119 if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
120 !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) 120 !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) ||
121 kvm_vgic_get_phys_irq_active(timer->map))
121 return false; 122 return false;
122 123
123 cval = timer->cntv_cval; 124 cval = timer->cntv_cval;
@@ -184,10 +185,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
184 timer_arm(timer, ns); 185 timer_arm(timer, ns);
185} 186}
186 187
187void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, 188int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
188 const struct kvm_irq_level *irq) 189 const struct kvm_irq_level *irq)
189{ 190{
190 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 191 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
192 struct irq_phys_map *map;
191 193
192 /* 194 /*
193 * The vcpu timer irq number cannot be determined in 195 * The vcpu timer irq number cannot be determined in
@@ -196,6 +198,17 @@ void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
196 * vcpu timer irq number when the vcpu is reset. 198 * vcpu timer irq number when the vcpu is reset.
197 */ 199 */
198 timer->irq = irq; 200 timer->irq = irq;
201
202 /*
203 * Tell the VGIC that the virtual interrupt is tied to a
204 * physical interrupt. We do that once per VCPU.
205 */
206 map = kvm_vgic_map_phys_irq(vcpu, irq->irq, host_vtimer_irq);
207 if (WARN_ON(IS_ERR(map)))
208 return PTR_ERR(map);
209
210 timer->map = map;
211 return 0;
199} 212}
200 213
201void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) 214void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
@@ -335,6 +348,8 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
335 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 348 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
336 349
337 timer_disarm(timer); 350 timer_disarm(timer);
351 if (timer->map)
352 kvm_vgic_unmap_phys_irq(vcpu, timer->map);
338} 353}
339 354
340void kvm_timer_enable(struct kvm *kvm) 355void kvm_timer_enable(struct kvm *kvm)
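
The timer changes above hinge on the mapped interrupt's active state: kvm_timer_vcpu_reset() registers the virt/phys mapping once per VCPU, kvm_timer_inject_irq() marks the physical line active before injecting, and kvm_timer_should_fire() refuses to fire while that flag is still set. The standalone sketch below models only that predicate; the struct, the CTRL_* constants and the timer_should_fire() name are stand-ins invented for the example rather than the kernel's definitions, and the compare against cntv_cval ignores the cntvoff offset the real code applies.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CTRL_ENABLE   (1u << 0)   /* counterpart of ARCH_TIMER_CTRL_ENABLE */
#define CTRL_IT_MASK  (1u << 1)   /* counterpart of ARCH_TIMER_CTRL_IT_MASK */

struct timer_state {
    uint32_t cntv_ctl;    /* virtual timer control register */
    uint64_t cntv_cval;   /* compare value */
    bool phys_active;     /* kvm_vgic_get_phys_irq_active(timer->map) */
};

/* Sketch of the decision made by kvm_timer_should_fire() after this patch. */
static bool timer_should_fire(const struct timer_state *t, uint64_t now)
{
    if ((t->cntv_ctl & CTRL_IT_MASK) ||     /* output masked */
        !(t->cntv_ctl & CTRL_ENABLE) ||     /* timer disabled */
        t->phys_active)                     /* HW interrupt still active */
        return false;

    return now >= t->cntv_cval;             /* deadline reached */
}

int main(void)
{
    struct timer_state t = { .cntv_ctl = CTRL_ENABLE, .cntv_cval = 100 };

    printf("%d\n", timer_should_fire(&t, 150));   /* 1: fires */
    t.phys_active = true;
    printf("%d\n", timer_should_fire(&t, 150));   /* 0: held off while active */
    return 0;
}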
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index f9b9c7c51372..8d7b04db8471 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -48,6 +48,10 @@ static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
48 lr_desc.state |= LR_STATE_ACTIVE; 48 lr_desc.state |= LR_STATE_ACTIVE;
49 if (val & GICH_LR_EOI) 49 if (val & GICH_LR_EOI)
50 lr_desc.state |= LR_EOI_INT; 50 lr_desc.state |= LR_EOI_INT;
51 if (val & GICH_LR_HW) {
52 lr_desc.state |= LR_HW;
53 lr_desc.hwirq = (val & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT;
54 }
51 55
52 return lr_desc; 56 return lr_desc;
53} 57}
@@ -55,7 +59,9 @@ static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
55static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, 59static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
56 struct vgic_lr lr_desc) 60 struct vgic_lr lr_desc)
57{ 61{
58 u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq; 62 u32 lr_val;
63
64 lr_val = lr_desc.irq;
59 65
60 if (lr_desc.state & LR_STATE_PENDING) 66 if (lr_desc.state & LR_STATE_PENDING)
61 lr_val |= GICH_LR_PENDING_BIT; 67 lr_val |= GICH_LR_PENDING_BIT;
@@ -64,6 +70,14 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
64 if (lr_desc.state & LR_EOI_INT) 70 if (lr_desc.state & LR_EOI_INT)
65 lr_val |= GICH_LR_EOI; 71 lr_val |= GICH_LR_EOI;
66 72
73 if (lr_desc.state & LR_HW) {
74 lr_val |= GICH_LR_HW;
75 lr_val |= (u32)lr_desc.hwirq << GICH_LR_PHYSID_CPUID_SHIFT;
76 }
77
78 if (lr_desc.irq < VGIC_NR_SGIS)
79 lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT);
80
67 vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; 81 vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
68} 82}
69 83
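
With the vgic-v2 changes above, a list register is built from the virtual ID, the state bits, and then either the SGI source CPU or, for a HW-mapped interrupt, GICH_LR_HW plus the physical interrupt ID in the (now 10-bit wide) PHYSID field. Because that field overlaps the EOI bit, the vgic also drops the EOI maintenance request for HW interrupts (see vgic_queue_irq_to_lr later in this series of hunks). Below is a standalone sketch of the encoding; the GICH_LR_* values are copied from the arm-gic.h hunk above, while struct lr_desc, encode_lr() and VGIC_NR_SGIS are stand-ins chosen for the example.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Values from include/linux/irqchip/arm-gic.h as patched above. */
#define GICH_LR_VIRTUALID           (0x3ff << 0)
#define GICH_LR_PHYSID_CPUID_SHIFT  10
#define GICH_LR_PENDING_BIT         (1 << 28)
#define GICH_LR_EOI                 (1 << 19)
#define GICH_LR_HW                  (1u << 31)

#define VGIC_NR_SGIS                16

/* Simplified description of what goes into one list register. */
struct lr_desc {
    uint16_t irq;      /* virtual interrupt ID */
    uint8_t  source;   /* source CPU, SGIs only */
    uint16_t hwirq;    /* physical interrupt ID for HW-mapped IRQs */
    bool     pending, hw, eoi_int;
};

/* Sketch of the LR encoding after this patch (cf. vgic_v2_set_lr()). */
static uint32_t encode_lr(const struct lr_desc *d)
{
    uint32_t val = d->irq & GICH_LR_VIRTUALID;

    if (d->pending)
        val |= GICH_LR_PENDING_BIT;
    if (d->eoi_int)
        val |= GICH_LR_EOI;
    if (d->hw)      /* HW bit plus physical ID; the field overlaps EOI */
        val |= GICH_LR_HW |
               ((uint32_t)d->hwirq << GICH_LR_PHYSID_CPUID_SHIFT);
    else if (d->irq < VGIC_NR_SGIS)   /* only SGIs carry a source CPU */
        val |= (uint32_t)d->source << GICH_LR_PHYSID_CPUID_SHIFT;

    return val;
}

int main(void)
{
    struct lr_desc timer = { .irq = 27, .hwirq = 27, .pending = true, .hw = true };

    printf("LR = 0x%08x\n", encode_lr(&timer));
    return 0;
}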
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index dff06021e748..afbf925b00f4 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -67,6 +67,10 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
67 lr_desc.state |= LR_STATE_ACTIVE; 67 lr_desc.state |= LR_STATE_ACTIVE;
68 if (val & ICH_LR_EOI) 68 if (val & ICH_LR_EOI)
69 lr_desc.state |= LR_EOI_INT; 69 lr_desc.state |= LR_EOI_INT;
70 if (val & ICH_LR_HW) {
71 lr_desc.state |= LR_HW;
72 lr_desc.hwirq = (val >> ICH_LR_PHYS_ID_SHIFT) & GENMASK(9, 0);
73 }
70 74
71 return lr_desc; 75 return lr_desc;
72} 76}
@@ -84,10 +88,17 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
84 * Eventually we want to make this configurable, so we may revisit 88 * Eventually we want to make this configurable, so we may revisit
85 * this in the future. 89 * this in the future.
86 */ 90 */
87 if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) 91 switch (vcpu->kvm->arch.vgic.vgic_model) {
92 case KVM_DEV_TYPE_ARM_VGIC_V3:
88 lr_val |= ICH_LR_GROUP; 93 lr_val |= ICH_LR_GROUP;
89 else 94 break;
90 lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT; 95 case KVM_DEV_TYPE_ARM_VGIC_V2:
96 if (lr_desc.irq < VGIC_NR_SGIS)
97 lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT;
98 break;
99 default:
100 BUG();
101 }
91 102
92 if (lr_desc.state & LR_STATE_PENDING) 103 if (lr_desc.state & LR_STATE_PENDING)
93 lr_val |= ICH_LR_PENDING_BIT; 104 lr_val |= ICH_LR_PENDING_BIT;
@@ -95,6 +106,10 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
95 lr_val |= ICH_LR_ACTIVE_BIT; 106 lr_val |= ICH_LR_ACTIVE_BIT;
96 if (lr_desc.state & LR_EOI_INT) 107 if (lr_desc.state & LR_EOI_INT)
97 lr_val |= ICH_LR_EOI; 108 lr_val |= ICH_LR_EOI;
109 if (lr_desc.state & LR_HW) {
110 lr_val |= ICH_LR_HW;
111 lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT;
112 }
98 113
99 vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; 114 vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
100} 115}
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index bc40137a022d..9eb489a2c94c 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -24,6 +24,7 @@
24#include <linux/of.h> 24#include <linux/of.h>
25#include <linux/of_address.h> 25#include <linux/of_address.h>
26#include <linux/of_irq.h> 26#include <linux/of_irq.h>
27#include <linux/rculist.h>
27#include <linux/uaccess.h> 28#include <linux/uaccess.h>
28 29
29#include <asm/kvm_emulate.h> 30#include <asm/kvm_emulate.h>
@@ -74,6 +75,28 @@
74 * cause the interrupt to become inactive in such a situation. 75 * cause the interrupt to become inactive in such a situation.
75 * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become 76 * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
76 * inactive as long as the external input line is held high. 77 * inactive as long as the external input line is held high.
78 *
79 *
80 * Initialization rules: there are multiple stages to the vgic
81 * initialization, both for the distributor and the CPU interfaces.
82 *
83 * Distributor:
84 *
85 * - kvm_vgic_early_init(): initialization of static data that doesn't
86 * depend on any sizing information or emulation type. No allocation
87 * is allowed there.
88 *
89 * - vgic_init(): allocation and initialization of the generic data
90 * structures that depend on sizing information (number of CPUs,
91 * number of interrupts). Also initializes the vcpu specific data
92 * structures. Can be executed lazily for GICv2.
93 * [to be renamed to kvm_vgic_init??]
94 *
95 * CPU Interface:
96 *
 97 * - kvm_vgic_vcpu_early_init(): initialization of static data that
98 * doesn't depend on any sizing information or emulation type. No
99 * allocation is allowed there.
77 */ 100 */
78 101
79#include "vgic.h" 102#include "vgic.h"
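
The comment block above splits initialization into an early stage (static setup only, no allocation) and a later, sizing-dependent stage; kvm_vgic_early_init() and kvm_vgic_vcpu_early_init() near the end of this file implement the early stage by doing nothing more than spin_lock_init() and INIT_LIST_HEAD(). A rough standalone sketch of that two-stage pattern follows; the struct layout and the dist_early_init()/dist_init() names are invented for illustration and do not mirror the real vgic_dist.

#include <stdlib.h>
#include <string.h>

/* Illustrative only: not the vgic's actual layout. */
struct dist {
    /* Stage 1: static state, usable before any sizing is known. */
    int lock_initialized;        /* stand-in for spin_lock_init() */
    void *map_list_head;         /* stand-in for INIT_LIST_HEAD() */

    /* Stage 2: allocations that depend on the configured geometry. */
    unsigned long *irq_pending;
    int nr_irqs;
};

/* Earliest init: no allocation allowed here (kvm_vgic_early_init() analogue). */
static void dist_early_init(struct dist *d)
{
    memset(d, 0, sizeof(*d));
    d->lock_initialized = 1;
}

/* Late init: sized allocation, may run lazily for GICv2 (vgic_init() analogue). */
static int dist_init(struct dist *d, int nr_irqs)
{
    d->irq_pending = calloc((nr_irqs + 63) / 64, sizeof(unsigned long));
    if (!d->irq_pending)
        return -1;
    d->nr_irqs = nr_irqs;
    return 0;
}

int main(void)
{
    struct dist d;
    int ret;

    dist_early_init(&d);          /* at VM creation time */
    ret = dist_init(&d, 256);     /* once nr_irqs is known */
    free(d.irq_pending);
    return ret;
}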
@@ -82,6 +105,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
82static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); 105static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
83static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); 106static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
84static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); 107static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
108static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
109 int virt_irq);
85 110
86static const struct vgic_ops *vgic_ops; 111static const struct vgic_ops *vgic_ops;
87static const struct vgic_params *vgic; 112static const struct vgic_params *vgic;
@@ -375,7 +400,7 @@ void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
375 400
376static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq) 401static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
377{ 402{
378 return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq); 403 return !vgic_irq_is_queued(vcpu, irq);
379} 404}
380 405
381/** 406/**
@@ -1115,6 +1140,39 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
1115 if (!vgic_irq_is_edge(vcpu, irq)) 1140 if (!vgic_irq_is_edge(vcpu, irq))
1116 vlr.state |= LR_EOI_INT; 1141 vlr.state |= LR_EOI_INT;
1117 1142
1143 if (vlr.irq >= VGIC_NR_SGIS) {
1144 struct irq_phys_map *map;
1145 map = vgic_irq_map_search(vcpu, irq);
1146
1147 /*
1148 * If we have a mapping, and the virtual interrupt is
1149 * being injected, then we must set the state to
1150 * active in the physical world. Otherwise the
1151 * physical interrupt will fire and the guest will
1152 * exit before processing the virtual interrupt.
1153 */
1154 if (map) {
1155 int ret;
1156
1157 BUG_ON(!map->active);
1158 vlr.hwirq = map->phys_irq;
1159 vlr.state |= LR_HW;
1160 vlr.state &= ~LR_EOI_INT;
1161
1162 ret = irq_set_irqchip_state(map->irq,
1163 IRQCHIP_STATE_ACTIVE,
1164 true);
1165 WARN_ON(ret);
1166
1167 /*
1168 * Make sure we're not going to sample this
1169 * again, as a HW-backed interrupt cannot be
1170 * in the PENDING_ACTIVE stage.
1171 */
1172 vgic_irq_set_queued(vcpu, irq);
1173 }
1174 }
1175
1118 vgic_set_lr(vcpu, lr_nr, vlr); 1176 vgic_set_lr(vcpu, lr_nr, vlr);
1119 vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); 1177 vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
1120} 1178}
@@ -1339,6 +1397,39 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
1339 return level_pending; 1397 return level_pending;
1340} 1398}
1341 1399
1400/*
1401 * Save the physical active state, and reset it to inactive.
1402 *
1403 * Return 1 if HW interrupt went from active to inactive, and 0 otherwise.
1404 */
1405static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr)
1406{
1407 struct irq_phys_map *map;
1408 int ret;
1409
1410 if (!(vlr.state & LR_HW))
1411 return 0;
1412
1413 map = vgic_irq_map_search(vcpu, vlr.irq);
1414 BUG_ON(!map || !map->active);
1415
1416 ret = irq_get_irqchip_state(map->irq,
1417 IRQCHIP_STATE_ACTIVE,
1418 &map->active);
1419
1420 WARN_ON(ret);
1421
1422 if (map->active) {
1423 ret = irq_set_irqchip_state(map->irq,
1424 IRQCHIP_STATE_ACTIVE,
1425 false);
1426 WARN_ON(ret);
1427 return 0;
1428 }
1429
1430 return 1;
1431}
1432
1342/* Sync back the VGIC state after a guest run */ 1433/* Sync back the VGIC state after a guest run */
1343static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) 1434static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1344{ 1435{
@@ -1353,14 +1444,31 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1353 elrsr = vgic_get_elrsr(vcpu); 1444 elrsr = vgic_get_elrsr(vcpu);
1354 elrsr_ptr = u64_to_bitmask(&elrsr); 1445 elrsr_ptr = u64_to_bitmask(&elrsr);
1355 1446
1356 /* Clear mappings for empty LRs */ 1447 /* Deal with HW interrupts, and clear mappings for empty LRs */
1357 for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) { 1448 for (lr = 0; lr < vgic->nr_lr; lr++) {
1358 struct vgic_lr vlr; 1449 struct vgic_lr vlr;
1359 1450
1360 if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) 1451 if (!test_bit(lr, vgic_cpu->lr_used))
1361 continue; 1452 continue;
1362 1453
1363 vlr = vgic_get_lr(vcpu, lr); 1454 vlr = vgic_get_lr(vcpu, lr);
1455 if (vgic_sync_hwirq(vcpu, vlr)) {
1456 /*
1457 * So this is a HW interrupt that the guest
1458 * EOI-ed. Clean the LR state and allow the
1459 * interrupt to be sampled again.
1460 */
1461 vlr.state = 0;
1462 vlr.hwirq = 0;
1463 vgic_set_lr(vcpu, lr, vlr);
1464 vgic_irq_clear_queued(vcpu, vlr.irq);
1465 set_bit(lr, elrsr_ptr);
1466 }
1467
1468 if (!test_bit(lr, elrsr_ptr))
1469 continue;
1470
1471 clear_bit(lr, vgic_cpu->lr_used);
1364 1472
1365 BUG_ON(vlr.irq >= dist->nr_irqs); 1473 BUG_ON(vlr.irq >= dist->nr_irqs);
1366 vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; 1474 vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
@@ -1447,7 +1555,8 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
1447} 1555}
1448 1556
1449static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, 1557static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
1450 unsigned int irq_num, bool level) 1558 struct irq_phys_map *map,
1559 unsigned int irq_num, bool level)
1451{ 1560{
1452 struct vgic_dist *dist = &kvm->arch.vgic; 1561 struct vgic_dist *dist = &kvm->arch.vgic;
1453 struct kvm_vcpu *vcpu; 1562 struct kvm_vcpu *vcpu;
@@ -1455,6 +1564,9 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
1455 int enabled; 1564 int enabled;
1456 bool ret = true, can_inject = true; 1565 bool ret = true, can_inject = true;
1457 1566
1567 if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
1568 return -EINVAL;
1569
1458 spin_lock(&dist->lock); 1570 spin_lock(&dist->lock);
1459 1571
1460 vcpu = kvm_get_vcpu(kvm, cpuid); 1572 vcpu = kvm_get_vcpu(kvm, cpuid);
@@ -1517,18 +1629,46 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
1517out: 1629out:
1518 spin_unlock(&dist->lock); 1630 spin_unlock(&dist->lock);
1519 1631
1520 return ret ? cpuid : -EINVAL; 1632 if (ret) {
1633 /* kick the specified vcpu */
1634 kvm_vcpu_kick(kvm_get_vcpu(kvm, cpuid));
1635 }
1636
1637 return 0;
1638}
1639
1640static int vgic_lazy_init(struct kvm *kvm)
1641{
1642 int ret = 0;
1643
1644 if (unlikely(!vgic_initialized(kvm))) {
1645 /*
1646 * We only provide the automatic initialization of the VGIC
1647 * for the legacy case of a GICv2. Any other type must
1648 * be explicitly initialized once set up with the respective
1649 * KVM device call.
1650 */
1651 if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
1652 return -EBUSY;
1653
1654 mutex_lock(&kvm->lock);
1655 ret = vgic_init(kvm);
1656 mutex_unlock(&kvm->lock);
1657 }
1658
1659 return ret;
1521} 1660}
1522 1661
1523/** 1662/**
1524 * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic 1663 * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
1525 * @kvm: The VM structure pointer 1664 * @kvm: The VM structure pointer
1526 * @cpuid: The CPU for PPIs 1665 * @cpuid: The CPU for PPIs
1527 * @irq_num: The IRQ number that is assigned to the device 1666 * @irq_num: The IRQ number that is assigned to the device. This IRQ
1667 * must not be mapped to a HW interrupt.
1528 * @level: Edge-triggered: true: to trigger the interrupt 1668 * @level: Edge-triggered: true: to trigger the interrupt
1529 * false: to ignore the call 1669 * false: to ignore the call
1530 * Level-sensitive true: activates an interrupt 1670 * Level-sensitive true: raise the input signal
1531 * false: deactivates an interrupt 1671 * false: lower the input signal
1532 * 1672 *
1533 * The GIC is not concerned with devices being active-LOW or active-HIGH for 1673 * The GIC is not concerned with devices being active-LOW or active-HIGH for
1534 * level-sensitive interrupts. You can think of the level parameter as 1 1674 * level-sensitive interrupts. You can think of the level parameter as 1
@@ -1537,39 +1677,44 @@ out:
1537int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, 1677int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
1538 bool level) 1678 bool level)
1539{ 1679{
1540 int ret = 0; 1680 struct irq_phys_map *map;
1541 int vcpu_id; 1681 int ret;
1542
1543 if (unlikely(!vgic_initialized(kvm))) {
1544 /*
1545 * We only provide the automatic initialization of the VGIC
1546 * for the legacy case of a GICv2. Any other type must
1547 * be explicitly initialized once setup with the respective
1548 * KVM device call.
1549 */
1550 if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) {
1551 ret = -EBUSY;
1552 goto out;
1553 }
1554 mutex_lock(&kvm->lock);
1555 ret = vgic_init(kvm);
1556 mutex_unlock(&kvm->lock);
1557 1682
1558 if (ret) 1683 ret = vgic_lazy_init(kvm);
1559 goto out; 1684 if (ret)
1560 } 1685 return ret;
1561 1686
1562 if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) 1687 map = vgic_irq_map_search(kvm_get_vcpu(kvm, cpuid), irq_num);
1688 if (map)
1563 return -EINVAL; 1689 return -EINVAL;
1564 1690
1565 vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level); 1691 return vgic_update_irq_pending(kvm, cpuid, NULL, irq_num, level);
1566 if (vcpu_id >= 0) { 1692}
1567 /* kick the specified vcpu */
1568 kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
1569 }
1570 1693
1571out: 1694/**
1572 return ret; 1695 * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic
1696 * @kvm: The VM structure pointer
1697 * @cpuid: The CPU for PPIs
1698 * @map: Pointer to an irq_phys_map structure describing the mapping
1699 * @level: Edge-triggered: true: to trigger the interrupt
1700 * false: to ignore the call
1701 * Level-sensitive true: raise the input signal
1702 * false: lower the input signal
1703 *
1704 * The GIC is not concerned with devices being active-LOW or active-HIGH for
1705 * level-sensitive interrupts. You can think of the level parameter as 1
1706 * being HIGH and 0 being LOW and all devices being active-HIGH.
1707 */
1708int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
1709 struct irq_phys_map *map, bool level)
1710{
1711 int ret;
1712
1713 ret = vgic_lazy_init(kvm);
1714 if (ret)
1715 return ret;
1716
1717 return vgic_update_irq_pending(kvm, cpuid, map, map->virt_irq, level);
1573} 1718}
1574 1719
1575static irqreturn_t vgic_maintenance_handler(int irq, void *data) 1720static irqreturn_t vgic_maintenance_handler(int irq, void *data)
@@ -1583,6 +1728,188 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
1583 return IRQ_HANDLED; 1728 return IRQ_HANDLED;
1584} 1729}
1585 1730
1731static struct list_head *vgic_get_irq_phys_map_list(struct kvm_vcpu *vcpu,
1732 int virt_irq)
1733{
1734 if (virt_irq < VGIC_NR_PRIVATE_IRQS)
1735 return &vcpu->arch.vgic_cpu.irq_phys_map_list;
1736 else
1737 return &vcpu->kvm->arch.vgic.irq_phys_map_list;
1738}
1739
1740/**
1741 * kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ
1742 * @vcpu: The VCPU pointer
1743 * @virt_irq: The virtual irq number
1744 * @irq: The Linux IRQ number
1745 *
1746 * Establish a mapping between a guest visible irq (@virt_irq) and a
1747 * Linux irq (@irq). On injection, @virt_irq will be associated with
1748 * the physical interrupt represented by @irq. This mapping can be
1749 * established multiple times as long as the parameters are the same.
1750 *
1751 * Returns a valid pointer on success, and an error pointer otherwise
1752 */
1753struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
1754 int virt_irq, int irq)
1755{
1756 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1757 struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
1758 struct irq_phys_map *map;
1759 struct irq_phys_map_entry *entry;
1760 struct irq_desc *desc;
1761 struct irq_data *data;
1762 int phys_irq;
1763
1764 desc = irq_to_desc(irq);
1765 if (!desc) {
1766 kvm_err("%s: no interrupt descriptor\n", __func__);
1767 return ERR_PTR(-EINVAL);
1768 }
1769
1770 data = irq_desc_get_irq_data(desc);
1771 while (data->parent_data)
1772 data = data->parent_data;
1773
1774 phys_irq = data->hwirq;
1775
1776 /* Create a new mapping */
1777 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1778 if (!entry)
1779 return ERR_PTR(-ENOMEM);
1780
1781 spin_lock(&dist->irq_phys_map_lock);
1782
1783 /* Try to match an existing mapping */
1784 map = vgic_irq_map_search(vcpu, virt_irq);
1785 if (map) {
1786 /* Make sure this mapping matches */
1787 if (map->phys_irq != phys_irq ||
1788 map->irq != irq)
1789 map = ERR_PTR(-EINVAL);
1790
1791 /* Found an existing, valid mapping */
1792 goto out;
1793 }
1794
1795 map = &entry->map;
1796 map->virt_irq = virt_irq;
1797 map->phys_irq = phys_irq;
1798 map->irq = irq;
1799
1800 list_add_tail_rcu(&entry->entry, root);
1801
1802out:
1803 spin_unlock(&dist->irq_phys_map_lock);
1804 /* If we've found a hit in the existing list, free the useless
1805 * entry */
1806 if (IS_ERR(map) || map != &entry->map)
1807 kfree(entry);
1808 return map;
1809}
1810
1811static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
1812 int virt_irq)
1813{
1814 struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
1815 struct irq_phys_map_entry *entry;
1816 struct irq_phys_map *map;
1817
1818 rcu_read_lock();
1819
1820 list_for_each_entry_rcu(entry, root, entry) {
1821 map = &entry->map;
1822 if (map->virt_irq == virt_irq) {
1823 rcu_read_unlock();
1824 return map;
1825 }
1826 }
1827
1828 rcu_read_unlock();
1829
1830 return NULL;
1831}
1832
1833static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu)
1834{
1835 struct irq_phys_map_entry *entry;
1836
1837 entry = container_of(rcu, struct irq_phys_map_entry, rcu);
1838 kfree(entry);
1839}
1840
1841/**
1842 * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ
1843 *
1844 * Return the logical active state of a mapped interrupt. This doesn't
1845 * necessarily reflect the current HW state.
1846 */
1847bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map)
1848{
1849 BUG_ON(!map);
1850 return map->active;
1851}
1852
1853/**
1854 * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ
1855 *
1856 * Set the logical active state of a mapped interrupt. This doesn't
1857 * immediately affect the HW state.
1858 */
1859void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active)
1860{
1861 BUG_ON(!map);
1862 map->active = active;
1863}
1864
1865/**
1866 * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
1867 * @vcpu: The VCPU pointer
1868 * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq
1869 *
1870 * Remove an existing mapping between virtual and physical interrupts.
1871 */
1872int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map)
1873{
1874 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1875 struct irq_phys_map_entry *entry;
1876 struct list_head *root;
1877
1878 if (!map)
1879 return -EINVAL;
1880
1881 root = vgic_get_irq_phys_map_list(vcpu, map->virt_irq);
1882
1883 spin_lock(&dist->irq_phys_map_lock);
1884
1885 list_for_each_entry(entry, root, entry) {
1886 if (&entry->map == map) {
1887 list_del_rcu(&entry->entry);
1888 call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu);
1889 break;
1890 }
1891 }
1892
1893 spin_unlock(&dist->irq_phys_map_lock);
1894
1895 return 0;
1896}
1897
1898static void vgic_destroy_irq_phys_map(struct kvm *kvm, struct list_head *root)
1899{
1900 struct vgic_dist *dist = &kvm->arch.vgic;
1901 struct irq_phys_map_entry *entry;
1902
1903 spin_lock(&dist->irq_phys_map_lock);
1904
1905 list_for_each_entry(entry, root, entry) {
1906 list_del_rcu(&entry->entry);
1907 call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu);
1908 }
1909
1910 spin_unlock(&dist->irq_phys_map_lock);
1911}
1912
1586void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) 1913void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
1587{ 1914{
1588 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1915 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1591,6 +1918,7 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
1591 kfree(vgic_cpu->active_shared); 1918 kfree(vgic_cpu->active_shared);
1592 kfree(vgic_cpu->pend_act_shared); 1919 kfree(vgic_cpu->pend_act_shared);
1593 kfree(vgic_cpu->vgic_irq_lr_map); 1920 kfree(vgic_cpu->vgic_irq_lr_map);
1921 vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list);
1594 vgic_cpu->pending_shared = NULL; 1922 vgic_cpu->pending_shared = NULL;
1595 vgic_cpu->active_shared = NULL; 1923 vgic_cpu->active_shared = NULL;
1596 vgic_cpu->pend_act_shared = NULL; 1924 vgic_cpu->pend_act_shared = NULL;
@@ -1628,6 +1956,17 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
1628} 1956}
1629 1957
1630/** 1958/**
1959 * kvm_vgic_vcpu_early_init - Earliest possible per-vcpu vgic init stage
1960 *
1961 * No memory allocation should be performed here, only static init.
1962 */
1963void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu)
1964{
1965 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1966 INIT_LIST_HEAD(&vgic_cpu->irq_phys_map_list);
1967}
1968
1969/**
1631 * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW 1970 * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
1632 * 1971 *
1633 * The host's GIC naturally limits the maximum amount of VCPUs a guest 1972 * The host's GIC naturally limits the maximum amount of VCPUs a guest
@@ -1664,6 +2003,7 @@ void kvm_vgic_destroy(struct kvm *kvm)
1664 kfree(dist->irq_spi_target); 2003 kfree(dist->irq_spi_target);
1665 kfree(dist->irq_pending_on_cpu); 2004 kfree(dist->irq_pending_on_cpu);
1666 kfree(dist->irq_active_on_cpu); 2005 kfree(dist->irq_active_on_cpu);
2006 vgic_destroy_irq_phys_map(kvm, &dist->irq_phys_map_list);
1667 dist->irq_sgi_sources = NULL; 2007 dist->irq_sgi_sources = NULL;
1668 dist->irq_spi_cpu = NULL; 2008 dist->irq_spi_cpu = NULL;
1669 dist->irq_spi_target = NULL; 2009 dist->irq_spi_target = NULL;
@@ -1787,6 +2127,18 @@ static int init_vgic_model(struct kvm *kvm, int type)
1787 return 0; 2127 return 0;
1788} 2128}
1789 2129
2130/**
2131 * kvm_vgic_early_init - Earliest possible vgic initialization stage
2132 *
2133 * No memory allocation should be performed here, only static init.
2134 */
2135void kvm_vgic_early_init(struct kvm *kvm)
2136{
2137 spin_lock_init(&kvm->arch.vgic.lock);
2138 spin_lock_init(&kvm->arch.vgic.irq_phys_map_lock);
2139 INIT_LIST_HEAD(&kvm->arch.vgic.irq_phys_map_list);
2140}
2141
1790int kvm_vgic_create(struct kvm *kvm, u32 type) 2142int kvm_vgic_create(struct kvm *kvm, u32 type)
1791{ 2143{
1792 int i, vcpu_lock_idx = -1, ret; 2144 int i, vcpu_lock_idx = -1, ret;
@@ -1832,7 +2184,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
1832 if (ret) 2184 if (ret)
1833 goto out_unlock; 2185 goto out_unlock;
1834 2186
1835 spin_lock_init(&kvm->arch.vgic.lock);
1836 kvm->arch.vgic.in_kernel = true; 2187 kvm->arch.vgic.in_kernel = true;
1837 kvm->arch.vgic.vgic_model = type; 2188 kvm->arch.vgic.vgic_model = type;
1838 kvm->arch.vgic.vctrl_base = vgic->vctrl_base; 2189 kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
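
kvm_vgic_map_phys_irq() above allocates a candidate entry before taking irq_phys_map_lock, then either reuses an existing mapping (only if virt_irq, phys_irq and irq all match) or links the new entry onto the RCU list, freeing the spare allocation once the lock is dropped. The standalone sketch below keeps only that allocate-then-match-or-insert logic; it uses a plain singly linked list with no locking or RCU, and map_phys_irq() here is an invented name that returns NULL on both allocation failure and parameter mismatch, where the kernel code returns distinct error pointers.

#include <stdio.h>
#include <stdlib.h>

struct map {
    unsigned int virt_irq, phys_irq, irq;
    struct map *next;
};

/*
 * Return the mapping for virt_irq, creating it if needed. A second call
 * with the same parameters returns the existing entry; a call with
 * conflicting parameters fails, mirroring kvm_vgic_map_phys_irq().
 */
static struct map *map_phys_irq(struct map **head, unsigned int virt_irq,
                                unsigned int phys_irq, unsigned int irq)
{
    struct map *m, *entry;

    /* Allocate before "taking the lock", as the kernel code does. */
    entry = calloc(1, sizeof(*entry));
    if (!entry)
        return NULL;

    for (m = *head; m; m = m->next) {
        if (m->virt_irq != virt_irq)
            continue;
        free(entry);   /* existing mapping found, drop the spare */
        return (m->phys_irq == phys_irq && m->irq == irq) ? m : NULL;
    }

    entry->virt_irq = virt_irq;
    entry->phys_irq = phys_irq;
    entry->irq = irq;
    entry->next = *head;
    *head = entry;
    return entry;
}

int main(void)
{
    struct map *head = NULL;

    printf("%p\n", (void *)map_phys_irq(&head, 27, 27, 5));   /* created */
    printf("%p\n", (void *)map_phys_irq(&head, 27, 27, 5));   /* same entry */
    printf("%p\n", (void *)map_phys_irq(&head, 27, 30, 5));   /* NULL: mismatch */
    return 0;
}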
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 21c14244f4c4..d7ea8e20dae4 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -213,11 +213,15 @@ int kvm_set_irq_routing(struct kvm *kvm,
213 goto out; 213 goto out;
214 214
215 r = -EINVAL; 215 r = -EINVAL;
216 if (ue->flags) 216 if (ue->flags) {
217 kfree(e);
217 goto out; 218 goto out;
219 }
218 r = setup_routing_entry(new, e, ue); 220 r = setup_routing_entry(new, e, ue);
219 if (r) 221 if (r) {
222 kfree(e);
220 goto out; 223 goto out;
224 }
221 ++ue; 225 ++ue;
222 } 226 }
223 227
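
The irqchip.c hunk is the "kvm: irqchip: fix memory leak" change from this merge: the routing entry e is allocated once per loop iteration (just above the hunk, not shown), so both early exits must free it before jumping to the cleanup label. A reduced standalone illustration of that error-path pattern follows; setup_entry(), set_routing() and the odd-value failure rule are invented for the example and much simpler than the real setup_routing_entry().

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct entry { int gsi; };

/* Stand-in for setup_routing_entry(); rejects odd values. */
static int setup_entry(struct entry *e, int val)
{
    if (val & 1)
        return -EINVAL;
    e->gsi = val;
    return 0;
}

static int set_routing(const int *vals, int n)
{
    int i, r;

    for (i = 0; i < n; i++) {
        struct entry *e = calloc(1, sizeof(*e));

        r = -ENOMEM;
        if (!e)
            goto out;
        r = setup_entry(e, vals[i]);
        if (r) {
            free(e);   /* the fix: release e on this error path */
            goto out;  /* rather than leaking it on the way out */
        }
        /* Simplified: a real table would take ownership of e here. */
        free(e);
    }
    r = 0;
out:
    return r;
}

int main(void)
{
    int good[] = { 0, 2, 4 }, bad[] = { 0, 3 };

    printf("%d %d\n", set_routing(good, 3), set_routing(bad, 2));
    return 0;
}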
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d8db2f8fce9c..4662a8877f6c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -66,9 +66,18 @@
66MODULE_AUTHOR("Qumranet"); 66MODULE_AUTHOR("Qumranet");
67MODULE_LICENSE("GPL"); 67MODULE_LICENSE("GPL");
68 68
69static unsigned int halt_poll_ns; 69/* halt polling only reduces halt latency by 5-7 us, 500us is enough */
70static unsigned int halt_poll_ns = 500000;
70module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); 71module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
71 72
73/* Default doubles per-vcpu halt_poll_ns. */
74static unsigned int halt_poll_ns_grow = 2;
75module_param(halt_poll_ns_grow, int, S_IRUGO);
76
77/* Default resets per-vcpu halt_poll_ns. */
78static unsigned int halt_poll_ns_shrink;
79module_param(halt_poll_ns_shrink, int, S_IRUGO);
80
72/* 81/*
73 * Ordering of locks: 82 * Ordering of locks:
74 * 83 *
@@ -217,6 +226,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
217 vcpu->kvm = kvm; 226 vcpu->kvm = kvm;
218 vcpu->vcpu_id = id; 227 vcpu->vcpu_id = id;
219 vcpu->pid = NULL; 228 vcpu->pid = NULL;
229 vcpu->halt_poll_ns = 0;
220 init_waitqueue_head(&vcpu->wq); 230 init_waitqueue_head(&vcpu->wq);
221 kvm_async_pf_vcpu_init(vcpu); 231 kvm_async_pf_vcpu_init(vcpu);
222 232
@@ -1906,6 +1916,35 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
1906} 1916}
1907EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty); 1917EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
1908 1918
1919static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
1920{
1921 int old, val;
1922
1923 old = val = vcpu->halt_poll_ns;
1924 /* 10us base */
1925 if (val == 0 && halt_poll_ns_grow)
1926 val = 10000;
1927 else
1928 val *= halt_poll_ns_grow;
1929
1930 vcpu->halt_poll_ns = val;
1931 trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
1932}
1933
1934static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
1935{
1936 int old, val;
1937
1938 old = val = vcpu->halt_poll_ns;
1939 if (halt_poll_ns_shrink == 0)
1940 val = 0;
1941 else
1942 val /= halt_poll_ns_shrink;
1943
1944 vcpu->halt_poll_ns = val;
1945 trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old);
1946}
1947
1909static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu) 1948static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
1910{ 1949{
1911 if (kvm_arch_vcpu_runnable(vcpu)) { 1950 if (kvm_arch_vcpu_runnable(vcpu)) {
@@ -1928,10 +1967,11 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
1928 ktime_t start, cur; 1967 ktime_t start, cur;
1929 DEFINE_WAIT(wait); 1968 DEFINE_WAIT(wait);
1930 bool waited = false; 1969 bool waited = false;
1970 u64 block_ns;
1931 1971
1932 start = cur = ktime_get(); 1972 start = cur = ktime_get();
1933 if (halt_poll_ns) { 1973 if (vcpu->halt_poll_ns) {
1934 ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns); 1974 ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
1935 1975
1936 do { 1976 do {
1937 /* 1977 /*
@@ -1960,7 +2000,21 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
1960 cur = ktime_get(); 2000 cur = ktime_get();
1961 2001
1962out: 2002out:
1963 trace_kvm_vcpu_wakeup(ktime_to_ns(cur) - ktime_to_ns(start), waited); 2003 block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
2004
2005 if (halt_poll_ns) {
2006 if (block_ns <= vcpu->halt_poll_ns)
2007 ;
2008 /* we had a long block, shrink polling */
2009 else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
2010 shrink_halt_poll_ns(vcpu);
2011 /* we had a short halt and our poll time is too small */
2012 else if (vcpu->halt_poll_ns < halt_poll_ns &&
2013 block_ns < halt_poll_ns)
2014 grow_halt_poll_ns(vcpu);
2015 }
2016
2017 trace_kvm_vcpu_wakeup(block_ns, waited);
1964} 2018}
1965EXPORT_SYMBOL_GPL(kvm_vcpu_block); 2019EXPORT_SYMBOL_GPL(kvm_vcpu_block);
1966 2020
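
The kvm_main.c changes implement the adaptive halt polling advertised in the merge description: each vcpu gets its own halt_poll_ns, grown from a 10 us base by halt_poll_ns_grow after short halts and shrunk (or reset to zero when halt_poll_ns_shrink is 0) after blocks longer than the global halt_poll_ns cap. The standalone sketch below replays that decision with the same default parameters; grow(), shrink(), adjust() and the block times in main() are invented for illustration.

#include <stdio.h>

static unsigned int halt_poll_ns = 500000;   /* global cap (module param) */
static unsigned int halt_poll_ns_grow = 2;   /* multiplier on growth */
static unsigned int halt_poll_ns_shrink;     /* 0 => reset to 0 on shrink */

static void grow(unsigned int *poll_ns)
{
    unsigned int val = *poll_ns;

    if (val == 0 && halt_poll_ns_grow)
        val = 10000;                 /* 10us base, as in grow_halt_poll_ns() */
    else
        val *= halt_poll_ns_grow;
    *poll_ns = val;
}

static void shrink(unsigned int *poll_ns)
{
    *poll_ns = halt_poll_ns_shrink ? *poll_ns / halt_poll_ns_shrink : 0;
}

/* Mirrors the decision made at the end of kvm_vcpu_block(). */
static void adjust(unsigned int *poll_ns, unsigned long long block_ns)
{
    if (!halt_poll_ns)
        return;                      /* feature disabled globally */
    if (block_ns <= *poll_ns)
        ;                            /* poll window was big enough */
    else if (*poll_ns && block_ns > halt_poll_ns)
        shrink(poll_ns);             /* long block: polling is wasted */
    else if (*poll_ns < halt_poll_ns && block_ns < halt_poll_ns)
        grow(poll_ns);               /* short halt: poll a bit longer */
}

int main(void)
{
    unsigned int poll_ns = 0;
    unsigned long long blocks[] = { 50000, 50000, 50000, 2000000, 50000 };

    for (int i = 0; i < 5; i++) {
        adjust(&poll_ns, blocks[i]);
        printf("block %llu ns -> halt_poll_ns %u\n", blocks[i], poll_ns);
    }
    return 0;
}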