author     Linus Torvalds <torvalds@linux-foundation.org>   2016-05-27 16:41:54 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2016-05-27 16:41:54 -0400
commit     e28e909c36bb5d6319953822d84df00fce7cbd18 (patch)
tree       a4aca971908a7a604c6fdd9a95360728f9f721b3
parent     dc03c0f9d12d85286d5e3623aa96d5c2a271b8e6 (diff)
parent     fabc712866435660f7fa1070e1fabe29eba5bc4c (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull second batch of KVM updates from Radim Krčmář:

 "General:

   - move kvm_stat tool from QEMU repo into tools/kvm/kvm_stat (kvm_stat
     had nothing to do with QEMU in the first place -- the tool only
     interprets debugfs)

   - expose per-vm statistics in debugfs and support them in kvm_stat
     (KVM always collected per-vm statistics, but they were summarised
     into global statistics)

  x86:

   - fix dynamic APICv (VMX was improperly configured and a guest could
     access host's APIC MSRs, CVE-2016-4440)

   - minor fixes

  ARM changes from Christoffer Dall:

   - new vgic reimplementation of our horribly broken legacy vgic
     implementation. The two implementations will live side-by-side
     (with the new being the configured default) for one kernel release
     and then we'll remove the legacy one.

   - fix for a non-critical issue with virtual abort injection to
     guests"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (70 commits)
  tools: kvm_stat: Add comments
  tools: kvm_stat: Introduce pid monitoring
  KVM: Create debugfs dir and stat files for each VM
  MAINTAINERS: Add kvm tools
  tools: kvm_stat: Powerpc related fixes
  tools: Add kvm_stat man page
  tools: Add kvm_stat vm monitor script
  kvm:vmx: more complete state update on APICv on/off
  KVM: SVM: Add more SVM_EXIT_REASONS
  KVM: Unify traced vector format
  svm: bitwise vs logical op typo
  KVM: arm/arm64: vgic-new: Synchronize changes to active state
  KVM: arm/arm64: vgic-new: enable build
  KVM: arm/arm64: vgic-new: implement mapped IRQ handling
  KVM: arm/arm64: vgic-new: Wire up irqfd injection
  KVM: arm/arm64: vgic-new: Add vgic_v2/v3_enable
  KVM: arm/arm64: vgic-new: vgic_init: implement map_resources
  KVM: arm/arm64: vgic-new: vgic_init: implement vgic_init
  KVM: arm/arm64: vgic-new: vgic_init: implement vgic_create
  KVM: arm/arm64: vgic-new: vgic_init: implement kvm_vgic_hyp_init
  ...
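As a rough illustration of the per-VM statistics mentioned above, the short sketch below walks the per-VM debugfs directories and prints their counters, roughly what a consumer such as kvm_stat does. The directory layout assumed here (one subdirectory per running VM under /sys/kernel/debug/kvm, with one integer file per counter) is inferred from the commit message, not taken verbatim from the patch.

    #!/usr/bin/python
    # Minimal sketch: dump per-VM KVM statistics from debugfs.
    # Assumes one subdirectory per VM under /sys/kernel/debug/kvm,
    # each containing one file per counter; paths are assumptions.
    import os

    KVM_DEBUGFS = '/sys/kernel/debug/kvm'

    def per_vm_stats():
        """Return {vm_dir: {stat_name: value}} for every per-VM directory."""
        stats = {}
        for entry in sorted(os.listdir(KVM_DEBUGFS)):
            vm_dir = os.path.join(KVM_DEBUGFS, entry)
            if not os.path.isdir(vm_dir):
                continue  # top-level files are the old global counters
            counters = {}
            for name in os.listdir(vm_dir):
                with open(os.path.join(vm_dir, name)) as f:
                    counters[name] = int(f.read().strip())
            stats[entry] = counters
        return stats

    if __name__ == '__main__':
        # Needs root (or relaxed debugfs permissions) to read the counters.
        for vm, counters in per_vm_stats().items():
            print(vm)
            for name, value in sorted(counters.items()):
                print('  %-25s %d' % (name, value))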
-rw-r--r--  MAINTAINERS                              1
-rw-r--r--  arch/arm/include/asm/kvm_host.h          6
-rw-r--r--  arch/arm/include/asm/kvm_mmio.h          3
-rw-r--r--  arch/arm/kvm/Kconfig                     7
-rw-r--r--  arch/arm/kvm/Makefile                   11
-rw-r--r--  arch/arm/kvm/arm.c                      37
-rw-r--r--  arch/arm/kvm/mmio.c                     24
-rw-r--r--  arch/arm64/include/asm/kvm_host.h        6
-rw-r--r--  arch/arm64/include/asm/kvm_mmio.h        3
-rw-r--r--  arch/arm64/kvm/Kconfig                   7
-rw-r--r--  arch/arm64/kvm/Makefile                 12
-rw-r--r--  arch/arm64/kvm/inject_fault.c            2
-rw-r--r--  arch/x86/include/uapi/asm/svm.h         44
-rw-r--r--  arch/x86/kvm/svm.c                       4
-rw-r--r--  arch/x86/kvm/vmx.c                      48
-rw-r--r--  include/kvm/arm_arch_timer.h            11
-rw-r--r--  include/kvm/arm_vgic.h                  20
-rw-r--r--  include/kvm/vgic/vgic.h                246
-rw-r--r--  include/linux/irqchip/arm-gic-v3.h       6
-rw-r--r--  include/linux/irqchip/arm-gic.h          2
-rw-r--r--  include/linux/kvm_host.h                 7
-rw-r--r--  include/trace/events/kvm.h               6
-rw-r--r--  tools/Makefile                           6
-rw-r--r--  tools/kvm/kvm_stat/Makefile             41
-rwxr-xr-x  tools/kvm/kvm_stat/kvm_stat           1127
-rw-r--r--  tools/kvm/kvm_stat/kvm_stat.txt         63
-rw-r--r--  virt/kvm/arm/arch_timer.c               68
-rw-r--r--  virt/kvm/arm/hyp/timer-sr.c              5
-rw-r--r--  virt/kvm/arm/hyp/vgic-v2-sr.c           17
-rw-r--r--  virt/kvm/arm/pmu.c                      25
-rw-r--r--  virt/kvm/arm/vgic-v2.c                   4
-rw-r--r--  virt/kvm/arm/vgic-v3.c                   8
-rw-r--r--  virt/kvm/arm/vgic.c                     86
-rw-r--r--  virt/kvm/arm/vgic/vgic-init.c          452
-rw-r--r--  virt/kvm/arm/vgic/vgic-irqfd.c          52
-rw-r--r--  virt/kvm/arm/vgic/vgic-kvm-device.c    431
-rw-r--r--  virt/kvm/arm/vgic/vgic-mmio-v2.c       446
-rw-r--r--  virt/kvm/arm/vgic/vgic-mmio-v3.c       455
-rw-r--r--  virt/kvm/arm/vgic/vgic-mmio.c          526
-rw-r--r--  virt/kvm/arm/vgic/vgic-mmio.h          150
-rw-r--r--  virt/kvm/arm/vgic/vgic-v2.c            352
-rw-r--r--  virt/kvm/arm/vgic/vgic-v3.c            330
-rw-r--r--  virt/kvm/arm/vgic/vgic.c               619
-rw-r--r--  virt/kvm/arm/vgic/vgic.h               131
-rw-r--r--  virt/kvm/kvm_main.c                    187
45 files changed, 5901 insertions, 193 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 81e9c984d2f3..312cd77e820c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6491,6 +6491,7 @@ F: arch/*/include/asm/kvm*
6491F: include/linux/kvm* 6491F: include/linux/kvm*
6492F: include/uapi/linux/kvm* 6492F: include/uapi/linux/kvm*
6493F: virt/kvm/ 6493F: virt/kvm/
6494F: tools/kvm/
6494 6495
6495KERNEL VIRTUAL MACHINE (KVM) FOR AMD-V 6496KERNEL VIRTUAL MACHINE (KVM) FOR AMD-V
6496M: Joerg Roedel <joro@8bytes.org> 6497M: Joerg Roedel <joro@8bytes.org>
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 0df6b1fc9655..96387d477e91 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -41,6 +41,8 @@
41 41
42#define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS 42#define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS
43 43
44#define KVM_REQ_VCPU_EXIT 8
45
44u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); 46u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
45int __attribute_const__ kvm_target_cpu(void); 47int __attribute_const__ kvm_target_cpu(void);
46int kvm_reset_vcpu(struct kvm_vcpu *vcpu); 48int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
@@ -226,6 +228,10 @@ static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
226 228
227struct kvm_vcpu *kvm_arm_get_running_vcpu(void); 229struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
228struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); 230struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
231void kvm_arm_halt_guest(struct kvm *kvm);
232void kvm_arm_resume_guest(struct kvm *kvm);
233void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu);
234void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu);
229 235
230int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); 236int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
231unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); 237unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h
index d8e90c8cb5fa..f3a7de71f515 100644
--- a/arch/arm/include/asm/kvm_mmio.h
+++ b/arch/arm/include/asm/kvm_mmio.h
@@ -28,6 +28,9 @@ struct kvm_decode {
28 bool sign_extend; 28 bool sign_extend;
29}; 29};
30 30
31void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
32unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
33
31int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); 34int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
32int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, 35int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
33 phys_addr_t fault_ipa); 36 phys_addr_t fault_ipa);
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 95a000515e43..02abfff68ee5 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -46,6 +46,13 @@ config KVM_ARM_HOST
46 ---help--- 46 ---help---
47 Provides host support for ARM processors. 47 Provides host support for ARM processors.
48 48
49config KVM_NEW_VGIC
50 bool "New VGIC implementation"
51 depends on KVM
52 default y
53 ---help---
54 uses the new VGIC implementation
55
49source drivers/vhost/Kconfig 56source drivers/vhost/Kconfig
50 57
51endif # VIRTUALIZATION 58endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index eb1bf4309c13..a596b58f6d37 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -21,7 +21,18 @@ obj-$(CONFIG_KVM_ARM_HOST) += hyp/
21obj-y += kvm-arm.o init.o interrupts.o 21obj-y += kvm-arm.o init.o interrupts.o
22obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o 22obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
23obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o 23obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
24
25ifeq ($(CONFIG_KVM_NEW_VGIC),y)
26obj-y += $(KVM)/arm/vgic/vgic.o
27obj-y += $(KVM)/arm/vgic/vgic-init.o
28obj-y += $(KVM)/arm/vgic/vgic-irqfd.o
29obj-y += $(KVM)/arm/vgic/vgic-v2.o
30obj-y += $(KVM)/arm/vgic/vgic-mmio.o
31obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o
32obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o
33else
24obj-y += $(KVM)/arm/vgic.o 34obj-y += $(KVM)/arm/vgic.o
25obj-y += $(KVM)/arm/vgic-v2.o 35obj-y += $(KVM)/arm/vgic-v2.o
26obj-y += $(KVM)/arm/vgic-v2-emul.o 36obj-y += $(KVM)/arm/vgic-v2-emul.o
37endif
27obj-y += $(KVM)/arm/arch_timer.o 38obj-y += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 237d5d82f0af..893941ec98dc 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -455,7 +455,7 @@ static void update_vttbr(struct kvm *kvm)
455static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) 455static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
456{ 456{
457 struct kvm *kvm = vcpu->kvm; 457 struct kvm *kvm = vcpu->kvm;
458 int ret; 458 int ret = 0;
459 459
460 if (likely(vcpu->arch.has_run_once)) 460 if (likely(vcpu->arch.has_run_once))
461 return 0; 461 return 0;
@@ -478,9 +478,9 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
478 * interrupts from the virtual timer with a userspace gic. 478 * interrupts from the virtual timer with a userspace gic.
479 */ 479 */
480 if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) 480 if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
481 kvm_timer_enable(kvm); 481 ret = kvm_timer_enable(vcpu);
482 482
483 return 0; 483 return ret;
484} 484}
485 485
486bool kvm_arch_intc_initialized(struct kvm *kvm) 486bool kvm_arch_intc_initialized(struct kvm *kvm)
@@ -488,30 +488,37 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
488 return vgic_initialized(kvm); 488 return vgic_initialized(kvm);
489} 489}
490 490
491static void kvm_arm_halt_guest(struct kvm *kvm) __maybe_unused; 491void kvm_arm_halt_guest(struct kvm *kvm)
492static void kvm_arm_resume_guest(struct kvm *kvm) __maybe_unused;
493
494static void kvm_arm_halt_guest(struct kvm *kvm)
495{ 492{
496 int i; 493 int i;
497 struct kvm_vcpu *vcpu; 494 struct kvm_vcpu *vcpu;
498 495
499 kvm_for_each_vcpu(i, vcpu, kvm) 496 kvm_for_each_vcpu(i, vcpu, kvm)
500 vcpu->arch.pause = true; 497 vcpu->arch.pause = true;
501 force_vm_exit(cpu_all_mask); 498 kvm_make_all_cpus_request(kvm, KVM_REQ_VCPU_EXIT);
499}
500
501void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu)
502{
503 vcpu->arch.pause = true;
504 kvm_vcpu_kick(vcpu);
502} 505}
503 506
504static void kvm_arm_resume_guest(struct kvm *kvm) 507void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu)
508{
509 struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
510
511 vcpu->arch.pause = false;
512 swake_up(wq);
513}
514
515void kvm_arm_resume_guest(struct kvm *kvm)
505{ 516{
506 int i; 517 int i;
507 struct kvm_vcpu *vcpu; 518 struct kvm_vcpu *vcpu;
508 519
509 kvm_for_each_vcpu(i, vcpu, kvm) { 520 kvm_for_each_vcpu(i, vcpu, kvm)
510 struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); 521 kvm_arm_resume_vcpu(vcpu);
511
512 vcpu->arch.pause = false;
513 swake_up(wq);
514 }
515} 522}
516 523
517static void vcpu_sleep(struct kvm_vcpu *vcpu) 524static void vcpu_sleep(struct kvm_vcpu *vcpu)
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 0f6600f05137..10f80a6c797a 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -23,7 +23,7 @@
23 23
24#include "trace.h" 24#include "trace.h"
25 25
26static void mmio_write_buf(char *buf, unsigned int len, unsigned long data) 26void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data)
27{ 27{
28 void *datap = NULL; 28 void *datap = NULL;
29 union { 29 union {
@@ -55,7 +55,7 @@ static void mmio_write_buf(char *buf, unsigned int len, unsigned long data)
55 memcpy(buf, datap, len); 55 memcpy(buf, datap, len);
56} 56}
57 57
58static unsigned long mmio_read_buf(char *buf, unsigned int len) 58unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len)
59{ 59{
60 unsigned long data = 0; 60 unsigned long data = 0;
61 union { 61 union {
@@ -66,7 +66,7 @@ static unsigned long mmio_read_buf(char *buf, unsigned int len)
66 66
67 switch (len) { 67 switch (len) {
68 case 1: 68 case 1:
69 data = buf[0]; 69 data = *(u8 *)buf;
70 break; 70 break;
71 case 2: 71 case 2:
72 memcpy(&tmp.hword, buf, len); 72 memcpy(&tmp.hword, buf, len);
@@ -87,11 +87,10 @@ static unsigned long mmio_read_buf(char *buf, unsigned int len)
87 87
88/** 88/**
89 * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation 89 * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
90 * or in-kernel IO emulation
91 *
90 * @vcpu: The VCPU pointer 92 * @vcpu: The VCPU pointer
91 * @run: The VCPU run struct containing the mmio data 93 * @run: The VCPU run struct containing the mmio data
92 *
93 * This should only be called after returning from userspace for MMIO load
94 * emulation.
95 */ 94 */
96int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) 95int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
97{ 96{
@@ -104,7 +103,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
104 if (len > sizeof(unsigned long)) 103 if (len > sizeof(unsigned long))
105 return -EINVAL; 104 return -EINVAL;
106 105
107 data = mmio_read_buf(run->mmio.data, len); 106 data = kvm_mmio_read_buf(run->mmio.data, len);
108 107
109 if (vcpu->arch.mmio_decode.sign_extend && 108 if (vcpu->arch.mmio_decode.sign_extend &&
110 len < sizeof(unsigned long)) { 109 len < sizeof(unsigned long)) {
@@ -190,7 +189,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
190 len); 189 len);
191 190
192 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data); 191 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
193 mmio_write_buf(data_buf, len, data); 192 kvm_mmio_write_buf(data_buf, len, data);
194 193
195 ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len, 194 ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
196 data_buf); 195 data_buf);
@@ -206,18 +205,19 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
206 run->mmio.is_write = is_write; 205 run->mmio.is_write = is_write;
207 run->mmio.phys_addr = fault_ipa; 206 run->mmio.phys_addr = fault_ipa;
208 run->mmio.len = len; 207 run->mmio.len = len;
209 if (is_write)
210 memcpy(run->mmio.data, data_buf, len);
211 208
212 if (!ret) { 209 if (!ret) {
213 /* We handled the access successfully in the kernel. */ 210 /* We handled the access successfully in the kernel. */
211 if (!is_write)
212 memcpy(run->mmio.data, data_buf, len);
214 vcpu->stat.mmio_exit_kernel++; 213 vcpu->stat.mmio_exit_kernel++;
215 kvm_handle_mmio_return(vcpu, run); 214 kvm_handle_mmio_return(vcpu, run);
216 return 1; 215 return 1;
217 } else {
218 vcpu->stat.mmio_exit_user++;
219 } 216 }
220 217
218 if (is_write)
219 memcpy(run->mmio.data, data_buf, len);
220 vcpu->stat.mmio_exit_user++;
221 run->exit_reason = KVM_EXIT_MMIO; 221 run->exit_reason = KVM_EXIT_MMIO;
222 return 0; 222 return 0;
223} 223}
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e63d23bad36e..49095fc4b482 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -43,6 +43,8 @@
43 43
44#define KVM_VCPU_MAX_FEATURES 4 44#define KVM_VCPU_MAX_FEATURES 4
45 45
46#define KVM_REQ_VCPU_EXIT 8
47
46int __attribute_const__ kvm_target_cpu(void); 48int __attribute_const__ kvm_target_cpu(void);
47int kvm_reset_vcpu(struct kvm_vcpu *vcpu); 49int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
48int kvm_arch_dev_ioctl_check_extension(long ext); 50int kvm_arch_dev_ioctl_check_extension(long ext);
@@ -327,6 +329,10 @@ static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
327 329
328struct kvm_vcpu *kvm_arm_get_running_vcpu(void); 330struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
329struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); 331struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
332void kvm_arm_halt_guest(struct kvm *kvm);
333void kvm_arm_resume_guest(struct kvm *kvm);
334void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu);
335void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu);
330 336
331u64 __kvm_call_hyp(void *hypfn, ...); 337u64 __kvm_call_hyp(void *hypfn, ...);
332#define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) 338#define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__)
diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h
index fe612a962576..75ea42079757 100644
--- a/arch/arm64/include/asm/kvm_mmio.h
+++ b/arch/arm64/include/asm/kvm_mmio.h
@@ -30,6 +30,9 @@ struct kvm_decode {
30 bool sign_extend; 30 bool sign_extend;
31}; 31};
32 32
33void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
34unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
35
33int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); 36int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
34int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, 37int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
35 phys_addr_t fault_ipa); 38 phys_addr_t fault_ipa);
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index aa2e34e99582..c4f26ef91e77 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -54,6 +54,13 @@ config KVM_ARM_PMU
54 Adds support for a virtual Performance Monitoring Unit (PMU) in 54 Adds support for a virtual Performance Monitoring Unit (PMU) in
55 virtual machines. 55 virtual machines.
56 56
57config KVM_NEW_VGIC
58 bool "New VGIC implementation"
59 depends on KVM
60 default y
61 ---help---
62 uses the new VGIC implementation
63
57source drivers/vhost/Kconfig 64source drivers/vhost/Kconfig
58 65
59endif # VIRTUALIZATION 66endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 122cff482ac4..a7a958ca29d5 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -20,10 +20,22 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
20kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o 20kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
21kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o 21kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
22 22
23ifeq ($(CONFIG_KVM_NEW_VGIC),y)
24kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o
25kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o
26kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o
27kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o
28kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o
29kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o
30kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
31kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
32kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o
33else
23kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o 34kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o
24kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o 35kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o
25kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o 36kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o
26kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o 37kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
27kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o 38kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
39endif
28kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o 40kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
29kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o 41kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 4d1ac81870d2..e9e0e6db73f6 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -162,7 +162,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
162 esr |= (ESR_ELx_EC_IABT_CUR << ESR_ELx_EC_SHIFT); 162 esr |= (ESR_ELx_EC_IABT_CUR << ESR_ELx_EC_SHIFT);
163 163
164 if (!is_iabt) 164 if (!is_iabt)
165 esr |= ESR_ELx_EC_DABT_LOW; 165 esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT;
166 166
167 vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_ELx_FSC_EXTABT; 167 vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_ELx_FSC_EXTABT;
168} 168}
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index b9e9bb2c6089..3725e145aa58 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -2,10 +2,12 @@
2#define _UAPI__SVM_H 2#define _UAPI__SVM_H
3 3
4#define SVM_EXIT_READ_CR0 0x000 4#define SVM_EXIT_READ_CR0 0x000
5#define SVM_EXIT_READ_CR2 0x002
5#define SVM_EXIT_READ_CR3 0x003 6#define SVM_EXIT_READ_CR3 0x003
6#define SVM_EXIT_READ_CR4 0x004 7#define SVM_EXIT_READ_CR4 0x004
7#define SVM_EXIT_READ_CR8 0x008 8#define SVM_EXIT_READ_CR8 0x008
8#define SVM_EXIT_WRITE_CR0 0x010 9#define SVM_EXIT_WRITE_CR0 0x010
10#define SVM_EXIT_WRITE_CR2 0x012
9#define SVM_EXIT_WRITE_CR3 0x013 11#define SVM_EXIT_WRITE_CR3 0x013
10#define SVM_EXIT_WRITE_CR4 0x014 12#define SVM_EXIT_WRITE_CR4 0x014
11#define SVM_EXIT_WRITE_CR8 0x018 13#define SVM_EXIT_WRITE_CR8 0x018
@@ -80,10 +82,12 @@
80 82
81#define SVM_EXIT_REASONS \ 83#define SVM_EXIT_REASONS \
82 { SVM_EXIT_READ_CR0, "read_cr0" }, \ 84 { SVM_EXIT_READ_CR0, "read_cr0" }, \
85 { SVM_EXIT_READ_CR2, "read_cr2" }, \
83 { SVM_EXIT_READ_CR3, "read_cr3" }, \ 86 { SVM_EXIT_READ_CR3, "read_cr3" }, \
84 { SVM_EXIT_READ_CR4, "read_cr4" }, \ 87 { SVM_EXIT_READ_CR4, "read_cr4" }, \
85 { SVM_EXIT_READ_CR8, "read_cr8" }, \ 88 { SVM_EXIT_READ_CR8, "read_cr8" }, \
86 { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ 89 { SVM_EXIT_WRITE_CR0, "write_cr0" }, \
90 { SVM_EXIT_WRITE_CR2, "write_cr2" }, \
87 { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ 91 { SVM_EXIT_WRITE_CR3, "write_cr3" }, \
88 { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ 92 { SVM_EXIT_WRITE_CR4, "write_cr4" }, \
89 { SVM_EXIT_WRITE_CR8, "write_cr8" }, \ 93 { SVM_EXIT_WRITE_CR8, "write_cr8" }, \
@@ -91,26 +95,57 @@
91 { SVM_EXIT_READ_DR1, "read_dr1" }, \ 95 { SVM_EXIT_READ_DR1, "read_dr1" }, \
92 { SVM_EXIT_READ_DR2, "read_dr2" }, \ 96 { SVM_EXIT_READ_DR2, "read_dr2" }, \
93 { SVM_EXIT_READ_DR3, "read_dr3" }, \ 97 { SVM_EXIT_READ_DR3, "read_dr3" }, \
98 { SVM_EXIT_READ_DR4, "read_dr4" }, \
99 { SVM_EXIT_READ_DR5, "read_dr5" }, \
100 { SVM_EXIT_READ_DR6, "read_dr6" }, \
101 { SVM_EXIT_READ_DR7, "read_dr7" }, \
94 { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ 102 { SVM_EXIT_WRITE_DR0, "write_dr0" }, \
95 { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ 103 { SVM_EXIT_WRITE_DR1, "write_dr1" }, \
96 { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ 104 { SVM_EXIT_WRITE_DR2, "write_dr2" }, \
97 { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ 105 { SVM_EXIT_WRITE_DR3, "write_dr3" }, \
106 { SVM_EXIT_WRITE_DR4, "write_dr4" }, \
98 { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ 107 { SVM_EXIT_WRITE_DR5, "write_dr5" }, \
108 { SVM_EXIT_WRITE_DR6, "write_dr6" }, \
99 { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ 109 { SVM_EXIT_WRITE_DR7, "write_dr7" }, \
110 { SVM_EXIT_EXCP_BASE + DE_VECTOR, "DE excp" }, \
100 { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ 111 { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \
101 { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ 112 { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \
113 { SVM_EXIT_EXCP_BASE + OF_VECTOR, "OF excp" }, \
114 { SVM_EXIT_EXCP_BASE + BR_VECTOR, "BR excp" }, \
102 { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ 115 { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \
103 { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \
104 { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ 116 { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \
117 { SVM_EXIT_EXCP_BASE + DF_VECTOR, "DF excp" }, \
118 { SVM_EXIT_EXCP_BASE + TS_VECTOR, "TS excp" }, \
119 { SVM_EXIT_EXCP_BASE + NP_VECTOR, "NP excp" }, \
120 { SVM_EXIT_EXCP_BASE + SS_VECTOR, "SS excp" }, \
121 { SVM_EXIT_EXCP_BASE + GP_VECTOR, "GP excp" }, \
122 { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \
123 { SVM_EXIT_EXCP_BASE + MF_VECTOR, "MF excp" }, \
105 { SVM_EXIT_EXCP_BASE + AC_VECTOR, "AC excp" }, \ 124 { SVM_EXIT_EXCP_BASE + AC_VECTOR, "AC excp" }, \
106 { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ 125 { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \
126 { SVM_EXIT_EXCP_BASE + XM_VECTOR, "XF excp" }, \
107 { SVM_EXIT_INTR, "interrupt" }, \ 127 { SVM_EXIT_INTR, "interrupt" }, \
108 { SVM_EXIT_NMI, "nmi" }, \ 128 { SVM_EXIT_NMI, "nmi" }, \
109 { SVM_EXIT_SMI, "smi" }, \ 129 { SVM_EXIT_SMI, "smi" }, \
110 { SVM_EXIT_INIT, "init" }, \ 130 { SVM_EXIT_INIT, "init" }, \
111 { SVM_EXIT_VINTR, "vintr" }, \ 131 { SVM_EXIT_VINTR, "vintr" }, \
112 { SVM_EXIT_CR0_SEL_WRITE, "cr0_sel_write" }, \ 132 { SVM_EXIT_CR0_SEL_WRITE, "cr0_sel_write" }, \
133 { SVM_EXIT_IDTR_READ, "read_idtr" }, \
134 { SVM_EXIT_GDTR_READ, "read_gdtr" }, \
135 { SVM_EXIT_LDTR_READ, "read_ldtr" }, \
136 { SVM_EXIT_TR_READ, "read_rt" }, \
137 { SVM_EXIT_IDTR_WRITE, "write_idtr" }, \
138 { SVM_EXIT_GDTR_WRITE, "write_gdtr" }, \
139 { SVM_EXIT_LDTR_WRITE, "write_ldtr" }, \
140 { SVM_EXIT_TR_WRITE, "write_rt" }, \
141 { SVM_EXIT_RDTSC, "rdtsc" }, \
142 { SVM_EXIT_RDPMC, "rdpmc" }, \
143 { SVM_EXIT_PUSHF, "pushf" }, \
144 { SVM_EXIT_POPF, "popf" }, \
113 { SVM_EXIT_CPUID, "cpuid" }, \ 145 { SVM_EXIT_CPUID, "cpuid" }, \
146 { SVM_EXIT_RSM, "rsm" }, \
147 { SVM_EXIT_IRET, "iret" }, \
148 { SVM_EXIT_SWINT, "swint" }, \
114 { SVM_EXIT_INVD, "invd" }, \ 149 { SVM_EXIT_INVD, "invd" }, \
115 { SVM_EXIT_PAUSE, "pause" }, \ 150 { SVM_EXIT_PAUSE, "pause" }, \
116 { SVM_EXIT_HLT, "hlt" }, \ 151 { SVM_EXIT_HLT, "hlt" }, \
@@ -119,6 +154,7 @@
119 { SVM_EXIT_IOIO, "io" }, \ 154 { SVM_EXIT_IOIO, "io" }, \
120 { SVM_EXIT_MSR, "msr" }, \ 155 { SVM_EXIT_MSR, "msr" }, \
121 { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ 156 { SVM_EXIT_TASK_SWITCH, "task_switch" }, \
157 { SVM_EXIT_FERR_FREEZE, "ferr_freeze" }, \
122 { SVM_EXIT_SHUTDOWN, "shutdown" }, \ 158 { SVM_EXIT_SHUTDOWN, "shutdown" }, \
123 { SVM_EXIT_VMRUN, "vmrun" }, \ 159 { SVM_EXIT_VMRUN, "vmrun" }, \
124 { SVM_EXIT_VMMCALL, "hypercall" }, \ 160 { SVM_EXIT_VMMCALL, "hypercall" }, \
@@ -127,14 +163,16 @@
127 { SVM_EXIT_STGI, "stgi" }, \ 163 { SVM_EXIT_STGI, "stgi" }, \
128 { SVM_EXIT_CLGI, "clgi" }, \ 164 { SVM_EXIT_CLGI, "clgi" }, \
129 { SVM_EXIT_SKINIT, "skinit" }, \ 165 { SVM_EXIT_SKINIT, "skinit" }, \
166 { SVM_EXIT_RDTSCP, "rdtscp" }, \
167 { SVM_EXIT_ICEBP, "icebp" }, \
130 { SVM_EXIT_WBINVD, "wbinvd" }, \ 168 { SVM_EXIT_WBINVD, "wbinvd" }, \
131 { SVM_EXIT_MONITOR, "monitor" }, \ 169 { SVM_EXIT_MONITOR, "monitor" }, \
132 { SVM_EXIT_MWAIT, "mwait" }, \ 170 { SVM_EXIT_MWAIT, "mwait" }, \
133 { SVM_EXIT_XSETBV, "xsetbv" }, \ 171 { SVM_EXIT_XSETBV, "xsetbv" }, \
134 { SVM_EXIT_NPF, "npf" }, \ 172 { SVM_EXIT_NPF, "npf" }, \
135 { SVM_EXIT_RSM, "rsm" }, \
136 { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ 173 { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
137 { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" } 174 { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \
175 { SVM_EXIT_ERR, "invalid_guest_state" }
138 176
139 177
140#endif /* _UAPI__SVM_H */ 178#endif /* _UAPI__SVM_H */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2214214c786b..1163e8173e5a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -84,7 +84,7 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
84#define TSC_RATIO_MIN 0x0000000000000001ULL 84#define TSC_RATIO_MIN 0x0000000000000001ULL
85#define TSC_RATIO_MAX 0x000000ffffffffffULL 85#define TSC_RATIO_MAX 0x000000ffffffffffULL
86 86
87#define AVIC_HPA_MASK ~((0xFFFULL << 52) || 0xFFF) 87#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
88 88
89/* 89/*
90 * 0xff is broadcast, so the max index allowed for physical APIC ID 90 * 0xff is broadcast, so the max index allowed for physical APIC ID
@@ -3597,7 +3597,7 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
3597 u32 icrh = svm->vmcb->control.exit_info_1 >> 32; 3597 u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
3598 u32 icrl = svm->vmcb->control.exit_info_1; 3598 u32 icrl = svm->vmcb->control.exit_info_1;
3599 u32 id = svm->vmcb->control.exit_info_2 >> 32; 3599 u32 id = svm->vmcb->control.exit_info_2 >> 32;
3600 u32 index = svm->vmcb->control.exit_info_2 && 0xFF; 3600 u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
3601 struct kvm_lapic *apic = svm->vcpu.arch.apic; 3601 struct kvm_lapic *apic = svm->vcpu.arch.apic;
3602 3602
3603 trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index); 3603 trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e605d1ed334f..fb93010beaa4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2418,7 +2418,9 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
2418 2418
2419 if (is_guest_mode(vcpu)) 2419 if (is_guest_mode(vcpu))
2420 msr_bitmap = vmx_msr_bitmap_nested; 2420 msr_bitmap = vmx_msr_bitmap_nested;
2421 else if (vcpu->arch.apic_base & X2APIC_ENABLE) { 2421 else if (cpu_has_secondary_exec_ctrls() &&
2422 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
2423 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
2422 if (is_long_mode(vcpu)) 2424 if (is_long_mode(vcpu))
2423 msr_bitmap = vmx_msr_bitmap_longmode_x2apic; 2425 msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
2424 else 2426 else
@@ -4787,6 +4789,19 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
4787 struct vcpu_vmx *vmx = to_vmx(vcpu); 4789 struct vcpu_vmx *vmx = to_vmx(vcpu);
4788 4790
4789 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); 4791 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
4792 if (cpu_has_secondary_exec_ctrls()) {
4793 if (kvm_vcpu_apicv_active(vcpu))
4794 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
4795 SECONDARY_EXEC_APIC_REGISTER_VIRT |
4796 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
4797 else
4798 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
4799 SECONDARY_EXEC_APIC_REGISTER_VIRT |
4800 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
4801 }
4802
4803 if (cpu_has_vmx_msr_bitmap())
4804 vmx_set_msr_bitmap(vcpu);
4790} 4805}
4791 4806
4792static u32 vmx_exec_control(struct vcpu_vmx *vmx) 4807static u32 vmx_exec_control(struct vcpu_vmx *vmx)
@@ -6333,23 +6348,20 @@ static __init int hardware_setup(void)
6333 6348
6334 set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ 6349 set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
6335 6350
6336 if (enable_apicv) { 6351 for (msr = 0x800; msr <= 0x8ff; msr++)
6337 for (msr = 0x800; msr <= 0x8ff; msr++) 6352 vmx_disable_intercept_msr_read_x2apic(msr);
6338 vmx_disable_intercept_msr_read_x2apic(msr); 6353
6339 6354 /* According SDM, in x2apic mode, the whole id reg is used. But in
6340 /* According SDM, in x2apic mode, the whole id reg is used. 6355 * KVM, it only use the highest eight bits. Need to intercept it */
6341 * But in KVM, it only use the highest eight bits. Need to 6356 vmx_enable_intercept_msr_read_x2apic(0x802);
6342 * intercept it */ 6357 /* TMCCT */
6343 vmx_enable_intercept_msr_read_x2apic(0x802); 6358 vmx_enable_intercept_msr_read_x2apic(0x839);
6344 /* TMCCT */ 6359 /* TPR */
6345 vmx_enable_intercept_msr_read_x2apic(0x839); 6360 vmx_disable_intercept_msr_write_x2apic(0x808);
6346 /* TPR */ 6361 /* EOI */
6347 vmx_disable_intercept_msr_write_x2apic(0x808); 6362 vmx_disable_intercept_msr_write_x2apic(0x80b);
6348 /* EOI */ 6363 /* SELF-IPI */
6349 vmx_disable_intercept_msr_write_x2apic(0x80b); 6364 vmx_disable_intercept_msr_write_x2apic(0x83f);
6350 /* SELF-IPI */
6351 vmx_disable_intercept_msr_write_x2apic(0x83f);
6352 }
6353 6365
6354 if (enable_ept) { 6366 if (enable_ept) {
6355 kvm_mmu_set_mask_ptes(0ull, 6367 kvm_mmu_set_mask_ptes(0ull,
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index b651aed9dc6b..dda39d8fa189 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -24,9 +24,6 @@
24#include <linux/workqueue.h> 24#include <linux/workqueue.h>
25 25
26struct arch_timer_kvm { 26struct arch_timer_kvm {
27 /* Is the timer enabled */
28 bool enabled;
29
30 /* Virtual offset */ 27 /* Virtual offset */
31 cycle_t cntvoff; 28 cycle_t cntvoff;
32}; 29};
@@ -53,15 +50,15 @@ struct arch_timer_cpu {
53 /* Timer IRQ */ 50 /* Timer IRQ */
54 struct kvm_irq_level irq; 51 struct kvm_irq_level irq;
55 52
56 /* VGIC mapping */
57 struct irq_phys_map *map;
58
59 /* Active IRQ state caching */ 53 /* Active IRQ state caching */
60 bool active_cleared_last; 54 bool active_cleared_last;
55
56 /* Is the timer enabled */
57 bool enabled;
61}; 58};
62 59
63int kvm_timer_hyp_init(void); 60int kvm_timer_hyp_init(void);
64void kvm_timer_enable(struct kvm *kvm); 61int kvm_timer_enable(struct kvm_vcpu *vcpu);
65void kvm_timer_init(struct kvm *kvm); 62void kvm_timer_init(struct kvm *kvm);
66int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, 63int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
67 const struct kvm_irq_level *irq); 64 const struct kvm_irq_level *irq);
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index be6037aa703d..da0a524802cb 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -19,6 +19,10 @@
19#ifndef __ASM_ARM_KVM_VGIC_H 19#ifndef __ASM_ARM_KVM_VGIC_H
20#define __ASM_ARM_KVM_VGIC_H 20#define __ASM_ARM_KVM_VGIC_H
21 21
22#ifdef CONFIG_KVM_NEW_VGIC
23#include <kvm/vgic/vgic.h>
24#else
25
22#include <linux/kernel.h> 26#include <linux/kernel.h>
23#include <linux/kvm.h> 27#include <linux/kvm.h>
24#include <linux/irqreturn.h> 28#include <linux/irqreturn.h>
@@ -158,7 +162,6 @@ struct vgic_io_device {
158struct irq_phys_map { 162struct irq_phys_map {
159 u32 virt_irq; 163 u32 virt_irq;
160 u32 phys_irq; 164 u32 phys_irq;
161 u32 irq;
162}; 165};
163 166
164struct irq_phys_map_entry { 167struct irq_phys_map_entry {
@@ -305,9 +308,6 @@ struct vgic_cpu {
305 unsigned long *active_shared; 308 unsigned long *active_shared;
306 unsigned long *pend_act_shared; 309 unsigned long *pend_act_shared;
307 310
308 /* Number of list registers on this CPU */
309 int nr_lr;
310
311 /* CPU vif control registers for world switch */ 311 /* CPU vif control registers for world switch */
312 union { 312 union {
313 struct vgic_v2_cpu_if vgic_v2; 313 struct vgic_v2_cpu_if vgic_v2;
@@ -342,17 +342,18 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
342int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, 342int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
343 bool level); 343 bool level);
344int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, 344int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
345 struct irq_phys_map *map, bool level); 345 unsigned int virt_irq, bool level);
346void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); 346void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
347int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); 347int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
348struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, 348int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int phys_irq);
349 int virt_irq, int irq); 349int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq);
350int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map); 350bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq);
351bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
352 351
353#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) 352#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
354#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) 353#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus))
355#define vgic_ready(k) ((k)->arch.vgic.ready) 354#define vgic_ready(k) ((k)->arch.vgic.ready)
355#define vgic_valid_spi(k, i) (((i) >= VGIC_NR_PRIVATE_IRQS) && \
356 ((i) < (k)->arch.vgic.nr_irqs))
356 357
357int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info, 358int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info,
358 const struct vgic_ops **ops, 359 const struct vgic_ops **ops,
@@ -370,4 +371,5 @@ static inline int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info,
370} 371}
371#endif 372#endif
372 373
374#endif /* old VGIC include */
373#endif 375#endif
diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h
new file mode 100644
index 000000000000..3fbd175265ae
--- /dev/null
+++ b/include/kvm/vgic/vgic.h
@@ -0,0 +1,246 @@
1/*
2 * Copyright (C) 2015, 2016 ARM Ltd.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#ifndef __ASM_ARM_KVM_VGIC_VGIC_H
17#define __ASM_ARM_KVM_VGIC_VGIC_H
18
19#include <linux/kernel.h>
20#include <linux/kvm.h>
21#include <linux/irqreturn.h>
22#include <linux/spinlock.h>
23#include <linux/types.h>
24#include <kvm/iodev.h>
25
26#define VGIC_V3_MAX_CPUS 255
27#define VGIC_V2_MAX_CPUS 8
28#define VGIC_NR_IRQS_LEGACY 256
29#define VGIC_NR_SGIS 16
30#define VGIC_NR_PPIS 16
31#define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS)
32#define VGIC_MAX_PRIVATE (VGIC_NR_PRIVATE_IRQS - 1)
33#define VGIC_MAX_SPI 1019
34#define VGIC_MAX_RESERVED 1023
35#define VGIC_MIN_LPI 8192
36
37enum vgic_type {
38 VGIC_V2, /* Good ol' GICv2 */
39 VGIC_V3, /* New fancy GICv3 */
40};
41
42/* same for all guests, as depending only on the _host's_ GIC model */
43struct vgic_global {
44 /* type of the host GIC */
45 enum vgic_type type;
46
47 /* Physical address of vgic virtual cpu interface */
48 phys_addr_t vcpu_base;
49
50 /* virtual control interface mapping */
51 void __iomem *vctrl_base;
52
53 /* Number of implemented list registers */
54 int nr_lr;
55
56 /* Maintenance IRQ number */
57 unsigned int maint_irq;
58
59 /* maximum number of VCPUs allowed (GICv2 limits us to 8) */
60 int max_gic_vcpus;
61
62 /* Only needed for the legacy KVM_CREATE_IRQCHIP */
63 bool can_emulate_gicv2;
64};
65
66extern struct vgic_global kvm_vgic_global_state;
67
68#define VGIC_V2_MAX_LRS (1 << 6)
69#define VGIC_V3_MAX_LRS 16
70#define VGIC_V3_LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr)
71
72enum vgic_irq_config {
73 VGIC_CONFIG_EDGE = 0,
74 VGIC_CONFIG_LEVEL
75};
76
77struct vgic_irq {
78 spinlock_t irq_lock; /* Protects the content of the struct */
79 struct list_head ap_list;
80
81 struct kvm_vcpu *vcpu; /* SGIs and PPIs: The VCPU
82 * SPIs and LPIs: The VCPU whose ap_list
83 * this is queued on.
84 */
85
86 struct kvm_vcpu *target_vcpu; /* The VCPU that this interrupt should
87 * be sent to, as a result of the
88 * targets reg (v2) or the
89 * affinity reg (v3).
90 */
91
92 u32 intid; /* Guest visible INTID */
93 bool pending;
94 bool line_level; /* Level only */
95 bool soft_pending; /* Level only */
96 bool active; /* not used for LPIs */
97 bool enabled;
98 bool hw; /* Tied to HW IRQ */
99 u32 hwintid; /* HW INTID number */
100 union {
101 u8 targets; /* GICv2 target VCPUs mask */
102 u32 mpidr; /* GICv3 target VCPU */
103 };
104 u8 source; /* GICv2 SGIs only */
105 u8 priority;
106 enum vgic_irq_config config; /* Level or edge */
107};
108
109struct vgic_register_region;
110
111struct vgic_io_device {
112 gpa_t base_addr;
113 struct kvm_vcpu *redist_vcpu;
114 const struct vgic_register_region *regions;
115 int nr_regions;
116 struct kvm_io_device dev;
117};
118
119struct vgic_dist {
120 bool in_kernel;
121 bool ready;
122 bool initialized;
123
124 /* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */
125 u32 vgic_model;
126
127 int nr_spis;
128
129 /* TODO: Consider moving to global state */
130 /* Virtual control interface mapping */
131 void __iomem *vctrl_base;
132
133 /* base addresses in guest physical address space: */
134 gpa_t vgic_dist_base; /* distributor */
135 union {
136 /* either a GICv2 CPU interface */
137 gpa_t vgic_cpu_base;
138 /* or a number of GICv3 redistributor regions */
139 gpa_t vgic_redist_base;
140 };
141
142 /* distributor enabled */
143 bool enabled;
144
145 struct vgic_irq *spis;
146
147 struct vgic_io_device dist_iodev;
148 struct vgic_io_device *redist_iodevs;
149};
150
151struct vgic_v2_cpu_if {
152 u32 vgic_hcr;
153 u32 vgic_vmcr;
154 u32 vgic_misr; /* Saved only */
155 u64 vgic_eisr; /* Saved only */
156 u64 vgic_elrsr; /* Saved only */
157 u32 vgic_apr;
158 u32 vgic_lr[VGIC_V2_MAX_LRS];
159};
160
161struct vgic_v3_cpu_if {
162#ifdef CONFIG_KVM_ARM_VGIC_V3
163 u32 vgic_hcr;
164 u32 vgic_vmcr;
165 u32 vgic_sre; /* Restored only, change ignored */
166 u32 vgic_misr; /* Saved only */
167 u32 vgic_eisr; /* Saved only */
168 u32 vgic_elrsr; /* Saved only */
169 u32 vgic_ap0r[4];
170 u32 vgic_ap1r[4];
171 u64 vgic_lr[VGIC_V3_MAX_LRS];
172#endif
173};
174
175struct vgic_cpu {
176 /* CPU vif control registers for world switch */
177 union {
178 struct vgic_v2_cpu_if vgic_v2;
179 struct vgic_v3_cpu_if vgic_v3;
180 };
181
182 unsigned int used_lrs;
183 struct vgic_irq private_irqs[VGIC_NR_PRIVATE_IRQS];
184
185 spinlock_t ap_list_lock; /* Protects the ap_list */
186
187 /*
188 * List of IRQs that this VCPU should consider because they are either
189 * Active or Pending (hence the name; AP list), or because they recently
190 * were one of the two and need to be migrated off this list to another
191 * VCPU.
192 */
193 struct list_head ap_list_head;
194
195 u64 live_lrs;
196};
197
198int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
199void kvm_vgic_early_init(struct kvm *kvm);
200int kvm_vgic_create(struct kvm *kvm, u32 type);
201void kvm_vgic_destroy(struct kvm *kvm);
202void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu);
203void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
204int kvm_vgic_map_resources(struct kvm *kvm);
205int kvm_vgic_hyp_init(void);
206
207int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
208 bool level);
209int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid,
210 bool level);
211int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq);
212int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq);
213bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq);
214
215int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
216
217#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
218#define vgic_initialized(k) ((k)->arch.vgic.initialized)
219#define vgic_ready(k) ((k)->arch.vgic.ready)
220#define vgic_valid_spi(k, i) (((i) >= VGIC_NR_PRIVATE_IRQS) && \
221 ((i) < (k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS))
222
223bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu);
224void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
225void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
226
227#ifdef CONFIG_KVM_ARM_VGIC_V3
228void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
229#else
230static inline void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
231{
232}
233#endif
234
235/**
236 * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
237 *
238 * The host's GIC naturally limits the maximum amount of VCPUs a guest
239 * can use.
240 */
241static inline int kvm_vgic_get_max_vcpus(void)
242{
243 return kvm_vgic_global_state.max_gic_vcpus;
244}
245
246#endif /* __ASM_ARM_KVM_VGIC_VGIC_H */
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 9e6fdd33bdb2..bfbd707de390 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -273,6 +273,12 @@
273#define ICH_LR_ACTIVE_BIT (1ULL << 63) 273#define ICH_LR_ACTIVE_BIT (1ULL << 63)
274#define ICH_LR_PHYS_ID_SHIFT 32 274#define ICH_LR_PHYS_ID_SHIFT 32
275#define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) 275#define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT)
276#define ICH_LR_PRIORITY_SHIFT 48
277
278/* These are for GICv2 emulation only */
279#define GICH_LR_VIRTUALID (0x3ffUL << 0)
280#define GICH_LR_PHYSID_CPUID_SHIFT (10)
281#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
276 282
277#define ICH_MISR_EOI (1 << 0) 283#define ICH_MISR_EOI (1 << 0)
278#define ICH_MISR_U (1 << 1) 284#define ICH_MISR_U (1 << 1)
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 9c940263ca23..fd051855539b 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -33,6 +33,7 @@
33 33
34#define GIC_DIST_CTRL 0x000 34#define GIC_DIST_CTRL 0x000
35#define GIC_DIST_CTR 0x004 35#define GIC_DIST_CTR 0x004
36#define GIC_DIST_IIDR 0x008
36#define GIC_DIST_IGROUP 0x080 37#define GIC_DIST_IGROUP 0x080
37#define GIC_DIST_ENABLE_SET 0x100 38#define GIC_DIST_ENABLE_SET 0x100
38#define GIC_DIST_ENABLE_CLEAR 0x180 39#define GIC_DIST_ENABLE_CLEAR 0x180
@@ -76,6 +77,7 @@
76#define GICH_LR_VIRTUALID (0x3ff << 0) 77#define GICH_LR_VIRTUALID (0x3ff << 0)
77#define GICH_LR_PHYSID_CPUID_SHIFT (10) 78#define GICH_LR_PHYSID_CPUID_SHIFT (10)
78#define GICH_LR_PHYSID_CPUID (0x3ff << GICH_LR_PHYSID_CPUID_SHIFT) 79#define GICH_LR_PHYSID_CPUID (0x3ff << GICH_LR_PHYSID_CPUID_SHIFT)
80#define GICH_LR_PRIORITY_SHIFT 23
79#define GICH_LR_STATE (3 << 28) 81#define GICH_LR_STATE (3 << 28)
80#define GICH_LR_PENDING_BIT (1 << 28) 82#define GICH_LR_PENDING_BIT (1 << 28)
81#define GICH_LR_ACTIVE_BIT (1 << 29) 83#define GICH_LR_ACTIVE_BIT (1 << 29)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b1fa8f11c95b..1c9c973a7dd9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -412,6 +412,8 @@ struct kvm {
412#endif 412#endif
413 long tlbs_dirty; 413 long tlbs_dirty;
414 struct list_head devices; 414 struct list_head devices;
415 struct dentry *debugfs_dentry;
416 struct kvm_stat_data **debugfs_stat_data;
415}; 417};
416 418
417#define kvm_err(fmt, ...) \ 419#define kvm_err(fmt, ...) \
@@ -991,6 +993,11 @@ enum kvm_stat_kind {
991 KVM_STAT_VCPU, 993 KVM_STAT_VCPU,
992}; 994};
993 995
996struct kvm_stat_data {
997 int offset;
998 struct kvm *kvm;
999};
1000
994struct kvm_stats_debugfs_item { 1001struct kvm_stats_debugfs_item {
995 const char *name; 1002 const char *name;
996 int offset; 1003 int offset;
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 526fb3d2e43a..f28292d73ddb 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -108,7 +108,7 @@ TRACE_EVENT(kvm_ioapic_set_irq,
108 __entry->coalesced = coalesced; 108 __entry->coalesced = coalesced;
109 ), 109 ),
110 110
111 TP_printk("pin %u dst %x vec=%u (%s|%s|%s%s)%s", 111 TP_printk("pin %u dst %x vec %u (%s|%s|%s%s)%s",
112 __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e, 112 __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e,
113 __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), 113 __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
114 (__entry->e & (1<<11)) ? "logical" : "physical", 114 (__entry->e & (1<<11)) ? "logical" : "physical",
@@ -129,7 +129,7 @@ TRACE_EVENT(kvm_ioapic_delayed_eoi_inj,
129 __entry->e = e; 129 __entry->e = e;
130 ), 130 ),
131 131
132 TP_printk("dst %x vec=%u (%s|%s|%s%s)", 132 TP_printk("dst %x vec %u (%s|%s|%s%s)",
133 (u8)(__entry->e >> 56), (u8)__entry->e, 133 (u8)(__entry->e >> 56), (u8)__entry->e,
134 __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), 134 __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
135 (__entry->e & (1<<11)) ? "logical" : "physical", 135 (__entry->e & (1<<11)) ? "logical" : "physical",
@@ -151,7 +151,7 @@ TRACE_EVENT(kvm_msi_set_irq,
151 __entry->data = data; 151 __entry->data = data;
152 ), 152 ),
153 153
154 TP_printk("dst %u vec %x (%s|%s|%s%s)", 154 TP_printk("dst %u vec %u (%s|%s|%s%s)",
155 (u8)(__entry->address >> 12), (u8)__entry->data, 155 (u8)(__entry->address >> 12), (u8)__entry->data,
156 __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode), 156 __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode),
157 (__entry->address & (1<<2)) ? "logical" : "physical", 157 (__entry->address & (1<<2)) ? "logical" : "physical",
diff --git a/tools/Makefile b/tools/Makefile
index 6bf68fe7dd29..f10b64d8c674 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -16,6 +16,7 @@ help:
16 @echo ' gpio - GPIO tools' 16 @echo ' gpio - GPIO tools'
17 @echo ' hv - tools used when in Hyper-V clients' 17 @echo ' hv - tools used when in Hyper-V clients'
18 @echo ' iio - IIO tools' 18 @echo ' iio - IIO tools'
19 @echo ' kvm_stat - top-like utility for displaying kvm statistics'
19 @echo ' lguest - a minimal 32-bit x86 hypervisor' 20 @echo ' lguest - a minimal 32-bit x86 hypervisor'
20 @echo ' net - misc networking tools' 21 @echo ' net - misc networking tools'
21 @echo ' perf - Linux performance measurement and analysis tool' 22 @echo ' perf - Linux performance measurement and analysis tool'
@@ -110,10 +111,13 @@ tmon_install:
110freefall_install: 111freefall_install:
111 $(call descend,laptop/$(@:_install=),install) 112 $(call descend,laptop/$(@:_install=),install)
112 113
114kvm_stat_install:
115 $(call descend,kvm/$(@:_install=),install)
116
113install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \ 117install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \
114 perf_install selftests_install turbostat_install usb_install \ 118 perf_install selftests_install turbostat_install usb_install \
115 virtio_install vm_install net_install x86_energy_perf_policy_install \ 119 virtio_install vm_install net_install x86_energy_perf_policy_install \
116 tmon_install freefall_install objtool_install 120 tmon_install freefall_install objtool_install kvm_stat_install
117 121
118acpi_clean: 122acpi_clean:
119 $(call descend,power/acpi,clean) 123 $(call descend,power/acpi,clean)
diff --git a/tools/kvm/kvm_stat/Makefile b/tools/kvm/kvm_stat/Makefile
new file mode 100644
index 000000000000..5b1cba57e3b3
--- /dev/null
+++ b/tools/kvm/kvm_stat/Makefile
@@ -0,0 +1,41 @@
1include ../../scripts/Makefile.include
2include ../../scripts/utilities.mak
3BINDIR=usr/bin
4MANDIR=usr/share/man
5MAN1DIR=$(MANDIR)/man1
6
7MAN1=kvm_stat.1
8
9A2X=a2x
10a2x_path := $(call get-executable,$(A2X))
11
12all: man
13
14ifneq ($(findstring $(MAKEFLAGS),s),s)
15 ifneq ($(V),1)
16 QUIET_A2X = @echo ' A2X '$@;
17 endif
18endif
19
20%.1: %.txt
21ifeq ($(a2x_path),)
22 $(error "You need to install asciidoc for man pages")
23else
24 $(QUIET_A2X)$(A2X) --doctype manpage --format manpage $<
25endif
26
27clean:
28 rm -f $(MAN1)
29
30man: $(MAN1)
31
32install-man: man
33 install -d -m 755 $(INSTALL_ROOT)/$(MAN1DIR)
34 install -m 644 kvm_stat.1 $(INSTALL_ROOT)/$(MAN1DIR)
35
36install-tools:
37 install -d -m 755 $(INSTALL_ROOT)/$(BINDIR)
38 install -m 755 -p "kvm_stat" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)"
39
40install: install-tools install-man
41.PHONY: all clean man install-tools install-man install
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
new file mode 100755
index 000000000000..581278c58488
--- /dev/null
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -0,0 +1,1127 @@
1#!/usr/bin/python
2#
3# top-like utility for displaying kvm statistics
4#
5# Copyright 2006-2008 Qumranet Technologies
6# Copyright 2008-2011 Red Hat, Inc.
7#
8# Authors:
9# Avi Kivity <avi@redhat.com>
10#
11# This work is licensed under the terms of the GNU GPL, version 2. See
12# the COPYING file in the top-level directory.
13"""The kvm_stat module outputs statistics about running KVM VMs
14
15Three different ways of output formatting are available:
16- as a top-like text ui
17- in a key -> value format
18- in an all keys, all values format
19
20The data is sampled from the KVM's debugfs entries and its perf events.
21"""
22
23import curses
24import sys
25import os
26import time
27import optparse
28import ctypes
29import fcntl
30import resource
31import struct
32import re
33from collections import defaultdict
34from time import sleep
35
36VMX_EXIT_REASONS = {
37 'EXCEPTION_NMI': 0,
38 'EXTERNAL_INTERRUPT': 1,
39 'TRIPLE_FAULT': 2,
40 'PENDING_INTERRUPT': 7,
41 'NMI_WINDOW': 8,
42 'TASK_SWITCH': 9,
43 'CPUID': 10,
44 'HLT': 12,
45 'INVLPG': 14,
46 'RDPMC': 15,
47 'RDTSC': 16,
48 'VMCALL': 18,
49 'VMCLEAR': 19,
50 'VMLAUNCH': 20,
51 'VMPTRLD': 21,
52 'VMPTRST': 22,
53 'VMREAD': 23,
54 'VMRESUME': 24,
55 'VMWRITE': 25,
56 'VMOFF': 26,
57 'VMON': 27,
58 'CR_ACCESS': 28,
59 'DR_ACCESS': 29,
60 'IO_INSTRUCTION': 30,
61 'MSR_READ': 31,
62 'MSR_WRITE': 32,
63 'INVALID_STATE': 33,
64 'MWAIT_INSTRUCTION': 36,
65 'MONITOR_INSTRUCTION': 39,
66 'PAUSE_INSTRUCTION': 40,
67 'MCE_DURING_VMENTRY': 41,
68 'TPR_BELOW_THRESHOLD': 43,
69 'APIC_ACCESS': 44,
70 'EPT_VIOLATION': 48,
71 'EPT_MISCONFIG': 49,
72 'WBINVD': 54,
73 'XSETBV': 55,
74 'APIC_WRITE': 56,
75 'INVPCID': 58,
76}
77
78SVM_EXIT_REASONS = {
79 'READ_CR0': 0x000,
80 'READ_CR3': 0x003,
81 'READ_CR4': 0x004,
82 'READ_CR8': 0x008,
83 'WRITE_CR0': 0x010,
84 'WRITE_CR3': 0x013,
85 'WRITE_CR4': 0x014,
86 'WRITE_CR8': 0x018,
87 'READ_DR0': 0x020,
88 'READ_DR1': 0x021,
89 'READ_DR2': 0x022,
90 'READ_DR3': 0x023,
91 'READ_DR4': 0x024,
92 'READ_DR5': 0x025,
93 'READ_DR6': 0x026,
94 'READ_DR7': 0x027,
95 'WRITE_DR0': 0x030,
96 'WRITE_DR1': 0x031,
97 'WRITE_DR2': 0x032,
98 'WRITE_DR3': 0x033,
99 'WRITE_DR4': 0x034,
100 'WRITE_DR5': 0x035,
101 'WRITE_DR6': 0x036,
102 'WRITE_DR7': 0x037,
103 'EXCP_BASE': 0x040,
104 'INTR': 0x060,
105 'NMI': 0x061,
106 'SMI': 0x062,
107 'INIT': 0x063,
108 'VINTR': 0x064,
109 'CR0_SEL_WRITE': 0x065,
110 'IDTR_READ': 0x066,
111 'GDTR_READ': 0x067,
112 'LDTR_READ': 0x068,
113 'TR_READ': 0x069,
114 'IDTR_WRITE': 0x06a,
115 'GDTR_WRITE': 0x06b,
116 'LDTR_WRITE': 0x06c,
117 'TR_WRITE': 0x06d,
118 'RDTSC': 0x06e,
119 'RDPMC': 0x06f,
120 'PUSHF': 0x070,
121 'POPF': 0x071,
122 'CPUID': 0x072,
123 'RSM': 0x073,
124 'IRET': 0x074,
125 'SWINT': 0x075,
126 'INVD': 0x076,
127 'PAUSE': 0x077,
128 'HLT': 0x078,
129 'INVLPG': 0x079,
130 'INVLPGA': 0x07a,
131 'IOIO': 0x07b,
132 'MSR': 0x07c,
133 'TASK_SWITCH': 0x07d,
134 'FERR_FREEZE': 0x07e,
135 'SHUTDOWN': 0x07f,
136 'VMRUN': 0x080,
137 'VMMCALL': 0x081,
138 'VMLOAD': 0x082,
139 'VMSAVE': 0x083,
140 'STGI': 0x084,
141 'CLGI': 0x085,
142 'SKINIT': 0x086,
143 'RDTSCP': 0x087,
144 'ICEBP': 0x088,
145 'WBINVD': 0x089,
146 'MONITOR': 0x08a,
147 'MWAIT': 0x08b,
148 'MWAIT_COND': 0x08c,
149 'XSETBV': 0x08d,
150 'NPF': 0x400,
151}
152
153# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
154AARCH64_EXIT_REASONS = {
155 'UNKNOWN': 0x00,
156 'WFI': 0x01,
157 'CP15_32': 0x03,
158 'CP15_64': 0x04,
159 'CP14_MR': 0x05,
160 'CP14_LS': 0x06,
161 'FP_ASIMD': 0x07,
162 'CP10_ID': 0x08,
163 'CP14_64': 0x0C,
164 'ILL_ISS': 0x0E,
165 'SVC32': 0x11,
166 'HVC32': 0x12,
167 'SMC32': 0x13,
168 'SVC64': 0x15,
169 'HVC64': 0x16,
170 'SMC64': 0x17,
171 'SYS64': 0x18,
172 'IABT': 0x20,
173 'IABT_HYP': 0x21,
174 'PC_ALIGN': 0x22,
175 'DABT': 0x24,
176 'DABT_HYP': 0x25,
177 'SP_ALIGN': 0x26,
178 'FP_EXC32': 0x28,
179 'FP_EXC64': 0x2C,
180 'SERROR': 0x2F,
181 'BREAKPT': 0x30,
182 'BREAKPT_HYP': 0x31,
183 'SOFTSTP': 0x32,
184 'SOFTSTP_HYP': 0x33,
185 'WATCHPT': 0x34,
186 'WATCHPT_HYP': 0x35,
187 'BKPT32': 0x38,
188 'VECTOR32': 0x3A,
189 'BRK64': 0x3C,
190}
191
192# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
193USERSPACE_EXIT_REASONS = {
194 'UNKNOWN': 0,
195 'EXCEPTION': 1,
196 'IO': 2,
197 'HYPERCALL': 3,
198 'DEBUG': 4,
199 'HLT': 5,
200 'MMIO': 6,
201 'IRQ_WINDOW_OPEN': 7,
202 'SHUTDOWN': 8,
203 'FAIL_ENTRY': 9,
204 'INTR': 10,
205 'SET_TPR': 11,
206 'TPR_ACCESS': 12,
207 'S390_SIEIC': 13,
208 'S390_RESET': 14,
209 'DCR': 15,
210 'NMI': 16,
211 'INTERNAL_ERROR': 17,
212 'OSI': 18,
213 'PAPR_HCALL': 19,
214 'S390_UCONTROL': 20,
215 'WATCHDOG': 21,
216 'S390_TSCH': 22,
217 'EPR': 23,
218 'SYSTEM_EVENT': 24,
219}
220
221IOCTL_NUMBERS = {
222 'SET_FILTER': 0x40082406,
223 'ENABLE': 0x00002400,
224 'DISABLE': 0x00002401,
225 'RESET': 0x00002403,
226}
227
228class Arch(object):
229 """Encapsulates global architecture specific data.
230
231 Contains the performance event open syscall and ioctl numbers, as
232 well as the VM exit reasons for the architecture it runs on.
233
234 """
235 @staticmethod
236 def get_arch():
237 machine = os.uname()[4]
238
239 if machine.startswith('ppc'):
240 return ArchPPC()
241 elif machine.startswith('aarch64'):
242 return ArchA64()
243 elif machine.startswith('s390'):
244 return ArchS390()
245 else:
246 # X86_64
247 for line in open('/proc/cpuinfo'):
248 if not line.startswith('flags'):
249 continue
250
251 flags = line.split()
252 if 'vmx' in flags:
253 return ArchX86(VMX_EXIT_REASONS)
254 if 'svm' in flags:
255 return ArchX86(SVM_EXIT_REASONS)
256 return
257
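# Editorial note for Arch.get_arch() above (not part of the patch): on an
# Intel x86_64 host /proc/cpuinfo advertises the 'vmx' flag, so the VMX exit
# reason table is selected; AMD hosts advertise 'svm' and get the SVM table;
# a host with neither flag makes get_arch() return None.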
258class ArchX86(Arch):
259 def __init__(self, exit_reasons):
260 self.sc_perf_evt_open = 298
261 self.ioctl_numbers = IOCTL_NUMBERS
262 self.exit_reasons = exit_reasons
263
264class ArchPPC(Arch):
265 def __init__(self):
266 self.sc_perf_evt_open = 319
267 self.ioctl_numbers = IOCTL_NUMBERS
268 self.ioctl_numbers['ENABLE'] = 0x20002400
269 self.ioctl_numbers['DISABLE'] = 0x20002401
270 self.ioctl_numbers['RESET'] = 0x20002403
271
272 # PPC comes in 32 and 64 bit and some generated ioctl
273 # numbers depend on the wordsize.
274 char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
275 self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
276 self.exit_reasons = {}
277
278class ArchA64(Arch):
279 def __init__(self):
280 self.sc_perf_evt_open = 241
281 self.ioctl_numbers = IOCTL_NUMBERS
282 self.exit_reasons = AARCH64_EXIT_REASONS
283
284class ArchS390(Arch):
285 def __init__(self):
286 self.sc_perf_evt_open = 331
287 self.ioctl_numbers = IOCTL_NUMBERS
288 self.exit_reasons = None
289
290ARCH = Arch.get_arch()
291
292
293def walkdir(path):
294 """Returns os.walk() data for specified directory.
295
296 As it is only a wrapper, it returns the same 3-tuple of (dirpath,
297 dirnames, filenames).
298 """
299 return next(os.walk(path))
300
301
302def parse_int_list(list_string):
303 """Returns an int list from a string of comma separated integers and
304 integer ranges."""
305 integers = []
306 members = list_string.split(',')
307
308 for member in members:
309 if '-' not in member:
310 integers.append(int(member))
311 else:
312 int_range = member.split('-')
313 integers.extend(range(int(int_range[0]),
314 int(int_range[1]) + 1))
315
316 return integers
317
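# Editorial example for parse_int_list() above (not part of the patch): given
# the format used by /sys/devices/system/cpu/online, parse_int_list('0-2,4')
# returns [0, 1, 2, 4].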
318
319def get_online_cpus():
320 """Returns a list of cpu id integers."""
321 with open('/sys/devices/system/cpu/online') as cpu_list:
322 cpu_string = cpu_list.readline()
323 return parse_int_list(cpu_string)
324
325
326def get_filters():
327 """Returns a dict of trace events, their filter ids and
328 the values that can be filtered.
329
330 Trace events can be filtered for special values by setting a
331 filter string via an ioctl. The string normally has the format
332 identifier==value. For each filter a new event will be created so
333 that the filtered events can be distinguished.
334
335 """
336 filters = {}
337 filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS)
338 if ARCH.exit_reasons:
339 filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
340 return filters
341
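# Editorial example for get_filters() above (not part of the patch): on an
# SVM host this returns {'kvm_userspace_exit': ('reason', USERSPACE_EXIT_REASONS),
# 'kvm_exit': ('exit_reason', SVM_EXIT_REASONS)}, and setup_traces() below
# turns a field such as 'kvm_exit(HLT)' into the filter string
# 'exit_reason==120' (0x078 in SVM_EXIT_REASONS).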
342libc = ctypes.CDLL('libc.so.6', use_errno=True)
343syscall = libc.syscall
344
345class perf_event_attr(ctypes.Structure):
346 """Struct that holds the necessary data to set up a trace event.
347
348 For an extensive explanation see perf_event_open(2) and
349 include/uapi/linux/perf_event.h, struct perf_event_attr
350
351 All fields that are not initialized in the constructor are 0.
352
353 """
354 _fields_ = [('type', ctypes.c_uint32),
355 ('size', ctypes.c_uint32),
356 ('config', ctypes.c_uint64),
357 ('sample_freq', ctypes.c_uint64),
358 ('sample_type', ctypes.c_uint64),
359 ('read_format', ctypes.c_uint64),
360 ('flags', ctypes.c_uint64),
361 ('wakeup_events', ctypes.c_uint32),
362 ('bp_type', ctypes.c_uint32),
363 ('bp_addr', ctypes.c_uint64),
364 ('bp_len', ctypes.c_uint64),
365 ]
366
367 def __init__(self):
368 super(self.__class__, self).__init__()
369 self.type = PERF_TYPE_TRACEPOINT
370 self.size = ctypes.sizeof(self)
371 self.read_format = PERF_FORMAT_GROUP
372
373def perf_event_open(attr, pid, cpu, group_fd, flags):
374 """Wrapper for the sys_perf_evt_open() syscall.
375
376 Used to set up performance events, returns a file descriptor or -1
377 on error.
378
379 Attributes are:
380 - syscall number
381 - struct perf_event_attr *
382 - pid or -1 to monitor all pids
383 - cpu number or -1 to monitor all cpus
384 - The file descriptor of the group leader or -1 to create a group.
385 - flags
386
387 """
388 return syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr),
389 ctypes.c_int(pid), ctypes.c_int(cpu),
390 ctypes.c_int(group_fd), ctypes.c_long(flags))
391
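# Editorial sketch for perf_event_open() above (not part of the patch): to
# open a group leader counting the kvm:kvm_exit tracepoint on CPU 0 for all
# pids, one would roughly do
#   attr = perf_event_attr()
#   attr.config = int(open(PATH_DEBUGFS_TRACING +
#                          '/events/kvm/kvm_exit/id').read())
#   fd = perf_event_open(attr, -1, 0, -1, 0)
# which is what Event.setup_event() below does for every monitored field.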
392PERF_TYPE_TRACEPOINT = 2
393PERF_FORMAT_GROUP = 1 << 3
394
395PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
396PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
397
398class Group(object):
399 """Represents a perf event group."""
400
401 def __init__(self):
402 self.events = []
403
404 def add_event(self, event):
405 self.events.append(event)
406
407 def read(self):
408 """Returns a dict with 'event name: value' for all events in the
409 group.
410
411 Values are read by reading from the file descriptor of the
412 event that is the group leader. See perf_event_open(2) for
413 details.
414
415 Read format for the used event configuration is:
416 struct read_format {
417 u64 nr; /* The number of events */
418 struct {
419 u64 value; /* The value of the event */
420 } values[nr];
421 };
422
423 """
424 length = 8 * (1 + len(self.events))
425 read_format = 'xxxxxxxx' + 'Q' * len(self.events)
426 return dict(zip([event.name for event in self.events],
427 struct.unpack(read_format,
428 os.read(self.events[0].fd, length))))
429
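# Editorial example for Group.read() above (not part of the patch): with two
# events in a group, length is 8 * (1 + 2) = 24 bytes; the leading u64 'nr'
# is skipped by the 'xxxxxxxx' pad in the struct format and the remaining
# two u64 counters are zipped with the event names, e.g.
# {'kvm_entry': 1500, 'kvm_exit': 1500}.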
430class Event(object):
431 """Represents a performance event and manages its life cycle."""
432 def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
433 trace_filter, trace_set='kvm'):
434 self.name = name
435 self.fd = None
436 self.setup_event(group, trace_cpu, trace_pid, trace_point,
437 trace_filter, trace_set)
438
439 def __del__(self):
440 """Closes the event's file descriptor.
441
442 As no python file object was created for the file descriptor,
443 python will not reference count the descriptor and will not
444 close it automatically, so we do it ourselves.
445
446 """
447 if self.fd:
448 os.close(self.fd)
449
450 def setup_event_attribute(self, trace_set, trace_point):
451 """Returns an initialized ctype perf_event_attr struct."""
452
453 id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
454 trace_point, 'id')
455
456 event_attr = perf_event_attr()
457 event_attr.config = int(open(id_path).read())
458 return event_attr
459
460 def setup_event(self, group, trace_cpu, trace_pid, trace_point,
461 trace_filter, trace_set):
462 """Sets up the perf event in Linux.
463
464 Issues the syscall to register the event in the kernel and
465 then sets the optional filter.
466
467 """
468
469 event_attr = self.setup_event_attribute(trace_set, trace_point)
470
471 # First event will be group leader.
472 group_leader = -1
473
474 # All others have to pass the leader's descriptor instead.
475 if group.events:
476 group_leader = group.events[0].fd
477
478 fd = perf_event_open(event_attr, trace_pid,
479 trace_cpu, group_leader, 0)
480 if fd == -1:
481 err = ctypes.get_errno()
482 raise OSError(err, os.strerror(err),
483 'while calling sys_perf_event_open().')
484
485 if trace_filter:
486 fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
487 trace_filter)
488
489 self.fd = fd
490
491 def enable(self):
492 """Enables the trace event in the kernel.
493
494 Enabling the group leader makes reading counters from it and the
495 events under it possible.
496
497 """
498 fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)
499
500 def disable(self):
501 """Disables the trace event in the kernel.
502
503 Disabling the group leader makes reading all counters under it
504 impossible.
505
506 """
507 fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)
508
509 def reset(self):
510 """Resets the count of the trace event in the kernel."""
511 fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
512
513class TracepointProvider(object):
514 """Data provider for the stats class.
515
516 Manages the events/groups from which it acquires its data.
517
518 """
519 def __init__(self):
520 self.group_leaders = []
521 self.filters = get_filters()
522 self._fields = self.get_available_fields()
523 self._pid = 0
524
525 def get_available_fields(self):
526 """Returns a list of available event's of format 'event name(filter
527 name)'.
528
529 All available events have directories under
530 /sys/kernel/debug/tracing/events/ which export information
531 about the specific event. Therefore, listing the dirs gives us
532 a list of all available events.
533
534 Some events like the vm exit reasons can be filtered for
535 specific values. To account for that, the routine below
536 creates special fields with the following format:
537 event name(filter name)
538
539 """
540 path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
541 fields = walkdir(path)[1]
542 extra = []
543 for field in fields:
544 if field in self.filters:
545 filter_name_, filter_dicts = self.filters[field]
546 for name in filter_dicts:
547 extra.append(field + '(' + name + ')')
548 fields += extra
549 return fields
550
551 def setup_traces(self):
552 """Creates all event and group objects needed to be able to retrieve
553 data."""
554 if self._pid > 0:
555 # Fetch list of all threads of the monitored pid, as qemu
556 # starts a thread for each vcpu.
557 path = os.path.join('/proc', str(self._pid), 'task')
558 groupids = walkdir(path)[1]
559 else:
560 groupids = get_online_cpus()
561
562 # The constant is needed as a buffer for python libs, std
563 # streams and other files that the script opens.
564 newlim = len(groupids) * len(self._fields) + 50
565 try:
566 softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)
567
568 if hardlim < newlim:
569 # Now we need CAP_SYS_RESOURCE, to increase the hard limit.
570 resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim))
571 else:
572 # Raising the soft limit is sufficient.
573 resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim))
574
575 except ValueError:
576 sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))
577
578 for groupid in groupids:
579 group = Group()
580 for name in self._fields:
581 tracepoint = name
582 tracefilter = None
583 match = re.match(r'(.*)\((.*)\)', name)
584 if match:
585 tracepoint, sub = match.groups()
586 tracefilter = ('%s==%d\0' %
587 (self.filters[tracepoint][0],
588 self.filters[tracepoint][1][sub]))
589
590 # From perf_event_open(2):
591 # pid > 0 and cpu == -1
592 # This measures the specified process/thread on any CPU.
593 #
594 # pid == -1 and cpu >= 0
595 # This measures all processes/threads on the specified CPU.
596 trace_cpu = groupid if self._pid == 0 else -1
597 trace_pid = int(groupid) if self._pid != 0 else -1
598
599 group.add_event(Event(name=name,
600 group=group,
601 trace_cpu=trace_cpu,
602 trace_pid=trace_pid,
603 trace_point=tracepoint,
604 trace_filter=tracefilter))
605
606 self.group_leaders.append(group)
607
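# Editorial note for setup_traces() above (not part of the patch): one perf
# fd is opened per (group, field) pair, so when monitoring e.g. 4 online CPUs
# with 40 fields the NOFILE limit is raised to 4 * 40 + 50 = 210, the extra
# 50 being headroom for the interpreter's own open files.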
608 def available_fields(self):
609 return self.get_available_fields()
610
611 @property
612 def fields(self):
613 return self._fields
614
615 @fields.setter
616 def fields(self, fields):
617 """Enables/disables the (un)wanted events"""
618 self._fields = fields
619 for group in self.group_leaders:
620 for index, event in enumerate(group.events):
621 if event.name in fields:
622 event.reset()
623 event.enable()
624 else:
625 # Do not disable the group leader.
626 # It would disable all of its events.
627 if index != 0:
628 event.disable()
629
630 @property
631 def pid(self):
632 return self._pid
633
634 @pid.setter
635 def pid(self, pid):
636 """Changes the monitored pid by setting new traces."""
637 self._pid = pid
638 # The garbage collector will get rid of all Event/Group
639 # objects and open files after removing the references.
640 self.group_leaders = []
641 self.setup_traces()
642 self.fields = self._fields
643
644 def read(self):
645 """Returns 'event name: current value' for all enabled events."""
646 ret = defaultdict(int)
647 for group in self.group_leaders:
648 for name, val in group.read().iteritems():
649 if name in self._fields:
650 ret[name] += val
651 return ret
652
653class DebugfsProvider(object):
654 """Provides data from the files that KVM creates in the kvm debugfs
655 folder."""
656 def __init__(self):
657 self._fields = self.get_available_fields()
658 self._pid = 0
659 self.do_read = True
660
661 def get_available_fields(self):
662 """"Returns a list of available fields.
663
664 The fields are all available KVM debugfs files
665
666 """
667 return walkdir(PATH_DEBUGFS_KVM)[2]
668
669 @property
670 def fields(self):
671 return self._fields
672
673 @fields.setter
674 def fields(self, fields):
675 self._fields = fields
676
677 @property
678 def pid(self):
679 return self._pid
680
681 @pid.setter
682 def pid(self, pid):
683 if pid != 0:
684 self._pid = pid
685
686 vms = walkdir(PATH_DEBUGFS_KVM)[1]
687 if len(vms) == 0:
688 self.do_read = False
689
690 self.paths = filter(lambda x: "{}-".format(pid) in x, vms)
691
692 else:
693 self.paths = ['']
694 self.do_read = True
695
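# Editorial note for the pid setter above (not part of the patch): the
# per-VM debugfs directories added by this series are named '<pid>-<fd>'
# (e.g. '1234-10', assuming fd 10), so for pid 1234 self.paths would hold the
# matching directory names, while pid 0 keeps paths == [''] and sums the
# global statistics.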
696 def read(self):
697 """Returns a dict with format:'file name / field -> current value'."""
698 results = {}
699
700 # If no debugfs filtering support is available, then don't read.
701 if not self.do_read:
702 return results
703
704 for path in self.paths:
705 for field in self._fields:
706 results[field] = results.get(field, 0) \
707 + self.read_field(field, path)
708
709 return results
710
711 def read_field(self, field, path):
712 """Returns the value of a single field from a specific VM."""
713 try:
714 return int(open(os.path.join(PATH_DEBUGFS_KVM,
715 path,
716 field))
717 .read())
718 except IOError:
719 return 0
720
721class Stats(object):
722 """Manages the data providers and the data they provide.
723
724 It is used to set filters on the provider's data and collect all
725 provider data.
726
727 """
728 def __init__(self, providers, pid, fields=None):
729 self.providers = providers
730 self._pid_filter = pid
731 self._fields_filter = fields
732 self.values = {}
733 self.update_provider_pid()
734 self.update_provider_filters()
735
736 def update_provider_filters(self):
737 """Propagates fields filters to providers."""
738 def wanted(key):
739 if not self._fields_filter:
740 return True
741 return re.match(self._fields_filter, key) is not None
742
743 # As we reset the counters when updating the fields we can
744 # also clear the cache of old values.
745 self.values = {}
746 for provider in self.providers:
747 provider_fields = [key for key in provider.get_available_fields()
748 if wanted(key)]
749 provider.fields = provider_fields
750
751 def update_provider_pid(self):
752 """Propagates pid filters to providers."""
753 for provider in self.providers:
754 provider.pid = self._pid_filter
755
756 @property
757 def fields_filter(self):
758 return self._fields_filter
759
760 @fields_filter.setter
761 def fields_filter(self, fields_filter):
762 self._fields_filter = fields_filter
763 self.update_provider_filters()
764
765 @property
766 def pid_filter(self):
767 return self._pid_filter
768
769 @pid_filter.setter
770 def pid_filter(self, pid):
771 self._pid_filter = pid
772 self.values = {}
773 self.update_provider_pid()
774
775 def get(self):
776 """Returns a dict with field -> (value, delta to last value) of all
777 provider data."""
778 for provider in self.providers:
779 new = provider.read()
780 for key in provider.fields:
781 oldval = self.values.get(key, (0, 0))
782 newval = new.get(key, 0)
783 newdelta = None
784 if oldval is not None:
785 newdelta = newval - oldval[0]
786 self.values[key] = (newval, newdelta)
787 return self.values
788
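# Editorial example for Stats.get() above (not part of the patch): after two
# refresh cycles it might return {'kvm_exit': (4711, 120)}, i.e. 4711 events
# in total and 120 since the previous call; the Tui, batch and log front-ends
# below only differ in how they render this dict.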
789LABEL_WIDTH = 40
790NUMBER_WIDTH = 10
791
792class Tui(object):
793 """Instruments curses to draw a nice text ui."""
794 def __init__(self, stats):
795 self.stats = stats
796 self.screen = None
797 self.drilldown = False
798 self.update_drilldown()
799
800 def __enter__(self):
801 """Initialises curses for later use. Based on curses.wrapper
802 implementation from the Python standard library."""
803 self.screen = curses.initscr()
804 curses.noecho()
805 curses.cbreak()
806
807 # The try/catch works around a minor bit of
808 # over-conscientiousness in the curses module, the error
809 # return from C start_color() is ignorable.
810 try:
811 curses.start_color()
812 except:
813 pass
814
815 curses.use_default_colors()
816 return self
817
818 def __exit__(self, *exception):
819 """Resets the terminal to its normal state. Based on curses.wrappre
820 implementation from the Python standard library."""
821 if self.screen:
822 self.screen.keypad(0)
823 curses.echo()
824 curses.nocbreak()
825 curses.endwin()
826
827 def update_drilldown(self):
828 """Sets or removes a filter that only allows fields without braces."""
829 if not self.stats.fields_filter:
830 self.stats.fields_filter = r'^[^\(]*$'
831
832 elif self.stats.fields_filter == r'^[^\(]*$':
833 self.stats.fields_filter = None
834
835 def update_pid(self, pid):
836 """Propagates pid selection to stats object."""
837 self.stats.pid_filter = pid
838
839 def refresh(self, sleeptime):
840 """Refreshes on-screen data."""
841 self.screen.erase()
842 if self.stats.pid_filter > 0:
843 self.screen.addstr(0, 0, 'kvm statistics - pid {0}'
844 .format(self.stats.pid_filter),
845 curses.A_BOLD)
846 else:
847 self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
848 self.screen.addstr(2, 1, 'Event')
849 self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH -
850 len('Total'), 'Total')
851 self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 -
852 len('Current'), 'Current')
853 row = 3
854 stats = self.stats.get()
855 def sortkey(x):
856 if stats[x][1]:
857 return (-stats[x][1], -stats[x][0])
858 else:
859 return (0, -stats[x][0])
860 for key in sorted(stats.keys(), key=sortkey):
861
862 if row >= self.screen.getmaxyx()[0]:
863 break
864 values = stats[key]
865 if not values[0] and not values[1]:
866 break
867 col = 1
868 self.screen.addstr(row, col, key)
869 col += LABEL_WIDTH
870 self.screen.addstr(row, col, '%10d' % (values[0],))
871 col += NUMBER_WIDTH
872 if values[1] is not None:
873 self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
874 row += 1
875 self.screen.refresh()
876
877 def show_filter_selection(self):
878 """Draws filter selection mask.
879
880 Asks for a valid regex and sets the fields filter accordingly.
881
882 """
883 while True:
884 self.screen.erase()
885 self.screen.addstr(0, 0,
886 "Show statistics for events matching a regex.",
887 curses.A_BOLD)
888 self.screen.addstr(2, 0,
889 "Current regex: {0}"
890 .format(self.stats.fields_filter))
891 self.screen.addstr(3, 0, "New regex: ")
892 curses.echo()
893 regex = self.screen.getstr()
894 curses.noecho()
895 if len(regex) == 0:
896 return
897 try:
898 re.compile(regex)
899 self.stats.fields_filter = regex
900 return
901 except re.error:
902 continue
903
904 def show_vm_selection(self):
905 """Draws PID selection mask.
906
907 Asks for a pid until a valid pid or 0 has been entered.
908
909 """
910 while True:
911 self.screen.erase()
912 self.screen.addstr(0, 0,
913 'Show statistics for specific pid.',
914 curses.A_BOLD)
915 self.screen.addstr(1, 0,
916 'This might limit the shown data to the trace '
917 'statistics.')
918
919 curses.echo()
920 self.screen.addstr(3, 0, "Pid [0 or pid]: ")
921 pid = self.screen.getstr()
922 curses.noecho()
923
924 try:
925 pid = int(pid)
926
927 if pid == 0:
928 self.update_pid(pid)
929 break
930 else:
931 if not os.path.isdir(os.path.join('/proc/', str(pid))):
932 continue
933 else:
934 self.update_pid(pid)
935 break
936
937 except ValueError:
938 continue
939
940 def show_stats(self):
941 """Refreshes the screen and processes user input."""
942 sleeptime = 0.25
943 while True:
944 self.refresh(sleeptime)
945 curses.halfdelay(int(sleeptime * 10))
946 sleeptime = 3
947 try:
948 char = self.screen.getkey()
949 if char == 'x':
950 self.drilldown = not self.drilldown
951 self.update_drilldown()
952 if char == 'q':
953 break
954 if char == 'f':
955 self.show_filter_selection()
956 if char == 'p':
957 self.show_vm_selection()
958 except KeyboardInterrupt:
959 break
960 except curses.error:
961 continue
962
963def batch(stats):
964 """Prints statistics in a key, value format."""
965 s = stats.get()
966 time.sleep(1)
967 s = stats.get()
968 for key in sorted(s.keys()):
969 values = s[key]
970 print '%-42s%10d%10d' % (key, values[0], values[1])
971
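# Editorial example for batch() above (not part of the patch): each output
# line has the event name left-aligned in 42 columns followed by the total
# count and the delta accumulated over the one-second sample, e.g.
#   kvm_exit                                        4711       120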
972def log(stats):
973 """Prints statistics as reiterating key block, multiple value blocks."""
974 keys = sorted(stats.get().iterkeys())
975 def banner():
976 for k in keys:
977 print '%s' % k,
978 print
979 def statline():
980 s = stats.get()
981 for k in keys:
982 print ' %9d' % s[k][1],
983 print
984 line = 0
985 banner_repeat = 20
986 while True:
987 time.sleep(1)
988 if line % banner_repeat == 0:
989 banner()
990 statline()
991 line += 1
992
993def get_options():
994 """Returns processed program arguments."""
995 description_text = """
996This script displays various statistics about VMs running under KVM.
997The statistics are gathered from the KVM debugfs entries and / or the
998currently available perf traces.
999
1000The monitoring takes additional cpu cycles and might affect the VM's
1001performance.
1002
1003Requirements:
1004- Access to:
1005 /sys/kernel/debug/kvm
1006 /sys/kernel/debug/tracing/events/*
1007 /proc/pid/task
1008- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
1009 CAP_SYS_ADMIN and perf events are used.
1010- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
1011 the large number of files that are possibly opened.
1012"""
1013
1014 class PlainHelpFormatter(optparse.IndentedHelpFormatter):
1015 def format_description(self, description):
1016 if description:
1017 return description + "\n"
1018 else:
1019 return ""
1020
1021 optparser = optparse.OptionParser(description=description_text,
1022 formatter=PlainHelpFormatter())
1023 optparser.add_option('-1', '--once', '--batch',
1024 action='store_true',
1025 default=False,
1026 dest='once',
1027 help='run in batch mode for one second',
1028 )
1029 optparser.add_option('-l', '--log',
1030 action='store_true',
1031 default=False,
1032 dest='log',
1033 help='run in logging mode (like vmstat)',
1034 )
1035 optparser.add_option('-t', '--tracepoints',
1036 action='store_true',
1037 default=False,
1038 dest='tracepoints',
1039 help='retrieve statistics from tracepoints',
1040 )
1041 optparser.add_option('-d', '--debugfs',
1042 action='store_true',
1043 default=False,
1044 dest='debugfs',
1045 help='retrieve statistics from debugfs',
1046 )
1047 optparser.add_option('-f', '--fields',
1048 action='store',
1049 default=None,
1050 dest='fields',
1051 help='fields to display (regex)',
1052 )
1053 optparser.add_option('-p', '--pid',
1054 action='store',
1055 default=0,
1056 type=int,
1057 dest='pid',
1058 help='restrict statistics to pid',
1059 )
1060 (options, _) = optparser.parse_args(sys.argv)
1061 return options
1062
1063def get_providers(options):
1064 """Returns a list of data providers depending on the passed options."""
1065 providers = []
1066
1067 if options.tracepoints:
1068 providers.append(TracepointProvider())
1069 if options.debugfs:
1070 providers.append(DebugfsProvider())
1071 if len(providers) == 0:
1072 providers.append(TracepointProvider())
1073
1074 return providers
1075
1076def check_access(options):
1077 """Exits if the current user can't access all needed directories."""
1078 if not os.path.exists('/sys/kernel/debug'):
1079 sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.')
1080 sys.exit(1)
1081
1082 if not os.path.exists(PATH_DEBUGFS_KVM):
1083 sys.stderr.write("Please make sure, that debugfs is mounted and "
1084 "readable by the current user:\n"
1085 "('mount -t debugfs debugfs /sys/kernel/debug')\n"
1086 "Also ensure, that the kvm modules are loaded.\n")
1087 sys.exit(1)
1088
1089 if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints
1090 or not options.debugfs):
1091 sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
1092 "when using the option -t (default).\n"
1093 "If it is enabled, make {0} readable by the "
1094 "current user.\n"
1095 .format(PATH_DEBUGFS_TRACING))
1096 if options.tracepoints:
1097 sys.exit(1)
1098
1099 sys.stderr.write("Falling back to debugfs statistics!\n")
1100 options.debugfs = True
1101 sleep(5)
1102
1103 return options
1104
1105def main():
1106 options = get_options()
1107 options = check_access(options)
1108
1109 if (options.pid > 0 and
1110 not os.path.isdir(os.path.join('/proc/',
1111 str(options.pid)))):
1112 sys.stderr.write('Did you use an (unsupported) tid instead of a pid?\n')
1113 sys.exit('Specified pid does not exist.')
1114
1115 providers = get_providers(options)
1116 stats = Stats(providers, options.pid, fields=options.fields)
1117
1118 if options.log:
1119 log(stats)
1120 elif not options.once:
1121 with Tui(stats) as tui:
1122 tui.show_stats()
1123 else:
1124 batch(stats)
1125
1126if __name__ == "__main__":
1127 main()
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
new file mode 100644
index 000000000000..b92a153d7115
--- /dev/null
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -0,0 +1,63 @@
1kvm_stat(1)
2===========
3
4NAME
5----
6kvm_stat - Report KVM kernel module event counters
7
8SYNOPSIS
9--------
10[verse]
11'kvm_stat' [OPTION]...
12
13DESCRIPTION
14-----------
15kvm_stat prints counts of KVM kernel module trace events. These events signify
16state transitions such as guest mode entry and exit.
17
18This tool is useful for observing guest behavior from the host perspective.
19Often conclusions about performance or buggy behavior can be drawn from the
20output.
21
22The set of KVM kernel module trace events may be specific to the kernel version
23or architecture. It is best to check the KVM kernel module source code for the
24meaning of events.
25
26OPTIONS
27-------
28-1::
29--once::
30--batch::
31 run in batch mode for one second
32
33-l::
34--log::
35 run in logging mode (like vmstat)
36
37-t::
38--tracepoints::
39 retrieve statistics from tracepoints
40
41-d::
42--debugfs::
43 retrieve statistics from debugfs
44
45-p<pid>::
46--pid=<pid>::
47 limit statistics to one virtual machine (pid)
48
49-f<fields>::
50--fields=<fields>::
51 fields to display (regex)
52
53-h::
54--help::
55 show help message
56
57SEE ALSO
58--------
59'perf'(1), 'trace-cmd'(1)
60
61AUTHOR
62------
63Stefan Hajnoczi <stefanha@redhat.com>
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 409db3304471..e2d5b6f988fb 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -20,6 +20,7 @@
20#include <linux/kvm.h> 20#include <linux/kvm.h>
21#include <linux/kvm_host.h> 21#include <linux/kvm_host.h>
22#include <linux/interrupt.h> 22#include <linux/interrupt.h>
23#include <linux/irq.h>
23 24
24#include <clocksource/arm_arch_timer.h> 25#include <clocksource/arm_arch_timer.h>
25#include <asm/arch_timer.h> 26#include <asm/arch_timer.h>
@@ -174,10 +175,10 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
174 175
175 timer->active_cleared_last = false; 176 timer->active_cleared_last = false;
176 timer->irq.level = new_level; 177 timer->irq.level = new_level;
177 trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq, 178 trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->irq.irq,
178 timer->irq.level); 179 timer->irq.level);
179 ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, 180 ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
180 timer->map, 181 timer->irq.irq,
181 timer->irq.level); 182 timer->irq.level);
182 WARN_ON(ret); 183 WARN_ON(ret);
183} 184}
@@ -196,7 +197,7 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
196 * because the guest would never see the interrupt. Instead wait 197 * because the guest would never see the interrupt. Instead wait
197 * until we call this function from kvm_timer_flush_hwstate. 198 * until we call this function from kvm_timer_flush_hwstate.
198 */ 199 */
199 if (!vgic_initialized(vcpu->kvm)) 200 if (!vgic_initialized(vcpu->kvm) || !timer->enabled)
200 return -ENODEV; 201 return -ENODEV;
201 202
202 if (kvm_timer_should_fire(vcpu) != timer->irq.level) 203 if (kvm_timer_should_fire(vcpu) != timer->irq.level)
@@ -274,10 +275,8 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
274 * to ensure that hardware interrupts from the timer triggers a guest 275 * to ensure that hardware interrupts from the timer triggers a guest
275 * exit. 276 * exit.
276 */ 277 */
277 if (timer->irq.level || kvm_vgic_map_is_active(vcpu, timer->map)) 278 phys_active = timer->irq.level ||
278 phys_active = true; 279 kvm_vgic_map_is_active(vcpu, timer->irq.irq);
279 else
280 phys_active = false;
281 280
282 /* 281 /*
283 * We want to avoid hitting the (re)distributor as much as 282 * We want to avoid hitting the (re)distributor as much as
@@ -302,7 +301,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
302 if (timer->active_cleared_last && !phys_active) 301 if (timer->active_cleared_last && !phys_active)
303 return; 302 return;
304 303
305 ret = irq_set_irqchip_state(timer->map->irq, 304 ret = irq_set_irqchip_state(host_vtimer_irq,
306 IRQCHIP_STATE_ACTIVE, 305 IRQCHIP_STATE_ACTIVE,
307 phys_active); 306 phys_active);
308 WARN_ON(ret); 307 WARN_ON(ret);
@@ -334,7 +333,6 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
334 const struct kvm_irq_level *irq) 333 const struct kvm_irq_level *irq)
335{ 334{
336 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 335 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
337 struct irq_phys_map *map;
338 336
339 /* 337 /*
340 * The vcpu timer irq number cannot be determined in 338 * The vcpu timer irq number cannot be determined in
@@ -353,15 +351,6 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
353 timer->cntv_ctl = 0; 351 timer->cntv_ctl = 0;
354 kvm_timer_update_state(vcpu); 352 kvm_timer_update_state(vcpu);
355 353
356 /*
357 * Tell the VGIC that the virtual interrupt is tied to a
358 * physical interrupt. We do that once per VCPU.
359 */
360 map = kvm_vgic_map_phys_irq(vcpu, irq->irq, host_vtimer_irq);
361 if (WARN_ON(IS_ERR(map)))
362 return PTR_ERR(map);
363
364 timer->map = map;
365 return 0; 354 return 0;
366} 355}
367 356
@@ -487,14 +476,43 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
487 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 476 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
488 477
489 timer_disarm(timer); 478 timer_disarm(timer);
490 if (timer->map) 479 kvm_vgic_unmap_phys_irq(vcpu, timer->irq.irq);
491 kvm_vgic_unmap_phys_irq(vcpu, timer->map);
492} 480}
493 481
494void kvm_timer_enable(struct kvm *kvm) 482int kvm_timer_enable(struct kvm_vcpu *vcpu)
495{ 483{
496 if (kvm->arch.timer.enabled) 484 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
497 return; 485 struct irq_desc *desc;
486 struct irq_data *data;
487 int phys_irq;
488 int ret;
489
490 if (timer->enabled)
491 return 0;
492
493 /*
494 * Find the physical IRQ number corresponding to the host_vtimer_irq
495 */
496 desc = irq_to_desc(host_vtimer_irq);
497 if (!desc) {
498 kvm_err("%s: no interrupt descriptor\n", __func__);
499 return -EINVAL;
500 }
501
502 data = irq_desc_get_irq_data(desc);
503 while (data->parent_data)
504 data = data->parent_data;
505
506 phys_irq = data->hwirq;
507
508 /*
509 * Tell the VGIC that the virtual interrupt is tied to a
510 * physical interrupt. We do that once per VCPU.
511 */
512 ret = kvm_vgic_map_phys_irq(vcpu, timer->irq.irq, phys_irq);
513 if (ret)
514 return ret;
515
498 516
499 /* 517 /*
500 * There is a potential race here between VCPUs starting for the first 518 * There is a potential race here between VCPUs starting for the first
@@ -505,7 +523,9 @@ void kvm_timer_enable(struct kvm *kvm)
505 * the arch timers are enabled. 523 * the arch timers are enabled.
506 */ 524 */
507 if (timecounter && wqueue) 525 if (timecounter && wqueue)
508 kvm->arch.timer.enabled = 1; 526 timer->enabled = 1;
527
528 return 0;
509} 529}
510 530
511void kvm_timer_init(struct kvm *kvm) 531void kvm_timer_init(struct kvm *kvm)
diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c
index ea00d69e7078..798866a8d875 100644
--- a/virt/kvm/arm/hyp/timer-sr.c
+++ b/virt/kvm/arm/hyp/timer-sr.c
@@ -24,11 +24,10 @@
24/* vcpu is already in the HYP VA space */ 24/* vcpu is already in the HYP VA space */
25void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) 25void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu)
26{ 26{
27 struct kvm *kvm = kern_hyp_va(vcpu->kvm);
28 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 27 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
29 u64 val; 28 u64 val;
30 29
31 if (kvm->arch.timer.enabled) { 30 if (timer->enabled) {
32 timer->cntv_ctl = read_sysreg_el0(cntv_ctl); 31 timer->cntv_ctl = read_sysreg_el0(cntv_ctl);
33 timer->cntv_cval = read_sysreg_el0(cntv_cval); 32 timer->cntv_cval = read_sysreg_el0(cntv_cval);
34 } 33 }
@@ -60,7 +59,7 @@ void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu)
60 val |= CNTHCTL_EL1PCTEN; 59 val |= CNTHCTL_EL1PCTEN;
61 write_sysreg(val, cnthctl_el2); 60 write_sysreg(val, cnthctl_el2);
62 61
63 if (kvm->arch.timer.enabled) { 62 if (timer->enabled) {
64 write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); 63 write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2);
65 write_sysreg_el0(timer->cntv_cval, cntv_cval); 64 write_sysreg_el0(timer->cntv_cval, cntv_cval);
66 isb(); 65 isb();
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
index 674bdf8ecf4f..a3f12b3b277b 100644
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v2-sr.c
@@ -21,11 +21,18 @@
21 21
22#include <asm/kvm_hyp.h> 22#include <asm/kvm_hyp.h>
23 23
24#ifdef CONFIG_KVM_NEW_VGIC
25extern struct vgic_global kvm_vgic_global_state;
26#define vgic_v2_params kvm_vgic_global_state
27#else
28extern struct vgic_params vgic_v2_params;
29#endif
30
24static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, 31static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu,
25 void __iomem *base) 32 void __iomem *base)
26{ 33{
27 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; 34 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
28 int nr_lr = vcpu->arch.vgic_cpu.nr_lr; 35 int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr;
29 u32 eisr0, eisr1; 36 u32 eisr0, eisr1;
30 int i; 37 int i;
31 bool expect_mi; 38 bool expect_mi;
@@ -67,7 +74,7 @@ static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu,
67static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) 74static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
68{ 75{
69 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; 76 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
70 int nr_lr = vcpu->arch.vgic_cpu.nr_lr; 77 int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr;
71 u32 elrsr0, elrsr1; 78 u32 elrsr0, elrsr1;
72 79
73 elrsr0 = readl_relaxed(base + GICH_ELRSR0); 80 elrsr0 = readl_relaxed(base + GICH_ELRSR0);
@@ -86,7 +93,7 @@ static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
86static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) 93static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
87{ 94{
88 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; 95 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
89 int nr_lr = vcpu->arch.vgic_cpu.nr_lr; 96 int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr;
90 int i; 97 int i;
91 98
92 for (i = 0; i < nr_lr; i++) { 99 for (i = 0; i < nr_lr; i++) {
@@ -141,13 +148,13 @@ void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu)
141 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; 148 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
142 struct vgic_dist *vgic = &kvm->arch.vgic; 149 struct vgic_dist *vgic = &kvm->arch.vgic;
143 void __iomem *base = kern_hyp_va(vgic->vctrl_base); 150 void __iomem *base = kern_hyp_va(vgic->vctrl_base);
144 int i, nr_lr; 151 int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr;
152 int i;
145 u64 live_lrs = 0; 153 u64 live_lrs = 0;
146 154
147 if (!base) 155 if (!base)
148 return; 156 return;
149 157
150 nr_lr = vcpu->arch.vgic_cpu.nr_lr;
151 158
152 for (i = 0; i < nr_lr; i++) 159 for (i = 0; i < nr_lr; i++)
153 if (cpu_if->vgic_lr[i] & GICH_LR_STATE) 160 if (cpu_if->vgic_lr[i] & GICH_LR_STATE)
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 575c7aa30d7e..a027569facfa 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -436,7 +436,14 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
436 return 0; 436 return 0;
437} 437}
438 438
439static bool irq_is_valid(struct kvm *kvm, int irq, bool is_ppi) 439#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS)
440
441/*
442 * For one VM the interrupt type must be the same for each vcpu.
443 * As a PPI, the interrupt number is the same for all vcpus,
444 * while as an SPI it must be a separate number per vcpu.
445 */
446static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
440{ 447{
441 int i; 448 int i;
442 struct kvm_vcpu *vcpu; 449 struct kvm_vcpu *vcpu;
@@ -445,7 +452,7 @@ static bool irq_is_valid(struct kvm *kvm, int irq, bool is_ppi)
445 if (!kvm_arm_pmu_irq_initialized(vcpu)) 452 if (!kvm_arm_pmu_irq_initialized(vcpu))
446 continue; 453 continue;
447 454
448 if (is_ppi) { 455 if (irq_is_ppi(irq)) {
449 if (vcpu->arch.pmu.irq_num != irq) 456 if (vcpu->arch.pmu.irq_num != irq)
450 return false; 457 return false;
451 } else { 458 } else {
@@ -457,7 +464,6 @@ static bool irq_is_valid(struct kvm *kvm, int irq, bool is_ppi)
457 return true; 464 return true;
458} 465}
459 466
460
461int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) 467int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
462{ 468{
463 switch (attr->attr) { 469 switch (attr->attr) {
@@ -471,14 +477,11 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
471 if (get_user(irq, uaddr)) 477 if (get_user(irq, uaddr))
472 return -EFAULT; 478 return -EFAULT;
473 479
474 /* 480 /* The PMU overflow interrupt can be a PPI or a valid SPI. */
475 * The PMU overflow interrupt could be a PPI or SPI, but for one 481 if (!(irq_is_ppi(irq) || vgic_valid_spi(vcpu->kvm, irq)))
476 * VM the interrupt type must be same for each vcpu. As a PPI, 482 return -EINVAL;
477 * the interrupt number is the same for all vcpus, while as an 483
478 * SPI it must be a separate number per vcpu. 484 if (!pmu_irq_is_valid(vcpu->kvm, irq))
479 */
480 if (irq < VGIC_NR_SGIS || irq >= vcpu->kvm->arch.vgic.nr_irqs ||
481 !irq_is_valid(vcpu->kvm, irq, irq < VGIC_NR_PRIVATE_IRQS))
482 return -EINVAL; 485 return -EINVAL;
483 486
484 if (kvm_arm_pmu_irq_initialized(vcpu)) 487 if (kvm_arm_pmu_irq_initialized(vcpu))
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 7e826c9b2b0a..334cd7a89106 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -171,7 +171,7 @@ static const struct vgic_ops vgic_v2_ops = {
171 .enable = vgic_v2_enable, 171 .enable = vgic_v2_enable,
172}; 172};
173 173
174static struct vgic_params vgic_v2_params; 174struct vgic_params __section(.hyp.text) vgic_v2_params;
175 175
176static void vgic_cpu_init_lrs(void *params) 176static void vgic_cpu_init_lrs(void *params)
177{ 177{
@@ -201,6 +201,8 @@ int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info,
201 const struct resource *vctrl_res = &gic_kvm_info->vctrl; 201 const struct resource *vctrl_res = &gic_kvm_info->vctrl;
202 const struct resource *vcpu_res = &gic_kvm_info->vcpu; 202 const struct resource *vcpu_res = &gic_kvm_info->vcpu;
203 203
204 memset(vgic, 0, sizeof(*vgic));
205
204 if (!gic_kvm_info->maint_irq) { 206 if (!gic_kvm_info->maint_irq) {
205 kvm_err("error getting vgic maintenance irq\n"); 207 kvm_err("error getting vgic maintenance irq\n");
206 ret = -ENXIO; 208 ret = -ENXIO;
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index c02a1b1cf855..75b02fa86436 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -29,12 +29,6 @@
29#include <asm/kvm_asm.h> 29#include <asm/kvm_asm.h>
30#include <asm/kvm_mmu.h> 30#include <asm/kvm_mmu.h>
31 31
32/* These are for GICv2 emulation only */
33#define GICH_LR_VIRTUALID (0x3ffUL << 0)
34#define GICH_LR_PHYSID_CPUID_SHIFT (10)
35#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
36#define ICH_LR_VIRTUALID_MASK (BIT_ULL(32) - 1)
37
38static u32 ich_vtr_el2; 32static u32 ich_vtr_el2;
39 33
40static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) 34static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -43,7 +37,7 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
43 u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr]; 37 u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr];
44 38
45 if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) 39 if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
46 lr_desc.irq = val & ICH_LR_VIRTUALID_MASK; 40 lr_desc.irq = val & ICH_LR_VIRTUAL_ID_MASK;
47 else 41 else
48 lr_desc.irq = val & GICH_LR_VIRTUALID; 42 lr_desc.irq = val & GICH_LR_VIRTUALID;
49 43
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 60668a7f319a..c3bfbb981e73 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -690,12 +690,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
690 */ 690 */
691void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) 691void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
692{ 692{
693 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
694 u64 elrsr = vgic_get_elrsr(vcpu); 693 u64 elrsr = vgic_get_elrsr(vcpu);
695 unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); 694 unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
696 int i; 695 int i;
697 696
698 for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) { 697 for_each_clear_bit(i, elrsr_ptr, vgic->nr_lr) {
699 struct vgic_lr lr = vgic_get_lr(vcpu, i); 698 struct vgic_lr lr = vgic_get_lr(vcpu, i);
700 699
701 /* 700 /*
@@ -820,7 +819,6 @@ static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
820 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 819 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
821 struct vgic_io_device *iodev = container_of(this, 820 struct vgic_io_device *iodev = container_of(this,
822 struct vgic_io_device, dev); 821 struct vgic_io_device, dev);
823 struct kvm_run *run = vcpu->run;
824 const struct vgic_io_range *range; 822 const struct vgic_io_range *range;
825 struct kvm_exit_mmio mmio; 823 struct kvm_exit_mmio mmio;
826 bool updated_state; 824 bool updated_state;
@@ -849,12 +847,6 @@ static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
849 updated_state = false; 847 updated_state = false;
850 } 848 }
851 spin_unlock(&dist->lock); 849 spin_unlock(&dist->lock);
852 run->mmio.is_write = is_write;
853 run->mmio.len = len;
854 run->mmio.phys_addr = addr;
855 memcpy(run->mmio.data, val, len);
856
857 kvm_handle_mmio_return(vcpu, run);
858 850
859 if (updated_state) 851 if (updated_state)
860 vgic_kick_vcpus(vcpu->kvm); 852 vgic_kick_vcpus(vcpu->kvm);
@@ -1102,18 +1094,18 @@ static bool dist_active_irq(struct kvm_vcpu *vcpu)
1102 return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu); 1094 return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu);
1103} 1095}
1104 1096
1105bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map) 1097bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq)
1106{ 1098{
1107 int i; 1099 int i;
1108 1100
1109 for (i = 0; i < vcpu->arch.vgic_cpu.nr_lr; i++) { 1101 for (i = 0; i < vgic->nr_lr; i++) {
1110 struct vgic_lr vlr = vgic_get_lr(vcpu, i); 1102 struct vgic_lr vlr = vgic_get_lr(vcpu, i);
1111 1103
1112 if (vlr.irq == map->virt_irq && vlr.state & LR_STATE_ACTIVE) 1104 if (vlr.irq == virt_irq && vlr.state & LR_STATE_ACTIVE)
1113 return true; 1105 return true;
1114 } 1106 }
1115 1107
1116 return vgic_irq_is_active(vcpu, map->virt_irq); 1108 return vgic_irq_is_active(vcpu, virt_irq);
1117} 1109}
1118 1110
1119/* 1111/*
@@ -1521,7 +1513,6 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
1521} 1513}
1522 1514
1523static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, 1515static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
1524 struct irq_phys_map *map,
1525 unsigned int irq_num, bool level) 1516 unsigned int irq_num, bool level)
1526{ 1517{
1527 struct vgic_dist *dist = &kvm->arch.vgic; 1518 struct vgic_dist *dist = &kvm->arch.vgic;
@@ -1660,14 +1651,14 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
1660 if (map) 1651 if (map)
1661 return -EINVAL; 1652 return -EINVAL;
1662 1653
1663 return vgic_update_irq_pending(kvm, cpuid, NULL, irq_num, level); 1654 return vgic_update_irq_pending(kvm, cpuid, irq_num, level);
1664} 1655}
1665 1656
1666/** 1657/**
1667 * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic 1658 * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic
1668 * @kvm: The VM structure pointer 1659 * @kvm: The VM structure pointer
1669 * @cpuid: The CPU for PPIs 1660 * @cpuid: The CPU for PPIs
1670 * @map: Pointer to a irq_phys_map structure describing the mapping 1661 * @virt_irq: The virtual IRQ to be injected
1671 * @level: Edge-triggered: true: to trigger the interrupt 1662 * @level: Edge-triggered: true: to trigger the interrupt
1672 * false: to ignore the call 1663 * false: to ignore the call
1673 * Level-sensitive true: raise the input signal 1664 * Level-sensitive true: raise the input signal
@@ -1678,7 +1669,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
1678 * being HIGH and 0 being LOW and all devices being active-HIGH. 1669 * being HIGH and 0 being LOW and all devices being active-HIGH.
1679 */ 1670 */
1680int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, 1671int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
1681 struct irq_phys_map *map, bool level) 1672 unsigned int virt_irq, bool level)
1682{ 1673{
1683 int ret; 1674 int ret;
1684 1675
@@ -1686,7 +1677,7 @@ int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
1686 if (ret) 1677 if (ret)
1687 return ret; 1678 return ret;
1688 1679
1689 return vgic_update_irq_pending(kvm, cpuid, map, map->virt_irq, level); 1680 return vgic_update_irq_pending(kvm, cpuid, virt_irq, level);
1690} 1681}
1691 1682
1692static irqreturn_t vgic_maintenance_handler(int irq, void *data) 1683static irqreturn_t vgic_maintenance_handler(int irq, void *data)
@@ -1712,43 +1703,28 @@ static struct list_head *vgic_get_irq_phys_map_list(struct kvm_vcpu *vcpu,
1712/** 1703/**
1713 * kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ 1704 * kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ
1714 * @vcpu: The VCPU pointer 1705 * @vcpu: The VCPU pointer
1715 * @virt_irq: The virtual irq number 1706 * @virt_irq: The virtual IRQ number for the guest
1716 * @irq: The Linux IRQ number 1707 * @phys_irq: The hardware IRQ number of the host
1717 * 1708 *
1718 * Establish a mapping between a guest visible irq (@virt_irq) and a 1709 * Establish a mapping between a guest visible irq (@virt_irq) and a
1719 * Linux irq (@irq). On injection, @virt_irq will be associated with 1710 * hardware irq (@phys_irq). On injection, @virt_irq will be associated with
1720 * the physical interrupt represented by @irq. This mapping can be 1711 * the physical interrupt represented by @phys_irq. This mapping can be
1721 * established multiple times as long as the parameters are the same. 1712 * established multiple times as long as the parameters are the same.
1722 * 1713 *
1723 * Returns a valid pointer on success, and an error pointer otherwise 1714 * Returns 0 on success or an error value otherwise.
1724 */ 1715 */
1725struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, 1716int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int phys_irq)
1726 int virt_irq, int irq)
1727{ 1717{
1728 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1718 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1729 struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq); 1719 struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
1730 struct irq_phys_map *map; 1720 struct irq_phys_map *map;
1731 struct irq_phys_map_entry *entry; 1721 struct irq_phys_map_entry *entry;
1732 struct irq_desc *desc; 1722 int ret = 0;
1733 struct irq_data *data;
1734 int phys_irq;
1735
1736 desc = irq_to_desc(irq);
1737 if (!desc) {
1738 kvm_err("%s: no interrupt descriptor\n", __func__);
1739 return ERR_PTR(-EINVAL);
1740 }
1741
1742 data = irq_desc_get_irq_data(desc);
1743 while (data->parent_data)
1744 data = data->parent_data;
1745
1746 phys_irq = data->hwirq;
1747 1723
1748 /* Create a new mapping */ 1724 /* Create a new mapping */
1749 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 1725 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1750 if (!entry) 1726 if (!entry)
1751 return ERR_PTR(-ENOMEM); 1727 return -ENOMEM;
1752 1728
1753 spin_lock(&dist->irq_phys_map_lock); 1729 spin_lock(&dist->irq_phys_map_lock);
1754 1730
@@ -1756,9 +1732,8 @@ struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
1756 map = vgic_irq_map_search(vcpu, virt_irq); 1732 map = vgic_irq_map_search(vcpu, virt_irq);
1757 if (map) { 1733 if (map) {
1758 /* Make sure this mapping matches */ 1734 /* Make sure this mapping matches */
1759 if (map->phys_irq != phys_irq || 1735 if (map->phys_irq != phys_irq)
1760 map->irq != irq) 1736 ret = -EINVAL;
1761 map = ERR_PTR(-EINVAL);
1762 1737
1763 /* Found an existing, valid mapping */ 1738 /* Found an existing, valid mapping */
1764 goto out; 1739 goto out;
@@ -1767,7 +1742,6 @@ struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
1767 map = &entry->map; 1742 map = &entry->map;
1768 map->virt_irq = virt_irq; 1743 map->virt_irq = virt_irq;
1769 map->phys_irq = phys_irq; 1744 map->phys_irq = phys_irq;
1770 map->irq = irq;
1771 1745
1772 list_add_tail_rcu(&entry->entry, root); 1746 list_add_tail_rcu(&entry->entry, root);
1773 1747
@@ -1775,9 +1749,9 @@ out:
1775 spin_unlock(&dist->irq_phys_map_lock); 1749 spin_unlock(&dist->irq_phys_map_lock);
1776 /* If we've found a hit in the existing list, free the useless 1750 /* If we've found a hit in the existing list, free the useless
1777 * entry */ 1751 * entry */
1778 if (IS_ERR(map) || map != &entry->map) 1752 if (ret || map != &entry->map)
1779 kfree(entry); 1753 kfree(entry);
1780 return map; 1754 return ret;
1781} 1755}
1782 1756
1783static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, 1757static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
@@ -1813,25 +1787,22 @@ static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu)
1813/** 1787/**
1814 * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping 1788 * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
1815 * @vcpu: The VCPU pointer 1789 * @vcpu: The VCPU pointer
1816 * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq 1790 * @virt_irq: The virtual IRQ number to be unmapped
1817 * 1791 *
1818 * Remove an existing mapping between virtual and physical interrupts. 1792 * Remove an existing mapping between virtual and physical interrupts.
1819 */ 1793 */
1820int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map) 1794int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
1821{ 1795{
1822 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1796 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1823 struct irq_phys_map_entry *entry; 1797 struct irq_phys_map_entry *entry;
1824 struct list_head *root; 1798 struct list_head *root;
1825 1799
1826 if (!map) 1800 root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
1827 return -EINVAL;
1828
1829 root = vgic_get_irq_phys_map_list(vcpu, map->virt_irq);
1830 1801
1831 spin_lock(&dist->irq_phys_map_lock); 1802 spin_lock(&dist->irq_phys_map_lock);
1832 1803
1833 list_for_each_entry(entry, root, entry) { 1804 list_for_each_entry(entry, root, entry) {
1834 if (&entry->map == map) { 1805 if (entry->map.virt_irq == virt_irq) {
1835 list_del_rcu(&entry->entry); 1806 list_del_rcu(&entry->entry);
1836 call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu); 1807 call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu);
1837 break; 1808 break;
@@ -1887,13 +1858,6 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
1887 return -ENOMEM; 1858 return -ENOMEM;
1888 } 1859 }
1889 1860
1890 /*
1891 * Store the number of LRs per vcpu, so we don't have to go
1892 * all the way to the distributor structure to find out. Only
1893 * assembly code should use this one.
1894 */
1895 vgic_cpu->nr_lr = vgic->nr_lr;
1896
1897 return 0; 1861 return 0;
1898} 1862}
1899 1863
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
new file mode 100644
index 000000000000..a1442f7c9c4d
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -0,0 +1,452 @@
1/*
2 * Copyright (C) 2015, 2016 ARM Ltd.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/uaccess.h>
18#include <linux/interrupt.h>
19#include <linux/cpu.h>
20#include <linux/kvm_host.h>
21#include <kvm/arm_vgic.h>
22#include <asm/kvm_mmu.h>
23#include "vgic.h"
24
25/*
26 * Initialization rules: there are multiple stages to the vgic
27 * initialization, both for the distributor and the CPU interfaces.
28 *
29 * Distributor:
30 *
31 * - kvm_vgic_early_init(): initialization of static data that doesn't
32 * depend on any sizing information or emulation type. No allocation
33 * is allowed there.
34 *
35 * - vgic_init(): allocation and initialization of the generic data
36 * structures that depend on sizing information (number of CPUs,
37 * number of interrupts). Also initializes the vcpu specific data
38 * structures. Can be executed lazily for GICv2.
39 *
40 * CPU Interface:
41 *
42 * - kvm_vgic_cpu_early_init(): initialization of static data that
43 * doesn't depend on any sizing information or emulation type. No
44 * allocation is allowed there.
45 */
46
47/* EARLY INIT */
48
49/*
50 * These two functions should not be needed anymore, but they
51 * are still called from arm.c
52 */
53void kvm_vgic_early_init(struct kvm *kvm)
54{
55}
56
57void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu)
58{
59}
60
61/* CREATION */
62
63/**
64 * kvm_vgic_create: triggered by the instantiation of the VGIC device by
65 * user space, either through the legacy KVM_CREATE_IRQCHIP ioctl (v2 only)
66 * or through the generic KVM_CREATE_DEVICE API ioctl.
67 * irqchip_in_kernel() tells you if this function succeeded or not.
68 * @kvm: kvm struct pointer
69 * @type: KVM_DEV_TYPE_ARM_VGIC_V[23]
70 */
71int kvm_vgic_create(struct kvm *kvm, u32 type)
72{
73 int i, vcpu_lock_idx = -1, ret;
74 struct kvm_vcpu *vcpu;
75
76 mutex_lock(&kvm->lock);
77
78 if (irqchip_in_kernel(kvm)) {
79 ret = -EEXIST;
80 goto out;
81 }
82
83 /*
84 * This function is also called by the KVM_CREATE_IRQCHIP handler,
85 * which had no chance yet to check the availability of the GICv2
86 * emulation. So check this here again. KVM_CREATE_DEVICE does
87 * the proper checks already.
88 */
89 if (type == KVM_DEV_TYPE_ARM_VGIC_V2 &&
90 !kvm_vgic_global_state.can_emulate_gicv2) {
91 ret = -ENODEV;
92 goto out;
93 }
94
95 /*
96 * Any time a vcpu is run, vcpu_load is called which tries to grab the
97 * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
98 * that no other VCPUs are run while we create the vgic.
99 */
100 ret = -EBUSY;
101 kvm_for_each_vcpu(i, vcpu, kvm) {
102 if (!mutex_trylock(&vcpu->mutex))
103 goto out_unlock;
104 vcpu_lock_idx = i;
105 }
106
107 kvm_for_each_vcpu(i, vcpu, kvm) {
108 if (vcpu->arch.has_run_once)
109 goto out_unlock;
110 }
111 ret = 0;
112
113 if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
114 kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS;
115 else
116 kvm->arch.max_vcpus = VGIC_V3_MAX_CPUS;
117
118 if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus) {
119 ret = -E2BIG;
120 goto out_unlock;
121 }
122
123 kvm->arch.vgic.in_kernel = true;
124 kvm->arch.vgic.vgic_model = type;
125
126 /*
 127 * kvm_vgic_global_state.vctrl_base is set at vgic probe time (kvm_arch_init);
 128 * it is stored in the distributor struct for asm save/restore purposes.
129 */
130 kvm->arch.vgic.vctrl_base = kvm_vgic_global_state.vctrl_base;
131
132 kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
133 kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
134 kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF;
135
136out_unlock:
137 for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
138 vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
139 mutex_unlock(&vcpu->mutex);
140 }
141
142out:
143 mutex_unlock(&kvm->lock);
144 return ret;
145}
146
147/* INIT/DESTROY */
148
149/**
150 * kvm_vgic_dist_init: initialize the dist data structures
151 * @kvm: kvm struct pointer
152 * @nr_spis: number of spis, frozen by caller
153 */
154static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
155{
156 struct vgic_dist *dist = &kvm->arch.vgic;
157 struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0);
158 int i;
159
160 dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL);
161 if (!dist->spis)
162 return -ENOMEM;
163
164 /*
165 * In the following code we do not take the irq struct lock since
166 * no other action on irq structs can happen while the VGIC is
167 * not initialized yet:
 168 * If someone wants to inject an interrupt or perform an MMIO access, we
169 * require prior initialization in case of a virtual GICv3 or trigger
170 * initialization when using a virtual GICv2.
171 */
172 for (i = 0; i < nr_spis; i++) {
173 struct vgic_irq *irq = &dist->spis[i];
174
175 irq->intid = i + VGIC_NR_PRIVATE_IRQS;
176 INIT_LIST_HEAD(&irq->ap_list);
177 spin_lock_init(&irq->irq_lock);
178 irq->vcpu = NULL;
179 irq->target_vcpu = vcpu0;
180 if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
181 irq->targets = 0;
182 else
183 irq->mpidr = 0;
184 }
185 return 0;
186}
187
188/**
189 * kvm_vgic_vcpu_init: initialize the vcpu data structures and
190 * enable the VCPU interface
 191 * @vcpu: the VCPU whose VGIC should be initialized
192 */
193static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
194{
195 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
196 int i;
197
198 INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
199 spin_lock_init(&vgic_cpu->ap_list_lock);
200
201 /*
202 * Enable and configure all SGIs to be edge-triggered and
203 * configure all PPIs as level-triggered.
204 */
205 for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
206 struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
207
208 INIT_LIST_HEAD(&irq->ap_list);
209 spin_lock_init(&irq->irq_lock);
210 irq->intid = i;
211 irq->vcpu = NULL;
212 irq->target_vcpu = vcpu;
213 irq->targets = 1U << vcpu->vcpu_id;
214 if (vgic_irq_is_sgi(i)) {
215 /* SGIs */
216 irq->enabled = 1;
217 irq->config = VGIC_CONFIG_EDGE;
218 } else {
219 /* PPIs */
220 irq->config = VGIC_CONFIG_LEVEL;
221 }
222 }
223 if (kvm_vgic_global_state.type == VGIC_V2)
224 vgic_v2_enable(vcpu);
225 else
226 vgic_v3_enable(vcpu);
227}
228
229/*
230 * vgic_init: allocates and initializes dist and vcpu data structures
231 * depending on two dimensioning parameters:
232 * - the number of spis
233 * - the number of vcpus
 234 * The function is generally called when nr_spis has been explicitly set
 235 * by userspace through the KVM device API; if not, it defaults to the legacy 256 interrupts (224 SPIs).
236 * vgic_initialized() returns true when this function has succeeded.
237 * Must be called with kvm->lock held!
238 */
239int vgic_init(struct kvm *kvm)
240{
241 struct vgic_dist *dist = &kvm->arch.vgic;
242 struct kvm_vcpu *vcpu;
243 int ret = 0, i;
244
245 if (vgic_initialized(kvm))
246 return 0;
247
248 /* freeze the number of spis */
249 if (!dist->nr_spis)
250 dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS;
251
252 ret = kvm_vgic_dist_init(kvm, dist->nr_spis);
253 if (ret)
254 goto out;
255
256 kvm_for_each_vcpu(i, vcpu, kvm)
257 kvm_vgic_vcpu_init(vcpu);
258
259 dist->initialized = true;
260out:
261 return ret;
262}
263
264static void kvm_vgic_dist_destroy(struct kvm *kvm)
265{
266 struct vgic_dist *dist = &kvm->arch.vgic;
267
268 mutex_lock(&kvm->lock);
269
270 dist->ready = false;
271 dist->initialized = false;
272
273 kfree(dist->spis);
274 kfree(dist->redist_iodevs);
275 dist->nr_spis = 0;
276
277 mutex_unlock(&kvm->lock);
278}
279
280void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
281{
282 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
283
284 INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
285}
286
287void kvm_vgic_destroy(struct kvm *kvm)
288{
289 struct kvm_vcpu *vcpu;
290 int i;
291
292 kvm_vgic_dist_destroy(kvm);
293
294 kvm_for_each_vcpu(i, vcpu, kvm)
295 kvm_vgic_vcpu_destroy(vcpu);
296}
297
298/**
299 * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest
 300 * is a GICv2. A GICv3 must be explicitly initialized by userspace using the
 301 * KVM_DEV_ARM_VGIC_GRP_CTRL KVM device group.
302 * @kvm: kvm struct pointer
303 */
304int vgic_lazy_init(struct kvm *kvm)
305{
306 int ret = 0;
307
308 if (unlikely(!vgic_initialized(kvm))) {
309 /*
310 * We only provide the automatic initialization of the VGIC
311 * for the legacy case of a GICv2. Any other type must
 312 * be explicitly initialized once it has been set up via the respective
313 * KVM device call.
314 */
315 if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
316 return -EBUSY;
317
318 mutex_lock(&kvm->lock);
319 ret = vgic_init(kvm);
320 mutex_unlock(&kvm->lock);
321 }
322
323 return ret;
324}
325
326/* RESOURCE MAPPING */
327
328/**
 329 * kvm_vgic_map_resources: map the MMIO regions depending on the VGIC model
 330 * exposed to the guest; called on the first VCPU run.
331 * Also map the virtual CPU interface into the VM.
332 * v2/v3 derivatives call vgic_init if not already done.
333 * vgic_ready() returns true if this function has succeeded.
334 * @kvm: kvm struct pointer
335 */
336int kvm_vgic_map_resources(struct kvm *kvm)
337{
338 struct vgic_dist *dist = &kvm->arch.vgic;
339 int ret = 0;
340
341 mutex_lock(&kvm->lock);
342 if (!irqchip_in_kernel(kvm))
343 goto out;
344
345 if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
346 ret = vgic_v2_map_resources(kvm);
347 else
348 ret = vgic_v3_map_resources(kvm);
349out:
350 mutex_unlock(&kvm->lock);
351 return ret;
352}
353
354/* GENERIC PROBE */
355
356static void vgic_init_maintenance_interrupt(void *info)
357{
358 enable_percpu_irq(kvm_vgic_global_state.maint_irq, 0);
359}
360
361static int vgic_cpu_notify(struct notifier_block *self,
362 unsigned long action, void *cpu)
363{
364 switch (action) {
365 case CPU_STARTING:
366 case CPU_STARTING_FROZEN:
367 vgic_init_maintenance_interrupt(NULL);
368 break;
369 case CPU_DYING:
370 case CPU_DYING_FROZEN:
371 disable_percpu_irq(kvm_vgic_global_state.maint_irq);
372 break;
373 }
374
375 return NOTIFY_OK;
376}
377
378static struct notifier_block vgic_cpu_nb = {
379 .notifier_call = vgic_cpu_notify,
380};
381
382static irqreturn_t vgic_maintenance_handler(int irq, void *data)
383{
384 /*
385 * We cannot rely on the vgic maintenance interrupt to be
386 * delivered synchronously. This means we can only use it to
387 * exit the VM, and we perform the handling of EOIed
388 * interrupts on the exit path (see vgic_process_maintenance).
389 */
390 return IRQ_HANDLED;
391}
392
393/**
394 * kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable
 395 * according to the host GIC model. It then calls either
 396 * vgic_v2_probe() or vgic_v3_probe(), which registers the KVM device
 397 * that userspace can instantiate later on.
398 */
399int kvm_vgic_hyp_init(void)
400{
401 const struct gic_kvm_info *gic_kvm_info;
402 int ret;
403
404 gic_kvm_info = gic_get_kvm_info();
405 if (!gic_kvm_info)
406 return -ENODEV;
407
408 if (!gic_kvm_info->maint_irq) {
409 kvm_err("No vgic maintenance irq\n");
410 return -ENXIO;
411 }
412
413 switch (gic_kvm_info->type) {
414 case GIC_V2:
415 ret = vgic_v2_probe(gic_kvm_info);
416 break;
417 case GIC_V3:
418 ret = vgic_v3_probe(gic_kvm_info);
419 break;
420 default:
421 ret = -ENODEV;
 422 }
423
424 if (ret)
425 return ret;
426
427 kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
428 ret = request_percpu_irq(kvm_vgic_global_state.maint_irq,
429 vgic_maintenance_handler,
430 "vgic", kvm_get_running_vcpus());
431 if (ret) {
432 kvm_err("Cannot register interrupt %d\n",
433 kvm_vgic_global_state.maint_irq);
434 return ret;
435 }
436
437 ret = __register_cpu_notifier(&vgic_cpu_nb);
438 if (ret) {
439 kvm_err("Cannot register vgic CPU notifier\n");
440 goto out_free_irq;
441 }
442
443 on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
444
445 kvm_info("vgic interrupt IRQ%d\n", kvm_vgic_global_state.maint_irq);
446 return 0;
447
448out_free_irq:
449 free_percpu_irq(kvm_vgic_global_state.maint_irq,
450 kvm_get_running_vcpus());
451 return ret;
452}
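For context, a hedged userspace sketch of the KVM_CREATE_DEVICE path that ends up in kvm_vgic_create() above; the VM fd and error handling are assumed, only the ioctl and structure layout are standard KVM UAPI:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical helper: vm_fd is an already created KVM VM file descriptor. */
static int create_vgic(int vm_fd, __u32 vgic_type)
{
	struct kvm_create_device cd = {
		.type = vgic_type,	/* KVM_DEV_TYPE_ARM_VGIC_V2 or _V3 */
	};

	/* On success the kernel fills in cd.fd, the device fd used for attribute accesses. */
	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;

	return cd.fd;
}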
diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c
new file mode 100644
index 000000000000..c675513270bb
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-irqfd.c
@@ -0,0 +1,52 @@
1/*
2 * Copyright (C) 2015, 2016 ARM Ltd.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/kvm.h>
18#include <linux/kvm_host.h>
19#include <trace/events/kvm.h>
20
21int kvm_irq_map_gsi(struct kvm *kvm,
22 struct kvm_kernel_irq_routing_entry *entries,
23 int gsi)
24{
25 return 0;
26}
27
28int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned int irqchip,
29 unsigned int pin)
30{
31 return pin;
32}
33
34int kvm_set_irq(struct kvm *kvm, int irq_source_id,
35 u32 irq, int level, bool line_status)
36{
37 unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS;
38
39 trace_kvm_set_irq(irq, level, irq_source_id);
40
41 BUG_ON(!vgic_initialized(kvm));
42
43 return kvm_vgic_inject_irq(kvm, 0, spi, level);
44}
45
46/* MSI not implemented yet */
47int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
48 struct kvm *kvm, int irq_source_id,
49 int level, bool line_status)
50{
51 return 0;
52}
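To make the GSI-to-SPI translation in kvm_set_irq() concrete, a hedged userspace sketch; note that the BUG_ON() above means the vgic must already be initialized, and the eventfd/VM fd plumbing here is assumed:

#include <sys/ioctl.h>
#include <sys/eventfd.h>
#include <linux/kvm.h>

/* Hypothetical helper: injecting through GSI n raises SPI n + 32 (INTID 32 + n). */
static int wire_spi_eventfd(int vm_fd, __u32 gsi)
{
	int efd = eventfd(0, 0);
	struct kvm_irqfd irqfd = {
		.fd  = (__u32)efd,
		.gsi = gsi,	/* kvm_set_irq() adds VGIC_NR_PRIVATE_IRQS (32) */
	};

	if (efd < 0 || ioctl(vm_fd, KVM_IRQFD, &irqfd) < 0)
		return -1;

	return efd;	/* writing a count to this eventfd injects the SPI */
}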
diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
new file mode 100644
index 000000000000..0130c4b147b7
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
@@ -0,0 +1,431 @@
1/*
2 * VGIC: KVM DEVICE API
3 *
4 * Copyright (C) 2015 ARM Ltd.
5 * Author: Marc Zyngier <marc.zyngier@arm.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 */
16#include <linux/kvm_host.h>
17#include <kvm/arm_vgic.h>
18#include <linux/uaccess.h>
19#include <asm/kvm_mmu.h>
20#include "vgic.h"
21
22/* common helpers */
23
24static int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
25 phys_addr_t addr, phys_addr_t alignment)
26{
27 if (addr & ~KVM_PHYS_MASK)
28 return -E2BIG;
29
30 if (!IS_ALIGNED(addr, alignment))
31 return -EINVAL;
32
33 if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
34 return -EEXIST;
35
36 return 0;
37}
38
39/**
40 * kvm_vgic_addr - set or get vgic VM base addresses
41 * @kvm: pointer to the vm struct
42 * @type: the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX
43 * @addr: pointer to address value
44 * @write: if true set the address in the VM address space, if false read the
45 * address
46 *
47 * Set or get the vgic base addresses for the distributor and the virtual CPU
48 * interface in the VM physical address space. These addresses are properties
49 * of the emulated core/SoC and therefore user space initially knows this
50 * information.
51 * Check them for sanity (alignment, double assignment). We can't check for
52 * overlapping regions in case of a virtual GICv3 here, since we don't know
53 * the number of VCPUs yet, so we defer this check to map_resources().
54 */
55int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
56{
57 int r = 0;
58 struct vgic_dist *vgic = &kvm->arch.vgic;
59 int type_needed;
60 phys_addr_t *addr_ptr, alignment;
61
62 mutex_lock(&kvm->lock);
63 switch (type) {
64 case KVM_VGIC_V2_ADDR_TYPE_DIST:
65 type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
66 addr_ptr = &vgic->vgic_dist_base;
67 alignment = SZ_4K;
68 break;
69 case KVM_VGIC_V2_ADDR_TYPE_CPU:
70 type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
71 addr_ptr = &vgic->vgic_cpu_base;
72 alignment = SZ_4K;
73 break;
74#ifdef CONFIG_KVM_ARM_VGIC_V3
75 case KVM_VGIC_V3_ADDR_TYPE_DIST:
76 type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
77 addr_ptr = &vgic->vgic_dist_base;
78 alignment = SZ_64K;
79 break;
80 case KVM_VGIC_V3_ADDR_TYPE_REDIST:
81 type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
82 addr_ptr = &vgic->vgic_redist_base;
83 alignment = SZ_64K;
84 break;
85#endif
86 default:
87 r = -ENODEV;
88 goto out;
89 }
90
91 if (vgic->vgic_model != type_needed) {
92 r = -ENODEV;
93 goto out;
94 }
95
96 if (write) {
97 r = vgic_check_ioaddr(kvm, addr_ptr, *addr, alignment);
98 if (!r)
99 *addr_ptr = *addr;
100 } else {
101 *addr = *addr_ptr;
102 }
103
104out:
105 mutex_unlock(&kvm->lock);
106 return r;
107}
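A hedged illustration of the userspace call that reaches kvm_vgic_addr() with write == true; the device fd comes from KVM_CREATE_DEVICE and the base address below is purely illustrative:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical: place the GICv2 distributor at an arbitrary 4K-aligned guest address. */
static int set_v2_dist_base(int vgic_fd)
{
	__u64 dist_base = 0x08000000;	/* illustrative value only */
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
		.attr  = KVM_VGIC_V2_ADDR_TYPE_DIST,
		.addr  = (__u64)(unsigned long)&dist_base,
	};

	return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}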
108
109static int vgic_set_common_attr(struct kvm_device *dev,
110 struct kvm_device_attr *attr)
111{
112 int r;
113
114 switch (attr->group) {
115 case KVM_DEV_ARM_VGIC_GRP_ADDR: {
116 u64 __user *uaddr = (u64 __user *)(long)attr->addr;
117 u64 addr;
118 unsigned long type = (unsigned long)attr->attr;
119
120 if (copy_from_user(&addr, uaddr, sizeof(addr)))
121 return -EFAULT;
122
123 r = kvm_vgic_addr(dev->kvm, type, &addr, true);
124 return (r == -ENODEV) ? -ENXIO : r;
125 }
126 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
127 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
128 u32 val;
129 int ret = 0;
130
131 if (get_user(val, uaddr))
132 return -EFAULT;
133
134 /*
135 * We require:
136 * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs
137 * - at most 1024 interrupts
138 * - a multiple of 32 interrupts
139 */
140 if (val < (VGIC_NR_PRIVATE_IRQS + 32) ||
141 val > VGIC_MAX_RESERVED ||
142 (val & 31))
143 return -EINVAL;
144
145 mutex_lock(&dev->kvm->lock);
146
147 if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_spis)
148 ret = -EBUSY;
149 else
150 dev->kvm->arch.vgic.nr_spis =
151 val - VGIC_NR_PRIVATE_IRQS;
152
153 mutex_unlock(&dev->kvm->lock);
154
155 return ret;
156 }
157 case KVM_DEV_ARM_VGIC_GRP_CTRL: {
158 switch (attr->attr) {
159 case KVM_DEV_ARM_VGIC_CTRL_INIT:
160 mutex_lock(&dev->kvm->lock);
161 r = vgic_init(dev->kvm);
162 mutex_unlock(&dev->kvm->lock);
163 return r;
164 }
165 break;
166 }
167 }
168
169 return -ENXIO;
170}
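As a worked example of the KVM_DEV_ARM_VGIC_GRP_NR_IRQS checks above: a value of 96 passes (it is at least 64, a multiple of 32, and below the maximum), leaving nr_spis = 96 - 32 = 64. A hedged userspace sketch, with the device fd assumed to come from KVM_CREATE_DEVICE:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical: configure 96 interrupts in total (32 private + 64 SPIs). */
static int set_nr_irqs(int vgic_fd)
{
	__u32 nr_irqs = 96;
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
		.addr  = (__u64)(unsigned long)&nr_irqs,
	};

	return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}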
171
172static int vgic_get_common_attr(struct kvm_device *dev,
173 struct kvm_device_attr *attr)
174{
175 int r = -ENXIO;
176
177 switch (attr->group) {
178 case KVM_DEV_ARM_VGIC_GRP_ADDR: {
179 u64 __user *uaddr = (u64 __user *)(long)attr->addr;
180 u64 addr;
181 unsigned long type = (unsigned long)attr->attr;
182
183 r = kvm_vgic_addr(dev->kvm, type, &addr, false);
184 if (r)
185 return (r == -ENODEV) ? -ENXIO : r;
186
187 if (copy_to_user(uaddr, &addr, sizeof(addr)))
188 return -EFAULT;
189 break;
190 }
191 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
192 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
193
194 r = put_user(dev->kvm->arch.vgic.nr_spis +
195 VGIC_NR_PRIVATE_IRQS, uaddr);
196 break;
197 }
198 }
199
200 return r;
201}
202
203static int vgic_create(struct kvm_device *dev, u32 type)
204{
205 return kvm_vgic_create(dev->kvm, type);
206}
207
208static void vgic_destroy(struct kvm_device *dev)
209{
210 kfree(dev);
211}
212
213void kvm_register_vgic_device(unsigned long type)
214{
215 switch (type) {
216 case KVM_DEV_TYPE_ARM_VGIC_V2:
217 kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
218 KVM_DEV_TYPE_ARM_VGIC_V2);
219 break;
220#ifdef CONFIG_KVM_ARM_VGIC_V3
221 case KVM_DEV_TYPE_ARM_VGIC_V3:
222 kvm_register_device_ops(&kvm_arm_vgic_v3_ops,
223 KVM_DEV_TYPE_ARM_VGIC_V3);
224 break;
225#endif
226 }
227}
228
 229/** vgic_attr_regs_access: allow userspace to read or write VGIC registers
230 *
231 * @dev: kvm device handle
232 * @attr: kvm device attribute
 233 * @reg: pointer to the value to be read or written
234 * @is_write: write flag
235 *
236 */
237static int vgic_attr_regs_access(struct kvm_device *dev,
238 struct kvm_device_attr *attr,
239 u32 *reg, bool is_write)
240{
241 gpa_t addr;
242 int cpuid, ret, c;
243 struct kvm_vcpu *vcpu, *tmp_vcpu;
244 int vcpu_lock_idx = -1;
245
246 cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
247 KVM_DEV_ARM_VGIC_CPUID_SHIFT;
248 vcpu = kvm_get_vcpu(dev->kvm, cpuid);
249 addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
250
251 mutex_lock(&dev->kvm->lock);
252
253 ret = vgic_init(dev->kvm);
254 if (ret)
255 goto out;
256
257 if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
258 ret = -EINVAL;
259 goto out;
260 }
261
262 /*
263 * Any time a vcpu is run, vcpu_load is called which tries to grab the
264 * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
265 * that no other VCPUs are run and fiddle with the vgic state while we
266 * access it.
267 */
268 ret = -EBUSY;
269 kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
270 if (!mutex_trylock(&tmp_vcpu->mutex))
271 goto out;
272 vcpu_lock_idx = c;
273 }
274
275 switch (attr->group) {
276 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
277 ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, reg);
278 break;
279 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
280 ret = vgic_v2_dist_uaccess(vcpu, is_write, addr, reg);
281 break;
282 default:
283 ret = -EINVAL;
284 break;
285 }
286
287out:
288 for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
289 tmp_vcpu = kvm_get_vcpu(dev->kvm, vcpu_lock_idx);
290 mutex_unlock(&tmp_vcpu->mutex);
291 }
292
293 mutex_unlock(&dev->kvm->lock);
294 return ret;
295}
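To show how the cpuid and register offset are packed into attr->attr for vgic_attr_regs_access(), a hedged sketch that reads one 32-bit GICv2 CPU interface register of a given VCPU; the mask/shift macros come from the KVM UAPI headers and the caller-supplied offset is illustrative:

#include <sys/ioctl.h>
#include <linux/kvm.h>	/* pulls in the asm/kvm.h KVM_DEV_ARM_VGIC_* definitions */

/* Hypothetical: read a CPU interface register at @offset for VCPU @cpuid. */
static int read_cpuif_reg(int vgic_fd, __u64 cpuid, __u64 offset, __u32 *val)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
		.attr  = ((cpuid << KVM_DEV_ARM_VGIC_CPUID_SHIFT) &
			  KVM_DEV_ARM_VGIC_CPUID_MASK) |
			 (offset & KVM_DEV_ARM_VGIC_OFFSET_MASK),
		.addr  = (__u64)(unsigned long)val,
	};

	return ioctl(vgic_fd, KVM_GET_DEVICE_ATTR, &attr);
}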
296
297/* V2 ops */
298
299static int vgic_v2_set_attr(struct kvm_device *dev,
300 struct kvm_device_attr *attr)
301{
302 int ret;
303
304 ret = vgic_set_common_attr(dev, attr);
305 if (ret != -ENXIO)
306 return ret;
307
308 switch (attr->group) {
309 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
310 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
311 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
312 u32 reg;
313
314 if (get_user(reg, uaddr))
315 return -EFAULT;
316
317 return vgic_attr_regs_access(dev, attr, &reg, true);
318 }
319 }
320
321 return -ENXIO;
322}
323
324static int vgic_v2_get_attr(struct kvm_device *dev,
325 struct kvm_device_attr *attr)
326{
327 int ret;
328
329 ret = vgic_get_common_attr(dev, attr);
330 if (ret != -ENXIO)
331 return ret;
332
333 switch (attr->group) {
334 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
335 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
336 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
337 u32 reg = 0;
338
339 ret = vgic_attr_regs_access(dev, attr, &reg, false);
340 if (ret)
341 return ret;
342 return put_user(reg, uaddr);
343 }
344 }
345
346 return -ENXIO;
347}
348
349static int vgic_v2_has_attr(struct kvm_device *dev,
350 struct kvm_device_attr *attr)
351{
352 switch (attr->group) {
353 case KVM_DEV_ARM_VGIC_GRP_ADDR:
354 switch (attr->attr) {
355 case KVM_VGIC_V2_ADDR_TYPE_DIST:
356 case KVM_VGIC_V2_ADDR_TYPE_CPU:
357 return 0;
358 }
359 break;
360 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
361 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
362 return vgic_v2_has_attr_regs(dev, attr);
363 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
364 return 0;
365 case KVM_DEV_ARM_VGIC_GRP_CTRL:
366 switch (attr->attr) {
367 case KVM_DEV_ARM_VGIC_CTRL_INIT:
368 return 0;
369 }
370 }
371 return -ENXIO;
372}
373
374struct kvm_device_ops kvm_arm_vgic_v2_ops = {
375 .name = "kvm-arm-vgic-v2",
376 .create = vgic_create,
377 .destroy = vgic_destroy,
378 .set_attr = vgic_v2_set_attr,
379 .get_attr = vgic_v2_get_attr,
380 .has_attr = vgic_v2_has_attr,
381};
382
383/* V3 ops */
384
385#ifdef CONFIG_KVM_ARM_VGIC_V3
386
387static int vgic_v3_set_attr(struct kvm_device *dev,
388 struct kvm_device_attr *attr)
389{
390 return vgic_set_common_attr(dev, attr);
391}
392
393static int vgic_v3_get_attr(struct kvm_device *dev,
394 struct kvm_device_attr *attr)
395{
396 return vgic_get_common_attr(dev, attr);
397}
398
399static int vgic_v3_has_attr(struct kvm_device *dev,
400 struct kvm_device_attr *attr)
401{
402 switch (attr->group) {
403 case KVM_DEV_ARM_VGIC_GRP_ADDR:
404 switch (attr->attr) {
405 case KVM_VGIC_V3_ADDR_TYPE_DIST:
406 case KVM_VGIC_V3_ADDR_TYPE_REDIST:
407 return 0;
408 }
409 break;
410 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
411 return 0;
412 case KVM_DEV_ARM_VGIC_GRP_CTRL:
413 switch (attr->attr) {
414 case KVM_DEV_ARM_VGIC_CTRL_INIT:
415 return 0;
416 }
417 }
418 return -ENXIO;
419}
420
421struct kvm_device_ops kvm_arm_vgic_v3_ops = {
422 .name = "kvm-arm-vgic-v3",
423 .create = vgic_create,
424 .destroy = vgic_destroy,
425 .set_attr = vgic_v3_set_attr,
426 .get_attr = vgic_v3_get_attr,
427 .has_attr = vgic_v3_has_attr,
428};
429
430#endif /* CONFIG_KVM_ARM_VGIC_V3 */
431
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
new file mode 100644
index 000000000000..a21393637e4b
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -0,0 +1,446 @@
1/*
2 * VGICv2 MMIO handling functions
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 */
13
14#include <linux/irqchip/arm-gic.h>
15#include <linux/kvm.h>
16#include <linux/kvm_host.h>
17#include <kvm/iodev.h>
18#include <kvm/arm_vgic.h>
19
20#include "vgic.h"
21#include "vgic-mmio.h"
22
23static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu,
24 gpa_t addr, unsigned int len)
25{
26 u32 value;
27
28 switch (addr & 0x0c) {
29 case GIC_DIST_CTRL:
30 value = vcpu->kvm->arch.vgic.enabled ? GICD_ENABLE : 0;
31 break;
32 case GIC_DIST_CTR:
33 value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
34 value = (value >> 5) - 1;
35 value |= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
36 break;
37 case GIC_DIST_IIDR:
38 value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
39 break;
40 default:
41 return 0;
42 }
43
44 return value;
45}
46
47static void vgic_mmio_write_v2_misc(struct kvm_vcpu *vcpu,
48 gpa_t addr, unsigned int len,
49 unsigned long val)
50{
51 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
52 bool was_enabled = dist->enabled;
53
54 switch (addr & 0x0c) {
55 case GIC_DIST_CTRL:
56 dist->enabled = val & GICD_ENABLE;
57 if (!was_enabled && dist->enabled)
58 vgic_kick_vcpus(vcpu->kvm);
59 break;
60 case GIC_DIST_CTR:
61 case GIC_DIST_IIDR:
62 /* Nothing to do */
63 return;
64 }
65}
66
67static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
68 gpa_t addr, unsigned int len,
69 unsigned long val)
70{
71 int nr_vcpus = atomic_read(&source_vcpu->kvm->online_vcpus);
72 int intid = val & 0xf;
73 int targets = (val >> 16) & 0xff;
74 int mode = (val >> 24) & 0x03;
75 int c;
76 struct kvm_vcpu *vcpu;
77
78 switch (mode) {
79 case 0x0: /* as specified by targets */
80 break;
81 case 0x1:
82 targets = (1U << nr_vcpus) - 1; /* all, ... */
83 targets &= ~(1U << source_vcpu->vcpu_id); /* but self */
84 break;
85 case 0x2: /* this very vCPU only */
86 targets = (1U << source_vcpu->vcpu_id);
87 break;
88 case 0x3: /* reserved */
89 return;
90 }
91
92 kvm_for_each_vcpu(c, vcpu, source_vcpu->kvm) {
93 struct vgic_irq *irq;
94
95 if (!(targets & (1U << c)))
96 continue;
97
98 irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid);
99
100 spin_lock(&irq->irq_lock);
101 irq->pending = true;
102 irq->source |= 1U << source_vcpu->vcpu_id;
103
104 vgic_queue_irq_unlock(source_vcpu->kvm, irq);
105 }
106}
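A worked decode of one GICD_SGIR write, to make the field extraction above concrete (the written value is an arbitrary illustration):

/*
 * Example: val = 0x00020003
 *   intid   = val & 0xf          = 3    -> SGI 3
 *   targets = (val >> 16) & 0xff = 0x02 -> target list bit 1 -> VCPU 1
 *   mode    = (val >> 24) & 0x03 = 0    -> use the target list as given
 * The loop then marks SGI 3 pending on VCPU 1 and records the writing VCPU
 * in irq->source before queueing the IRQ.
 */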
107
108static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu,
109 gpa_t addr, unsigned int len)
110{
111 u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
112 int i;
113 u64 val = 0;
114
115 for (i = 0; i < len; i++) {
116 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
117
118 val |= (u64)irq->targets << (i * 8);
119 }
120
121 return val;
122}
123
124static void vgic_mmio_write_target(struct kvm_vcpu *vcpu,
125 gpa_t addr, unsigned int len,
126 unsigned long val)
127{
128 u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
129 int i;
130
131 /* GICD_ITARGETSR[0-7] are read-only */
132 if (intid < VGIC_NR_PRIVATE_IRQS)
133 return;
134
135 for (i = 0; i < len; i++) {
136 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid + i);
137 int target;
138
139 spin_lock(&irq->irq_lock);
140
141 irq->targets = (val >> (i * 8)) & 0xff;
142 target = irq->targets ? __ffs(irq->targets) : 0;
143 irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target);
144
145 spin_unlock(&irq->irq_lock);
146 }
147}
148
149static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu,
150 gpa_t addr, unsigned int len)
151{
152 u32 intid = addr & 0x0f;
153 int i;
154 u64 val = 0;
155
156 for (i = 0; i < len; i++) {
157 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
158
159 val |= (u64)irq->source << (i * 8);
160 }
161 return val;
162}
163
164static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu,
165 gpa_t addr, unsigned int len,
166 unsigned long val)
167{
168 u32 intid = addr & 0x0f;
169 int i;
170
171 for (i = 0; i < len; i++) {
172 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
173
174 spin_lock(&irq->irq_lock);
175
176 irq->source &= ~((val >> (i * 8)) & 0xff);
177 if (!irq->source)
178 irq->pending = false;
179
180 spin_unlock(&irq->irq_lock);
181 }
182}
183
184static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu,
185 gpa_t addr, unsigned int len,
186 unsigned long val)
187{
188 u32 intid = addr & 0x0f;
189 int i;
190
191 for (i = 0; i < len; i++) {
192 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
193
194 spin_lock(&irq->irq_lock);
195
196 irq->source |= (val >> (i * 8)) & 0xff;
197
198 if (irq->source) {
199 irq->pending = true;
200 vgic_queue_irq_unlock(vcpu->kvm, irq);
201 } else {
202 spin_unlock(&irq->irq_lock);
203 }
204 }
205}
206
207static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
208{
209 if (kvm_vgic_global_state.type == VGIC_V2)
210 vgic_v2_set_vmcr(vcpu, vmcr);
211 else
212 vgic_v3_set_vmcr(vcpu, vmcr);
213}
214
215static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
216{
217 if (kvm_vgic_global_state.type == VGIC_V2)
218 vgic_v2_get_vmcr(vcpu, vmcr);
219 else
220 vgic_v3_get_vmcr(vcpu, vmcr);
221}
222
223#define GICC_ARCH_VERSION_V2 0x2
224
 225/* These are for userland accesses only; there is no guest-facing emulation. */
226static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu,
227 gpa_t addr, unsigned int len)
228{
229 struct vgic_vmcr vmcr;
230 u32 val;
231
232 vgic_get_vmcr(vcpu, &vmcr);
233
234 switch (addr & 0xff) {
235 case GIC_CPU_CTRL:
236 val = vmcr.ctlr;
237 break;
238 case GIC_CPU_PRIMASK:
239 val = vmcr.pmr;
240 break;
241 case GIC_CPU_BINPOINT:
242 val = vmcr.bpr;
243 break;
244 case GIC_CPU_ALIAS_BINPOINT:
245 val = vmcr.abpr;
246 break;
247 case GIC_CPU_IDENT:
248 val = ((PRODUCT_ID_KVM << 20) |
249 (GICC_ARCH_VERSION_V2 << 16) |
250 IMPLEMENTER_ARM);
251 break;
252 default:
253 return 0;
254 }
255
256 return val;
257}
258
259static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
260 gpa_t addr, unsigned int len,
261 unsigned long val)
262{
263 struct vgic_vmcr vmcr;
264
265 vgic_get_vmcr(vcpu, &vmcr);
266
267 switch (addr & 0xff) {
268 case GIC_CPU_CTRL:
269 vmcr.ctlr = val;
270 break;
271 case GIC_CPU_PRIMASK:
272 vmcr.pmr = val;
273 break;
274 case GIC_CPU_BINPOINT:
275 vmcr.bpr = val;
276 break;
277 case GIC_CPU_ALIAS_BINPOINT:
278 vmcr.abpr = val;
279 break;
280 }
281
282 vgic_set_vmcr(vcpu, &vmcr);
283}
284
285static const struct vgic_register_region vgic_v2_dist_registers[] = {
286 REGISTER_DESC_WITH_LENGTH(GIC_DIST_CTRL,
287 vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12,
288 VGIC_ACCESS_32bit),
289 REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP,
290 vgic_mmio_read_rao, vgic_mmio_write_wi, 1,
291 VGIC_ACCESS_32bit),
292 REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET,
293 vgic_mmio_read_enable, vgic_mmio_write_senable, 1,
294 VGIC_ACCESS_32bit),
295 REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR,
296 vgic_mmio_read_enable, vgic_mmio_write_cenable, 1,
297 VGIC_ACCESS_32bit),
298 REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET,
299 vgic_mmio_read_pending, vgic_mmio_write_spending, 1,
300 VGIC_ACCESS_32bit),
301 REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR,
302 vgic_mmio_read_pending, vgic_mmio_write_cpending, 1,
303 VGIC_ACCESS_32bit),
304 REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET,
305 vgic_mmio_read_active, vgic_mmio_write_sactive, 1,
306 VGIC_ACCESS_32bit),
307 REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR,
308 vgic_mmio_read_active, vgic_mmio_write_cactive, 1,
309 VGIC_ACCESS_32bit),
310 REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI,
311 vgic_mmio_read_priority, vgic_mmio_write_priority, 8,
312 VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
313 REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_TARGET,
314 vgic_mmio_read_target, vgic_mmio_write_target, 8,
315 VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
316 REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_CONFIG,
317 vgic_mmio_read_config, vgic_mmio_write_config, 2,
318 VGIC_ACCESS_32bit),
319 REGISTER_DESC_WITH_LENGTH(GIC_DIST_SOFTINT,
320 vgic_mmio_read_raz, vgic_mmio_write_sgir, 4,
321 VGIC_ACCESS_32bit),
322 REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_CLEAR,
323 vgic_mmio_read_sgipend, vgic_mmio_write_sgipendc, 16,
324 VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
325 REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_SET,
326 vgic_mmio_read_sgipend, vgic_mmio_write_sgipends, 16,
327 VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
328};
329
330static const struct vgic_register_region vgic_v2_cpu_registers[] = {
331 REGISTER_DESC_WITH_LENGTH(GIC_CPU_CTRL,
332 vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
333 VGIC_ACCESS_32bit),
334 REGISTER_DESC_WITH_LENGTH(GIC_CPU_PRIMASK,
335 vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
336 VGIC_ACCESS_32bit),
337 REGISTER_DESC_WITH_LENGTH(GIC_CPU_BINPOINT,
338 vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
339 VGIC_ACCESS_32bit),
340 REGISTER_DESC_WITH_LENGTH(GIC_CPU_ALIAS_BINPOINT,
341 vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
342 VGIC_ACCESS_32bit),
343 REGISTER_DESC_WITH_LENGTH(GIC_CPU_ACTIVEPRIO,
344 vgic_mmio_read_raz, vgic_mmio_write_wi, 16,
345 VGIC_ACCESS_32bit),
346 REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT,
347 vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
348 VGIC_ACCESS_32bit),
349};
350
351unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev)
352{
353 dev->regions = vgic_v2_dist_registers;
354 dev->nr_regions = ARRAY_SIZE(vgic_v2_dist_registers);
355
356 kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops);
357
358 return SZ_4K;
359}
360
361int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
362{
363 int nr_irqs = dev->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
364 const struct vgic_register_region *regions;
365 gpa_t addr;
366 int nr_regions, i, len;
367
368 addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
369
370 switch (attr->group) {
371 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
372 regions = vgic_v2_dist_registers;
373 nr_regions = ARRAY_SIZE(vgic_v2_dist_registers);
374 break;
375 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
376 regions = vgic_v2_cpu_registers;
377 nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers);
378 break;
379 default:
380 return -ENXIO;
381 }
382
383 /* We only support aligned 32-bit accesses. */
384 if (addr & 3)
385 return -ENXIO;
386
387 for (i = 0; i < nr_regions; i++) {
388 if (regions[i].bits_per_irq)
389 len = (regions[i].bits_per_irq * nr_irqs) / 8;
390 else
391 len = regions[i].len;
392
393 if (regions[i].reg_offset <= addr &&
394 regions[i].reg_offset + len > addr)
395 return 0;
396 }
397
398 return -ENXIO;
399}
400
401/*
402 * When userland tries to access the VGIC register handlers, we need to
 403 * create a usable struct vgic_io_device to be passed to the handlers, and we
 404 * have to set up a buffer similar to what a guest MMIO access would have
 405 * produced, including endianness conversion on BE systems.
406 */
407static int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev,
408 bool is_write, int offset, u32 *val)
409{
410 unsigned int len = 4;
411 u8 buf[4];
412 int ret;
413
414 if (is_write) {
415 vgic_data_host_to_mmio_bus(buf, len, *val);
416 ret = kvm_io_gic_ops.write(vcpu, &dev->dev, offset, len, buf);
417 } else {
418 ret = kvm_io_gic_ops.read(vcpu, &dev->dev, offset, len, buf);
419 if (!ret)
420 *val = vgic_data_mmio_bus_to_host(buf, len);
421 }
422
423 return ret;
424}
425
426int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write,
427 int offset, u32 *val)
428{
429 struct vgic_io_device dev = {
430 .regions = vgic_v2_cpu_registers,
431 .nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers),
432 };
433
434 return vgic_uaccess(vcpu, &dev, is_write, offset, val);
435}
436
437int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
438 int offset, u32 *val)
439{
440 struct vgic_io_device dev = {
441 .regions = vgic_v2_dist_registers,
442 .nr_regions = ARRAY_SIZE(vgic_v2_dist_registers),
443 };
444
445 return vgic_uaccess(vcpu, &dev, is_write, offset, val);
446}
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
new file mode 100644
index 000000000000..a0c515a412a7
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -0,0 +1,455 @@
1/*
2 * VGICv3 MMIO handling functions
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 */
13
14#include <linux/irqchip/arm-gic-v3.h>
15#include <linux/kvm.h>
16#include <linux/kvm_host.h>
17#include <kvm/iodev.h>
18#include <kvm/arm_vgic.h>
19
20#include <asm/kvm_emulate.h>
21
22#include "vgic.h"
23#include "vgic-mmio.h"
24
 25/* extract @num bytes at byte offset @offset in @data */
26static unsigned long extract_bytes(unsigned long data, unsigned int offset,
27 unsigned int num)
28{
29 return (data >> (offset * 8)) & GENMASK_ULL(num * 8 - 1, 0);
30}
31
32static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
33 gpa_t addr, unsigned int len)
34{
35 u32 value = 0;
36
37 switch (addr & 0x0c) {
38 case GICD_CTLR:
39 if (vcpu->kvm->arch.vgic.enabled)
40 value |= GICD_CTLR_ENABLE_SS_G1;
41 value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS;
42 break;
43 case GICD_TYPER:
44 value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
45 value = (value >> 5) - 1;
46 value |= (INTERRUPT_ID_BITS_SPIS - 1) << 19;
47 break;
48 case GICD_IIDR:
49 value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
50 break;
51 default:
52 return 0;
53 }
54
55 return value;
56}
57
58static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
59 gpa_t addr, unsigned int len,
60 unsigned long val)
61{
62 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
63 bool was_enabled = dist->enabled;
64
65 switch (addr & 0x0c) {
66 case GICD_CTLR:
67 dist->enabled = val & GICD_CTLR_ENABLE_SS_G1;
68
69 if (!was_enabled && dist->enabled)
70 vgic_kick_vcpus(vcpu->kvm);
71 break;
72 case GICD_TYPER:
73 case GICD_IIDR:
74 return;
75 }
76}
77
78static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu,
79 gpa_t addr, unsigned int len)
80{
81 int intid = VGIC_ADDR_TO_INTID(addr, 64);
82 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
83
84 if (!irq)
85 return 0;
86
87 /* The upper word is RAZ for us. */
88 if (addr & 4)
89 return 0;
90
91 return extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len);
92}
93
94static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
95 gpa_t addr, unsigned int len,
96 unsigned long val)
97{
98 int intid = VGIC_ADDR_TO_INTID(addr, 64);
99 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
100
101 if (!irq)
102 return;
103
104 /* The upper word is WI for us since we don't implement Aff3. */
105 if (addr & 4)
106 return;
107
108 spin_lock(&irq->irq_lock);
109
110 /* We only care about and preserve Aff0, Aff1 and Aff2. */
111 irq->mpidr = val & GENMASK(23, 0);
112 irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr);
113
114 spin_unlock(&irq->irq_lock);
115}
116
117static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
118 gpa_t addr, unsigned int len)
119{
120 unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
121 int target_vcpu_id = vcpu->vcpu_id;
122 u64 value;
123
124 value = (mpidr & GENMASK(23, 0)) << 32;
125 value |= ((target_vcpu_id & 0xffff) << 8);
126 if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1)
127 value |= GICR_TYPER_LAST;
128
129 return extract_bytes(value, addr & 7, len);
130}
131
132static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu,
133 gpa_t addr, unsigned int len)
134{
135 return (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
136}
137
138static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu,
139 gpa_t addr, unsigned int len)
140{
141 switch (addr & 0xffff) {
142 case GICD_PIDR2:
143 /* report a GICv3 compliant implementation */
144 return 0x3b;
145 }
146
147 return 0;
148}
149
150/*
151 * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the
152 * redistributors, while SPIs are covered by registers in the distributor
 153 * block. Accesses to private IRQs through this distributor block are ignored.
 154 * We take some special care here to get the calculation of the register
 155 * offset right.
156 */
157#define REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(off, rd, wr, bpi, acc) \
158 { \
159 .reg_offset = off, \
160 .bits_per_irq = bpi, \
161 .len = (bpi * VGIC_NR_PRIVATE_IRQS) / 8, \
162 .access_flags = acc, \
163 .read = vgic_mmio_read_raz, \
164 .write = vgic_mmio_write_wi, \
165 }, { \
166 .reg_offset = off + (bpi * VGIC_NR_PRIVATE_IRQS) / 8, \
167 .bits_per_irq = bpi, \
168 .len = (bpi * (1024 - VGIC_NR_PRIVATE_IRQS)) / 8, \
169 .access_flags = acc, \
170 .read = rd, \
171 .write = wr, \
172 }
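As an illustration of what this macro expands to, take GICD_ISENABLER with one bit per IRQ; the numbers below simply instantiate the expressions in the macro:

/*
 * Example: REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER, ..., 1, ...)
 * yields two regions:
 *   offset GICD_ISENABLER,     len (1 * 32) / 8          =   4 bytes, RAZ/WI
 *   offset GICD_ISENABLER + 4, len (1 * (1024 - 32)) / 8 = 124 bytes, real handlers
 * so the private-IRQ part of the distributor range is silently ignored and only
 * the SPI part reaches vgic_mmio_read_enable()/vgic_mmio_write_senable().
 */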
173
174static const struct vgic_register_region vgic_v3_dist_registers[] = {
175 REGISTER_DESC_WITH_LENGTH(GICD_CTLR,
176 vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, 16,
177 VGIC_ACCESS_32bit),
178 REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR,
179 vgic_mmio_read_rao, vgic_mmio_write_wi, 1,
180 VGIC_ACCESS_32bit),
181 REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER,
182 vgic_mmio_read_enable, vgic_mmio_write_senable, 1,
183 VGIC_ACCESS_32bit),
184 REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICENABLER,
185 vgic_mmio_read_enable, vgic_mmio_write_cenable, 1,
186 VGIC_ACCESS_32bit),
187 REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR,
188 vgic_mmio_read_pending, vgic_mmio_write_spending, 1,
189 VGIC_ACCESS_32bit),
190 REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,
191 vgic_mmio_read_pending, vgic_mmio_write_cpending, 1,
192 VGIC_ACCESS_32bit),
193 REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER,
194 vgic_mmio_read_active, vgic_mmio_write_sactive, 1,
195 VGIC_ACCESS_32bit),
196 REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER,
197 vgic_mmio_read_active, vgic_mmio_write_cactive, 1,
198 VGIC_ACCESS_32bit),
199 REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR,
200 vgic_mmio_read_priority, vgic_mmio_write_priority, 8,
201 VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
202 REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ITARGETSR,
203 vgic_mmio_read_raz, vgic_mmio_write_wi, 8,
204 VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
205 REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICFGR,
206 vgic_mmio_read_config, vgic_mmio_write_config, 2,
207 VGIC_ACCESS_32bit),
208 REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGRPMODR,
209 vgic_mmio_read_raz, vgic_mmio_write_wi, 1,
210 VGIC_ACCESS_32bit),
211 REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IROUTER,
212 vgic_mmio_read_irouter, vgic_mmio_write_irouter, 64,
213 VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
214 REGISTER_DESC_WITH_LENGTH(GICD_IDREGS,
215 vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48,
216 VGIC_ACCESS_32bit),
217};
218
219static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
220 REGISTER_DESC_WITH_LENGTH(GICR_CTLR,
221 vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
222 VGIC_ACCESS_32bit),
223 REGISTER_DESC_WITH_LENGTH(GICR_IIDR,
224 vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4,
225 VGIC_ACCESS_32bit),
226 REGISTER_DESC_WITH_LENGTH(GICR_TYPER,
227 vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8,
228 VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
229 REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER,
230 vgic_mmio_read_raz, vgic_mmio_write_wi, 8,
231 VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
232 REGISTER_DESC_WITH_LENGTH(GICR_PENDBASER,
233 vgic_mmio_read_raz, vgic_mmio_write_wi, 8,
234 VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
235 REGISTER_DESC_WITH_LENGTH(GICR_IDREGS,
236 vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48,
237 VGIC_ACCESS_32bit),
238};
239
240static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
241 REGISTER_DESC_WITH_LENGTH(GICR_IGROUPR0,
242 vgic_mmio_read_rao, vgic_mmio_write_wi, 4,
243 VGIC_ACCESS_32bit),
244 REGISTER_DESC_WITH_LENGTH(GICR_ISENABLER0,
245 vgic_mmio_read_enable, vgic_mmio_write_senable, 4,
246 VGIC_ACCESS_32bit),
247 REGISTER_DESC_WITH_LENGTH(GICR_ICENABLER0,
248 vgic_mmio_read_enable, vgic_mmio_write_cenable, 4,
249 VGIC_ACCESS_32bit),
250 REGISTER_DESC_WITH_LENGTH(GICR_ISPENDR0,
251 vgic_mmio_read_pending, vgic_mmio_write_spending, 4,
252 VGIC_ACCESS_32bit),
253 REGISTER_DESC_WITH_LENGTH(GICR_ICPENDR0,
254 vgic_mmio_read_pending, vgic_mmio_write_cpending, 4,
255 VGIC_ACCESS_32bit),
256 REGISTER_DESC_WITH_LENGTH(GICR_ISACTIVER0,
257 vgic_mmio_read_active, vgic_mmio_write_sactive, 4,
258 VGIC_ACCESS_32bit),
259 REGISTER_DESC_WITH_LENGTH(GICR_ICACTIVER0,
260 vgic_mmio_read_active, vgic_mmio_write_cactive, 4,
261 VGIC_ACCESS_32bit),
262 REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0,
263 vgic_mmio_read_priority, vgic_mmio_write_priority, 32,
264 VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
265 REGISTER_DESC_WITH_LENGTH(GICR_ICFGR0,
266 vgic_mmio_read_config, vgic_mmio_write_config, 8,
267 VGIC_ACCESS_32bit),
268 REGISTER_DESC_WITH_LENGTH(GICR_IGRPMODR0,
269 vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
270 VGIC_ACCESS_32bit),
271 REGISTER_DESC_WITH_LENGTH(GICR_NSACR,
272 vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
273 VGIC_ACCESS_32bit),
274};
275
276unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev)
277{
278 dev->regions = vgic_v3_dist_registers;
279 dev->nr_regions = ARRAY_SIZE(vgic_v3_dist_registers);
280
281 kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops);
282
283 return SZ_64K;
284}
285
286int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address)
287{
288 int nr_vcpus = atomic_read(&kvm->online_vcpus);
289 struct kvm_vcpu *vcpu;
290 struct vgic_io_device *devices;
291 int c, ret = 0;
292
293 devices = kmalloc(sizeof(struct vgic_io_device) * nr_vcpus * 2,
294 GFP_KERNEL);
295 if (!devices)
296 return -ENOMEM;
297
298 kvm_for_each_vcpu(c, vcpu, kvm) {
299 gpa_t rd_base = redist_base_address + c * SZ_64K * 2;
300 gpa_t sgi_base = rd_base + SZ_64K;
301 struct vgic_io_device *rd_dev = &devices[c * 2];
302 struct vgic_io_device *sgi_dev = &devices[c * 2 + 1];
303
304 kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
305 rd_dev->base_addr = rd_base;
306 rd_dev->regions = vgic_v3_rdbase_registers;
307 rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
308 rd_dev->redist_vcpu = vcpu;
309
310 mutex_lock(&kvm->slots_lock);
311 ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base,
312 SZ_64K, &rd_dev->dev);
313 mutex_unlock(&kvm->slots_lock);
314
315 if (ret)
316 break;
317
318 kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops);
319 sgi_dev->base_addr = sgi_base;
320 sgi_dev->regions = vgic_v3_sgibase_registers;
321 sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers);
322 sgi_dev->redist_vcpu = vcpu;
323
324 mutex_lock(&kvm->slots_lock);
325 ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base,
326 SZ_64K, &sgi_dev->dev);
327 mutex_unlock(&kvm->slots_lock);
328 if (ret) {
329 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
330 &rd_dev->dev);
331 break;
332 }
333 }
334
335 if (ret) {
336 /* The current c failed, so we start with the previous one. */
337 for (c--; c >= 0; c--) {
338 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
339 &devices[c * 2].dev);
340 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
341 &devices[c * 2 + 1].dev);
342 }
343 kfree(devices);
344 } else {
345 kvm->arch.vgic.redist_iodevs = devices;
346 }
347
348 return ret;
349}
350
351/*
352 * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI
353 * generation register ICC_SGI1R_EL1) with a given VCPU.
354 * If the VCPU's MPIDR matches, return the level0 affinity, otherwise
355 * return -1.
356 */
357static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
358{
359 unsigned long affinity;
360 int level0;
361
362 /*
363 * Split the current VCPU's MPIDR into affinity level 0 and the
364 * rest as this is what we have to compare against.
365 */
366 affinity = kvm_vcpu_get_mpidr_aff(vcpu);
367 level0 = MPIDR_AFFINITY_LEVEL(affinity, 0);
368 affinity &= ~MPIDR_LEVEL_MASK;
369
370 /* bail out if the upper three levels don't match */
371 if (sgi_aff != affinity)
372 return -1;
373
374 /* Is this VCPU's bit set in the mask ? */
375 if (!(sgi_cpu_mask & BIT(level0)))
376 return -1;
377
378 return level0;
379}
380
381/*
382 * The ICC_SGI* registers encode the affinity differently from the MPIDR,
383 * so provide a wrapper to use the existing defines to isolate a certain
384 * affinity level.
385 */
386#define SGI_AFFINITY_LEVEL(reg, level) \
387 ((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \
388 >> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level))
389
390/**
391 * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs
392 * @vcpu: The VCPU requesting a SGI
393 * @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU
394 *
 395 * With GICv3 (and ARE=1), CPUs trigger SGIs by writing to a system register.
396 * This will trap in sys_regs.c and call this function.
397 * This ICC_SGI1R_EL1 register contains the upper three affinity levels of the
398 * target processors as well as a bitmask of 16 Aff0 CPUs.
399 * If the interrupt routing mode bit is not set, we iterate over all VCPUs to
400 * check for matching ones. If this bit is set, we signal all, but not the
401 * calling VCPU.
402 */
403void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
404{
405 struct kvm *kvm = vcpu->kvm;
406 struct kvm_vcpu *c_vcpu;
407 u16 target_cpus;
408 u64 mpidr;
409 int sgi, c;
410 int vcpu_id = vcpu->vcpu_id;
411 bool broadcast;
412
413 sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
414 broadcast = reg & BIT(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);
415 target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT;
416 mpidr = SGI_AFFINITY_LEVEL(reg, 3);
417 mpidr |= SGI_AFFINITY_LEVEL(reg, 2);
418 mpidr |= SGI_AFFINITY_LEVEL(reg, 1);
419
420 /*
421 * We iterate over all VCPUs to find the MPIDRs matching the request.
422 * If we have handled one CPU, we clear its bit to detect early
423 * if we are already finished. This avoids iterating through all
 424 * VCPUs when, as is usually the case, we only signal a single VCPU.
425 */
426 kvm_for_each_vcpu(c, c_vcpu, kvm) {
427 struct vgic_irq *irq;
428
429 /* Exit early if we have dealt with all requested CPUs */
430 if (!broadcast && target_cpus == 0)
431 break;
432
433 /* Don't signal the calling VCPU */
434 if (broadcast && c == vcpu_id)
435 continue;
436
437 if (!broadcast) {
438 int level0;
439
440 level0 = match_mpidr(mpidr, target_cpus, c_vcpu);
441 if (level0 == -1)
442 continue;
443
444 /* remove this matching VCPU from the mask */
445 target_cpus &= ~BIT(level0);
446 }
447
448 irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi);
449
450 spin_lock(&irq->irq_lock);
451 irq->pending = true;
452
453 vgic_queue_irq_unlock(vcpu->kvm, irq);
454 }
455}
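A short narrative example of the loop above for a non-broadcast request; the affinity values are purely illustrative:

/*
 * Example: the guest requests SGI 5 with TargetList bits 0 and 2 set under a
 * matching Aff3.Aff2.Aff1. For each VCPU, match_mpidr() returns its Aff0 only
 * if the upper affinity levels and its TargetList bit both match; that bit is
 * then cleared from target_cpus, and once target_cpus reaches 0 the loop exits
 * early, having queued SGI 5 on exactly the two matching VCPUs.
 */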
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
new file mode 100644
index 000000000000..059595ec3da0
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -0,0 +1,526 @@
1/*
2 * VGIC MMIO handling functions
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 */
13
14#include <linux/bitops.h>
15#include <linux/bsearch.h>
16#include <linux/kvm.h>
17#include <linux/kvm_host.h>
18#include <kvm/iodev.h>
19#include <kvm/arm_vgic.h>
20
21#include "vgic.h"
22#include "vgic-mmio.h"
23
24unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
25 gpa_t addr, unsigned int len)
26{
27 return 0;
28}
29
30unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
31 gpa_t addr, unsigned int len)
32{
33 return -1UL;
34}
35
36void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
37 unsigned int len, unsigned long val)
38{
39 /* Ignore */
40}
41
42/*
43 * Read accesses to both GICD_ICENABLER and GICD_ISENABLER return the value
44 * of the enabled bit, so there is only one function for both here.
45 */
46unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
47 gpa_t addr, unsigned int len)
48{
49 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
50 u32 value = 0;
51 int i;
52
53 /* Loop over all IRQs affected by this read */
54 for (i = 0; i < len * 8; i++) {
55 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
56
57 if (irq->enabled)
58 value |= (1U << i);
59 }
60
61 return value;
62}
63
64void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
65 gpa_t addr, unsigned int len,
66 unsigned long val)
67{
68 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
69 int i;
70
71 for_each_set_bit(i, &val, len * 8) {
72 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
73
74 spin_lock(&irq->irq_lock);
75 irq->enabled = true;
76 vgic_queue_irq_unlock(vcpu->kvm, irq);
77 }
78}
79
80void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
81 gpa_t addr, unsigned int len,
82 unsigned long val)
83{
84 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
85 int i;
86
87 for_each_set_bit(i, &val, len * 8) {
88 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
89
90 spin_lock(&irq->irq_lock);
91
92 irq->enabled = false;
93
94 spin_unlock(&irq->irq_lock);
95 }
96}
97
98unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
99 gpa_t addr, unsigned int len)
100{
101 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
102 u32 value = 0;
103 int i;
104
105 /* Loop over all IRQs affected by this read */
106 for (i = 0; i < len * 8; i++) {
107 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
108
109 if (irq->pending)
110 value |= (1U << i);
111 }
112
113 return value;
114}
115
116void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
117 gpa_t addr, unsigned int len,
118 unsigned long val)
119{
120 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
121 int i;
122
123 for_each_set_bit(i, &val, len * 8) {
124 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
125
126 spin_lock(&irq->irq_lock);
127 irq->pending = true;
128 if (irq->config == VGIC_CONFIG_LEVEL)
129 irq->soft_pending = true;
130
131 vgic_queue_irq_unlock(vcpu->kvm, irq);
132 }
133}
134
135void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
136 gpa_t addr, unsigned int len,
137 unsigned long val)
138{
139 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
140 int i;
141
142 for_each_set_bit(i, &val, len * 8) {
143 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
144
145 spin_lock(&irq->irq_lock);
146
147 if (irq->config == VGIC_CONFIG_LEVEL) {
148 irq->soft_pending = false;
149 irq->pending = irq->line_level;
150 } else {
151 irq->pending = false;
152 }
153
154 spin_unlock(&irq->irq_lock);
155 }
156}
157
158unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
159 gpa_t addr, unsigned int len)
160{
161 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
162 u32 value = 0;
163 int i;
164
165 /* Loop over all IRQs affected by this read */
166 for (i = 0; i < len * 8; i++) {
167 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
168
169 if (irq->active)
170 value |= (1U << i);
171 }
172
173 return value;
174}
175
176static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
177 bool new_active_state)
178{
179 spin_lock(&irq->irq_lock);
180 /*
181 * If this virtual IRQ was written into a list register, we
182 * have to make sure the CPU that runs the VCPU thread has
183 * synced back LR state to the struct vgic_irq. We can only
 185 * know this for sure when either this irq is not assigned to
185 * anyone's AP list anymore, or the VCPU thread is not
186 * running on any CPUs.
187 *
188 * In the opposite case, we know the VCPU thread may be on its
189 * way back from the guest and still has to sync back this
190 * IRQ, so we release and re-acquire the spin_lock to let the
191 * other thread sync back the IRQ.
192 */
193 while (irq->vcpu && /* IRQ may have state in an LR somewhere */
194 irq->vcpu->cpu != -1) { /* VCPU thread is running */
195 BUG_ON(irq->intid < VGIC_NR_PRIVATE_IRQS);
196 cond_resched_lock(&irq->irq_lock);
197 }
198
199 irq->active = new_active_state;
200 if (new_active_state)
201 vgic_queue_irq_unlock(vcpu->kvm, irq);
202 else
203 spin_unlock(&irq->irq_lock);
204}
205
206/*
207 * If we are fiddling with an IRQ's active state, we have to make sure the IRQ
208 * is not queued on some running VCPU's LRs, because then the change to the
209 * active state can be overwritten when the VCPU's state is synced coming back
210 * from the guest.
211 *
212 * For shared interrupts, we have to stop all the VCPUs because interrupts can
213 * be migrated while we don't hold the IRQ locks and we don't want to be
214 * chasing moving targets.
215 *
216 * For private interrupts, we only have to make sure the single and only VCPU
217 * that can potentially queue the IRQ is stopped.
218 */
219static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid)
220{
221 if (intid < VGIC_NR_PRIVATE_IRQS)
222 kvm_arm_halt_vcpu(vcpu);
223 else
224 kvm_arm_halt_guest(vcpu->kvm);
225}
226
227/* See vgic_change_active_prepare */
228static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid)
229{
230 if (intid < VGIC_NR_PRIVATE_IRQS)
231 kvm_arm_resume_vcpu(vcpu);
232 else
233 kvm_arm_resume_guest(vcpu->kvm);
234}
235
236void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
237 gpa_t addr, unsigned int len,
238 unsigned long val)
239{
240 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
241 int i;
242
243 vgic_change_active_prepare(vcpu, intid);
244 for_each_set_bit(i, &val, len * 8) {
245 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
246 vgic_mmio_change_active(vcpu, irq, false);
247 }
248 vgic_change_active_finish(vcpu, intid);
249}
250
251void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
252 gpa_t addr, unsigned int len,
253 unsigned long val)
254{
255 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
256 int i;
257
258 vgic_change_active_prepare(vcpu, intid);
259 for_each_set_bit(i, &val, len * 8) {
260 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
261 vgic_mmio_change_active(vcpu, irq, true);
262 }
263 vgic_change_active_finish(vcpu, intid);
264}
265
266unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
267 gpa_t addr, unsigned int len)
268{
269 u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
270 int i;
271 u64 val = 0;
272
273 for (i = 0; i < len; i++) {
274 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
275
276 val |= (u64)irq->priority << (i * 8);
277 }
278
279 return val;
280}
281
282/*
283 * We currently don't handle changing the priority of an interrupt that
284 * is already pending on a VCPU. If there is a need for this, we would
285 * need to make this VCPU exit and re-evaluate the priorities, potentially
286 * leading to this interrupt getting presented now to the guest (if it has
287 * been masked by the priority mask before).
288 */
289void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
290 gpa_t addr, unsigned int len,
291 unsigned long val)
292{
293 u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
294 int i;
295
296 for (i = 0; i < len; i++) {
297 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
298
299 spin_lock(&irq->irq_lock);
300 /* Narrow the priority range to what we actually support */
301 irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS);
302 spin_unlock(&irq->irq_lock);
303 }
304}
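As a quick illustration of the narrowing above (a hypothetical example, not part of the patch): with VGIC_PRI_BITS defined as 5 further down in vgic.h, GENMASK(7, 8 - VGIC_PRI_BITS) evaluates to GENMASK(7, 3) == 0xf8, so only the top five bits of a guest-written priority are kept.

	/* Hypothetical sketch of the masking performed above (VGIC_PRI_BITS == 5). */
	u8 guest_prio  = 0xa5;
	u8 stored_prio = guest_prio & GENMASK(7, 8 - VGIC_PRI_BITS);	/* == 0xa0 */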
305
306unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu,
307 gpa_t addr, unsigned int len)
308{
309 u32 intid = VGIC_ADDR_TO_INTID(addr, 2);
310 u32 value = 0;
311 int i;
312
313 for (i = 0; i < len * 4; i++) {
314 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
315
316 if (irq->config == VGIC_CONFIG_EDGE)
317 value |= (2U << (i * 2));
318 }
319
320 return value;
321}
322
323void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
324 gpa_t addr, unsigned int len,
325 unsigned long val)
326{
327 u32 intid = VGIC_ADDR_TO_INTID(addr, 2);
328 int i;
329
330 for (i = 0; i < len * 4; i++) {
331 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
332
333 /*
334 * The configuration cannot be changed for SGIs in general;
335 * for PPIs this is IMPLEMENTATION DEFINED. The arch timer
336 * code relies on PPIs being level triggered, so we also
337 * make them read-only here.
338 */
339 if (intid + i < VGIC_NR_PRIVATE_IRQS)
340 continue;
341
342 spin_lock(&irq->irq_lock);
343 if (test_bit(i * 2 + 1, &val)) {
344 irq->config = VGIC_CONFIG_EDGE;
345 } else {
346 irq->config = VGIC_CONFIG_LEVEL;
347 irq->pending = irq->line_level | irq->soft_pending;
348 }
349 spin_unlock(&irq->irq_lock);
350 }
351}
352
353static int match_region(const void *key, const void *elt)
354{
355 const unsigned int offset = (unsigned long)key;
356 const struct vgic_register_region *region = elt;
357
358 if (offset < region->reg_offset)
359 return -1;
360
361 if (offset >= region->reg_offset + region->len)
362 return 1;
363
364 return 0;
365}
366
367/* Find the proper register handler entry given a certain address offset. */
368static const struct vgic_register_region *
369vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions,
370 unsigned int offset)
371{
372 return bsearch((void *)(uintptr_t)offset, region, nr_regions,
373 sizeof(region[0]), match_region);
374}
375
376/*
377 * kvm_mmio_read_buf() returns a value in a format where it can be converted
378 * to a byte array and be directly observed as the guest wanted it to appear
379 * in memory if it had done the store itself, which is LE for the GIC, as the
380 * guest knows the GIC is always LE.
381 *
382 * We convert this value to the CPU's native format to deal with it as a data
383 * value.
384 */
385unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len)
386{
387 unsigned long data = kvm_mmio_read_buf(val, len);
388
389 switch (len) {
390 case 1:
391 return data;
392 case 2:
393 return le16_to_cpu(data);
394 case 4:
395 return le32_to_cpu(data);
396 default:
397 return le64_to_cpu(data);
398 }
399}
400
401/*
402 * kvm_mmio_write_buf() expects a value in a format such that if converted to
403 * a byte array it is observed as the guest would see it if it could perform
404 * the load directly. Since the GIC is LE, and the guest knows this, the
405 * guest expects a value in little endian format.
406 *
407 * We convert the data value from the CPU's native format to LE so that the
408 * value is returned in the proper format.
409 */
410void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
411 unsigned long data)
412{
413 switch (len) {
414 case 1:
415 break;
416 case 2:
417 data = cpu_to_le16(data);
418 break;
419 case 4:
420 data = cpu_to_le32(data);
421 break;
422 default:
423 data = cpu_to_le64(data);
424 }
425
426 kvm_mmio_write_buf(buf, len, data);
427}
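A rough round-trip sketch of these two helpers (illustrative only, not part of the patch): a 32-bit value the guest stored little-endian decodes to the same numeric value on both LE and BE hosts, and encoding it again reproduces the guest's byte layout.

	u8 buf[4] = { 0x78, 0x56, 0x34, 0x12 };	/* LE encoding of 0x12345678 */
	unsigned long data = vgic_data_mmio_bus_to_host(buf, sizeof(buf));
	/* data == 0x12345678 regardless of host endianness */
	vgic_data_host_to_mmio_bus(buf, sizeof(buf), data);	/* restores the LE bytes */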
428
429static
430struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev)
431{
432 return container_of(dev, struct vgic_io_device, dev);
433}
434
435static bool check_region(const struct vgic_register_region *region,
436 gpa_t addr, int len)
437{
438 if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1)
439 return true;
440 if ((region->access_flags & VGIC_ACCESS_32bit) &&
441 len == sizeof(u32) && !(addr & 3))
442 return true;
443 if ((region->access_flags & VGIC_ACCESS_64bit) &&
444 len == sizeof(u64) && !(addr & 7))
445 return true;
446
447 return false;
448}
449
450static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
451 gpa_t addr, int len, void *val)
452{
453 struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
454 const struct vgic_register_region *region;
455 struct kvm_vcpu *r_vcpu;
456 unsigned long data;
457
458 region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
459 addr - iodev->base_addr);
460 if (!region || !check_region(region, addr, len)) {
461 memset(val, 0, len);
462 return 0;
463 }
464
465 r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
466 data = region->read(r_vcpu, addr, len);
467 vgic_data_host_to_mmio_bus(val, len, data);
468 return 0;
469}
470
471static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
472 gpa_t addr, int len, const void *val)
473{
474 struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
475 const struct vgic_register_region *region;
476 struct kvm_vcpu *r_vcpu;
477 unsigned long data = vgic_data_mmio_bus_to_host(val, len);
478
479 region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
480 addr - iodev->base_addr);
481 if (!region)
482 return 0;
483
484 if (!check_region(region, addr, len))
485 return 0;
486
487 r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
488 region->write(r_vcpu, addr, len, data);
489 return 0;
490}
491
492struct kvm_io_device_ops kvm_io_gic_ops = {
493 .read = dispatch_mmio_read,
494 .write = dispatch_mmio_write,
495};
496
497int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
498 enum vgic_type type)
499{
500 struct vgic_io_device *io_device = &kvm->arch.vgic.dist_iodev;
501 int ret = 0;
502 unsigned int len;
503
504 switch (type) {
505 case VGIC_V2:
506 len = vgic_v2_init_dist_iodev(io_device);
507 break;
508#ifdef CONFIG_KVM_ARM_VGIC_V3
509 case VGIC_V3:
510 len = vgic_v3_init_dist_iodev(io_device);
511 break;
512#endif
513 default:
514 BUG_ON(1);
515 }
516
517 io_device->base_addr = dist_base_address;
518 io_device->redist_vcpu = NULL;
519
520 mutex_lock(&kvm->slots_lock);
521 ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address,
522 len, &io_device->dev);
523 mutex_unlock(&kvm->slots_lock);
524
525 return ret;
526}
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
new file mode 100644
index 000000000000..850901482aec
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-mmio.h
@@ -0,0 +1,150 @@
1/*
2 * Copyright (C) 2015, 2016 ARM Ltd.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#ifndef __KVM_ARM_VGIC_MMIO_H__
17#define __KVM_ARM_VGIC_MMIO_H__
18
19struct vgic_register_region {
20 unsigned int reg_offset;
21 unsigned int len;
22 unsigned int bits_per_irq;
23 unsigned int access_flags;
24 unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr,
25 unsigned int len);
26 void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len,
27 unsigned long val);
28};
29
30extern struct kvm_io_device_ops kvm_io_gic_ops;
31
32#define VGIC_ACCESS_8bit 1
33#define VGIC_ACCESS_32bit 2
34#define VGIC_ACCESS_64bit 4
35
36/*
37 * Generate a mask that covers the number of bytes required to address
38 * up to 1024 interrupts, each represented by <bits> bits. This assumes
39 * that <bits> is a power of two.
40 */
41#define VGIC_ADDR_IRQ_MASK(bits) (((bits) * 1024 / 8) - 1)
42
43/*
44 * (addr & mask) gives us the byte offset for the INT ID, so we want to
45 * divide this by 'bytes per irq' to get the INT ID, which is given
46 * by '(bits) / 8'. But we do this with fixed-point arithmetic and
47 * take advantage of the fact that division by a fraction equals
48 * multiplication by the inverted fraction, and scale up both the
49 * numerator and denominator by 8 to support at most 64 bits per IRQ:
50 */
51#define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \
52 64 / (bits) / 8)
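A few worked examples of the arithmetic (hypothetical byte offsets within a register group, not part of the patch); each of these offsets maps to the first SPI, INTID 32:

	/*
	 * VGIC_ADDR_TO_INTID(0x04, 1) ==  4 * 64 / 1 / 8 == 32  (1 bit/IRQ, e.g. enable regs)
	 * VGIC_ADDR_TO_INTID(0x08, 2) ==  8 * 64 / 2 / 8 == 32  (2 bits/IRQ, e.g. config regs)
	 * VGIC_ADDR_TO_INTID(0x20, 8) == 32 * 64 / 8 / 8 == 32  (8 bits/IRQ, e.g. priority regs)
	 */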
53
54/*
55 * Some VGIC registers store per-IRQ information, with a different number
56 * of bits per IRQ. For those registers this macro is used.
57 * The _WITH_LENGTH version instantiates registers with a fixed length
58 * and is mutually exclusive with the _PER_IRQ version.
59 */
60#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc) \
61 { \
62 .reg_offset = off, \
63 .bits_per_irq = bpi, \
64 .len = bpi * 1024 / 8, \
65 .access_flags = acc, \
66 .read = rd, \
67 .write = wr, \
68 }
69
70#define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \
71 { \
72 .reg_offset = off, \
73 .bits_per_irq = 0, \
74 .len = length, \
75 .access_flags = acc, \
76 .read = rd, \
77 .write = wr, \
78 }
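A minimal usage sketch (illustrative only; the table name and offsets below are invented): a handler table built from these macros must be sorted by reg_offset so that vgic_find_mmio_region() can bsearch it from dispatch_mmio_read()/dispatch_mmio_write().

	static const struct vgic_register_region example_regions[] = {
		/* A 12-byte control block handled as read-as-zero / write-ignore. */
		REGISTER_DESC_WITH_LENGTH(0x000,
			vgic_mmio_read_raz, vgic_mmio_write_wi, 12,
			VGIC_ACCESS_32bit),
		/* A priority range carrying 8 bits of state per IRQ. */
		REGISTER_DESC_WITH_BITS_PER_IRQ(0x400,
			vgic_mmio_read_priority, vgic_mmio_write_priority, 8,
			VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
	};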
79
80int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu,
81 struct vgic_register_region *reg_desc,
82 struct vgic_io_device *region,
83 int nr_irqs, bool offset_private);
84
85unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len);
86
87void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
88 unsigned long data);
89
90unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
91 gpa_t addr, unsigned int len);
92
93unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
94 gpa_t addr, unsigned int len);
95
96void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
97 unsigned int len, unsigned long val);
98
99unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
100 gpa_t addr, unsigned int len);
101
102void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
103 gpa_t addr, unsigned int len,
104 unsigned long val);
105
106void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
107 gpa_t addr, unsigned int len,
108 unsigned long val);
109
110unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
111 gpa_t addr, unsigned int len);
112
113void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
114 gpa_t addr, unsigned int len,
115 unsigned long val);
116
117void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
118 gpa_t addr, unsigned int len,
119 unsigned long val);
120
121unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
122 gpa_t addr, unsigned int len);
123
124void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
125 gpa_t addr, unsigned int len,
126 unsigned long val);
127
128void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
129 gpa_t addr, unsigned int len,
130 unsigned long val);
131
132unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
133 gpa_t addr, unsigned int len);
134
135void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
136 gpa_t addr, unsigned int len,
137 unsigned long val);
138
139unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu,
140 gpa_t addr, unsigned int len);
141
142void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
143 gpa_t addr, unsigned int len,
144 unsigned long val);
145
146unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev);
147
148unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev);
149
150#endif
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
new file mode 100644
index 000000000000..8ad42c217770
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -0,0 +1,352 @@
1/*
2 * Copyright (C) 2015, 2016 ARM Ltd.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/irqchip/arm-gic.h>
18#include <linux/kvm.h>
19#include <linux/kvm_host.h>
20#include <kvm/arm_vgic.h>
21#include <asm/kvm_mmu.h>
22
23#include "vgic.h"
24
25/*
26 * Call this function to convert a u64 value to an unsigned long * bitmask
27 * in a way that works on both 32-bit and 64-bit LE and BE platforms.
28 *
29 * Warning: Calling this function may modify *val.
30 */
31static unsigned long *u64_to_bitmask(u64 *val)
32{
33#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32
34 *val = (*val >> 32) | (*val << 32);
35#endif
36 return (unsigned long *)val;
37}
38
39void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu)
40{
41 struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
42
43 if (cpuif->vgic_misr & GICH_MISR_EOI) {
44 u64 eisr = cpuif->vgic_eisr;
45 unsigned long *eisr_bmap = u64_to_bitmask(&eisr);
46 int lr;
47
48 for_each_set_bit(lr, eisr_bmap, kvm_vgic_global_state.nr_lr) {
49 u32 intid = cpuif->vgic_lr[lr] & GICH_LR_VIRTUALID;
50
51 WARN_ON(cpuif->vgic_lr[lr] & GICH_LR_STATE);
52
53 kvm_notify_acked_irq(vcpu->kvm, 0,
54 intid - VGIC_NR_PRIVATE_IRQS);
55 }
56 }
57
58 /* check and disable underflow maintenance IRQ */
59 cpuif->vgic_hcr &= ~GICH_HCR_UIE;
60
61 /*
62 * In the next iterations of the vcpu loop, if we sync the
63 * vgic state after flushing it, but before entering the guest
64 * (this happens for pending signals and vmid rollovers), then
65 * make sure we don't pick up any old maintenance interrupts
66 * here.
67 */
68 cpuif->vgic_eisr = 0;
69}
70
71void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
72{
73 struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
74
75 cpuif->vgic_hcr |= GICH_HCR_UIE;
76}
77
78/*
79 * transfer the content of the LRs back into the corresponding ap_list:
80 * - active bit is transferred as is
81 * - pending bit is
82 * - transferred as is in case of edge sensitive IRQs
83 * - set to the line-level (resample time) for level sensitive IRQs
84 */
85void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
86{
87 struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
88 int lr;
89
90 for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) {
91 u32 val = cpuif->vgic_lr[lr];
92 u32 intid = val & GICH_LR_VIRTUALID;
93 struct vgic_irq *irq;
94
95 irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
96
97 spin_lock(&irq->irq_lock);
98
99 /* Always preserve the active bit */
100 irq->active = !!(val & GICH_LR_ACTIVE_BIT);
101
102 /* Edge is the only case where we preserve the pending bit */
103 if (irq->config == VGIC_CONFIG_EDGE &&
104 (val & GICH_LR_PENDING_BIT)) {
105 irq->pending = true;
106
107 if (vgic_irq_is_sgi(intid)) {
108 u32 cpuid = val & GICH_LR_PHYSID_CPUID;
109
110 cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
111 irq->source |= (1 << cpuid);
112 }
113 }
114
115 /* Clear soft pending state when level IRQs have been acked */
116 if (irq->config == VGIC_CONFIG_LEVEL &&
117 !(val & GICH_LR_PENDING_BIT)) {
118 irq->soft_pending = false;
119 irq->pending = irq->line_level;
120 }
121
122 spin_unlock(&irq->irq_lock);
123 }
124}
125
126/*
127 * Populates the particular LR with the state of a given IRQ:
128 * - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq
129 * - for a level sensitive IRQ the pending state value is unchanged;
130 * it is dictated directly by the input level
131 *
132 * If @irq describes an SGI with multiple sources, we choose the
133 * lowest-numbered source VCPU and clear that bit in the source bitmap.
134 *
135 * The irq_lock must be held by the caller.
136 */
137void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
138{
139 u32 val = irq->intid;
140
141 if (irq->pending) {
142 val |= GICH_LR_PENDING_BIT;
143
144 if (irq->config == VGIC_CONFIG_EDGE)
145 irq->pending = false;
146
147 if (vgic_irq_is_sgi(irq->intid)) {
148 u32 src = ffs(irq->source);
149
150 BUG_ON(!src);
151 val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
152 irq->source &= ~(1 << (src - 1));
153 if (irq->source)
154 irq->pending = true;
155 }
156 }
157
158 if (irq->active)
159 val |= GICH_LR_ACTIVE_BIT;
160
161 if (irq->hw) {
162 val |= GICH_LR_HW;
163 val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
164 } else {
165 if (irq->config == VGIC_CONFIG_LEVEL)
166 val |= GICH_LR_EOI;
167 }
168
169 /* The GICv2 LR only holds five bits of priority. */
170 val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT;
171
172 vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val;
173}
174
175void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr)
176{
177 vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = 0;
178}
179
180void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
181{
182 u32 vmcr;
183
184 vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
185 vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) &
186 GICH_VMCR_ALIAS_BINPOINT_MASK;
187 vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) &
188 GICH_VMCR_BINPOINT_MASK;
189 vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) &
190 GICH_VMCR_PRIMASK_MASK;
191
192 vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
193}
194
195void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
196{
197 u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr;
198
199 vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >>
200 GICH_VMCR_CTRL_SHIFT;
201 vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >>
202 GICH_VMCR_ALIAS_BINPOINT_SHIFT;
203 vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >>
204 GICH_VMCR_BINPOINT_SHIFT;
205 vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >>
206 GICH_VMCR_PRIMASK_SHIFT;
207}
208
209void vgic_v2_enable(struct kvm_vcpu *vcpu)
210{
211 /*
212 * By forcing VMCR to zero, the GIC will restore the binary
213 * points to their reset values. Anything else resets to zero
214 * anyway.
215 */
216 vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
217 vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0;
218
219 /* Get the show on the road... */
220 vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
221}
222
223/* check for overlapping regions and for regions crossing the end of memory */
224static bool vgic_v2_check_base(gpa_t dist_base, gpa_t cpu_base)
225{
226 if (dist_base + KVM_VGIC_V2_DIST_SIZE < dist_base)
227 return false;
228 if (cpu_base + KVM_VGIC_V2_CPU_SIZE < cpu_base)
229 return false;
230
231 if (dist_base + KVM_VGIC_V2_DIST_SIZE <= cpu_base)
232 return true;
233 if (cpu_base + KVM_VGIC_V2_CPU_SIZE <= dist_base)
234 return true;
235
236 return false;
237}
238
239int vgic_v2_map_resources(struct kvm *kvm)
240{
241 struct vgic_dist *dist = &kvm->arch.vgic;
242 int ret = 0;
243
244 if (vgic_ready(kvm))
245 goto out;
246
247 if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
248 IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) {
249 kvm_err("Need to set vgic cpu and dist addresses first\n");
250 ret = -ENXIO;
251 goto out;
252 }
253
254 if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) {
255 kvm_err("VGIC CPU and dist frames overlap\n");
256 ret = -EINVAL;
257 goto out;
258 }
259
260 /*
261 * Initialize the vgic if this hasn't already been done on demand by
262 * accessing the vgic state from userspace.
263 */
264 ret = vgic_init(kvm);
265 if (ret) {
266 kvm_err("Unable to initialize VGIC dynamic data structures\n");
267 goto out;
268 }
269
270 ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2);
271 if (ret) {
272 kvm_err("Unable to register VGIC MMIO regions\n");
273 goto out;
274 }
275
276 ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
277 kvm_vgic_global_state.vcpu_base,
278 KVM_VGIC_V2_CPU_SIZE, true);
279 if (ret) {
280 kvm_err("Unable to remap VGIC CPU to VCPU\n");
281 goto out;
282 }
283
284 dist->ready = true;
285
286out:
287 if (ret)
288 kvm_vgic_destroy(kvm);
289 return ret;
290}
291
292/**
293 * vgic_v2_probe - probe for a GICv2 compatible interrupt controller
294 * @info: pointer to the GIC description
295 *
296 * Returns 0 if a GICv2 has been found, returns an error code otherwise
297 */
298int vgic_v2_probe(const struct gic_kvm_info *info)
299{
300 int ret;
301 u32 vtr;
302
303 if (!info->vctrl.start) {
304 kvm_err("GICH not present in the firmware table\n");
305 return -ENXIO;
306 }
307
308 if (!PAGE_ALIGNED(info->vcpu.start)) {
309 kvm_err("GICV physical address 0x%llx not page aligned\n",
310 (unsigned long long)info->vcpu.start);
311 return -ENXIO;
312 }
313
314 if (!PAGE_ALIGNED(resource_size(&info->vcpu))) {
315 kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
316 (unsigned long long)resource_size(&info->vcpu),
317 PAGE_SIZE);
318 return -ENXIO;
319 }
320
321 kvm_vgic_global_state.vctrl_base = ioremap(info->vctrl.start,
322 resource_size(&info->vctrl));
323 if (!kvm_vgic_global_state.vctrl_base) {
324 kvm_err("Cannot ioremap GICH\n");
325 return -ENOMEM;
326 }
327
328 vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR);
329 kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1;
330
331 ret = create_hyp_io_mappings(kvm_vgic_global_state.vctrl_base,
332 kvm_vgic_global_state.vctrl_base +
333 resource_size(&info->vctrl),
334 info->vctrl.start);
335
336 if (ret) {
337 kvm_err("Cannot map VCTRL into hyp\n");
338 iounmap(kvm_vgic_global_state.vctrl_base);
339 return ret;
340 }
341
342 kvm_vgic_global_state.can_emulate_gicv2 = true;
343 kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2);
344
345 kvm_vgic_global_state.vcpu_base = info->vcpu.start;
346 kvm_vgic_global_state.type = VGIC_V2;
347 kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS;
348
349 kvm_info("vgic-v2@%llx\n", info->vctrl.start);
350
351 return 0;
352}
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
new file mode 100644
index 000000000000..336a46115937
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -0,0 +1,330 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License version 2 as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program. If not, see <http://www.gnu.org/licenses/>.
13 */
14
15#include <linux/irqchip/arm-gic-v3.h>
16#include <linux/kvm.h>
17#include <linux/kvm_host.h>
18#include <kvm/arm_vgic.h>
19#include <asm/kvm_mmu.h>
20#include <asm/kvm_asm.h>
21
22#include "vgic.h"
23
24void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu)
25{
26 struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
27 u32 model = vcpu->kvm->arch.vgic.vgic_model;
28
29 if (cpuif->vgic_misr & ICH_MISR_EOI) {
30 unsigned long eisr_bmap = cpuif->vgic_eisr;
31 int lr;
32
33 for_each_set_bit(lr, &eisr_bmap, kvm_vgic_global_state.nr_lr) {
34 u32 intid;
35 u64 val = cpuif->vgic_lr[lr];
36
37 if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
38 intid = val & ICH_LR_VIRTUAL_ID_MASK;
39 else
40 intid = val & GICH_LR_VIRTUALID;
41
42 WARN_ON(cpuif->vgic_lr[lr] & ICH_LR_STATE);
43
44 kvm_notify_acked_irq(vcpu->kvm, 0,
45 intid - VGIC_NR_PRIVATE_IRQS);
46 }
47
48 /*
49 * In the next iterations of the vcpu loop, if we sync
50 * the vgic state after flushing it, but before
51 * entering the guest (this happens for pending
52 * signals and vmid rollovers), then make sure we
53 * don't pick up any old maintenance interrupts here.
54 */
55 cpuif->vgic_eisr = 0;
56 }
57
58 cpuif->vgic_hcr &= ~ICH_HCR_UIE;
59}
60
61void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
62{
63 struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
64
65 cpuif->vgic_hcr |= ICH_HCR_UIE;
66}
67
68void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
69{
70 struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
71 u32 model = vcpu->kvm->arch.vgic.vgic_model;
72 int lr;
73
74 for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) {
75 u64 val = cpuif->vgic_lr[lr];
76 u32 intid;
77 struct vgic_irq *irq;
78
79 if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
80 intid = val & ICH_LR_VIRTUAL_ID_MASK;
81 else
82 intid = val & GICH_LR_VIRTUALID;
83 irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
84
85 spin_lock(&irq->irq_lock);
86
87 /* Always preserve the active bit */
88 irq->active = !!(val & ICH_LR_ACTIVE_BIT);
89
90 /* Edge is the only case where we preserve the pending bit */
91 if (irq->config == VGIC_CONFIG_EDGE &&
92 (val & ICH_LR_PENDING_BIT)) {
93 irq->pending = true;
94
95 if (vgic_irq_is_sgi(intid) &&
96 model == KVM_DEV_TYPE_ARM_VGIC_V2) {
97 u32 cpuid = val & GICH_LR_PHYSID_CPUID;
98
99 cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
100 irq->source |= (1 << cpuid);
101 }
102 }
103
104 /* Clear soft pending state when level irqs have been acked */
105 if (irq->config == VGIC_CONFIG_LEVEL &&
106 !(val & ICH_LR_PENDING_BIT)) {
107 irq->soft_pending = false;
108 irq->pending = irq->line_level;
109 }
110
111 spin_unlock(&irq->irq_lock);
112 }
113}
114
115/* Requires the irq to be locked already */
116void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
117{
118 u32 model = vcpu->kvm->arch.vgic.vgic_model;
119 u64 val = irq->intid;
120
121 if (irq->pending) {
122 val |= ICH_LR_PENDING_BIT;
123
124 if (irq->config == VGIC_CONFIG_EDGE)
125 irq->pending = false;
126
127 if (vgic_irq_is_sgi(irq->intid) &&
128 model == KVM_DEV_TYPE_ARM_VGIC_V2) {
129 u32 src = ffs(irq->source);
130
131 BUG_ON(!src);
132 val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
133 irq->source &= ~(1 << (src - 1));
134 if (irq->source)
135 irq->pending = true;
136 }
137 }
138
139 if (irq->active)
140 val |= ICH_LR_ACTIVE_BIT;
141
142 if (irq->hw) {
143 val |= ICH_LR_HW;
144 val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
145 } else {
146 if (irq->config == VGIC_CONFIG_LEVEL)
147 val |= ICH_LR_EOI;
148 }
149
150 /*
151 * We currently only support Group1 interrupts, which is a
152 * known defect. This needs to be addressed at some point.
153 */
154 if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
155 val |= ICH_LR_GROUP;
156
157 val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;
158
159 vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val;
160}
161
162void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr)
163{
164 vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = 0;
165}
166
167void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
168{
169 u32 vmcr;
170
171 vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK;
172 vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK;
173 vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK;
174 vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK;
175
176 vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr;
177}
178
179void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
180{
181 u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr;
182
183 vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT;
184 vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
185 vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
186 vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
187}
188
189void vgic_v3_enable(struct kvm_vcpu *vcpu)
190{
191 struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
192
193 /*
194 * By forcing VMCR to zero, the GIC will restore the binary
195 * points to their reset values. Anything else resets to zero
196 * anyway.
197 */
198 vgic_v3->vgic_vmcr = 0;
199 vgic_v3->vgic_elrsr = ~0;
200
201 /*
202 * If we are emulating a GICv3, we do it in a non-GICv2-compatible
203 * way, so we force SRE to 1 to demonstrate this to the guest.
204 * This goes with the spec allowing the value to be RAO/WI.
205 */
206 if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
207 vgic_v3->vgic_sre = ICC_SRE_EL1_SRE;
208 else
209 vgic_v3->vgic_sre = 0;
210
211 /* Get the show on the road... */
212 vgic_v3->vgic_hcr = ICH_HCR_EN;
213}
214
215/* check for overlapping regions and for regions crossing the end of memory */
216static bool vgic_v3_check_base(struct kvm *kvm)
217{
218 struct vgic_dist *d = &kvm->arch.vgic;
219 gpa_t redist_size = KVM_VGIC_V3_REDIST_SIZE;
220
221 redist_size *= atomic_read(&kvm->online_vcpus);
222
223 if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base)
224 return false;
225 if (d->vgic_redist_base + redist_size < d->vgic_redist_base)
226 return false;
227
228 if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE <= d->vgic_redist_base)
229 return true;
230 if (d->vgic_redist_base + redist_size <= d->vgic_dist_base)
231 return true;
232
233 return false;
234}
235
236int vgic_v3_map_resources(struct kvm *kvm)
237{
238 int ret = 0;
239 struct vgic_dist *dist = &kvm->arch.vgic;
240
241 if (vgic_ready(kvm))
242 goto out;
243
244 if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
245 IS_VGIC_ADDR_UNDEF(dist->vgic_redist_base)) {
246 kvm_err("Need to set vgic distributor addresses first\n");
247 ret = -ENXIO;
248 goto out;
249 }
250
251 if (!vgic_v3_check_base(kvm)) {
252 kvm_err("VGIC redist and dist frames overlap\n");
253 ret = -EINVAL;
254 goto out;
255 }
256
257 /*
258 * For a VGICv3 we require the userland to explicitly initialize
259 * the VGIC before we need to use it.
260 */
261 if (!vgic_initialized(kvm)) {
262 ret = -EBUSY;
263 goto out;
264 }
265
266 ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3);
267 if (ret) {
268 kvm_err("Unable to register VGICv3 dist MMIO regions\n");
269 goto out;
270 }
271
272 ret = vgic_register_redist_iodevs(kvm, dist->vgic_redist_base);
273 if (ret) {
274 kvm_err("Unable to register VGICv3 redist MMIO regions\n");
275 goto out;
276 }
277
278 dist->ready = true;
279
280out:
281 if (ret)
282 kvm_vgic_destroy(kvm);
283 return ret;
284}
285
286/**
287 * vgic_v3_probe - probe for a GICv3 compatible interrupt controller
288 * @info: pointer to the GIC description
289 *
290 * Returns 0 if a GICv3 has been found, returns an error code otherwise
291 */
292int vgic_v3_probe(const struct gic_kvm_info *info)
293{
294 u32 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
295
296 /*
297 * The ListRegs field is 5 bits, but there is an architectural
298 * maximum of 16 list registers. Just ignore bit 4...
299 */
300 kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1;
301 kvm_vgic_global_state.can_emulate_gicv2 = false;
302
303 if (!info->vcpu.start) {
304 kvm_info("GICv3: no GICV resource entry\n");
305 kvm_vgic_global_state.vcpu_base = 0;
306 } else if (!PAGE_ALIGNED(info->vcpu.start)) {
307 pr_warn("GICV physical address 0x%llx not page aligned\n",
308 (unsigned long long)info->vcpu.start);
309 kvm_vgic_global_state.vcpu_base = 0;
310 } else if (!PAGE_ALIGNED(resource_size(&info->vcpu))) {
311 pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
312 (unsigned long long)resource_size(&info->vcpu),
313 PAGE_SIZE);
314 kvm_vgic_global_state.vcpu_base = 0;
315 } else {
316 kvm_vgic_global_state.vcpu_base = info->vcpu.start;
317 kvm_vgic_global_state.can_emulate_gicv2 = true;
318 kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2);
319 kvm_info("vgic-v2@%llx\n", info->vcpu.start);
320 }
321 if (kvm_vgic_global_state.vcpu_base == 0)
322 kvm_info("disabling GICv2 emulation\n");
323 kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3);
324
325 kvm_vgic_global_state.vctrl_base = NULL;
326 kvm_vgic_global_state.type = VGIC_V3;
327 kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS;
328
329 return 0;
330}
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
new file mode 100644
index 000000000000..69b61abefa19
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -0,0 +1,619 @@
1/*
2 * Copyright (C) 2015, 2016 ARM Ltd.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/kvm.h>
18#include <linux/kvm_host.h>
19#include <linux/list_sort.h>
20
21#include "vgic.h"
22
23#define CREATE_TRACE_POINTS
24#include "../trace.h"
25
26#ifdef CONFIG_DEBUG_SPINLOCK
27#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p)
28#else
29#define DEBUG_SPINLOCK_BUG_ON(p)
30#endif
31
32struct vgic_global __section(.hyp.text) kvm_vgic_global_state;
33
34/*
35 * Locking order is always:
36 * vgic_cpu->ap_list_lock
37 * vgic_irq->irq_lock
38 *
39 * (that is, always take the ap_list_lock before the struct vgic_irq lock).
40 *
41 * When taking more than one ap_list_lock at the same time, always take the
42 * lowest numbered VCPU's ap_list_lock first, so:
43 * vcpuX->vcpu_id < vcpuY->vcpu_id:
44 * spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
45 * spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
46 */
47
48struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
49 u32 intid)
50{
51 /* SGIs and PPIs */
52 if (intid <= VGIC_MAX_PRIVATE)
53 return &vcpu->arch.vgic_cpu.private_irqs[intid];
54
55 /* SPIs */
56 if (intid <= VGIC_MAX_SPI)
57 return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
58
59 /* LPIs are not yet covered */
60 if (intid >= VGIC_MIN_LPI)
61 return NULL;
62
63 WARN(1, "Looking up struct vgic_irq for reserved INTID");
64 return NULL;
65}
66
67/**
68 * vgic_target_oracle - compute the target vcpu for an irq
69 *
70 * @irq: The irq to route. Must be already locked.
71 *
72 * Based on the current state of the interrupt (enabled, pending,
73 * active, vcpu and target_vcpu), compute the next vcpu this should be
74 * given to. Return NULL if this shouldn't be injected at all.
75 *
76 * Requires the IRQ lock to be held.
77 */
78static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
79{
80 DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));
81
82 /* If the interrupt is active, it must stay on the current vcpu */
83 if (irq->active)
84 return irq->vcpu ? : irq->target_vcpu;
85
86 /*
87 * If the IRQ is not active but enabled and pending, we should direct
88 * it to its configured target VCPU.
89 * If the distributor is disabled, pending interrupts shouldn't be
90 * forwarded.
91 */
92 if (irq->enabled && irq->pending) {
93 if (unlikely(irq->target_vcpu &&
94 !irq->target_vcpu->kvm->arch.vgic.enabled))
95 return NULL;
96
97 return irq->target_vcpu;
98 }
99
100 /* If the IRQ is neither active nor both pending and enabled, then it
101 * should not be queued to any VCPU.
102 */
103 return NULL;
104}
105
106/*
107 * The order of items in the ap_lists defines how we'll pack things in LRs as
108 * well, the first items in the list being the first things populated in the
109 * LRs.
110 *
111 * A hard rule is that active interrupts can never be pushed out of the LRs
112 * (and therefore take priority) since we cannot reliably trap on deactivation
113 * of IRQs and therefore they have to be present in the LRs.
114 *
115 * Otherwise things should be sorted by the priority field and the GIC
116 * hardware support will take care of preemption of priority groups etc.
117 *
118 * Return negative if "a" sorts before "b", 0 to preserve order, and positive
119 * to sort "b" before "a".
120 */
121static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b)
122{
123 struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
124 struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
125 bool penda, pendb;
126 int ret;
127
128 spin_lock(&irqa->irq_lock);
129 spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
130
131 if (irqa->active || irqb->active) {
132 ret = (int)irqb->active - (int)irqa->active;
133 goto out;
134 }
135
136 penda = irqa->enabled && irqa->pending;
137 pendb = irqb->enabled && irqb->pending;
138
139 if (!penda || !pendb) {
140 ret = (int)pendb - (int)penda;
141 goto out;
142 }
143
144 /* Both pending and enabled, sort by priority */
145 ret = irqa->priority - irqb->priority;
146out:
147 spin_unlock(&irqb->irq_lock);
148 spin_unlock(&irqa->irq_lock);
149 return ret;
150}
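A hypothetical ordering example (not part of the patch): with IRQ A active, and IRQs B and C both pending and enabled at priorities 0x20 and 0x80 respectively, list_sort() using this comparator orders the ap_list as shown below.

	/*
	 * Resulting order after vgic_sort_ap_list(vcpu):
	 *   A (active)  ->  B (pending, prio 0x20)  ->  C (pending, prio 0x80)
	 * Active entries come first, then pending ones by ascending priority
	 * value (a lower value means a more urgent interrupt on the GIC).
	 */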
151
152/* Must be called with the ap_list_lock held */
153static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
154{
155 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
156
157 DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
158
159 list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
160}
161
162/*
163 * Only valid injection if changing level for level-triggered IRQs or for a
164 * rising edge.
165 */
166static bool vgic_validate_injection(struct vgic_irq *irq, bool level)
167{
168 switch (irq->config) {
169 case VGIC_CONFIG_LEVEL:
170 return irq->line_level != level;
171 case VGIC_CONFIG_EDGE:
172 return level;
173 }
174
175 return false;
176}
177
178/*
179 * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
180 * Do the queuing if necessary, taking the right locks in the right order.
181 * Returns true when the IRQ was queued, false otherwise.
182 *
183 * Needs to be entered with the IRQ lock already held, but will return
184 * with all locks dropped.
185 */
186bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq)
187{
188 struct kvm_vcpu *vcpu;
189
190 DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));
191
192retry:
193 vcpu = vgic_target_oracle(irq);
194 if (irq->vcpu || !vcpu) {
195 /*
196 * If this IRQ is already on a VCPU's ap_list, then it
197 * cannot be moved or modified and there is no more work for
198 * us to do.
199 *
200 * Otherwise, if the irq is not pending and enabled, it does
201 * not need to be inserted into an ap_list and there is also
202 * no more work for us to do.
203 */
204 spin_unlock(&irq->irq_lock);
205 return false;
206 }
207
208 /*
209 * We must unlock the irq lock to take the ap_list_lock where
210 * we are going to insert this new pending interrupt.
211 */
212 spin_unlock(&irq->irq_lock);
213
214 /* The IRQ state may change under us here; we re-check it below. */
215
216 spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
217 spin_lock(&irq->irq_lock);
218
219 /*
220 * Did something change behind our backs?
221 *
222 * There are two cases:
223 * 1) The irq lost its pending state or was disabled behind our
224 * backs and/or it was queued to another VCPU's ap_list.
225 * 2) Someone changed the affinity on this irq behind our
226 * backs and we are now holding the wrong ap_list_lock.
227 *
228 * In both cases, drop the locks and retry.
229 */
230
231 if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
232 spin_unlock(&irq->irq_lock);
233 spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
234
235 spin_lock(&irq->irq_lock);
236 goto retry;
237 }
238
239 list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
240 irq->vcpu = vcpu;
241
242 spin_unlock(&irq->irq_lock);
243 spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
244
245 kvm_vcpu_kick(vcpu);
246
247 return true;
248}
249
250static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
251 unsigned int intid, bool level,
252 bool mapped_irq)
253{
254 struct kvm_vcpu *vcpu;
255 struct vgic_irq *irq;
256 int ret;
257
258 trace_vgic_update_irq_pending(cpuid, intid, level);
259
260 ret = vgic_lazy_init(kvm);
261 if (ret)
262 return ret;
263
264 vcpu = kvm_get_vcpu(kvm, cpuid);
265 if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
266 return -EINVAL;
267
268 irq = vgic_get_irq(kvm, vcpu, intid);
269 if (!irq)
270 return -EINVAL;
271
272 if (irq->hw != mapped_irq)
273 return -EINVAL;
274
275 spin_lock(&irq->irq_lock);
276
277 if (!vgic_validate_injection(irq, level)) {
278 /* Nothing to see here, move along... */
279 spin_unlock(&irq->irq_lock);
280 return 0;
281 }
282
283 if (irq->config == VGIC_CONFIG_LEVEL) {
284 irq->line_level = level;
285 irq->pending = level || irq->soft_pending;
286 } else {
287 irq->pending = true;
288 }
289
290 vgic_queue_irq_unlock(kvm, irq);
291
292 return 0;
293}
294
295/**
296 * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
297 * @kvm: The VM structure pointer
298 * @cpuid: The CPU for PPIs
299 * @intid: The INTID to inject a new state to.
300 * @level: Edge-triggered: true: to trigger the interrupt
301 * false: to ignore the call
302 * Level-sensitive: true: raise the input signal
303 * false: lower the input signal
304 *
305 * The VGIC is not concerned with devices being active-LOW or active-HIGH for
306 * level-sensitive interrupts. You can think of the level parameter as 1
307 * being HIGH and 0 being LOW and all devices being active-HIGH.
308 */
309int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
310 bool level)
311{
312 return vgic_update_irq_pending(kvm, cpuid, intid, level, false);
313}
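An illustrative usage sketch (the INTID and device are hypothetical, not from the patch): a device model asserting and later de-asserting a level-sensitive SPI wired as INTID 40 would call the function as below; for an SPI the cpuid argument is not used for routing, so 0 is fine.

	kvm_vgic_inject_irq(kvm, 0, 40, true);	/* raise the input signal */
	kvm_vgic_inject_irq(kvm, 0, 40, false);	/* lower it once the device is serviced */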
314
315int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid,
316 bool level)
317{
318 return vgic_update_irq_pending(kvm, cpuid, intid, level, true);
319}
320
321int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq)
322{
323 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
324
325 BUG_ON(!irq);
326
327 spin_lock(&irq->irq_lock);
328
329 irq->hw = true;
330 irq->hwintid = phys_irq;
331
332 spin_unlock(&irq->irq_lock);
333
334 return 0;
335}
336
337int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
338{
339 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
340
341 BUG_ON(!irq);
342
343 if (!vgic_initialized(vcpu->kvm))
344 return -EAGAIN;
345
346 spin_lock(&irq->irq_lock);
347
348 irq->hw = false;
349 irq->hwintid = 0;
350
351 spin_unlock(&irq->irq_lock);
352
353 return 0;
354}
355
356/**
357 * vgic_prune_ap_list - Remove non-relevant interrupts from the list
358 *
359 * @vcpu: The VCPU pointer
360 *
361 * Go over the list of "interesting" interrupts, and prune those that we
362 * won't have to consider in the near future.
363 */
364static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
365{
366 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
367 struct vgic_irq *irq, *tmp;
368
369retry:
370 spin_lock(&vgic_cpu->ap_list_lock);
371
372 list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
373 struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
374
375 spin_lock(&irq->irq_lock);
376
377 BUG_ON(vcpu != irq->vcpu);
378
379 target_vcpu = vgic_target_oracle(irq);
380
381 if (!target_vcpu) {
382 /*
383 * We don't need to process this interrupt any
384 * further, move it off the list.
385 */
386 list_del(&irq->ap_list);
387 irq->vcpu = NULL;
388 spin_unlock(&irq->irq_lock);
389 continue;
390 }
391
392 if (target_vcpu == vcpu) {
393 /* We're on the right CPU */
394 spin_unlock(&irq->irq_lock);
395 continue;
396 }
397
398 /* This interrupt looks like it has to be migrated. */
399
400 spin_unlock(&irq->irq_lock);
401 spin_unlock(&vgic_cpu->ap_list_lock);
402
403 /*
404 * Ensure locking order by always locking the smallest
405 * ID first.
406 */
407 if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
408 vcpuA = vcpu;
409 vcpuB = target_vcpu;
410 } else {
411 vcpuA = target_vcpu;
412 vcpuB = vcpu;
413 }
414
415 spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
416 spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
417 SINGLE_DEPTH_NESTING);
418 spin_lock(&irq->irq_lock);
419
420 /*
421 * If the affinity has been preserved, move the
422 * interrupt around. Otherwise, it means things have
423 * changed while the interrupt was unlocked, and we
424 * need to replay this.
425 *
426 * In all cases, we cannot trust the list not to have
427 * changed, so we restart from the beginning.
428 */
429 if (target_vcpu == vgic_target_oracle(irq)) {
430 struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;
431
432 list_del(&irq->ap_list);
433 irq->vcpu = target_vcpu;
434 list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
435 }
436
437 spin_unlock(&irq->irq_lock);
438 spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
439 spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);
440 goto retry;
441 }
442
443 spin_unlock(&vgic_cpu->ap_list_lock);
444}
445
446static inline void vgic_process_maintenance_interrupt(struct kvm_vcpu *vcpu)
447{
448 if (kvm_vgic_global_state.type == VGIC_V2)
449 vgic_v2_process_maintenance(vcpu);
450 else
451 vgic_v3_process_maintenance(vcpu);
452}
453
454static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
455{
456 if (kvm_vgic_global_state.type == VGIC_V2)
457 vgic_v2_fold_lr_state(vcpu);
458 else
459 vgic_v3_fold_lr_state(vcpu);
460}
461
462/* Requires the irq_lock to be held. */
463static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
464 struct vgic_irq *irq, int lr)
465{
466 DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));
467
468 if (kvm_vgic_global_state.type == VGIC_V2)
469 vgic_v2_populate_lr(vcpu, irq, lr);
470 else
471 vgic_v3_populate_lr(vcpu, irq, lr);
472}
473
474static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
475{
476 if (kvm_vgic_global_state.type == VGIC_V2)
477 vgic_v2_clear_lr(vcpu, lr);
478 else
479 vgic_v3_clear_lr(vcpu, lr);
480}
481
482static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
483{
484 if (kvm_vgic_global_state.type == VGIC_V2)
485 vgic_v2_set_underflow(vcpu);
486 else
487 vgic_v3_set_underflow(vcpu);
488}
489
490/* Requires the ap_list_lock to be held. */
491static int compute_ap_list_depth(struct kvm_vcpu *vcpu)
492{
493 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
494 struct vgic_irq *irq;
495 int count = 0;
496
497 DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
498
499 list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
500 spin_lock(&irq->irq_lock);
501 /* GICv2 SGIs can count for more than one... */
502 if (vgic_irq_is_sgi(irq->intid) && irq->source)
503 count += hweight8(irq->source);
504 else
505 count++;
506 spin_unlock(&irq->irq_lock);
507 }
508 return count;
509}
510
511/* Requires the VCPU's ap_list_lock to be held. */
512static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
513{
514 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
515 struct vgic_irq *irq;
516 int count = 0;
517
518 DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
519
520 if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr) {
521 vgic_set_underflow(vcpu);
522 vgic_sort_ap_list(vcpu);
523 }
524
525 list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
526 spin_lock(&irq->irq_lock);
527
528 if (unlikely(vgic_target_oracle(irq) != vcpu))
529 goto next;
530
531 /*
532 * If we get an SGI with multiple sources, try to get
533 * them in all at once.
534 */
535 do {
536 vgic_populate_lr(vcpu, irq, count++);
537 } while (irq->source && count < kvm_vgic_global_state.nr_lr);
538
539next:
540 spin_unlock(&irq->irq_lock);
541
542 if (count == kvm_vgic_global_state.nr_lr)
543 break;
544 }
545
546 vcpu->arch.vgic_cpu.used_lrs = count;
547
548 /* Nuke remaining LRs */
549 for ( ; count < kvm_vgic_global_state.nr_lr; count++)
550 vgic_clear_lr(vcpu, count);
551}
552
553/* Sync back the hardware VGIC state into our emulation after a guest's run. */
554void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
555{
556 vgic_process_maintenance_interrupt(vcpu);
557 vgic_fold_lr_state(vcpu);
558 vgic_prune_ap_list(vcpu);
559}
560
561/* Flush our emulation state into the GIC hardware before entering the guest. */
562void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
563{
564 spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
565 vgic_flush_lr_state(vcpu);
566 spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
567}
568
569int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
570{
571 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
572 struct vgic_irq *irq;
573 bool pending = false;
574
575 if (!vcpu->kvm->arch.vgic.enabled)
576 return false;
577
578 spin_lock(&vgic_cpu->ap_list_lock);
579
580 list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
581 spin_lock(&irq->irq_lock);
582 pending = irq->pending && irq->enabled;
583 spin_unlock(&irq->irq_lock);
584
585 if (pending)
586 break;
587 }
588
589 spin_unlock(&vgic_cpu->ap_list_lock);
590
591 return pending;
592}
593
594void vgic_kick_vcpus(struct kvm *kvm)
595{
596 struct kvm_vcpu *vcpu;
597 int c;
598
599 /*
600 * We've injected an interrupt, time to find out who deserves
601 * a good kick...
602 */
603 kvm_for_each_vcpu(c, vcpu, kvm) {
604 if (kvm_vgic_vcpu_pending_irq(vcpu))
605 kvm_vcpu_kick(vcpu);
606 }
607}
608
609bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq)
610{
611 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
612 bool map_is_active;
613
614 spin_lock(&irq->irq_lock);
615 map_is_active = irq->hw && irq->active;
616 spin_unlock(&irq->irq_lock);
617
618 return map_is_active;
619}
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
new file mode 100644
index 000000000000..7b300ca370b7
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -0,0 +1,131 @@
1/*
2 * Copyright (C) 2015, 2016 ARM Ltd.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#ifndef __KVM_ARM_VGIC_NEW_H__
17#define __KVM_ARM_VGIC_NEW_H__
18
19#include <linux/irqchip/arm-gic-common.h>
20
21#define PRODUCT_ID_KVM 0x4b /* ASCII code K */
22#define IMPLEMENTER_ARM 0x43b
23
24#define VGIC_ADDR_UNDEF (-1)
25#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF)
26
27#define INTERRUPT_ID_BITS_SPIS 10
28#define VGIC_PRI_BITS 5
29
30#define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS)
31
32struct vgic_vmcr {
33 u32 ctlr;
34 u32 abpr;
35 u32 bpr;
36 u32 pmr;
37};
38
39struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
40 u32 intid);
41bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq);
42void vgic_kick_vcpus(struct kvm *kvm);
43
44void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu);
45void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
46void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
47void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
48void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
49int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
50int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
51 int offset, u32 *val);
52int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write,
53 int offset, u32 *val);
54void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
55void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
56void vgic_v2_enable(struct kvm_vcpu *vcpu);
57int vgic_v2_probe(const struct gic_kvm_info *info);
58int vgic_v2_map_resources(struct kvm *kvm);
59int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
60 enum vgic_type);
61
62#ifdef CONFIG_KVM_ARM_VGIC_V3
63void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu);
64void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
65void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
66void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
67void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
68void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
69void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
70void vgic_v3_enable(struct kvm_vcpu *vcpu);
71int vgic_v3_probe(const struct gic_kvm_info *info);
72int vgic_v3_map_resources(struct kvm *kvm);
73int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
74#else
75static inline void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu)
76{
77}
78
79static inline void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
80{
81}
82
83static inline void vgic_v3_populate_lr(struct kvm_vcpu *vcpu,
84 struct vgic_irq *irq, int lr)
85{
86}
87
88static inline void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr)
89{
90}
91
92static inline void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
93{
94}
95
96static inline
97void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
98{
99}
100
101static inline
102void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
103{
104}
105
106static inline void vgic_v3_enable(struct kvm_vcpu *vcpu)
107{
108}
109
110static inline int vgic_v3_probe(const struct gic_kvm_info *info)
111{
112 return -ENODEV;
113}
114
115static inline int vgic_v3_map_resources(struct kvm *kvm)
116{
117 return -ENODEV;
118}
119
120static inline int vgic_register_redist_iodevs(struct kvm *kvm,
121 gpa_t dist_base_address)
122{
123 return -ENODEV;
124}
125#endif
126
127void kvm_register_vgic_device(unsigned long type);
128int vgic_lazy_init(struct kvm *kvm);
129int vgic_init(struct kvm *kvm);
130
131#endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index dd4ac9d9e8f5..37af23052470 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -63,6 +63,9 @@
63#define CREATE_TRACE_POINTS 63#define CREATE_TRACE_POINTS
64#include <trace/events/kvm.h> 64#include <trace/events/kvm.h>
65 65
66/* Worst case buffer size needed for holding an integer. */
67#define ITOA_MAX_LEN 12
68
66MODULE_AUTHOR("Qumranet"); 69MODULE_AUTHOR("Qumranet");
67MODULE_LICENSE("GPL"); 70MODULE_LICENSE("GPL");
68 71
@@ -100,6 +103,9 @@ static __read_mostly struct preempt_ops kvm_preempt_ops;
100struct dentry *kvm_debugfs_dir; 103struct dentry *kvm_debugfs_dir;
101EXPORT_SYMBOL_GPL(kvm_debugfs_dir); 104EXPORT_SYMBOL_GPL(kvm_debugfs_dir);
102 105
106static int kvm_debugfs_num_entries;
107static const struct file_operations *stat_fops_per_vm[];
108
103static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, 109static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
104 unsigned long arg); 110 unsigned long arg);
105#ifdef CONFIG_KVM_COMPAT 111#ifdef CONFIG_KVM_COMPAT
@@ -542,6 +548,58 @@ static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots)
542 kvfree(slots); 548 kvfree(slots);
543} 549}
544 550
551static void kvm_destroy_vm_debugfs(struct kvm *kvm)
552{
553 int i;
554
555 if (!kvm->debugfs_dentry)
556 return;
557
558 debugfs_remove_recursive(kvm->debugfs_dentry);
559
560 for (i = 0; i < kvm_debugfs_num_entries; i++)
561 kfree(kvm->debugfs_stat_data[i]);
562 kfree(kvm->debugfs_stat_data);
563}
564
565static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
566{
567 char dir_name[ITOA_MAX_LEN * 2];
568 struct kvm_stat_data *stat_data;
569 struct kvm_stats_debugfs_item *p;
570
571 if (!debugfs_initialized())
572 return 0;
573
574 snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd);
575 kvm->debugfs_dentry = debugfs_create_dir(dir_name,
576 kvm_debugfs_dir);
577 if (!kvm->debugfs_dentry)
578 return -ENOMEM;
579
580 kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries,
581 sizeof(*kvm->debugfs_stat_data),
582 GFP_KERNEL);
583 if (!kvm->debugfs_stat_data)
584 return -ENOMEM;
585
586 for (p = debugfs_entries; p->name; p++) {
587 stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL);
588 if (!stat_data)
589 return -ENOMEM;
590
591 stat_data->kvm = kvm;
592 stat_data->offset = p->offset;
593 kvm->debugfs_stat_data[p - debugfs_entries] = stat_data;
594 if (!debugfs_create_file(p->name, 0444,
595 kvm->debugfs_dentry,
596 stat_data,
597 stat_fops_per_vm[p->kind]))
598 return -ENOMEM;
599 }
600 return 0;
601}
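For orientation, the resulting layout would roughly look as follows (the paths below are an assumption based on the "%d-%d" directory name and the usual kvm debugfs mount point, not something stated in this patch):

	/*
	 *   /sys/kernel/debug/kvm/<pid>-<vm fd>/          per-VM directory
	 *   /sys/kernel/debug/kvm/<pid>-<vm fd>/<stat>    one 0444 file per debugfs_entries[] item
	 */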
602
545static struct kvm *kvm_create_vm(unsigned long type) 603static struct kvm *kvm_create_vm(unsigned long type)
546{ 604{
547 int r, i; 605 int r, i;
@@ -647,6 +705,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	int i;
 	struct mm_struct *mm = kvm->mm;
 
+	kvm_destroy_vm_debugfs(kvm);
 	kvm_arch_sync_events(kvm);
 	spin_lock(&kvm_lock);
 	list_del(&kvm->vm_list);
@@ -2999,8 +3058,15 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 	}
 #endif
 	r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC);
-	if (r < 0)
+	if (r < 0) {
 		kvm_put_kvm(kvm);
+		return r;
+	}
+
+	if (kvm_create_vm_debugfs(kvm, r) < 0) {
+		kvm_put_kvm(kvm);
+		return -ENOMEM;
+	}
 
 	return r;
 }
@@ -3425,15 +3491,114 @@ static struct notifier_block kvm_cpu_notifier = {
 	.notifier_call = kvm_cpu_hotplug,
 };
 
+static int kvm_debugfs_open(struct inode *inode, struct file *file,
+                            int (*get)(void *, u64 *), int (*set)(void *, u64),
+                            const char *fmt)
+{
+        struct kvm_stat_data *stat_data = (struct kvm_stat_data *)
+                                          inode->i_private;
+
+        /* The debugfs files are a reference to the kvm struct which
+         * is still valid when kvm_destroy_vm is called.
+         * To avoid the race between open and the removal of the debugfs
+         * directory we test against the users count.
+         */
+        if (!atomic_add_unless(&stat_data->kvm->users_count, 1, 0))
+                return -ENOENT;
+
+        if (simple_attr_open(inode, file, get, set, fmt)) {
+                kvm_put_kvm(stat_data->kvm);
+                return -ENOMEM;
+        }
+
+        return 0;
+}
+
+static int kvm_debugfs_release(struct inode *inode, struct file *file)
+{
+        struct kvm_stat_data *stat_data = (struct kvm_stat_data *)
+                                          inode->i_private;
+
+        simple_attr_release(inode, file);
+        kvm_put_kvm(stat_data->kvm);
+
+        return 0;
+}
+
+static int vm_stat_get_per_vm(void *data, u64 *val)
+{
+        struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
+
+        *val = *(u32 *)((void *)stat_data->kvm + stat_data->offset);
+
+        return 0;
+}
+
+static int vm_stat_get_per_vm_open(struct inode *inode, struct file *file)
+{
+        __simple_attr_check_format("%llu\n", 0ull);
+        return kvm_debugfs_open(inode, file, vm_stat_get_per_vm,
+                                NULL, "%llu\n");
+}
+
+static const struct file_operations vm_stat_get_per_vm_fops = {
+        .owner = THIS_MODULE,
+        .open = vm_stat_get_per_vm_open,
+        .release = kvm_debugfs_release,
+        .read = simple_attr_read,
+        .write = simple_attr_write,
+        .llseek = generic_file_llseek,
+};
+
+static int vcpu_stat_get_per_vm(void *data, u64 *val)
+{
+        int i;
+        struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
+        struct kvm_vcpu *vcpu;
+
+        *val = 0;
+
+        kvm_for_each_vcpu(i, vcpu, stat_data->kvm)
+                *val += *(u32 *)((void *)vcpu + stat_data->offset);
+
+        return 0;
+}
+
+static int vcpu_stat_get_per_vm_open(struct inode *inode, struct file *file)
+{
+        __simple_attr_check_format("%llu\n", 0ull);
+        return kvm_debugfs_open(inode, file, vcpu_stat_get_per_vm,
+                                NULL, "%llu\n");
+}
+
+static const struct file_operations vcpu_stat_get_per_vm_fops = {
+        .owner = THIS_MODULE,
+        .open = vcpu_stat_get_per_vm_open,
+        .release = kvm_debugfs_release,
+        .read = simple_attr_read,
+        .write = simple_attr_write,
+        .llseek = generic_file_llseek,
+};
+
+static const struct file_operations *stat_fops_per_vm[] = {
+        [KVM_STAT_VCPU] = &vcpu_stat_get_per_vm_fops,
+        [KVM_STAT_VM] = &vm_stat_get_per_vm_fops,
+};
+
 static int vm_stat_get(void *_offset, u64 *val)
 {
 	unsigned offset = (long)_offset;
 	struct kvm *kvm;
+	struct kvm_stat_data stat_tmp = {.offset = offset};
+	u64 tmp_val;
 
 	*val = 0;
 	spin_lock(&kvm_lock);
-	list_for_each_entry(kvm, &vm_list, vm_list)
-		*val += *(u32 *)((void *)kvm + offset);
+	list_for_each_entry(kvm, &vm_list, vm_list) {
+		stat_tmp.kvm = kvm;
+		vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
+		*val += tmp_val;
+	}
 	spin_unlock(&kvm_lock);
 	return 0;
 }
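kvm_debugfs_open() above only takes a reference when users_count has not already dropped to zero, so an open() racing with VM teardown fails with -ENOENT instead of resurrecting a dying kvm. A minimal userspace sketch of that "increment unless zero" idiom using C11 atomics; the names are illustrative and this is not kernel code:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static bool get_ref_unless_zero(atomic_int *refcount)
{
        int old = atomic_load(refcount);

        while (old != 0) {
                /* Retry if another thread changed the count under us. */
                if (atomic_compare_exchange_weak(refcount, &old, old + 1))
                        return true;
        }
        return false;   /* object is already being torn down */
}

int main(void)
{
        atomic_int live = 1;
        atomic_int dying = 0;

        printf("live object:  got ref = %d\n", get_ref_unless_zero(&live));
        printf("dying object: got ref = %d\n", get_ref_unless_zero(&dying));
        return 0;
}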
@@ -3444,15 +3609,16 @@ static int vcpu_stat_get(void *_offset, u64 *val)
 {
 	unsigned offset = (long)_offset;
 	struct kvm *kvm;
-	struct kvm_vcpu *vcpu;
-	int i;
+	struct kvm_stat_data stat_tmp = {.offset = offset};
+	u64 tmp_val;
 
 	*val = 0;
 	spin_lock(&kvm_lock);
-	list_for_each_entry(kvm, &vm_list, vm_list)
-		kvm_for_each_vcpu(i, vcpu, kvm)
-			*val += *(u32 *)((void *)vcpu + offset);
-
+	list_for_each_entry(kvm, &vm_list, vm_list) {
+		stat_tmp.kvm = kvm;
+		vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
+		*val += tmp_val;
+	}
 	spin_unlock(&kvm_lock);
 	return 0;
 }
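Both per-VM getters, and the global vm_stat_get()/vcpu_stat_get() that now wrap them, read each counter through a byte offset stored in kvm_stat_data rather than through a named field. A self-contained sketch of that offsetof-based access pattern; the struct and item names here are made up for illustration:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct vm_counters {
        uint32_t exits;
        uint32_t irq_injections;
};

struct stat_item {
        const char *name;
        size_t offset;          /* byte offset of the counter in vm_counters */
};

static const struct stat_item items[] = {
        { "exits",          offsetof(struct vm_counters, exits) },
        { "irq_injections", offsetof(struct vm_counters, irq_injections) },
};

static uint32_t read_stat(const struct vm_counters *vm, const struct stat_item *it)
{
        /* Same trick as the diff: base pointer plus stored offset. */
        return *(const uint32_t *)((const char *)vm + it->offset);
}

int main(void)
{
        struct vm_counters vm = { .exits = 42, .irq_injections = 7 };
        size_t i;

        for (i = 0; i < sizeof(items) / sizeof(items[0]); i++)
                printf("%s: %u\n", items[i].name, (unsigned)read_stat(&vm, &items[i]));
        return 0;
}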
@@ -3473,7 +3639,8 @@ static int kvm_init_debug(void)
 	if (kvm_debugfs_dir == NULL)
 		goto out;
 
-	for (p = debugfs_entries; p->name; ++p) {
+	kvm_debugfs_num_entries = 0;
+	for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
 		if (!debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
 					 (void *)(long)p->offset,
 					 stat_fops[p->kind]))