author     Linus Torvalds <torvalds@linux-foundation.org>  2016-05-27 16:41:54 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2016-05-27 16:41:54 -0400
commit     e28e909c36bb5d6319953822d84df00fce7cbd18 (patch)
tree       a4aca971908a7a604c6fdd9a95360728f9f721b3
parent     dc03c0f9d12d85286d5e3623aa96d5c2a271b8e6 (diff)
parent     fabc712866435660f7fa1070e1fabe29eba5bc4c (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull second batch of KVM updates from Radim Krčmář:
"General:
- move kvm_stat tool from QEMU repo into tools/kvm/kvm_stat (kvm_stat
had nothing to do with QEMU in the first place -- the tool only
interprets debugfs)
- expose per-vm statistics in debugfs and support them in kvm_stat
(KVM always collected per-vm statistics, but they were summarised
into global statistics)
x86:
- fix dynamic APICv (VMX was improperly configured and a guest could
access host's APIC MSRs, CVE-2016-4440)
- minor fixes
ARM changes from Christoffer Dall:
- new vgic reimplementation of our horribly broken legacy vgic
implementation. The two implementations will live side-by-side
(with the new being the configured default) for one kernel release
and then we'll remove the legacy one.
- fix for a non-critical issue with virtual abort injection to guests"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (70 commits)
tools: kvm_stat: Add comments
tools: kvm_stat: Introduce pid monitoring
KVM: Create debugfs dir and stat files for each VM
MAINTAINERS: Add kvm tools
tools: kvm_stat: Powerpc related fixes
tools: Add kvm_stat man page
tools: Add kvm_stat vm monitor script
kvm:vmx: more complete state update on APICv on/off
KVM: SVM: Add more SVM_EXIT_REASONS
KVM: Unify traced vector format
svm: bitwise vs logical op typo
KVM: arm/arm64: vgic-new: Synchronize changes to active state
KVM: arm/arm64: vgic-new: enable build
KVM: arm/arm64: vgic-new: implement mapped IRQ handling
KVM: arm/arm64: vgic-new: Wire up irqfd injection
KVM: arm/arm64: vgic-new: Add vgic_v2/v3_enable
KVM: arm/arm64: vgic-new: vgic_init: implement map_resources
KVM: arm/arm64: vgic-new: vgic_init: implement vgic_init
KVM: arm/arm64: vgic-new: vgic_init: implement vgic_create
KVM: arm/arm64: vgic-new: vgic_init: implement kvm_vgic_hyp_init
...
45 files changed, 5901 insertions, 193 deletions
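The per-VM statistics described in the pull request are exposed through debugfs: alongside the existing global counters in /sys/kernel/debug/kvm/, each VM now gets its own subdirectory containing one file per counter, which kvm_stat aggregates. As a rough illustration, here is a minimal C sketch that sums one counter across all per-VM directories; the stat name "exits" is illustrative, the per-VM directory names are whatever the kernel assigns, and it must run as root with debugfs mounted:

#include <dirent.h>
#include <stdio.h>

/* Sum one KVM counter across every per-VM debugfs subdirectory.
 * Non-directory entries simply fail fopen() and contribute 0. */
static long long read_stat(const char *dir, const char *stat)
{
    char path[512];
    long long val = 0;
    FILE *f;

    snprintf(path, sizeof(path), "/sys/kernel/debug/kvm/%s/%s", dir, stat);
    f = fopen(path, "r");
    if (!f)
        return 0;
    if (fscanf(f, "%lld", &val) != 1)
        val = 0;
    fclose(f);
    return val;
}

int main(void)
{
    DIR *d = opendir("/sys/kernel/debug/kvm");
    struct dirent *e;
    long long total = 0;

    if (!d)
        return 1;
    while ((e = readdir(d)) != NULL) {
        if (e->d_name[0] != '.')
            total += read_stat(e->d_name, "exits");
    }
    closedir(d);
    printf("total exits across VMs: %lld\n", total);
    return 0;
}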
diff --git a/MAINTAINERS b/MAINTAINERS
index 81e9c984d2f3..312cd77e820c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6491,6 +6491,7 @@ F: arch/*/include/asm/kvm*
6491 | F: include/linux/kvm* | 6491 | F: include/linux/kvm* |
6492 | F: include/uapi/linux/kvm* | 6492 | F: include/uapi/linux/kvm* |
6493 | F: virt/kvm/ | 6493 | F: virt/kvm/ |
6494 | F: tools/kvm/ | ||
6494 | 6495 | ||
6495 | KERNEL VIRTUAL MACHINE (KVM) FOR AMD-V | 6496 | KERNEL VIRTUAL MACHINE (KVM) FOR AMD-V |
6496 | M: Joerg Roedel <joro@8bytes.org> | 6497 | M: Joerg Roedel <joro@8bytes.org> |
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 0df6b1fc9655..96387d477e91 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -41,6 +41,8 @@
41 | 41 | ||
42 | #define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS | 42 | #define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS |
43 | 43 | ||
44 | #define KVM_REQ_VCPU_EXIT 8 | ||
45 | |||
44 | u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); | 46 | u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); |
45 | int __attribute_const__ kvm_target_cpu(void); | 47 | int __attribute_const__ kvm_target_cpu(void); |
46 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu); | 48 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu); |
@@ -226,6 +228,10 @@ static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
226 | 228 | ||
227 | struct kvm_vcpu *kvm_arm_get_running_vcpu(void); | 229 | struct kvm_vcpu *kvm_arm_get_running_vcpu(void); |
228 | struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); | 230 | struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); |
231 | void kvm_arm_halt_guest(struct kvm *kvm); | ||
232 | void kvm_arm_resume_guest(struct kvm *kvm); | ||
233 | void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu); | ||
234 | void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu); | ||
229 | 235 | ||
230 | int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); | 236 | int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); |
231 | unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); | 237 | unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); |
diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h
index d8e90c8cb5fa..f3a7de71f515 100644
--- a/arch/arm/include/asm/kvm_mmio.h
+++ b/arch/arm/include/asm/kvm_mmio.h
@@ -28,6 +28,9 @@ struct kvm_decode {
28 | bool sign_extend; | 28 | bool sign_extend; |
29 | }; | 29 | }; |
30 | 30 | ||
31 | void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); | ||
32 | unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); | ||
33 | |||
31 | int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); | 34 | int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); |
32 | int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, | 35 | int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, |
33 | phys_addr_t fault_ipa); | 36 | phys_addr_t fault_ipa); |
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 95a000515e43..02abfff68ee5 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -46,6 +46,13 @@ config KVM_ARM_HOST
46 | ---help--- | 46 | ---help--- |
47 | Provides host support for ARM processors. | 47 | Provides host support for ARM processors. |
48 | 48 | ||
49 | config KVM_NEW_VGIC | ||
50 | bool "New VGIC implementation" | ||
51 | depends on KVM | ||
52 | default y | ||
53 | ---help--- | ||
54 | uses the new VGIC implementation | ||
55 | |||
49 | source drivers/vhost/Kconfig | 56 | source drivers/vhost/Kconfig |
50 | 57 | ||
51 | endif # VIRTUALIZATION | 58 | endif # VIRTUALIZATION |
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index eb1bf4309c13..a596b58f6d37 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -21,7 +21,18 @@ obj-$(CONFIG_KVM_ARM_HOST) += hyp/
21 | obj-y += kvm-arm.o init.o interrupts.o | 21 | obj-y += kvm-arm.o init.o interrupts.o |
22 | obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o | 22 | obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o |
23 | obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o | 23 | obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o |
24 | |||
25 | ifeq ($(CONFIG_KVM_NEW_VGIC),y) | ||
26 | obj-y += $(KVM)/arm/vgic/vgic.o | ||
27 | obj-y += $(KVM)/arm/vgic/vgic-init.o | ||
28 | obj-y += $(KVM)/arm/vgic/vgic-irqfd.o | ||
29 | obj-y += $(KVM)/arm/vgic/vgic-v2.o | ||
30 | obj-y += $(KVM)/arm/vgic/vgic-mmio.o | ||
31 | obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o | ||
32 | obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o | ||
33 | else | ||
24 | obj-y += $(KVM)/arm/vgic.o | 34 | obj-y += $(KVM)/arm/vgic.o |
25 | obj-y += $(KVM)/arm/vgic-v2.o | 35 | obj-y += $(KVM)/arm/vgic-v2.o |
26 | obj-y += $(KVM)/arm/vgic-v2-emul.o | 36 | obj-y += $(KVM)/arm/vgic-v2-emul.o |
37 | endif | ||
27 | obj-y += $(KVM)/arm/arch_timer.o | 38 | obj-y += $(KVM)/arm/arch_timer.o |
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 237d5d82f0af..893941ec98dc 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -455,7 +455,7 @@ static void update_vttbr(struct kvm *kvm)
455 | static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) | 455 | static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) |
456 | { | 456 | { |
457 | struct kvm *kvm = vcpu->kvm; | 457 | struct kvm *kvm = vcpu->kvm; |
458 | int ret; | 458 | int ret = 0; |
459 | 459 | ||
460 | if (likely(vcpu->arch.has_run_once)) | 460 | if (likely(vcpu->arch.has_run_once)) |
461 | return 0; | 461 | return 0; |
@@ -478,9 +478,9 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
478 | * interrupts from the virtual timer with a userspace gic. | 478 | * interrupts from the virtual timer with a userspace gic. |
479 | */ | 479 | */ |
480 | if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) | 480 | if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) |
481 | kvm_timer_enable(kvm); | 481 | ret = kvm_timer_enable(vcpu); |
482 | 482 | ||
483 | return 0; | 483 | return ret; |
484 | } | 484 | } |
485 | 485 | ||
486 | bool kvm_arch_intc_initialized(struct kvm *kvm) | 486 | bool kvm_arch_intc_initialized(struct kvm *kvm) |
@@ -488,30 +488,37 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
488 | return vgic_initialized(kvm); | 488 | return vgic_initialized(kvm); |
489 | } | 489 | } |
490 | 490 | ||
491 | static void kvm_arm_halt_guest(struct kvm *kvm) __maybe_unused; | 491 | void kvm_arm_halt_guest(struct kvm *kvm) |
492 | static void kvm_arm_resume_guest(struct kvm *kvm) __maybe_unused; | ||
493 | |||
494 | static void kvm_arm_halt_guest(struct kvm *kvm) | ||
495 | { | 492 | { |
496 | int i; | 493 | int i; |
497 | struct kvm_vcpu *vcpu; | 494 | struct kvm_vcpu *vcpu; |
498 | 495 | ||
499 | kvm_for_each_vcpu(i, vcpu, kvm) | 496 | kvm_for_each_vcpu(i, vcpu, kvm) |
500 | vcpu->arch.pause = true; | 497 | vcpu->arch.pause = true; |
501 | force_vm_exit(cpu_all_mask); | 498 | kvm_make_all_cpus_request(kvm, KVM_REQ_VCPU_EXIT); |
499 | } | ||
500 | |||
501 | void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu) | ||
502 | { | ||
503 | vcpu->arch.pause = true; | ||
504 | kvm_vcpu_kick(vcpu); | ||
502 | } | 505 | } |
503 | 506 | ||
504 | static void kvm_arm_resume_guest(struct kvm *kvm) | 507 | void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu) |
508 | { | ||
509 | struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); | ||
510 | |||
511 | vcpu->arch.pause = false; | ||
512 | swake_up(wq); | ||
513 | } | ||
514 | |||
515 | void kvm_arm_resume_guest(struct kvm *kvm) | ||
505 | { | 516 | { |
506 | int i; | 517 | int i; |
507 | struct kvm_vcpu *vcpu; | 518 | struct kvm_vcpu *vcpu; |
508 | 519 | ||
509 | kvm_for_each_vcpu(i, vcpu, kvm) { | 520 | kvm_for_each_vcpu(i, vcpu, kvm) |
510 | struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); | 521 | kvm_arm_resume_vcpu(vcpu); |
511 | |||
512 | vcpu->arch.pause = false; | ||
513 | swake_up(wq); | ||
514 | } | ||
515 | } | 522 | } |
516 | 523 | ||
517 | static void vcpu_sleep(struct kvm_vcpu *vcpu) | 524 | static void vcpu_sleep(struct kvm_vcpu *vcpu) |
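The arm.c hunk above turns kvm_arm_halt_guest()/kvm_arm_resume_guest() into externally visible functions and adds per-VCPU variants, so the new VGIC can stop a single VCPU while reconfiguring interrupt state. The underlying pattern is a pause flag plus a wakeup on the VCPU's wait queue. Below is a userspace analogue with pthreads, purely as a sketch; the kernel uses vcpu->arch.pause, kvm_make_all_cpus_request()/kvm_vcpu_kick() and swait, not these primitives:

#include <pthread.h>
#include <stdbool.h>

struct demo_vcpu {
    pthread_mutex_t lock;
    pthread_cond_t wq;          /* stands in for the VCPU wait queue */
    bool pause;
};

/* like kvm_arm_halt_vcpu(); a real kick would also force the
 * thread out of guest mode */
static void demo_halt_vcpu(struct demo_vcpu *v)
{
    pthread_mutex_lock(&v->lock);
    v->pause = true;
    pthread_mutex_unlock(&v->lock);
}

/* like kvm_arm_resume_vcpu(): clear the flag, then wake */
static void demo_resume_vcpu(struct demo_vcpu *v)
{
    pthread_mutex_lock(&v->lock);
    v->pause = false;
    pthread_cond_signal(&v->wq);        /* like swake_up() */
    pthread_mutex_unlock(&v->lock);
}

/* like vcpu_sleep(): the run loop parks here while paused */
static void demo_wait_if_paused(struct demo_vcpu *v)
{
    pthread_mutex_lock(&v->lock);
    while (v->pause)
        pthread_cond_wait(&v->wq, &v->lock);
    pthread_mutex_unlock(&v->lock);
}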
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 0f6600f05137..10f80a6c797a 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -23,7 +23,7 @@
23 | 23 | ||
24 | #include "trace.h" | 24 | #include "trace.h" |
25 | 25 | ||
26 | static void mmio_write_buf(char *buf, unsigned int len, unsigned long data) | 26 | void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data) |
27 | { | 27 | { |
28 | void *datap = NULL; | 28 | void *datap = NULL; |
29 | union { | 29 | union { |
@@ -55,7 +55,7 @@ static void mmio_write_buf(char *buf, unsigned int len, unsigned long data)
55 | memcpy(buf, datap, len); | 55 | memcpy(buf, datap, len); |
56 | } | 56 | } |
57 | 57 | ||
58 | static unsigned long mmio_read_buf(char *buf, unsigned int len) | 58 | unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len) |
59 | { | 59 | { |
60 | unsigned long data = 0; | 60 | unsigned long data = 0; |
61 | union { | 61 | union { |
@@ -66,7 +66,7 @@ static unsigned long mmio_read_buf(char *buf, unsigned int len)
66 | 66 | ||
67 | switch (len) { | 67 | switch (len) { |
68 | case 1: | 68 | case 1: |
69 | data = buf[0]; | 69 | data = *(u8 *)buf; |
70 | break; | 70 | break; |
71 | case 2: | 71 | case 2: |
72 | memcpy(&tmp.hword, buf, len); | 72 | memcpy(&tmp.hword, buf, len); |
@@ -87,11 +87,10 @@ static unsigned long mmio_read_buf(char *buf, unsigned int len)
87 | 87 | ||
88 | /** | 88 | /** |
89 | * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation | 89 | * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation |
90 | * or in-kernel IO emulation | ||
91 | * | ||
90 | * @vcpu: The VCPU pointer | 92 | * @vcpu: The VCPU pointer |
91 | * @run: The VCPU run struct containing the mmio data | 93 | * @run: The VCPU run struct containing the mmio data |
92 | * | ||
93 | * This should only be called after returning from userspace for MMIO load | ||
94 | * emulation. | ||
95 | */ | 94 | */ |
96 | int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) | 95 | int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) |
97 | { | 96 | { |
@@ -104,7 +103,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
104 | if (len > sizeof(unsigned long)) | 103 | if (len > sizeof(unsigned long)) |
105 | return -EINVAL; | 104 | return -EINVAL; |
106 | 105 | ||
107 | data = mmio_read_buf(run->mmio.data, len); | 106 | data = kvm_mmio_read_buf(run->mmio.data, len); |
108 | 107 | ||
109 | if (vcpu->arch.mmio_decode.sign_extend && | 108 | if (vcpu->arch.mmio_decode.sign_extend && |
110 | len < sizeof(unsigned long)) { | 109 | len < sizeof(unsigned long)) { |
@@ -190,7 +189,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
190 | len); | 189 | len); |
191 | 190 | ||
192 | trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data); | 191 | trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data); |
193 | mmio_write_buf(data_buf, len, data); | 192 | kvm_mmio_write_buf(data_buf, len, data); |
194 | 193 | ||
195 | ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len, | 194 | ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len, |
196 | data_buf); | 195 | data_buf); |
@@ -206,18 +205,19 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
206 | run->mmio.is_write = is_write; | 205 | run->mmio.is_write = is_write; |
207 | run->mmio.phys_addr = fault_ipa; | 206 | run->mmio.phys_addr = fault_ipa; |
208 | run->mmio.len = len; | 207 | run->mmio.len = len; |
209 | if (is_write) | ||
210 | memcpy(run->mmio.data, data_buf, len); | ||
211 | 208 | ||
212 | if (!ret) { | 209 | if (!ret) { |
213 | /* We handled the access successfully in the kernel. */ | 210 | /* We handled the access successfully in the kernel. */ |
211 | if (!is_write) | ||
212 | memcpy(run->mmio.data, data_buf, len); | ||
214 | vcpu->stat.mmio_exit_kernel++; | 213 | vcpu->stat.mmio_exit_kernel++; |
215 | kvm_handle_mmio_return(vcpu, run); | 214 | kvm_handle_mmio_return(vcpu, run); |
216 | return 1; | 215 | return 1; |
217 | } else { | ||
218 | vcpu->stat.mmio_exit_user++; | ||
219 | } | 216 | } |
220 | 217 | ||
218 | if (is_write) | ||
219 | memcpy(run->mmio.data, data_buf, len); | ||
220 | vcpu->stat.mmio_exit_user++; | ||
221 | run->exit_reason = KVM_EXIT_MMIO; | 221 | run->exit_reason = KVM_EXIT_MMIO; |
222 | return 0; | 222 | return 0; |
223 | } | 223 | } |
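The mmio.c change does two things for the new VGIC: it renames and exports the buffer helpers under a kvm_ prefix so the VGIC's MMIO dispatcher can reuse them, and it defers filling run->mmio.data until it is known who completes the access (an in-kernel handler copies load results back for kvm_handle_mmio_return(); only a userspace exit needs the store data copied out). The helpers themselves just widen or narrow between a byte buffer and an unsigned long for 1/2/4/8-byte accesses; a simplified sketch, with the kernel's endianness handling elided:

#include <stdint.h>
#include <string.h>

/* simplified model of kvm_mmio_read_buf() */
static unsigned long demo_mmio_read_buf(const void *buf, unsigned int len)
{
    uint16_t h;
    uint32_t w;
    uint64_t d;

    switch (len) {
    case 1: return *(const uint8_t *)buf;
    case 2: memcpy(&h, buf, 2); return h;
    case 4: memcpy(&w, buf, 4); return w;
    case 8: memcpy(&d, buf, 8); return (unsigned long)d;
    }
    return 0;
}

/* simplified model of kvm_mmio_write_buf(); little-endian hosts only */
static void demo_mmio_write_buf(void *buf, unsigned int len, unsigned long data)
{
    memcpy(buf, &data, len);
}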
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e63d23bad36e..49095fc4b482 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -43,6 +43,8 @@
43 | 43 | ||
44 | #define KVM_VCPU_MAX_FEATURES 4 | 44 | #define KVM_VCPU_MAX_FEATURES 4 |
45 | 45 | ||
46 | #define KVM_REQ_VCPU_EXIT 8 | ||
47 | |||
46 | int __attribute_const__ kvm_target_cpu(void); | 48 | int __attribute_const__ kvm_target_cpu(void); |
47 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu); | 49 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu); |
48 | int kvm_arch_dev_ioctl_check_extension(long ext); | 50 | int kvm_arch_dev_ioctl_check_extension(long ext); |
@@ -327,6 +329,10 @@ static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
327 | 329 | ||
328 | struct kvm_vcpu *kvm_arm_get_running_vcpu(void); | 330 | struct kvm_vcpu *kvm_arm_get_running_vcpu(void); |
329 | struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); | 331 | struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); |
332 | void kvm_arm_halt_guest(struct kvm *kvm); | ||
333 | void kvm_arm_resume_guest(struct kvm *kvm); | ||
334 | void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu); | ||
335 | void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu); | ||
330 | 336 | ||
331 | u64 __kvm_call_hyp(void *hypfn, ...); | 337 | u64 __kvm_call_hyp(void *hypfn, ...); |
332 | #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) | 338 | #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) |
diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h
index fe612a962576..75ea42079757 100644
--- a/arch/arm64/include/asm/kvm_mmio.h
+++ b/arch/arm64/include/asm/kvm_mmio.h
@@ -30,6 +30,9 @@ struct kvm_decode {
30 | bool sign_extend; | 30 | bool sign_extend; |
31 | }; | 31 | }; |
32 | 32 | ||
33 | void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); | ||
34 | unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); | ||
35 | |||
33 | int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); | 36 | int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); |
34 | int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, | 37 | int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, |
35 | phys_addr_t fault_ipa); | 38 | phys_addr_t fault_ipa); |
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index aa2e34e99582..c4f26ef91e77 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -54,6 +54,13 @@ config KVM_ARM_PMU
54 | Adds support for a virtual Performance Monitoring Unit (PMU) in | 54 | Adds support for a virtual Performance Monitoring Unit (PMU) in |
55 | virtual machines. | 55 | virtual machines. |
56 | 56 | ||
57 | config KVM_NEW_VGIC | ||
58 | bool "New VGIC implementation" | ||
59 | depends on KVM | ||
60 | default y | ||
61 | ---help--- | ||
62 | uses the new VGIC implementation | ||
63 | |||
57 | source drivers/vhost/Kconfig | 64 | source drivers/vhost/Kconfig |
58 | 65 | ||
59 | endif # VIRTUALIZATION | 66 | endif # VIRTUALIZATION |
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 122cff482ac4..a7a958ca29d5 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -20,10 +20,22 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
20 | kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o | 20 | kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o |
21 | kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o | 21 | kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o |
22 | 22 | ||
23 | ifeq ($(CONFIG_KVM_NEW_VGIC),y) | ||
24 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o | ||
25 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o | ||
26 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o | ||
27 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o | ||
28 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o | ||
29 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o | ||
30 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o | ||
31 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o | ||
32 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o | ||
33 | else | ||
23 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o | 34 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o |
24 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o | 35 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o |
25 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o | 36 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o |
26 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o | 37 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o |
27 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o | 38 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o |
39 | endif | ||
28 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o | 40 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o |
29 | kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o | 41 | kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o |
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 4d1ac81870d2..e9e0e6db73f6 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -162,7 +162,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
162 | esr |= (ESR_ELx_EC_IABT_CUR << ESR_ELx_EC_SHIFT); | 162 | esr |= (ESR_ELx_EC_IABT_CUR << ESR_ELx_EC_SHIFT); |
163 | 163 | ||
164 | if (!is_iabt) | 164 | if (!is_iabt) |
165 | esr |= ESR_ELx_EC_DABT_LOW; | 165 | esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT; |
166 | 166 | ||
167 | vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_ELx_FSC_EXTABT; | 167 | vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_ELx_FSC_EXTABT; |
168 | } | 168 | } |
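This one-line inject_fault.c change is the "non-critical issue with virtual abort injection" mentioned in the pull request: the data-abort exception class was ORed into ESR_EL1 without first being shifted into the EC field (bits [31:26] of the ESR), so it corrupted low ISS bits instead. A standalone illustration, with constants mirroring the kernel's ESR_ELx definitions:

#include <stdint.h>
#include <stdio.h>

#define ESR_ELx_EC_SHIFT    26
#define ESR_ELx_EC_DABT_LOW 0x24    /* data abort from a lower EL */

int main(void)
{
    uint32_t esr;

    esr = ESR_ELx_EC_DABT_LOW;                  /* buggy: lands in ISS bits */
    printf("buggy: %#010x\n", esr);

    esr = (uint32_t)ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT;   /* fixed */
    printf("fixed: %#010x\n", esr);
    return 0;
}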
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index b9e9bb2c6089..3725e145aa58 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -2,10 +2,12 @@
2 | #define _UAPI__SVM_H | 2 | #define _UAPI__SVM_H |
3 | 3 | ||
4 | #define SVM_EXIT_READ_CR0 0x000 | 4 | #define SVM_EXIT_READ_CR0 0x000 |
5 | #define SVM_EXIT_READ_CR2 0x002 | ||
5 | #define SVM_EXIT_READ_CR3 0x003 | 6 | #define SVM_EXIT_READ_CR3 0x003 |
6 | #define SVM_EXIT_READ_CR4 0x004 | 7 | #define SVM_EXIT_READ_CR4 0x004 |
7 | #define SVM_EXIT_READ_CR8 0x008 | 8 | #define SVM_EXIT_READ_CR8 0x008 |
8 | #define SVM_EXIT_WRITE_CR0 0x010 | 9 | #define SVM_EXIT_WRITE_CR0 0x010 |
10 | #define SVM_EXIT_WRITE_CR2 0x012 | ||
9 | #define SVM_EXIT_WRITE_CR3 0x013 | 11 | #define SVM_EXIT_WRITE_CR3 0x013 |
10 | #define SVM_EXIT_WRITE_CR4 0x014 | 12 | #define SVM_EXIT_WRITE_CR4 0x014 |
11 | #define SVM_EXIT_WRITE_CR8 0x018 | 13 | #define SVM_EXIT_WRITE_CR8 0x018 |
@@ -80,10 +82,12 @@
80 | 82 | ||
81 | #define SVM_EXIT_REASONS \ | 83 | #define SVM_EXIT_REASONS \ |
82 | { SVM_EXIT_READ_CR0, "read_cr0" }, \ | 84 | { SVM_EXIT_READ_CR0, "read_cr0" }, \ |
85 | { SVM_EXIT_READ_CR2, "read_cr2" }, \ | ||
83 | { SVM_EXIT_READ_CR3, "read_cr3" }, \ | 86 | { SVM_EXIT_READ_CR3, "read_cr3" }, \ |
84 | { SVM_EXIT_READ_CR4, "read_cr4" }, \ | 87 | { SVM_EXIT_READ_CR4, "read_cr4" }, \ |
85 | { SVM_EXIT_READ_CR8, "read_cr8" }, \ | 88 | { SVM_EXIT_READ_CR8, "read_cr8" }, \ |
86 | { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ | 89 | { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ |
90 | { SVM_EXIT_WRITE_CR2, "write_cr2" }, \ | ||
87 | { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ | 91 | { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ |
88 | { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ | 92 | { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ |
89 | { SVM_EXIT_WRITE_CR8, "write_cr8" }, \ | 93 | { SVM_EXIT_WRITE_CR8, "write_cr8" }, \ |
@@ -91,26 +95,57 @@
91 | { SVM_EXIT_READ_DR1, "read_dr1" }, \ | 95 | { SVM_EXIT_READ_DR1, "read_dr1" }, \ |
92 | { SVM_EXIT_READ_DR2, "read_dr2" }, \ | 96 | { SVM_EXIT_READ_DR2, "read_dr2" }, \ |
93 | { SVM_EXIT_READ_DR3, "read_dr3" }, \ | 97 | { SVM_EXIT_READ_DR3, "read_dr3" }, \ |
98 | { SVM_EXIT_READ_DR4, "read_dr4" }, \ | ||
99 | { SVM_EXIT_READ_DR5, "read_dr5" }, \ | ||
100 | { SVM_EXIT_READ_DR6, "read_dr6" }, \ | ||
101 | { SVM_EXIT_READ_DR7, "read_dr7" }, \ | ||
94 | { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ | 102 | { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ |
95 | { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ | 103 | { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ |
96 | { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ | 104 | { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ |
97 | { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ | 105 | { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ |
106 | { SVM_EXIT_WRITE_DR4, "write_dr4" }, \ | ||
98 | { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ | 107 | { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ |
108 | { SVM_EXIT_WRITE_DR6, "write_dr6" }, \ | ||
99 | { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ | 109 | { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ |
110 | { SVM_EXIT_EXCP_BASE + DE_VECTOR, "DE excp" }, \ | ||
100 | { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ | 111 | { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ |
101 | { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ | 112 | { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ |
113 | { SVM_EXIT_EXCP_BASE + OF_VECTOR, "OF excp" }, \ | ||
114 | { SVM_EXIT_EXCP_BASE + BR_VECTOR, "BR excp" }, \ | ||
102 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ | 115 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ |
103 | { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ | ||
104 | { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ | 116 | { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ |
117 | { SVM_EXIT_EXCP_BASE + DF_VECTOR, "DF excp" }, \ | ||
118 | { SVM_EXIT_EXCP_BASE + TS_VECTOR, "TS excp" }, \ | ||
119 | { SVM_EXIT_EXCP_BASE + NP_VECTOR, "NP excp" }, \ | ||
120 | { SVM_EXIT_EXCP_BASE + SS_VECTOR, "SS excp" }, \ | ||
121 | { SVM_EXIT_EXCP_BASE + GP_VECTOR, "GP excp" }, \ | ||
122 | { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ | ||
123 | { SVM_EXIT_EXCP_BASE + MF_VECTOR, "MF excp" }, \ | ||
105 | { SVM_EXIT_EXCP_BASE + AC_VECTOR, "AC excp" }, \ | 124 | { SVM_EXIT_EXCP_BASE + AC_VECTOR, "AC excp" }, \ |
106 | { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ | 125 | { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ |
126 | { SVM_EXIT_EXCP_BASE + XM_VECTOR, "XF excp" }, \ | ||
107 | { SVM_EXIT_INTR, "interrupt" }, \ | 127 | { SVM_EXIT_INTR, "interrupt" }, \ |
108 | { SVM_EXIT_NMI, "nmi" }, \ | 128 | { SVM_EXIT_NMI, "nmi" }, \ |
109 | { SVM_EXIT_SMI, "smi" }, \ | 129 | { SVM_EXIT_SMI, "smi" }, \ |
110 | { SVM_EXIT_INIT, "init" }, \ | 130 | { SVM_EXIT_INIT, "init" }, \ |
111 | { SVM_EXIT_VINTR, "vintr" }, \ | 131 | { SVM_EXIT_VINTR, "vintr" }, \ |
112 | { SVM_EXIT_CR0_SEL_WRITE, "cr0_sel_write" }, \ | 132 | { SVM_EXIT_CR0_SEL_WRITE, "cr0_sel_write" }, \ |
133 | { SVM_EXIT_IDTR_READ, "read_idtr" }, \ | ||
134 | { SVM_EXIT_GDTR_READ, "read_gdtr" }, \ | ||
135 | { SVM_EXIT_LDTR_READ, "read_ldtr" }, \ | ||
136 | { SVM_EXIT_TR_READ, "read_rt" }, \ | ||
137 | { SVM_EXIT_IDTR_WRITE, "write_idtr" }, \ | ||
138 | { SVM_EXIT_GDTR_WRITE, "write_gdtr" }, \ | ||
139 | { SVM_EXIT_LDTR_WRITE, "write_ldtr" }, \ | ||
140 | { SVM_EXIT_TR_WRITE, "write_rt" }, \ | ||
141 | { SVM_EXIT_RDTSC, "rdtsc" }, \ | ||
142 | { SVM_EXIT_RDPMC, "rdpmc" }, \ | ||
143 | { SVM_EXIT_PUSHF, "pushf" }, \ | ||
144 | { SVM_EXIT_POPF, "popf" }, \ | ||
113 | { SVM_EXIT_CPUID, "cpuid" }, \ | 145 | { SVM_EXIT_CPUID, "cpuid" }, \ |
146 | { SVM_EXIT_RSM, "rsm" }, \ | ||
147 | { SVM_EXIT_IRET, "iret" }, \ | ||
148 | { SVM_EXIT_SWINT, "swint" }, \ | ||
114 | { SVM_EXIT_INVD, "invd" }, \ | 149 | { SVM_EXIT_INVD, "invd" }, \ |
115 | { SVM_EXIT_PAUSE, "pause" }, \ | 150 | { SVM_EXIT_PAUSE, "pause" }, \ |
116 | { SVM_EXIT_HLT, "hlt" }, \ | 151 | { SVM_EXIT_HLT, "hlt" }, \ |
@@ -119,6 +154,7 @@
119 | { SVM_EXIT_IOIO, "io" }, \ | 154 | { SVM_EXIT_IOIO, "io" }, \ |
120 | { SVM_EXIT_MSR, "msr" }, \ | 155 | { SVM_EXIT_MSR, "msr" }, \ |
121 | { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ | 156 | { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ |
157 | { SVM_EXIT_FERR_FREEZE, "ferr_freeze" }, \ | ||
122 | { SVM_EXIT_SHUTDOWN, "shutdown" }, \ | 158 | { SVM_EXIT_SHUTDOWN, "shutdown" }, \ |
123 | { SVM_EXIT_VMRUN, "vmrun" }, \ | 159 | { SVM_EXIT_VMRUN, "vmrun" }, \ |
124 | { SVM_EXIT_VMMCALL, "hypercall" }, \ | 160 | { SVM_EXIT_VMMCALL, "hypercall" }, \ |
@@ -127,14 +163,16 @@
127 | { SVM_EXIT_STGI, "stgi" }, \ | 163 | { SVM_EXIT_STGI, "stgi" }, \ |
128 | { SVM_EXIT_CLGI, "clgi" }, \ | 164 | { SVM_EXIT_CLGI, "clgi" }, \ |
129 | { SVM_EXIT_SKINIT, "skinit" }, \ | 165 | { SVM_EXIT_SKINIT, "skinit" }, \ |
166 | { SVM_EXIT_RDTSCP, "rdtscp" }, \ | ||
167 | { SVM_EXIT_ICEBP, "icebp" }, \ | ||
130 | { SVM_EXIT_WBINVD, "wbinvd" }, \ | 168 | { SVM_EXIT_WBINVD, "wbinvd" }, \ |
131 | { SVM_EXIT_MONITOR, "monitor" }, \ | 169 | { SVM_EXIT_MONITOR, "monitor" }, \ |
132 | { SVM_EXIT_MWAIT, "mwait" }, \ | 170 | { SVM_EXIT_MWAIT, "mwait" }, \ |
133 | { SVM_EXIT_XSETBV, "xsetbv" }, \ | 171 | { SVM_EXIT_XSETBV, "xsetbv" }, \ |
134 | { SVM_EXIT_NPF, "npf" }, \ | 172 | { SVM_EXIT_NPF, "npf" }, \ |
135 | { SVM_EXIT_RSM, "rsm" }, \ | ||
136 | { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ | 173 | { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ |
137 | { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" } | 174 | { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ |
175 | { SVM_EXIT_ERR, "invalid_guest_state" } | ||
138 | 176 | ||
139 | 177 | ||
140 | #endif /* _UAPI__SVM_H */ | 178 | #endif /* _UAPI__SVM_H */ |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2214214c786b..1163e8173e5a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -84,7 +84,7 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
84 | #define TSC_RATIO_MIN 0x0000000000000001ULL | 84 | #define TSC_RATIO_MIN 0x0000000000000001ULL |
85 | #define TSC_RATIO_MAX 0x000000ffffffffffULL | 85 | #define TSC_RATIO_MAX 0x000000ffffffffffULL |
86 | 86 | ||
87 | #define AVIC_HPA_MASK ~((0xFFFULL << 52) || 0xFFF) | 87 | #define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF) |
88 | 88 | ||
89 | /* | 89 | /* |
90 | * 0xff is broadcast, so the max index allowed for physical APIC ID | 90 | * 0xff is broadcast, so the max index allowed for physical APIC ID |
@@ -3597,7 +3597,7 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
3597 | u32 icrh = svm->vmcb->control.exit_info_1 >> 32; | 3597 | u32 icrh = svm->vmcb->control.exit_info_1 >> 32; |
3598 | u32 icrl = svm->vmcb->control.exit_info_1; | 3598 | u32 icrl = svm->vmcb->control.exit_info_1; |
3599 | u32 id = svm->vmcb->control.exit_info_2 >> 32; | 3599 | u32 id = svm->vmcb->control.exit_info_2 >> 32; |
3600 | u32 index = svm->vmcb->control.exit_info_2 && 0xFF; | 3600 | u32 index = svm->vmcb->control.exit_info_2 & 0xFF; |
3601 | struct kvm_lapic *apic = svm->vcpu.arch.apic; | 3601 | struct kvm_lapic *apic = svm->vcpu.arch.apic; |
3602 | 3602 | ||
3603 | trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index); | 3603 | trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index); |
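The two one-character svm.c hunks are the "svm: bitwise vs logical op typo" fix from the shortlog. Logical || and && reduce their operands to 0 or 1, so AVIC_HPA_MASK was effectively ~1 and the AVIC IPI index was (exit_info_2 != 0) rather than the low byte. A quick demonstration of the difference:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t info = 0x1234;

    /* mask construction: || collapses to 1, | builds the real mask */
    printf("%#llx vs %#llx\n",
           (unsigned long long)~((0xFFFULL << 52) || 0xFFF),
           (unsigned long long)~((0xFFFULL << 52) | 0xFFF));

    /* field extraction: && collapses to 0/1, & keeps the low byte */
    printf("%llu vs %llu\n",
           (unsigned long long)(info && 0xFF),
           (unsigned long long)(info & 0xFF));
    return 0;
}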
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e605d1ed334f..fb93010beaa4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2418,7 +2418,9 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
2418 | 2418 | ||
2419 | if (is_guest_mode(vcpu)) | 2419 | if (is_guest_mode(vcpu)) |
2420 | msr_bitmap = vmx_msr_bitmap_nested; | 2420 | msr_bitmap = vmx_msr_bitmap_nested; |
2421 | else if (vcpu->arch.apic_base & X2APIC_ENABLE) { | 2421 | else if (cpu_has_secondary_exec_ctrls() && |
2422 | (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & | ||
2423 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { | ||
2422 | if (is_long_mode(vcpu)) | 2424 | if (is_long_mode(vcpu)) |
2423 | msr_bitmap = vmx_msr_bitmap_longmode_x2apic; | 2425 | msr_bitmap = vmx_msr_bitmap_longmode_x2apic; |
2424 | else | 2426 | else |
@@ -4787,6 +4789,19 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
4787 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4789 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
4788 | 4790 | ||
4789 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); | 4791 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); |
4792 | if (cpu_has_secondary_exec_ctrls()) { | ||
4793 | if (kvm_vcpu_apicv_active(vcpu)) | ||
4794 | vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, | ||
4795 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | ||
4796 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); | ||
4797 | else | ||
4798 | vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, | ||
4799 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | ||
4800 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); | ||
4801 | } | ||
4802 | |||
4803 | if (cpu_has_vmx_msr_bitmap()) | ||
4804 | vmx_set_msr_bitmap(vcpu); | ||
4790 | } | 4805 | } |
4791 | 4806 | ||
4792 | static u32 vmx_exec_control(struct vcpu_vmx *vmx) | 4807 | static u32 vmx_exec_control(struct vcpu_vmx *vmx) |
@@ -6333,23 +6348,20 @@ static __init int hardware_setup(void)
6333 | 6348 | ||
6334 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ | 6349 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ |
6335 | 6350 | ||
6336 | if (enable_apicv) { | 6351 | for (msr = 0x800; msr <= 0x8ff; msr++) |
6337 | for (msr = 0x800; msr <= 0x8ff; msr++) | 6352 | vmx_disable_intercept_msr_read_x2apic(msr); |
6338 | vmx_disable_intercept_msr_read_x2apic(msr); | 6353 | |
6339 | 6354 | /* According SDM, in x2apic mode, the whole id reg is used. But in | |
6340 | /* According SDM, in x2apic mode, the whole id reg is used. | 6355 | * KVM, it only use the highest eight bits. Need to intercept it */ |
6341 | * But in KVM, it only use the highest eight bits. Need to | 6356 | vmx_enable_intercept_msr_read_x2apic(0x802); |
6342 | * intercept it */ | 6357 | /* TMCCT */ |
6343 | vmx_enable_intercept_msr_read_x2apic(0x802); | 6358 | vmx_enable_intercept_msr_read_x2apic(0x839); |
6344 | /* TMCCT */ | 6359 | /* TPR */ |
6345 | vmx_enable_intercept_msr_read_x2apic(0x839); | 6360 | vmx_disable_intercept_msr_write_x2apic(0x808); |
6346 | /* TPR */ | 6361 | /* EOI */ |
6347 | vmx_disable_intercept_msr_write_x2apic(0x808); | 6362 | vmx_disable_intercept_msr_write_x2apic(0x80b); |
6348 | /* EOI */ | 6363 | /* SELF-IPI */ |
6349 | vmx_disable_intercept_msr_write_x2apic(0x80b); | 6364 | vmx_disable_intercept_msr_write_x2apic(0x83f); |
6350 | /* SELF-IPI */ | ||
6351 | vmx_disable_intercept_msr_write_x2apic(0x83f); | ||
6352 | } | ||
6353 | 6365 | ||
6354 | if (enable_ept) { | 6366 | if (enable_ept) { |
6355 | kvm_mmu_set_mask_ptes(0ull, | 6367 | kvm_mmu_set_mask_ptes(0ull, |
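These vmx.c hunks are the CVE-2016-4440 fix ("VMX was improperly configured and a guest could access host's APIC MSRs"): the x2APIC MSR intercepts are now programmed unconditionally in hardware_setup(), the MSR-bitmap choice keys off the live SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE bit in the VMCS rather than the guest's APIC base, and vmx_refresh_apicv_exec_ctrl() toggles the APIC-register-virtualization bits with a read-modify-write. A sketch of that set/clear-bits pattern, with a plain variable standing in for the real VMCS accessors:

#include <stdint.h>

#define SEC_EXEC_APIC_REGISTER_VIRT     (1u << 8)   /* bit positions per the
#define SEC_EXEC_VIRTUAL_INTR_DELIVERY  (1u << 9)      VMX secondary controls */

static uint32_t secondary_exec_ctrl;    /* stands in for the VMCS field */

static void demo_vmcs_set_bits(uint32_t mask)
{
    secondary_exec_ctrl |= mask;        /* like vmcs_set_bits() */
}

static void demo_vmcs_clear_bits(uint32_t mask)
{
    secondary_exec_ctrl &= ~mask;       /* like vmcs_clear_bits() */
}

static void demo_refresh_apicv(int apicv_active)
{
    uint32_t bits = SEC_EXEC_APIC_REGISTER_VIRT |
                    SEC_EXEC_VIRTUAL_INTR_DELIVERY;

    if (apicv_active)
        demo_vmcs_set_bits(bits);
    else
        demo_vmcs_clear_bits(bits);
}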
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index b651aed9dc6b..dda39d8fa189 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -24,9 +24,6 @@
24 | #include <linux/workqueue.h> | 24 | #include <linux/workqueue.h> |
25 | 25 | ||
26 | struct arch_timer_kvm { | 26 | struct arch_timer_kvm { |
27 | /* Is the timer enabled */ | ||
28 | bool enabled; | ||
29 | |||
30 | /* Virtual offset */ | 27 | /* Virtual offset */ |
31 | cycle_t cntvoff; | 28 | cycle_t cntvoff; |
32 | }; | 29 | }; |
@@ -53,15 +50,15 @@ struct arch_timer_cpu {
53 | /* Timer IRQ */ | 50 | /* Timer IRQ */ |
54 | struct kvm_irq_level irq; | 51 | struct kvm_irq_level irq; |
55 | 52 | ||
56 | /* VGIC mapping */ | ||
57 | struct irq_phys_map *map; | ||
58 | |||
59 | /* Active IRQ state caching */ | 53 | /* Active IRQ state caching */ |
60 | bool active_cleared_last; | 54 | bool active_cleared_last; |
55 | |||
56 | /* Is the timer enabled */ | ||
57 | bool enabled; | ||
61 | }; | 58 | }; |
62 | 59 | ||
63 | int kvm_timer_hyp_init(void); | 60 | int kvm_timer_hyp_init(void); |
64 | void kvm_timer_enable(struct kvm *kvm); | 61 | int kvm_timer_enable(struct kvm_vcpu *vcpu); |
65 | void kvm_timer_init(struct kvm *kvm); | 62 | void kvm_timer_init(struct kvm *kvm); |
66 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | 63 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, |
67 | const struct kvm_irq_level *irq); | 64 | const struct kvm_irq_level *irq); |
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index be6037aa703d..da0a524802cb 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -19,6 +19,10 @@
19 | #ifndef __ASM_ARM_KVM_VGIC_H | 19 | #ifndef __ASM_ARM_KVM_VGIC_H |
20 | #define __ASM_ARM_KVM_VGIC_H | 20 | #define __ASM_ARM_KVM_VGIC_H |
21 | 21 | ||
22 | #ifdef CONFIG_KVM_NEW_VGIC | ||
23 | #include <kvm/vgic/vgic.h> | ||
24 | #else | ||
25 | |||
22 | #include <linux/kernel.h> | 26 | #include <linux/kernel.h> |
23 | #include <linux/kvm.h> | 27 | #include <linux/kvm.h> |
24 | #include <linux/irqreturn.h> | 28 | #include <linux/irqreturn.h> |
@@ -158,7 +162,6 @@ struct vgic_io_device {
158 | struct irq_phys_map { | 162 | struct irq_phys_map { |
159 | u32 virt_irq; | 163 | u32 virt_irq; |
160 | u32 phys_irq; | 164 | u32 phys_irq; |
161 | u32 irq; | ||
162 | }; | 165 | }; |
163 | 166 | ||
164 | struct irq_phys_map_entry { | 167 | struct irq_phys_map_entry { |
@@ -305,9 +308,6 @@ struct vgic_cpu {
305 | unsigned long *active_shared; | 308 | unsigned long *active_shared; |
306 | unsigned long *pend_act_shared; | 309 | unsigned long *pend_act_shared; |
307 | 310 | ||
308 | /* Number of list registers on this CPU */ | ||
309 | int nr_lr; | ||
310 | |||
311 | /* CPU vif control registers for world switch */ | 311 | /* CPU vif control registers for world switch */ |
312 | union { | 312 | union { |
313 | struct vgic_v2_cpu_if vgic_v2; | 313 | struct vgic_v2_cpu_if vgic_v2; |
@@ -342,17 +342,18 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
342 | int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, | 342 | int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, |
343 | bool level); | 343 | bool level); |
344 | int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, | 344 | int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, |
345 | struct irq_phys_map *map, bool level); | 345 | unsigned int virt_irq, bool level); |
346 | void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); | 346 | void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); |
347 | int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); | 347 | int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); |
348 | struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, | 348 | int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int phys_irq); |
349 | int virt_irq, int irq); | 349 | int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq); |
350 | int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map); | 350 | bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq); |
351 | bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map); | ||
352 | 351 | ||
353 | #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) | 352 | #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) |
354 | #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) | 353 | #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) |
355 | #define vgic_ready(k) ((k)->arch.vgic.ready) | 354 | #define vgic_ready(k) ((k)->arch.vgic.ready) |
355 | #define vgic_valid_spi(k, i) (((i) >= VGIC_NR_PRIVATE_IRQS) && \ | ||
356 | ((i) < (k)->arch.vgic.nr_irqs)) | ||
356 | 357 | ||
357 | int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info, | 358 | int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info, |
358 | const struct vgic_ops **ops, | 359 | const struct vgic_ops **ops, |
@@ -370,4 +371,5 @@ static inline int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info,
370 | } | 371 | } |
371 | #endif | 372 | #endif |
372 | 373 | ||
374 | #endif /* old VGIC include */ | ||
373 | #endif | 375 | #endif |
diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h
new file mode 100644
index 000000000000..3fbd175265ae
--- /dev/null
+++ b/include/kvm/vgic/vgic.h
@@ -0,0 +1,246 @@
1 | /* | ||
2 | * Copyright (C) 2015, 2016 ARM Ltd. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | #ifndef __ASM_ARM_KVM_VGIC_VGIC_H | ||
17 | #define __ASM_ARM_KVM_VGIC_VGIC_H | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/kvm.h> | ||
21 | #include <linux/irqreturn.h> | ||
22 | #include <linux/spinlock.h> | ||
23 | #include <linux/types.h> | ||
24 | #include <kvm/iodev.h> | ||
25 | |||
26 | #define VGIC_V3_MAX_CPUS 255 | ||
27 | #define VGIC_V2_MAX_CPUS 8 | ||
28 | #define VGIC_NR_IRQS_LEGACY 256 | ||
29 | #define VGIC_NR_SGIS 16 | ||
30 | #define VGIC_NR_PPIS 16 | ||
31 | #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) | ||
32 | #define VGIC_MAX_PRIVATE (VGIC_NR_PRIVATE_IRQS - 1) | ||
33 | #define VGIC_MAX_SPI 1019 | ||
34 | #define VGIC_MAX_RESERVED 1023 | ||
35 | #define VGIC_MIN_LPI 8192 | ||
36 | |||
37 | enum vgic_type { | ||
38 | VGIC_V2, /* Good ol' GICv2 */ | ||
39 | VGIC_V3, /* New fancy GICv3 */ | ||
40 | }; | ||
41 | |||
42 | /* same for all guests, as depending only on the _host's_ GIC model */ | ||
43 | struct vgic_global { | ||
44 | /* type of the host GIC */ | ||
45 | enum vgic_type type; | ||
46 | |||
47 | /* Physical address of vgic virtual cpu interface */ | ||
48 | phys_addr_t vcpu_base; | ||
49 | |||
50 | /* virtual control interface mapping */ | ||
51 | void __iomem *vctrl_base; | ||
52 | |||
53 | /* Number of implemented list registers */ | ||
54 | int nr_lr; | ||
55 | |||
56 | /* Maintenance IRQ number */ | ||
57 | unsigned int maint_irq; | ||
58 | |||
59 | /* maximum number of VCPUs allowed (GICv2 limits us to 8) */ | ||
60 | int max_gic_vcpus; | ||
61 | |||
62 | /* Only needed for the legacy KVM_CREATE_IRQCHIP */ | ||
63 | bool can_emulate_gicv2; | ||
64 | }; | ||
65 | |||
66 | extern struct vgic_global kvm_vgic_global_state; | ||
67 | |||
68 | #define VGIC_V2_MAX_LRS (1 << 6) | ||
69 | #define VGIC_V3_MAX_LRS 16 | ||
70 | #define VGIC_V3_LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr) | ||
71 | |||
72 | enum vgic_irq_config { | ||
73 | VGIC_CONFIG_EDGE = 0, | ||
74 | VGIC_CONFIG_LEVEL | ||
75 | }; | ||
76 | |||
77 | struct vgic_irq { | ||
78 | spinlock_t irq_lock; /* Protects the content of the struct */ | ||
79 | struct list_head ap_list; | ||
80 | |||
81 | struct kvm_vcpu *vcpu; /* SGIs and PPIs: The VCPU | ||
82 | * SPIs and LPIs: The VCPU whose ap_list | ||
83 | * this is queued on. | ||
84 | */ | ||
85 | |||
86 | struct kvm_vcpu *target_vcpu; /* The VCPU that this interrupt should | ||
87 | * be sent to, as a result of the | ||
88 | * targets reg (v2) or the | ||
89 | * affinity reg (v3). | ||
90 | */ | ||
91 | |||
92 | u32 intid; /* Guest visible INTID */ | ||
93 | bool pending; | ||
94 | bool line_level; /* Level only */ | ||
95 | bool soft_pending; /* Level only */ | ||
96 | bool active; /* not used for LPIs */ | ||
97 | bool enabled; | ||
98 | bool hw; /* Tied to HW IRQ */ | ||
99 | u32 hwintid; /* HW INTID number */ | ||
100 | union { | ||
101 | u8 targets; /* GICv2 target VCPUs mask */ | ||
102 | u32 mpidr; /* GICv3 target VCPU */ | ||
103 | }; | ||
104 | u8 source; /* GICv2 SGIs only */ | ||
105 | u8 priority; | ||
106 | enum vgic_irq_config config; /* Level or edge */ | ||
107 | }; | ||
108 | |||
109 | struct vgic_register_region; | ||
110 | |||
111 | struct vgic_io_device { | ||
112 | gpa_t base_addr; | ||
113 | struct kvm_vcpu *redist_vcpu; | ||
114 | const struct vgic_register_region *regions; | ||
115 | int nr_regions; | ||
116 | struct kvm_io_device dev; | ||
117 | }; | ||
118 | |||
119 | struct vgic_dist { | ||
120 | bool in_kernel; | ||
121 | bool ready; | ||
122 | bool initialized; | ||
123 | |||
124 | /* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */ | ||
125 | u32 vgic_model; | ||
126 | |||
127 | int nr_spis; | ||
128 | |||
129 | /* TODO: Consider moving to global state */ | ||
130 | /* Virtual control interface mapping */ | ||
131 | void __iomem *vctrl_base; | ||
132 | |||
133 | /* base addresses in guest physical address space: */ | ||
134 | gpa_t vgic_dist_base; /* distributor */ | ||
135 | union { | ||
136 | /* either a GICv2 CPU interface */ | ||
137 | gpa_t vgic_cpu_base; | ||
138 | /* or a number of GICv3 redistributor regions */ | ||
139 | gpa_t vgic_redist_base; | ||
140 | }; | ||
141 | |||
142 | /* distributor enabled */ | ||
143 | bool enabled; | ||
144 | |||
145 | struct vgic_irq *spis; | ||
146 | |||
147 | struct vgic_io_device dist_iodev; | ||
148 | struct vgic_io_device *redist_iodevs; | ||
149 | }; | ||
150 | |||
151 | struct vgic_v2_cpu_if { | ||
152 | u32 vgic_hcr; | ||
153 | u32 vgic_vmcr; | ||
154 | u32 vgic_misr; /* Saved only */ | ||
155 | u64 vgic_eisr; /* Saved only */ | ||
156 | u64 vgic_elrsr; /* Saved only */ | ||
157 | u32 vgic_apr; | ||
158 | u32 vgic_lr[VGIC_V2_MAX_LRS]; | ||
159 | }; | ||
160 | |||
161 | struct vgic_v3_cpu_if { | ||
162 | #ifdef CONFIG_KVM_ARM_VGIC_V3 | ||
163 | u32 vgic_hcr; | ||
164 | u32 vgic_vmcr; | ||
165 | u32 vgic_sre; /* Restored only, change ignored */ | ||
166 | u32 vgic_misr; /* Saved only */ | ||
167 | u32 vgic_eisr; /* Saved only */ | ||
168 | u32 vgic_elrsr; /* Saved only */ | ||
169 | u32 vgic_ap0r[4]; | ||
170 | u32 vgic_ap1r[4]; | ||
171 | u64 vgic_lr[VGIC_V3_MAX_LRS]; | ||
172 | #endif | ||
173 | }; | ||
174 | |||
175 | struct vgic_cpu { | ||
176 | /* CPU vif control registers for world switch */ | ||
177 | union { | ||
178 | struct vgic_v2_cpu_if vgic_v2; | ||
179 | struct vgic_v3_cpu_if vgic_v3; | ||
180 | }; | ||
181 | |||
182 | unsigned int used_lrs; | ||
183 | struct vgic_irq private_irqs[VGIC_NR_PRIVATE_IRQS]; | ||
184 | |||
185 | spinlock_t ap_list_lock; /* Protects the ap_list */ | ||
186 | |||
187 | /* | ||
188 | * List of IRQs that this VCPU should consider because they are either | ||
189 | * Active or Pending (hence the name; AP list), or because they recently | ||
190 | * were one of the two and need to be migrated off this list to another | ||
191 | * VCPU. | ||
192 | */ | ||
193 | struct list_head ap_list_head; | ||
194 | |||
195 | u64 live_lrs; | ||
196 | }; | ||
197 | |||
198 | int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); | ||
199 | void kvm_vgic_early_init(struct kvm *kvm); | ||
200 | int kvm_vgic_create(struct kvm *kvm, u32 type); | ||
201 | void kvm_vgic_destroy(struct kvm *kvm); | ||
202 | void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu); | ||
203 | void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); | ||
204 | int kvm_vgic_map_resources(struct kvm *kvm); | ||
205 | int kvm_vgic_hyp_init(void); | ||
206 | |||
207 | int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, | ||
208 | bool level); | ||
209 | int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid, | ||
210 | bool level); | ||
211 | int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq); | ||
212 | int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq); | ||
213 | bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq); | ||
214 | |||
215 | int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); | ||
216 | |||
217 | #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) | ||
218 | #define vgic_initialized(k) ((k)->arch.vgic.initialized) | ||
219 | #define vgic_ready(k) ((k)->arch.vgic.ready) | ||
220 | #define vgic_valid_spi(k, i) (((i) >= VGIC_NR_PRIVATE_IRQS) && \ | ||
221 | ((i) < (k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) | ||
222 | |||
223 | bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu); | ||
224 | void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); | ||
225 | void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); | ||
226 | |||
227 | #ifdef CONFIG_KVM_ARM_VGIC_V3 | ||
228 | void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); | ||
229 | #else | ||
230 | static inline void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) | ||
231 | { | ||
232 | } | ||
233 | #endif | ||
234 | |||
235 | /** | ||
236 | * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW | ||
237 | * | ||
238 | * The host's GIC naturally limits the maximum amount of VCPUs a guest | ||
239 | * can use. | ||
240 | */ | ||
241 | static inline int kvm_vgic_get_max_vcpus(void) | ||
242 | { | ||
243 | return kvm_vgic_global_state.max_gic_vcpus; | ||
244 | } | ||
245 | |||
246 | #endif /* __ASM_ARM_KVM_VGIC_VGIC_H */ | ||
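The new header above also pins down the INTID space the whole implementation works in: SGIs 0-15 and PPIs 16-31 are per-VCPU ("private") interrupts, SPIs run from 32 up to 1019, IDs 1020-1023 are reserved, and LPIs start at 8192. A small classifier built from the same constants, purely illustrative:

#include <stdio.h>

#define VGIC_NR_SGIS         16
#define VGIC_NR_PRIVATE_IRQS 32
#define VGIC_MAX_SPI         1019
#define VGIC_MIN_LPI         8192

static const char *intid_class(unsigned int intid)
{
    if (intid < VGIC_NR_SGIS)
        return "SGI";
    if (intid < VGIC_NR_PRIVATE_IRQS)
        return "PPI";
    if (intid <= VGIC_MAX_SPI)
        return "SPI";
    if (intid >= VGIC_MIN_LPI)
        return "LPI";
    return "reserved";
}

int main(void)
{
    unsigned int ids[] = { 3, 27, 40, 1020, 8192 };
    unsigned int i;

    for (i = 0; i < sizeof(ids) / sizeof(ids[0]); i++)
        printf("%u -> %s\n", ids[i], intid_class(ids[i]));
    return 0;
}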
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 9e6fdd33bdb2..bfbd707de390 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -273,6 +273,12 @@
273 | #define ICH_LR_ACTIVE_BIT (1ULL << 63) | 273 | #define ICH_LR_ACTIVE_BIT (1ULL << 63) |
274 | #define ICH_LR_PHYS_ID_SHIFT 32 | 274 | #define ICH_LR_PHYS_ID_SHIFT 32 |
275 | #define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) | 275 | #define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) |
276 | #define ICH_LR_PRIORITY_SHIFT 48 | ||
277 | |||
278 | /* These are for GICv2 emulation only */ | ||
279 | #define GICH_LR_VIRTUALID (0x3ffUL << 0) | ||
280 | #define GICH_LR_PHYSID_CPUID_SHIFT (10) | ||
281 | #define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) | ||
276 | 282 | ||
277 | #define ICH_MISR_EOI (1 << 0) | 283 | #define ICH_MISR_EOI (1 << 0) |
278 | #define ICH_MISR_U (1 << 1) | 284 | #define ICH_MISR_U (1 << 1) |
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 9c940263ca23..fd051855539b 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -33,6 +33,7 @@
33 | 33 | ||
34 | #define GIC_DIST_CTRL 0x000 | 34 | #define GIC_DIST_CTRL 0x000 |
35 | #define GIC_DIST_CTR 0x004 | 35 | #define GIC_DIST_CTR 0x004 |
36 | #define GIC_DIST_IIDR 0x008 | ||
36 | #define GIC_DIST_IGROUP 0x080 | 37 | #define GIC_DIST_IGROUP 0x080 |
37 | #define GIC_DIST_ENABLE_SET 0x100 | 38 | #define GIC_DIST_ENABLE_SET 0x100 |
38 | #define GIC_DIST_ENABLE_CLEAR 0x180 | 39 | #define GIC_DIST_ENABLE_CLEAR 0x180 |
@@ -76,6 +77,7 @@
76 | #define GICH_LR_VIRTUALID (0x3ff << 0) | 77 | #define GICH_LR_VIRTUALID (0x3ff << 0) |
77 | #define GICH_LR_PHYSID_CPUID_SHIFT (10) | 78 | #define GICH_LR_PHYSID_CPUID_SHIFT (10) |
78 | #define GICH_LR_PHYSID_CPUID (0x3ff << GICH_LR_PHYSID_CPUID_SHIFT) | 79 | #define GICH_LR_PHYSID_CPUID (0x3ff << GICH_LR_PHYSID_CPUID_SHIFT) |
80 | #define GICH_LR_PRIORITY_SHIFT 23 | ||
79 | #define GICH_LR_STATE (3 << 28) | 81 | #define GICH_LR_STATE (3 << 28) |
80 | #define GICH_LR_PENDING_BIT (1 << 28) | 82 | #define GICH_LR_PENDING_BIT (1 << 28) |
81 | #define GICH_LR_ACTIVE_BIT (1 << 29) | 83 | #define GICH_LR_ACTIVE_BIT (1 << 29) |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b1fa8f11c95b..1c9c973a7dd9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -412,6 +412,8 @@ struct kvm {
412 | #endif | 412 | #endif |
413 | long tlbs_dirty; | 413 | long tlbs_dirty; |
414 | struct list_head devices; | 414 | struct list_head devices; |
415 | struct dentry *debugfs_dentry; | ||
416 | struct kvm_stat_data **debugfs_stat_data; | ||
415 | }; | 417 | }; |
416 | 418 | ||
417 | #define kvm_err(fmt, ...) \ | 419 | #define kvm_err(fmt, ...) \ |
@@ -991,6 +993,11 @@ enum kvm_stat_kind {
991 | KVM_STAT_VCPU, | 993 | KVM_STAT_VCPU, |
992 | }; | 994 | }; |
993 | 995 | ||
996 | struct kvm_stat_data { | ||
997 | int offset; | ||
998 | struct kvm *kvm; | ||
999 | }; | ||
1000 | |||
994 | struct kvm_stats_debugfs_item { | 1001 | struct kvm_stats_debugfs_item { |
995 | const char *name; | 1002 | const char *name; |
996 | int offset; | 1003 | int offset; |
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 526fb3d2e43a..f28292d73ddb 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -108,7 +108,7 @@ TRACE_EVENT(kvm_ioapic_set_irq,
108 | __entry->coalesced = coalesced; | 108 | __entry->coalesced = coalesced; |
109 | ), | 109 | ), |
110 | 110 | ||
111 | TP_printk("pin %u dst %x vec=%u (%s|%s|%s%s)%s", | 111 | TP_printk("pin %u dst %x vec %u (%s|%s|%s%s)%s", |
112 | __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e, | 112 | __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e, |
113 | __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), | 113 | __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), |
114 | (__entry->e & (1<<11)) ? "logical" : "physical", | 114 | (__entry->e & (1<<11)) ? "logical" : "physical", |
@@ -129,7 +129,7 @@ TRACE_EVENT(kvm_ioapic_delayed_eoi_inj,
129 | __entry->e = e; | 129 | __entry->e = e; |
130 | ), | 130 | ), |
131 | 131 | ||
132 | TP_printk("dst %x vec=%u (%s|%s|%s%s)", | 132 | TP_printk("dst %x vec %u (%s|%s|%s%s)", |
133 | (u8)(__entry->e >> 56), (u8)__entry->e, | 133 | (u8)(__entry->e >> 56), (u8)__entry->e, |
134 | __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), | 134 | __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), |
135 | (__entry->e & (1<<11)) ? "logical" : "physical", | 135 | (__entry->e & (1<<11)) ? "logical" : "physical", |
@@ -151,7 +151,7 @@ TRACE_EVENT(kvm_msi_set_irq,
151 | __entry->data = data; | 151 | __entry->data = data; |
152 | ), | 152 | ), |
153 | 153 | ||
154 | TP_printk("dst %u vec %x (%s|%s|%s%s)", | 154 | TP_printk("dst %u vec %u (%s|%s|%s%s)", |
155 | (u8)(__entry->address >> 12), (u8)__entry->data, | 155 | (u8)(__entry->address >> 12), (u8)__entry->data, |
156 | __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode), | 156 | __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode), |
157 | (__entry->address & (1<<2)) ? "logical" : "physical", | 157 | (__entry->address & (1<<2)) ? "logical" : "physical", |
diff --git a/tools/Makefile b/tools/Makefile
index 6bf68fe7dd29..f10b64d8c674 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -16,6 +16,7 @@ help:
16 | @echo ' gpio - GPIO tools' | 16 | @echo ' gpio - GPIO tools' |
17 | @echo ' hv - tools used when in Hyper-V clients' | 17 | @echo ' hv - tools used when in Hyper-V clients' |
18 | @echo ' iio - IIO tools' | 18 | @echo ' iio - IIO tools' |
19 | @echo ' kvm_stat - top-like utility for displaying kvm statistics' | ||
19 | @echo ' lguest - a minimal 32-bit x86 hypervisor' | 20 | @echo ' lguest - a minimal 32-bit x86 hypervisor' |
20 | @echo ' net - misc networking tools' | 21 | @echo ' net - misc networking tools' |
21 | @echo ' perf - Linux performance measurement and analysis tool' | 22 | @echo ' perf - Linux performance measurement and analysis tool' |
@@ -110,10 +111,13 @@ tmon_install:
110 | freefall_install: | 111 | freefall_install: |
111 | $(call descend,laptop/$(@:_install=),install) | 112 | $(call descend,laptop/$(@:_install=),install) |
112 | 113 | ||
114 | kvm_stat_install: | ||
115 | $(call descend,kvm/$(@:_install=),install) | ||
116 | |||
113 | install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \ | 117 | install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \ |
114 | perf_install selftests_install turbostat_install usb_install \ | 118 | perf_install selftests_install turbostat_install usb_install \ |
115 | virtio_install vm_install net_install x86_energy_perf_policy_install \ | 119 | virtio_install vm_install net_install x86_energy_perf_policy_install \ |
116 | tmon_install freefall_install objtool_install | 120 | tmon_install freefall_install objtool_install kvm_stat_install |
117 | 121 | ||
118 | acpi_clean: | 122 | acpi_clean: |
119 | $(call descend,power/acpi,clean) | 123 | $(call descend,power/acpi,clean) |
diff --git a/tools/kvm/kvm_stat/Makefile b/tools/kvm/kvm_stat/Makefile
new file mode 100644
index 000000000000..5b1cba57e3b3
--- /dev/null
+++ b/tools/kvm/kvm_stat/Makefile
@@ -0,0 +1,41 @@
1 | include ../../scripts/Makefile.include | ||
2 | include ../../scripts/utilities.mak | ||
3 | BINDIR=usr/bin | ||
4 | MANDIR=usr/share/man | ||
5 | MAN1DIR=$(MANDIR)/man1 | ||
6 | |||
7 | MAN1=kvm_stat.1 | ||
8 | |||
9 | A2X=a2x | ||
10 | a2x_path := $(call get-executable,$(A2X)) | ||
11 | |||
12 | all: man | ||
13 | |||
14 | ifneq ($(findstring $(MAKEFLAGS),s),s) | ||
15 | ifneq ($(V),1) | ||
16 | QUIET_A2X = @echo ' A2X '$@; | ||
17 | endif | ||
18 | endif | ||
19 | |||
20 | %.1: %.txt | ||
21 | ifeq ($(a2x_path),) | ||
22 | $(error "You need to install asciidoc for man pages") | ||
23 | else | ||
24 | $(QUIET_A2X)$(A2X) --doctype manpage --format manpage $< | ||
25 | endif | ||
26 | |||
27 | clean: | ||
28 | rm -f $(MAN1) | ||
29 | |||
30 | man: $(MAN1) | ||
31 | |||
32 | install-man: man | ||
33 | install -d -m 755 $(INSTALL_ROOT)/$(MAN1DIR) | ||
34 | install -m 644 kvm_stat.1 $(INSTALL_ROOT)/$(MAN1DIR) | ||
35 | |||
36 | install-tools: | ||
37 | install -d -m 755 $(INSTALL_ROOT)/$(BINDIR) | ||
38 | install -m 755 -p "kvm_stat" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)" | ||
39 | |||
40 | install: install-tools install-man | ||
41 | .PHONY: all clean man install-tools install-man install | ||
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat new file mode 100755 index 000000000000..581278c58488 --- /dev/null +++ b/tools/kvm/kvm_stat/kvm_stat | |||
@@ -0,0 +1,1127 @@ | |||
1 | #!/usr/bin/python | ||
2 | # | ||
3 | # top-like utility for displaying kvm statistics | ||
4 | # | ||
5 | # Copyright 2006-2008 Qumranet Technologies | ||
6 | # Copyright 2008-2011 Red Hat, Inc. | ||
7 | # | ||
8 | # Authors: | ||
9 | # Avi Kivity <avi@redhat.com> | ||
10 | # | ||
11 | # This work is licensed under the terms of the GNU GPL, version 2. See | ||
12 | # the COPYING file in the top-level directory. | ||
13 | """The kvm_stat module outputs statistics about running KVM VMs | ||
14 | |||
15 | Three output formats are available: | ||
16 | - a top-like text UI | ||
17 | - a key -> value format | ||
18 | - an all-keys, all-values format | ||
19 | |||
20 | The data is sampled from KVM's debugfs entries and its perf events. | ||
21 | """ | ||
22 | |||
23 | import curses | ||
24 | import sys | ||
25 | import os | ||
26 | import time | ||
27 | import optparse | ||
28 | import ctypes | ||
29 | import fcntl | ||
30 | import resource | ||
31 | import struct | ||
32 | import re | ||
33 | from collections import defaultdict | ||
34 | from time import sleep | ||
35 | |||
36 | VMX_EXIT_REASONS = { | ||
37 | 'EXCEPTION_NMI': 0, | ||
38 | 'EXTERNAL_INTERRUPT': 1, | ||
39 | 'TRIPLE_FAULT': 2, | ||
40 | 'PENDING_INTERRUPT': 7, | ||
41 | 'NMI_WINDOW': 8, | ||
42 | 'TASK_SWITCH': 9, | ||
43 | 'CPUID': 10, | ||
44 | 'HLT': 12, | ||
45 | 'INVLPG': 14, | ||
46 | 'RDPMC': 15, | ||
47 | 'RDTSC': 16, | ||
48 | 'VMCALL': 18, | ||
49 | 'VMCLEAR': 19, | ||
50 | 'VMLAUNCH': 20, | ||
51 | 'VMPTRLD': 21, | ||
52 | 'VMPTRST': 22, | ||
53 | 'VMREAD': 23, | ||
54 | 'VMRESUME': 24, | ||
55 | 'VMWRITE': 25, | ||
56 | 'VMOFF': 26, | ||
57 | 'VMON': 27, | ||
58 | 'CR_ACCESS': 28, | ||
59 | 'DR_ACCESS': 29, | ||
60 | 'IO_INSTRUCTION': 30, | ||
61 | 'MSR_READ': 31, | ||
62 | 'MSR_WRITE': 32, | ||
63 | 'INVALID_STATE': 33, | ||
64 | 'MWAIT_INSTRUCTION': 36, | ||
65 | 'MONITOR_INSTRUCTION': 39, | ||
66 | 'PAUSE_INSTRUCTION': 40, | ||
67 | 'MCE_DURING_VMENTRY': 41, | ||
68 | 'TPR_BELOW_THRESHOLD': 43, | ||
69 | 'APIC_ACCESS': 44, | ||
70 | 'EPT_VIOLATION': 48, | ||
71 | 'EPT_MISCONFIG': 49, | ||
72 | 'WBINVD': 54, | ||
73 | 'XSETBV': 55, | ||
74 | 'APIC_WRITE': 56, | ||
75 | 'INVPCID': 58, | ||
76 | } | ||
77 | |||
78 | SVM_EXIT_REASONS = { | ||
79 | 'READ_CR0': 0x000, | ||
80 | 'READ_CR3': 0x003, | ||
81 | 'READ_CR4': 0x004, | ||
82 | 'READ_CR8': 0x008, | ||
83 | 'WRITE_CR0': 0x010, | ||
84 | 'WRITE_CR3': 0x013, | ||
85 | 'WRITE_CR4': 0x014, | ||
86 | 'WRITE_CR8': 0x018, | ||
87 | 'READ_DR0': 0x020, | ||
88 | 'READ_DR1': 0x021, | ||
89 | 'READ_DR2': 0x022, | ||
90 | 'READ_DR3': 0x023, | ||
91 | 'READ_DR4': 0x024, | ||
92 | 'READ_DR5': 0x025, | ||
93 | 'READ_DR6': 0x026, | ||
94 | 'READ_DR7': 0x027, | ||
95 | 'WRITE_DR0': 0x030, | ||
96 | 'WRITE_DR1': 0x031, | ||
97 | 'WRITE_DR2': 0x032, | ||
98 | 'WRITE_DR3': 0x033, | ||
99 | 'WRITE_DR4': 0x034, | ||
100 | 'WRITE_DR5': 0x035, | ||
101 | 'WRITE_DR6': 0x036, | ||
102 | 'WRITE_DR7': 0x037, | ||
103 | 'EXCP_BASE': 0x040, | ||
104 | 'INTR': 0x060, | ||
105 | 'NMI': 0x061, | ||
106 | 'SMI': 0x062, | ||
107 | 'INIT': 0x063, | ||
108 | 'VINTR': 0x064, | ||
109 | 'CR0_SEL_WRITE': 0x065, | ||
110 | 'IDTR_READ': 0x066, | ||
111 | 'GDTR_READ': 0x067, | ||
112 | 'LDTR_READ': 0x068, | ||
113 | 'TR_READ': 0x069, | ||
114 | 'IDTR_WRITE': 0x06a, | ||
115 | 'GDTR_WRITE': 0x06b, | ||
116 | 'LDTR_WRITE': 0x06c, | ||
117 | 'TR_WRITE': 0x06d, | ||
118 | 'RDTSC': 0x06e, | ||
119 | 'RDPMC': 0x06f, | ||
120 | 'PUSHF': 0x070, | ||
121 | 'POPF': 0x071, | ||
122 | 'CPUID': 0x072, | ||
123 | 'RSM': 0x073, | ||
124 | 'IRET': 0x074, | ||
125 | 'SWINT': 0x075, | ||
126 | 'INVD': 0x076, | ||
127 | 'PAUSE': 0x077, | ||
128 | 'HLT': 0x078, | ||
129 | 'INVLPG': 0x079, | ||
130 | 'INVLPGA': 0x07a, | ||
131 | 'IOIO': 0x07b, | ||
132 | 'MSR': 0x07c, | ||
133 | 'TASK_SWITCH': 0x07d, | ||
134 | 'FERR_FREEZE': 0x07e, | ||
135 | 'SHUTDOWN': 0x07f, | ||
136 | 'VMRUN': 0x080, | ||
137 | 'VMMCALL': 0x081, | ||
138 | 'VMLOAD': 0x082, | ||
139 | 'VMSAVE': 0x083, | ||
140 | 'STGI': 0x084, | ||
141 | 'CLGI': 0x085, | ||
142 | 'SKINIT': 0x086, | ||
143 | 'RDTSCP': 0x087, | ||
144 | 'ICEBP': 0x088, | ||
145 | 'WBINVD': 0x089, | ||
146 | 'MONITOR': 0x08a, | ||
147 | 'MWAIT': 0x08b, | ||
148 | 'MWAIT_COND': 0x08c, | ||
149 | 'XSETBV': 0x08d, | ||
150 | 'NPF': 0x400, | ||
151 | } | ||
152 | |||
153 | # EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h) | ||
154 | AARCH64_EXIT_REASONS = { | ||
155 | 'UNKNOWN': 0x00, | ||
156 | 'WFI': 0x01, | ||
157 | 'CP15_32': 0x03, | ||
158 | 'CP15_64': 0x04, | ||
159 | 'CP14_MR': 0x05, | ||
160 | 'CP14_LS': 0x06, | ||
161 | 'FP_ASIMD': 0x07, | ||
162 | 'CP10_ID': 0x08, | ||
163 | 'CP14_64': 0x0C, | ||
164 | 'ILL_ISS': 0x0E, | ||
165 | 'SVC32': 0x11, | ||
166 | 'HVC32': 0x12, | ||
167 | 'SMC32': 0x13, | ||
168 | 'SVC64': 0x15, | ||
169 | 'HVC64': 0x16, | ||
170 | 'SMC64': 0x17, | ||
171 | 'SYS64': 0x18, | ||
172 | 'IABT': 0x20, | ||
173 | 'IABT_HYP': 0x21, | ||
174 | 'PC_ALIGN': 0x22, | ||
175 | 'DABT': 0x24, | ||
176 | 'DABT_HYP': 0x25, | ||
177 | 'SP_ALIGN': 0x26, | ||
178 | 'FP_EXC32': 0x28, | ||
179 | 'FP_EXC64': 0x2C, | ||
180 | 'SERROR': 0x2F, | ||
181 | 'BREAKPT': 0x30, | ||
182 | 'BREAKPT_HYP': 0x31, | ||
183 | 'SOFTSTP': 0x32, | ||
184 | 'SOFTSTP_HYP': 0x33, | ||
185 | 'WATCHPT': 0x34, | ||
186 | 'WATCHPT_HYP': 0x35, | ||
187 | 'BKPT32': 0x38, | ||
188 | 'VECTOR32': 0x3A, | ||
189 | 'BRK64': 0x3C, | ||
190 | } | ||
191 | |||
192 | # From include/uapi/linux/kvm.h, KVM_EXIT_xxx | ||
193 | USERSPACE_EXIT_REASONS = { | ||
194 | 'UNKNOWN': 0, | ||
195 | 'EXCEPTION': 1, | ||
196 | 'IO': 2, | ||
197 | 'HYPERCALL': 3, | ||
198 | 'DEBUG': 4, | ||
199 | 'HLT': 5, | ||
200 | 'MMIO': 6, | ||
201 | 'IRQ_WINDOW_OPEN': 7, | ||
202 | 'SHUTDOWN': 8, | ||
203 | 'FAIL_ENTRY': 9, | ||
204 | 'INTR': 10, | ||
205 | 'SET_TPR': 11, | ||
206 | 'TPR_ACCESS': 12, | ||
207 | 'S390_SIEIC': 13, | ||
208 | 'S390_RESET': 14, | ||
209 | 'DCR': 15, | ||
210 | 'NMI': 16, | ||
211 | 'INTERNAL_ERROR': 17, | ||
212 | 'OSI': 18, | ||
213 | 'PAPR_HCALL': 19, | ||
214 | 'S390_UCONTROL': 20, | ||
215 | 'WATCHDOG': 21, | ||
216 | 'S390_TSCH': 22, | ||
217 | 'EPR': 23, | ||
218 | 'SYSTEM_EVENT': 24, | ||
219 | } | ||
220 | |||
221 | IOCTL_NUMBERS = { | ||
222 | 'SET_FILTER': 0x40082406, | ||
223 | 'ENABLE': 0x00002400, | ||
224 | 'DISABLE': 0x00002401, | ||
225 | 'RESET': 0x00002403, | ||
226 | } | ||
227 | |||
228 | class Arch(object): | ||
229 | """Encapsulates global architecture specific data. | ||
230 | |||
231 | Contains the performance event open syscall and ioctl numbers, as | ||
232 | well as the VM exit reasons for the architecture it runs on. | ||
233 | |||
234 | """ | ||
235 | @staticmethod | ||
236 | def get_arch(): | ||
237 | machine = os.uname()[4] | ||
238 | |||
239 | if machine.startswith('ppc'): | ||
240 | return ArchPPC() | ||
241 | elif machine.startswith('aarch64'): | ||
242 | return ArchA64() | ||
243 | elif machine.startswith('s390'): | ||
244 | return ArchS390() | ||
245 | else: | ||
246 | # X86_64 | ||
247 | for line in open('/proc/cpuinfo'): | ||
248 | if not line.startswith('flags'): | ||
249 | continue | ||
250 | |||
251 | flags = line.split() | ||
252 | if 'vmx' in flags: | ||
253 | return ArchX86(VMX_EXIT_REASONS) | ||
254 | if 'svm' in flags: | ||
255 | return ArchX86(SVM_EXIT_REASONS) | ||
256 | return | ||
257 | |||
258 | class ArchX86(Arch): | ||
259 | def __init__(self, exit_reasons): | ||
260 | self.sc_perf_evt_open = 298 | ||
261 | self.ioctl_numbers = IOCTL_NUMBERS | ||
262 | self.exit_reasons = exit_reasons | ||
263 | |||
264 | class ArchPPC(Arch): | ||
265 | def __init__(self): | ||
266 | self.sc_perf_evt_open = 319 | ||
267 | self.ioctl_numbers = IOCTL_NUMBERS | ||
268 | self.ioctl_numbers['ENABLE'] = 0x20002400 | ||
269 | self.ioctl_numbers['DISABLE'] = 0x20002401 | ||
270 | self.ioctl_numbers['RESET'] = 0x20002403 | ||
271 | |||
272 | # PPC comes in 32- and 64-bit flavours, and some generated | ||
273 | # ioctl numbers depend on the word size. | ||
274 | char_ptr_size = ctypes.sizeof(ctypes.c_char_p) | ||
275 | self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 | ||
276 | self.exit_reasons = {} | ||
277 | |||
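As a quick worked check of the wordsize-dependent number above (an illustration, not part of the tool): ctypes.sizeof(ctypes.c_char_p) is 8 on a 64-bit build and 4 on a 32-bit one, so:

    # sizeof(char *) shifted into the ioctl number's size field
    assert 0x80002406 | (8 << 16) == 0x80082406   # 64-bit ppc
    assert 0x80002406 | (4 << 16) == 0x80042406   # 32-bit ppc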
278 | class ArchA64(Arch): | ||
279 | def __init__(self): | ||
280 | self.sc_perf_evt_open = 241 | ||
281 | self.ioctl_numbers = IOCTL_NUMBERS | ||
282 | self.exit_reasons = AARCH64_EXIT_REASONS | ||
283 | |||
284 | class ArchS390(Arch): | ||
285 | def __init__(self): | ||
286 | self.sc_perf_evt_open = 331 | ||
287 | self.ioctl_numbers = IOCTL_NUMBERS | ||
288 | self.exit_reasons = None | ||
289 | |||
290 | ARCH = Arch.get_arch() | ||
291 | |||
292 | |||
293 | def walkdir(path): | ||
294 | """Returns os.walk() data for specified directory. | ||
295 | |||
296 | As it is only a wrapper, it returns the same 3-tuple of (dirpath, | ||
297 | dirnames, filenames). | ||
298 | """ | ||
299 | return next(os.walk(path)) | ||
300 | |||
301 | |||
302 | def parse_int_list(list_string): | ||
303 | """Returns an int list from a string of comma separated integers and | ||
304 | integer ranges.""" | ||
305 | integers = [] | ||
306 | members = list_string.split(',') | ||
307 | |||
308 | for member in members: | ||
309 | if '-' not in member: | ||
310 | integers.append(int(member)) | ||
311 | else: | ||
312 | int_range = member.split('-') | ||
313 | integers.extend(range(int(int_range[0]), | ||
314 | int(int_range[1]) + 1)) | ||
315 | |||
316 | return integers | ||
317 | |||
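For illustration, the helper above expands the kernel's cpulist syntax (the format found in /sys/devices/system/cpu/online) into explicit ids:

    parse_int_list('0-2,4,7-8')   # -> [0, 1, 2, 4, 7, 8]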
318 | |||
319 | def get_online_cpus(): | ||
320 | """Returns a list of cpu id integers.""" | ||
321 | with open('/sys/devices/system/cpu/online') as cpu_list: | ||
322 | cpu_string = cpu_list.readline() | ||
323 | return parse_int_list(cpu_string) | ||
324 | |||
325 | |||
326 | def get_filters(): | ||
327 | """Returns a dict of trace events, their filter ids and | ||
328 | the values that can be filtered. | ||
329 | |||
330 | Trace events can be filtered for special values by setting a | ||
331 | filter string via an ioctl. The string normally has the format | ||
332 | identifier==value. For each filter a separate event is created so | ||
333 | that the events can be distinguished. | ||
334 | |||
335 | """ | ||
336 | filters = {} | ||
337 | filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) | ||
338 | if ARCH.exit_reasons: | ||
339 | filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons) | ||
340 | return filters | ||
341 | |||
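A sketch of what get_filters() yields on a VMX-capable x86 host (entries taken from the tables above; not actual tool output):

    filters['kvm_userspace_exit']   # ('reason', USERSPACE_EXIT_REASONS)
    filters['kvm_exit']             # ('exit_reason', VMX_EXIT_REASONS)
    # A derived field such as 'kvm_exit(HLT)' later carries the perf
    # filter string 'exit_reason==12', as VMX_EXIT_REASONS['HLT'] == 12.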
342 | libc = ctypes.CDLL('libc.so.6', use_errno=True) | ||
343 | syscall = libc.syscall | ||
344 | |||
345 | class perf_event_attr(ctypes.Structure): | ||
346 | """Struct that holds the necessary data to set up a trace event. | ||
347 | |||
348 | For an extensive explanation see perf_event_open(2) and | ||
349 | include/uapi/linux/perf_event.h, struct perf_event_attr | ||
350 | |||
351 | All fields that are not initialized in the constructor are 0. | ||
352 | |||
353 | """ | ||
354 | _fields_ = [('type', ctypes.c_uint32), | ||
355 | ('size', ctypes.c_uint32), | ||
356 | ('config', ctypes.c_uint64), | ||
357 | ('sample_freq', ctypes.c_uint64), | ||
358 | ('sample_type', ctypes.c_uint64), | ||
359 | ('read_format', ctypes.c_uint64), | ||
360 | ('flags', ctypes.c_uint64), | ||
361 | ('wakeup_events', ctypes.c_uint32), | ||
362 | ('bp_type', ctypes.c_uint32), | ||
363 | ('bp_addr', ctypes.c_uint64), | ||
364 | ('bp_len', ctypes.c_uint64), | ||
365 | ] | ||
366 | |||
367 | def __init__(self): | ||
368 | super(self.__class__, self).__init__() | ||
369 | self.type = PERF_TYPE_TRACEPOINT | ||
370 | self.size = ctypes.sizeof(self) | ||
371 | self.read_format = PERF_FORMAT_GROUP | ||
372 | |||
373 | def perf_event_open(attr, pid, cpu, group_fd, flags): | ||
374 | """Wrapper for the sys_perf_evt_open() syscall. | ||
375 | |||
376 | Used to set up performance events, returns a file descriptor or -1 | ||
377 | on error. | ||
378 | |||
379 | Attributes are: | ||
380 | - syscall number | ||
381 | - struct perf_event_attr * | ||
382 | - pid or -1 to monitor all pids | ||
383 | - cpu number or -1 to monitor all cpus | ||
384 | - The file descriptor of the group leader or -1 to create a group. | ||
385 | - flags | ||
386 | |||
387 | """ | ||
388 | return syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr), | ||
389 | ctypes.c_int(pid), ctypes.c_int(cpu), | ||
390 | ctypes.c_int(group_fd), ctypes.c_long(flags)) | ||
391 | |||
392 | PERF_TYPE_TRACEPOINT = 2 | ||
393 | PERF_FORMAT_GROUP = 1 << 3 | ||
394 | |||
395 | PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing' | ||
396 | PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm' | ||
397 | |||
398 | class Group(object): | ||
399 | """Represents a perf event group.""" | ||
400 | |||
401 | def __init__(self): | ||
402 | self.events = [] | ||
403 | |||
404 | def add_event(self, event): | ||
405 | self.events.append(event) | ||
406 | |||
407 | def read(self): | ||
408 | """Returns a dict with 'event name: value' for all events in the | ||
409 | group. | ||
410 | |||
411 | Values are read by reading from the file descriptor of the | ||
412 | event that is the group leader. See perf_event_open(2) for | ||
413 | details. | ||
414 | |||
415 | Read format for the used event configuration is: | ||
416 | struct read_format { | ||
417 | u64 nr; /* The number of events */ | ||
418 | struct { | ||
419 | u64 value; /* The value of the event */ | ||
420 | } values[nr]; | ||
421 | }; | ||
422 | |||
423 | """ | ||
424 | length = 8 * (1 + len(self.events)) | ||
425 | read_format = 'xxxxxxxx' + 'Q' * len(self.events) | ||
426 | return dict(zip([event.name for event in self.events], | ||
427 | struct.unpack(read_format, | ||
428 | os.read(self.events[0].fd, length)))) | ||
429 | |||
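A worked example of the read above, assuming a hypothetical group of three events (leader_fd stands in for self.events[0].fd): the group yields 8 * (1 + 3) = 32 bytes, the 'xxxxxxxx' pad skips the leading u64 nr, and 'QQQ' unpacks the three u64 counter values in declaration order:

    v0, v1, v2 = struct.unpack('xxxxxxxxQQQ', os.read(leader_fd, 32))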
430 | class Event(object): | ||
431 | """Represents a performance event and manages its life cycle.""" | ||
432 | def __init__(self, name, group, trace_cpu, trace_pid, trace_point, | ||
433 | trace_filter, trace_set='kvm'): | ||
434 | self.name = name | ||
435 | self.fd = None | ||
436 | self.setup_event(group, trace_cpu, trace_pid, trace_point, | ||
437 | trace_filter, trace_set) | ||
438 | |||
439 | def __del__(self): | ||
440 | """Closes the event's file descriptor. | ||
441 | |||
442 | As no Python file object was created for the file descriptor, | ||
443 | Python will not reference-count the descriptor and will not | ||
444 | close it automatically, so we do it here. | ||
445 | |||
446 | """ | ||
447 | if self.fd: | ||
448 | os.close(self.fd) | ||
449 | |||
450 | def setup_event_attribute(self, trace_set, trace_point): | ||
451 | """Returns an initialized ctype perf_event_attr struct.""" | ||
452 | |||
453 | id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set, | ||
454 | trace_point, 'id') | ||
455 | |||
456 | event_attr = perf_event_attr() | ||
457 | event_attr.config = int(open(id_path).read()) | ||
458 | return event_attr | ||
459 | |||
460 | def setup_event(self, group, trace_cpu, trace_pid, trace_point, | ||
461 | trace_filter, trace_set): | ||
462 | """Sets up the perf event in Linux. | ||
463 | |||
464 | Issues the syscall to register the event in the kernel and | ||
465 | then sets the optional filter. | ||
466 | |||
467 | """ | ||
468 | |||
469 | event_attr = self.setup_event_attribute(trace_set, trace_point) | ||
470 | |||
471 | # First event will be group leader. | ||
472 | group_leader = -1 | ||
473 | |||
474 | # All others have to pass the leader's descriptor instead. | ||
475 | if group.events: | ||
476 | group_leader = group.events[0].fd | ||
477 | |||
478 | fd = perf_event_open(event_attr, trace_pid, | ||
479 | trace_cpu, group_leader, 0) | ||
480 | if fd == -1: | ||
481 | err = ctypes.get_errno() | ||
482 | raise OSError(err, os.strerror(err), | ||
483 | 'while calling sys_perf_event_open().') | ||
484 | |||
485 | if trace_filter: | ||
486 | fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'], | ||
487 | trace_filter) | ||
488 | |||
489 | self.fd = fd | ||
490 | |||
491 | def enable(self): | ||
492 | """Enables the trace event in the kernel. | ||
493 | |||
494 | Enabling the group leader makes reading counters from it and the | ||
495 | events under it possible. | ||
496 | |||
497 | """ | ||
498 | fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0) | ||
499 | |||
500 | def disable(self): | ||
501 | """Disables the trace event in the kernel. | ||
502 | |||
503 | Disabling the group leader makes reading all counters under it | ||
504 | impossible. | ||
505 | |||
506 | """ | ||
507 | fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0) | ||
508 | |||
509 | def reset(self): | ||
510 | """Resets the count of the trace event in the kernel.""" | ||
511 | fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0) | ||
512 | |||
513 | class TracepointProvider(object): | ||
514 | """Data provider for the stats class. | ||
515 | |||
516 | Manages the events/groups from which it acquires its data. | ||
517 | |||
518 | """ | ||
519 | def __init__(self): | ||
520 | self.group_leaders = [] | ||
521 | self.filters = get_filters() | ||
522 | self._fields = self.get_available_fields() | ||
523 | self._pid = 0 | ||
524 | |||
525 | def get_available_fields(self): | ||
526 | """Returns a list of available event's of format 'event name(filter | ||
527 | name)'. | ||
528 | |||
529 | All available events have directories under | ||
530 | /sys/kernel/debug/tracing/events/ which export information | ||
531 | about the specific event. Therefore, listing the dirs gives us | ||
532 | a list of all available events. | ||
533 | |||
534 | Some events like the vm exit reasons can be filtered for | ||
535 | specific values. To account for that, the routine below | ||
536 | creates special fields with the following format: | ||
537 | event name(filter name) | ||
538 | |||
539 | """ | ||
540 | path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm') | ||
541 | fields = walkdir(path)[1] | ||
542 | extra = [] | ||
543 | for field in fields: | ||
544 | if field in self.filters: | ||
545 | filter_name_, filter_dicts = self.filters[field] | ||
546 | for name in filter_dicts: | ||
547 | extra.append(field + '(' + name + ')') | ||
548 | fields += extra | ||
549 | return fields | ||
550 | |||
551 | def setup_traces(self): | ||
552 | """Creates all event and group objects needed to be able to retrieve | ||
553 | data.""" | ||
554 | if self._pid > 0: | ||
555 | # Fetch list of all threads of the monitored pid, as qemu | ||
556 | # starts a thread for each vcpu. | ||
557 | path = os.path.join('/proc', str(self._pid), 'task') | ||
558 | groupids = walkdir(path)[1] | ||
559 | else: | ||
560 | groupids = get_online_cpus() | ||
561 | |||
562 | # The constant is needed as a buffer for python libs, std | ||
563 | # streams and other files that the script opens. | ||
564 | newlim = len(groupids) * len(self._fields) + 50 | ||
565 | try: | ||
566 | softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE) | ||
567 | |||
568 | if hardlim < newlim: | ||
569 | # Now we need CAP_SYS_RESOURCE, to increase the hard limit. | ||
570 | resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim)) | ||
571 | else: | ||
572 | # Raising the soft limit is sufficient. | ||
573 | resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim)) | ||
574 | |||
575 | except ValueError: | ||
576 | sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim)) | ||
577 | |||
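A worked example of the limit calculation with hypothetical sizes: monitoring 4 online cpus with 60 fields needs one descriptor per event, so

    newlim = 4 * 60 + 50   # == 290, including the 50-descriptor buffer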
578 | for groupid in groupids: | ||
579 | group = Group() | ||
580 | for name in self._fields: | ||
581 | tracepoint = name | ||
582 | tracefilter = None | ||
583 | match = re.match(r'(.*)\((.*)\)', name) | ||
584 | if match: | ||
585 | tracepoint, sub = match.groups() | ||
586 | tracefilter = ('%s==%d\0' % | ||
587 | (self.filters[tracepoint][0], | ||
588 | self.filters[tracepoint][1][sub])) | ||
589 | |||
590 | # From perf_event_open(2): | ||
591 | # pid > 0 and cpu == -1 | ||
592 | # This measures the specified process/thread on any CPU. | ||
593 | # | ||
594 | # pid == -1 and cpu >= 0 | ||
595 | # This measures all processes/threads on the specified CPU. | ||
596 | trace_cpu = groupid if self._pid == 0 else -1 | ||
597 | trace_pid = int(groupid) if self._pid != 0 else -1 | ||
598 | |||
599 | group.add_event(Event(name=name, | ||
600 | group=group, | ||
601 | trace_cpu=trace_cpu, | ||
602 | trace_pid=trace_pid, | ||
603 | trace_point=tracepoint, | ||
604 | trace_filter=tracefilter)) | ||
605 | |||
606 | self.group_leaders.append(group) | ||
607 | |||
608 | def available_fields(self): | ||
609 | return self.get_available_fields() | ||
610 | |||
611 | @property | ||
612 | def fields(self): | ||
613 | return self._fields | ||
614 | |||
615 | @fields.setter | ||
616 | def fields(self, fields): | ||
617 | """Enables/disables the (un)wanted events""" | ||
618 | self._fields = fields | ||
619 | for group in self.group_leaders: | ||
620 | for index, event in enumerate(group.events): | ||
621 | if event.name in fields: | ||
622 | event.reset() | ||
623 | event.enable() | ||
624 | else: | ||
625 | # Do not disable the group leader. | ||
626 | # It would disable all of its events. | ||
627 | if index != 0: | ||
628 | event.disable() | ||
629 | |||
630 | @property | ||
631 | def pid(self): | ||
632 | return self._pid | ||
633 | |||
634 | @pid.setter | ||
635 | def pid(self, pid): | ||
636 | """Changes the monitored pid by setting new traces.""" | ||
637 | self._pid = pid | ||
638 | # The garbage collector will get rid of all Event/Group | ||
639 | # objects and open files after removing the references. | ||
640 | self.group_leaders = [] | ||
641 | self.setup_traces() | ||
642 | self.fields = self._fields | ||
643 | |||
644 | def read(self): | ||
645 | """Returns 'event name: current value' for all enabled events.""" | ||
646 | ret = defaultdict(int) | ||
647 | for group in self.group_leaders: | ||
648 | for name, val in group.read().iteritems(): | ||
649 | if name in self._fields: | ||
650 | ret[name] += val | ||
651 | return ret | ||
652 | |||
653 | class DebugfsProvider(object): | ||
654 | """Provides data from the files that KVM creates in the kvm debugfs | ||
655 | folder.""" | ||
656 | def __init__(self): | ||
657 | self._fields = self.get_available_fields() | ||
658 | self._pid = 0 | ||
659 | self.do_read = True | ||
660 | |||
661 | def get_available_fields(self): | ||
662 | """"Returns a list of available fields. | ||
663 | |||
664 | The fields are all the available KVM debugfs files. | ||
665 | |||
666 | """ | ||
667 | return walkdir(PATH_DEBUGFS_KVM)[2] | ||
668 | |||
669 | @property | ||
670 | def fields(self): | ||
671 | return self._fields | ||
672 | |||
673 | @fields.setter | ||
674 | def fields(self, fields): | ||
675 | self._fields = fields | ||
676 | |||
677 | @property | ||
678 | def pid(self): | ||
679 | return self._pid | ||
680 | |||
681 | @pid.setter | ||
682 | def pid(self, pid): | ||
683 | if pid != 0: | ||
684 | self._pid = pid | ||
685 | |||
686 | vms = walkdir(PATH_DEBUGFS_KVM)[1] | ||
687 | if len(vms) == 0: | ||
688 | self.do_read = False | ||
689 | |||
690 | self.paths = filter(lambda x: "{}-".format(pid) in x, vms) | ||
691 | |||
692 | else: | ||
693 | self.paths = [''] | ||
694 | self.do_read = True | ||
695 | |||
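The substring match above assumes KVM's new per-VM debugfs directories are named '<pid>-<fd>'; a sketch with hypothetical names:

    vms = ['12345-11', '67890-12']        # walkdir(PATH_DEBUGFS_KVM)[1]
    [d for d in vms if '12345-' in d]     # -> ['12345-11'] for pid 12345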
696 | def read(self): | ||
697 | """Returns a dict with format:'file name / field -> current value'.""" | ||
698 | results = {} | ||
699 | |||
700 | # If no debugfs filtering support is available, then don't read. | ||
701 | if not self.do_read: | ||
702 | return results | ||
703 | |||
704 | for path in self.paths: | ||
705 | for field in self._fields: | ||
706 | results[field] = results.get(field, 0) \ | ||
707 | + self.read_field(field, path) | ||
708 | |||
709 | return results | ||
710 | |||
711 | def read_field(self, field, path): | ||
712 | """Returns the value of a single field from a specific VM.""" | ||
713 | try: | ||
714 | return int(open(os.path.join(PATH_DEBUGFS_KVM, | ||
715 | path, | ||
716 | field)) | ||
717 | .read()) | ||
718 | except IOError: | ||
719 | return 0 | ||
720 | |||
721 | class Stats(object): | ||
722 | """Manages the data providers and the data they provide. | ||
723 | |||
724 | It is used to set filters on the provider's data and collect all | ||
725 | provider data. | ||
726 | |||
727 | """ | ||
728 | def __init__(self, providers, pid, fields=None): | ||
729 | self.providers = providers | ||
730 | self._pid_filter = pid | ||
731 | self._fields_filter = fields | ||
732 | self.values = {} | ||
733 | self.update_provider_pid() | ||
734 | self.update_provider_filters() | ||
735 | |||
736 | def update_provider_filters(self): | ||
737 | """Propagates fields filters to providers.""" | ||
738 | def wanted(key): | ||
739 | if not self._fields_filter: | ||
740 | return True | ||
741 | return re.match(self._fields_filter, key) is not None | ||
742 | |||
743 | # As we reset the counters when updating the fields we can | ||
744 | # also clear the cache of old values. | ||
745 | self.values = {} | ||
746 | for provider in self.providers: | ||
747 | provider_fields = [key for key in provider.get_available_fields() | ||
748 | if wanted(key)] | ||
749 | provider.fields = provider_fields | ||
750 | |||
751 | def update_provider_pid(self): | ||
752 | """Propagates pid filters to providers.""" | ||
753 | for provider in self.providers: | ||
754 | provider.pid = self._pid_filter | ||
755 | |||
756 | @property | ||
757 | def fields_filter(self): | ||
758 | return self._fields_filter | ||
759 | |||
760 | @fields_filter.setter | ||
761 | def fields_filter(self, fields_filter): | ||
762 | self._fields_filter = fields_filter | ||
763 | self.update_provider_filters() | ||
764 | |||
765 | @property | ||
766 | def pid_filter(self): | ||
767 | return self._pid_filter | ||
768 | |||
769 | @pid_filter.setter | ||
770 | def pid_filter(self, pid): | ||
771 | self._pid_filter = pid | ||
772 | self.values = {} | ||
773 | self.update_provider_pid() | ||
774 | |||
775 | def get(self): | ||
776 | """Returns a dict with field -> (value, delta to last value) of all | ||
777 | provider data.""" | ||
778 | for provider in self.providers: | ||
779 | new = provider.read() | ||
780 | for key in provider.fields: | ||
781 | oldval = self.values.get(key, (0, 0)) | ||
782 | newval = new.get(key, 0) | ||
783 | newdelta = None | ||
784 | if oldval is not None: | ||
785 | newdelta = newval - oldval[0] | ||
786 | self.values[key] = (newval, newdelta) | ||
787 | return self.values | ||
788 | |||
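To illustrate the cached (value, delta) pairs with hypothetical counts: the first get() reports the full count as its delta, since the old value defaults to (0, 0); later calls report only the increment:

    stats.get()   # e.g. {'kvm_exit': (1000, 1000)}
    stats.get()   # e.g. {'kvm_exit': (1600, 600)}; 600 exits since last call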
789 | LABEL_WIDTH = 40 | ||
790 | NUMBER_WIDTH = 10 | ||
791 | |||
792 | class Tui(object): | ||
793 | """Instruments curses to draw a nice text ui.""" | ||
794 | def __init__(self, stats): | ||
795 | self.stats = stats | ||
796 | self.screen = None | ||
797 | self.drilldown = False | ||
798 | self.update_drilldown() | ||
799 | |||
800 | def __enter__(self): | ||
801 | """Initialises curses for later use. Based on curses.wrapper | ||
802 | implementation from the Python standard library.""" | ||
803 | self.screen = curses.initscr() | ||
804 | curses.noecho() | ||
805 | curses.cbreak() | ||
806 | |||
807 | # The try/except block works around a minor bit of | ||
808 | # over-conscientiousness in the curses module; the error | ||
809 | # return from C start_color() is ignorable. | ||
810 | try: | ||
811 | curses.start_color() | ||
812 | except: | ||
813 | pass | ||
814 | |||
815 | curses.use_default_colors() | ||
816 | return self | ||
817 | |||
818 | def __exit__(self, *exception): | ||
819 | """Resets the terminal to its normal state. Based on curses.wrappre | ||
820 | implementation from the Python standard library.""" | ||
821 | if self.screen: | ||
822 | self.screen.keypad(0) | ||
823 | curses.echo() | ||
824 | curses.nocbreak() | ||
825 | curses.endwin() | ||
826 | |||
827 | def update_drilldown(self): | ||
828 | """Sets or removes a filter that only allows fields without braces.""" | ||
829 | if not self.stats.fields_filter: | ||
830 | self.stats.fields_filter = r'^[^\(]*$' | ||
831 | |||
832 | elif self.stats.fields_filter == r'^[^\(]*$': | ||
833 | self.stats.fields_filter = None | ||
834 | |||
835 | def update_pid(self, pid): | ||
836 | """Propagates pid selection to stats object.""" | ||
837 | self.stats.pid_filter = pid | ||
838 | |||
839 | def refresh(self, sleeptime): | ||
840 | """Refreshes on-screen data.""" | ||
841 | self.screen.erase() | ||
842 | if self.stats.pid_filter > 0: | ||
843 | self.screen.addstr(0, 0, 'kvm statistics - pid {0}' | ||
844 | .format(self.stats.pid_filter), | ||
845 | curses.A_BOLD) | ||
846 | else: | ||
847 | self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD) | ||
848 | self.screen.addstr(2, 1, 'Event') | ||
849 | self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH - | ||
850 | len('Total'), 'Total') | ||
851 | self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 - | ||
852 | len('Current'), 'Current') | ||
853 | row = 3 | ||
854 | stats = self.stats.get() | ||
855 | def sortkey(x): | ||
856 | if stats[x][1]: | ||
857 | return (-stats[x][1], -stats[x][0]) | ||
858 | else: | ||
859 | return (0, -stats[x][0]) | ||
860 | for key in sorted(stats.keys(), key=sortkey): | ||
861 | |||
862 | if row >= self.screen.getmaxyx()[0]: | ||
863 | break | ||
864 | values = stats[key] | ||
865 | if not values[0] and not values[1]: | ||
866 | break | ||
867 | col = 1 | ||
868 | self.screen.addstr(row, col, key) | ||
869 | col += LABEL_WIDTH | ||
870 | self.screen.addstr(row, col, '%10d' % (values[0],)) | ||
871 | col += NUMBER_WIDTH | ||
872 | if values[1] is not None: | ||
873 | self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,)) | ||
874 | row += 1 | ||
875 | self.screen.refresh() | ||
876 | |||
877 | def show_filter_selection(self): | ||
878 | """Draws filter selection mask. | ||
879 | |||
880 | Asks for a valid regex and sets the fields filter accordingly. | ||
881 | |||
882 | """ | ||
883 | while True: | ||
884 | self.screen.erase() | ||
885 | self.screen.addstr(0, 0, | ||
886 | "Show statistics for events matching a regex.", | ||
887 | curses.A_BOLD) | ||
888 | self.screen.addstr(2, 0, | ||
889 | "Current regex: {0}" | ||
890 | .format(self.stats.fields_filter)) | ||
891 | self.screen.addstr(3, 0, "New regex: ") | ||
892 | curses.echo() | ||
893 | regex = self.screen.getstr() | ||
894 | curses.noecho() | ||
895 | if len(regex) == 0: | ||
896 | return | ||
897 | try: | ||
898 | re.compile(regex) | ||
899 | self.stats.fields_filter = regex | ||
900 | return | ||
901 | except re.error: | ||
902 | continue | ||
903 | |||
904 | def show_vm_selection(self): | ||
905 | """Draws PID selection mask. | ||
906 | |||
907 | Asks for a pid until a valid pid or 0 has been entered. | ||
908 | |||
909 | """ | ||
910 | while True: | ||
911 | self.screen.erase() | ||
912 | self.screen.addstr(0, 0, | ||
913 | 'Show statistics for specific pid.', | ||
914 | curses.A_BOLD) | ||
915 | self.screen.addstr(1, 0, | ||
916 | 'This might limit the shown data to the trace ' | ||
917 | 'statistics.') | ||
918 | |||
919 | curses.echo() | ||
920 | self.screen.addstr(3, 0, "Pid [0 or pid]: ") | ||
921 | pid = self.screen.getstr() | ||
922 | curses.noecho() | ||
923 | |||
924 | try: | ||
925 | pid = int(pid) | ||
926 | |||
927 | if pid == 0: | ||
928 | self.update_pid(pid) | ||
929 | break | ||
930 | else: | ||
931 | if not os.path.isdir(os.path.join('/proc/', str(pid))): | ||
932 | continue | ||
933 | else: | ||
934 | self.update_pid(pid) | ||
935 | break | ||
936 | |||
937 | except ValueError: | ||
938 | continue | ||
939 | |||
940 | def show_stats(self): | ||
941 | """Refreshes the screen and processes user input.""" | ||
942 | sleeptime = 0.25 | ||
943 | while True: | ||
944 | self.refresh(sleeptime) | ||
945 | curses.halfdelay(int(sleeptime * 10)) | ||
946 | sleeptime = 3 | ||
947 | try: | ||
948 | char = self.screen.getkey() | ||
949 | if char == 'x': | ||
950 | self.drilldown = not self.drilldown | ||
951 | self.update_drilldown() | ||
952 | if char == 'q': | ||
953 | break | ||
954 | if char == 'f': | ||
955 | self.show_filter_selection() | ||
956 | if char == 'p': | ||
957 | self.show_vm_selection() | ||
958 | except KeyboardInterrupt: | ||
959 | break | ||
960 | except curses.error: | ||
961 | continue | ||
962 | |||
963 | def batch(stats): | ||
964 | """Prints statistics in a key, value format.""" | ||
965 | s = stats.get() | ||
966 | time.sleep(1) | ||
967 | s = stats.get() | ||
968 | for key in sorted(s.keys()): | ||
969 | values = s[key] | ||
970 | print '%-42s%10d%10d' % (key, values[0], values[1]) | ||
971 | |||
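With the format string above, a batch line pairs the total count with the delta accumulated over the one-second sleep (hypothetical values):

    print '%-42s%10d%10d' % ('kvm_exit', 5671, 312)
    # key padded to 42 columns, then total (5671) and delta (312)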
972 | def log(stats): | ||
973 | """Prints statistics as reiterating key block, multiple value blocks.""" | ||
974 | keys = sorted(stats.get().iterkeys()) | ||
975 | def banner(): | ||
976 | for k in keys: | ||
977 | print '%s' % k, | ||
978 | |||
979 | def statline(): | ||
980 | s = stats.get() | ||
981 | for k in keys: | ||
982 | print ' %9d' % s[k][1], | ||
983 | |||
984 | line = 0 | ||
985 | banner_repeat = 20 | ||
986 | while True: | ||
987 | time.sleep(1) | ||
988 | if line % banner_repeat == 0: | ||
989 | banner() | ||
990 | statline() | ||
991 | line += 1 | ||
992 | |||
993 | def get_options(): | ||
994 | """Returns processed program arguments.""" | ||
995 | description_text = """ | ||
996 | This script displays various statistics about VMs running under KVM. | ||
997 | The statistics are gathered from the KVM debugfs entries and/or the | ||
998 | currently available perf traces. | ||
999 | |||
1000 | The monitoring takes additional cpu cycles and might affect the VM's | ||
1001 | performance. | ||
1002 | |||
1003 | Requirements: | ||
1004 | - Access to: | ||
1005 | /sys/kernel/debug/kvm | ||
1006 | /sys/kernel/debug/tracing/events/* | ||
1007 | /proc/pid/task | ||
1008 | - /proc/sys/kernel/perf_event_paranoid < 1 if the user has no | ||
1009 | CAP_SYS_ADMIN and perf events are used. | ||
1010 | - CAP_SYS_RESOURCE if the hard limit is not high enough to allow | ||
1011 | the large number of files that are possibly opened. | ||
1012 | """ | ||
1013 | |||
1014 | class PlainHelpFormatter(optparse.IndentedHelpFormatter): | ||
1015 | def format_description(self, description): | ||
1016 | if description: | ||
1017 | return description + "\n" | ||
1018 | else: | ||
1019 | return "" | ||
1020 | |||
1021 | optparser = optparse.OptionParser(description=description_text, | ||
1022 | formatter=PlainHelpFormatter()) | ||
1023 | optparser.add_option('-1', '--once', '--batch', | ||
1024 | action='store_true', | ||
1025 | default=False, | ||
1026 | dest='once', | ||
1027 | help='run in batch mode for one second', | ||
1028 | ) | ||
1029 | optparser.add_option('-l', '--log', | ||
1030 | action='store_true', | ||
1031 | default=False, | ||
1032 | dest='log', | ||
1033 | help='run in logging mode (like vmstat)', | ||
1034 | ) | ||
1035 | optparser.add_option('-t', '--tracepoints', | ||
1036 | action='store_true', | ||
1037 | default=False, | ||
1038 | dest='tracepoints', | ||
1039 | help='retrieve statistics from tracepoints', | ||
1040 | ) | ||
1041 | optparser.add_option('-d', '--debugfs', | ||
1042 | action='store_true', | ||
1043 | default=False, | ||
1044 | dest='debugfs', | ||
1045 | help='retrieve statistics from debugfs', | ||
1046 | ) | ||
1047 | optparser.add_option('-f', '--fields', | ||
1048 | action='store', | ||
1049 | default=None, | ||
1050 | dest='fields', | ||
1051 | help='fields to display (regex)', | ||
1052 | ) | ||
1053 | optparser.add_option('-p', '--pid', | ||
1054 | action='store', | ||
1055 | default=0, | ||
1056 | type=int, | ||
1057 | dest='pid', | ||
1058 | help='restrict statistics to pid', | ||
1059 | ) | ||
1060 | (options, _) = optparser.parse_args(sys.argv) | ||
1061 | return options | ||
1062 | |||
1063 | def get_providers(options): | ||
1064 | """Returns a list of data providers depending on the passed options.""" | ||
1065 | providers = [] | ||
1066 | |||
1067 | if options.tracepoints: | ||
1068 | providers.append(TracepointProvider()) | ||
1069 | if options.debugfs: | ||
1070 | providers.append(DebugfsProvider()) | ||
1071 | if len(providers) == 0: | ||
1072 | providers.append(TracepointProvider()) | ||
1073 | |||
1074 | return providers | ||
1075 | |||
1076 | def check_access(options): | ||
1077 | """Exits if the current user can't access all needed directories.""" | ||
1078 | if not os.path.exists('/sys/kernel/debug'): | ||
1079 | sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.') | ||
1080 | sys.exit(1) | ||
1081 | |||
1082 | if not os.path.exists(PATH_DEBUGFS_KVM): | ||
1083 | sys.stderr.write("Please make sure that debugfs is mounted and " | ||
1084 | "readable by the current user:\n" | ||
1085 | "('mount -t debugfs debugfs /sys/kernel/debug')\n" | ||
1086 | "Also ensure, that the kvm modules are loaded.\n") | ||
1087 | sys.exit(1) | ||
1088 | |||
1089 | if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints | ||
1090 | or not options.debugfs): | ||
1091 | sys.stderr.write("Please enable CONFIG_TRACING in your kernel " | ||
1092 | "when using the option -t (default).\n" | ||
1093 | "If it is enabled, make {0} readable by the " | ||
1094 | "current user.\n" | ||
1095 | .format(PATH_DEBUGFS_TRACING)) | ||
1096 | if options.tracepoints: | ||
1097 | sys.exit(1) | ||
1098 | |||
1099 | sys.stderr.write("Falling back to debugfs statistics!\n") | ||
1100 | options.debugfs = True | ||
1101 | sleep(5) | ||
1102 | |||
1103 | return options | ||
1104 | |||
1105 | def main(): | ||
1106 | options = get_options() | ||
1107 | options = check_access(options) | ||
1108 | |||
1109 | if (options.pid > 0 and | ||
1110 | not os.path.isdir(os.path.join('/proc/', | ||
1111 | str(options.pid)))): | ||
1112 | sys.stderr.write('Did you use an (unsupported) tid instead of a pid?\n') | ||
1113 | sys.exit('Specified pid does not exist.') | ||
1114 | |||
1115 | providers = get_providers(options) | ||
1116 | stats = Stats(providers, options.pid, fields=options.fields) | ||
1117 | |||
1118 | if options.log: | ||
1119 | log(stats) | ||
1120 | elif not options.once: | ||
1121 | with Tui(stats) as tui: | ||
1122 | tui.show_stats() | ||
1123 | else: | ||
1124 | batch(stats) | ||
1125 | |||
1126 | if __name__ == "__main__": | ||
1127 | main() | ||
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt new file mode 100644 index 000000000000..b92a153d7115 --- /dev/null +++ b/tools/kvm/kvm_stat/kvm_stat.txt | |||
@@ -0,0 +1,63 @@ | |||
1 | kvm_stat(1) | ||
2 | =========== | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | kvm_stat - Report KVM kernel module event counters | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'kvm_stat' [OPTION]... | ||
12 | |||
13 | DESCRIPTION | ||
14 | ----------- | ||
15 | kvm_stat prints counts of KVM kernel module trace events. These events signify | ||
16 | state transitions such as guest mode entry and exit. | ||
17 | |||
18 | This tool is useful for observing guest behavior from the host perspective. | ||
19 | Conclusions about performance or buggy behavior can often be drawn from the | ||
20 | output. | ||
21 | |||
22 | The set of KVM kernel module trace events may be specific to the kernel version | ||
23 | or architecture. It is best to check the KVM kernel module source code for the | ||
24 | meaning of events. | ||
25 | |||
26 | OPTIONS | ||
27 | ------- | ||
28 | -1:: | ||
29 | --once:: | ||
30 | --batch:: | ||
31 | run in batch mode for one second | ||
32 | |||
33 | -l:: | ||
34 | --log:: | ||
35 | run in logging mode (like vmstat) | ||
36 | |||
37 | -t:: | ||
38 | --tracepoints:: | ||
39 | retrieve statistics from tracepoints | ||
40 | |||
41 | -d:: | ||
42 | --debugfs:: | ||
43 | retrieve statistics from debugfs | ||
44 | |||
45 | -p<pid>:: | ||
46 | --pid=<pid>:: | ||
47 | limit statistics to one virtual machine (pid) | ||
48 | |||
49 | -f<fields>:: | ||
50 | --fields=<fields>:: | ||
51 | fields to display (regex) | ||
52 | |||
53 | -h:: | ||
54 | --help:: | ||
55 | show help message | ||
56 | |||
57 | SEE ALSO | ||
58 | -------- | ||
59 | 'perf'(1), 'trace-cmd'(1) | ||
60 | |||
61 | AUTHOR | ||
62 | ------ | ||
63 | Stefan Hajnoczi <stefanha@redhat.com> | ||
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 409db3304471..e2d5b6f988fb 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/kvm.h> | 20 | #include <linux/kvm.h> |
21 | #include <linux/kvm_host.h> | 21 | #include <linux/kvm_host.h> |
22 | #include <linux/interrupt.h> | 22 | #include <linux/interrupt.h> |
23 | #include <linux/irq.h> | ||
23 | 24 | ||
24 | #include <clocksource/arm_arch_timer.h> | 25 | #include <clocksource/arm_arch_timer.h> |
25 | #include <asm/arch_timer.h> | 26 | #include <asm/arch_timer.h> |
@@ -174,10 +175,10 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level) | |||
174 | 175 | ||
175 | timer->active_cleared_last = false; | 176 | timer->active_cleared_last = false; |
176 | timer->irq.level = new_level; | 177 | timer->irq.level = new_level; |
177 | trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq, | 178 | trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->irq.irq, |
178 | timer->irq.level); | 179 | timer->irq.level); |
179 | ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, | 180 | ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, |
180 | timer->map, | 181 | timer->irq.irq, |
181 | timer->irq.level); | 182 | timer->irq.level); |
182 | WARN_ON(ret); | 183 | WARN_ON(ret); |
183 | } | 184 | } |
@@ -196,7 +197,7 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu) | |||
196 | * because the guest would never see the interrupt. Instead wait | 197 | * because the guest would never see the interrupt. Instead wait |
197 | * until we call this function from kvm_timer_flush_hwstate. | 198 | * until we call this function from kvm_timer_flush_hwstate. |
198 | */ | 199 | */ |
199 | if (!vgic_initialized(vcpu->kvm)) | 200 | if (!vgic_initialized(vcpu->kvm) || !timer->enabled) |
200 | return -ENODEV; | 201 | return -ENODEV; |
201 | 202 | ||
202 | if (kvm_timer_should_fire(vcpu) != timer->irq.level) | 203 | if (kvm_timer_should_fire(vcpu) != timer->irq.level) |
@@ -274,10 +275,8 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) | |||
274 | * to ensure that hardware interrupts from the timer trigger a guest | 275 | * to ensure that hardware interrupts from the timer trigger a guest |
275 | * exit. | 276 | * exit. |
276 | */ | 277 | */ |
277 | if (timer->irq.level || kvm_vgic_map_is_active(vcpu, timer->map)) | 278 | phys_active = timer->irq.level || |
278 | phys_active = true; | 279 | kvm_vgic_map_is_active(vcpu, timer->irq.irq); |
279 | else | ||
280 | phys_active = false; | ||
281 | 280 | ||
282 | /* | 281 | /* |
283 | * We want to avoid hitting the (re)distributor as much as | 282 | * We want to avoid hitting the (re)distributor as much as |
@@ -302,7 +301,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) | |||
302 | if (timer->active_cleared_last && !phys_active) | 301 | if (timer->active_cleared_last && !phys_active) |
303 | return; | 302 | return; |
304 | 303 | ||
305 | ret = irq_set_irqchip_state(timer->map->irq, | 304 | ret = irq_set_irqchip_state(host_vtimer_irq, |
306 | IRQCHIP_STATE_ACTIVE, | 305 | IRQCHIP_STATE_ACTIVE, |
307 | phys_active); | 306 | phys_active); |
308 | WARN_ON(ret); | 307 | WARN_ON(ret); |
@@ -334,7 +333,6 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | |||
334 | const struct kvm_irq_level *irq) | 333 | const struct kvm_irq_level *irq) |
335 | { | 334 | { |
336 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 335 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
337 | struct irq_phys_map *map; | ||
338 | 336 | ||
339 | /* | 337 | /* |
340 | * The vcpu timer irq number cannot be determined in | 338 | * The vcpu timer irq number cannot be determined in |
@@ -353,15 +351,6 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | |||
353 | timer->cntv_ctl = 0; | 351 | timer->cntv_ctl = 0; |
354 | kvm_timer_update_state(vcpu); | 352 | kvm_timer_update_state(vcpu); |
355 | 353 | ||
356 | /* | ||
357 | * Tell the VGIC that the virtual interrupt is tied to a | ||
358 | * physical interrupt. We do that once per VCPU. | ||
359 | */ | ||
360 | map = kvm_vgic_map_phys_irq(vcpu, irq->irq, host_vtimer_irq); | ||
361 | if (WARN_ON(IS_ERR(map))) | ||
362 | return PTR_ERR(map); | ||
363 | |||
364 | timer->map = map; | ||
365 | return 0; | 354 | return 0; |
366 | } | 355 | } |
367 | 356 | ||
@@ -487,14 +476,43 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) | |||
487 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 476 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
488 | 477 | ||
489 | timer_disarm(timer); | 478 | timer_disarm(timer); |
490 | if (timer->map) | 479 | kvm_vgic_unmap_phys_irq(vcpu, timer->irq.irq); |
491 | kvm_vgic_unmap_phys_irq(vcpu, timer->map); | ||
492 | } | 480 | } |
493 | 481 | ||
494 | void kvm_timer_enable(struct kvm *kvm) | 482 | int kvm_timer_enable(struct kvm_vcpu *vcpu) |
495 | { | 483 | { |
496 | if (kvm->arch.timer.enabled) | 484 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
497 | return; | 485 | struct irq_desc *desc; |
486 | struct irq_data *data; | ||
487 | int phys_irq; | ||
488 | int ret; | ||
489 | |||
490 | if (timer->enabled) | ||
491 | return 0; | ||
492 | |||
493 | /* | ||
494 | * Find the physical IRQ number corresponding to the host_vtimer_irq | ||
495 | */ | ||
496 | desc = irq_to_desc(host_vtimer_irq); | ||
497 | if (!desc) { | ||
498 | kvm_err("%s: no interrupt descriptor\n", __func__); | ||
499 | return -EINVAL; | ||
500 | } | ||
501 | |||
502 | data = irq_desc_get_irq_data(desc); | ||
503 | while (data->parent_data) | ||
504 | data = data->parent_data; | ||
505 | |||
506 | phys_irq = data->hwirq; | ||
507 | |||
508 | /* | ||
509 | * Tell the VGIC that the virtual interrupt is tied to a | ||
510 | * physical interrupt. We do that once per VCPU. | ||
511 | */ | ||
512 | ret = kvm_vgic_map_phys_irq(vcpu, timer->irq.irq, phys_irq); | ||
513 | if (ret) | ||
514 | return ret; | ||
515 | |||
498 | 516 | ||
499 | /* | 517 | /* |
500 | * There is a potential race here between VCPUs starting for the first | 518 | * There is a potential race here between VCPUs starting for the first |
@@ -505,7 +523,9 @@ void kvm_timer_enable(struct kvm *kvm) | |||
505 | * the arch timers are enabled. | 523 | * the arch timers are enabled. |
506 | */ | 524 | */ |
507 | if (timecounter && wqueue) | 525 | if (timecounter && wqueue) |
508 | kvm->arch.timer.enabled = 1; | 526 | timer->enabled = 1; |
527 | |||
528 | return 0; | ||
509 | } | 529 | } |
510 | 530 | ||
511 | void kvm_timer_init(struct kvm *kvm) | 531 | void kvm_timer_init(struct kvm *kvm) |
diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c index ea00d69e7078..798866a8d875 100644 --- a/virt/kvm/arm/hyp/timer-sr.c +++ b/virt/kvm/arm/hyp/timer-sr.c | |||
@@ -24,11 +24,10 @@ | |||
24 | /* vcpu is already in the HYP VA space */ | 24 | /* vcpu is already in the HYP VA space */ |
25 | void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) | 25 | void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) |
26 | { | 26 | { |
27 | struct kvm *kvm = kern_hyp_va(vcpu->kvm); | ||
28 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 27 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
29 | u64 val; | 28 | u64 val; |
30 | 29 | ||
31 | if (kvm->arch.timer.enabled) { | 30 | if (timer->enabled) { |
32 | timer->cntv_ctl = read_sysreg_el0(cntv_ctl); | 31 | timer->cntv_ctl = read_sysreg_el0(cntv_ctl); |
33 | timer->cntv_cval = read_sysreg_el0(cntv_cval); | 32 | timer->cntv_cval = read_sysreg_el0(cntv_cval); |
34 | } | 33 | } |
@@ -60,7 +59,7 @@ void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) | |||
60 | val |= CNTHCTL_EL1PCTEN; | 59 | val |= CNTHCTL_EL1PCTEN; |
61 | write_sysreg(val, cnthctl_el2); | 60 | write_sysreg(val, cnthctl_el2); |
62 | 61 | ||
63 | if (kvm->arch.timer.enabled) { | 62 | if (timer->enabled) { |
64 | write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); | 63 | write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); |
65 | write_sysreg_el0(timer->cntv_cval, cntv_cval); | 64 | write_sysreg_el0(timer->cntv_cval, cntv_cval); |
66 | isb(); | 65 | isb(); |
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c index 674bdf8ecf4f..a3f12b3b277b 100644 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ b/virt/kvm/arm/hyp/vgic-v2-sr.c | |||
@@ -21,11 +21,18 @@ | |||
21 | 21 | ||
22 | #include <asm/kvm_hyp.h> | 22 | #include <asm/kvm_hyp.h> |
23 | 23 | ||
24 | #ifdef CONFIG_KVM_NEW_VGIC | ||
25 | extern struct vgic_global kvm_vgic_global_state; | ||
26 | #define vgic_v2_params kvm_vgic_global_state | ||
27 | #else | ||
28 | extern struct vgic_params vgic_v2_params; | ||
29 | #endif | ||
30 | |||
24 | static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, | 31 | static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, |
25 | void __iomem *base) | 32 | void __iomem *base) |
26 | { | 33 | { |
27 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; | 34 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; |
28 | int nr_lr = vcpu->arch.vgic_cpu.nr_lr; | 35 | int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr; |
29 | u32 eisr0, eisr1; | 36 | u32 eisr0, eisr1; |
30 | int i; | 37 | int i; |
31 | bool expect_mi; | 38 | bool expect_mi; |
@@ -67,7 +74,7 @@ static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, | |||
67 | static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) | 74 | static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) |
68 | { | 75 | { |
69 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; | 76 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; |
70 | int nr_lr = vcpu->arch.vgic_cpu.nr_lr; | 77 | int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr; |
71 | u32 elrsr0, elrsr1; | 78 | u32 elrsr0, elrsr1; |
72 | 79 | ||
73 | elrsr0 = readl_relaxed(base + GICH_ELRSR0); | 80 | elrsr0 = readl_relaxed(base + GICH_ELRSR0); |
@@ -86,7 +93,7 @@ static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) | |||
86 | static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) | 93 | static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) |
87 | { | 94 | { |
88 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; | 95 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; |
89 | int nr_lr = vcpu->arch.vgic_cpu.nr_lr; | 96 | int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr; |
90 | int i; | 97 | int i; |
91 | 98 | ||
92 | for (i = 0; i < nr_lr; i++) { | 99 | for (i = 0; i < nr_lr; i++) { |
@@ -141,13 +148,13 @@ void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) | |||
141 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; | 148 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; |
142 | struct vgic_dist *vgic = &kvm->arch.vgic; | 149 | struct vgic_dist *vgic = &kvm->arch.vgic; |
143 | void __iomem *base = kern_hyp_va(vgic->vctrl_base); | 150 | void __iomem *base = kern_hyp_va(vgic->vctrl_base); |
144 | int i, nr_lr; | 151 | int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr; |
152 | int i; | ||
145 | u64 live_lrs = 0; | 153 | u64 live_lrs = 0; |
146 | 154 | ||
147 | if (!base) | 155 | if (!base) |
148 | return; | 156 | return; |
149 | 157 | ||
150 | nr_lr = vcpu->arch.vgic_cpu.nr_lr; | ||
151 | 158 | ||
152 | for (i = 0; i < nr_lr; i++) | 159 | for (i = 0; i < nr_lr; i++) |
153 | if (cpu_if->vgic_lr[i] & GICH_LR_STATE) | 160 | if (cpu_if->vgic_lr[i] & GICH_LR_STATE) |
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 575c7aa30d7e..a027569facfa 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c | |||
@@ -436,7 +436,14 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) | |||
436 | return 0; | 436 | return 0; |
437 | } | 437 | } |
438 | 438 | ||
439 | static bool irq_is_valid(struct kvm *kvm, int irq, bool is_ppi) | 439 | #define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS) |
440 | |||
441 | /* | ||
442 | * For one VM the interrupt type must be the same for each vcpu. | ||
443 | * As a PPI, the interrupt number is the same for all vcpus, | ||
444 | * while as an SPI it must be a separate number per vcpu. | ||
445 | */ | ||
446 | static bool pmu_irq_is_valid(struct kvm *kvm, int irq) | ||
440 | { | 447 | { |
441 | int i; | 448 | int i; |
442 | struct kvm_vcpu *vcpu; | 449 | struct kvm_vcpu *vcpu; |
@@ -445,7 +452,7 @@ static bool irq_is_valid(struct kvm *kvm, int irq, bool is_ppi) | |||
445 | if (!kvm_arm_pmu_irq_initialized(vcpu)) | 452 | if (!kvm_arm_pmu_irq_initialized(vcpu)) |
446 | continue; | 453 | continue; |
447 | 454 | ||
448 | if (is_ppi) { | 455 | if (irq_is_ppi(irq)) { |
449 | if (vcpu->arch.pmu.irq_num != irq) | 456 | if (vcpu->arch.pmu.irq_num != irq) |
450 | return false; | 457 | return false; |
451 | } else { | 458 | } else { |
@@ -457,7 +464,6 @@ static bool irq_is_valid(struct kvm *kvm, int irq, bool is_ppi) | |||
457 | return true; | 464 | return true; |
458 | } | 465 | } |
459 | 466 | ||
460 | |||
461 | int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) | 467 | int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) |
462 | { | 468 | { |
463 | switch (attr->attr) { | 469 | switch (attr->attr) { |
@@ -471,14 +477,11 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) | |||
471 | if (get_user(irq, uaddr)) | 477 | if (get_user(irq, uaddr)) |
472 | return -EFAULT; | 478 | return -EFAULT; |
473 | 479 | ||
474 | /* | 480 | /* The PMU overflow interrupt can be a PPI or a valid SPI. */ |
475 | * The PMU overflow interrupt could be a PPI or SPI, but for one | 481 | if (!(irq_is_ppi(irq) || vgic_valid_spi(vcpu->kvm, irq))) |
476 | * VM the interrupt type must be same for each vcpu. As a PPI, | 482 | return -EINVAL; |
477 | * the interrupt number is the same for all vcpus, while as an | 483 | |
478 | * SPI it must be a separate number per vcpu. | 484 | if (!pmu_irq_is_valid(vcpu->kvm, irq)) |
479 | */ | ||
480 | if (irq < VGIC_NR_SGIS || irq >= vcpu->kvm->arch.vgic.nr_irqs || | ||
481 | !irq_is_valid(vcpu->kvm, irq, irq < VGIC_NR_PRIVATE_IRQS)) | ||
482 | return -EINVAL; | 485 | return -EINVAL; |
483 | 486 | ||
484 | if (kvm_arm_pmu_irq_initialized(vcpu)) | 487 | if (kvm_arm_pmu_irq_initialized(vcpu)) |
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index 7e826c9b2b0a..334cd7a89106 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c | |||
@@ -171,7 +171,7 @@ static const struct vgic_ops vgic_v2_ops = { | |||
171 | .enable = vgic_v2_enable, | 171 | .enable = vgic_v2_enable, |
172 | }; | 172 | }; |
173 | 173 | ||
174 | static struct vgic_params vgic_v2_params; | 174 | struct vgic_params __section(.hyp.text) vgic_v2_params; |
175 | 175 | ||
176 | static void vgic_cpu_init_lrs(void *params) | 176 | static void vgic_cpu_init_lrs(void *params) |
177 | { | 177 | { |
@@ -201,6 +201,8 @@ int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info, | |||
201 | const struct resource *vctrl_res = &gic_kvm_info->vctrl; | 201 | const struct resource *vctrl_res = &gic_kvm_info->vctrl; |
202 | const struct resource *vcpu_res = &gic_kvm_info->vcpu; | 202 | const struct resource *vcpu_res = &gic_kvm_info->vcpu; |
203 | 203 | ||
204 | memset(vgic, 0, sizeof(*vgic)); | ||
205 | |||
204 | if (!gic_kvm_info->maint_irq) { | 206 | if (!gic_kvm_info->maint_irq) { |
205 | kvm_err("error getting vgic maintenance irq\n"); | 207 | kvm_err("error getting vgic maintenance irq\n"); |
206 | ret = -ENXIO; | 208 | ret = -ENXIO; |
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index c02a1b1cf855..75b02fa86436 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c | |||
@@ -29,12 +29,6 @@ | |||
29 | #include <asm/kvm_asm.h> | 29 | #include <asm/kvm_asm.h> |
30 | #include <asm/kvm_mmu.h> | 30 | #include <asm/kvm_mmu.h> |
31 | 31 | ||
32 | /* These are for GICv2 emulation only */ | ||
33 | #define GICH_LR_VIRTUALID (0x3ffUL << 0) | ||
34 | #define GICH_LR_PHYSID_CPUID_SHIFT (10) | ||
35 | #define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) | ||
36 | #define ICH_LR_VIRTUALID_MASK (BIT_ULL(32) - 1) | ||
37 | |||
38 | static u32 ich_vtr_el2; | 32 | static u32 ich_vtr_el2; |
39 | 33 | ||
40 | static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) | 34 | static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) |
@@ -43,7 +37,7 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) | |||
43 | u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr]; | 37 | u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr]; |
44 | 38 | ||
45 | if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) | 39 | if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) |
46 | lr_desc.irq = val & ICH_LR_VIRTUALID_MASK; | 40 | lr_desc.irq = val & ICH_LR_VIRTUAL_ID_MASK; |
47 | else | 41 | else |
48 | lr_desc.irq = val & GICH_LR_VIRTUALID; | 42 | lr_desc.irq = val & GICH_LR_VIRTUALID; |
49 | 43 | ||
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 60668a7f319a..c3bfbb981e73 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c | |||
@@ -690,12 +690,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, | |||
690 | */ | 690 | */ |
691 | void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) | 691 | void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) |
692 | { | 692 | { |
693 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
694 | u64 elrsr = vgic_get_elrsr(vcpu); | 693 | u64 elrsr = vgic_get_elrsr(vcpu); |
695 | unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); | 694 | unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); |
696 | int i; | 695 | int i; |
697 | 696 | ||
698 | for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) { | 697 | for_each_clear_bit(i, elrsr_ptr, vgic->nr_lr) { |
699 | struct vgic_lr lr = vgic_get_lr(vcpu, i); | 698 | struct vgic_lr lr = vgic_get_lr(vcpu, i); |
700 | 699 | ||
701 | /* | 700 | /* |
@@ -820,7 +819,6 @@ static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu, | |||
820 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 819 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
821 | struct vgic_io_device *iodev = container_of(this, | 820 | struct vgic_io_device *iodev = container_of(this, |
822 | struct vgic_io_device, dev); | 821 | struct vgic_io_device, dev); |
823 | struct kvm_run *run = vcpu->run; | ||
824 | const struct vgic_io_range *range; | 822 | const struct vgic_io_range *range; |
825 | struct kvm_exit_mmio mmio; | 823 | struct kvm_exit_mmio mmio; |
826 | bool updated_state; | 824 | bool updated_state; |
@@ -849,12 +847,6 @@ static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu, | |||
849 | updated_state = false; | 847 | updated_state = false; |
850 | } | 848 | } |
851 | spin_unlock(&dist->lock); | 849 | spin_unlock(&dist->lock); |
852 | run->mmio.is_write = is_write; | ||
853 | run->mmio.len = len; | ||
854 | run->mmio.phys_addr = addr; | ||
855 | memcpy(run->mmio.data, val, len); | ||
856 | |||
857 | kvm_handle_mmio_return(vcpu, run); | ||
858 | 850 | ||
859 | if (updated_state) | 851 | if (updated_state) |
860 | vgic_kick_vcpus(vcpu->kvm); | 852 | vgic_kick_vcpus(vcpu->kvm); |
@@ -1102,18 +1094,18 @@ static bool dist_active_irq(struct kvm_vcpu *vcpu) | |||
1102 | return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu); | 1094 | return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu); |
1103 | } | 1095 | } |
1104 | 1096 | ||
1105 | bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map) | 1097 | bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq) |
1106 | { | 1098 | { |
1107 | int i; | 1099 | int i; |
1108 | 1100 | ||
1109 | for (i = 0; i < vcpu->arch.vgic_cpu.nr_lr; i++) { | 1101 | for (i = 0; i < vgic->nr_lr; i++) { |
1110 | struct vgic_lr vlr = vgic_get_lr(vcpu, i); | 1102 | struct vgic_lr vlr = vgic_get_lr(vcpu, i); |
1111 | 1103 | ||
1112 | if (vlr.irq == map->virt_irq && vlr.state & LR_STATE_ACTIVE) | 1104 | if (vlr.irq == virt_irq && vlr.state & LR_STATE_ACTIVE) |
1113 | return true; | 1105 | return true; |
1114 | } | 1106 | } |
1115 | 1107 | ||
1116 | return vgic_irq_is_active(vcpu, map->virt_irq); | 1108 | return vgic_irq_is_active(vcpu, virt_irq); |
1117 | } | 1109 | } |
1118 | 1110 | ||
1119 | /* | 1111 | /* |
@@ -1521,7 +1513,6 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) | |||
1521 | } | 1513 | } |
1522 | 1514 | ||
1523 | static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, | 1515 | static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, |
1524 | struct irq_phys_map *map, | ||
1525 | unsigned int irq_num, bool level) | 1516 | unsigned int irq_num, bool level) |
1526 | { | 1517 | { |
1527 | struct vgic_dist *dist = &kvm->arch.vgic; | 1518 | struct vgic_dist *dist = &kvm->arch.vgic; |
@@ -1660,14 +1651,14 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, | |||
1660 | if (map) | 1651 | if (map) |
1661 | return -EINVAL; | 1652 | return -EINVAL; |
1662 | 1653 | ||
1663 | return vgic_update_irq_pending(kvm, cpuid, NULL, irq_num, level); | 1654 | return vgic_update_irq_pending(kvm, cpuid, irq_num, level); |
1664 | } | 1655 | } |
1665 | 1656 | ||
1666 | /** | 1657 | /** |
1667 | * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic | 1658 | * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic |
1668 | * @kvm: The VM structure pointer | 1659 | * @kvm: The VM structure pointer |
1669 | * @cpuid: The CPU for PPIs | 1660 | * @cpuid: The CPU for PPIs |
1670 | * @map: Pointer to a irq_phys_map structure describing the mapping | 1661 | * @virt_irq: The virtual IRQ to be injected |
1671 | * @level: Edge-triggered: true: to trigger the interrupt | 1662 | * @level: Edge-triggered: true: to trigger the interrupt |
1672 | * false: to ignore the call | 1663 | * false: to ignore the call |
1673 | * Level-sensitive true: raise the input signal | 1664 | * Level-sensitive true: raise the input signal |
@@ -1678,7 +1669,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, | |||
1678 | * being HIGH and 0 being LOW and all devices being active-HIGH. | 1669 | * being HIGH and 0 being LOW and all devices being active-HIGH. |
1679 | */ | 1670 | */ |
1680 | int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, | 1671 | int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, |
1681 | struct irq_phys_map *map, bool level) | 1672 | unsigned int virt_irq, bool level) |
1682 | { | 1673 | { |
1683 | int ret; | 1674 | int ret; |
1684 | 1675 | ||
@@ -1686,7 +1677,7 @@ int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, | |||
1686 | if (ret) | 1677 | if (ret) |
1687 | return ret; | 1678 | return ret; |
1688 | 1679 | ||
1689 | return vgic_update_irq_pending(kvm, cpuid, map, map->virt_irq, level); | 1680 | return vgic_update_irq_pending(kvm, cpuid, virt_irq, level); |
1690 | } | 1681 | } |
1691 | 1682 | ||
1692 | static irqreturn_t vgic_maintenance_handler(int irq, void *data) | 1683 | static irqreturn_t vgic_maintenance_handler(int irq, void *data) |
@@ -1712,43 +1703,28 @@ static struct list_head *vgic_get_irq_phys_map_list(struct kvm_vcpu *vcpu, | |||
1712 | /** | 1703 | /** |
1713 | * kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ | 1704 | * kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ |
1714 | * @vcpu: The VCPU pointer | 1705 | * @vcpu: The VCPU pointer |
1715 | * @virt_irq: The virtual irq number | 1706 | * @virt_irq: The virtual IRQ number for the guest |
1716 | * @irq: The Linux IRQ number | 1707 | * @phys_irq: The hardware IRQ number of the host |
1717 | * | 1708 | * |
1718 | * Establish a mapping between a guest visible irq (@virt_irq) and a | 1709 | * Establish a mapping between a guest visible irq (@virt_irq) and a |
1719 | * Linux irq (@irq). On injection, @virt_irq will be associated with | 1710 | * hardware irq (@phys_irq). On injection, @virt_irq will be associated with |
1720 | * the physical interrupt represented by @irq. This mapping can be | 1711 | * the physical interrupt represented by @phys_irq. This mapping can be |
1721 | * established multiple times as long as the parameters are the same. | 1712 | * established multiple times as long as the parameters are the same. |
1722 | * | 1713 | * |
1723 | * Returns a valid pointer on success, and an error pointer otherwise | 1714 | * Returns 0 on success or an error value otherwise. |
1724 | */ | 1715 | */ |
1725 | struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, | 1716 | int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int phys_irq) |
1726 | int virt_irq, int irq) | ||
1727 | { | 1717 | { |
1728 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 1718 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
1729 | struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq); | 1719 | struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq); |
1730 | struct irq_phys_map *map; | 1720 | struct irq_phys_map *map; |
1731 | struct irq_phys_map_entry *entry; | 1721 | struct irq_phys_map_entry *entry; |
1732 | struct irq_desc *desc; | 1722 | int ret = 0; |
1733 | struct irq_data *data; | ||
1734 | int phys_irq; | ||
1735 | |||
1736 | desc = irq_to_desc(irq); | ||
1737 | if (!desc) { | ||
1738 | kvm_err("%s: no interrupt descriptor\n", __func__); | ||
1739 | return ERR_PTR(-EINVAL); | ||
1740 | } | ||
1741 | |||
1742 | data = irq_desc_get_irq_data(desc); | ||
1743 | while (data->parent_data) | ||
1744 | data = data->parent_data; | ||
1745 | |||
1746 | phys_irq = data->hwirq; | ||
1747 | 1723 | ||
1748 | /* Create a new mapping */ | 1724 | /* Create a new mapping */ |
1749 | entry = kzalloc(sizeof(*entry), GFP_KERNEL); | 1725 | entry = kzalloc(sizeof(*entry), GFP_KERNEL); |
1750 | if (!entry) | 1726 | if (!entry) |
1751 | return ERR_PTR(-ENOMEM); | 1727 | return -ENOMEM; |
1752 | 1728 | ||
1753 | spin_lock(&dist->irq_phys_map_lock); | 1729 | spin_lock(&dist->irq_phys_map_lock); |
1754 | 1730 | ||
@@ -1756,9 +1732,8 @@ struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, | |||
1756 | map = vgic_irq_map_search(vcpu, virt_irq); | 1732 | map = vgic_irq_map_search(vcpu, virt_irq); |
1757 | if (map) { | 1733 | if (map) { |
1758 | /* Make sure this mapping matches */ | 1734 | /* Make sure this mapping matches */ |
1759 | if (map->phys_irq != phys_irq || | 1735 | if (map->phys_irq != phys_irq) |
1760 | map->irq != irq) | 1736 | ret = -EINVAL; |
1761 | map = ERR_PTR(-EINVAL); | ||
1762 | 1737 | ||
1763 | /* Found an existing, valid mapping */ | 1738 | /* Found an existing, valid mapping */ |
1764 | goto out; | 1739 | goto out; |
@@ -1767,7 +1742,6 @@ struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, | |||
1767 | map = &entry->map; | 1742 | map = &entry->map; |
1768 | map->virt_irq = virt_irq; | 1743 | map->virt_irq = virt_irq; |
1769 | map->phys_irq = phys_irq; | 1744 | map->phys_irq = phys_irq; |
1770 | map->irq = irq; | ||
1771 | 1745 | ||
1772 | list_add_tail_rcu(&entry->entry, root); | 1746 | list_add_tail_rcu(&entry->entry, root); |
1773 | 1747 | ||
@@ -1775,9 +1749,9 @@ out: | |||
1775 | spin_unlock(&dist->irq_phys_map_lock); | 1749 | spin_unlock(&dist->irq_phys_map_lock); |
1776 | /* If we've found a hit in the existing list, free the useless | 1750 | /* If we've found a hit in the existing list, free the useless |
1777 | * entry */ | 1751 | * entry */ |
1778 | if (IS_ERR(map) || map != &entry->map) | 1752 | if (ret || map != &entry->map) |
1779 | kfree(entry); | 1753 | kfree(entry); |
1780 | return map; | 1754 | return ret; |
1781 | } | 1755 | } |
1782 | 1756 | ||
1783 | static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, | 1757 | static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, |
@@ -1813,25 +1787,22 @@ static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu) | |||
1813 | /** | 1787 | /** |
1814 | * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping | 1788 | * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping |
1815 | * @vcpu: The VCPU pointer | 1789 | * @vcpu: The VCPU pointer |
1816 | * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq | 1790 | * @virt_irq: The virtual IRQ number to be unmapped |
1817 | * | 1791 | * |
1818 | * Remove an existing mapping between virtual and physical interrupts. | 1792 | * Remove an existing mapping between virtual and physical interrupts. |
1819 | */ | 1793 | */ |
1820 | int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map) | 1794 | int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) |
1821 | { | 1795 | { |
1822 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 1796 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
1823 | struct irq_phys_map_entry *entry; | 1797 | struct irq_phys_map_entry *entry; |
1824 | struct list_head *root; | 1798 | struct list_head *root; |
1825 | 1799 | ||
1826 | if (!map) | 1800 | root = vgic_get_irq_phys_map_list(vcpu, virt_irq); |
1827 | return -EINVAL; | ||
1828 | |||
1829 | root = vgic_get_irq_phys_map_list(vcpu, map->virt_irq); | ||
1830 | 1801 | ||
1831 | spin_lock(&dist->irq_phys_map_lock); | 1802 | spin_lock(&dist->irq_phys_map_lock); |
1832 | 1803 | ||
1833 | list_for_each_entry(entry, root, entry) { | 1804 | list_for_each_entry(entry, root, entry) { |
1834 | if (&entry->map == map) { | 1805 | if (entry->map.virt_irq == virt_irq) { |
1835 | list_del_rcu(&entry->entry); | 1806 | list_del_rcu(&entry->entry); |
1836 | call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu); | 1807 | call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu); |
1837 | break; | 1808 | break; |
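With the irq_phys_map pointer gone from the interface, callers such as the arch timer code only need the virtual IRQ number in both directions. A hedged sketch of the new calling convention (the PPI number 27 and host_hwirq are illustrative):

	int ret;

	/* Associate guest PPI 27 with the host hardware interrupt. */
	ret = kvm_vgic_map_phys_irq(vcpu, 27, host_hwirq);
	if (ret)
		return ret;

	/* Teardown now takes the same virtual IRQ number, not a pointer. */
	kvm_vgic_unmap_phys_irq(vcpu, 27);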
@@ -1887,13 +1858,6 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) | |||
1887 | return -ENOMEM; | 1858 | return -ENOMEM; |
1888 | } | 1859 | } |
1889 | 1860 | ||
1890 | /* | ||
1891 | * Store the number of LRs per vcpu, so we don't have to go | ||
1892 | * all the way to the distributor structure to find out. Only | ||
1893 | * assembly code should use this one. | ||
1894 | */ | ||
1895 | vgic_cpu->nr_lr = vgic->nr_lr; | ||
1896 | |||
1897 | return 0; | 1861 | return 0; |
1898 | } | 1862 | } |
1899 | 1863 | ||
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c new file mode 100644 index 000000000000..a1442f7c9c4d --- /dev/null +++ b/virt/kvm/arm/vgic/vgic-init.c | |||
@@ -0,0 +1,452 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2015, 2016 ARM Ltd. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/uaccess.h> | ||
18 | #include <linux/interrupt.h> | ||
19 | #include <linux/cpu.h> | ||
20 | #include <linux/kvm_host.h> | ||
21 | #include <kvm/arm_vgic.h> | ||
22 | #include <asm/kvm_mmu.h> | ||
23 | #include "vgic.h" | ||
24 | |||
25 | /* | ||
26 | * Initialization rules: there are multiple stages to the vgic | ||
27 | * initialization, both for the distributor and the CPU interfaces. | ||
28 | * | ||
29 | * Distributor: | ||
30 | * | ||
31 | * - kvm_vgic_early_init(): initialization of static data that doesn't | ||
32 | * depend on any sizing information or emulation type. No allocation | ||
33 | * is allowed there. | ||
34 | * | ||
35 | * - vgic_init(): allocation and initialization of the generic data | ||
36 | * structures that depend on sizing information (number of CPUs, | ||
37 | * number of interrupts). Also initializes the vcpu specific data | ||
38 | * structures. Can be executed lazily for GICv2. | ||
39 | * | ||
40 | * CPU Interface: | ||
41 | * | ||
42 | * - kvm_vgic_cpu_early_init(): initialization of static data that | ||
43 | * doesn't depend on any sizing information or emulation type. No | ||
44 | * allocation is allowed there. | ||
45 | */ | ||
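A hedged sketch of how these stages compose for a GICv2 guest, using only the functions documented in this file (the lazy path; a GICv3 requires the explicit initialization described further down):

	kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);	/* device instantiation */
	/* ... vcpus created, userspace sets the base addresses ... */
	vgic_init(kvm);			/* sizing-dependent allocations */
	kvm_vgic_map_resources(kvm);	/* MMIO mapping on first vcpu run */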
46 | |||
47 | /* EARLY INIT */ | ||
48 | |||
49 | /* | ||
50 | * These two functions should no longer be needed, but they | ||
51 | * are still called from arm.c. | ||
52 | */ | ||
53 | void kvm_vgic_early_init(struct kvm *kvm) | ||
54 | { | ||
55 | } | ||
56 | |||
57 | void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu) | ||
58 | { | ||
59 | } | ||
60 | |||
61 | /* CREATION */ | ||
62 | |||
63 | /** | ||
64 | * kvm_vgic_create: triggered by the instantiation of the VGIC device by | ||
65 | * user space, either through the legacy KVM_CREATE_IRQCHIP ioctl (v2 only) | ||
66 | * or through the generic KVM_CREATE_DEVICE API ioctl. | ||
67 | * irqchip_in_kernel() tells you if this function succeeded or not. | ||
68 | * @kvm: kvm struct pointer | ||
69 | * @type: KVM_DEV_TYPE_ARM_VGIC_V[23] | ||
70 | */ | ||
71 | int kvm_vgic_create(struct kvm *kvm, u32 type) | ||
72 | { | ||
73 | int i, vcpu_lock_idx = -1, ret; | ||
74 | struct kvm_vcpu *vcpu; | ||
75 | |||
76 | mutex_lock(&kvm->lock); | ||
77 | |||
78 | if (irqchip_in_kernel(kvm)) { | ||
79 | ret = -EEXIST; | ||
80 | goto out; | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * This function is also called by the KVM_CREATE_IRQCHIP handler, | ||
85 | * which had no chance yet to check the availability of the GICv2 | ||
86 | * emulation. So check this here again. KVM_CREATE_DEVICE does | ||
87 | * the proper checks already. | ||
88 | */ | ||
89 | if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && | ||
90 | !kvm_vgic_global_state.can_emulate_gicv2) { | ||
91 | ret = -ENODEV; | ||
92 | goto out; | ||
93 | } | ||
94 | |||
95 | /* | ||
96 | * Any time a vcpu is run, vcpu_load is called which tries to grab the | ||
97 | * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure | ||
98 | * that no other VCPUs are run while we create the vgic. | ||
99 | */ | ||
100 | ret = -EBUSY; | ||
101 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
102 | if (!mutex_trylock(&vcpu->mutex)) | ||
103 | goto out_unlock; | ||
104 | vcpu_lock_idx = i; | ||
105 | } | ||
106 | |||
107 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
108 | if (vcpu->arch.has_run_once) | ||
109 | goto out_unlock; | ||
110 | } | ||
111 | ret = 0; | ||
112 | |||
113 | if (type == KVM_DEV_TYPE_ARM_VGIC_V2) | ||
114 | kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS; | ||
115 | else | ||
116 | kvm->arch.max_vcpus = VGIC_V3_MAX_CPUS; | ||
117 | |||
118 | if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus) { | ||
119 | ret = -E2BIG; | ||
120 | goto out_unlock; | ||
121 | } | ||
122 | |||
123 | kvm->arch.vgic.in_kernel = true; | ||
124 | kvm->arch.vgic.vgic_model = type; | ||
125 | |||
126 | /* | ||
127 | * kvm_vgic_global_state.vctrl_base is set on vgic probe (kvm_arch_init); | ||
128 | * it is stored in the distributor struct for asm save/restore purposes. | ||
129 | */ | ||
130 | kvm->arch.vgic.vctrl_base = kvm_vgic_global_state.vctrl_base; | ||
131 | |||
132 | kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; | ||
133 | kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; | ||
134 | kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF; | ||
135 | |||
136 | out_unlock: | ||
137 | for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { | ||
138 | vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); | ||
139 | mutex_unlock(&vcpu->mutex); | ||
140 | } | ||
141 | |||
142 | out: | ||
143 | mutex_unlock(&kvm->lock); | ||
144 | return ret; | ||
145 | } | ||
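From userspace, the function above is reached through the generic device API (or the legacy KVM_CREATE_IRQCHIP ioctl for v2). A minimal sketch, assuming an open VM file descriptor vm_fd:

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int create_vgic_v2(int vm_fd)
	{
		struct kvm_create_device cd = {
			.type = KVM_DEV_TYPE_ARM_VGIC_V2,
		};

		if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
			return -1;	/* errno: EEXIST, ENODEV, EBUSY, ... */

		return cd.fd;	/* device fd, used with KVM_SET_DEVICE_ATTR below */
	}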
146 | |||
147 | /* INIT/DESTROY */ | ||
148 | |||
149 | /** | ||
150 | * kvm_vgic_dist_init: initialize the dist data structures | ||
151 | * @kvm: kvm struct pointer | ||
152 | * @nr_spis: number of spis, frozen by caller | ||
153 | */ | ||
154 | static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) | ||
155 | { | ||
156 | struct vgic_dist *dist = &kvm->arch.vgic; | ||
157 | struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0); | ||
158 | int i; | ||
159 | |||
160 | dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL); | ||
161 | if (!dist->spis) | ||
162 | return -ENOMEM; | ||
163 | |||
164 | /* | ||
165 | * In the following code we do not take the irq struct lock since | ||
166 | * no other action on irq structs can happen while the VGIC is | ||
167 | * not initialized yet: | ||
168 | * If someone wants to inject an interrupt or performs an MMIO access, we | ||
169 | * require prior initialization in case of a virtual GICv3 or trigger | ||
170 | * initialization when using a virtual GICv2. | ||
171 | */ | ||
172 | for (i = 0; i < nr_spis; i++) { | ||
173 | struct vgic_irq *irq = &dist->spis[i]; | ||
174 | |||
175 | irq->intid = i + VGIC_NR_PRIVATE_IRQS; | ||
176 | INIT_LIST_HEAD(&irq->ap_list); | ||
177 | spin_lock_init(&irq->irq_lock); | ||
178 | irq->vcpu = NULL; | ||
179 | irq->target_vcpu = vcpu0; | ||
180 | if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) | ||
181 | irq->targets = 0; | ||
182 | else | ||
183 | irq->mpidr = 0; | ||
184 | } | ||
185 | return 0; | ||
186 | } | ||
187 | |||
188 | /** | ||
189 | * kvm_vgic_vcpu_init: initialize the vcpu data structures and | ||
190 | * enable the VCPU interface | ||
191 | * @vcpu: the VCPU whose VGIC should be initialized | ||
192 | */ | ||
193 | static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) | ||
194 | { | ||
195 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
196 | int i; | ||
197 | |||
198 | INIT_LIST_HEAD(&vgic_cpu->ap_list_head); | ||
199 | spin_lock_init(&vgic_cpu->ap_list_lock); | ||
200 | |||
201 | /* | ||
202 | * Enable and configure all SGIs to be edge-triggered and | ||
203 | * configure all PPIs as level-triggered. | ||
204 | */ | ||
205 | for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { | ||
206 | struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; | ||
207 | |||
208 | INIT_LIST_HEAD(&irq->ap_list); | ||
209 | spin_lock_init(&irq->irq_lock); | ||
210 | irq->intid = i; | ||
211 | irq->vcpu = NULL; | ||
212 | irq->target_vcpu = vcpu; | ||
213 | irq->targets = 1U << vcpu->vcpu_id; | ||
214 | if (vgic_irq_is_sgi(i)) { | ||
215 | /* SGIs */ | ||
216 | irq->enabled = 1; | ||
217 | irq->config = VGIC_CONFIG_EDGE; | ||
218 | } else { | ||
219 | /* PPIs */ | ||
220 | irq->config = VGIC_CONFIG_LEVEL; | ||
221 | } | ||
222 | } | ||
223 | if (kvm_vgic_global_state.type == VGIC_V2) | ||
224 | vgic_v2_enable(vcpu); | ||
225 | else | ||
226 | vgic_v3_enable(vcpu); | ||
227 | } | ||
228 | |||
229 | /* | ||
230 | * vgic_init: allocates and initializes dist and vcpu data structures | ||
231 | * depending on two dimensioning parameters: | ||
232 | * - the number of spis | ||
233 | * - the number of vcpus | ||
234 | * The function is generally called when nr_spis has been explicitly set | ||
235 | * by userspace through the KVM device API. If not, it defaults to the legacy 256 interrupts (224 SPIs). | ||
236 | * vgic_initialized() returns true when this function has succeeded. | ||
237 | * Must be called with kvm->lock held! | ||
238 | */ | ||
239 | int vgic_init(struct kvm *kvm) | ||
240 | { | ||
241 | struct vgic_dist *dist = &kvm->arch.vgic; | ||
242 | struct kvm_vcpu *vcpu; | ||
243 | int ret = 0, i; | ||
244 | |||
245 | if (vgic_initialized(kvm)) | ||
246 | return 0; | ||
247 | |||
248 | /* freeze the number of spis */ | ||
249 | if (!dist->nr_spis) | ||
250 | dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS; | ||
251 | |||
252 | ret = kvm_vgic_dist_init(kvm, dist->nr_spis); | ||
253 | if (ret) | ||
254 | goto out; | ||
255 | |||
256 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
257 | kvm_vgic_vcpu_init(vcpu); | ||
258 | |||
259 | dist->initialized = true; | ||
260 | out: | ||
261 | return ret; | ||
262 | } | ||
263 | |||
264 | static void kvm_vgic_dist_destroy(struct kvm *kvm) | ||
265 | { | ||
266 | struct vgic_dist *dist = &kvm->arch.vgic; | ||
267 | |||
268 | mutex_lock(&kvm->lock); | ||
269 | |||
270 | dist->ready = false; | ||
271 | dist->initialized = false; | ||
272 | |||
273 | kfree(dist->spis); | ||
274 | kfree(dist->redist_iodevs); | ||
275 | dist->nr_spis = 0; | ||
276 | |||
277 | mutex_unlock(&kvm->lock); | ||
278 | } | ||
279 | |||
280 | void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) | ||
281 | { | ||
282 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
283 | |||
284 | INIT_LIST_HEAD(&vgic_cpu->ap_list_head); | ||
285 | } | ||
286 | |||
287 | void kvm_vgic_destroy(struct kvm *kvm) | ||
288 | { | ||
289 | struct kvm_vcpu *vcpu; | ||
290 | int i; | ||
291 | |||
292 | kvm_vgic_dist_destroy(kvm); | ||
293 | |||
294 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
295 | kvm_vgic_vcpu_destroy(vcpu); | ||
296 | } | ||
297 | |||
298 | /** | ||
299 | * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest | ||
300 | * is a GICv2. A GICv3 must be explicitly initialized by userspace using the | ||
301 | * KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group. | ||
302 | * @kvm: kvm struct pointer | ||
303 | */ | ||
304 | int vgic_lazy_init(struct kvm *kvm) | ||
305 | { | ||
306 | int ret = 0; | ||
307 | |||
308 | if (unlikely(!vgic_initialized(kvm))) { | ||
309 | /* | ||
310 | * We only provide the automatic initialization of the VGIC | ||
311 | * for the legacy case of a GICv2. Any other type must | ||
312 | * be explicitly initialized once setup with the respective | ||
313 | * KVM device call. | ||
314 | */ | ||
315 | if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) | ||
316 | return -EBUSY; | ||
317 | |||
318 | mutex_lock(&kvm->lock); | ||
319 | ret = vgic_init(kvm); | ||
320 | mutex_unlock(&kvm->lock); | ||
321 | } | ||
322 | |||
323 | return ret; | ||
324 | } | ||
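For a GICv3 there is no lazy path, so userspace must request initialization explicitly once the addresses and nr_spis are configured. A hedged sketch against the device fd returned by KVM_CREATE_DEVICE (vgic_fd is an assumed name):

	struct kvm_device_attr init_attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_CTRL,
		.attr  = KVM_DEV_ARM_VGIC_CTRL_INIT,
	};

	ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &init_attr);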
325 | |||
326 | /* RESOURCE MAPPING */ | ||
327 | |||
328 | /** | ||
329 | * Map the MMIO regions depending on the VGIC model exposed to the guest; | ||
330 | * called on the first VCPU run. | ||
331 | * Also map the virtual CPU interface into the VM. | ||
332 | * v2/v3 derivatives call vgic_init if not already done. | ||
333 | * vgic_ready() returns true if this function has succeeded. | ||
334 | * @kvm: kvm struct pointer | ||
335 | */ | ||
336 | int kvm_vgic_map_resources(struct kvm *kvm) | ||
337 | { | ||
338 | struct vgic_dist *dist = &kvm->arch.vgic; | ||
339 | int ret = 0; | ||
340 | |||
341 | mutex_lock(&kvm->lock); | ||
342 | if (!irqchip_in_kernel(kvm)) | ||
343 | goto out; | ||
344 | |||
345 | if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) | ||
346 | ret = vgic_v2_map_resources(kvm); | ||
347 | else | ||
348 | ret = vgic_v3_map_resources(kvm); | ||
349 | out: | ||
350 | mutex_unlock(&kvm->lock); | ||
351 | return ret; | ||
352 | } | ||
353 | |||
354 | /* GENERIC PROBE */ | ||
355 | |||
356 | static void vgic_init_maintenance_interrupt(void *info) | ||
357 | { | ||
358 | enable_percpu_irq(kvm_vgic_global_state.maint_irq, 0); | ||
359 | } | ||
360 | |||
361 | static int vgic_cpu_notify(struct notifier_block *self, | ||
362 | unsigned long action, void *cpu) | ||
363 | { | ||
364 | switch (action) { | ||
365 | case CPU_STARTING: | ||
366 | case CPU_STARTING_FROZEN: | ||
367 | vgic_init_maintenance_interrupt(NULL); | ||
368 | break; | ||
369 | case CPU_DYING: | ||
370 | case CPU_DYING_FROZEN: | ||
371 | disable_percpu_irq(kvm_vgic_global_state.maint_irq); | ||
372 | break; | ||
373 | } | ||
374 | |||
375 | return NOTIFY_OK; | ||
376 | } | ||
377 | |||
378 | static struct notifier_block vgic_cpu_nb = { | ||
379 | .notifier_call = vgic_cpu_notify, | ||
380 | }; | ||
381 | |||
382 | static irqreturn_t vgic_maintenance_handler(int irq, void *data) | ||
383 | { | ||
384 | /* | ||
385 | * We cannot rely on the vgic maintenance interrupt to be | ||
386 | * delivered synchronously. This means we can only use it to | ||
387 | * exit the VM, and we perform the handling of EOIed | ||
388 | * interrupts on the exit path (see vgic_process_maintenance). | ||
389 | */ | ||
390 | return IRQ_HANDLED; | ||
391 | } | ||
392 | |||
393 | /** | ||
394 | * kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable | ||
395 | * according to the host GIC model. Accordingly calls either | ||
396 | * vgic_v2/v3_probe, which registers the KVM_DEVICE that can be | ||
397 | * instantiated by userspace later on. | ||
398 | */ | ||
399 | int kvm_vgic_hyp_init(void) | ||
400 | { | ||
401 | const struct gic_kvm_info *gic_kvm_info; | ||
402 | int ret; | ||
403 | |||
404 | gic_kvm_info = gic_get_kvm_info(); | ||
405 | if (!gic_kvm_info) | ||
406 | return -ENODEV; | ||
407 | |||
408 | if (!gic_kvm_info->maint_irq) { | ||
409 | kvm_err("No vgic maintenance irq\n"); | ||
410 | return -ENXIO; | ||
411 | } | ||
412 | |||
413 | switch (gic_kvm_info->type) { | ||
414 | case GIC_V2: | ||
415 | ret = vgic_v2_probe(gic_kvm_info); | ||
416 | break; | ||
417 | case GIC_V3: | ||
418 | ret = vgic_v3_probe(gic_kvm_info); | ||
419 | break; | ||
420 | default: | ||
421 | ret = -ENODEV; | ||
422 | } | ||
423 | |||
424 | if (ret) | ||
425 | return ret; | ||
426 | |||
427 | kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq; | ||
428 | ret = request_percpu_irq(kvm_vgic_global_state.maint_irq, | ||
429 | vgic_maintenance_handler, | ||
430 | "vgic", kvm_get_running_vcpus()); | ||
431 | if (ret) { | ||
432 | kvm_err("Cannot register interrupt %d\n", | ||
433 | kvm_vgic_global_state.maint_irq); | ||
434 | return ret; | ||
435 | } | ||
436 | |||
437 | ret = __register_cpu_notifier(&vgic_cpu_nb); | ||
438 | if (ret) { | ||
439 | kvm_err("Cannot register vgic CPU notifier\n"); | ||
440 | goto out_free_irq; | ||
441 | } | ||
442 | |||
443 | on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); | ||
444 | |||
445 | kvm_info("vgic interrupt IRQ%d\n", kvm_vgic_global_state.maint_irq); | ||
446 | return 0; | ||
447 | |||
448 | out_free_irq: | ||
449 | free_percpu_irq(kvm_vgic_global_state.maint_irq, | ||
450 | kvm_get_running_vcpus()); | ||
451 | return ret; | ||
452 | } | ||
diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c new file mode 100644 index 000000000000..c675513270bb --- /dev/null +++ b/virt/kvm/arm/vgic/vgic-irqfd.c | |||
@@ -0,0 +1,52 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2015, 2016 ARM Ltd. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/kvm.h> | ||
18 | #include <linux/kvm_host.h> | ||
19 | #include <trace/events/kvm.h> | ||
20 | |||
21 | int kvm_irq_map_gsi(struct kvm *kvm, | ||
22 | struct kvm_kernel_irq_routing_entry *entries, | ||
23 | int gsi) | ||
24 | { | ||
25 | return 0; | ||
26 | } | ||
27 | |||
28 | int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned int irqchip, | ||
29 | unsigned int pin) | ||
30 | { | ||
31 | return pin; | ||
32 | } | ||
33 | |||
34 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, | ||
35 | u32 irq, int level, bool line_status) | ||
36 | { | ||
37 | unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS; | ||
38 | |||
39 | trace_kvm_set_irq(irq, level, irq_source_id); | ||
40 | |||
41 | BUG_ON(!vgic_initialized(kvm)); | ||
42 | |||
43 | return kvm_vgic_inject_irq(kvm, 0, spi, level); | ||
44 | } | ||
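Since kvm_set_irq() above treats the incoming GSI as an SPI offset, GSI 0 corresponds to interrupt ID 32. A hedged userspace sketch wiring an eventfd to that line (vm_fd is an assumed name):

	#include <sys/eventfd.h>

	int efd = eventfd(0, EFD_NONBLOCK);
	struct kvm_irqfd irqfd = {
		.fd  = efd,	/* signalling this eventfd raises the line */
		.gsi = 0,	/* becomes intid 32 after the VGIC_NR_PRIVATE_IRQS offset */
	};

	ioctl(vm_fd, KVM_IRQFD, &irqfd);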
45 | |||
46 | /* MSI not implemented yet */ | ||
47 | int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | ||
48 | struct kvm *kvm, int irq_source_id, | ||
49 | int level, bool line_status) | ||
50 | { | ||
51 | return 0; | ||
52 | } | ||
diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c new file mode 100644 index 000000000000..0130c4b147b7 --- /dev/null +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c | |||
@@ -0,0 +1,431 @@ | |||
1 | /* | ||
2 | * VGIC: KVM DEVICE API | ||
3 | * | ||
4 | * Copyright (C) 2015 ARM Ltd. | ||
5 | * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | */ | ||
16 | #include <linux/kvm_host.h> | ||
17 | #include <kvm/arm_vgic.h> | ||
18 | #include <linux/uaccess.h> | ||
19 | #include <asm/kvm_mmu.h> | ||
20 | #include "vgic.h" | ||
21 | |||
22 | /* common helpers */ | ||
23 | |||
24 | static int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, | ||
25 | phys_addr_t addr, phys_addr_t alignment) | ||
26 | { | ||
27 | if (addr & ~KVM_PHYS_MASK) | ||
28 | return -E2BIG; | ||
29 | |||
30 | if (!IS_ALIGNED(addr, alignment)) | ||
31 | return -EINVAL; | ||
32 | |||
33 | if (!IS_VGIC_ADDR_UNDEF(*ioaddr)) | ||
34 | return -EEXIST; | ||
35 | |||
36 | return 0; | ||
37 | } | ||
38 | |||
39 | /** | ||
40 | * kvm_vgic_addr - set or get vgic VM base addresses | ||
41 | * @kvm: pointer to the vm struct | ||
42 | * @type: the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX | ||
43 | * @addr: pointer to address value | ||
44 | * @write: if true set the address in the VM address space, if false read the | ||
45 | * address | ||
46 | * | ||
47 | * Set or get the vgic base addresses for the distributor and the virtual CPU | ||
48 | * interface in the VM physical address space. These addresses are properties | ||
49 | * of the emulated core/SoC and therefore user space initially knows this | ||
50 | * information. | ||
51 | * Check them for sanity (alignment, double assignment). We can't check for | ||
52 | * overlapping regions in case of a virtual GICv3 here, since we don't know | ||
53 | * the number of VCPUs yet, so we defer this check to map_resources(). | ||
54 | */ | ||
55 | int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) | ||
56 | { | ||
57 | int r = 0; | ||
58 | struct vgic_dist *vgic = &kvm->arch.vgic; | ||
59 | int type_needed; | ||
60 | phys_addr_t *addr_ptr, alignment; | ||
61 | |||
62 | mutex_lock(&kvm->lock); | ||
63 | switch (type) { | ||
64 | case KVM_VGIC_V2_ADDR_TYPE_DIST: | ||
65 | type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; | ||
66 | addr_ptr = &vgic->vgic_dist_base; | ||
67 | alignment = SZ_4K; | ||
68 | break; | ||
69 | case KVM_VGIC_V2_ADDR_TYPE_CPU: | ||
70 | type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; | ||
71 | addr_ptr = &vgic->vgic_cpu_base; | ||
72 | alignment = SZ_4K; | ||
73 | break; | ||
74 | #ifdef CONFIG_KVM_ARM_VGIC_V3 | ||
75 | case KVM_VGIC_V3_ADDR_TYPE_DIST: | ||
76 | type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; | ||
77 | addr_ptr = &vgic->vgic_dist_base; | ||
78 | alignment = SZ_64K; | ||
79 | break; | ||
80 | case KVM_VGIC_V3_ADDR_TYPE_REDIST: | ||
81 | type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; | ||
82 | addr_ptr = &vgic->vgic_redist_base; | ||
83 | alignment = SZ_64K; | ||
84 | break; | ||
85 | #endif | ||
86 | default: | ||
87 | r = -ENODEV; | ||
88 | goto out; | ||
89 | } | ||
90 | |||
91 | if (vgic->vgic_model != type_needed) { | ||
92 | r = -ENODEV; | ||
93 | goto out; | ||
94 | } | ||
95 | |||
96 | if (write) { | ||
97 | r = vgic_check_ioaddr(kvm, addr_ptr, *addr, alignment); | ||
98 | if (!r) | ||
99 | *addr_ptr = *addr; | ||
100 | } else { | ||
101 | *addr = *addr_ptr; | ||
102 | } | ||
103 | |||
104 | out: | ||
105 | mutex_unlock(&kvm->lock); | ||
106 | return r; | ||
107 | } | ||
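Userspace drives this through the KVM_DEV_ARM_VGIC_GRP_ADDR attribute group, passing the address indirectly. A hedged sketch placing the v2 distributor; the guest-physical address is an illustrative choice that satisfies the SZ_4K alignment check:

	__u64 dist_base = 0x08000000;	/* illustrative, 4K aligned */
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
		.attr  = KVM_VGIC_V2_ADDR_TYPE_DIST,
		.addr  = (__u64)(unsigned long)&dist_base,
	};

	ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);	/* the write = true path */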
108 | |||
109 | static int vgic_set_common_attr(struct kvm_device *dev, | ||
110 | struct kvm_device_attr *attr) | ||
111 | { | ||
112 | int r; | ||
113 | |||
114 | switch (attr->group) { | ||
115 | case KVM_DEV_ARM_VGIC_GRP_ADDR: { | ||
116 | u64 __user *uaddr = (u64 __user *)(long)attr->addr; | ||
117 | u64 addr; | ||
118 | unsigned long type = (unsigned long)attr->attr; | ||
119 | |||
120 | if (copy_from_user(&addr, uaddr, sizeof(addr))) | ||
121 | return -EFAULT; | ||
122 | |||
123 | r = kvm_vgic_addr(dev->kvm, type, &addr, true); | ||
124 | return (r == -ENODEV) ? -ENXIO : r; | ||
125 | } | ||
126 | case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { | ||
127 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; | ||
128 | u32 val; | ||
129 | int ret = 0; | ||
130 | |||
131 | if (get_user(val, uaddr)) | ||
132 | return -EFAULT; | ||
133 | |||
134 | /* | ||
135 | * We require: | ||
136 | * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs | ||
137 | * - at most 1024 interrupts | ||
138 | * - a multiple of 32 interrupts | ||
139 | */ | ||
140 | if (val < (VGIC_NR_PRIVATE_IRQS + 32) || | ||
141 | val > VGIC_MAX_RESERVED || | ||
142 | (val & 31)) | ||
143 | return -EINVAL; | ||
144 | |||
145 | mutex_lock(&dev->kvm->lock); | ||
146 | |||
147 | if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_spis) | ||
148 | ret = -EBUSY; | ||
149 | else | ||
150 | dev->kvm->arch.vgic.nr_spis = | ||
151 | val - VGIC_NR_PRIVATE_IRQS; | ||
152 | |||
153 | mutex_unlock(&dev->kvm->lock); | ||
154 | |||
155 | return ret; | ||
156 | } | ||
157 | case KVM_DEV_ARM_VGIC_GRP_CTRL: { | ||
158 | switch (attr->attr) { | ||
159 | case KVM_DEV_ARM_VGIC_CTRL_INIT: | ||
160 | mutex_lock(&dev->kvm->lock); | ||
161 | r = vgic_init(dev->kvm); | ||
162 | mutex_unlock(&dev->kvm->lock); | ||
163 | return r; | ||
164 | } | ||
165 | break; | ||
166 | } | ||
167 | } | ||
168 | |||
169 | return -ENXIO; | ||
170 | } | ||
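The NR_IRQS constraints above admit multiples of 32 between 64 and the maximum, counted including the 32 private interrupts. A hedged sketch requesting 128 interrupts, i.e. 96 SPIs (vgic_fd is an assumed name):

	__u32 nr_irqs = 128;	/* multiple of 32, >= 64: 96 SPIs */
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
		.attr  = 0,
		.addr  = (__u64)(unsigned long)&nr_irqs,
	};

	ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);	/* -EBUSY once frozen */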
171 | |||
172 | static int vgic_get_common_attr(struct kvm_device *dev, | ||
173 | struct kvm_device_attr *attr) | ||
174 | { | ||
175 | int r = -ENXIO; | ||
176 | |||
177 | switch (attr->group) { | ||
178 | case KVM_DEV_ARM_VGIC_GRP_ADDR: { | ||
179 | u64 __user *uaddr = (u64 __user *)(long)attr->addr; | ||
180 | u64 addr; | ||
181 | unsigned long type = (unsigned long)attr->attr; | ||
182 | |||
183 | r = kvm_vgic_addr(dev->kvm, type, &addr, false); | ||
184 | if (r) | ||
185 | return (r == -ENODEV) ? -ENXIO : r; | ||
186 | |||
187 | if (copy_to_user(uaddr, &addr, sizeof(addr))) | ||
188 | return -EFAULT; | ||
189 | break; | ||
190 | } | ||
191 | case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { | ||
192 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; | ||
193 | |||
194 | r = put_user(dev->kvm->arch.vgic.nr_spis + | ||
195 | VGIC_NR_PRIVATE_IRQS, uaddr); | ||
196 | break; | ||
197 | } | ||
198 | } | ||
199 | |||
200 | return r; | ||
201 | } | ||
202 | |||
203 | static int vgic_create(struct kvm_device *dev, u32 type) | ||
204 | { | ||
205 | return kvm_vgic_create(dev->kvm, type); | ||
206 | } | ||
207 | |||
208 | static void vgic_destroy(struct kvm_device *dev) | ||
209 | { | ||
210 | kfree(dev); | ||
211 | } | ||
212 | |||
213 | void kvm_register_vgic_device(unsigned long type) | ||
214 | { | ||
215 | switch (type) { | ||
216 | case KVM_DEV_TYPE_ARM_VGIC_V2: | ||
217 | kvm_register_device_ops(&kvm_arm_vgic_v2_ops, | ||
218 | KVM_DEV_TYPE_ARM_VGIC_V2); | ||
219 | break; | ||
220 | #ifdef CONFIG_KVM_ARM_VGIC_V3 | ||
221 | case KVM_DEV_TYPE_ARM_VGIC_V3: | ||
222 | kvm_register_device_ops(&kvm_arm_vgic_v3_ops, | ||
223 | KVM_DEV_TYPE_ARM_VGIC_V3); | ||
224 | break; | ||
225 | #endif | ||
226 | } | ||
227 | } | ||
228 | |||
229 | /** vgic_attr_regs_access: allows user space to read/write VGIC registers | ||
230 | * | ||
231 | * @dev: kvm device handle | ||
232 | * @attr: kvm device attribute | ||
233 | * @reg: address the value is read or written | ||
234 | * @is_write: write flag | ||
235 | * | ||
236 | */ | ||
237 | static int vgic_attr_regs_access(struct kvm_device *dev, | ||
238 | struct kvm_device_attr *attr, | ||
239 | u32 *reg, bool is_write) | ||
240 | { | ||
241 | gpa_t addr; | ||
242 | int cpuid, ret, c; | ||
243 | struct kvm_vcpu *vcpu, *tmp_vcpu; | ||
244 | int vcpu_lock_idx = -1; | ||
245 | |||
246 | cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> | ||
247 | KVM_DEV_ARM_VGIC_CPUID_SHIFT; | ||
248 | vcpu = kvm_get_vcpu(dev->kvm, cpuid); | ||
249 | addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; | ||
250 | |||
251 | mutex_lock(&dev->kvm->lock); | ||
252 | |||
253 | ret = vgic_init(dev->kvm); | ||
254 | if (ret) | ||
255 | goto out; | ||
256 | |||
257 | if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { | ||
258 | ret = -EINVAL; | ||
259 | goto out; | ||
260 | } | ||
261 | |||
262 | /* | ||
263 | * Any time a vcpu is run, vcpu_load is called which tries to grab the | ||
264 | * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure | ||
265 | * that no other VCPUs are run and fiddle with the vgic state while we | ||
266 | * access it. | ||
267 | */ | ||
268 | ret = -EBUSY; | ||
269 | kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) { | ||
270 | if (!mutex_trylock(&tmp_vcpu->mutex)) | ||
271 | goto out; | ||
272 | vcpu_lock_idx = c; | ||
273 | } | ||
274 | |||
275 | switch (attr->group) { | ||
276 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: | ||
277 | ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, reg); | ||
278 | break; | ||
279 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
280 | ret = vgic_v2_dist_uaccess(vcpu, is_write, addr, reg); | ||
281 | break; | ||
282 | default: | ||
283 | ret = -EINVAL; | ||
284 | break; | ||
285 | } | ||
286 | |||
287 | out: | ||
288 | for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { | ||
289 | tmp_vcpu = kvm_get_vcpu(dev->kvm, vcpu_lock_idx); | ||
290 | mutex_unlock(&tmp_vcpu->mutex); | ||
291 | } | ||
292 | |||
293 | mutex_unlock(&dev->kvm->lock); | ||
294 | return ret; | ||
295 | } | ||
296 | |||
297 | /* V2 ops */ | ||
298 | |||
299 | static int vgic_v2_set_attr(struct kvm_device *dev, | ||
300 | struct kvm_device_attr *attr) | ||
301 | { | ||
302 | int ret; | ||
303 | |||
304 | ret = vgic_set_common_attr(dev, attr); | ||
305 | if (ret != -ENXIO) | ||
306 | return ret; | ||
307 | |||
308 | switch (attr->group) { | ||
309 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
310 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { | ||
311 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; | ||
312 | u32 reg; | ||
313 | |||
314 | if (get_user(reg, uaddr)) | ||
315 | return -EFAULT; | ||
316 | |||
317 | return vgic_attr_regs_access(dev, attr, ®, true); | ||
318 | } | ||
319 | } | ||
320 | |||
321 | return -ENXIO; | ||
322 | } | ||
323 | |||
324 | static int vgic_v2_get_attr(struct kvm_device *dev, | ||
325 | struct kvm_device_attr *attr) | ||
326 | { | ||
327 | int ret; | ||
328 | |||
329 | ret = vgic_get_common_attr(dev, attr); | ||
330 | if (ret != -ENXIO) | ||
331 | return ret; | ||
332 | |||
333 | switch (attr->group) { | ||
334 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
335 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { | ||
336 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; | ||
337 | u32 reg = 0; | ||
338 | |||
339 | ret = vgic_attr_regs_access(dev, attr, ®, false); | ||
340 | if (ret) | ||
341 | return ret; | ||
342 | return put_user(reg, uaddr); | ||
343 | } | ||
344 | } | ||
345 | |||
346 | return -ENXIO; | ||
347 | } | ||
348 | |||
349 | static int vgic_v2_has_attr(struct kvm_device *dev, | ||
350 | struct kvm_device_attr *attr) | ||
351 | { | ||
352 | switch (attr->group) { | ||
353 | case KVM_DEV_ARM_VGIC_GRP_ADDR: | ||
354 | switch (attr->attr) { | ||
355 | case KVM_VGIC_V2_ADDR_TYPE_DIST: | ||
356 | case KVM_VGIC_V2_ADDR_TYPE_CPU: | ||
357 | return 0; | ||
358 | } | ||
359 | break; | ||
360 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
361 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: | ||
362 | return vgic_v2_has_attr_regs(dev, attr); | ||
363 | case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: | ||
364 | return 0; | ||
365 | case KVM_DEV_ARM_VGIC_GRP_CTRL: | ||
366 | switch (attr->attr) { | ||
367 | case KVM_DEV_ARM_VGIC_CTRL_INIT: | ||
368 | return 0; | ||
369 | } | ||
370 | } | ||
371 | return -ENXIO; | ||
372 | } | ||
373 | |||
374 | struct kvm_device_ops kvm_arm_vgic_v2_ops = { | ||
375 | .name = "kvm-arm-vgic-v2", | ||
376 | .create = vgic_create, | ||
377 | .destroy = vgic_destroy, | ||
378 | .set_attr = vgic_v2_set_attr, | ||
379 | .get_attr = vgic_v2_get_attr, | ||
380 | .has_attr = vgic_v2_has_attr, | ||
381 | }; | ||
382 | |||
383 | /* V3 ops */ | ||
384 | |||
385 | #ifdef CONFIG_KVM_ARM_VGIC_V3 | ||
386 | |||
387 | static int vgic_v3_set_attr(struct kvm_device *dev, | ||
388 | struct kvm_device_attr *attr) | ||
389 | { | ||
390 | return vgic_set_common_attr(dev, attr); | ||
391 | } | ||
392 | |||
393 | static int vgic_v3_get_attr(struct kvm_device *dev, | ||
394 | struct kvm_device_attr *attr) | ||
395 | { | ||
396 | return vgic_get_common_attr(dev, attr); | ||
397 | } | ||
398 | |||
399 | static int vgic_v3_has_attr(struct kvm_device *dev, | ||
400 | struct kvm_device_attr *attr) | ||
401 | { | ||
402 | switch (attr->group) { | ||
403 | case KVM_DEV_ARM_VGIC_GRP_ADDR: | ||
404 | switch (attr->attr) { | ||
405 | case KVM_VGIC_V3_ADDR_TYPE_DIST: | ||
406 | case KVM_VGIC_V3_ADDR_TYPE_REDIST: | ||
407 | return 0; | ||
408 | } | ||
409 | break; | ||
410 | case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: | ||
411 | return 0; | ||
412 | case KVM_DEV_ARM_VGIC_GRP_CTRL: | ||
413 | switch (attr->attr) { | ||
414 | case KVM_DEV_ARM_VGIC_CTRL_INIT: | ||
415 | return 0; | ||
416 | } | ||
417 | } | ||
418 | return -ENXIO; | ||
419 | } | ||
420 | |||
421 | struct kvm_device_ops kvm_arm_vgic_v3_ops = { | ||
422 | .name = "kvm-arm-vgic-v3", | ||
423 | .create = vgic_create, | ||
424 | .destroy = vgic_destroy, | ||
425 | .set_attr = vgic_v3_set_attr, | ||
426 | .get_attr = vgic_v3_get_attr, | ||
427 | .has_attr = vgic_v3_has_attr, | ||
428 | }; | ||
429 | |||
430 | #endif /* CONFIG_KVM_ARM_VGIC_V3 */ | ||
431 | |||
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c new file mode 100644 index 000000000000..a21393637e4b --- /dev/null +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c | |||
@@ -0,0 +1,446 @@ | |||
1 | /* | ||
2 | * VGICv2 MMIO handling functions | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | */ | ||
13 | |||
14 | #include <linux/irqchip/arm-gic.h> | ||
15 | #include <linux/kvm.h> | ||
16 | #include <linux/kvm_host.h> | ||
17 | #include <kvm/iodev.h> | ||
18 | #include <kvm/arm_vgic.h> | ||
19 | |||
20 | #include "vgic.h" | ||
21 | #include "vgic-mmio.h" | ||
22 | |||
23 | static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu, | ||
24 | gpa_t addr, unsigned int len) | ||
25 | { | ||
26 | u32 value; | ||
27 | |||
28 | switch (addr & 0x0c) { | ||
29 | case GIC_DIST_CTRL: | ||
30 | value = vcpu->kvm->arch.vgic.enabled ? GICD_ENABLE : 0; | ||
31 | break; | ||
32 | case GIC_DIST_CTR: | ||
33 | value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; | ||
34 | value = (value >> 5) - 1; | ||
35 | value |= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; | ||
36 | break; | ||
37 | case GIC_DIST_IIDR: | ||
38 | value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); | ||
39 | break; | ||
40 | default: | ||
41 | return 0; | ||
42 | } | ||
43 | |||
44 | return value; | ||
45 | } | ||
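Worked example for the GIC_DIST_CTR encoding above: a VM with 4 vcpus and 128 interrupts (96 SPIs) reads ((128 >> 5) - 1) | ((4 - 1) << 5) = 0x3 | 0x60 = 0x63, i.e. the ITLinesNumber and CPUNumber fields of GICD_TYPER.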
46 | |||
47 | static void vgic_mmio_write_v2_misc(struct kvm_vcpu *vcpu, | ||
48 | gpa_t addr, unsigned int len, | ||
49 | unsigned long val) | ||
50 | { | ||
51 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
52 | bool was_enabled = dist->enabled; | ||
53 | |||
54 | switch (addr & 0x0c) { | ||
55 | case GIC_DIST_CTRL: | ||
56 | dist->enabled = val & GICD_ENABLE; | ||
57 | if (!was_enabled && dist->enabled) | ||
58 | vgic_kick_vcpus(vcpu->kvm); | ||
59 | break; | ||
60 | case GIC_DIST_CTR: | ||
61 | case GIC_DIST_IIDR: | ||
62 | /* Nothing to do */ | ||
63 | return; | ||
64 | } | ||
65 | } | ||
66 | |||
67 | static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, | ||
68 | gpa_t addr, unsigned int len, | ||
69 | unsigned long val) | ||
70 | { | ||
71 | int nr_vcpus = atomic_read(&source_vcpu->kvm->online_vcpus); | ||
72 | int intid = val & 0xf; | ||
73 | int targets = (val >> 16) & 0xff; | ||
74 | int mode = (val >> 24) & 0x03; | ||
75 | int c; | ||
76 | struct kvm_vcpu *vcpu; | ||
77 | |||
78 | switch (mode) { | ||
79 | case 0x0: /* as specified by targets */ | ||
80 | break; | ||
81 | case 0x1: | ||
82 | targets = (1U << nr_vcpus) - 1; /* all, ... */ | ||
83 | targets &= ~(1U << source_vcpu->vcpu_id); /* but self */ | ||
84 | break; | ||
85 | case 0x2: /* this very vCPU only */ | ||
86 | targets = (1U << source_vcpu->vcpu_id); | ||
87 | break; | ||
88 | case 0x3: /* reserved */ | ||
89 | return; | ||
90 | } | ||
91 | |||
92 | kvm_for_each_vcpu(c, vcpu, source_vcpu->kvm) { | ||
93 | struct vgic_irq *irq; | ||
94 | |||
95 | if (!(targets & (1U << c))) | ||
96 | continue; | ||
97 | |||
98 | irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid); | ||
99 | |||
100 | spin_lock(&irq->irq_lock); | ||
101 | irq->pending = true; | ||
102 | irq->source |= 1U << source_vcpu->vcpu_id; | ||
103 | |||
104 | vgic_queue_irq_unlock(source_vcpu->kvm, irq); | ||
105 | } | ||
106 | } | ||
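From the guest side, this handler is reached by a write to GICD_SGIR at distributor offset 0xf00 (GIC_DIST_SOFTINT below). A hedged bare-metal sketch sending SGI 1 to all other CPUs, which is mode 0x1 in the code above (dist_base is an assumed guest mapping):

	/* TargetListFilter = 1 ("all but self") in bits [25:24], intid in [3:0] */
	writel_relaxed((1U << 24) | 1, dist_base + 0xf00);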
107 | |||
108 | static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu, | ||
109 | gpa_t addr, unsigned int len) | ||
110 | { | ||
111 | u32 intid = VGIC_ADDR_TO_INTID(addr, 8); | ||
112 | int i; | ||
113 | u64 val = 0; | ||
114 | |||
115 | for (i = 0; i < len; i++) { | ||
116 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
117 | |||
118 | val |= (u64)irq->targets << (i * 8); | ||
119 | } | ||
120 | |||
121 | return val; | ||
122 | } | ||
123 | |||
124 | static void vgic_mmio_write_target(struct kvm_vcpu *vcpu, | ||
125 | gpa_t addr, unsigned int len, | ||
126 | unsigned long val) | ||
127 | { | ||
128 | u32 intid = VGIC_ADDR_TO_INTID(addr, 8); | ||
129 | int i; | ||
130 | |||
131 | /* GICD_ITARGETSR[0-7] are read-only */ | ||
132 | if (intid < VGIC_NR_PRIVATE_IRQS) | ||
133 | return; | ||
134 | |||
135 | for (i = 0; i < len; i++) { | ||
136 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid + i); | ||
137 | int target; | ||
138 | |||
139 | spin_lock(&irq->irq_lock); | ||
140 | |||
141 | irq->targets = (val >> (i * 8)) & 0xff; | ||
142 | target = irq->targets ? __ffs(irq->targets) : 0; | ||
143 | irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target); | ||
144 | |||
145 | spin_unlock(&irq->irq_lock); | ||
146 | } | ||
147 | } | ||
148 | |||
149 | static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu, | ||
150 | gpa_t addr, unsigned int len) | ||
151 | { | ||
152 | u32 intid = addr & 0x0f; | ||
153 | int i; | ||
154 | u64 val = 0; | ||
155 | |||
156 | for (i = 0; i < len; i++) { | ||
157 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
158 | |||
159 | val |= (u64)irq->source << (i * 8); | ||
160 | } | ||
161 | return val; | ||
162 | } | ||
163 | |||
164 | static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu, | ||
165 | gpa_t addr, unsigned int len, | ||
166 | unsigned long val) | ||
167 | { | ||
168 | u32 intid = addr & 0x0f; | ||
169 | int i; | ||
170 | |||
171 | for (i = 0; i < len; i++) { | ||
172 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
173 | |||
174 | spin_lock(&irq->irq_lock); | ||
175 | |||
176 | irq->source &= ~((val >> (i * 8)) & 0xff); | ||
177 | if (!irq->source) | ||
178 | irq->pending = false; | ||
179 | |||
180 | spin_unlock(&irq->irq_lock); | ||
181 | } | ||
182 | } | ||
183 | |||
184 | static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, | ||
185 | gpa_t addr, unsigned int len, | ||
186 | unsigned long val) | ||
187 | { | ||
188 | u32 intid = addr & 0x0f; | ||
189 | int i; | ||
190 | |||
191 | for (i = 0; i < len; i++) { | ||
192 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
193 | |||
194 | spin_lock(&irq->irq_lock); | ||
195 | |||
196 | irq->source |= (val >> (i * 8)) & 0xff; | ||
197 | |||
198 | if (irq->source) { | ||
199 | irq->pending = true; | ||
200 | vgic_queue_irq_unlock(vcpu->kvm, irq); | ||
201 | } else { | ||
202 | spin_unlock(&irq->irq_lock); | ||
203 | } | ||
204 | } | ||
205 | } | ||
206 | |||
207 | static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) | ||
208 | { | ||
209 | if (kvm_vgic_global_state.type == VGIC_V2) | ||
210 | vgic_v2_set_vmcr(vcpu, vmcr); | ||
211 | else | ||
212 | vgic_v3_set_vmcr(vcpu, vmcr); | ||
213 | } | ||
214 | |||
215 | static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) | ||
216 | { | ||
217 | if (kvm_vgic_global_state.type == VGIC_V2) | ||
218 | vgic_v2_get_vmcr(vcpu, vmcr); | ||
219 | else | ||
220 | vgic_v3_get_vmcr(vcpu, vmcr); | ||
221 | } | ||
222 | |||
223 | #define GICC_ARCH_VERSION_V2 0x2 | ||
224 | |||
225 | /* These are for userland accesses only; there is no guest-facing emulation. */ | ||
226 | static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu, | ||
227 | gpa_t addr, unsigned int len) | ||
228 | { | ||
229 | struct vgic_vmcr vmcr; | ||
230 | u32 val; | ||
231 | |||
232 | vgic_get_vmcr(vcpu, &vmcr); | ||
233 | |||
234 | switch (addr & 0xff) { | ||
235 | case GIC_CPU_CTRL: | ||
236 | val = vmcr.ctlr; | ||
237 | break; | ||
238 | case GIC_CPU_PRIMASK: | ||
239 | val = vmcr.pmr; | ||
240 | break; | ||
241 | case GIC_CPU_BINPOINT: | ||
242 | val = vmcr.bpr; | ||
243 | break; | ||
244 | case GIC_CPU_ALIAS_BINPOINT: | ||
245 | val = vmcr.abpr; | ||
246 | break; | ||
247 | case GIC_CPU_IDENT: | ||
248 | val = ((PRODUCT_ID_KVM << 20) | | ||
249 | (GICC_ARCH_VERSION_V2 << 16) | | ||
250 | IMPLEMENTER_ARM); | ||
251 | break; | ||
252 | default: | ||
253 | return 0; | ||
254 | } | ||
255 | |||
256 | return val; | ||
257 | } | ||
258 | |||
259 | static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu, | ||
260 | gpa_t addr, unsigned int len, | ||
261 | unsigned long val) | ||
262 | { | ||
263 | struct vgic_vmcr vmcr; | ||
264 | |||
265 | vgic_get_vmcr(vcpu, &vmcr); | ||
266 | |||
267 | switch (addr & 0xff) { | ||
268 | case GIC_CPU_CTRL: | ||
269 | vmcr.ctlr = val; | ||
270 | break; | ||
271 | case GIC_CPU_PRIMASK: | ||
272 | vmcr.pmr = val; | ||
273 | break; | ||
274 | case GIC_CPU_BINPOINT: | ||
275 | vmcr.bpr = val; | ||
276 | break; | ||
277 | case GIC_CPU_ALIAS_BINPOINT: | ||
278 | vmcr.abpr = val; | ||
279 | break; | ||
280 | } | ||
281 | |||
282 | vgic_set_vmcr(vcpu, &vmcr); | ||
283 | } | ||
284 | |||
285 | static const struct vgic_register_region vgic_v2_dist_registers[] = { | ||
286 | REGISTER_DESC_WITH_LENGTH(GIC_DIST_CTRL, | ||
287 | vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12, | ||
288 | VGIC_ACCESS_32bit), | ||
289 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP, | ||
290 | vgic_mmio_read_rao, vgic_mmio_write_wi, 1, | ||
291 | VGIC_ACCESS_32bit), | ||
292 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET, | ||
293 | vgic_mmio_read_enable, vgic_mmio_write_senable, 1, | ||
294 | VGIC_ACCESS_32bit), | ||
295 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR, | ||
296 | vgic_mmio_read_enable, vgic_mmio_write_cenable, 1, | ||
297 | VGIC_ACCESS_32bit), | ||
298 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET, | ||
299 | vgic_mmio_read_pending, vgic_mmio_write_spending, 1, | ||
300 | VGIC_ACCESS_32bit), | ||
301 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR, | ||
302 | vgic_mmio_read_pending, vgic_mmio_write_cpending, 1, | ||
303 | VGIC_ACCESS_32bit), | ||
304 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, | ||
305 | vgic_mmio_read_active, vgic_mmio_write_sactive, 1, | ||
306 | VGIC_ACCESS_32bit), | ||
307 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR, | ||
308 | vgic_mmio_read_active, vgic_mmio_write_cactive, 1, | ||
309 | VGIC_ACCESS_32bit), | ||
310 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI, | ||
311 | vgic_mmio_read_priority, vgic_mmio_write_priority, 8, | ||
312 | VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), | ||
313 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_TARGET, | ||
314 | vgic_mmio_read_target, vgic_mmio_write_target, 8, | ||
315 | VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), | ||
316 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_CONFIG, | ||
317 | vgic_mmio_read_config, vgic_mmio_write_config, 2, | ||
318 | VGIC_ACCESS_32bit), | ||
319 | REGISTER_DESC_WITH_LENGTH(GIC_DIST_SOFTINT, | ||
320 | vgic_mmio_read_raz, vgic_mmio_write_sgir, 4, | ||
321 | VGIC_ACCESS_32bit), | ||
322 | REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_CLEAR, | ||
323 | vgic_mmio_read_sgipend, vgic_mmio_write_sgipendc, 16, | ||
324 | VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), | ||
325 | REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_SET, | ||
326 | vgic_mmio_read_sgipend, vgic_mmio_write_sgipends, 16, | ||
327 | VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), | ||
328 | }; | ||
329 | |||
330 | static const struct vgic_register_region vgic_v2_cpu_registers[] = { | ||
331 | REGISTER_DESC_WITH_LENGTH(GIC_CPU_CTRL, | ||
332 | vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, | ||
333 | VGIC_ACCESS_32bit), | ||
334 | REGISTER_DESC_WITH_LENGTH(GIC_CPU_PRIMASK, | ||
335 | vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, | ||
336 | VGIC_ACCESS_32bit), | ||
337 | REGISTER_DESC_WITH_LENGTH(GIC_CPU_BINPOINT, | ||
338 | vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, | ||
339 | VGIC_ACCESS_32bit), | ||
340 | REGISTER_DESC_WITH_LENGTH(GIC_CPU_ALIAS_BINPOINT, | ||
341 | vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, | ||
342 | VGIC_ACCESS_32bit), | ||
343 | REGISTER_DESC_WITH_LENGTH(GIC_CPU_ACTIVEPRIO, | ||
344 | vgic_mmio_read_raz, vgic_mmio_write_wi, 16, | ||
345 | VGIC_ACCESS_32bit), | ||
346 | REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT, | ||
347 | vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, | ||
348 | VGIC_ACCESS_32bit), | ||
349 | }; | ||
350 | |||
351 | unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev) | ||
352 | { | ||
353 | dev->regions = vgic_v2_dist_registers; | ||
354 | dev->nr_regions = ARRAY_SIZE(vgic_v2_dist_registers); | ||
355 | |||
356 | kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops); | ||
357 | |||
358 | return SZ_4K; | ||
359 | } | ||
360 | |||
361 | int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) | ||
362 | { | ||
363 | int nr_irqs = dev->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; | ||
364 | const struct vgic_register_region *regions; | ||
365 | gpa_t addr; | ||
366 | int nr_regions, i, len; | ||
367 | |||
368 | addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; | ||
369 | |||
370 | switch (attr->group) { | ||
371 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
372 | regions = vgic_v2_dist_registers; | ||
373 | nr_regions = ARRAY_SIZE(vgic_v2_dist_registers); | ||
374 | break; | ||
375 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: | ||
376 | regions = vgic_v2_cpu_registers; | ||
377 | nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers); | ||
378 | break; | ||
379 | default: | ||
380 | return -ENXIO; | ||
381 | } | ||
382 | |||
383 | /* We only support aligned 32-bit accesses. */ | ||
384 | if (addr & 3) | ||
385 | return -ENXIO; | ||
386 | |||
387 | for (i = 0; i < nr_regions; i++) { | ||
388 | if (regions[i].bits_per_irq) | ||
389 | len = (regions[i].bits_per_irq * nr_irqs) / 8; | ||
390 | else | ||
391 | len = regions[i].len; | ||
392 | |||
393 | if (regions[i].reg_offset <= addr && | ||
394 | regions[i].reg_offset + len > addr) | ||
395 | return 0; | ||
396 | } | ||
397 | |||
398 | return -ENXIO; | ||
399 | } | ||
400 | |||
401 | /* | ||
402 | * When userland tries to access the VGIC register handlers, we need to | ||
403 | * create a usable struct vgic_io_device to be passed to the handlers, and we | ||
404 | * have to set up a buffer similar to what would have happened if a guest MMIO | ||
405 | * access occurred, including doing endian conversions on BE systems. | ||
406 | */ | ||
407 | static int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev, | ||
408 | bool is_write, int offset, u32 *val) | ||
409 | { | ||
410 | unsigned int len = 4; | ||
411 | u8 buf[4]; | ||
412 | int ret; | ||
413 | |||
414 | if (is_write) { | ||
415 | vgic_data_host_to_mmio_bus(buf, len, *val); | ||
416 | ret = kvm_io_gic_ops.write(vcpu, &dev->dev, offset, len, buf); | ||
417 | } else { | ||
418 | ret = kvm_io_gic_ops.read(vcpu, &dev->dev, offset, len, buf); | ||
419 | if (!ret) | ||
420 | *val = vgic_data_mmio_bus_to_host(buf, len); | ||
421 | } | ||
422 | |||
423 | return ret; | ||
424 | } | ||
425 | |||
426 | int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write, | ||
427 | int offset, u32 *val) | ||
428 | { | ||
429 | struct vgic_io_device dev = { | ||
430 | .regions = vgic_v2_cpu_registers, | ||
431 | .nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers), | ||
432 | }; | ||
433 | |||
434 | return vgic_uaccess(vcpu, &dev, is_write, offset, val); | ||
435 | } | ||
436 | |||
437 | int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, | ||
438 | int offset, u32 *val) | ||
439 | { | ||
440 | struct vgic_io_device dev = { | ||
441 | .regions = vgic_v2_dist_registers, | ||
442 | .nr_regions = ARRAY_SIZE(vgic_v2_dist_registers), | ||
443 | }; | ||
444 | |||
445 | return vgic_uaccess(vcpu, &dev, is_write, offset, val); | ||
446 | } | ||
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c new file mode 100644 index 000000000000..a0c515a412a7 --- /dev/null +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c | |||
@@ -0,0 +1,455 @@ | |||
1 | /* | ||
2 | * VGICv3 MMIO handling functions | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | */ | ||
13 | |||
14 | #include <linux/irqchip/arm-gic-v3.h> | ||
15 | #include <linux/kvm.h> | ||
16 | #include <linux/kvm_host.h> | ||
17 | #include <kvm/iodev.h> | ||
18 | #include <kvm/arm_vgic.h> | ||
19 | |||
20 | #include <asm/kvm_emulate.h> | ||
21 | |||
22 | #include "vgic.h" | ||
23 | #include "vgic-mmio.h" | ||
24 | |||
25 | /* extract @num bytes at byte offset @offset in @data */ | ||
26 | static unsigned long extract_bytes(unsigned long data, unsigned int offset, | ||
27 | unsigned int num) | ||
28 | { | ||
29 | return (data >> (offset * 8)) & GENMASK_ULL(num * 8 - 1, 0); | ||
30 | } | ||
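For a quick feel of the byte-extraction arithmetic, here is a standalone userspace sketch of the same helper (MASK_ULL is a local stand-in for the kernel's GENMASK_ULL, which isn't available outside the kernel):

    #include <stdio.h>

    /* Local stand-in for GENMASK_ULL(h, l): bits l..h set. */
    #define MASK_ULL(h, l) ((~0ULL >> (63 - (h))) & (~0ULL << (l)))

    static unsigned long long extract_bytes(unsigned long long data,
                                            unsigned int offset,
                                            unsigned int num)
    {
        return (data >> (offset * 8)) & MASK_ULL(num * 8 - 1, 0);
    }

    int main(void)
    {
        /* Pull out bytes 4..7 (the upper word) of a 64-bit register image. */
        printf("%llx\n", extract_bytes(0x1122334455667788ULL, 4, 4));
        return 0;    /* prints 11223344 */
    }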
31 | |||
32 | static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, | ||
33 | gpa_t addr, unsigned int len) | ||
34 | { | ||
35 | u32 value = 0; | ||
36 | |||
37 | switch (addr & 0x0c) { | ||
38 | case GICD_CTLR: | ||
39 | if (vcpu->kvm->arch.vgic.enabled) | ||
40 | value |= GICD_CTLR_ENABLE_SS_G1; | ||
41 | value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS; | ||
42 | break; | ||
43 | case GICD_TYPER: | ||
44 | value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; | ||
45 | value = (value >> 5) - 1; | ||
46 | value |= (INTERRUPT_ID_BITS_SPIS - 1) << 19; | ||
47 | break; | ||
48 | case GICD_IIDR: | ||
49 | value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); | ||
50 | break; | ||
51 | default: | ||
52 | return 0; | ||
53 | } | ||
54 | |||
55 | return value; | ||
56 | } | ||
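To sanity-check the ITLinesNumber arithmetic in the GICD_TYPER case above: the architecture defines the number of supported interrupt IDs as 32 * (ITLinesNumber + 1), so the shift-and-decrement round-trips. A minimal userspace sketch, where the nr_spis value is an arbitrary example:

    #include <stdio.h>

    #define VGIC_NR_PRIVATE_IRQS 32    /* SGIs + PPIs, as in the kernel */

    int main(void)
    {
        unsigned int nr_spis = 224;    /* arbitrary example value */
        unsigned int nr_irqs = nr_spis + VGIC_NR_PRIVATE_IRQS;
        unsigned int itlines = (nr_irqs >> 5) - 1;    /* as in the code above */

        /* The architecture defines: supported IDs = 32 * (ITLinesNumber + 1). */
        printf("ITLinesNumber=%u -> %u IRQs\n", itlines, 32 * (itlines + 1));
        return 0;    /* prints: ITLinesNumber=7 -> 256 IRQs */
    }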
57 | |||
58 | static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, | ||
59 | gpa_t addr, unsigned int len, | ||
60 | unsigned long val) | ||
61 | { | ||
62 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
63 | bool was_enabled = dist->enabled; | ||
64 | |||
65 | switch (addr & 0x0c) { | ||
66 | case GICD_CTLR: | ||
67 | dist->enabled = val & GICD_CTLR_ENABLE_SS_G1; | ||
68 | |||
69 | if (!was_enabled && dist->enabled) | ||
70 | vgic_kick_vcpus(vcpu->kvm); | ||
71 | break; | ||
72 | case GICD_TYPER: | ||
73 | case GICD_IIDR: | ||
74 | return; | ||
75 | } | ||
76 | } | ||
77 | |||
78 | static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu, | ||
79 | gpa_t addr, unsigned int len) | ||
80 | { | ||
81 | int intid = VGIC_ADDR_TO_INTID(addr, 64); | ||
82 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid); | ||
83 | |||
84 | if (!irq) | ||
85 | return 0; | ||
86 | |||
87 | /* The upper word is RAZ for us. */ | ||
88 | if (addr & 4) | ||
89 | return 0; | ||
90 | |||
91 | return extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len); | ||
92 | } | ||
93 | |||
94 | static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, | ||
95 | gpa_t addr, unsigned int len, | ||
96 | unsigned long val) | ||
97 | { | ||
98 | int intid = VGIC_ADDR_TO_INTID(addr, 64); | ||
99 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid); | ||
100 | |||
101 | if (!irq) | ||
102 | return; | ||
103 | |||
104 | /* The upper word is WI for us since we don't implement Aff3. */ | ||
105 | if (addr & 4) | ||
106 | return; | ||
107 | |||
108 | spin_lock(&irq->irq_lock); | ||
109 | |||
110 | /* We only care about and preserve Aff0, Aff1 and Aff2. */ | ||
111 | irq->mpidr = val & GENMASK(23, 0); | ||
112 | irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr); | ||
113 | |||
114 | spin_unlock(&irq->irq_lock); | ||
115 | } | ||
116 | |||
117 | static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu, | ||
118 | gpa_t addr, unsigned int len) | ||
119 | { | ||
120 | unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu); | ||
121 | int target_vcpu_id = vcpu->vcpu_id; | ||
122 | u64 value; | ||
123 | |||
124 | value = (mpidr & GENMASK(23, 0)) << 32; | ||
125 | value |= ((target_vcpu_id & 0xffff) << 8); | ||
126 | if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) | ||
127 | value |= GICR_TYPER_LAST; | ||
128 | |||
129 | return extract_bytes(value, addr & 7, len); | ||
130 | } | ||
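As a worked example of the GICR_TYPER value this builds, here is a standalone sketch with hypothetical inputs (affinity goes in bits [55:32], the processor number in [23:8], and Last is bit 4):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* Hypothetical vcpu: MPIDR Aff1.Aff0 = 1.2, processor number 6,
         * and it happens to be the last redistributor. */
        uint64_t mpidr = (1 << 8) | 2;
        uint64_t typer = (mpidr & 0xffffff) << 32    /* affinity: bits [55:32] */
                       | (6 & 0xffff) << 8;          /* CPU number: bits [23:8] */

        typer |= 1 << 4;                             /* GICR_TYPER.Last */

        printf("GICR_TYPER = 0x%016llx\n", (unsigned long long)typer);
        return 0;    /* prints 0x0000010200000610 */
    }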
131 | |||
132 | static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu, | ||
133 | gpa_t addr, unsigned int len) | ||
134 | { | ||
135 | return (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); | ||
136 | } | ||
137 | |||
138 | static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu, | ||
139 | gpa_t addr, unsigned int len) | ||
140 | { | ||
141 | switch (addr & 0xffff) { | ||
142 | case GICD_PIDR2: | ||
143 | /* report a GICv3 compliant implementation */ | ||
144 | return 0x3b; | ||
145 | } | ||
146 | |||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | /* | ||
151 | * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the | ||
152 | * redistributors, while SPIs are covered by registers in the distributor | ||
153 | * block. Accesses to the private IRQs' bits in this block are ignored. | ||
154 | * We take some special care here to adjust the calculation of the | ||
155 | * register offset for the second (SPI) half of each pair. | ||
156 | */ | ||
157 | #define REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(off, rd, wr, bpi, acc) \ | ||
158 | { \ | ||
159 | .reg_offset = off, \ | ||
160 | .bits_per_irq = bpi, \ | ||
161 | .len = (bpi * VGIC_NR_PRIVATE_IRQS) / 8, \ | ||
162 | .access_flags = acc, \ | ||
163 | .read = vgic_mmio_read_raz, \ | ||
164 | .write = vgic_mmio_write_wi, \ | ||
165 | }, { \ | ||
166 | .reg_offset = off + (bpi * VGIC_NR_PRIVATE_IRQS) / 8, \ | ||
167 | .bits_per_irq = bpi, \ | ||
168 | .len = (bpi * (1024 - VGIC_NR_PRIVATE_IRQS)) / 8, \ | ||
169 | .access_flags = acc, \ | ||
170 | .read = rd, \ | ||
171 | .write = wr, \ | ||
172 | } | ||
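To make the two windows the macro generates concrete, here is a standalone calculation for the set-enable case (bpi = 1; 0x100 is the GICD_ISENABLER offset in the GICv3 memory map): the first four bytes (IRQs 0..31) become a RAZ/WI region and the remaining 124 bytes go to the real SPI handlers.

    #include <stdio.h>

    #define VGIC_NR_PRIVATE_IRQS 32

    int main(void)
    {
        unsigned int off = 0x100;    /* GICD_ISENABLER in the GICv3 memory map */
        unsigned int bpi = 1;        /* one enable bit per interrupt */

        /* First entry: the private-IRQ bits, handled as RAZ/WI. */
        printf("RAZ/WI window: 0x%x, len %u\n",
               off, bpi * VGIC_NR_PRIVATE_IRQS / 8);
        /* Second entry: the real set-enable handlers for the SPI bits. */
        printf("SPI window:    0x%x, len %u\n",
               off + bpi * VGIC_NR_PRIVATE_IRQS / 8,
               bpi * (1024 - VGIC_NR_PRIVATE_IRQS) / 8);
        return 0;    /* 0x100/len 4 and 0x104/len 124 */
    }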
173 | |||
174 | static const struct vgic_register_region vgic_v3_dist_registers[] = { | ||
175 | REGISTER_DESC_WITH_LENGTH(GICD_CTLR, | ||
176 | vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, 16, | ||
177 | VGIC_ACCESS_32bit), | ||
178 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR, | ||
179 | vgic_mmio_read_rao, vgic_mmio_write_wi, 1, | ||
180 | VGIC_ACCESS_32bit), | ||
181 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER, | ||
182 | vgic_mmio_read_enable, vgic_mmio_write_senable, 1, | ||
183 | VGIC_ACCESS_32bit), | ||
184 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICENABLER, | ||
185 | vgic_mmio_read_enable, vgic_mmio_write_cenable, 1, | ||
186 | VGIC_ACCESS_32bit), | ||
187 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR, | ||
188 | vgic_mmio_read_pending, vgic_mmio_write_spending, 1, | ||
189 | VGIC_ACCESS_32bit), | ||
190 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR, | ||
191 | vgic_mmio_read_pending, vgic_mmio_write_cpending, 1, | ||
192 | VGIC_ACCESS_32bit), | ||
193 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER, | ||
194 | vgic_mmio_read_active, vgic_mmio_write_sactive, 1, | ||
195 | VGIC_ACCESS_32bit), | ||
196 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER, | ||
197 | vgic_mmio_read_active, vgic_mmio_write_cactive, 1, | ||
198 | VGIC_ACCESS_32bit), | ||
199 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR, | ||
200 | vgic_mmio_read_priority, vgic_mmio_write_priority, 8, | ||
201 | VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), | ||
202 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ITARGETSR, | ||
203 | vgic_mmio_read_raz, vgic_mmio_write_wi, 8, | ||
204 | VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), | ||
205 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICFGR, | ||
206 | vgic_mmio_read_config, vgic_mmio_write_config, 2, | ||
207 | VGIC_ACCESS_32bit), | ||
208 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGRPMODR, | ||
209 | vgic_mmio_read_raz, vgic_mmio_write_wi, 1, | ||
210 | VGIC_ACCESS_32bit), | ||
211 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IROUTER, | ||
212 | vgic_mmio_read_irouter, vgic_mmio_write_irouter, 64, | ||
213 | VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), | ||
214 | REGISTER_DESC_WITH_LENGTH(GICD_IDREGS, | ||
215 | vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, | ||
216 | VGIC_ACCESS_32bit), | ||
217 | }; | ||
218 | |||
219 | static const struct vgic_register_region vgic_v3_rdbase_registers[] = { | ||
220 | REGISTER_DESC_WITH_LENGTH(GICR_CTLR, | ||
221 | vgic_mmio_read_raz, vgic_mmio_write_wi, 4, | ||
222 | VGIC_ACCESS_32bit), | ||
223 | REGISTER_DESC_WITH_LENGTH(GICR_IIDR, | ||
224 | vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4, | ||
225 | VGIC_ACCESS_32bit), | ||
226 | REGISTER_DESC_WITH_LENGTH(GICR_TYPER, | ||
227 | vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8, | ||
228 | VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), | ||
229 | REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER, | ||
230 | vgic_mmio_read_raz, vgic_mmio_write_wi, 8, | ||
231 | VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), | ||
232 | REGISTER_DESC_WITH_LENGTH(GICR_PENDBASER, | ||
233 | vgic_mmio_read_raz, vgic_mmio_write_wi, 8, | ||
234 | VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), | ||
235 | REGISTER_DESC_WITH_LENGTH(GICR_IDREGS, | ||
236 | vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, | ||
237 | VGIC_ACCESS_32bit), | ||
238 | }; | ||
239 | |||
240 | static const struct vgic_register_region vgic_v3_sgibase_registers[] = { | ||
241 | REGISTER_DESC_WITH_LENGTH(GICR_IGROUPR0, | ||
242 | vgic_mmio_read_rao, vgic_mmio_write_wi, 4, | ||
243 | VGIC_ACCESS_32bit), | ||
244 | REGISTER_DESC_WITH_LENGTH(GICR_ISENABLER0, | ||
245 | vgic_mmio_read_enable, vgic_mmio_write_senable, 4, | ||
246 | VGIC_ACCESS_32bit), | ||
247 | REGISTER_DESC_WITH_LENGTH(GICR_ICENABLER0, | ||
248 | vgic_mmio_read_enable, vgic_mmio_write_cenable, 4, | ||
249 | VGIC_ACCESS_32bit), | ||
250 | REGISTER_DESC_WITH_LENGTH(GICR_ISPENDR0, | ||
251 | vgic_mmio_read_pending, vgic_mmio_write_spending, 4, | ||
252 | VGIC_ACCESS_32bit), | ||
253 | REGISTER_DESC_WITH_LENGTH(GICR_ICPENDR0, | ||
254 | vgic_mmio_read_pending, vgic_mmio_write_cpending, 4, | ||
255 | VGIC_ACCESS_32bit), | ||
256 | REGISTER_DESC_WITH_LENGTH(GICR_ISACTIVER0, | ||
257 | vgic_mmio_read_active, vgic_mmio_write_sactive, 4, | ||
258 | VGIC_ACCESS_32bit), | ||
259 | REGISTER_DESC_WITH_LENGTH(GICR_ICACTIVER0, | ||
260 | vgic_mmio_read_active, vgic_mmio_write_cactive, 4, | ||
261 | VGIC_ACCESS_32bit), | ||
262 | REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0, | ||
263 | vgic_mmio_read_priority, vgic_mmio_write_priority, 32, | ||
264 | VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), | ||
265 | REGISTER_DESC_WITH_LENGTH(GICR_ICFGR0, | ||
266 | vgic_mmio_read_config, vgic_mmio_write_config, 8, | ||
267 | VGIC_ACCESS_32bit), | ||
268 | REGISTER_DESC_WITH_LENGTH(GICR_IGRPMODR0, | ||
269 | vgic_mmio_read_raz, vgic_mmio_write_wi, 4, | ||
270 | VGIC_ACCESS_32bit), | ||
271 | REGISTER_DESC_WITH_LENGTH(GICR_NSACR, | ||
272 | vgic_mmio_read_raz, vgic_mmio_write_wi, 4, | ||
273 | VGIC_ACCESS_32bit), | ||
274 | }; | ||
275 | |||
276 | unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev) | ||
277 | { | ||
278 | dev->regions = vgic_v3_dist_registers; | ||
279 | dev->nr_regions = ARRAY_SIZE(vgic_v3_dist_registers); | ||
280 | |||
281 | kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops); | ||
282 | |||
283 | return SZ_64K; | ||
284 | } | ||
285 | |||
286 | int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address) | ||
287 | { | ||
288 | int nr_vcpus = atomic_read(&kvm->online_vcpus); | ||
289 | struct kvm_vcpu *vcpu; | ||
290 | struct vgic_io_device *devices; | ||
291 | int c, ret = 0; | ||
292 | |||
293 | devices = kmalloc(sizeof(struct vgic_io_device) * nr_vcpus * 2, | ||
294 | GFP_KERNEL); | ||
295 | if (!devices) | ||
296 | return -ENOMEM; | ||
297 | |||
298 | kvm_for_each_vcpu(c, vcpu, kvm) { | ||
299 | gpa_t rd_base = redist_base_address + c * SZ_64K * 2; | ||
300 | gpa_t sgi_base = rd_base + SZ_64K; | ||
301 | struct vgic_io_device *rd_dev = &devices[c * 2]; | ||
302 | struct vgic_io_device *sgi_dev = &devices[c * 2 + 1]; | ||
303 | |||
304 | kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops); | ||
305 | rd_dev->base_addr = rd_base; | ||
306 | rd_dev->regions = vgic_v3_rdbase_registers; | ||
307 | rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers); | ||
308 | rd_dev->redist_vcpu = vcpu; | ||
309 | |||
310 | mutex_lock(&kvm->slots_lock); | ||
311 | ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base, | ||
312 | SZ_64K, &rd_dev->dev); | ||
313 | mutex_unlock(&kvm->slots_lock); | ||
314 | |||
315 | if (ret) | ||
316 | break; | ||
317 | |||
318 | kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops); | ||
319 | sgi_dev->base_addr = sgi_base; | ||
320 | sgi_dev->regions = vgic_v3_sgibase_registers; | ||
321 | sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers); | ||
322 | sgi_dev->redist_vcpu = vcpu; | ||
323 | |||
324 | mutex_lock(&kvm->slots_lock); | ||
325 | ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base, | ||
326 | SZ_64K, &sgi_dev->dev); | ||
327 | mutex_unlock(&kvm->slots_lock); | ||
328 | if (ret) { | ||
329 | kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, | ||
330 | &rd_dev->dev); | ||
331 | break; | ||
332 | } | ||
333 | } | ||
334 | |||
335 | if (ret) { | ||
336 | /* Registration for the current c failed, so unwind from the previous one. */ | ||
337 | for (c--; c >= 0; c--) { | ||
338 | kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, | ||
339 | &devices[c * 2].dev); | ||
340 | kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, | ||
341 | &devices[c * 2 + 1].dev); | ||
342 | } | ||
343 | kfree(devices); | ||
344 | } else { | ||
345 | kvm->arch.vgic.redist_iodevs = devices; | ||
346 | } | ||
347 | |||
348 | return ret; | ||
349 | } | ||
350 | |||
351 | /* | ||
352 | * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI | ||
353 | * generation register ICC_SGI1R_EL1) with a given VCPU. | ||
354 | * If the VCPU's MPIDR matches, return the level0 affinity, otherwise | ||
355 | * return -1. | ||
356 | */ | ||
357 | static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) | ||
358 | { | ||
359 | unsigned long affinity; | ||
360 | int level0; | ||
361 | |||
362 | /* | ||
363 | * Split the current VCPU's MPIDR into affinity level 0 and the | ||
364 | * rest, as this is what we have to compare against. | ||
365 | */ | ||
366 | affinity = kvm_vcpu_get_mpidr_aff(vcpu); | ||
367 | level0 = MPIDR_AFFINITY_LEVEL(affinity, 0); | ||
368 | affinity &= ~MPIDR_LEVEL_MASK; | ||
369 | |||
370 | /* bail out if the upper three levels don't match */ | ||
371 | if (sgi_aff != affinity) | ||
372 | return -1; | ||
373 | |||
374 | /* Is this VCPU's bit set in the mask? */ | ||
375 | if (!(sgi_cpu_mask & BIT(level0))) | ||
376 | return -1; | ||
377 | |||
378 | return level0; | ||
379 | } | ||
380 | |||
381 | /* | ||
382 | * The ICC_SGI* registers encode the affinity differently from the MPIDR, | ||
383 | * so provide a wrapper to use the existing defines to isolate a certain | ||
384 | * affinity level. | ||
385 | */ | ||
386 | #define SGI_AFFINITY_LEVEL(reg, level) \ | ||
387 | ((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \ | ||
388 | >> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level)) | ||
389 | |||
390 | /** | ||
391 | * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs | ||
392 | * @vcpu: The VCPU requesting a SGI | ||
393 | * @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU | ||
394 | * | ||
395 | * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register. | ||
396 | * This will trap in sys_regs.c and call this function. | ||
397 | * This ICC_SGI1R_EL1 register contains the upper three affinity levels of the | ||
398 | * target processors as well as a bitmask of 16 Aff0 CPUs. | ||
399 | * If the interrupt routing mode bit is not set, we iterate over all VCPUs to | ||
400 | * check for matching ones. If this bit is set, we signal all VCPUs except | ||
401 | * the calling one. | ||
402 | */ | ||
403 | void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) | ||
404 | { | ||
405 | struct kvm *kvm = vcpu->kvm; | ||
406 | struct kvm_vcpu *c_vcpu; | ||
407 | u16 target_cpus; | ||
408 | u64 mpidr; | ||
409 | int sgi, c; | ||
410 | int vcpu_id = vcpu->vcpu_id; | ||
411 | bool broadcast; | ||
412 | |||
413 | sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT; | ||
414 | broadcast = reg & BIT(ICC_SGI1R_IRQ_ROUTING_MODE_BIT); | ||
415 | target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT; | ||
416 | mpidr = SGI_AFFINITY_LEVEL(reg, 3); | ||
417 | mpidr |= SGI_AFFINITY_LEVEL(reg, 2); | ||
418 | mpidr |= SGI_AFFINITY_LEVEL(reg, 1); | ||
419 | |||
420 | /* | ||
421 | * We iterate over all VCPUs to find the MPIDRs matching the request. | ||
422 | * If we have handled one CPU, we clear its bit to detect early | ||
423 | * if we are already finished. This avoids iterating through all | ||
424 | * VCPUs when most of the time we just signal a single VCPU. | ||
425 | */ | ||
426 | kvm_for_each_vcpu(c, c_vcpu, kvm) { | ||
427 | struct vgic_irq *irq; | ||
428 | |||
429 | /* Exit early if we have dealt with all requested CPUs */ | ||
430 | if (!broadcast && target_cpus == 0) | ||
431 | break; | ||
432 | |||
433 | /* Don't signal the calling VCPU */ | ||
434 | if (broadcast && c == vcpu_id) | ||
435 | continue; | ||
436 | |||
437 | if (!broadcast) { | ||
438 | int level0; | ||
439 | |||
440 | level0 = match_mpidr(mpidr, target_cpus, c_vcpu); | ||
441 | if (level0 == -1) | ||
442 | continue; | ||
443 | |||
444 | /* remove this matching VCPU from the mask */ | ||
445 | target_cpus &= ~BIT(level0); | ||
446 | } | ||
447 | |||
448 | irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi); | ||
449 | |||
450 | spin_lock(&irq->irq_lock); | ||
451 | irq->pending = true; | ||
452 | |||
453 | vgic_queue_irq_unlock(vcpu->kvm, irq); | ||
454 | } | ||
455 | } | ||
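As a rough standalone sketch of how an ICC_SGI1R_EL1 value decomposes into the fields used above (bit positions restated locally from the GICv3 layout so the example compiles on its own):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* Bit positions restated from the GICv3 layout: TargetList [15:0],
         * Aff1 [23:16], SGI ID [27:24], IRM (broadcast) bit 40. */
        uint64_t reg = (1ULL << 3)      /* TargetList: Aff0 CPU 3 */
                     | (1ULL << 16)     /* Aff1 = 1 */
                     | (5ULL << 24);    /* SGI ID 5 */

        unsigned int sgi   = (reg >> 24) & 0xf;
        unsigned int aff1  = (reg >> 16) & 0xff;
        unsigned int tlist = reg & 0xffff;
        int broadcast      = !!(reg & (1ULL << 40));

        printf("SGI %u -> Aff1=%u, target mask 0x%x, broadcast=%d\n",
               sgi, aff1, tlist, broadcast);
        return 0;    /* SGI 5 -> Aff1=1, target mask 0x8, broadcast=0 */
    }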
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c new file mode 100644 index 000000000000..059595ec3da0 --- /dev/null +++ b/virt/kvm/arm/vgic/vgic-mmio.c | |||
@@ -0,0 +1,526 @@ | |||
1 | /* | ||
2 | * VGIC MMIO handling functions | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | */ | ||
13 | |||
14 | #include <linux/bitops.h> | ||
15 | #include <linux/bsearch.h> | ||
16 | #include <linux/kvm.h> | ||
17 | #include <linux/kvm_host.h> | ||
18 | #include <kvm/iodev.h> | ||
19 | #include <kvm/arm_vgic.h> | ||
20 | |||
21 | #include "vgic.h" | ||
22 | #include "vgic-mmio.h" | ||
23 | |||
24 | unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, | ||
25 | gpa_t addr, unsigned int len) | ||
26 | { | ||
27 | return 0; | ||
28 | } | ||
29 | |||
30 | unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, | ||
31 | gpa_t addr, unsigned int len) | ||
32 | { | ||
33 | return -1UL; | ||
34 | } | ||
35 | |||
36 | void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, | ||
37 | unsigned int len, unsigned long val) | ||
38 | { | ||
39 | /* Ignore */ | ||
40 | } | ||
41 | |||
42 | /* | ||
43 | * Read accesses to both GICD_ICENABLER and GICD_ISENABLER return the value | ||
44 | * of the enabled bit, so there is only one function for both here. | ||
45 | */ | ||
46 | unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu, | ||
47 | gpa_t addr, unsigned int len) | ||
48 | { | ||
49 | u32 intid = VGIC_ADDR_TO_INTID(addr, 1); | ||
50 | u32 value = 0; | ||
51 | int i; | ||
52 | |||
53 | /* Loop over all IRQs affected by this read */ | ||
54 | for (i = 0; i < len * 8; i++) { | ||
55 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
56 | |||
57 | if (irq->enabled) | ||
58 | value |= (1U << i); | ||
59 | } | ||
60 | |||
61 | return value; | ||
62 | } | ||
63 | |||
64 | void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, | ||
65 | gpa_t addr, unsigned int len, | ||
66 | unsigned long val) | ||
67 | { | ||
68 | u32 intid = VGIC_ADDR_TO_INTID(addr, 1); | ||
69 | int i; | ||
70 | |||
71 | for_each_set_bit(i, &val, len * 8) { | ||
72 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
73 | |||
74 | spin_lock(&irq->irq_lock); | ||
75 | irq->enabled = true; | ||
76 | vgic_queue_irq_unlock(vcpu->kvm, irq); | ||
77 | } | ||
78 | } | ||
79 | |||
80 | void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, | ||
81 | gpa_t addr, unsigned int len, | ||
82 | unsigned long val) | ||
83 | { | ||
84 | u32 intid = VGIC_ADDR_TO_INTID(addr, 1); | ||
85 | int i; | ||
86 | |||
87 | for_each_set_bit(i, &val, len * 8) { | ||
88 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
89 | |||
90 | spin_lock(&irq->irq_lock); | ||
91 | |||
92 | irq->enabled = false; | ||
93 | |||
94 | spin_unlock(&irq->irq_lock); | ||
95 | } | ||
96 | } | ||
97 | |||
98 | unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, | ||
99 | gpa_t addr, unsigned int len) | ||
100 | { | ||
101 | u32 intid = VGIC_ADDR_TO_INTID(addr, 1); | ||
102 | u32 value = 0; | ||
103 | int i; | ||
104 | |||
105 | /* Loop over all IRQs affected by this read */ | ||
106 | for (i = 0; i < len * 8; i++) { | ||
107 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
108 | |||
109 | if (irq->pending) | ||
110 | value |= (1U << i); | ||
111 | } | ||
112 | |||
113 | return value; | ||
114 | } | ||
115 | |||
116 | void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, | ||
117 | gpa_t addr, unsigned int len, | ||
118 | unsigned long val) | ||
119 | { | ||
120 | u32 intid = VGIC_ADDR_TO_INTID(addr, 1); | ||
121 | int i; | ||
122 | |||
123 | for_each_set_bit(i, &val, len * 8) { | ||
124 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
125 | |||
126 | spin_lock(&irq->irq_lock); | ||
127 | irq->pending = true; | ||
128 | if (irq->config == VGIC_CONFIG_LEVEL) | ||
129 | irq->soft_pending = true; | ||
130 | |||
131 | vgic_queue_irq_unlock(vcpu->kvm, irq); | ||
132 | } | ||
133 | } | ||
134 | |||
135 | void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, | ||
136 | gpa_t addr, unsigned int len, | ||
137 | unsigned long val) | ||
138 | { | ||
139 | u32 intid = VGIC_ADDR_TO_INTID(addr, 1); | ||
140 | int i; | ||
141 | |||
142 | for_each_set_bit(i, &val, len * 8) { | ||
143 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
144 | |||
145 | spin_lock(&irq->irq_lock); | ||
146 | |||
147 | if (irq->config == VGIC_CONFIG_LEVEL) { | ||
148 | irq->soft_pending = false; | ||
149 | irq->pending = irq->line_level; | ||
150 | } else { | ||
151 | irq->pending = false; | ||
152 | } | ||
153 | |||
154 | spin_unlock(&irq->irq_lock); | ||
155 | } | ||
156 | } | ||
157 | |||
158 | unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, | ||
159 | gpa_t addr, unsigned int len) | ||
160 | { | ||
161 | u32 intid = VGIC_ADDR_TO_INTID(addr, 1); | ||
162 | u32 value = 0; | ||
163 | int i; | ||
164 | |||
165 | /* Loop over all IRQs affected by this read */ | ||
166 | for (i = 0; i < len * 8; i++) { | ||
167 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
168 | |||
169 | if (irq->active) | ||
170 | value |= (1U << i); | ||
171 | } | ||
172 | |||
173 | return value; | ||
174 | } | ||
175 | |||
176 | static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, | ||
177 | bool new_active_state) | ||
178 | { | ||
179 | spin_lock(&irq->irq_lock); | ||
180 | /* | ||
181 | * If this virtual IRQ was written into a list register, we | ||
182 | * have to make sure the CPU that runs the VCPU thread has | ||
183 | * synced back LR state to the struct vgic_irq. We can only | ||
184 | * know this for sure when either this irq is not assigned to | ||
185 | * anyone's AP list anymore, or the VCPU thread is not | ||
186 | * running on any CPU. | ||
187 | * | ||
188 | * In the opposite case, we know the VCPU thread may be on its | ||
189 | * way back from the guest and still has to sync back this | ||
190 | * IRQ, so we release and re-acquire the spin_lock to let the | ||
191 | * other thread sync back the IRQ. | ||
192 | */ | ||
193 | while (irq->vcpu && /* IRQ may have state in an LR somewhere */ | ||
194 | irq->vcpu->cpu != -1) { /* VCPU thread is running */ | ||
195 | BUG_ON(irq->intid < VGIC_NR_PRIVATE_IRQS); | ||
196 | cond_resched_lock(&irq->irq_lock); | ||
197 | } | ||
198 | |||
199 | irq->active = new_active_state; | ||
200 | if (new_active_state) | ||
201 | vgic_queue_irq_unlock(vcpu->kvm, irq); | ||
202 | else | ||
203 | spin_unlock(&irq->irq_lock); | ||
204 | } | ||
205 | |||
206 | /* | ||
207 | * If we are fiddling with an IRQ's active state, we have to make sure the IRQ | ||
208 | * is not queued on some running VCPU's LRs, because then the change to the | ||
209 | * active state can be overwritten when the VCPU's state is synced coming back | ||
210 | * from the guest. | ||
211 | * | ||
212 | * For shared interrupts, we have to stop all the VCPUs because interrupts can | ||
213 | * be migrated while we don't hold the IRQ locks and we don't want to be | ||
214 | * chasing moving targets. | ||
215 | * | ||
216 | * For private interrupts, we only have to make sure the single and only VCPU | ||
217 | * that can potentially queue the IRQ is stopped. | ||
218 | */ | ||
219 | static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid) | ||
220 | { | ||
221 | if (intid < VGIC_NR_PRIVATE_IRQS) | ||
222 | kvm_arm_halt_vcpu(vcpu); | ||
223 | else | ||
224 | kvm_arm_halt_guest(vcpu->kvm); | ||
225 | } | ||
226 | |||
227 | /* See vgic_change_active_prepare */ | ||
228 | static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid) | ||
229 | { | ||
230 | if (intid < VGIC_NR_PRIVATE_IRQS) | ||
231 | kvm_arm_resume_vcpu(vcpu); | ||
232 | else | ||
233 | kvm_arm_resume_guest(vcpu->kvm); | ||
234 | } | ||
235 | |||
236 | void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, | ||
237 | gpa_t addr, unsigned int len, | ||
238 | unsigned long val) | ||
239 | { | ||
240 | u32 intid = VGIC_ADDR_TO_INTID(addr, 1); | ||
241 | int i; | ||
242 | |||
243 | vgic_change_active_prepare(vcpu, intid); | ||
244 | for_each_set_bit(i, &val, len * 8) { | ||
245 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
246 | vgic_mmio_change_active(vcpu, irq, false); | ||
247 | } | ||
248 | vgic_change_active_finish(vcpu, intid); | ||
249 | } | ||
250 | |||
251 | void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, | ||
252 | gpa_t addr, unsigned int len, | ||
253 | unsigned long val) | ||
254 | { | ||
255 | u32 intid = VGIC_ADDR_TO_INTID(addr, 1); | ||
256 | int i; | ||
257 | |||
258 | vgic_change_active_prepare(vcpu, intid); | ||
259 | for_each_set_bit(i, &val, len * 8) { | ||
260 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
261 | vgic_mmio_change_active(vcpu, irq, true); | ||
262 | } | ||
263 | vgic_change_active_finish(vcpu, intid); | ||
264 | } | ||
265 | |||
266 | unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, | ||
267 | gpa_t addr, unsigned int len) | ||
268 | { | ||
269 | u32 intid = VGIC_ADDR_TO_INTID(addr, 8); | ||
270 | int i; | ||
271 | u64 val = 0; | ||
272 | |||
273 | for (i = 0; i < len; i++) { | ||
274 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
275 | |||
276 | val |= (u64)irq->priority << (i * 8); | ||
277 | } | ||
278 | |||
279 | return val; | ||
280 | } | ||
281 | |||
282 | /* | ||
283 | * We currently don't handle changing the priority of an interrupt that | ||
284 | * is already pending on a VCPU. If there is a need for this, we would | ||
285 | * need to make this VCPU exit and re-evaluate the priorities, potentially | ||
286 | * leading to this interrupt getting presented now to the guest (if it has | ||
287 | * been masked by the priority mask before). | ||
288 | */ | ||
289 | void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, | ||
290 | gpa_t addr, unsigned int len, | ||
291 | unsigned long val) | ||
292 | { | ||
293 | u32 intid = VGIC_ADDR_TO_INTID(addr, 8); | ||
294 | int i; | ||
295 | |||
296 | for (i = 0; i < len; i++) { | ||
297 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
298 | |||
299 | spin_lock(&irq->irq_lock); | ||
300 | /* Narrow the priority range to what we actually support */ | ||
301 | irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS); | ||
302 | spin_unlock(&irq->irq_lock); | ||
303 | } | ||
304 | } | ||
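The GENMASK(7, 8 - VGIC_PRI_BITS) narrowing keeps only the top bits of the written priority; a tiny standalone check, assuming the 5-bit priority width the vgic emulates:

    #include <stdio.h>

    #define VGIC_PRI_BITS 5    /* assumed: 5 priority bits, as vgic.h defines */

    int main(void)
    {
        /* GENMASK(7, 8 - VGIC_PRI_BITS) keeps only the top 5 bits. */
        unsigned int mask = 0xff & ~((1u << (8 - VGIC_PRI_BITS)) - 1);
        unsigned int val = 0xab;    /* priority a guest might write */

        printf("mask=0x%x: 0x%x -> 0x%x\n", mask, val, val & mask);
        return 0;    /* mask=0xf8: 0xab -> 0xa8 */
    }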
305 | |||
306 | unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu, | ||
307 | gpa_t addr, unsigned int len) | ||
308 | { | ||
309 | u32 intid = VGIC_ADDR_TO_INTID(addr, 2); | ||
310 | u32 value = 0; | ||
311 | int i; | ||
312 | |||
313 | for (i = 0; i < len * 4; i++) { | ||
314 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
315 | |||
316 | if (irq->config == VGIC_CONFIG_EDGE) | ||
317 | value |= (2U << (i * 2)); | ||
318 | } | ||
319 | |||
320 | return value; | ||
321 | } | ||
322 | |||
323 | void vgic_mmio_write_config(struct kvm_vcpu *vcpu, | ||
324 | gpa_t addr, unsigned int len, | ||
325 | unsigned long val) | ||
326 | { | ||
327 | u32 intid = VGIC_ADDR_TO_INTID(addr, 2); | ||
328 | int i; | ||
329 | |||
330 | for (i = 0; i < len * 4; i++) { | ||
331 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
332 | |||
333 | /* | ||
334 | * The configuration cannot be changed for SGIs in general; | ||
335 | * for PPIs this is IMPLEMENTATION DEFINED. The arch timer | ||
336 | * code relies on PPIs being level triggered, so we also | ||
337 | * make them read-only here. | ||
338 | */ | ||
339 | if (intid + i < VGIC_NR_PRIVATE_IRQS) | ||
340 | continue; | ||
341 | |||
342 | spin_lock(&irq->irq_lock); | ||
343 | if (test_bit(i * 2 + 1, &val)) { | ||
344 | irq->config = VGIC_CONFIG_EDGE; | ||
345 | } else { | ||
346 | irq->config = VGIC_CONFIG_LEVEL; | ||
347 | irq->pending = irq->line_level | irq->soft_pending; | ||
348 | } | ||
349 | spin_unlock(&irq->irq_lock); | ||
350 | } | ||
351 | } | ||
352 | |||
353 | static int match_region(const void *key, const void *elt) | ||
354 | { | ||
355 | const unsigned int offset = (unsigned long)key; | ||
356 | const struct vgic_register_region *region = elt; | ||
357 | |||
358 | if (offset < region->reg_offset) | ||
359 | return -1; | ||
360 | |||
361 | if (offset >= region->reg_offset + region->len) | ||
362 | return 1; | ||
363 | |||
364 | return 0; | ||
365 | } | ||
366 | |||
367 | /* Find the proper register handler entry given a certain address offset. */ | ||
368 | static const struct vgic_register_region * | ||
369 | vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions, | ||
370 | unsigned int offset) | ||
371 | { | ||
372 | return bsearch((void *)(uintptr_t)offset, region, nr_regions, | ||
373 | sizeof(region[0]), match_region); | ||
374 | } | ||
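Note that this lookup only works because the register tables are declared in ascending reg_offset order, which is what bsearch() requires; keeping them sorted is part of their contract. A cut-down userspace sketch of the same scheme (struct region is a local stand-in for vgic_register_region):

    #include <stdio.h>
    #include <stdlib.h>

    /* Local stand-in for struct vgic_register_region. */
    struct region { unsigned int off, len; };

    static int match_region(const void *key, const void *elt)
    {
        unsigned long offset = (unsigned long)key;
        const struct region *r = elt;

        if (offset < r->off)
            return -1;
        if (offset >= r->off + r->len)
            return 1;
        return 0;
    }

    int main(void)
    {
        /* Must stay sorted by offset, as bsearch() requires. */
        struct region regs[] = { { 0x000, 0x10 }, { 0x100, 0x80 } };
        struct region *hit = bsearch((void *)0x104UL, regs, 2,
                                     sizeof(regs[0]), match_region);

        printf("offset 0x104 -> region at 0x%x\n", hit ? hit->off : 0);
        return 0;    /* region at 0x100 */
    }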
375 | |||
376 | /* | ||
377 | * kvm_mmio_read_buf() returns a value in a format where it can be converted | ||
378 | * to a byte array and be directly observed as the guest wanted it to appear | ||
379 | * in memory if it had done the store itself. For the GIC that format is | ||
380 | * LE, as the guest knows the GIC is always LE. | ||
381 | * | ||
382 | * We convert this value to the CPU's native format to deal with it as a data | ||
383 | * value. | ||
384 | */ | ||
385 | unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len) | ||
386 | { | ||
387 | unsigned long data = kvm_mmio_read_buf(val, len); | ||
388 | |||
389 | switch (len) { | ||
390 | case 1: | ||
391 | return data; | ||
392 | case 2: | ||
393 | return le16_to_cpu(data); | ||
394 | case 4: | ||
395 | return le32_to_cpu(data); | ||
396 | default: | ||
397 | return le64_to_cpu(data); | ||
398 | } | ||
399 | } | ||
400 | |||
401 | /* | ||
402 | * kvm_mmio_write_buf() expects a value in a format such that if converted to | ||
403 | * a byte array it is observed as the guest would see it if it could perform | ||
404 | * the load directly. Since the GIC is LE, and the guest knows this, the | ||
405 | * guest expects a value in little endian format. | ||
406 | * | ||
407 | * We convert the data value from the CPU's native format to LE so that the | ||
408 | * value is returned in the proper format. | ||
409 | */ | ||
410 | void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, | ||
411 | unsigned long data) | ||
412 | { | ||
413 | switch (len) { | ||
414 | case 1: | ||
415 | break; | ||
416 | case 2: | ||
417 | data = cpu_to_le16(data); | ||
418 | break; | ||
419 | case 4: | ||
420 | data = cpu_to_le32(data); | ||
421 | break; | ||
422 | default: | ||
423 | data = cpu_to_le64(data); | ||
424 | } | ||
425 | |||
426 | kvm_mmio_write_buf(buf, len, data); | ||
427 | } | ||
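The same bus/host conversion idea in self-contained userspace form, with the conversions open-coded instead of using the kernel's le*_to_cpu helpers (the byte buffer always carries the little-endian image, whatever the host endianness):

    #include <stdio.h>
    #include <stdint.h>

    /* The GIC is always little-endian, so the byte buffer holds the LE
     * image of the value regardless of the host's endianness. */
    static uint32_t bus_to_host32(const uint8_t *buf)
    {
        return (uint32_t)buf[0] | (uint32_t)buf[1] << 8 |
               (uint32_t)buf[2] << 16 | (uint32_t)buf[3] << 24;
    }

    static void host_to_bus32(uint8_t *buf, uint32_t val)
    {
        buf[0] = val;
        buf[1] = val >> 8;
        buf[2] = val >> 16;
        buf[3] = val >> 24;
    }

    int main(void)
    {
        uint8_t buf[4];

        host_to_bus32(buf, 0x12345678);
        printf("LE bytes %02x %02x %02x %02x -> 0x%x\n",
               buf[0], buf[1], buf[2], buf[3], bus_to_host32(buf));
        return 0;    /* LE bytes 78 56 34 12 -> 0x12345678 */
    }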
428 | |||
429 | static | ||
430 | struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev) | ||
431 | { | ||
432 | return container_of(dev, struct vgic_io_device, dev); | ||
433 | } | ||
434 | |||
435 | static bool check_region(const struct vgic_register_region *region, | ||
436 | gpa_t addr, int len) | ||
437 | { | ||
438 | if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1) | ||
439 | return true; | ||
440 | if ((region->access_flags & VGIC_ACCESS_32bit) && | ||
441 | len == sizeof(u32) && !(addr & 3)) | ||
442 | return true; | ||
443 | if ((region->access_flags & VGIC_ACCESS_64bit) && | ||
444 | len == sizeof(u64) && !(addr & 7)) | ||
445 | return true; | ||
446 | |||
447 | return false; | ||
448 | } | ||
449 | |||
450 | static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, | ||
451 | gpa_t addr, int len, void *val) | ||
452 | { | ||
453 | struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); | ||
454 | const struct vgic_register_region *region; | ||
455 | struct kvm_vcpu *r_vcpu; | ||
456 | unsigned long data; | ||
457 | |||
458 | region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, | ||
459 | addr - iodev->base_addr); | ||
460 | if (!region || !check_region(region, addr, len)) { | ||
461 | memset(val, 0, len); | ||
462 | return 0; | ||
463 | } | ||
464 | |||
465 | r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; | ||
466 | data = region->read(r_vcpu, addr, len); | ||
467 | vgic_data_host_to_mmio_bus(val, len, data); | ||
468 | return 0; | ||
469 | } | ||
470 | |||
471 | static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, | ||
472 | gpa_t addr, int len, const void *val) | ||
473 | { | ||
474 | struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); | ||
475 | const struct vgic_register_region *region; | ||
476 | struct kvm_vcpu *r_vcpu; | ||
477 | unsigned long data = vgic_data_mmio_bus_to_host(val, len); | ||
478 | |||
479 | region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, | ||
480 | addr - iodev->base_addr); | ||
481 | if (!region) | ||
482 | return 0; | ||
483 | |||
484 | if (!check_region(region, addr, len)) | ||
485 | return 0; | ||
486 | |||
487 | r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; | ||
488 | region->write(r_vcpu, addr, len, data); | ||
489 | return 0; | ||
490 | } | ||
491 | |||
492 | struct kvm_io_device_ops kvm_io_gic_ops = { | ||
493 | .read = dispatch_mmio_read, | ||
494 | .write = dispatch_mmio_write, | ||
495 | }; | ||
496 | |||
497 | int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, | ||
498 | enum vgic_type type) | ||
499 | { | ||
500 | struct vgic_io_device *io_device = &kvm->arch.vgic.dist_iodev; | ||
501 | int ret = 0; | ||
502 | unsigned int len; | ||
503 | |||
504 | switch (type) { | ||
505 | case VGIC_V2: | ||
506 | len = vgic_v2_init_dist_iodev(io_device); | ||
507 | break; | ||
508 | #ifdef CONFIG_KVM_ARM_VGIC_V3 | ||
509 | case VGIC_V3: | ||
510 | len = vgic_v3_init_dist_iodev(io_device); | ||
511 | break; | ||
512 | #endif | ||
513 | default: | ||
514 | BUG_ON(1); | ||
515 | } | ||
516 | |||
517 | io_device->base_addr = dist_base_address; | ||
518 | io_device->redist_vcpu = NULL; | ||
519 | |||
520 | mutex_lock(&kvm->slots_lock); | ||
521 | ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address, | ||
522 | len, &io_device->dev); | ||
523 | mutex_unlock(&kvm->slots_lock); | ||
524 | |||
525 | return ret; | ||
526 | } | ||
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h new file mode 100644 index 000000000000..850901482aec --- /dev/null +++ b/virt/kvm/arm/vgic/vgic-mmio.h | |||
@@ -0,0 +1,150 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2015, 2016 ARM Ltd. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | #ifndef __KVM_ARM_VGIC_MMIO_H__ | ||
17 | #define __KVM_ARM_VGIC_MMIO_H__ | ||
18 | |||
19 | struct vgic_register_region { | ||
20 | unsigned int reg_offset; | ||
21 | unsigned int len; | ||
22 | unsigned int bits_per_irq; | ||
23 | unsigned int access_flags; | ||
24 | unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr, | ||
25 | unsigned int len); | ||
26 | void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, | ||
27 | unsigned long val); | ||
28 | }; | ||
29 | |||
30 | extern struct kvm_io_device_ops kvm_io_gic_ops; | ||
31 | |||
32 | #define VGIC_ACCESS_8bit 1 | ||
33 | #define VGIC_ACCESS_32bit 2 | ||
34 | #define VGIC_ACCESS_64bit 4 | ||
35 | |||
36 | /* | ||
37 | * Generate a mask that covers the number of bytes required to address | ||
38 | * up to 1024 interrupts, each represented by <bits> bits. This assumes | ||
39 | * that <bits> is a power of two. | ||
40 | */ | ||
41 | #define VGIC_ADDR_IRQ_MASK(bits) (((bits) * 1024 / 8) - 1) | ||
42 | |||
43 | /* | ||
44 | * (addr & mask) gives us the byte offset for the INT ID, so we want to | ||
45 | * divide this by 'bytes per irq' to get the INT ID, where 'bytes per | ||
46 | * irq' is '(bits) / 8'. But we do this with fixed-point arithmetic and | ||
47 | * take advantage of the fact that division by a fraction equals | ||
48 | * multiplication by the inverted fraction, scaling up both the | ||
49 | * numerator and denominator by 8 to support at most 64 bits per IRQ: | ||
50 | */ | ||
51 | #define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \ | ||
52 | 64 / (bits) / 8) | ||
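Two worked examples using the macros verbatim: with 1 bit per IRQ (the enable registers) a byte offset of 4 is the second 32-bit word and maps to INTID 32, and with 8 bits per IRQ (the priority registers) byte offset 0x10 maps to INTID 16.

    #include <stdio.h>

    #define VGIC_ADDR_IRQ_MASK(bits) (((bits) * 1024 / 8) - 1)
    #define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \
                                            64 / (bits) / 8)

    int main(void)
    {
        /* 1 bit/IRQ (enable regs): byte offset 4 is the second 32-bit word. */
        printf("%d\n", VGIC_ADDR_TO_INTID(0x4, 1));     /* prints 32 */
        /* 8 bits/IRQ (priority regs): one byte per interrupt. */
        printf("%d\n", VGIC_ADDR_TO_INTID(0x10, 8));    /* prints 16 */
        return 0;
    }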
53 | |||
54 | /* | ||
55 | * Some VGIC registers store per-IRQ information, with a different number | ||
56 | * of bits per IRQ. For those registers this macro is used. | ||
57 | * The _WITH_LENGTH version instantiates registers with a fixed length | ||
58 | * and is mutually exclusive with the _PER_IRQ version. | ||
59 | */ | ||
60 | #define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc) \ | ||
61 | { \ | ||
62 | .reg_offset = off, \ | ||
63 | .bits_per_irq = bpi, \ | ||
64 | .len = bpi * 1024 / 8, \ | ||
65 | .access_flags = acc, \ | ||
66 | .read = rd, \ | ||
67 | .write = wr, \ | ||
68 | } | ||
69 | |||
70 | #define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \ | ||
71 | { \ | ||
72 | .reg_offset = off, \ | ||
73 | .bits_per_irq = 0, \ | ||
74 | .len = length, \ | ||
75 | .access_flags = acc, \ | ||
76 | .read = rd, \ | ||
77 | .write = wr, \ | ||
78 | } | ||
79 | |||
80 | int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu, | ||
81 | struct vgic_register_region *reg_desc, | ||
82 | struct vgic_io_device *region, | ||
83 | int nr_irqs, bool offset_private); | ||
84 | |||
85 | unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len); | ||
86 | |||
87 | void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, | ||
88 | unsigned long data); | ||
89 | |||
90 | unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, | ||
91 | gpa_t addr, unsigned int len); | ||
92 | |||
93 | unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, | ||
94 | gpa_t addr, unsigned int len); | ||
95 | |||
96 | void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, | ||
97 | unsigned int len, unsigned long val); | ||
98 | |||
99 | unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu, | ||
100 | gpa_t addr, unsigned int len); | ||
101 | |||
102 | void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, | ||
103 | gpa_t addr, unsigned int len, | ||
104 | unsigned long val); | ||
105 | |||
106 | void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, | ||
107 | gpa_t addr, unsigned int len, | ||
108 | unsigned long val); | ||
109 | |||
110 | unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, | ||
111 | gpa_t addr, unsigned int len); | ||
112 | |||
113 | void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, | ||
114 | gpa_t addr, unsigned int len, | ||
115 | unsigned long val); | ||
116 | |||
117 | void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, | ||
118 | gpa_t addr, unsigned int len, | ||
119 | unsigned long val); | ||
120 | |||
121 | unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, | ||
122 | gpa_t addr, unsigned int len); | ||
123 | |||
124 | void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, | ||
125 | gpa_t addr, unsigned int len, | ||
126 | unsigned long val); | ||
127 | |||
128 | void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, | ||
129 | gpa_t addr, unsigned int len, | ||
130 | unsigned long val); | ||
131 | |||
132 | unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, | ||
133 | gpa_t addr, unsigned int len); | ||
134 | |||
135 | void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, | ||
136 | gpa_t addr, unsigned int len, | ||
137 | unsigned long val); | ||
138 | |||
139 | unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu, | ||
140 | gpa_t addr, unsigned int len); | ||
141 | |||
142 | void vgic_mmio_write_config(struct kvm_vcpu *vcpu, | ||
143 | gpa_t addr, unsigned int len, | ||
144 | unsigned long val); | ||
145 | |||
146 | unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev); | ||
147 | |||
148 | unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev); | ||
149 | |||
150 | #endif | ||
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c new file mode 100644 index 000000000000..8ad42c217770 --- /dev/null +++ b/virt/kvm/arm/vgic/vgic-v2.c | |||
@@ -0,0 +1,352 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2015, 2016 ARM Ltd. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/irqchip/arm-gic.h> | ||
18 | #include <linux/kvm.h> | ||
19 | #include <linux/kvm_host.h> | ||
20 | #include <kvm/arm_vgic.h> | ||
21 | #include <asm/kvm_mmu.h> | ||
22 | |||
23 | #include "vgic.h" | ||
24 | |||
25 | /* | ||
26 | * Call this function to convert a u64 value to an unsigned long * bitmask | ||
27 | * in a way that works on both 32-bit and 64-bit LE and BE platforms. | ||
28 | * | ||
29 | * Warning: Calling this function may modify *val. | ||
30 | */ | ||
31 | static unsigned long *u64_to_bitmask(u64 *val) | ||
32 | { | ||
33 | #if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32 | ||
34 | *val = (*val >> 32) | (*val << 32); | ||
35 | #endif | ||
36 | return (unsigned long *)val; | ||
37 | } | ||
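The swap is just an exchange of the two 32-bit halves, shown unconditionally in this standalone sketch (the kernel only performs it on 32-bit big-endian builds, where the bitmap word order would otherwise be wrong):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t val = 0x0000000100000000ULL;    /* bit 32 set */
        uint64_t swapped = (val >> 32) | (val << 32);

        printf("%016llx -> %016llx\n",
               (unsigned long long)val, (unsigned long long)swapped);
        return 0;    /* bit 32 of the u64 becomes bit 0 of the first word */
    }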
38 | |||
39 | void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu) | ||
40 | { | ||
41 | struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; | ||
42 | |||
43 | if (cpuif->vgic_misr & GICH_MISR_EOI) { | ||
44 | u64 eisr = cpuif->vgic_eisr; | ||
45 | unsigned long *eisr_bmap = u64_to_bitmask(&eisr); | ||
46 | int lr; | ||
47 | |||
48 | for_each_set_bit(lr, eisr_bmap, kvm_vgic_global_state.nr_lr) { | ||
49 | u32 intid = cpuif->vgic_lr[lr] & GICH_LR_VIRTUALID; | ||
50 | |||
51 | WARN_ON(cpuif->vgic_lr[lr] & GICH_LR_STATE); | ||
52 | |||
53 | kvm_notify_acked_irq(vcpu->kvm, 0, | ||
54 | intid - VGIC_NR_PRIVATE_IRQS); | ||
55 | } | ||
56 | } | ||
57 | |||
58 | /* Disable the underflow maintenance IRQ; vgic_v2_set_underflow() re-arms it */ | ||
59 | cpuif->vgic_hcr &= ~GICH_HCR_UIE; | ||
60 | |||
61 | /* | ||
62 | * On subsequent iterations of the vcpu loop we may sync the | ||
63 | * vgic state after flushing it, but before entering the guest | ||
64 | * (this happens for pending signals and vmid rollovers); make | ||
65 | * sure we don't pick up any old maintenance interrupts | ||
66 | * here. | ||
67 | */ | ||
68 | cpuif->vgic_eisr = 0; | ||
69 | } | ||
70 | |||
71 | void vgic_v2_set_underflow(struct kvm_vcpu *vcpu) | ||
72 | { | ||
73 | struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; | ||
74 | |||
75 | cpuif->vgic_hcr |= GICH_HCR_UIE; | ||
76 | } | ||
77 | |||
78 | /* | ||
79 | * transfer the content of the LRs back into the corresponding ap_list: | ||
80 | * - active bit is transferred as is | ||
81 | * - pending bit is | ||
82 | * - transferred as is in case of edge sensitive IRQs | ||
83 | * - set to the line-level (resample time) for level sensitive IRQs | ||
84 | */ | ||
85 | void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) | ||
86 | { | ||
87 | struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; | ||
88 | int lr; | ||
89 | |||
90 | for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) { | ||
91 | u32 val = cpuif->vgic_lr[lr]; | ||
92 | u32 intid = val & GICH_LR_VIRTUALID; | ||
93 | struct vgic_irq *irq; | ||
94 | |||
95 | irq = vgic_get_irq(vcpu->kvm, vcpu, intid); | ||
96 | |||
97 | spin_lock(&irq->irq_lock); | ||
98 | |||
99 | /* Always preserve the active bit */ | ||
100 | irq->active = !!(val & GICH_LR_ACTIVE_BIT); | ||
101 | |||
102 | /* Edge is the only case where we preserve the pending bit */ | ||
103 | if (irq->config == VGIC_CONFIG_EDGE && | ||
104 | (val & GICH_LR_PENDING_BIT)) { | ||
105 | irq->pending = true; | ||
106 | |||
107 | if (vgic_irq_is_sgi(intid)) { | ||
108 | u32 cpuid = val & GICH_LR_PHYSID_CPUID; | ||
109 | |||
110 | cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; | ||
111 | irq->source |= (1 << cpuid); | ||
112 | } | ||
113 | } | ||
114 | |||
115 | /* Clear soft pending state when level IRQs have been acked */ | ||
116 | if (irq->config == VGIC_CONFIG_LEVEL && | ||
117 | !(val & GICH_LR_PENDING_BIT)) { | ||
118 | irq->soft_pending = false; | ||
119 | irq->pending = irq->line_level; | ||
120 | } | ||
121 | |||
122 | spin_unlock(&irq->irq_lock); | ||
123 | } | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * Populates the particular LR with the state of a given IRQ: | ||
128 | * - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq | ||
129 | * - for a level sensitive IRQ the pending state value is unchanged; | ||
130 | * it is dictated directly by the input level | ||
131 | * | ||
132 | * If @irq describes an SGI with multiple sources, we choose the | ||
133 | * lowest-numbered source VCPU and clear that bit in the source bitmap. | ||
134 | * | ||
135 | * The irq_lock must be held by the caller. | ||
136 | */ | ||
137 | void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) | ||
138 | { | ||
139 | u32 val = irq->intid; | ||
140 | |||
141 | if (irq->pending) { | ||
142 | val |= GICH_LR_PENDING_BIT; | ||
143 | |||
144 | if (irq->config == VGIC_CONFIG_EDGE) | ||
145 | irq->pending = false; | ||
146 | |||
147 | if (vgic_irq_is_sgi(irq->intid)) { | ||
148 | u32 src = ffs(irq->source); | ||
149 | |||
150 | BUG_ON(!src); | ||
151 | val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; | ||
152 | irq->source &= ~(1 << (src - 1)); | ||
153 | if (irq->source) | ||
154 | irq->pending = true; | ||
155 | } | ||
156 | } | ||
157 | |||
158 | if (irq->active) | ||
159 | val |= GICH_LR_ACTIVE_BIT; | ||
160 | |||
161 | if (irq->hw) { | ||
162 | val |= GICH_LR_HW; | ||
163 | val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; | ||
164 | } else { | ||
165 | if (irq->config == VGIC_CONFIG_LEVEL) | ||
166 | val |= GICH_LR_EOI; | ||
167 | } | ||
168 | |||
169 | /* The GICv2 LR only holds five bits of priority. */ | ||
170 | val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT; | ||
171 | |||
172 | vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val; | ||
173 | } | ||
174 | |||
175 | void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr) | ||
176 | { | ||
177 | vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = 0; | ||
178 | } | ||
179 | |||
180 | void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) | ||
181 | { | ||
182 | u32 vmcr; | ||
183 | |||
184 | vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK; | ||
185 | vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & | ||
186 | GICH_VMCR_ALIAS_BINPOINT_MASK; | ||
187 | vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & | ||
188 | GICH_VMCR_BINPOINT_MASK; | ||
189 | vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & | ||
190 | GICH_VMCR_PRIMASK_MASK; | ||
191 | |||
192 | vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; | ||
193 | } | ||
194 | |||
195 | void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) | ||
196 | { | ||
197 | u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr; | ||
198 | |||
199 | vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> | ||
200 | GICH_VMCR_CTRL_SHIFT; | ||
201 | vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> | ||
202 | GICH_VMCR_ALIAS_BINPOINT_SHIFT; | ||
203 | vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> | ||
204 | GICH_VMCR_BINPOINT_SHIFT; | ||
205 | vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> | ||
206 | GICH_VMCR_PRIMASK_SHIFT; | ||
207 | } | ||
208 | |||
209 | void vgic_v2_enable(struct kvm_vcpu *vcpu) | ||
210 | { | ||
211 | /* | ||
212 | * Forcing VMCR to zero makes the GIC restore the binary | ||
213 | * points to their reset values. Anything else resets to zero | ||
214 | * anyway. | ||
215 | */ | ||
216 | vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; | ||
217 | vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0; | ||
218 | |||
219 | /* Get the show on the road... */ | ||
220 | vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; | ||
221 | } | ||
222 | |||
223 | /* check for overlapping regions and for regions crossing the end of memory */ | ||
224 | static bool vgic_v2_check_base(gpa_t dist_base, gpa_t cpu_base) | ||
225 | { | ||
226 | if (dist_base + KVM_VGIC_V2_DIST_SIZE < dist_base) | ||
227 | return false; | ||
228 | if (cpu_base + KVM_VGIC_V2_CPU_SIZE < cpu_base) | ||
229 | return false; | ||
230 | |||
231 | if (dist_base + KVM_VGIC_V2_DIST_SIZE <= cpu_base) | ||
232 | return true; | ||
233 | if (cpu_base + KVM_VGIC_V2_CPU_SIZE <= dist_base) | ||
234 | return true; | ||
235 | |||
236 | return false; | ||
237 | } | ||
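A quick standalone check of that logic (the two sizes below are illustrative stand-ins for KVM_VGIC_V2_DIST_SIZE and KVM_VGIC_V2_CPU_SIZE):

    #include <stdio.h>

    #define DIST_SIZE 0x1000UL    /* stand-in for KVM_VGIC_V2_DIST_SIZE */
    #define CPU_SIZE  0x2000UL    /* stand-in for KVM_VGIC_V2_CPU_SIZE */

    static int check_base(unsigned long dist, unsigned long cpu)
    {
        if (dist + DIST_SIZE < dist || cpu + CPU_SIZE < cpu)
            return 0;                      /* wraps past the end of memory */
        return dist + DIST_SIZE <= cpu     /* dist frame entirely below cpu */
            || cpu + CPU_SIZE <= dist;     /* or entirely above it */
    }

    int main(void)
    {
        printf("%d\n", check_base(0x8000000, 0x8001000));    /* 1: adjacent, ok */
        printf("%d\n", check_base(0x8000000, 0x8000800));    /* 0: overlap */
        return 0;
    }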
238 | |||
239 | int vgic_v2_map_resources(struct kvm *kvm) | ||
240 | { | ||
241 | struct vgic_dist *dist = &kvm->arch.vgic; | ||
242 | int ret = 0; | ||
243 | |||
244 | if (vgic_ready(kvm)) | ||
245 | goto out; | ||
246 | |||
247 | if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || | ||
248 | IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { | ||
249 | kvm_err("Need to set vgic cpu and dist addresses first\n"); | ||
250 | ret = -ENXIO; | ||
251 | goto out; | ||
252 | } | ||
253 | |||
254 | if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) { | ||
255 | kvm_err("VGIC CPU and dist frames overlap\n"); | ||
256 | ret = -EINVAL; | ||
257 | goto out; | ||
258 | } | ||
259 | |||
260 | /* | ||
261 | * Initialize the vgic if this hasn't already been done on demand by | ||
262 | * accessing the vgic state from userspace. | ||
263 | */ | ||
264 | ret = vgic_init(kvm); | ||
265 | if (ret) { | ||
266 | kvm_err("Unable to initialize VGIC dynamic data structures\n"); | ||
267 | goto out; | ||
268 | } | ||
269 | |||
270 | ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2); | ||
271 | if (ret) { | ||
272 | kvm_err("Unable to register VGIC MMIO regions\n"); | ||
273 | goto out; | ||
274 | } | ||
275 | |||
276 | ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, | ||
277 | kvm_vgic_global_state.vcpu_base, | ||
278 | KVM_VGIC_V2_CPU_SIZE, true); | ||
279 | if (ret) { | ||
280 | kvm_err("Unable to remap VGIC CPU to VCPU\n"); | ||
281 | goto out; | ||
282 | } | ||
283 | |||
284 | dist->ready = true; | ||
285 | |||
286 | out: | ||
287 | if (ret) | ||
288 | kvm_vgic_destroy(kvm); | ||
289 | return ret; | ||
290 | } | ||
291 | |||
292 | /** | ||
293 | * vgic_v2_probe - probe for a GICv2 compatible interrupt controller | ||
294 | * @info: pointer to the GIC KVM info structure | ||
295 | * | ||
296 | * Returns 0 if a GICv2 has been found, returns an error code otherwise | ||
297 | */ | ||
298 | int vgic_v2_probe(const struct gic_kvm_info *info) | ||
299 | { | ||
300 | int ret; | ||
301 | u32 vtr; | ||
302 | |||
303 | if (!info->vctrl.start) { | ||
304 | kvm_err("GICH not present in the firmware table\n"); | ||
305 | return -ENXIO; | ||
306 | } | ||
307 | |||
308 | if (!PAGE_ALIGNED(info->vcpu.start)) { | ||
309 | kvm_err("GICV physical address 0x%llx not page aligned\n", | ||
310 | (unsigned long long)info->vcpu.start); | ||
311 | return -ENXIO; | ||
312 | } | ||
313 | |||
314 | if (!PAGE_ALIGNED(resource_size(&info->vcpu))) { | ||
315 | kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n", | ||
316 | (unsigned long long)resource_size(&info->vcpu), | ||
317 | PAGE_SIZE); | ||
318 | return -ENXIO; | ||
319 | } | ||
320 | |||
321 | kvm_vgic_global_state.vctrl_base = ioremap(info->vctrl.start, | ||
322 | resource_size(&info->vctrl)); | ||
323 | if (!kvm_vgic_global_state.vctrl_base) { | ||
324 | kvm_err("Cannot ioremap GICH\n"); | ||
325 | return -ENOMEM; | ||
326 | } | ||
327 | |||
328 | vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR); | ||
329 | kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1; | ||
330 | |||
331 | ret = create_hyp_io_mappings(kvm_vgic_global_state.vctrl_base, | ||
332 | kvm_vgic_global_state.vctrl_base + | ||
333 | resource_size(&info->vctrl), | ||
334 | info->vctrl.start); | ||
335 | |||
336 | if (ret) { | ||
337 | kvm_err("Cannot map VCTRL into hyp\n"); | ||
338 | iounmap(kvm_vgic_global_state.vctrl_base); | ||
339 | return ret; | ||
340 | } | ||
341 | |||
342 | kvm_vgic_global_state.can_emulate_gicv2 = true; | ||
343 | kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); | ||
344 | |||
345 | kvm_vgic_global_state.vcpu_base = info->vcpu.start; | ||
346 | kvm_vgic_global_state.type = VGIC_V2; | ||
347 | kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS; | ||
348 | |||
349 | kvm_info("vgic-v2@%llx\n", info->vctrl.start); | ||
350 | |||
351 | return 0; | ||
352 | } | ||
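vgic_v2_set_vmcr()/vgic_v2_get_vmcr() above are a pure pack/unpack pair: every field follows the same (value << shift) & mask and (reg & mask) >> shift pattern, so packing then unpacking is lossless as long as each input fits its field. A standalone sketch of the round trip with an invented two-field layout (the real GICH_VMCR_* positions are architectural, not these):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Invented layout for illustration only. */
#define CTRL_SHIFT	0
#define CTRL_MASK	(0x1fU << CTRL_SHIFT)
#define PMR_SHIFT	27
#define PMR_MASK	(0x1fU << PMR_SHIFT)

struct vmcr { uint32_t ctlr, pmr; };

static uint32_t pack(const struct vmcr *v)
{
	uint32_t reg = 0;

	reg |= (v->ctlr << CTRL_SHIFT) & CTRL_MASK;
	reg |= (v->pmr << PMR_SHIFT) & PMR_MASK;
	return reg;
}

static void unpack(uint32_t reg, struct vmcr *v)
{
	v->ctlr = (reg & CTRL_MASK) >> CTRL_SHIFT;
	v->pmr = (reg & PMR_MASK) >> PMR_SHIFT;
}

int main(void)
{
	struct vmcr in = { .ctlr = 0x9, .pmr = 0x1c }, out;

	unpack(pack(&in), &out);
	assert(in.ctlr == out.ctlr && in.pmr == out.pmr);
	printf("VMCR round trip ok\n");
	return 0;
}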
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c new file mode 100644 index 000000000000..336a46115937 --- /dev/null +++ b/virt/kvm/arm/vgic/vgic-v3.c | |||
@@ -0,0 +1,330 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License version 2 as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
13 | */ | ||
14 | |||
15 | #include <linux/irqchip/arm-gic-v3.h> | ||
16 | #include <linux/kvm.h> | ||
17 | #include <linux/kvm_host.h> | ||
18 | #include <kvm/arm_vgic.h> | ||
19 | #include <asm/kvm_mmu.h> | ||
20 | #include <asm/kvm_asm.h> | ||
21 | |||
22 | #include "vgic.h" | ||
23 | |||
24 | void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu) | ||
25 | { | ||
26 | struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; | ||
27 | u32 model = vcpu->kvm->arch.vgic.vgic_model; | ||
28 | |||
29 | if (cpuif->vgic_misr & ICH_MISR_EOI) { | ||
30 | unsigned long eisr_bmap = cpuif->vgic_eisr; | ||
31 | int lr; | ||
32 | |||
33 | for_each_set_bit(lr, &eisr_bmap, kvm_vgic_global_state.nr_lr) { | ||
34 | u32 intid; | ||
35 | u64 val = cpuif->vgic_lr[lr]; | ||
36 | |||
37 | if (model == KVM_DEV_TYPE_ARM_VGIC_V3) | ||
38 | intid = val & ICH_LR_VIRTUAL_ID_MASK; | ||
39 | else | ||
40 | intid = val & GICH_LR_VIRTUALID; | ||
41 | |||
42 | WARN_ON(cpuif->vgic_lr[lr] & ICH_LR_STATE); | ||
43 | |||
44 | kvm_notify_acked_irq(vcpu->kvm, 0, | ||
45 | intid - VGIC_NR_PRIVATE_IRQS); | ||
46 | } | ||
47 | |||
48 | /* | ||
49 | * In subsequent iterations of the vcpu loop, if we sync | ||
50 | * the vgic state after flushing it, but before | ||
51 | * entering the guest (this happens for pending | ||
52 | * signals and vmid rollovers), then make sure we | ||
53 | * don't pick up any old maintenance interrupts here. | ||
54 | */ | ||
55 | cpuif->vgic_eisr = 0; | ||
56 | } | ||
57 | |||
58 | cpuif->vgic_hcr &= ~ICH_HCR_UIE; | ||
59 | } | ||
60 | |||
61 | void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) | ||
62 | { | ||
63 | struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; | ||
64 | |||
65 | cpuif->vgic_hcr |= ICH_HCR_UIE; | ||
66 | } | ||
67 | |||
68 | void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) | ||
69 | { | ||
70 | struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; | ||
71 | u32 model = vcpu->kvm->arch.vgic.vgic_model; | ||
72 | int lr; | ||
73 | |||
74 | for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) { | ||
75 | u64 val = cpuif->vgic_lr[lr]; | ||
76 | u32 intid; | ||
77 | struct vgic_irq *irq; | ||
78 | |||
79 | if (model == KVM_DEV_TYPE_ARM_VGIC_V3) | ||
80 | intid = val & ICH_LR_VIRTUAL_ID_MASK; | ||
81 | else | ||
82 | intid = val & GICH_LR_VIRTUALID; | ||
83 | irq = vgic_get_irq(vcpu->kvm, vcpu, intid); | ||
84 | |||
85 | spin_lock(&irq->irq_lock); | ||
86 | |||
87 | /* Always preserve the active bit */ | ||
88 | irq->active = !!(val & ICH_LR_ACTIVE_BIT); | ||
89 | |||
90 | /* Edge is the only case where we preserve the pending bit */ | ||
91 | if (irq->config == VGIC_CONFIG_EDGE && | ||
92 | (val & ICH_LR_PENDING_BIT)) { | ||
93 | irq->pending = true; | ||
94 | |||
95 | if (vgic_irq_is_sgi(intid) && | ||
96 | model == KVM_DEV_TYPE_ARM_VGIC_V2) { | ||
97 | u32 cpuid = val & GICH_LR_PHYSID_CPUID; | ||
98 | |||
99 | cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; | ||
100 | irq->source |= (1 << cpuid); | ||
101 | } | ||
102 | } | ||
103 | |||
104 | /* Clear soft pending state when level irqs have been acked */ | ||
105 | if (irq->config == VGIC_CONFIG_LEVEL && | ||
106 | !(val & ICH_LR_PENDING_BIT)) { | ||
107 | irq->soft_pending = false; | ||
108 | irq->pending = irq->line_level; | ||
109 | } | ||
110 | |||
111 | spin_unlock(&irq->irq_lock); | ||
112 | } | ||
113 | } | ||
114 | |||
115 | /* Requires the irq to be locked already */ | ||
116 | void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) | ||
117 | { | ||
118 | u32 model = vcpu->kvm->arch.vgic.vgic_model; | ||
119 | u64 val = irq->intid; | ||
120 | |||
121 | if (irq->pending) { | ||
122 | val |= ICH_LR_PENDING_BIT; | ||
123 | |||
124 | if (irq->config == VGIC_CONFIG_EDGE) | ||
125 | irq->pending = false; | ||
126 | |||
127 | if (vgic_irq_is_sgi(irq->intid) && | ||
128 | model == KVM_DEV_TYPE_ARM_VGIC_V2) { | ||
129 | u32 src = ffs(irq->source); | ||
130 | |||
131 | BUG_ON(!src); | ||
132 | val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; | ||
133 | irq->source &= ~(1 << (src - 1)); | ||
134 | if (irq->source) | ||
135 | irq->pending = true; | ||
136 | } | ||
137 | } | ||
138 | |||
139 | if (irq->active) | ||
140 | val |= ICH_LR_ACTIVE_BIT; | ||
141 | |||
142 | if (irq->hw) { | ||
143 | val |= ICH_LR_HW; | ||
144 | val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; | ||
145 | } else { | ||
146 | if (irq->config == VGIC_CONFIG_LEVEL) | ||
147 | val |= ICH_LR_EOI; | ||
148 | } | ||
149 | |||
150 | /* | ||
151 | * We currently only support Group1 interrupts, which is a | ||
152 | * known defect. This needs to be addressed at some point. | ||
153 | */ | ||
154 | if (model == KVM_DEV_TYPE_ARM_VGIC_V3) | ||
155 | val |= ICH_LR_GROUP; | ||
156 | |||
157 | val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT; | ||
158 | |||
159 | vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val; | ||
160 | } | ||
161 | |||
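The GICv2-on-GICv3 SGI path above drains one source CPU per LR: ffs() picks the lowest pending source, its bit is cleared, and pending is re-asserted while other sources remain so the next flush emits another LR. A standalone sketch of that drain loop (only the 8-bit source mask mirrors struct vgic_irq; the rest is invented):

#include <stdint.h>
#include <stdio.h>
#include <strings.h>	/* ffs() */

int main(void)
{
	uint8_t source = 0x05;	/* SGI pending from CPUs 0 and 2 */

	while (source) {
		int src = ffs(source);	/* 1-based index of lowest set bit */

		/* One LR per source CPU: emit it, then clear its bit. */
		printf("populate LR for source CPU %d\n", src - 1);
		source &= ~(1U << (src - 1));

		if (source)	/* pending stays set while sources remain */
			printf("still pending (mask 0x%02x)\n", source);
	}
	return 0;
}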
162 | void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr) | ||
163 | { | ||
164 | vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = 0; | ||
165 | } | ||
166 | |||
167 | void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) | ||
168 | { | ||
169 | u32 vmcr; | ||
170 | |||
171 | vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK; | ||
172 | vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; | ||
173 | vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; | ||
174 | vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; | ||
175 | |||
176 | vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr; | ||
177 | } | ||
178 | |||
179 | void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) | ||
180 | { | ||
181 | u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr; | ||
182 | |||
183 | vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT; | ||
184 | vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; | ||
185 | vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; | ||
186 | vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; | ||
187 | } | ||
188 | |||
189 | void vgic_v3_enable(struct kvm_vcpu *vcpu) | ||
190 | { | ||
191 | struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; | ||
192 | |||
193 | /* | ||
194 | * By forcing VMCR to zero, the GIC will restore the binary | ||
195 | * points to their reset values. Anything else resets to zero | ||
196 | * anyway. | ||
197 | */ | ||
198 | vgic_v3->vgic_vmcr = 0; | ||
199 | vgic_v3->vgic_elrsr = ~0; | ||
200 | |||
201 | /* | ||
202 | * If we are emulating a GICv3, we do it in a non-GICv2-compatible | ||
203 | * way, so we force SRE to 1 to demonstrate this to the guest. | ||
204 | * This goes with the spec allowing the value to be RAO/WI. | ||
205 | */ | ||
206 | if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) | ||
207 | vgic_v3->vgic_sre = ICC_SRE_EL1_SRE; | ||
208 | else | ||
209 | vgic_v3->vgic_sre = 0; | ||
210 | |||
211 | /* Get the show on the road... */ | ||
212 | vgic_v3->vgic_hcr = ICH_HCR_EN; | ||
213 | } | ||
214 | |||
215 | /* check for overlapping regions and for regions crossing the end of memory */ | ||
216 | static bool vgic_v3_check_base(struct kvm *kvm) | ||
217 | { | ||
218 | struct vgic_dist *d = &kvm->arch.vgic; | ||
219 | gpa_t redist_size = KVM_VGIC_V3_REDIST_SIZE; | ||
220 | |||
221 | redist_size *= atomic_read(&kvm->online_vcpus); | ||
222 | |||
223 | if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base) | ||
224 | return false; | ||
225 | if (d->vgic_redist_base + redist_size < d->vgic_redist_base) | ||
226 | return false; | ||
227 | |||
228 | if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE <= d->vgic_redist_base) | ||
229 | return true; | ||
230 | if (d->vgic_redist_base + redist_size <= d->vgic_dist_base) | ||
231 | return true; | ||
232 | |||
233 | return false; | ||
234 | } | ||
235 | |||
236 | int vgic_v3_map_resources(struct kvm *kvm) | ||
237 | { | ||
238 | int ret = 0; | ||
239 | struct vgic_dist *dist = &kvm->arch.vgic; | ||
240 | |||
241 | if (vgic_ready(kvm)) | ||
242 | goto out; | ||
243 | |||
244 | if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || | ||
245 | IS_VGIC_ADDR_UNDEF(dist->vgic_redist_base)) { | ||
246 | kvm_err("Need to set vgic distributor addresses first\n"); | ||
247 | ret = -ENXIO; | ||
248 | goto out; | ||
249 | } | ||
250 | |||
251 | if (!vgic_v3_check_base(kvm)) { | ||
252 | kvm_err("VGIC redist and dist frames overlap\n"); | ||
253 | ret = -EINVAL; | ||
254 | goto out; | ||
255 | } | ||
256 | |||
257 | /* | ||
258 | * For a VGICv3 we require the userland to explicitly initialize | ||
259 | * the VGIC before we need to use it. | ||
260 | */ | ||
261 | if (!vgic_initialized(kvm)) { | ||
262 | ret = -EBUSY; | ||
263 | goto out; | ||
264 | } | ||
265 | |||
266 | ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3); | ||
267 | if (ret) { | ||
268 | kvm_err("Unable to register VGICv3 dist MMIO regions\n"); | ||
269 | goto out; | ||
270 | } | ||
271 | |||
272 | ret = vgic_register_redist_iodevs(kvm, dist->vgic_redist_base); | ||
273 | if (ret) { | ||
274 | kvm_err("Unable to register VGICv3 redist MMIO regions\n"); | ||
275 | goto out; | ||
276 | } | ||
277 | |||
278 | dist->ready = true; | ||
279 | |||
280 | out: | ||
281 | if (ret) | ||
282 | kvm_vgic_destroy(kvm); | ||
283 | return ret; | ||
284 | } | ||
285 | |||
286 | /** | ||
287 | * vgic_v3_probe - probe for a GICv3 compatible interrupt controller | ||
288 | * @info: pointer to the GIC KVM info structure | ||
289 | * | ||
290 | * Returns 0 if a GICv3 has been found, returns an error code otherwise | ||
291 | */ | ||
292 | int vgic_v3_probe(const struct gic_kvm_info *info) | ||
293 | { | ||
294 | u32 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2); | ||
295 | |||
296 | /* | ||
297 | * The ListRegs field is 5 bits, but there is an architectural | ||
298 | * maximum of 16 list registers. Just ignore bit 4... | ||
299 | */ | ||
300 | kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1; | ||
301 | kvm_vgic_global_state.can_emulate_gicv2 = false; | ||
302 | |||
303 | if (!info->vcpu.start) { | ||
304 | kvm_info("GICv3: no GICV resource entry\n"); | ||
305 | kvm_vgic_global_state.vcpu_base = 0; | ||
306 | } else if (!PAGE_ALIGNED(info->vcpu.start)) { | ||
307 | pr_warn("GICV physical address 0x%llx not page aligned\n", | ||
308 | (unsigned long long)info->vcpu.start); | ||
309 | kvm_vgic_global_state.vcpu_base = 0; | ||
310 | } else if (!PAGE_ALIGNED(resource_size(&info->vcpu))) { | ||
311 | pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n", | ||
312 | (unsigned long long)resource_size(&info->vcpu), | ||
313 | PAGE_SIZE); | ||
314 | kvm_vgic_global_state.vcpu_base = 0; | ||
315 | } else { | ||
316 | kvm_vgic_global_state.vcpu_base = info->vcpu.start; | ||
317 | kvm_vgic_global_state.can_emulate_gicv2 = true; | ||
318 | kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); | ||
319 | kvm_info("vgic-v2@%llx\n", info->vcpu.start); | ||
320 | } | ||
321 | if (kvm_vgic_global_state.vcpu_base == 0) | ||
322 | kvm_info("disabling GICv2 emulation\n"); | ||
323 | kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3); | ||
324 | |||
325 | kvm_vgic_global_state.vctrl_base = NULL; | ||
326 | kvm_vgic_global_state.type = VGIC_V3; | ||
327 | kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS; | ||
328 | |||
329 | return 0; | ||
330 | } | ||
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c new file mode 100644 index 000000000000..69b61abefa19 --- /dev/null +++ b/virt/kvm/arm/vgic/vgic.c | |||
@@ -0,0 +1,619 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2015, 2016 ARM Ltd. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/kvm.h> | ||
18 | #include <linux/kvm_host.h> | ||
19 | #include <linux/list_sort.h> | ||
20 | |||
21 | #include "vgic.h" | ||
22 | |||
23 | #define CREATE_TRACE_POINTS | ||
24 | #include "../trace.h" | ||
25 | |||
26 | #ifdef CONFIG_DEBUG_SPINLOCK | ||
27 | #define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p) | ||
28 | #else | ||
29 | #define DEBUG_SPINLOCK_BUG_ON(p) | ||
30 | #endif | ||
31 | |||
32 | struct vgic_global __section(.hyp.text) kvm_vgic_global_state; | ||
33 | |||
34 | /* | ||
35 | * Locking order is always: | ||
36 | * vgic_cpu->ap_list_lock | ||
37 | * vgic_irq->irq_lock | ||
38 | * | ||
39 | * (that is, always take the ap_list_lock before the struct vgic_irq lock). | ||
40 | * | ||
41 | * When taking more than one ap_list_lock at the same time, always take the | ||
42 | * lowest numbered VCPU's ap_list_lock first, so: | ||
43 | * vcpuX->vcpu_id < vcpuY->vcpu_id: | ||
44 | * spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock); | ||
45 | * spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock); | ||
46 | */ | ||
47 | |||
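The prune path later in this file follows the comment above literally: when it must hold two VCPUs' ap_list_locks, it always takes the lower vcpu_id first. A minimal userspace sketch of that ABBA-avoidance idiom with pthread mutexes (all names invented for illustration):

#include <pthread.h>
#include <stdio.h>

struct vcpu {
	int vcpu_id;
	pthread_mutex_t ap_list_lock;
};

/* Always lock the lower-numbered VCPU first to avoid ABBA deadlock. */
static void lock_both(struct vcpu *a, struct vcpu *b)
{
	struct vcpu *first = a->vcpu_id < b->vcpu_id ? a : b;
	struct vcpu *second = (first == a) ? b : a;

	pthread_mutex_lock(&first->ap_list_lock);
	pthread_mutex_lock(&second->ap_list_lock);
}

static void unlock_both(struct vcpu *a, struct vcpu *b)
{
	pthread_mutex_unlock(&a->ap_list_lock);
	pthread_mutex_unlock(&b->ap_list_lock);
}

int main(void)
{
	struct vcpu x = { 3, PTHREAD_MUTEX_INITIALIZER };
	struct vcpu y = { 1, PTHREAD_MUTEX_INITIALIZER };

	lock_both(&x, &y);	/* takes y (id 1) first, then x (id 3) */
	printf("both ap_list_locks held, in id order\n");
	unlock_both(&x, &y);
	return 0;
}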
48 | struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, | ||
49 | u32 intid) | ||
50 | { | ||
51 | /* SGIs and PPIs */ | ||
52 | if (intid <= VGIC_MAX_PRIVATE) | ||
53 | return &vcpu->arch.vgic_cpu.private_irqs[intid]; | ||
54 | |||
55 | /* SPIs */ | ||
56 | if (intid <= VGIC_MAX_SPI) | ||
57 | return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS]; | ||
58 | |||
59 | /* LPIs are not yet covered */ | ||
60 | if (intid >= VGIC_MIN_LPI) | ||
61 | return NULL; | ||
62 | |||
63 | WARN(1, "Looking up struct vgic_irq for reserved INTID"); | ||
64 | return NULL; | ||
65 | } | ||
66 | |||
67 | /** | ||
68 | * kvm_vgic_target_oracle - compute the target vcpu for an irq | ||
69 | * | ||
70 | * @irq: The irq to route. Must be already locked. | ||
71 | * | ||
72 | * Based on the current state of the interrupt (enabled, pending, | ||
73 | * active, vcpu and target_vcpu), compute the next vcpu this should be | ||
74 | * given to. Return NULL if this shouldn't be injected at all. | ||
75 | * | ||
76 | * Requires the IRQ lock to be held. | ||
77 | */ | ||
78 | static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq) | ||
79 | { | ||
80 | DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock)); | ||
81 | |||
82 | /* If the interrupt is active, it must stay on the current vcpu */ | ||
83 | if (irq->active) | ||
84 | return irq->vcpu ? : irq->target_vcpu; | ||
85 | |||
86 | /* | ||
87 | * If the IRQ is not active but enabled and pending, we should direct | ||
88 | * it to its configured target VCPU. | ||
89 | * If the distributor is disabled, pending interrupts shouldn't be | ||
90 | * forwarded. | ||
91 | */ | ||
92 | if (irq->enabled && irq->pending) { | ||
93 | if (unlikely(irq->target_vcpu && | ||
94 | !irq->target_vcpu->kvm->arch.vgic.enabled)) | ||
95 | return NULL; | ||
96 | |||
97 | return irq->target_vcpu; | ||
98 | } | ||
99 | |||
100 | /* If the IRQ is neither active nor both pending and enabled, it | ||
101 | * should not be queued to any VCPU. | ||
102 | */ | ||
103 | return NULL; | ||
104 | } | ||
105 | |||
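vgic_target_oracle() encodes three routing rules: an active IRQ sticks with whichever vcpu currently holds it, a pending-and-enabled IRQ goes to its configured target unless the distributor is disabled, and everything else is not queued at all. A compact standalone sketch of the same decision (fields invented to mirror struct vgic_irq; the distributor flag is folded into the struct for brevity):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct vcpu { int id; };

struct irq {
	bool active, enabled, pending, dist_enabled;
	struct vcpu *vcpu;		/* vcpu currently holding the IRQ */
	struct vcpu *target_vcpu;	/* configured routing target */
};

static struct vcpu *oracle(const struct irq *irq)
{
	if (irq->active)		/* must stay where it is */
		return irq->vcpu ? irq->vcpu : irq->target_vcpu;
	if (irq->enabled && irq->pending)
		return irq->dist_enabled ? irq->target_vcpu : NULL;
	return NULL;			/* nothing to queue */
}

int main(void)
{
	struct vcpu v0 = { 0 };
	struct irq irq = { .enabled = true, .pending = true,
			   .dist_enabled = true, .target_vcpu = &v0 };
	struct vcpu *dest = oracle(&irq);

	printf("route to vcpu %d\n", dest ? dest->id : -1);
	return 0;
}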
106 | /* | ||
107 | * The order of items in the ap_lists defines how we'll pack things in LRs as | ||
108 | * well, the first items in the list being the first things populated in the | ||
109 | * LRs. | ||
110 | * | ||
111 | * A hard rule is that active interrupts can never be pushed out of the LRs | ||
112 | * (and therefore take priority) since we cannot reliably trap on deactivation | ||
113 | * of IRQs and therefore they have to be present in the LRs. | ||
114 | * | ||
115 | * Otherwise things should be sorted by the priority field and the GIC | ||
116 | * hardware support will take care of preemption of priority groups etc. | ||
117 | * | ||
118 | * Return negative if "a" sorts before "b", 0 to preserve order, and positive | ||
119 | * to sort "b" before "a". | ||
120 | */ | ||
121 | static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b) | ||
122 | { | ||
123 | struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list); | ||
124 | struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list); | ||
125 | bool penda, pendb; | ||
126 | int ret; | ||
127 | |||
128 | spin_lock(&irqa->irq_lock); | ||
129 | spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING); | ||
130 | |||
131 | if (irqa->active || irqb->active) { | ||
132 | ret = (int)irqb->active - (int)irqa->active; | ||
133 | goto out; | ||
134 | } | ||
135 | |||
136 | penda = irqa->enabled && irqa->pending; | ||
137 | pendb = irqb->enabled && irqb->pending; | ||
138 | |||
139 | if (!penda || !pendb) { | ||
140 | ret = (int)pendb - (int)penda; | ||
141 | goto out; | ||
142 | } | ||
143 | |||
144 | /* Both pending and enabled, sort by priority */ | ||
145 | ret = irqa->priority - irqb->priority; | ||
146 | out: | ||
147 | spin_unlock(&irqb->irq_lock); | ||
148 | spin_unlock(&irqa->irq_lock); | ||
149 | return ret; | ||
150 | } | ||
151 | |||
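The comparator sorts in three tiers: active IRQs first, then pending-and-enabled ones, then ascending priority (on the GIC a lower value means a higher priority). A userspace sketch of the same ordering over plain structs with qsort() — note list_sort() is stable while qsort() is not, and all locking is omitted here:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct irq {
	int intid;
	bool active, enabled, pending;
	unsigned char priority;	/* lower value = higher priority */
};

static int irq_cmp(const void *pa, const void *pb)
{
	const struct irq *a = pa, *b = pb;
	bool penda = a->enabled && a->pending;
	bool pendb = b->enabled && b->pending;

	if (a->active || b->active)		/* active IRQs sort first */
		return (int)b->active - (int)a->active;
	if (!penda || !pendb)			/* then pending && enabled */
		return (int)pendb - (int)penda;
	return a->priority - b->priority;	/* then ascending priority */
}

int main(void)
{
	struct irq irqs[] = {
		{ 40, false, true, true, 0xa0 },
		{ 41, true,  true, false, 0xf0 },
		{ 42, false, true, true, 0x20 },
	};

	qsort(irqs, 3, sizeof(irqs[0]), irq_cmp);
	for (int i = 0; i < 3; i++)
		printf("intid %d\n", irqs[i].intid);	/* 41, 42, 40 */
	return 0;
}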
152 | /* Must be called with the ap_list_lock held */ | ||
153 | static void vgic_sort_ap_list(struct kvm_vcpu *vcpu) | ||
154 | { | ||
155 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
156 | |||
157 | DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); | ||
158 | |||
159 | list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp); | ||
160 | } | ||
161 | |||
162 | /* | ||
163 | * Only valid injection if changing level for level-triggered IRQs or for a | ||
164 | * rising edge. | ||
165 | */ | ||
166 | static bool vgic_validate_injection(struct vgic_irq *irq, bool level) | ||
167 | { | ||
168 | switch (irq->config) { | ||
169 | case VGIC_CONFIG_LEVEL: | ||
170 | return irq->line_level != level; | ||
171 | case VGIC_CONFIG_EDGE: | ||
172 | return level; | ||
173 | } | ||
174 | |||
175 | return false; | ||
176 | } | ||
177 | |||
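The filter above reduces to a two-row truth table: a level-triggered IRQ only accepts a change of line level, an edge-triggered one only accepts a rising edge. A tiny sketch enumerating the cases (enum and names invented):

#include <stdbool.h>
#include <stdio.h>

enum cfg { LEVEL, EDGE };

static bool valid(enum cfg config, bool line_level, bool level)
{
	return config == LEVEL ? line_level != level : level;
}

int main(void)
{
	/* level-triggered: only a change of level is a valid injection */
	printf("%d %d\n", valid(LEVEL, false, true), valid(LEVEL, true, true));
	/* edge-triggered: only a rising edge (level == true) is valid */
	printf("%d %d\n", valid(EDGE, false, true), valid(EDGE, false, false));
	return 0;
}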
178 | /* | ||
179 | * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list. | ||
180 | * Do the queuing if necessary, taking the right locks in the right order. | ||
181 | * Returns true when the IRQ was queued, false otherwise. | ||
182 | * | ||
183 | * Needs to be entered with the IRQ lock already held, but will return | ||
184 | * with all locks dropped. | ||
185 | */ | ||
186 | bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq) | ||
187 | { | ||
188 | struct kvm_vcpu *vcpu; | ||
189 | |||
190 | DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock)); | ||
191 | |||
192 | retry: | ||
193 | vcpu = vgic_target_oracle(irq); | ||
194 | if (irq->vcpu || !vcpu) { | ||
195 | /* | ||
196 | * If this IRQ is already on a VCPU's ap_list, then it | ||
197 | * cannot be moved or modified and there is no more work for | ||
198 | * us to do. | ||
199 | * | ||
200 | * Otherwise, if the irq is not pending and enabled, it does | ||
201 | * not need to be inserted into an ap_list and there is also | ||
202 | * no more work for us to do. | ||
203 | */ | ||
204 | spin_unlock(&irq->irq_lock); | ||
205 | return false; | ||
206 | } | ||
207 | |||
208 | /* | ||
209 | * We must unlock the irq lock to take the ap_list_lock where | ||
210 | * we are going to insert this new pending interrupt. | ||
211 | */ | ||
212 | spin_unlock(&irq->irq_lock); | ||
213 | |||
214 | /* the irq state may change while unlocked; we re-check it below */ | ||
215 | |||
216 | spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); | ||
217 | spin_lock(&irq->irq_lock); | ||
218 | |||
219 | /* | ||
220 | * Did something change behind our backs? | ||
221 | * | ||
222 | * There are two cases: | ||
223 | * 1) The irq lost its pending state or was disabled behind our | ||
224 | * backs and/or it was queued to another VCPU's ap_list. | ||
225 | * 2) Someone changed the affinity on this irq behind our | ||
226 | * backs and we are now holding the wrong ap_list_lock. | ||
227 | * | ||
228 | * In both cases, drop the locks and retry. | ||
229 | */ | ||
230 | |||
231 | if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) { | ||
232 | spin_unlock(&irq->irq_lock); | ||
233 | spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); | ||
234 | |||
235 | spin_lock(&irq->irq_lock); | ||
236 | goto retry; | ||
237 | } | ||
238 | |||
239 | list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head); | ||
240 | irq->vcpu = vcpu; | ||
241 | |||
242 | spin_unlock(&irq->irq_lock); | ||
243 | spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); | ||
244 | |||
245 | kvm_vcpu_kick(vcpu); | ||
246 | |||
247 | return true; | ||
248 | } | ||
249 | |||
250 | static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, | ||
251 | unsigned int intid, bool level, | ||
252 | bool mapped_irq) | ||
253 | { | ||
254 | struct kvm_vcpu *vcpu; | ||
255 | struct vgic_irq *irq; | ||
256 | int ret; | ||
257 | |||
258 | trace_vgic_update_irq_pending(cpuid, intid, level); | ||
259 | |||
260 | ret = vgic_lazy_init(kvm); | ||
261 | if (ret) | ||
262 | return ret; | ||
263 | |||
264 | vcpu = kvm_get_vcpu(kvm, cpuid); | ||
265 | if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS) | ||
266 | return -EINVAL; | ||
267 | |||
268 | irq = vgic_get_irq(kvm, vcpu, intid); | ||
269 | if (!irq) | ||
270 | return -EINVAL; | ||
271 | |||
272 | if (irq->hw != mapped_irq) | ||
273 | return -EINVAL; | ||
274 | |||
275 | spin_lock(&irq->irq_lock); | ||
276 | |||
277 | if (!vgic_validate_injection(irq, level)) { | ||
278 | /* Nothing to see here, move along... */ | ||
279 | spin_unlock(&irq->irq_lock); | ||
280 | return 0; | ||
281 | } | ||
282 | |||
283 | if (irq->config == VGIC_CONFIG_LEVEL) { | ||
284 | irq->line_level = level; | ||
285 | irq->pending = level || irq->soft_pending; | ||
286 | } else { | ||
287 | irq->pending = true; | ||
288 | } | ||
289 | |||
290 | vgic_queue_irq_unlock(kvm, irq); | ||
291 | |||
292 | return 0; | ||
293 | } | ||
294 | |||
295 | /** | ||
296 | * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic | ||
297 | * @kvm: The VM structure pointer | ||
298 | * @cpuid: The CPU for PPIs | ||
299 | * @intid: The INTID to inject a new state to. | ||
300 | * @level: Edge-triggered: true to trigger the interrupt | ||
301 | * false to ignore the call | ||
302 | * Level-sensitive: true to raise the input signal | ||
303 | * false to lower the input signal | ||
304 | * | ||
305 | * The VGIC is not concerned with devices being active-LOW or active-HIGH for | ||
306 | * level-sensitive interrupts. You can think of the level parameter as 1 | ||
307 | * being HIGH and 0 being LOW and all devices being active-HIGH. | ||
308 | */ | ||
309 | int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, | ||
310 | bool level) | ||
311 | { | ||
312 | return vgic_update_irq_pending(kvm, cpuid, intid, level, false); | ||
313 | } | ||
314 | |||
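For callers, the kerneldoc above boils down to one pattern per trigger type: a level-sensitive device keeps 'level' equal to its (virtual) line, while an edge source pulses level=true once per event. A hedged usage sketch — the devices, INTIDs and the stub are invented; only kvm_vgic_inject_irq()'s signature comes from the code above:

#include <stdbool.h>
#include <stdio.h>

struct kvm { int dummy; };

/* Stub standing in for the API above, so this sketch runs standalone. */
static int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid,
			       unsigned int intid, bool level)
{
	printf("inject: cpuid=%d intid=%u level=%d\n", cpuid, intid, level);
	return 0;
}

/* Hypothetical level-sensitive device on SPI 40: 'level' tracks the line. */
static void virt_uart_update_irq(struct kvm *kvm, bool asserted)
{
	kvm_vgic_inject_irq(kvm, 0, 40, asserted);	/* cpuid ignored for SPIs */
}

/* Hypothetical edge-triggered source on SPI 42: pulse true per event. */
static void virt_counter_fire(struct kvm *kvm)
{
	kvm_vgic_inject_irq(kvm, 0, 42, true);
}

int main(void)
{
	struct kvm vm = { 0 };

	virt_uart_update_irq(&vm, true);	/* raise the line */
	virt_uart_update_irq(&vm, false);	/* lower it again */
	virt_counter_fire(&vm);
	return 0;
}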
315 | int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid, | ||
316 | bool level) | ||
317 | { | ||
318 | return vgic_update_irq_pending(kvm, cpuid, intid, level, true); | ||
319 | } | ||
320 | |||
321 | int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq) | ||
322 | { | ||
323 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); | ||
324 | |||
325 | BUG_ON(!irq); | ||
326 | |||
327 | spin_lock(&irq->irq_lock); | ||
328 | |||
329 | irq->hw = true; | ||
330 | irq->hwintid = phys_irq; | ||
331 | |||
332 | spin_unlock(&irq->irq_lock); | ||
333 | |||
334 | return 0; | ||
335 | } | ||
336 | |||
337 | int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) | ||
338 | { | ||
339 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); | ||
340 | |||
341 | BUG_ON(!irq); | ||
342 | |||
343 | if (!vgic_initialized(vcpu->kvm)) | ||
344 | return -EAGAIN; | ||
345 | |||
346 | spin_lock(&irq->irq_lock); | ||
347 | |||
348 | irq->hw = false; | ||
349 | irq->hwintid = 0; | ||
350 | |||
351 | spin_unlock(&irq->irq_lock); | ||
352 | |||
353 | return 0; | ||
354 | } | ||
355 | |||
356 | /** | ||
357 | * vgic_prune_ap_list - Remove non-relevant interrupts from the list | ||
358 | * | ||
359 | * @vcpu: The VCPU pointer | ||
360 | * | ||
361 | * Go over the list of "interesting" interrupts, and prune those that we | ||
362 | * won't have to consider in the near future. | ||
363 | */ | ||
364 | static void vgic_prune_ap_list(struct kvm_vcpu *vcpu) | ||
365 | { | ||
366 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
367 | struct vgic_irq *irq, *tmp; | ||
368 | |||
369 | retry: | ||
370 | spin_lock(&vgic_cpu->ap_list_lock); | ||
371 | |||
372 | list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) { | ||
373 | struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB; | ||
374 | |||
375 | spin_lock(&irq->irq_lock); | ||
376 | |||
377 | BUG_ON(vcpu != irq->vcpu); | ||
378 | |||
379 | target_vcpu = vgic_target_oracle(irq); | ||
380 | |||
381 | if (!target_vcpu) { | ||
382 | /* | ||
383 | * We don't need to process this interrupt any | ||
384 | * further; move it off the list. | ||
385 | */ | ||
386 | list_del(&irq->ap_list); | ||
387 | irq->vcpu = NULL; | ||
388 | spin_unlock(&irq->irq_lock); | ||
389 | continue; | ||
390 | } | ||
391 | |||
392 | if (target_vcpu == vcpu) { | ||
393 | /* We're on the right CPU */ | ||
394 | spin_unlock(&irq->irq_lock); | ||
395 | continue; | ||
396 | } | ||
397 | |||
398 | /* This interrupt looks like it has to be migrated. */ | ||
399 | |||
400 | spin_unlock(&irq->irq_lock); | ||
401 | spin_unlock(&vgic_cpu->ap_list_lock); | ||
402 | |||
403 | /* | ||
404 | * Ensure locking order by always locking the smallest | ||
405 | * ID first. | ||
406 | */ | ||
407 | if (vcpu->vcpu_id < target_vcpu->vcpu_id) { | ||
408 | vcpuA = vcpu; | ||
409 | vcpuB = target_vcpu; | ||
410 | } else { | ||
411 | vcpuA = target_vcpu; | ||
412 | vcpuB = vcpu; | ||
413 | } | ||
414 | |||
415 | spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock); | ||
416 | spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock, | ||
417 | SINGLE_DEPTH_NESTING); | ||
418 | spin_lock(&irq->irq_lock); | ||
419 | |||
420 | /* | ||
421 | * If the affinity has been preserved, move the | ||
422 | * interrupt around. Otherwise, it means things have | ||
423 | * changed while the interrupt was unlocked, and we | ||
424 | * need to replay this. | ||
425 | * | ||
426 | * In all cases, we cannot trust the list not to have | ||
427 | * changed, so we restart from the beginning. | ||
428 | */ | ||
429 | if (target_vcpu == vgic_target_oracle(irq)) { | ||
430 | struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu; | ||
431 | |||
432 | list_del(&irq->ap_list); | ||
433 | irq->vcpu = target_vcpu; | ||
434 | list_add_tail(&irq->ap_list, &new_cpu->ap_list_head); | ||
435 | } | ||
436 | |||
437 | spin_unlock(&irq->irq_lock); | ||
438 | spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock); | ||
439 | spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock); | ||
440 | goto retry; | ||
441 | } | ||
442 | |||
443 | spin_unlock(&vgic_cpu->ap_list_lock); | ||
444 | } | ||
445 | |||
446 | static inline void vgic_process_maintenance_interrupt(struct kvm_vcpu *vcpu) | ||
447 | { | ||
448 | if (kvm_vgic_global_state.type == VGIC_V2) | ||
449 | vgic_v2_process_maintenance(vcpu); | ||
450 | else | ||
451 | vgic_v3_process_maintenance(vcpu); | ||
452 | } | ||
453 | |||
454 | static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu) | ||
455 | { | ||
456 | if (kvm_vgic_global_state.type == VGIC_V2) | ||
457 | vgic_v2_fold_lr_state(vcpu); | ||
458 | else | ||
459 | vgic_v3_fold_lr_state(vcpu); | ||
460 | } | ||
461 | |||
462 | /* Requires the irq_lock to be held. */ | ||
463 | static inline void vgic_populate_lr(struct kvm_vcpu *vcpu, | ||
464 | struct vgic_irq *irq, int lr) | ||
465 | { | ||
466 | DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock)); | ||
467 | |||
468 | if (kvm_vgic_global_state.type == VGIC_V2) | ||
469 | vgic_v2_populate_lr(vcpu, irq, lr); | ||
470 | else | ||
471 | vgic_v3_populate_lr(vcpu, irq, lr); | ||
472 | } | ||
473 | |||
474 | static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr) | ||
475 | { | ||
476 | if (kvm_vgic_global_state.type == VGIC_V2) | ||
477 | vgic_v2_clear_lr(vcpu, lr); | ||
478 | else | ||
479 | vgic_v3_clear_lr(vcpu, lr); | ||
480 | } | ||
481 | |||
482 | static inline void vgic_set_underflow(struct kvm_vcpu *vcpu) | ||
483 | { | ||
484 | if (kvm_vgic_global_state.type == VGIC_V2) | ||
485 | vgic_v2_set_underflow(vcpu); | ||
486 | else | ||
487 | vgic_v3_set_underflow(vcpu); | ||
488 | } | ||
489 | |||
490 | /* Requires the ap_list_lock to be held. */ | ||
491 | static int compute_ap_list_depth(struct kvm_vcpu *vcpu) | ||
492 | { | ||
493 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
494 | struct vgic_irq *irq; | ||
495 | int count = 0; | ||
496 | |||
497 | DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); | ||
498 | |||
499 | list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { | ||
500 | spin_lock(&irq->irq_lock); | ||
501 | /* GICv2 SGIs can count for more than one... */ | ||
502 | if (vgic_irq_is_sgi(irq->intid) && irq->source) | ||
503 | count += hweight8(irq->source); | ||
504 | else | ||
505 | count++; | ||
506 | spin_unlock(&irq->irq_lock); | ||
507 | } | ||
508 | return count; | ||
509 | } | ||
510 | |||
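compute_ap_list_depth() charges a multi-source GICv2 SGI one LR per set source bit, since the flush loop below will emit that many LRs for it. A one-line demonstration of the popcount (hweight8() in the kernel; the compiler builtin is its userspace stand-in):

#include <stdio.h>

int main(void)
{
	unsigned char source = 0x2c;	/* SGI pending from CPUs 2, 3 and 5 */

	printf("this SGI needs %d LRs\n", __builtin_popcount(source));
	return 0;
}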
511 | /* Requires the VCPU's ap_list_lock to be held. */ | ||
512 | static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) | ||
513 | { | ||
514 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
515 | struct vgic_irq *irq; | ||
516 | int count = 0; | ||
517 | |||
518 | DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); | ||
519 | |||
520 | if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr) { | ||
521 | vgic_set_underflow(vcpu); | ||
522 | vgic_sort_ap_list(vcpu); | ||
523 | } | ||
524 | |||
525 | list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { | ||
526 | spin_lock(&irq->irq_lock); | ||
527 | |||
528 | if (unlikely(vgic_target_oracle(irq) != vcpu)) | ||
529 | goto next; | ||
530 | |||
531 | /* | ||
532 | * If we get an SGI with multiple sources, try to queue | ||
533 | * them all at once. | ||
534 | */ | ||
535 | do { | ||
536 | vgic_populate_lr(vcpu, irq, count++); | ||
537 | } while (irq->source && count < kvm_vgic_global_state.nr_lr); | ||
538 | |||
539 | next: | ||
540 | spin_unlock(&irq->irq_lock); | ||
541 | |||
542 | if (count == kvm_vgic_global_state.nr_lr) | ||
543 | break; | ||
544 | } | ||
545 | |||
546 | vcpu->arch.vgic_cpu.used_lrs = count; | ||
547 | |||
548 | /* Nuke remaining LRs */ | ||
549 | for ( ; count < kvm_vgic_global_state.nr_lr; count++) | ||
550 | vgic_clear_lr(vcpu, count); | ||
551 | } | ||
552 | |||
553 | /* Sync back the hardware VGIC state into our emulation after a guest's run. */ | ||
554 | void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) | ||
555 | { | ||
556 | vgic_process_maintenance_interrupt(vcpu); | ||
557 | vgic_fold_lr_state(vcpu); | ||
558 | vgic_prune_ap_list(vcpu); | ||
559 | } | ||
560 | |||
561 | /* Flush our emulation state into the GIC hardware before entering the guest. */ | ||
562 | void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) | ||
563 | { | ||
564 | spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); | ||
565 | vgic_flush_lr_state(vcpu); | ||
566 | spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); | ||
567 | } | ||
568 | |||
569 | int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) | ||
570 | { | ||
571 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
572 | struct vgic_irq *irq; | ||
573 | bool pending = false; | ||
574 | |||
575 | if (!vcpu->kvm->arch.vgic.enabled) | ||
576 | return false; | ||
577 | |||
578 | spin_lock(&vgic_cpu->ap_list_lock); | ||
579 | |||
580 | list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { | ||
581 | spin_lock(&irq->irq_lock); | ||
582 | pending = irq->pending && irq->enabled; | ||
583 | spin_unlock(&irq->irq_lock); | ||
584 | |||
585 | if (pending) | ||
586 | break; | ||
587 | } | ||
588 | |||
589 | spin_unlock(&vgic_cpu->ap_list_lock); | ||
590 | |||
591 | return pending; | ||
592 | } | ||
593 | |||
594 | void vgic_kick_vcpus(struct kvm *kvm) | ||
595 | { | ||
596 | struct kvm_vcpu *vcpu; | ||
597 | int c; | ||
598 | |||
599 | /* | ||
600 | * We've injected an interrupt, time to find out who deserves | ||
601 | * a good kick... | ||
602 | */ | ||
603 | kvm_for_each_vcpu(c, vcpu, kvm) { | ||
604 | if (kvm_vgic_vcpu_pending_irq(vcpu)) | ||
605 | kvm_vcpu_kick(vcpu); | ||
606 | } | ||
607 | } | ||
608 | |||
609 | bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq) | ||
610 | { | ||
611 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); | ||
612 | bool map_is_active; | ||
613 | |||
614 | spin_lock(&irq->irq_lock); | ||
615 | map_is_active = irq->hw && irq->active; | ||
616 | spin_unlock(&irq->irq_lock); | ||
617 | |||
618 | return map_is_active; | ||
619 | } | ||
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h new file mode 100644 index 000000000000..7b300ca370b7 --- /dev/null +++ b/virt/kvm/arm/vgic/vgic.h | |||
@@ -0,0 +1,131 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2015, 2016 ARM Ltd. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | #ifndef __KVM_ARM_VGIC_NEW_H__ | ||
17 | #define __KVM_ARM_VGIC_NEW_H__ | ||
18 | |||
19 | #include <linux/irqchip/arm-gic-common.h> | ||
20 | |||
21 | #define PRODUCT_ID_KVM 0x4b /* ASCII code K */ | ||
22 | #define IMPLEMENTER_ARM 0x43b | ||
23 | |||
24 | #define VGIC_ADDR_UNDEF (-1) | ||
25 | #define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) | ||
26 | |||
27 | #define INTERRUPT_ID_BITS_SPIS 10 | ||
28 | #define VGIC_PRI_BITS 5 | ||
29 | |||
30 | #define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS) | ||
31 | |||
32 | struct vgic_vmcr { | ||
33 | u32 ctlr; | ||
34 | u32 abpr; | ||
35 | u32 bpr; | ||
36 | u32 pmr; | ||
37 | }; | ||
38 | |||
39 | struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, | ||
40 | u32 intid); | ||
41 | bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq); | ||
42 | void vgic_kick_vcpus(struct kvm *kvm); | ||
43 | |||
44 | void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu); | ||
45 | void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu); | ||
46 | void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); | ||
47 | void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr); | ||
48 | void vgic_v2_set_underflow(struct kvm_vcpu *vcpu); | ||
49 | int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); | ||
50 | int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, | ||
51 | int offset, u32 *val); | ||
52 | int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write, | ||
53 | int offset, u32 *val); | ||
54 | void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); | ||
55 | void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); | ||
56 | void vgic_v2_enable(struct kvm_vcpu *vcpu); | ||
57 | int vgic_v2_probe(const struct gic_kvm_info *info); | ||
58 | int vgic_v2_map_resources(struct kvm *kvm); | ||
59 | int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, | ||
60 | enum vgic_type); | ||
61 | |||
62 | #ifdef CONFIG_KVM_ARM_VGIC_V3 | ||
63 | void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu); | ||
64 | void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); | ||
65 | void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); | ||
66 | void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr); | ||
67 | void vgic_v3_set_underflow(struct kvm_vcpu *vcpu); | ||
68 | void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); | ||
69 | void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); | ||
70 | void vgic_v3_enable(struct kvm_vcpu *vcpu); | ||
71 | int vgic_v3_probe(const struct gic_kvm_info *info); | ||
72 | int vgic_v3_map_resources(struct kvm *kvm); | ||
73 | int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address); | ||
74 | #else | ||
75 | static inline void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu) | ||
76 | { | ||
77 | } | ||
78 | |||
79 | static inline void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) | ||
80 | { | ||
81 | } | ||
82 | |||
83 | static inline void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, | ||
84 | struct vgic_irq *irq, int lr) | ||
85 | { | ||
86 | } | ||
87 | |||
88 | static inline void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr) | ||
89 | { | ||
90 | } | ||
91 | |||
92 | static inline void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) | ||
93 | { | ||
94 | } | ||
95 | |||
96 | static inline | ||
97 | void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) | ||
98 | { | ||
99 | } | ||
100 | |||
101 | static inline | ||
102 | void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) | ||
103 | { | ||
104 | } | ||
105 | |||
106 | static inline void vgic_v3_enable(struct kvm_vcpu *vcpu) | ||
107 | { | ||
108 | } | ||
109 | |||
110 | static inline int vgic_v3_probe(const struct gic_kvm_info *info) | ||
111 | { | ||
112 | return -ENODEV; | ||
113 | } | ||
114 | |||
115 | static inline int vgic_v3_map_resources(struct kvm *kvm) | ||
116 | { | ||
117 | return -ENODEV; | ||
118 | } | ||
119 | |||
120 | static inline int vgic_register_redist_iodevs(struct kvm *kvm, | ||
121 | gpa_t dist_base_address) | ||
122 | { | ||
123 | return -ENODEV; | ||
124 | } | ||
125 | #endif | ||
126 | |||
127 | void kvm_register_vgic_device(unsigned long type); | ||
128 | int vgic_lazy_init(struct kvm *kvm); | ||
129 | int vgic_init(struct kvm *kvm); | ||
130 | |||
131 | #endif | ||
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index dd4ac9d9e8f5..37af23052470 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -63,6 +63,9 @@ | |||
63 | #define CREATE_TRACE_POINTS | 63 | #define CREATE_TRACE_POINTS |
64 | #include <trace/events/kvm.h> | 64 | #include <trace/events/kvm.h> |
65 | 65 | ||
66 | /* Worst case buffer size needed for holding an integer. */ | ||
67 | #define ITOA_MAX_LEN 12 | ||
68 | |||
66 | MODULE_AUTHOR("Qumranet"); | 69 | MODULE_AUTHOR("Qumranet"); |
67 | MODULE_LICENSE("GPL"); | 70 | MODULE_LICENSE("GPL"); |
68 | 71 | ||
@@ -100,6 +103,9 @@ static __read_mostly struct preempt_ops kvm_preempt_ops; | |||
100 | struct dentry *kvm_debugfs_dir; | 103 | struct dentry *kvm_debugfs_dir; |
101 | EXPORT_SYMBOL_GPL(kvm_debugfs_dir); | 104 | EXPORT_SYMBOL_GPL(kvm_debugfs_dir); |
102 | 105 | ||
106 | static int kvm_debugfs_num_entries; | ||
107 | static const struct file_operations *stat_fops_per_vm[]; | ||
108 | |||
103 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, | 109 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, |
104 | unsigned long arg); | 110 | unsigned long arg); |
105 | #ifdef CONFIG_KVM_COMPAT | 111 | #ifdef CONFIG_KVM_COMPAT |
@@ -542,6 +548,58 @@ static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots) | |||
542 | kvfree(slots); | 548 | kvfree(slots); |
543 | } | 549 | } |
544 | 550 | ||
551 | static void kvm_destroy_vm_debugfs(struct kvm *kvm) | ||
552 | { | ||
553 | int i; | ||
554 | |||
555 | if (!kvm->debugfs_dentry) | ||
556 | return; | ||
557 | |||
558 | debugfs_remove_recursive(kvm->debugfs_dentry); | ||
559 | |||
560 | for (i = 0; i < kvm_debugfs_num_entries; i++) | ||
561 | kfree(kvm->debugfs_stat_data[i]); | ||
562 | kfree(kvm->debugfs_stat_data); | ||
563 | } | ||
564 | |||
565 | static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) | ||
566 | { | ||
567 | char dir_name[ITOA_MAX_LEN * 2]; | ||
568 | struct kvm_stat_data *stat_data; | ||
569 | struct kvm_stats_debugfs_item *p; | ||
570 | |||
571 | if (!debugfs_initialized()) | ||
572 | return 0; | ||
573 | |||
574 | snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd); | ||
575 | kvm->debugfs_dentry = debugfs_create_dir(dir_name, | ||
576 | kvm_debugfs_dir); | ||
577 | if (!kvm->debugfs_dentry) | ||
578 | return -ENOMEM; | ||
579 | |||
580 | kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, | ||
581 | sizeof(*kvm->debugfs_stat_data), | ||
582 | GFP_KERNEL); | ||
583 | if (!kvm->debugfs_stat_data) | ||
584 | return -ENOMEM; | ||
585 | |||
586 | for (p = debugfs_entries; p->name; p++) { | ||
587 | stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL); | ||
588 | if (!stat_data) | ||
589 | return -ENOMEM; | ||
590 | |||
591 | stat_data->kvm = kvm; | ||
592 | stat_data->offset = p->offset; | ||
593 | kvm->debugfs_stat_data[p - debugfs_entries] = stat_data; | ||
594 | if (!debugfs_create_file(p->name, 0444, | ||
595 | kvm->debugfs_dentry, | ||
596 | stat_data, | ||
597 | stat_fops_per_vm[p->kind])) | ||
598 | return -ENOMEM; | ||
599 | } | ||
600 | return 0; | ||
601 | } | ||
602 | |||
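kvm_create_vm_debugfs() names each per-VM directory "<pid>-<fd>", giving every VM its own stats directory under the kvm debugfs root. A standalone sketch of the name construction (ITOA_MAX_LEN matches this patch; the sample pid/fd and the mount path are illustrative):

#include <stdio.h>

#define ITOA_MAX_LEN 12	/* worst case buffer size for holding an integer */

int main(void)
{
	char dir_name[ITOA_MAX_LEN * 2];
	int pid = 4242, fd = 11;	/* invented sample values */

	snprintf(dir_name, sizeof(dir_name), "%d-%d", pid, fd);
	printf("/sys/kernel/debug/kvm/%s\n", dir_name);	/* .../4242-11 */
	return 0;
}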
545 | static struct kvm *kvm_create_vm(unsigned long type) | 603 | static struct kvm *kvm_create_vm(unsigned long type) |
546 | { | 604 | { |
547 | int r, i; | 605 | int r, i; |
@@ -647,6 +705,7 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
647 | int i; | 705 | int i; |
648 | struct mm_struct *mm = kvm->mm; | 706 | struct mm_struct *mm = kvm->mm; |
649 | 707 | ||
708 | kvm_destroy_vm_debugfs(kvm); | ||
650 | kvm_arch_sync_events(kvm); | 709 | kvm_arch_sync_events(kvm); |
651 | spin_lock(&kvm_lock); | 710 | spin_lock(&kvm_lock); |
652 | list_del(&kvm->vm_list); | 711 | list_del(&kvm->vm_list); |
@@ -2999,8 +3058,15 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) | |||
2999 | } | 3058 | } |
3000 | #endif | 3059 | #endif |
3001 | r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC); | 3060 | r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC); |
3002 | if (r < 0) | 3061 | if (r < 0) { |
3003 | kvm_put_kvm(kvm); | 3062 | kvm_put_kvm(kvm); |
3063 | return r; | ||
3064 | } | ||
3065 | |||
3066 | if (kvm_create_vm_debugfs(kvm, r) < 0) { | ||
3067 | kvm_put_kvm(kvm); | ||
3068 | return -ENOMEM; | ||
3069 | } | ||
3004 | 3070 | ||
3005 | return r; | 3071 | return r; |
3006 | } | 3072 | } |
@@ -3425,15 +3491,114 @@ static struct notifier_block kvm_cpu_notifier = { | |||
3425 | .notifier_call = kvm_cpu_hotplug, | 3491 | .notifier_call = kvm_cpu_hotplug, |
3426 | }; | 3492 | }; |
3427 | 3493 | ||
3494 | static int kvm_debugfs_open(struct inode *inode, struct file *file, | ||
3495 | int (*get)(void *, u64 *), int (*set)(void *, u64), | ||
3496 | const char *fmt) | ||
3497 | { | ||
3498 | struct kvm_stat_data *stat_data = (struct kvm_stat_data *) | ||
3499 | inode->i_private; | ||
3500 | |||
3501 | /* The debugfs files refer to the kvm struct, which is | ||
3502 | * still valid when kvm_destroy_vm is called. | ||
3503 | * To avoid the race between open and the removal of the debugfs | ||
3504 | * directory, we test against the users count. | ||
3505 | */ | ||
3506 | if (!atomic_add_unless(&stat_data->kvm->users_count, 1, 0)) | ||
3507 | return -ENOENT; | ||
3508 | |||
3509 | if (simple_attr_open(inode, file, get, set, fmt)) { | ||
3510 | kvm_put_kvm(stat_data->kvm); | ||
3511 | return -ENOMEM; | ||
3512 | } | ||
3513 | |||
3514 | return 0; | ||
3515 | } | ||
3516 | |||
3517 | static int kvm_debugfs_release(struct inode *inode, struct file *file) | ||
3518 | { | ||
3519 | struct kvm_stat_data *stat_data = (struct kvm_stat_data *) | ||
3520 | inode->i_private; | ||
3521 | |||
3522 | simple_attr_release(inode, file); | ||
3523 | kvm_put_kvm(stat_data->kvm); | ||
3524 | |||
3525 | return 0; | ||
3526 | } | ||
3527 | |||
3528 | static int vm_stat_get_per_vm(void *data, u64 *val) | ||
3529 | { | ||
3530 | struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; | ||
3531 | |||
3532 | *val = *(u32 *)((void *)stat_data->kvm + stat_data->offset); | ||
3533 | |||
3534 | return 0; | ||
3535 | } | ||
3536 | |||
3537 | static int vm_stat_get_per_vm_open(struct inode *inode, struct file *file) | ||
3538 | { | ||
3539 | __simple_attr_check_format("%llu\n", 0ull); | ||
3540 | return kvm_debugfs_open(inode, file, vm_stat_get_per_vm, | ||
3541 | NULL, "%llu\n"); | ||
3542 | } | ||
3543 | |||
3544 | static const struct file_operations vm_stat_get_per_vm_fops = { | ||
3545 | .owner = THIS_MODULE, | ||
3546 | .open = vm_stat_get_per_vm_open, | ||
3547 | .release = kvm_debugfs_release, | ||
3548 | .read = simple_attr_read, | ||
3549 | .write = simple_attr_write, | ||
3550 | .llseek = generic_file_llseek, | ||
3551 | }; | ||
3552 | |||
3553 | static int vcpu_stat_get_per_vm(void *data, u64 *val) | ||
3554 | { | ||
3555 | int i; | ||
3556 | struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; | ||
3557 | struct kvm_vcpu *vcpu; | ||
3558 | |||
3559 | *val = 0; | ||
3560 | |||
3561 | kvm_for_each_vcpu(i, vcpu, stat_data->kvm) | ||
3562 | *val += *(u32 *)((void *)vcpu + stat_data->offset); | ||
3563 | |||
3564 | return 0; | ||
3565 | } | ||
3566 | |||
3567 | static int vcpu_stat_get_per_vm_open(struct inode *inode, struct file *file) | ||
3568 | { | ||
3569 | __simple_attr_check_format("%llu\n", 0ull); | ||
3570 | return kvm_debugfs_open(inode, file, vcpu_stat_get_per_vm, | ||
3571 | NULL, "%llu\n"); | ||
3572 | } | ||
3573 | |||
3574 | static const struct file_operations vcpu_stat_get_per_vm_fops = { | ||
3575 | .owner = THIS_MODULE, | ||
3576 | .open = vcpu_stat_get_per_vm_open, | ||
3577 | .release = kvm_debugfs_release, | ||
3578 | .read = simple_attr_read, | ||
3579 | .write = simple_attr_write, | ||
3580 | .llseek = generic_file_llseek, | ||
3581 | }; | ||
3582 | |||
3583 | static const struct file_operations *stat_fops_per_vm[] = { | ||
3584 | [KVM_STAT_VCPU] = &vcpu_stat_get_per_vm_fops, | ||
3585 | [KVM_STAT_VM] = &vm_stat_get_per_vm_fops, | ||
3586 | }; | ||
3587 | |||
3428 | static int vm_stat_get(void *_offset, u64 *val) | 3588 | static int vm_stat_get(void *_offset, u64 *val) |
3429 | { | 3589 | { |
3430 | unsigned offset = (long)_offset; | 3590 | unsigned offset = (long)_offset; |
3431 | struct kvm *kvm; | 3591 | struct kvm *kvm; |
3592 | struct kvm_stat_data stat_tmp = {.offset = offset}; | ||
3593 | u64 tmp_val; | ||
3432 | 3594 | ||
3433 | *val = 0; | 3595 | *val = 0; |
3434 | spin_lock(&kvm_lock); | 3596 | spin_lock(&kvm_lock); |
3435 | list_for_each_entry(kvm, &vm_list, vm_list) | 3597 | list_for_each_entry(kvm, &vm_list, vm_list) { |
3436 | *val += *(u32 *)((void *)kvm + offset); | 3598 | stat_tmp.kvm = kvm; |
3599 | vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val); | ||
3600 | *val += tmp_val; | ||
3601 | } | ||
3437 | spin_unlock(&kvm_lock); | 3602 | spin_unlock(&kvm_lock); |
3438 | return 0; | 3603 | return 0; |
3439 | } | 3604 | } |
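The rework above makes the global vm_stat_get() a sum of the per-VM accessor over every VM on vm_list, so the global files and the new per-VM files always read a counter the same way. A minimal sketch of that aggregation shape (the struct, field and list are invented):

#include <stdint.h>
#include <stdio.h>

struct kvm { uint32_t stat; struct kvm *next; };

/* Per-VM accessor: the single reader both interfaces share. */
static uint64_t per_vm_get(const struct kvm *kvm)
{
	return kvm->stat;
}

int main(void)
{
	struct kvm b = { 7, NULL }, a = { 3, &b };
	uint64_t total = 0;

	for (const struct kvm *kvm = &a; kvm; kvm = kvm->next)
		total += per_vm_get(kvm);	/* global = sum over per-VM */

	printf("total %llu\n", (unsigned long long)total);
	return 0;
}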
@@ -3444,15 +3609,16 @@ static int vcpu_stat_get(void *_offset, u64 *val) | |||
3444 | { | 3609 | { |
3445 | unsigned offset = (long)_offset; | 3610 | unsigned offset = (long)_offset; |
3446 | struct kvm *kvm; | 3611 | struct kvm *kvm; |
3447 | struct kvm_vcpu *vcpu; | 3612 | struct kvm_stat_data stat_tmp = {.offset = offset}; |
3448 | int i; | 3613 | u64 tmp_val; |
3449 | 3614 | ||
3450 | *val = 0; | 3615 | *val = 0; |
3451 | spin_lock(&kvm_lock); | 3616 | spin_lock(&kvm_lock); |
3452 | list_for_each_entry(kvm, &vm_list, vm_list) | 3617 | list_for_each_entry(kvm, &vm_list, vm_list) { |
3453 | kvm_for_each_vcpu(i, vcpu, kvm) | 3618 | stat_tmp.kvm = kvm; |
3454 | *val += *(u32 *)((void *)vcpu + offset); | 3619 | vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val); |
3455 | 3620 | *val += tmp_val; | |
3621 | } | ||
3456 | spin_unlock(&kvm_lock); | 3622 | spin_unlock(&kvm_lock); |
3457 | return 0; | 3623 | return 0; |
3458 | } | 3624 | } |
@@ -3473,7 +3639,8 @@ static int kvm_init_debug(void) | |||
3473 | if (kvm_debugfs_dir == NULL) | 3639 | if (kvm_debugfs_dir == NULL) |
3474 | goto out; | 3640 | goto out; |
3475 | 3641 | ||
3476 | for (p = debugfs_entries; p->name; ++p) { | 3642 | kvm_debugfs_num_entries = 0; |
3643 | for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) { | ||
3477 | if (!debugfs_create_file(p->name, 0444, kvm_debugfs_dir, | 3644 | if (!debugfs_create_file(p->name, 0444, kvm_debugfs_dir, |
3478 | (void *)(long)p->offset, | 3645 | (void *)(long)p->offset, |
3479 | stat_fops[p->kind])) | 3646 | stat_fops[p->kind])) |