diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-02-26 12:28:35 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-02-26 12:28:35 -0500 |
| commit | d4858aaf6bd8a90e2dacc0dfec2077e334dcedbf (patch) | |
| tree | ccb1934d3943fd7ed443f533409d02f4e2a5ad05 | |
| parent | 4a3928c6f8a53fa1aed28ccba227742486e8ddcb (diff) | |
| parent | 9c5e0afaf15788bcbd1c3469da701ac3da826886 (diff) | |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Paolo Bonzini:
"s390:
- optimization for the exitless interrupt support that was merged in 4.16-rc1
- improve the branch prediction blocking for nested KVM
- replace some jump tables with switch statements to improve expoline performance
- fixes for multiple epoch facility
ARM:
- fix the interaction of userspace irqchip VMs with in-kernel irqchip VMs
- make sure we can build 32-bit KVM/ARM with gcc-8.
x86:
- fixes for AMD SEV
- fixes for Intel nested VMX, emulated UMIP and a dump_stack() on VM startup
- fixes for async page fault migration
- small optimization to PV TLB flush (new in 4.16-rc1)
- syzkaller fixes
Generic:
- compiler warning fixes
- syzkaller fixes
- more improvements to the kvm_stat tool
Two more small Spectre fixes are going to reach you via Ingo"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (40 commits)
KVM: SVM: Fix SEV LAUNCH_SECRET command
KVM: SVM: install RSM intercept
KVM: SVM: no need to call access_ok() in LAUNCH_MEASURE command
include: psp-sev: Capitalize invalid length enum
crypto: ccp: Fix sparse, use plain integer as NULL pointer
KVM: X86: Avoid traversing all the cpus for pv tlb flush when steal time is disabled
x86/kvm: Make parse_no_xxx __init for kvm
KVM: x86: fix backward migration with async_PF
kvm: fix warning for non-x86 builds
kvm: fix warning for CONFIG_HAVE_KVM_EVENTFD builds
tools/kvm_stat: print 'Total' line for multiple events only
tools/kvm_stat: group child events indented after parent
tools/kvm_stat: separate drilldown and fields filtering
tools/kvm_stat: eliminate extra guest/pid selection dialog
tools/kvm_stat: mark private methods as such
tools/kvm_stat: fix debugfs handling
tools/kvm_stat: print error on invalid regex
tools/kvm_stat: fix crash when filtering out all non-child trace events
tools/kvm_stat: avoid 'is' for equality checks
tools/kvm_stat: use a more pythonic way to iterate over dictionaries
...
| -rw-r--r-- | Documentation/virtual/kvm/cpuid.txt | 4 | ||||
| -rw-r--r-- | Documentation/virtual/kvm/msr.txt | 3 | ||||
| -rw-r--r-- | arch/arm/kvm/hyp/Makefile | 5 | ||||
| -rw-r--r-- | arch/arm/kvm/hyp/banked-sr.c | 4 | ||||
| -rw-r--r-- | arch/s390/kvm/intercept.c | 51 | ||||
| -rw-r--r-- | arch/s390/kvm/interrupt.c | 123 | ||||
| -rw-r--r-- | arch/s390/kvm/kvm-s390.c | 79 | ||||
| -rw-r--r-- | arch/s390/kvm/kvm-s390.h | 7 | ||||
| -rw-r--r-- | arch/s390/kvm/priv.c | 192 | ||||
| -rw-r--r-- | arch/s390/kvm/vsie.c | 20 | ||||
| -rw-r--r-- | arch/x86/include/asm/kvm_host.h | 3 | ||||
| -rw-r--r-- | arch/x86/include/uapi/asm/kvm_para.h | 1 | ||||
| -rw-r--r-- | arch/x86/kernel/kvm.c | 20 | ||||
| -rw-r--r-- | arch/x86/kvm/cpuid.c | 3 | ||||
| -rw-r--r-- | arch/x86/kvm/lapic.c | 1 | ||||
| -rw-r--r-- | arch/x86/kvm/mmu.c | 2 | ||||
| -rw-r--r-- | arch/x86/kvm/svm.c | 37 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx.c | 10 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.c | 7 | ||||
| -rw-r--r-- | drivers/crypto/ccp/psp-dev.c | 8 | ||||
| -rw-r--r-- | include/linux/kvm_host.h | 6 | ||||
| -rw-r--r-- | include/uapi/linux/psp-sev.h | 2 | ||||
| -rwxr-xr-x | tools/kvm/kvm_stat/kvm_stat | 503 | ||||
| -rw-r--r-- | tools/kvm/kvm_stat/kvm_stat.txt | 4 | ||||
| -rw-r--r-- | virt/kvm/arm/arch_timer.c | 116 | ||||
| -rw-r--r-- | virt/kvm/kvm_main.c | 3 |
26 files changed, 698 insertions, 516 deletions
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt index dcab6dc11e3b..87a7506f31c2 100644 --- a/Documentation/virtual/kvm/cpuid.txt +++ b/Documentation/virtual/kvm/cpuid.txt | |||
| @@ -58,6 +58,10 @@ KVM_FEATURE_PV_TLB_FLUSH || 9 || guest checks this feature bit | |||
| 58 | || || before enabling paravirtualized | 58 | || || before enabling paravirtualized |
| 59 | || || tlb flush. | 59 | || || tlb flush. |
| 60 | ------------------------------------------------------------------------------ | 60 | ------------------------------------------------------------------------------ |
| 61 | KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit | ||
| 62 | || || can be enabled by setting bit 2 | ||
| 63 | || || when writing to msr 0x4b564d02 | ||
| 64 | ------------------------------------------------------------------------------ | ||
| 61 | KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side | 65 | KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side |
| 62 | || || per-cpu warps are expected in | 66 | || || per-cpu warps are expected in |
| 63 | || || kvmclock. | 67 | || || kvmclock. |
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt index 1ebecc115dc6..f3f0d57ced8e 100644 --- a/Documentation/virtual/kvm/msr.txt +++ b/Documentation/virtual/kvm/msr.txt | |||
| @@ -170,7 +170,8 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02 | |||
| 170 | when asynchronous page faults are enabled on the vcpu 0 when | 170 | when asynchronous page faults are enabled on the vcpu 0 when |
| 171 | disabled. Bit 1 is 1 if asynchronous page faults can be injected | 171 | disabled. Bit 1 is 1 if asynchronous page faults can be injected |
| 172 | when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults | 172 | when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults |
| 173 | are delivered to L1 as #PF vmexits. | 173 | are delivered to L1 as #PF vmexits. Bit 2 can be set only if |
| 174 | KVM_FEATURE_ASYNC_PF_VMEXIT is present in CPUID. | ||
| 174 | 175 | ||
| 175 | First 4 byte of 64 byte memory location will be written to by | 176 | First 4 byte of 64 byte memory location will be written to by |
| 176 | the hypervisor at the time of asynchronous page fault (APF) | 177 | the hypervisor at the time of asynchronous page fault (APF) |
diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile index 5638ce0c9524..63d6b404d88e 100644 --- a/arch/arm/kvm/hyp/Makefile +++ b/arch/arm/kvm/hyp/Makefile | |||
| @@ -7,6 +7,8 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING | |||
| 7 | 7 | ||
| 8 | KVM=../../../../virt/kvm | 8 | KVM=../../../../virt/kvm |
| 9 | 9 | ||
| 10 | CFLAGS_ARMV7VE :=$(call cc-option, -march=armv7ve) | ||
| 11 | |||
| 10 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o | 12 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o |
| 11 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o | 13 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o |
| 12 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o | 14 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o |
| @@ -15,7 +17,10 @@ obj-$(CONFIG_KVM_ARM_HOST) += tlb.o | |||
| 15 | obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o | 17 | obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o |
| 16 | obj-$(CONFIG_KVM_ARM_HOST) += vfp.o | 18 | obj-$(CONFIG_KVM_ARM_HOST) += vfp.o |
| 17 | obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o | 19 | obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o |
| 20 | CFLAGS_banked-sr.o += $(CFLAGS_ARMV7VE) | ||
| 21 | |||
| 18 | obj-$(CONFIG_KVM_ARM_HOST) += entry.o | 22 | obj-$(CONFIG_KVM_ARM_HOST) += entry.o |
| 19 | obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o | 23 | obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o |
| 20 | obj-$(CONFIG_KVM_ARM_HOST) += switch.o | 24 | obj-$(CONFIG_KVM_ARM_HOST) += switch.o |
| 25 | CFLAGS_switch.o += $(CFLAGS_ARMV7VE) | ||
| 21 | obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o | 26 | obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o |
diff --git a/arch/arm/kvm/hyp/banked-sr.c b/arch/arm/kvm/hyp/banked-sr.c index 111bda8cdebd..be4b8b0a40ad 100644 --- a/arch/arm/kvm/hyp/banked-sr.c +++ b/arch/arm/kvm/hyp/banked-sr.c | |||
| @@ -20,6 +20,10 @@ | |||
| 20 | 20 | ||
| 21 | #include <asm/kvm_hyp.h> | 21 | #include <asm/kvm_hyp.h> |
| 22 | 22 | ||
| 23 | /* | ||
| 24 | * gcc before 4.9 doesn't understand -march=armv7ve, so we have to | ||
| 25 | * trick the assembler. | ||
| 26 | */ | ||
| 23 | __asm__(".arch_extension virt"); | 27 | __asm__(".arch_extension virt"); |
| 24 | 28 | ||
| 25 | void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt) | 29 | void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt) |
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 9c7d70715862..07c6e81163bf 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c | |||
| @@ -22,22 +22,6 @@ | |||
| 22 | #include "trace.h" | 22 | #include "trace.h" |
| 23 | #include "trace-s390.h" | 23 | #include "trace-s390.h" |
| 24 | 24 | ||
| 25 | |||
| 26 | static const intercept_handler_t instruction_handlers[256] = { | ||
| 27 | [0x01] = kvm_s390_handle_01, | ||
| 28 | [0x82] = kvm_s390_handle_lpsw, | ||
| 29 | [0x83] = kvm_s390_handle_diag, | ||
| 30 | [0xaa] = kvm_s390_handle_aa, | ||
| 31 | [0xae] = kvm_s390_handle_sigp, | ||
| 32 | [0xb2] = kvm_s390_handle_b2, | ||
| 33 | [0xb6] = kvm_s390_handle_stctl, | ||
| 34 | [0xb7] = kvm_s390_handle_lctl, | ||
| 35 | [0xb9] = kvm_s390_handle_b9, | ||
| 36 | [0xe3] = kvm_s390_handle_e3, | ||
| 37 | [0xe5] = kvm_s390_handle_e5, | ||
| 38 | [0xeb] = kvm_s390_handle_eb, | ||
| 39 | }; | ||
| 40 | |||
| 41 | u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu) | 25 | u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu) |
| 42 | { | 26 | { |
| 43 | struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; | 27 | struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; |
| @@ -129,16 +113,39 @@ static int handle_validity(struct kvm_vcpu *vcpu) | |||
| 129 | 113 | ||
| 130 | static int handle_instruction(struct kvm_vcpu *vcpu) | 114 | static int handle_instruction(struct kvm_vcpu *vcpu) |
| 131 | { | 115 | { |
| 132 | intercept_handler_t handler; | ||
| 133 | |||
| 134 | vcpu->stat.exit_instruction++; | 116 | vcpu->stat.exit_instruction++; |
| 135 | trace_kvm_s390_intercept_instruction(vcpu, | 117 | trace_kvm_s390_intercept_instruction(vcpu, |
| 136 | vcpu->arch.sie_block->ipa, | 118 | vcpu->arch.sie_block->ipa, |
| 137 | vcpu->arch.sie_block->ipb); | 119 | vcpu->arch.sie_block->ipb); |
| 138 | handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8]; | 120 | |
| 139 | if (handler) | 121 | switch (vcpu->arch.sie_block->ipa >> 8) { |
| 140 | return handler(vcpu); | 122 | case 0x01: |
| 141 | return -EOPNOTSUPP; | 123 | return kvm_s390_handle_01(vcpu); |
| 124 | case 0x82: | ||
| 125 | return kvm_s390_handle_lpsw(vcpu); | ||
| 126 | case 0x83: | ||
| 127 | return kvm_s390_handle_diag(vcpu); | ||
| 128 | case 0xaa: | ||
| 129 | return kvm_s390_handle_aa(vcpu); | ||
| 130 | case 0xae: | ||
| 131 | return kvm_s390_handle_sigp(vcpu); | ||
| 132 | case 0xb2: | ||
| 133 | return kvm_s390_handle_b2(vcpu); | ||
| 134 | case 0xb6: | ||
| 135 | return kvm_s390_handle_stctl(vcpu); | ||
| 136 | case 0xb7: | ||
| 137 | return kvm_s390_handle_lctl(vcpu); | ||
| 138 | case 0xb9: | ||
| 139 | return kvm_s390_handle_b9(vcpu); | ||
| 140 | case 0xe3: | ||
| 141 | return kvm_s390_handle_e3(vcpu); | ||
| 142 | case 0xe5: | ||
| 143 | return kvm_s390_handle_e5(vcpu); | ||
| 144 | case 0xeb: | ||
| 145 | return kvm_s390_handle_eb(vcpu); | ||
| 146 | default: | ||
| 147 | return -EOPNOTSUPP; | ||
| 148 | } | ||
| 142 | } | 149 | } |
| 143 | 150 | ||
| 144 | static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu) | 151 | static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu) |
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index aabf46f5f883..b04616b57a94 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
| @@ -169,8 +169,15 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu) | |||
| 169 | 169 | ||
| 170 | static int ckc_irq_pending(struct kvm_vcpu *vcpu) | 170 | static int ckc_irq_pending(struct kvm_vcpu *vcpu) |
| 171 | { | 171 | { |
| 172 | if (vcpu->arch.sie_block->ckc >= kvm_s390_get_tod_clock_fast(vcpu->kvm)) | 172 | const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm); |
| 173 | const u64 ckc = vcpu->arch.sie_block->ckc; | ||
| 174 | |||
| 175 | if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) { | ||
| 176 | if ((s64)ckc >= (s64)now) | ||
| 177 | return 0; | ||
| 178 | } else if (ckc >= now) { | ||
| 173 | return 0; | 179 | return 0; |
| 180 | } | ||
| 174 | return ckc_interrupts_enabled(vcpu); | 181 | return ckc_interrupts_enabled(vcpu); |
| 175 | } | 182 | } |
| 176 | 183 | ||
| @@ -187,12 +194,6 @@ static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu) | |||
| 187 | return kvm_s390_get_cpu_timer(vcpu) >> 63; | 194 | return kvm_s390_get_cpu_timer(vcpu) >> 63; |
| 188 | } | 195 | } |
| 189 | 196 | ||
| 190 | static inline int is_ioirq(unsigned long irq_type) | ||
| 191 | { | ||
| 192 | return ((irq_type >= IRQ_PEND_IO_ISC_7) && | ||
| 193 | (irq_type <= IRQ_PEND_IO_ISC_0)); | ||
| 194 | } | ||
| 195 | |||
| 196 | static uint64_t isc_to_isc_bits(int isc) | 197 | static uint64_t isc_to_isc_bits(int isc) |
| 197 | { | 198 | { |
| 198 | return (0x80 >> isc) << 24; | 199 | return (0x80 >> isc) << 24; |
| @@ -236,10 +237,15 @@ static inline int kvm_s390_gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gis | |||
| 236 | return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); | 237 | return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); |
| 237 | } | 238 | } |
| 238 | 239 | ||
| 239 | static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) | 240 | static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu) |
| 240 | { | 241 | { |
| 241 | return vcpu->kvm->arch.float_int.pending_irqs | | 242 | return vcpu->kvm->arch.float_int.pending_irqs | |
| 242 | vcpu->arch.local_int.pending_irqs | | 243 | vcpu->arch.local_int.pending_irqs; |
| 244 | } | ||
| 245 | |||
| 246 | static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) | ||
| 247 | { | ||
| 248 | return pending_irqs_no_gisa(vcpu) | | ||
| 243 | kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7; | 249 | kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7; |
| 244 | } | 250 | } |
| 245 | 251 | ||
| @@ -337,7 +343,7 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) | |||
| 337 | 343 | ||
| 338 | static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) | 344 | static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) |
| 339 | { | 345 | { |
| 340 | if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK)) | 346 | if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_IO_MASK)) |
| 341 | return; | 347 | return; |
| 342 | else if (psw_ioint_disabled(vcpu)) | 348 | else if (psw_ioint_disabled(vcpu)) |
| 343 | kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT); | 349 | kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT); |
| @@ -1011,24 +1017,6 @@ out: | |||
| 1011 | return rc; | 1017 | return rc; |
| 1012 | } | 1018 | } |
| 1013 | 1019 | ||
| 1014 | typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu); | ||
| 1015 | |||
| 1016 | static const deliver_irq_t deliver_irq_funcs[] = { | ||
| 1017 | [IRQ_PEND_MCHK_EX] = __deliver_machine_check, | ||
| 1018 | [IRQ_PEND_MCHK_REP] = __deliver_machine_check, | ||
| 1019 | [IRQ_PEND_PROG] = __deliver_prog, | ||
| 1020 | [IRQ_PEND_EXT_EMERGENCY] = __deliver_emergency_signal, | ||
| 1021 | [IRQ_PEND_EXT_EXTERNAL] = __deliver_external_call, | ||
| 1022 | [IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc, | ||
| 1023 | [IRQ_PEND_EXT_CPU_TIMER] = __deliver_cpu_timer, | ||
| 1024 | [IRQ_PEND_RESTART] = __deliver_restart, | ||
| 1025 | [IRQ_PEND_SET_PREFIX] = __deliver_set_prefix, | ||
| 1026 | [IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init, | ||
| 1027 | [IRQ_PEND_EXT_SERVICE] = __deliver_service, | ||
| 1028 | [IRQ_PEND_PFAULT_DONE] = __deliver_pfault_done, | ||
| 1029 | [IRQ_PEND_VIRTIO] = __deliver_virtio, | ||
| 1030 | }; | ||
| 1031 | |||
| 1032 | /* Check whether an external call is pending (deliverable or not) */ | 1020 | /* Check whether an external call is pending (deliverable or not) */ |
| 1033 | int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) | 1021 | int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) |
| 1034 | { | 1022 | { |
| @@ -1066,13 +1054,19 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | |||
| 1066 | 1054 | ||
| 1067 | static u64 __calculate_sltime(struct kvm_vcpu *vcpu) | 1055 | static u64 __calculate_sltime(struct kvm_vcpu *vcpu) |
| 1068 | { | 1056 | { |
| 1069 | u64 now, cputm, sltime = 0; | 1057 | const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm); |
| 1058 | const u64 ckc = vcpu->arch.sie_block->ckc; | ||
| 1059 | u64 cputm, sltime = 0; | ||
| 1070 | 1060 | ||
| 1071 | if (ckc_interrupts_enabled(vcpu)) { | 1061 | if (ckc_interrupts_enabled(vcpu)) { |
| 1072 | now = kvm_s390_get_tod_clock_fast(vcpu->kvm); | 1062 | if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) { |
| 1073 | sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); | 1063 | if ((s64)now < (s64)ckc) |
| 1074 | /* already expired or overflow? */ | 1064 | sltime = tod_to_ns((s64)ckc - (s64)now); |
| 1075 | if (!sltime || vcpu->arch.sie_block->ckc <= now) | 1065 | } else if (now < ckc) { |
| 1066 | sltime = tod_to_ns(ckc - now); | ||
| 1067 | } | ||
| 1068 | /* already expired */ | ||
| 1069 | if (!sltime) | ||
| 1076 | return 0; | 1070 | return 0; |
| 1077 | if (cpu_timer_interrupts_enabled(vcpu)) { | 1071 | if (cpu_timer_interrupts_enabled(vcpu)) { |
| 1078 | cputm = kvm_s390_get_cpu_timer(vcpu); | 1072 | cputm = kvm_s390_get_cpu_timer(vcpu); |
| @@ -1192,7 +1186,6 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) | |||
| 1192 | int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) | 1186 | int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) |
| 1193 | { | 1187 | { |
| 1194 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 1188 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
| 1195 | deliver_irq_t func; | ||
| 1196 | int rc = 0; | 1189 | int rc = 0; |
| 1197 | unsigned long irq_type; | 1190 | unsigned long irq_type; |
| 1198 | unsigned long irqs; | 1191 | unsigned long irqs; |
| @@ -1212,16 +1205,57 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) | |||
| 1212 | while ((irqs = deliverable_irqs(vcpu)) && !rc) { | 1205 | while ((irqs = deliverable_irqs(vcpu)) && !rc) { |
| 1213 | /* bits are in the reverse order of interrupt priority */ | 1206 | /* bits are in the reverse order of interrupt priority */ |
| 1214 | irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT); | 1207 | irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT); |
| 1215 | if (is_ioirq(irq_type)) { | 1208 | switch (irq_type) { |
| 1209 | case IRQ_PEND_IO_ISC_0: | ||
| 1210 | case IRQ_PEND_IO_ISC_1: | ||
| 1211 | case IRQ_PEND_IO_ISC_2: | ||
| 1212 | case IRQ_PEND_IO_ISC_3: | ||
| 1213 | case IRQ_PEND_IO_ISC_4: | ||
| 1214 | case IRQ_PEND_IO_ISC_5: | ||
| 1215 | case IRQ_PEND_IO_ISC_6: | ||
| 1216 | case IRQ_PEND_IO_ISC_7: | ||
| 1216 | rc = __deliver_io(vcpu, irq_type); | 1217 | rc = __deliver_io(vcpu, irq_type); |
| 1217 | } else { | 1218 | break; |
| 1218 | func = deliver_irq_funcs[irq_type]; | 1219 | case IRQ_PEND_MCHK_EX: |
| 1219 | if (!func) { | 1220 | case IRQ_PEND_MCHK_REP: |
| 1220 | WARN_ON_ONCE(func == NULL); | 1221 | rc = __deliver_machine_check(vcpu); |
| 1221 | clear_bit(irq_type, &li->pending_irqs); | 1222 | break; |
| 1222 | continue; | 1223 | case IRQ_PEND_PROG: |
| 1223 | } | 1224 | rc = __deliver_prog(vcpu); |
| 1224 | rc = func(vcpu); | 1225 | break; |
| 1226 | case IRQ_PEND_EXT_EMERGENCY: | ||
| 1227 | rc = __deliver_emergency_signal(vcpu); | ||
| 1228 | break; | ||
| 1229 | case IRQ_PEND_EXT_EXTERNAL: | ||
| 1230 | rc = __deliver_external_call(vcpu); | ||
| 1231 | break; | ||
| 1232 | case IRQ_PEND_EXT_CLOCK_COMP: | ||
| 1233 | rc = __deliver_ckc(vcpu); | ||
| 1234 | break; | ||
| 1235 | case IRQ_PEND_EXT_CPU_TIMER: | ||
| 1236 | rc = __deliver_cpu_timer(vcpu); | ||
| 1237 | break; | ||
| 1238 | case IRQ_PEND_RESTART: | ||
| 1239 | rc = __deliver_restart(vcpu); | ||
| 1240 | break; | ||
| 1241 | case IRQ_PEND_SET_PREFIX: | ||
| 1242 | rc = __deliver_set_prefix(vcpu); | ||
| 1243 | break; | ||
| 1244 | case IRQ_PEND_PFAULT_INIT: | ||
| 1245 | rc = __deliver_pfault_init(vcpu); | ||
| 1246 | break; | ||
| 1247 | case IRQ_PEND_EXT_SERVICE: | ||
| 1248 | rc = __deliver_service(vcpu); | ||
| 1249 | break; | ||
| 1250 | case IRQ_PEND_PFAULT_DONE: | ||
| 1251 | rc = __deliver_pfault_done(vcpu); | ||
| 1252 | break; | ||
| 1253 | case IRQ_PEND_VIRTIO: | ||
| 1254 | rc = __deliver_virtio(vcpu); | ||
| 1255 | break; | ||
| 1256 | default: | ||
| 1257 | WARN_ONCE(1, "Unknown pending irq type %ld", irq_type); | ||
| 1258 | clear_bit(irq_type, &li->pending_irqs); | ||
| 1225 | } | 1259 | } |
| 1226 | } | 1260 | } |
| 1227 | 1261 | ||
| @@ -1701,7 +1735,8 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type) | |||
| 1701 | kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT); | 1735 | kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT); |
| 1702 | break; | 1736 | break; |
| 1703 | case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: | 1737 | case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: |
| 1704 | kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT); | 1738 | if (!(type & KVM_S390_INT_IO_AI_MASK && kvm->arch.gisa)) |
| 1739 | kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT); | ||
| 1705 | break; | 1740 | break; |
| 1706 | default: | 1741 | default: |
| 1707 | kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT); | 1742 | kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT); |
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ba4c7092335a..77d7818130db 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
| @@ -179,6 +179,28 @@ int kvm_arch_hardware_enable(void) | |||
| 179 | static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, | 179 | static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, |
| 180 | unsigned long end); | 180 | unsigned long end); |
| 181 | 181 | ||
| 182 | static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta) | ||
| 183 | { | ||
| 184 | u8 delta_idx = 0; | ||
| 185 | |||
| 186 | /* | ||
| 187 | * The TOD jumps by delta, we have to compensate this by adding | ||
| 188 | * -delta to the epoch. | ||
| 189 | */ | ||
| 190 | delta = -delta; | ||
| 191 | |||
| 192 | /* sign-extension - we're adding to signed values below */ | ||
| 193 | if ((s64)delta < 0) | ||
| 194 | delta_idx = -1; | ||
| 195 | |||
| 196 | scb->epoch += delta; | ||
| 197 | if (scb->ecd & ECD_MEF) { | ||
| 198 | scb->epdx += delta_idx; | ||
| 199 | if (scb->epoch < delta) | ||
| 200 | scb->epdx += 1; | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 182 | /* | 204 | /* |
| 183 | * This callback is executed during stop_machine(). All CPUs are therefore | 205 | * This callback is executed during stop_machine(). All CPUs are therefore |
| 184 | * temporarily stopped. In order not to change guest behavior, we have to | 206 | * temporarily stopped. In order not to change guest behavior, we have to |
| @@ -194,13 +216,17 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val, | |||
| 194 | unsigned long long *delta = v; | 216 | unsigned long long *delta = v; |
| 195 | 217 | ||
| 196 | list_for_each_entry(kvm, &vm_list, vm_list) { | 218 | list_for_each_entry(kvm, &vm_list, vm_list) { |
| 197 | kvm->arch.epoch -= *delta; | ||
| 198 | kvm_for_each_vcpu(i, vcpu, kvm) { | 219 | kvm_for_each_vcpu(i, vcpu, kvm) { |
| 199 | vcpu->arch.sie_block->epoch -= *delta; | 220 | kvm_clock_sync_scb(vcpu->arch.sie_block, *delta); |
| 221 | if (i == 0) { | ||
| 222 | kvm->arch.epoch = vcpu->arch.sie_block->epoch; | ||
| 223 | kvm->arch.epdx = vcpu->arch.sie_block->epdx; | ||
| 224 | } | ||
| 200 | if (vcpu->arch.cputm_enabled) | 225 | if (vcpu->arch.cputm_enabled) |
| 201 | vcpu->arch.cputm_start += *delta; | 226 | vcpu->arch.cputm_start += *delta; |
| 202 | if (vcpu->arch.vsie_block) | 227 | if (vcpu->arch.vsie_block) |
| 203 | vcpu->arch.vsie_block->epoch -= *delta; | 228 | kvm_clock_sync_scb(vcpu->arch.vsie_block, |
| 229 | *delta); | ||
| 204 | } | 230 | } |
| 205 | } | 231 | } |
| 206 | return NOTIFY_OK; | 232 | return NOTIFY_OK; |
| @@ -902,12 +928,9 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) | |||
| 902 | if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod))) | 928 | if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod))) |
| 903 | return -EFAULT; | 929 | return -EFAULT; |
| 904 | 930 | ||
| 905 | if (test_kvm_facility(kvm, 139)) | 931 | if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx) |
| 906 | kvm_s390_set_tod_clock_ext(kvm, >od); | ||
| 907 | else if (gtod.epoch_idx == 0) | ||
| 908 | kvm_s390_set_tod_clock(kvm, gtod.tod); | ||
| 909 | else | ||
| 910 | return -EINVAL; | 932 | return -EINVAL; |
| 933 | kvm_s390_set_tod_clock(kvm, &gtod); | ||
| 911 | 934 | ||
| 912 | VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx", | 935 | VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx", |
| 913 | gtod.epoch_idx, gtod.tod); | 936 | gtod.epoch_idx, gtod.tod); |
| @@ -932,13 +955,14 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) | |||
| 932 | 955 | ||
| 933 | static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) | 956 | static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) |
| 934 | { | 957 | { |
| 935 | u64 gtod; | 958 | struct kvm_s390_vm_tod_clock gtod = { 0 }; |
| 936 | 959 | ||
| 937 | if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod))) | 960 | if (copy_from_user(&gtod.tod, (void __user *)attr->addr, |
| 961 | sizeof(gtod.tod))) | ||
| 938 | return -EFAULT; | 962 | return -EFAULT; |
| 939 | 963 | ||
| 940 | kvm_s390_set_tod_clock(kvm, gtod); | 964 | kvm_s390_set_tod_clock(kvm, &gtod); |
| 941 | VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod); | 965 | VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod); |
| 942 | return 0; | 966 | return 0; |
| 943 | } | 967 | } |
| 944 | 968 | ||
| @@ -2389,6 +2413,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) | |||
| 2389 | mutex_lock(&vcpu->kvm->lock); | 2413 | mutex_lock(&vcpu->kvm->lock); |
| 2390 | preempt_disable(); | 2414 | preempt_disable(); |
| 2391 | vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; | 2415 | vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; |
| 2416 | vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; | ||
| 2392 | preempt_enable(); | 2417 | preempt_enable(); |
| 2393 | mutex_unlock(&vcpu->kvm->lock); | 2418 | mutex_unlock(&vcpu->kvm->lock); |
| 2394 | if (!kvm_is_ucontrol(vcpu->kvm)) { | 2419 | if (!kvm_is_ucontrol(vcpu->kvm)) { |
| @@ -3021,8 +3046,8 @@ retry: | |||
| 3021 | return 0; | 3046 | return 0; |
| 3022 | } | 3047 | } |
| 3023 | 3048 | ||
| 3024 | void kvm_s390_set_tod_clock_ext(struct kvm *kvm, | 3049 | void kvm_s390_set_tod_clock(struct kvm *kvm, |
| 3025 | const struct kvm_s390_vm_tod_clock *gtod) | 3050 | const struct kvm_s390_vm_tod_clock *gtod) |
| 3026 | { | 3051 | { |
| 3027 | struct kvm_vcpu *vcpu; | 3052 | struct kvm_vcpu *vcpu; |
| 3028 | struct kvm_s390_tod_clock_ext htod; | 3053 | struct kvm_s390_tod_clock_ext htod; |
| @@ -3034,10 +3059,12 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm, | |||
| 3034 | get_tod_clock_ext((char *)&htod); | 3059 | get_tod_clock_ext((char *)&htod); |
| 3035 | 3060 | ||
| 3036 | kvm->arch.epoch = gtod->tod - htod.tod; | 3061 | kvm->arch.epoch = gtod->tod - htod.tod; |
| 3037 | kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; | 3062 | kvm->arch.epdx = 0; |
| 3038 | 3063 | if (test_kvm_facility(kvm, 139)) { | |
| 3039 | if (kvm->arch.epoch > gtod->tod) | 3064 | kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; |
| 3040 | kvm->arch.epdx -= 1; | 3065 | if (kvm->arch.epoch > gtod->tod) |
| 3066 | kvm->arch.epdx -= 1; | ||
| 3067 | } | ||
| 3041 | 3068 | ||
| 3042 | kvm_s390_vcpu_block_all(kvm); | 3069 | kvm_s390_vcpu_block_all(kvm); |
| 3043 | kvm_for_each_vcpu(i, vcpu, kvm) { | 3070 | kvm_for_each_vcpu(i, vcpu, kvm) { |
| @@ -3050,22 +3077,6 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm, | |||
| 3050 | mutex_unlock(&kvm->lock); | 3077 | mutex_unlock(&kvm->lock); |
| 3051 | } | 3078 | } |
| 3052 | 3079 | ||
| 3053 | void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod) | ||
| 3054 | { | ||
| 3055 | struct kvm_vcpu *vcpu; | ||
| 3056 | int i; | ||
| 3057 | |||
| 3058 | mutex_lock(&kvm->lock); | ||
| 3059 | preempt_disable(); | ||
| 3060 | kvm->arch.epoch = tod - get_tod_clock(); | ||
| 3061 | kvm_s390_vcpu_block_all(kvm); | ||
| 3062 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
| 3063 | vcpu->arch.sie_block->epoch = kvm->arch.epoch; | ||
| 3064 | kvm_s390_vcpu_unblock_all(kvm); | ||
| 3065 | preempt_enable(); | ||
| 3066 | mutex_unlock(&kvm->lock); | ||
| 3067 | } | ||
| 3068 | |||
| 3069 | /** | 3080 | /** |
| 3070 | * kvm_arch_fault_in_page - fault-in guest page if necessary | 3081 | * kvm_arch_fault_in_page - fault-in guest page if necessary |
| 3071 | * @vcpu: The corresponding virtual cpu | 3082 | * @vcpu: The corresponding virtual cpu |
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index bd31b37b0e6f..f55ac0ef99ea 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
| @@ -19,8 +19,6 @@ | |||
| 19 | #include <asm/processor.h> | 19 | #include <asm/processor.h> |
| 20 | #include <asm/sclp.h> | 20 | #include <asm/sclp.h> |
| 21 | 21 | ||
| 22 | typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); | ||
| 23 | |||
| 24 | /* Transactional Memory Execution related macros */ | 22 | /* Transactional Memory Execution related macros */ |
| 25 | #define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE)) | 23 | #define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE)) |
| 26 | #define TDB_FORMAT1 1 | 24 | #define TDB_FORMAT1 1 |
| @@ -283,9 +281,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); | |||
| 283 | int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); | 281 | int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); |
| 284 | 282 | ||
| 285 | /* implemented in kvm-s390.c */ | 283 | /* implemented in kvm-s390.c */ |
| 286 | void kvm_s390_set_tod_clock_ext(struct kvm *kvm, | 284 | void kvm_s390_set_tod_clock(struct kvm *kvm, |
| 287 | const struct kvm_s390_vm_tod_clock *gtod); | 285 | const struct kvm_s390_vm_tod_clock *gtod); |
| 288 | void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod); | ||
| 289 | long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); | 286 | long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); |
| 290 | int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); | 287 | int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); |
| 291 | int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); | 288 | int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); |
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index c4c4e157c036..f0b4185158af 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c | |||
| @@ -85,9 +85,10 @@ int kvm_s390_handle_e3(struct kvm_vcpu *vcpu) | |||
| 85 | /* Handle SCK (SET CLOCK) interception */ | 85 | /* Handle SCK (SET CLOCK) interception */ |
| 86 | static int handle_set_clock(struct kvm_vcpu *vcpu) | 86 | static int handle_set_clock(struct kvm_vcpu *vcpu) |
| 87 | { | 87 | { |
| 88 | struct kvm_s390_vm_tod_clock gtod = { 0 }; | ||
| 88 | int rc; | 89 | int rc; |
| 89 | u8 ar; | 90 | u8 ar; |
| 90 | u64 op2, val; | 91 | u64 op2; |
| 91 | 92 | ||
| 92 | vcpu->stat.instruction_sck++; | 93 | vcpu->stat.instruction_sck++; |
| 93 | 94 | ||
| @@ -97,12 +98,12 @@ static int handle_set_clock(struct kvm_vcpu *vcpu) | |||
| 97 | op2 = kvm_s390_get_base_disp_s(vcpu, &ar); | 98 | op2 = kvm_s390_get_base_disp_s(vcpu, &ar); |
| 98 | if (op2 & 7) /* Operand must be on a doubleword boundary */ | 99 | if (op2 & 7) /* Operand must be on a doubleword boundary */ |
| 99 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 100 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
| 100 | rc = read_guest(vcpu, op2, ar, &val, sizeof(val)); | 101 | rc = read_guest(vcpu, op2, ar, >od.tod, sizeof(gtod.tod)); |
| 101 | if (rc) | 102 | if (rc) |
| 102 | return kvm_s390_inject_prog_cond(vcpu, rc); | 103 | return kvm_s390_inject_prog_cond(vcpu, rc); |
| 103 | 104 | ||
| 104 | VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val); | 105 | VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod); |
| 105 | kvm_s390_set_tod_clock(vcpu->kvm, val); | 106 | kvm_s390_set_tod_clock(vcpu->kvm, >od); |
| 106 | 107 | ||
| 107 | kvm_s390_set_psw_cc(vcpu, 0); | 108 | kvm_s390_set_psw_cc(vcpu, 0); |
| 108 | return 0; | 109 | return 0; |
| @@ -795,55 +796,60 @@ out: | |||
| 795 | return rc; | 796 | return rc; |
| 796 | } | 797 | } |
| 797 | 798 | ||
| 798 | static const intercept_handler_t b2_handlers[256] = { | ||
| 799 | [0x02] = handle_stidp, | ||
| 800 | [0x04] = handle_set_clock, | ||
| 801 | [0x10] = handle_set_prefix, | ||
| 802 | [0x11] = handle_store_prefix, | ||
| 803 | [0x12] = handle_store_cpu_address, | ||
| 804 | [0x14] = kvm_s390_handle_vsie, | ||
| 805 | [0x21] = handle_ipte_interlock, | ||
| 806 | [0x29] = handle_iske, | ||
| 807 | [0x2a] = handle_rrbe, | ||
| 808 | [0x2b] = handle_sske, | ||
| 809 | [0x2c] = handle_test_block, | ||
| 810 | [0x30] = handle_io_inst, | ||
| 811 | [0x31] = handle_io_inst, | ||
| 812 | [0x32] = handle_io_inst, | ||
| 813 | [0x33] = handle_io_inst, | ||
| 814 | [0x34] = handle_io_inst, | ||
| 815 | [0x35] = handle_io_inst, | ||
| 816 | [0x36] = handle_io_inst, | ||
| 817 | [0x37] = handle_io_inst, | ||
| 818 | [0x38] = handle_io_inst, | ||
| 819 | [0x39] = handle_io_inst, | ||
| 820 | [0x3a] = handle_io_inst, | ||
| 821 | [0x3b] = handle_io_inst, | ||
| 822 | [0x3c] = handle_io_inst, | ||
| 823 | [0x50] = handle_ipte_interlock, | ||
| 824 | [0x56] = handle_sthyi, | ||
| 825 | [0x5f] = handle_io_inst, | ||
| 826 | [0x74] = handle_io_inst, | ||
| 827 | [0x76] = handle_io_inst, | ||
| 828 | [0x7d] = handle_stsi, | ||
| 829 | [0xb1] = handle_stfl, | ||
| 830 | [0xb2] = handle_lpswe, | ||
| 831 | }; | ||
| 832 | |||
| 833 | int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) | 799 | int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) |
| 834 | { | 800 | { |
| 835 | intercept_handler_t handler; | 801 | switch (vcpu->arch.sie_block->ipa & 0x00ff) { |
| 836 | 802 | case 0x02: | |
| 837 | /* | 803 | return handle_stidp(vcpu); |
| 838 | * A lot of B2 instructions are priviledged. Here we check for | 804 | case 0x04: |
| 839 | * the privileged ones, that we can handle in the kernel. | 805 | return handle_set_clock(vcpu); |
| 840 | * Anything else goes to userspace. | 806 | case 0x10: |
| 841 | */ | 807 | return handle_set_prefix(vcpu); |
| 842 | handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; | 808 | case 0x11: |
| 843 | if (handler) | 809 | return handle_store_prefix(vcpu); |
| 844 | return handler(vcpu); | 810 | case 0x12: |
| 845 | 811 | return handle_store_cpu_address(vcpu); | |
| 846 | return -EOPNOTSUPP; | 812 | case 0x14: |
| 813 | return kvm_s390_handle_vsie(vcpu); | ||
| 814 | case 0x21: | ||
| 815 | case 0x50: | ||
| 816 | return handle_ipte_interlock(vcpu); | ||
| 817 | case 0x29: | ||
| 818 | return handle_iske(vcpu); | ||
| 819 | case 0x2a: | ||
| 820 | return handle_rrbe(vcpu); | ||
| 821 | case 0x2b: | ||
| 822 | return handle_sske(vcpu); | ||
| 823 | case 0x2c: | ||
| 824 | return handle_test_block(vcpu); | ||
| 825 | case 0x30: | ||
| 826 | case 0x31: | ||
| 827 | case 0x32: | ||
| 828 | case 0x33: | ||
| 829 | case 0x34: | ||
| 830 | case 0x35: | ||
| 831 | case 0x36: | ||
| 832 | case 0x37: | ||
| 833 | case 0x38: | ||
| 834 | case 0x39: | ||
| 835 | case 0x3a: | ||
| 836 | case 0x3b: | ||
| 837 | case 0x3c: | ||
| 838 | case 0x5f: | ||
| 839 | case 0x74: | ||
| 840 | case 0x76: | ||
| 841 | return handle_io_inst(vcpu); | ||
| 842 | case 0x56: | ||
| 843 | return handle_sthyi(vcpu); | ||
| 844 | case 0x7d: | ||
| 845 | return handle_stsi(vcpu); | ||
| 846 | case 0xb1: | ||
| 847 | return handle_stfl(vcpu); | ||
| 848 | case 0xb2: | ||
| 849 | return handle_lpswe(vcpu); | ||
| 850 | default: | ||
| 851 | return -EOPNOTSUPP; | ||
| 852 | } | ||
| 847 | } | 853 | } |
| 848 | 854 | ||
| 849 | static int handle_epsw(struct kvm_vcpu *vcpu) | 855 | static int handle_epsw(struct kvm_vcpu *vcpu) |
| @@ -1105,25 +1111,22 @@ static int handle_essa(struct kvm_vcpu *vcpu) | |||
| 1105 | return 0; | 1111 | return 0; |
| 1106 | } | 1112 | } |
| 1107 | 1113 | ||
| 1108 | static const intercept_handler_t b9_handlers[256] = { | ||
| 1109 | [0x8a] = handle_ipte_interlock, | ||
| 1110 | [0x8d] = handle_epsw, | ||
| 1111 | [0x8e] = handle_ipte_interlock, | ||
| 1112 | [0x8f] = handle_ipte_interlock, | ||
| 1113 | [0xab] = handle_essa, | ||
| 1114 | [0xaf] = handle_pfmf, | ||
| 1115 | }; | ||
| 1116 | |||
| 1117 | int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) | 1114 | int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) |
| 1118 | { | 1115 | { |
| 1119 | intercept_handler_t handler; | 1116 | switch (vcpu->arch.sie_block->ipa & 0x00ff) { |
| 1120 | 1117 | case 0x8a: | |
| 1121 | /* This is handled just as for the B2 instructions. */ | 1118 | case 0x8e: |
| 1122 | handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; | 1119 | case 0x8f: |
| 1123 | if (handler) | 1120 | return handle_ipte_interlock(vcpu); |
| 1124 | return handler(vcpu); | 1121 | case 0x8d: |
| 1125 | 1122 | return handle_epsw(vcpu); | |
| 1126 | return -EOPNOTSUPP; | 1123 | case 0xab: |
| 1124 | return handle_essa(vcpu); | ||
| 1125 | case 0xaf: | ||
| 1126 | return handle_pfmf(vcpu); | ||
| 1127 | default: | ||
| 1128 | return -EOPNOTSUPP; | ||
| 1129 | } | ||
| 1127 | } | 1130 | } |
| 1128 | 1131 | ||
| 1129 | int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) | 1132 | int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) |
| @@ -1271,22 +1274,20 @@ static int handle_stctg(struct kvm_vcpu *vcpu) | |||
| 1271 | return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0; | 1274 | return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0; |
| 1272 | } | 1275 | } |
| 1273 | 1276 | ||
| 1274 | static const intercept_handler_t eb_handlers[256] = { | ||
| 1275 | [0x2f] = handle_lctlg, | ||
| 1276 | [0x25] = handle_stctg, | ||
| 1277 | [0x60] = handle_ri, | ||
| 1278 | [0x61] = handle_ri, | ||
| 1279 | [0x62] = handle_ri, | ||
| 1280 | }; | ||
| 1281 | |||
| 1282 | int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) | 1277 | int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) |
| 1283 | { | 1278 | { |
| 1284 | intercept_handler_t handler; | 1279 | switch (vcpu->arch.sie_block->ipb & 0x000000ff) { |
| 1285 | 1280 | case 0x25: | |
| 1286 | handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; | 1281 | return handle_stctg(vcpu); |
| 1287 | if (handler) | 1282 | case 0x2f: |
| 1288 | return handler(vcpu); | 1283 | return handle_lctlg(vcpu); |
| 1289 | return -EOPNOTSUPP; | 1284 | case 0x60: |
| 1285 | case 0x61: | ||
| 1286 | case 0x62: | ||
| 1287 | return handle_ri(vcpu); | ||
| 1288 | default: | ||
| 1289 | return -EOPNOTSUPP; | ||
| 1290 | } | ||
| 1290 | } | 1291 | } |
| 1291 | 1292 | ||
| 1292 | static int handle_tprot(struct kvm_vcpu *vcpu) | 1293 | static int handle_tprot(struct kvm_vcpu *vcpu) |
| @@ -1346,10 +1347,12 @@ out_unlock: | |||
| 1346 | 1347 | ||
| 1347 | int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) | 1348 | int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) |
| 1348 | { | 1349 | { |
| 1349 | /* For e5xx... instructions we only handle TPROT */ | 1350 | switch (vcpu->arch.sie_block->ipa & 0x00ff) { |
| 1350 | if ((vcpu->arch.sie_block->ipa & 0x00ff) == 0x01) | 1351 | case 0x01: |
| 1351 | return handle_tprot(vcpu); | 1352 | return handle_tprot(vcpu); |
| 1352 | return -EOPNOTSUPP; | 1353 | default: |
| 1354 | return -EOPNOTSUPP; | ||
| 1355 | } | ||
| 1353 | } | 1356 | } |
| 1354 | 1357 | ||
| 1355 | static int handle_sckpf(struct kvm_vcpu *vcpu) | 1358 | static int handle_sckpf(struct kvm_vcpu *vcpu) |
| @@ -1380,17 +1383,14 @@ static int handle_ptff(struct kvm_vcpu *vcpu) | |||
| 1380 | return 0; | 1383 | return 0; |
| 1381 | } | 1384 | } |
| 1382 | 1385 | ||
| 1383 | static const intercept_handler_t x01_handlers[256] = { | ||
| 1384 | [0x04] = handle_ptff, | ||
| 1385 | [0x07] = handle_sckpf, | ||
| 1386 | }; | ||
| 1387 | |||
| 1388 | int kvm_s390_handle_01(struct kvm_vcpu *vcpu) | 1386 | int kvm_s390_handle_01(struct kvm_vcpu *vcpu) |
| 1389 | { | 1387 | { |
| 1390 | intercept_handler_t handler; | 1388 | switch (vcpu->arch.sie_block->ipa & 0x00ff) { |
| 1391 | 1389 | case 0x04: | |
| 1392 | handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; | 1390 | return handle_ptff(vcpu); |
| 1393 | if (handler) | 1391 | case 0x07: |
| 1394 | return handler(vcpu); | 1392 | return handle_sckpf(vcpu); |
| 1395 | return -EOPNOTSUPP; | 1393 | default: |
| 1394 | return -EOPNOTSUPP; | ||
| 1395 | } | ||
| 1396 | } | 1396 | } |
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index ec772700ff96..8961e3970901 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c | |||
| @@ -821,6 +821,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
| 821 | { | 821 | { |
| 822 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; | 822 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; |
| 823 | struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; | 823 | struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; |
| 824 | int guest_bp_isolation; | ||
| 824 | int rc; | 825 | int rc; |
| 825 | 826 | ||
| 826 | handle_last_fault(vcpu, vsie_page); | 827 | handle_last_fault(vcpu, vsie_page); |
| @@ -831,6 +832,20 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
| 831 | s390_handle_mcck(); | 832 | s390_handle_mcck(); |
| 832 | 833 | ||
| 833 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 834 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
| 835 | |||
| 836 | /* save current guest state of bp isolation override */ | ||
| 837 | guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST); | ||
| 838 | |||
| 839 | /* | ||
| 840 | * The guest is running with BPBC, so we have to force it on for our | ||
| 841 | * nested guest. This is done by enabling BPBC globally, so the BPBC | ||
| 842 | * control in the SCB (which the nested guest can modify) is simply | ||
| 843 | * ignored. | ||
| 844 | */ | ||
| 845 | if (test_kvm_facility(vcpu->kvm, 82) && | ||
| 846 | vcpu->arch.sie_block->fpf & FPF_BPBC) | ||
| 847 | set_thread_flag(TIF_ISOLATE_BP_GUEST); | ||
| 848 | |||
| 834 | local_irq_disable(); | 849 | local_irq_disable(); |
| 835 | guest_enter_irqoff(); | 850 | guest_enter_irqoff(); |
| 836 | local_irq_enable(); | 851 | local_irq_enable(); |
| @@ -840,6 +855,11 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
| 840 | local_irq_disable(); | 855 | local_irq_disable(); |
| 841 | guest_exit_irqoff(); | 856 | guest_exit_irqoff(); |
| 842 | local_irq_enable(); | 857 | local_irq_enable(); |
| 858 | |||
| 859 | /* restore guest state for bp isolation override */ | ||
| 860 | if (!guest_bp_isolation) | ||
| 861 | clear_thread_flag(TIF_ISOLATE_BP_GUEST); | ||
| 862 | |||
| 843 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 863 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
| 844 | 864 | ||
| 845 | if (rc == -EINTR) { | 865 | if (rc == -EINTR) { |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dd6f57a54a26..0a9e330b34f0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
| @@ -1464,7 +1464,4 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) | |||
| 1464 | #define put_smstate(type, buf, offset, val) \ | 1464 | #define put_smstate(type, buf, offset, val) \ |
| 1465 | *(type *)((buf) + (offset) - 0x7e00) = val | 1465 | *(type *)((buf) + (offset) - 0x7e00) = val |
| 1466 | 1466 | ||
| 1467 | void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, | ||
| 1468 | unsigned long start, unsigned long end); | ||
| 1469 | |||
| 1470 | #endif /* _ASM_X86_KVM_HOST_H */ | 1467 | #endif /* _ASM_X86_KVM_HOST_H */ |
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 7a2ade4aa235..6cfa9c8cb7d6 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #define KVM_FEATURE_PV_EOI 6 | 26 | #define KVM_FEATURE_PV_EOI 6 |
| 27 | #define KVM_FEATURE_PV_UNHALT 7 | 27 | #define KVM_FEATURE_PV_UNHALT 7 |
| 28 | #define KVM_FEATURE_PV_TLB_FLUSH 9 | 28 | #define KVM_FEATURE_PV_TLB_FLUSH 9 |
| 29 | #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 | ||
| 29 | 30 | ||
| 30 | /* The last 8 bits are used to indicate how to interpret the flags field | 31 | /* The last 8 bits are used to indicate how to interpret the flags field |
| 31 | * in pvclock structure. If no bits are set, all flags are ignored. | 32 | * in pvclock structure. If no bits are set, all flags are ignored. |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 4e37d1a851a6..bc1a27280c4b 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
| @@ -49,7 +49,7 @@ | |||
| 49 | 49 | ||
| 50 | static int kvmapf = 1; | 50 | static int kvmapf = 1; |
| 51 | 51 | ||
| 52 | static int parse_no_kvmapf(char *arg) | 52 | static int __init parse_no_kvmapf(char *arg) |
| 53 | { | 53 | { |
| 54 | kvmapf = 0; | 54 | kvmapf = 0; |
| 55 | return 0; | 55 | return 0; |
| @@ -58,7 +58,7 @@ static int parse_no_kvmapf(char *arg) | |||
| 58 | early_param("no-kvmapf", parse_no_kvmapf); | 58 | early_param("no-kvmapf", parse_no_kvmapf); |
| 59 | 59 | ||
| 60 | static int steal_acc = 1; | 60 | static int steal_acc = 1; |
| 61 | static int parse_no_stealacc(char *arg) | 61 | static int __init parse_no_stealacc(char *arg) |
| 62 | { | 62 | { |
| 63 | steal_acc = 0; | 63 | steal_acc = 0; |
| 64 | return 0; | 64 | return 0; |
| @@ -67,7 +67,7 @@ static int parse_no_stealacc(char *arg) | |||
| 67 | early_param("no-steal-acc", parse_no_stealacc); | 67 | early_param("no-steal-acc", parse_no_stealacc); |
| 68 | 68 | ||
| 69 | static int kvmclock_vsyscall = 1; | 69 | static int kvmclock_vsyscall = 1; |
| 70 | static int parse_no_kvmclock_vsyscall(char *arg) | 70 | static int __init parse_no_kvmclock_vsyscall(char *arg) |
| 71 | { | 71 | { |
| 72 | kvmclock_vsyscall = 0; | 72 | kvmclock_vsyscall = 0; |
| 73 | return 0; | 73 | return 0; |
| @@ -341,10 +341,10 @@ static void kvm_guest_cpu_init(void) | |||
| 341 | #endif | 341 | #endif |
| 342 | pa |= KVM_ASYNC_PF_ENABLED; | 342 | pa |= KVM_ASYNC_PF_ENABLED; |
| 343 | 343 | ||
| 344 | /* Async page fault support for L1 hypervisor is optional */ | 344 | if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT)) |
| 345 | if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN, | 345 | pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT; |
| 346 | (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0) | 346 | |
| 347 | wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); | 347 | wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); |
| 348 | __this_cpu_write(apf_reason.enabled, 1); | 348 | __this_cpu_write(apf_reason.enabled, 1); |
| 349 | printk(KERN_INFO"KVM setup async PF for cpu %d\n", | 349 | printk(KERN_INFO"KVM setup async PF for cpu %d\n", |
| 350 | smp_processor_id()); | 350 | smp_processor_id()); |
| @@ -545,7 +545,8 @@ static void __init kvm_guest_init(void) | |||
| 545 | pv_time_ops.steal_clock = kvm_steal_clock; | 545 | pv_time_ops.steal_clock = kvm_steal_clock; |
| 546 | } | 546 | } |
| 547 | 547 | ||
| 548 | if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) | 548 | if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && |
| 549 | !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) | ||
| 549 | pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; | 550 | pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; |
| 550 | 551 | ||
| 551 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) | 552 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) |
| @@ -633,7 +634,8 @@ static __init int kvm_setup_pv_tlb_flush(void) | |||
| 633 | { | 634 | { |
| 634 | int cpu; | 635 | int cpu; |
| 635 | 636 | ||
| 636 | if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) { | 637 | if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && |
| 638 | !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { | ||
| 637 | for_each_possible_cpu(cpu) { | 639 | for_each_possible_cpu(cpu) { |
| 638 | zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), | 640 | zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), |
| 639 | GFP_KERNEL, cpu_to_node(cpu)); | 641 | GFP_KERNEL, cpu_to_node(cpu)); |
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a0c5a69bc7c4..b671fc2d0422 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
| @@ -607,7 +607,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
| 607 | (1 << KVM_FEATURE_PV_EOI) | | 607 | (1 << KVM_FEATURE_PV_EOI) | |
| 608 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | | 608 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | |
| 609 | (1 << KVM_FEATURE_PV_UNHALT) | | 609 | (1 << KVM_FEATURE_PV_UNHALT) | |
| 610 | (1 << KVM_FEATURE_PV_TLB_FLUSH); | 610 | (1 << KVM_FEATURE_PV_TLB_FLUSH) | |
| 611 | (1 << KVM_FEATURE_ASYNC_PF_VMEXIT); | ||
| 611 | 612 | ||
| 612 | if (sched_info_on()) | 613 | if (sched_info_on()) |
| 613 | entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); | 614 | entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 924ac8ce9d50..cc5fe7a50dde 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
| @@ -2165,7 +2165,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) | |||
| 2165 | */ | 2165 | */ |
| 2166 | vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; | 2166 | vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; |
| 2167 | static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ | 2167 | static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ |
| 2168 | kvm_lapic_reset(vcpu, false); | ||
| 2169 | kvm_iodevice_init(&apic->dev, &apic_mmio_ops); | 2168 | kvm_iodevice_init(&apic->dev, &apic_mmio_ops); |
| 2170 | 2169 | ||
| 2171 | return 0; | 2170 | return 0; |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 46ff304140c7..f551962ac294 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -3029,7 +3029,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) | |||
| 3029 | return RET_PF_RETRY; | 3029 | return RET_PF_RETRY; |
| 3030 | } | 3030 | } |
| 3031 | 3031 | ||
| 3032 | return -EFAULT; | 3032 | return RET_PF_EMULATE; |
| 3033 | } | 3033 | } |
| 3034 | 3034 | ||
| 3035 | static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, | 3035 | static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b3e488a74828..3d8377f75eda 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
| @@ -300,6 +300,8 @@ module_param(vgif, int, 0444); | |||
| 300 | static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); | 300 | static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); |
| 301 | module_param(sev, int, 0444); | 301 | module_param(sev, int, 0444); |
| 302 | 302 | ||
| 303 | static u8 rsm_ins_bytes[] = "\x0f\xaa"; | ||
| 304 | |||
| 303 | static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); | 305 | static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); |
| 304 | static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa); | 306 | static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa); |
| 305 | static void svm_complete_interrupts(struct vcpu_svm *svm); | 307 | static void svm_complete_interrupts(struct vcpu_svm *svm); |
| @@ -1383,6 +1385,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 1383 | set_intercept(svm, INTERCEPT_SKINIT); | 1385 | set_intercept(svm, INTERCEPT_SKINIT); |
| 1384 | set_intercept(svm, INTERCEPT_WBINVD); | 1386 | set_intercept(svm, INTERCEPT_WBINVD); |
| 1385 | set_intercept(svm, INTERCEPT_XSETBV); | 1387 | set_intercept(svm, INTERCEPT_XSETBV); |
| 1388 | set_intercept(svm, INTERCEPT_RSM); | ||
| 1386 | 1389 | ||
| 1387 | if (!kvm_mwait_in_guest()) { | 1390 | if (!kvm_mwait_in_guest()) { |
| 1388 | set_intercept(svm, INTERCEPT_MONITOR); | 1391 | set_intercept(svm, INTERCEPT_MONITOR); |
| @@ -3699,6 +3702,12 @@ static int emulate_on_interception(struct vcpu_svm *svm) | |||
| 3699 | return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; | 3702 | return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; |
| 3700 | } | 3703 | } |
| 3701 | 3704 | ||
| 3705 | static int rsm_interception(struct vcpu_svm *svm) | ||
| 3706 | { | ||
| 3707 | return x86_emulate_instruction(&svm->vcpu, 0, 0, | ||
| 3708 | rsm_ins_bytes, 2) == EMULATE_DONE; | ||
| 3709 | } | ||
| 3710 | |||
| 3702 | static int rdpmc_interception(struct vcpu_svm *svm) | 3711 | static int rdpmc_interception(struct vcpu_svm *svm) |
| 3703 | { | 3712 | { |
| 3704 | int err; | 3713 | int err; |
| @@ -4541,7 +4550,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
| 4541 | [SVM_EXIT_MWAIT] = mwait_interception, | 4550 | [SVM_EXIT_MWAIT] = mwait_interception, |
| 4542 | [SVM_EXIT_XSETBV] = xsetbv_interception, | 4551 | [SVM_EXIT_XSETBV] = xsetbv_interception, |
| 4543 | [SVM_EXIT_NPF] = npf_interception, | 4552 | [SVM_EXIT_NPF] = npf_interception, |
| 4544 | [SVM_EXIT_RSM] = emulate_on_interception, | 4553 | [SVM_EXIT_RSM] = rsm_interception, |
| 4545 | [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, | 4554 | [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, |
| 4546 | [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception, | 4555 | [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception, |
| 4547 | }; | 4556 | }; |
| @@ -6236,16 +6245,18 @@ e_free: | |||
| 6236 | 6245 | ||
| 6237 | static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) | 6246 | static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) |
| 6238 | { | 6247 | { |
| 6248 | void __user *measure = (void __user *)(uintptr_t)argp->data; | ||
| 6239 | struct kvm_sev_info *sev = &kvm->arch.sev_info; | 6249 | struct kvm_sev_info *sev = &kvm->arch.sev_info; |
| 6240 | struct sev_data_launch_measure *data; | 6250 | struct sev_data_launch_measure *data; |
| 6241 | struct kvm_sev_launch_measure params; | 6251 | struct kvm_sev_launch_measure params; |
| 6252 | void __user *p = NULL; | ||
| 6242 | void *blob = NULL; | 6253 | void *blob = NULL; |
| 6243 | int ret; | 6254 | int ret; |
| 6244 | 6255 | ||
| 6245 | if (!sev_guest(kvm)) | 6256 | if (!sev_guest(kvm)) |
| 6246 | return -ENOTTY; | 6257 | return -ENOTTY; |
| 6247 | 6258 | ||
| 6248 | if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) | 6259 | if (copy_from_user(¶ms, measure, sizeof(params))) |
| 6249 | return -EFAULT; | 6260 | return -EFAULT; |
| 6250 | 6261 | ||
| 6251 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 6262 | data = kzalloc(sizeof(*data), GFP_KERNEL); |
| @@ -6256,17 +6267,13 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) | |||
| 6256 | if (!params.len) | 6267 | if (!params.len) |
| 6257 | goto cmd; | 6268 | goto cmd; |
| 6258 | 6269 | ||
| 6259 | if (params.uaddr) { | 6270 | p = (void __user *)(uintptr_t)params.uaddr; |
| 6271 | if (p) { | ||
| 6260 | if (params.len > SEV_FW_BLOB_MAX_SIZE) { | 6272 | if (params.len > SEV_FW_BLOB_MAX_SIZE) { |
| 6261 | ret = -EINVAL; | 6273 | ret = -EINVAL; |
| 6262 | goto e_free; | 6274 | goto e_free; |
| 6263 | } | 6275 | } |
| 6264 | 6276 | ||
| 6265 | if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) { | ||
| 6266 | ret = -EFAULT; | ||
| 6267 | goto e_free; | ||
| 6268 | } | ||
| 6269 | |||
| 6270 | ret = -ENOMEM; | 6277 | ret = -ENOMEM; |
| 6271 | blob = kmalloc(params.len, GFP_KERNEL); | 6278 | blob = kmalloc(params.len, GFP_KERNEL); |
| 6272 | if (!blob) | 6279 | if (!blob) |
| @@ -6290,13 +6297,13 @@ cmd: | |||
| 6290 | goto e_free_blob; | 6297 | goto e_free_blob; |
| 6291 | 6298 | ||
| 6292 | if (blob) { | 6299 | if (blob) { |
| 6293 | if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len)) | 6300 | if (copy_to_user(p, blob, params.len)) |
| 6294 | ret = -EFAULT; | 6301 | ret = -EFAULT; |
| 6295 | } | 6302 | } |
| 6296 | 6303 | ||
| 6297 | done: | 6304 | done: |
| 6298 | params.len = data->len; | 6305 | params.len = data->len; |
| 6299 | if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) | 6306 | if (copy_to_user(measure, ¶ms, sizeof(params))) |
| 6300 | ret = -EFAULT; | 6307 | ret = -EFAULT; |
| 6301 | e_free_blob: | 6308 | e_free_blob: |
| 6302 | kfree(blob); | 6309 | kfree(blob); |
| @@ -6597,7 +6604,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) | |||
| 6597 | struct page **pages; | 6604 | struct page **pages; |
| 6598 | void *blob, *hdr; | 6605 | void *blob, *hdr; |
| 6599 | unsigned long n; | 6606 | unsigned long n; |
| 6600 | int ret; | 6607 | int ret, offset; |
| 6601 | 6608 | ||
| 6602 | if (!sev_guest(kvm)) | 6609 | if (!sev_guest(kvm)) |
| 6603 | return -ENOTTY; | 6610 | return -ENOTTY; |
| @@ -6623,6 +6630,10 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) | |||
| 6623 | if (!data) | 6630 | if (!data) |
| 6624 | goto e_unpin_memory; | 6631 | goto e_unpin_memory; |
| 6625 | 6632 | ||
| 6633 | offset = params.guest_uaddr & (PAGE_SIZE - 1); | ||
| 6634 | data->guest_address = __sme_page_pa(pages[0]) + offset; | ||
| 6635 | data->guest_len = params.guest_len; | ||
| 6636 | |||
| 6626 | blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len); | 6637 | blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len); |
| 6627 | if (IS_ERR(blob)) { | 6638 | if (IS_ERR(blob)) { |
| 6628 | ret = PTR_ERR(blob); | 6639 | ret = PTR_ERR(blob); |
| @@ -6637,8 +6648,8 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) | |||
| 6637 | ret = PTR_ERR(hdr); | 6648 | ret = PTR_ERR(hdr); |
| 6638 | goto e_free_blob; | 6649 | goto e_free_blob; |
| 6639 | } | 6650 | } |
| 6640 | data->trans_address = __psp_pa(blob); | 6651 | data->hdr_address = __psp_pa(hdr); |
| 6641 | data->trans_len = params.trans_len; | 6652 | data->hdr_len = params.hdr_len; |
| 6642 | 6653 | ||
| 6643 | data->handle = sev->handle; | 6654 | data->handle = sev->handle; |
| 6644 | ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error); | 6655 | ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error); |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3dec126aa302..ec14f2319a87 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
| @@ -4485,7 +4485,8 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
| 4485 | vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, | 4485 | vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, |
| 4486 | SECONDARY_EXEC_DESC); | 4486 | SECONDARY_EXEC_DESC); |
| 4487 | hw_cr4 &= ~X86_CR4_UMIP; | 4487 | hw_cr4 &= ~X86_CR4_UMIP; |
| 4488 | } else | 4488 | } else if (!is_guest_mode(vcpu) || |
| 4489 | !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) | ||
| 4489 | vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, | 4490 | vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, |
| 4490 | SECONDARY_EXEC_DESC); | 4491 | SECONDARY_EXEC_DESC); |
| 4491 | 4492 | ||
| @@ -11199,7 +11200,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
| 11199 | if (ret) | 11200 | if (ret) |
| 11200 | return ret; | 11201 | return ret; |
| 11201 | 11202 | ||
| 11202 | if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) | 11203 | /* |
| 11204 | * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken | ||
| 11205 | * by event injection, halt vcpu. | ||
| 11206 | */ | ||
| 11207 | if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) && | ||
| 11208 | !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) | ||
| 11203 | return kvm_vcpu_halt(vcpu); | 11209 | return kvm_vcpu_halt(vcpu); |
| 11204 | 11210 | ||
| 11205 | vmx->nested.nested_run_pending = 1; | 11211 | vmx->nested.nested_run_pending = 1; |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c8a0b545ac20..96edda878dbf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -7975,6 +7975,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
| 7975 | kvm_vcpu_mtrr_init(vcpu); | 7975 | kvm_vcpu_mtrr_init(vcpu); |
| 7976 | vcpu_load(vcpu); | 7976 | vcpu_load(vcpu); |
| 7977 | kvm_vcpu_reset(vcpu, false); | 7977 | kvm_vcpu_reset(vcpu, false); |
| 7978 | kvm_lapic_reset(vcpu, false); | ||
| 7978 | kvm_mmu_setup(vcpu); | 7979 | kvm_mmu_setup(vcpu); |
| 7979 | vcpu_put(vcpu); | 7980 | vcpu_put(vcpu); |
| 7980 | return 0; | 7981 | return 0; |
| @@ -8460,10 +8461,8 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) | |||
| 8460 | return r; | 8461 | return r; |
| 8461 | } | 8462 | } |
| 8462 | 8463 | ||
| 8463 | if (!size) { | 8464 | if (!size) |
| 8464 | r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); | 8465 | vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); |
| 8465 | WARN_ON(r < 0); | ||
| 8466 | } | ||
| 8467 | 8466 | ||
| 8468 | return 0; | 8467 | return 0; |
| 8469 | } | 8468 | } |
diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c index fcfa5b1eae61..b3afb6cc9d72 100644 --- a/drivers/crypto/ccp/psp-dev.c +++ b/drivers/crypto/ccp/psp-dev.c | |||
| @@ -211,7 +211,7 @@ static int __sev_platform_shutdown_locked(int *error) | |||
| 211 | { | 211 | { |
| 212 | int ret; | 212 | int ret; |
| 213 | 213 | ||
| 214 | ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, 0, error); | 214 | ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error); |
| 215 | if (ret) | 215 | if (ret) |
| 216 | return ret; | 216 | return ret; |
| 217 | 217 | ||
| @@ -271,7 +271,7 @@ static int sev_ioctl_do_reset(struct sev_issue_cmd *argp) | |||
| 271 | return rc; | 271 | return rc; |
| 272 | } | 272 | } |
| 273 | 273 | ||
| 274 | return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, 0, &argp->error); | 274 | return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, NULL, &argp->error); |
| 275 | } | 275 | } |
| 276 | 276 | ||
| 277 | static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp) | 277 | static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp) |
| @@ -299,7 +299,7 @@ static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp) | |||
| 299 | return rc; | 299 | return rc; |
| 300 | } | 300 | } |
| 301 | 301 | ||
| 302 | return __sev_do_cmd_locked(cmd, 0, &argp->error); | 302 | return __sev_do_cmd_locked(cmd, NULL, &argp->error); |
| 303 | } | 303 | } |
| 304 | 304 | ||
| 305 | static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp) | 305 | static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp) |
| @@ -624,7 +624,7 @@ EXPORT_SYMBOL_GPL(sev_guest_decommission); | |||
| 624 | 624 | ||
| 625 | int sev_guest_df_flush(int *error) | 625 | int sev_guest_df_flush(int *error) |
| 626 | { | 626 | { |
| 627 | return sev_do_cmd(SEV_CMD_DF_FLUSH, 0, error); | 627 | return sev_do_cmd(SEV_CMD_DF_FLUSH, NULL, error); |
| 628 | } | 628 | } |
| 629 | EXPORT_SYMBOL_GPL(sev_guest_df_flush); | 629 | EXPORT_SYMBOL_GPL(sev_guest_df_flush); |
| 630 | 630 | ||
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ac0062b74aed..6930c63126c7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
| @@ -1105,7 +1105,6 @@ static inline void kvm_irq_routing_update(struct kvm *kvm) | |||
| 1105 | { | 1105 | { |
| 1106 | } | 1106 | } |
| 1107 | #endif | 1107 | #endif |
| 1108 | void kvm_arch_irq_routing_update(struct kvm *kvm); | ||
| 1109 | 1108 | ||
| 1110 | static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | 1109 | static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) |
| 1111 | { | 1110 | { |
| @@ -1114,6 +1113,8 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | |||
| 1114 | 1113 | ||
| 1115 | #endif /* CONFIG_HAVE_KVM_EVENTFD */ | 1114 | #endif /* CONFIG_HAVE_KVM_EVENTFD */ |
| 1116 | 1115 | ||
| 1116 | void kvm_arch_irq_routing_update(struct kvm *kvm); | ||
| 1117 | |||
| 1117 | static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) | 1118 | static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) |
| 1118 | { | 1119 | { |
| 1119 | /* | 1120 | /* |
| @@ -1272,4 +1273,7 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp, | |||
| 1272 | } | 1273 | } |
| 1273 | #endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */ | 1274 | #endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */ |
| 1274 | 1275 | ||
| 1276 | void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, | ||
| 1277 | unsigned long start, unsigned long end); | ||
| 1278 | |||
| 1275 | #endif | 1279 | #endif |
diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h index 3d77fe91239a..9008f31c7eb6 100644 --- a/include/uapi/linux/psp-sev.h +++ b/include/uapi/linux/psp-sev.h | |||
| @@ -42,7 +42,7 @@ typedef enum { | |||
| 42 | SEV_RET_INVALID_PLATFORM_STATE, | 42 | SEV_RET_INVALID_PLATFORM_STATE, |
| 43 | SEV_RET_INVALID_GUEST_STATE, | 43 | SEV_RET_INVALID_GUEST_STATE, |
| 44 | SEV_RET_INAVLID_CONFIG, | 44 | SEV_RET_INAVLID_CONFIG, |
| 45 | SEV_RET_INVALID_len, | 45 | SEV_RET_INVALID_LEN, |
| 46 | SEV_RET_ALREADY_OWNED, | 46 | SEV_RET_ALREADY_OWNED, |
| 47 | SEV_RET_INVALID_CERTIFICATE, | 47 | SEV_RET_INVALID_CERTIFICATE, |
| 48 | SEV_RET_POLICY_FAILURE, | 48 | SEV_RET_POLICY_FAILURE, |
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index a5684d0968b4..5898c22ba310 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat | |||
| @@ -33,7 +33,7 @@ import resource | |||
| 33 | import struct | 33 | import struct |
| 34 | import re | 34 | import re |
| 35 | import subprocess | 35 | import subprocess |
| 36 | from collections import defaultdict | 36 | from collections import defaultdict, namedtuple |
| 37 | 37 | ||
| 38 | VMX_EXIT_REASONS = { | 38 | VMX_EXIT_REASONS = { |
| 39 | 'EXCEPTION_NMI': 0, | 39 | 'EXCEPTION_NMI': 0, |
| @@ -228,6 +228,7 @@ IOCTL_NUMBERS = { | |||
| 228 | } | 228 | } |
| 229 | 229 | ||
| 230 | ENCODING = locale.getpreferredencoding(False) | 230 | ENCODING = locale.getpreferredencoding(False) |
| 231 | TRACE_FILTER = re.compile(r'^[^\(]*$') | ||
| 231 | 232 | ||
| 232 | 233 | ||
| 233 | class Arch(object): | 234 | class Arch(object): |
| @@ -260,6 +261,11 @@ class Arch(object): | |||
| 260 | return ArchX86(SVM_EXIT_REASONS) | 261 | return ArchX86(SVM_EXIT_REASONS) |
| 261 | return | 262 | return |
| 262 | 263 | ||
| 264 | def tracepoint_is_child(self, field): | ||
| 265 | if (TRACE_FILTER.match(field)): | ||
| 266 | return None | ||
| 267 | return field.split('(', 1)[0] | ||
| 268 | |||
| 263 | 269 | ||
| 264 | class ArchX86(Arch): | 270 | class ArchX86(Arch): |
| 265 | def __init__(self, exit_reasons): | 271 | def __init__(self, exit_reasons): |
| @@ -267,6 +273,10 @@ class ArchX86(Arch): | |||
| 267 | self.ioctl_numbers = IOCTL_NUMBERS | 273 | self.ioctl_numbers = IOCTL_NUMBERS |
| 268 | self.exit_reasons = exit_reasons | 274 | self.exit_reasons = exit_reasons |
| 269 | 275 | ||
| 276 | def debugfs_is_child(self, field): | ||
| 277 | """ Returns name of parent if 'field' is a child, None otherwise """ | ||
| 278 | return None | ||
| 279 | |||
| 270 | 280 | ||
| 271 | class ArchPPC(Arch): | 281 | class ArchPPC(Arch): |
| 272 | def __init__(self): | 282 | def __init__(self): |
| @@ -282,6 +292,10 @@ class ArchPPC(Arch): | |||
| 282 | self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 | 292 | self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 |
| 283 | self.exit_reasons = {} | 293 | self.exit_reasons = {} |
| 284 | 294 | ||
| 295 | def debugfs_is_child(self, field): | ||
| 296 | """ Returns name of parent if 'field' is a child, None otherwise """ | ||
| 297 | return None | ||
| 298 | |||
| 285 | 299 | ||
| 286 | class ArchA64(Arch): | 300 | class ArchA64(Arch): |
| 287 | def __init__(self): | 301 | def __init__(self): |
| @@ -289,6 +303,10 @@ class ArchA64(Arch): | |||
| 289 | self.ioctl_numbers = IOCTL_NUMBERS | 303 | self.ioctl_numbers = IOCTL_NUMBERS |
| 290 | self.exit_reasons = AARCH64_EXIT_REASONS | 304 | self.exit_reasons = AARCH64_EXIT_REASONS |
| 291 | 305 | ||
| 306 | def debugfs_is_child(self, field): | ||
| 307 | """ Returns name of parent if 'field' is a child, None otherwise """ | ||
| 308 | return None | ||
| 309 | |||
| 292 | 310 | ||
| 293 | class ArchS390(Arch): | 311 | class ArchS390(Arch): |
| 294 | def __init__(self): | 312 | def __init__(self): |
| @@ -296,6 +314,12 @@ class ArchS390(Arch): | |||
| 296 | self.ioctl_numbers = IOCTL_NUMBERS | 314 | self.ioctl_numbers = IOCTL_NUMBERS |
| 297 | self.exit_reasons = None | 315 | self.exit_reasons = None |
| 298 | 316 | ||
| 317 | def debugfs_is_child(self, field): | ||
| 318 | """ Returns name of parent if 'field' is a child, None otherwise """ | ||
| 319 | if field.startswith('instruction_'): | ||
| 320 | return 'exit_instruction' | ||
| 321 | |||
| 322 | |||
| 299 | ARCH = Arch.get_arch() | 323 | ARCH = Arch.get_arch() |
| 300 | 324 | ||
| 301 | 325 | ||
| @@ -331,9 +355,6 @@ class perf_event_attr(ctypes.Structure): | |||
| 331 | PERF_TYPE_TRACEPOINT = 2 | 355 | PERF_TYPE_TRACEPOINT = 2 |
| 332 | PERF_FORMAT_GROUP = 1 << 3 | 356 | PERF_FORMAT_GROUP = 1 << 3 |
| 333 | 357 | ||
| 334 | PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing' | ||
| 335 | PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm' | ||
| 336 | |||
| 337 | 358 | ||
| 338 | class Group(object): | 359 | class Group(object): |
| 339 | """Represents a perf event group.""" | 360 | """Represents a perf event group.""" |
| @@ -376,8 +397,8 @@ class Event(object): | |||
| 376 | self.syscall = self.libc.syscall | 397 | self.syscall = self.libc.syscall |
| 377 | self.name = name | 398 | self.name = name |
| 378 | self.fd = None | 399 | self.fd = None |
| 379 | self.setup_event(group, trace_cpu, trace_pid, trace_point, | 400 | self._setup_event(group, trace_cpu, trace_pid, trace_point, |
| 380 | trace_filter, trace_set) | 401 | trace_filter, trace_set) |
| 381 | 402 | ||
| 382 | def __del__(self): | 403 | def __del__(self): |
| 383 | """Closes the event's file descriptor. | 404 | """Closes the event's file descriptor. |
| @@ -390,7 +411,7 @@ class Event(object): | |||
| 390 | if self.fd: | 411 | if self.fd: |
| 391 | os.close(self.fd) | 412 | os.close(self.fd) |
| 392 | 413 | ||
| 393 | def perf_event_open(self, attr, pid, cpu, group_fd, flags): | 414 | def _perf_event_open(self, attr, pid, cpu, group_fd, flags): |
| 394 | """Wrapper for the sys_perf_evt_open() syscall. | 415 | """Wrapper for the sys_perf_evt_open() syscall. |
| 395 | 416 | ||
| 396 | Used to set up performance events, returns a file descriptor or -1 | 417 | Used to set up performance events, returns a file descriptor or -1 |
| @@ -409,7 +430,7 @@ class Event(object): | |||
| 409 | ctypes.c_int(pid), ctypes.c_int(cpu), | 430 | ctypes.c_int(pid), ctypes.c_int(cpu), |
| 410 | ctypes.c_int(group_fd), ctypes.c_long(flags)) | 431 | ctypes.c_int(group_fd), ctypes.c_long(flags)) |
| 411 | 432 | ||
| 412 | def setup_event_attribute(self, trace_set, trace_point): | 433 | def _setup_event_attribute(self, trace_set, trace_point): |
| 413 | """Returns an initialized ctype perf_event_attr struct.""" | 434 | """Returns an initialized ctype perf_event_attr struct.""" |
| 414 | 435 | ||
| 415 | id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set, | 436 | id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set, |
| @@ -419,8 +440,8 @@ class Event(object): | |||
| 419 | event_attr.config = int(open(id_path).read()) | 440 | event_attr.config = int(open(id_path).read()) |
| 420 | return event_attr | 441 | return event_attr |
| 421 | 442 | ||
| 422 | def setup_event(self, group, trace_cpu, trace_pid, trace_point, | 443 | def _setup_event(self, group, trace_cpu, trace_pid, trace_point, |
| 423 | trace_filter, trace_set): | 444 | trace_filter, trace_set): |
| 424 | """Sets up the perf event in Linux. | 445 | """Sets up the perf event in Linux. |
| 425 | 446 | ||
| 426 | Issues the syscall to register the event in the kernel and | 447 | Issues the syscall to register the event in the kernel and |
| @@ -428,7 +449,7 @@ class Event(object): | |||
| 428 | 449 | ||
| 429 | """ | 450 | """ |
| 430 | 451 | ||
| 431 | event_attr = self.setup_event_attribute(trace_set, trace_point) | 452 | event_attr = self._setup_event_attribute(trace_set, trace_point) |
| 432 | 453 | ||
| 433 | # First event will be group leader. | 454 | # First event will be group leader. |
| 434 | group_leader = -1 | 455 | group_leader = -1 |
| @@ -437,8 +458,8 @@ class Event(object): | |||
| 437 | if group.events: | 458 | if group.events: |
| 438 | group_leader = group.events[0].fd | 459 | group_leader = group.events[0].fd |
| 439 | 460 | ||
| 440 | fd = self.perf_event_open(event_attr, trace_pid, | 461 | fd = self._perf_event_open(event_attr, trace_pid, |
| 441 | trace_cpu, group_leader, 0) | 462 | trace_cpu, group_leader, 0) |
| 442 | if fd == -1: | 463 | if fd == -1: |
| 443 | err = ctypes.get_errno() | 464 | err = ctypes.get_errno() |
| 444 | raise OSError(err, os.strerror(err), | 465 | raise OSError(err, os.strerror(err), |
| @@ -475,6 +496,10 @@ class Event(object): | |||
| 475 | 496 | ||
| 476 | class Provider(object): | 497 | class Provider(object): |
| 477 | """Encapsulates functionalities used by all providers.""" | 498 | """Encapsulates functionalities used by all providers.""" |
| 499 | def __init__(self, pid): | ||
| 500 | self.child_events = False | ||
| 501 | self.pid = pid | ||
| 502 | |||
| 478 | @staticmethod | 503 | @staticmethod |
| 479 | def is_field_wanted(fields_filter, field): | 504 | def is_field_wanted(fields_filter, field): |
| 480 | """Indicate whether field is valid according to fields_filter.""" | 505 | """Indicate whether field is valid according to fields_filter.""" |
| @@ -500,12 +525,12 @@ class TracepointProvider(Provider): | |||
| 500 | """ | 525 | """ |
| 501 | def __init__(self, pid, fields_filter): | 526 | def __init__(self, pid, fields_filter): |
| 502 | self.group_leaders = [] | 527 | self.group_leaders = [] |
| 503 | self.filters = self.get_filters() | 528 | self.filters = self._get_filters() |
| 504 | self.update_fields(fields_filter) | 529 | self.update_fields(fields_filter) |
| 505 | self.pid = pid | 530 | super(TracepointProvider, self).__init__(pid) |
| 506 | 531 | ||
| 507 | @staticmethod | 532 | @staticmethod |
| 508 | def get_filters(): | 533 | def _get_filters(): |
| 509 | """Returns a dict of trace events, their filter ids and | 534 | """Returns a dict of trace events, their filter ids and |
| 510 | the values that can be filtered. | 535 | the values that can be filtered. |
| 511 | 536 | ||
| @@ -521,8 +546,8 @@ class TracepointProvider(Provider): | |||
| 521 | filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons) | 546 | filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons) |
| 522 | return filters | 547 | return filters |
| 523 | 548 | ||
| 524 | def get_available_fields(self): | 549 | def _get_available_fields(self): |
| 525 | """Returns a list of available event's of format 'event name(filter | 550 | """Returns a list of available events of format 'event name(filter |
| 526 | name)'. | 551 | name)'. |
| 527 | 552 | ||
| 528 | All available events have directories under | 553 | All available events have directories under |
| @@ -549,11 +574,12 @@ class TracepointProvider(Provider): | |||
| 549 | 574 | ||
| 550 | def update_fields(self, fields_filter): | 575 | def update_fields(self, fields_filter): |
| 551 | """Refresh fields, applying fields_filter""" | 576 | """Refresh fields, applying fields_filter""" |
| 552 | self.fields = [field for field in self.get_available_fields() | 577 | self.fields = [field for field in self._get_available_fields() |
| 553 | if self.is_field_wanted(fields_filter, field)] | 578 | if self.is_field_wanted(fields_filter, field) or |
| 579 | ARCH.tracepoint_is_child(field)] | ||
| 554 | 580 | ||
| 555 | @staticmethod | 581 | @staticmethod |
| 556 | def get_online_cpus(): | 582 | def _get_online_cpus(): |
| 557 | """Returns a list of cpu id integers.""" | 583 | """Returns a list of cpu id integers.""" |
| 558 | def parse_int_list(list_string): | 584 | def parse_int_list(list_string): |
| 559 | """Returns an int list from a string of comma separated integers and | 585 | """Returns an int list from a string of comma separated integers and |
| @@ -575,17 +601,17 @@ class TracepointProvider(Provider): | |||
| 575 | cpu_string = cpu_list.readline() | 601 | cpu_string = cpu_list.readline() |
| 576 | return parse_int_list(cpu_string) | 602 | return parse_int_list(cpu_string) |
| 577 | 603 | ||
| 578 | def setup_traces(self): | 604 | def _setup_traces(self): |
| 579 | """Creates all event and group objects needed to be able to retrieve | 605 | """Creates all event and group objects needed to be able to retrieve |
| 580 | data.""" | 606 | data.""" |
| 581 | fields = self.get_available_fields() | 607 | fields = self._get_available_fields() |
| 582 | if self._pid > 0: | 608 | if self._pid > 0: |
| 583 | # Fetch list of all threads of the monitored pid, as qemu | 609 | # Fetch list of all threads of the monitored pid, as qemu |
| 584 | # starts a thread for each vcpu. | 610 | # starts a thread for each vcpu. |
| 585 | path = os.path.join('/proc', str(self._pid), 'task') | 611 | path = os.path.join('/proc', str(self._pid), 'task') |
| 586 | groupids = self.walkdir(path)[1] | 612 | groupids = self.walkdir(path)[1] |
| 587 | else: | 613 | else: |
| 588 | groupids = self.get_online_cpus() | 614 | groupids = self._get_online_cpus() |
| 589 | 615 | ||
| 590 | # The constant is needed as a buffer for python libs, std | 616 | # The constant is needed as a buffer for python libs, std |
| 591 | # streams and other files that the script opens. | 617 | # streams and other files that the script opens. |
| @@ -663,7 +689,7 @@ class TracepointProvider(Provider): | |||
| 663 | # The garbage collector will get rid of all Event/Group | 689 | # The garbage collector will get rid of all Event/Group |
| 664 | # objects and open files after removing the references. | 690 | # objects and open files after removing the references. |
| 665 | self.group_leaders = [] | 691 | self.group_leaders = [] |
| 666 | self.setup_traces() | 692 | self._setup_traces() |
| 667 | self.fields = self._fields | 693 | self.fields = self._fields |
| 668 | 694 | ||
| 669 | def read(self, by_guest=0): | 695 | def read(self, by_guest=0): |
| @@ -671,8 +697,12 @@ class TracepointProvider(Provider): | |||
| 671 | ret = defaultdict(int) | 697 | ret = defaultdict(int) |
| 672 | for group in self.group_leaders: | 698 | for group in self.group_leaders: |
| 673 | for name, val in group.read().items(): | 699 | for name, val in group.read().items(): |
| 674 | if name in self._fields: | 700 | if name not in self._fields: |
| 675 | ret[name] += val | 701 | continue |
| 702 | parent = ARCH.tracepoint_is_child(name) | ||
| 703 | if parent: | ||
| 704 | name += ' ' + parent | ||
| 705 | ret[name] += val | ||
| 676 | return ret | 706 | return ret |
| 677 | 707 | ||
| 678 | def reset(self): | 708 | def reset(self): |
| @@ -690,11 +720,11 @@ class DebugfsProvider(Provider): | |||
| 690 | self._baseline = {} | 720 | self._baseline = {} |
| 691 | self.do_read = True | 721 | self.do_read = True |
| 692 | self.paths = [] | 722 | self.paths = [] |
| 693 | self.pid = pid | 723 | super(DebugfsProvider, self).__init__(pid) |
| 694 | if include_past: | 724 | if include_past: |
| 695 | self.restore() | 725 | self._restore() |
| 696 | 726 | ||
| 697 | def get_available_fields(self): | 727 | def _get_available_fields(self): |
| 698 | """"Returns a list of available fields. | 728 | """"Returns a list of available fields. |
| 699 | 729 | ||
| 700 | The fields are all available KVM debugfs files | 730 | The fields are all available KVM debugfs files |
| @@ -704,8 +734,9 @@ class DebugfsProvider(Provider): | |||
| 704 | 734 | ||
| 705 | def update_fields(self, fields_filter): | 735 | def update_fields(self, fields_filter): |
| 706 | """Refresh fields, applying fields_filter""" | 736 | """Refresh fields, applying fields_filter""" |
| 707 | self._fields = [field for field in self.get_available_fields() | 737 | self._fields = [field for field in self._get_available_fields() |
| 708 | if self.is_field_wanted(fields_filter, field)] | 738 | if self.is_field_wanted(fields_filter, field) or |
| 739 | ARCH.debugfs_is_child(field)] | ||
| 709 | 740 | ||
| 710 | @property | 741 | @property |
| 711 | def fields(self): | 742 | def fields(self): |
| @@ -758,7 +789,7 @@ class DebugfsProvider(Provider): | |||
| 758 | paths.append(dir) | 789 | paths.append(dir) |
| 759 | for path in paths: | 790 | for path in paths: |
| 760 | for field in self._fields: | 791 | for field in self._fields: |
| 761 | value = self.read_field(field, path) | 792 | value = self._read_field(field, path) |
| 762 | key = path + field | 793 | key = path + field |
| 763 | if reset == 1: | 794 | if reset == 1: |
| 764 | self._baseline[key] = value | 795 | self._baseline[key] = value |
| @@ -766,20 +797,21 @@ class DebugfsProvider(Provider): | |||
| 766 | self._baseline[key] = 0 | 797 | self._baseline[key] = 0 |
| 767 | if self._baseline.get(key, -1) == -1: | 798 | if self._baseline.get(key, -1) == -1: |
| 768 | self._baseline[key] = value | 799 | self._baseline[key] = value |
| 769 | increment = (results.get(field, 0) + value - | 800 | parent = ARCH.debugfs_is_child(field) |
| 770 | self._baseline.get(key, 0)) | 801 | if parent: |
| 771 | if by_guest: | 802 | field = field + ' ' + parent |
| 772 | pid = key.split('-')[0] | 803 | else: |
| 773 | if pid in results: | 804 | if by_guest: |
| 774 | results[pid] += increment | 805 | field = key.split('-')[0] # set 'field' to 'pid' |
| 775 | else: | 806 | increment = value - self._baseline.get(key, 0) |
| 776 | results[pid] = increment | 807 | if field in results: |
| 808 | results[field] += increment | ||
| 777 | else: | 809 | else: |
| 778 | results[field] = increment | 810 | results[field] = increment |
| 779 | 811 | ||
| 780 | return results | 812 | return results |
| 781 | 813 | ||
| 782 | def read_field(self, field, path): | 814 | def _read_field(self, field, path): |
| 783 | """Returns the value of a single field from a specific VM.""" | 815 | """Returns the value of a single field from a specific VM.""" |
| 784 | try: | 816 | try: |
| 785 | return int(open(os.path.join(PATH_DEBUGFS_KVM, | 817 | return int(open(os.path.join(PATH_DEBUGFS_KVM, |
| @@ -794,12 +826,15 @@ class DebugfsProvider(Provider): | |||
| 794 | self._baseline = {} | 826 | self._baseline = {} |
| 795 | self.read(1) | 827 | self.read(1) |
| 796 | 828 | ||
| 797 | def restore(self): | 829 | def _restore(self): |
| 798 | """Reset field counters""" | 830 | """Reset field counters""" |
| 799 | self._baseline = {} | 831 | self._baseline = {} |
| 800 | self.read(2) | 832 | self.read(2) |
| 801 | 833 | ||
| 802 | 834 | ||
| 835 | EventStat = namedtuple('EventStat', ['value', 'delta']) | ||
| 836 | |||
| 837 | |||
| 803 | class Stats(object): | 838 | class Stats(object): |
| 804 | """Manages the data providers and the data they provide. | 839 | """Manages the data providers and the data they provide. |
| 805 | 840 | ||
| @@ -808,13 +843,13 @@ class Stats(object): | |||
| 808 | 843 | ||
| 809 | """ | 844 | """ |
| 810 | def __init__(self, options): | 845 | def __init__(self, options): |
| 811 | self.providers = self.get_providers(options) | 846 | self.providers = self._get_providers(options) |
| 812 | self._pid_filter = options.pid | 847 | self._pid_filter = options.pid |
| 813 | self._fields_filter = options.fields | 848 | self._fields_filter = options.fields |
| 814 | self.values = {} | 849 | self.values = {} |
| 850 | self._child_events = False | ||
| 815 | 851 | ||
| 816 | @staticmethod | 852 | def _get_providers(self, options): |
| 817 | def get_providers(options): | ||
| 818 | """Returns a list of data providers depending on the passed options.""" | 853 | """Returns a list of data providers depending on the passed options.""" |
| 819 | providers = [] | 854 | providers = [] |
| 820 | 855 | ||
| @@ -826,7 +861,7 @@ class Stats(object): | |||
| 826 | 861 | ||
| 827 | return providers | 862 | return providers |
| 828 | 863 | ||
| 829 | def update_provider_filters(self): | 864 | def _update_provider_filters(self): |
| 830 | """Propagates fields filters to providers.""" | 865 | """Propagates fields filters to providers.""" |
| 831 | # As we reset the counters when updating the fields we can | 866 | # As we reset the counters when updating the fields we can |
| 832 | # also clear the cache of old values. | 867 | # also clear the cache of old values. |
| @@ -847,7 +882,7 @@ class Stats(object): | |||
| 847 | def fields_filter(self, fields_filter): | 882 | def fields_filter(self, fields_filter): |
| 848 | if fields_filter != self._fields_filter: | 883 | if fields_filter != self._fields_filter: |
| 849 | self._fields_filter = fields_filter | 884 | self._fields_filter = fields_filter |
| 850 | self.update_provider_filters() | 885 | self._update_provider_filters() |
| 851 | 886 | ||
| 852 | @property | 887 | @property |
| 853 | def pid_filter(self): | 888 | def pid_filter(self): |
| @@ -861,16 +896,33 @@ class Stats(object): | |||
| 861 | for provider in self.providers: | 896 | for provider in self.providers: |
| 862 | provider.pid = self._pid_filter | 897 | provider.pid = self._pid_filter |
| 863 | 898 | ||
| 899 | @property | ||
| 900 | def child_events(self): | ||
| 901 | return self._child_events | ||
| 902 | |||
| 903 | @child_events.setter | ||
| 904 | def child_events(self, val): | ||
| 905 | self._child_events = val | ||
| 906 | for provider in self.providers: | ||
| 907 | provider.child_events = val | ||
| 908 | |||
| 864 | def get(self, by_guest=0): | 909 | def get(self, by_guest=0): |
| 865 | """Returns a dict with field -> (value, delta to last value) of all | 910 | """Returns a dict with field -> (value, delta to last value) of all |
| 866 | provider data.""" | 911 | provider data. |
| 912 | Key formats: | ||
| 913 | * plain: 'key' is event name | ||
| 914 | * child-parent: 'key' is in format '<child> <parent>' | ||
| 915 | * pid: 'key' is the pid of the guest, and the record contains the | ||
| 916 | aggregated event data | ||
| 917 | These formats are generated by the providers, and handled in class TUI. | ||
| 918 | """ | ||
| 867 | for provider in self.providers: | 919 | for provider in self.providers: |
| 868 | new = provider.read(by_guest=by_guest) | 920 | new = provider.read(by_guest=by_guest) |
| 869 | for key in new if by_guest else provider.fields: | 921 | for key in new: |
| 870 | oldval = self.values.get(key, (0, 0))[0] | 922 | oldval = self.values.get(key, EventStat(0, 0)).value |
| 871 | newval = new.get(key, 0) | 923 | newval = new.get(key, 0) |
| 872 | newdelta = newval - oldval | 924 | newdelta = newval - oldval |
| 873 | self.values[key] = (newval, newdelta) | 925 | self.values[key] = EventStat(newval, newdelta) |
| 874 | return self.values | 926 | return self.values |
| 875 | 927 | ||
| 876 | def toggle_display_guests(self, to_pid): | 928 | def toggle_display_guests(self, to_pid): |
| @@ -899,10 +951,10 @@ class Stats(object): | |||
| 899 | self.get(to_pid) | 951 | self.get(to_pid) |
| 900 | return 0 | 952 | return 0 |
| 901 | 953 | ||
| 954 | |||
| 902 | DELAY_DEFAULT = 3.0 | 955 | DELAY_DEFAULT = 3.0 |
| 903 | MAX_GUEST_NAME_LEN = 48 | 956 | MAX_GUEST_NAME_LEN = 48 |
| 904 | MAX_REGEX_LEN = 44 | 957 | MAX_REGEX_LEN = 44 |
| 905 | DEFAULT_REGEX = r'^[^\(]*$' | ||
| 906 | SORT_DEFAULT = 0 | 958 | SORT_DEFAULT = 0 |
| 907 | 959 | ||
| 908 | 960 | ||
| @@ -969,7 +1021,7 @@ class Tui(object): | |||
| 969 | 1021 | ||
| 970 | return res | 1022 | return res |
| 971 | 1023 | ||
| 972 | def print_all_gnames(self, row): | 1024 | def _print_all_gnames(self, row): |
| 973 | """Print a list of all running guests along with their pids.""" | 1025 | """Print a list of all running guests along with their pids.""" |
| 974 | self.screen.addstr(row, 2, '%8s %-60s' % | 1026 | self.screen.addstr(row, 2, '%8s %-60s' % |
| 975 | ('Pid', 'Guest Name (fuzzy list, might be ' | 1027 | ('Pid', 'Guest Name (fuzzy list, might be ' |
| @@ -1032,19 +1084,13 @@ class Tui(object): | |||
| 1032 | 1084 | ||
| 1033 | return name | 1085 | return name |
| 1034 | 1086 | ||
| 1035 | def update_drilldown(self): | 1087 | def _update_pid(self, pid): |
| 1036 | """Sets or removes a filter that only allows fields without braces.""" | ||
| 1037 | if not self.stats.fields_filter: | ||
| 1038 | self.stats.fields_filter = DEFAULT_REGEX | ||
| 1039 | |||
| 1040 | elif self.stats.fields_filter == DEFAULT_REGEX: | ||
| 1041 | self.stats.fields_filter = None | ||
| 1042 | |||
| 1043 | def update_pid(self, pid): | ||
| 1044 | """Propagates pid selection to stats object.""" | 1088 | """Propagates pid selection to stats object.""" |
| 1089 | self.screen.addstr(4, 1, 'Updating pid filter...') | ||
| 1090 | self.screen.refresh() | ||
| 1045 | self.stats.pid_filter = pid | 1091 | self.stats.pid_filter = pid |
| 1046 | 1092 | ||
| 1047 | def refresh_header(self, pid=None): | 1093 | def _refresh_header(self, pid=None): |
| 1048 | """Refreshes the header.""" | 1094 | """Refreshes the header.""" |
| 1049 | if pid is None: | 1095 | if pid is None: |
| 1050 | pid = self.stats.pid_filter | 1096 | pid = self.stats.pid_filter |
| @@ -1059,8 +1105,7 @@ class Tui(object): | |||
| 1059 | .format(pid, gname), curses.A_BOLD) | 1105 | .format(pid, gname), curses.A_BOLD) |
| 1060 | else: | 1106 | else: |
| 1061 | self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD) | 1107 | self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD) |
| 1062 | if self.stats.fields_filter and self.stats.fields_filter \ | 1108 | if self.stats.fields_filter: |
| 1063 | != DEFAULT_REGEX: | ||
| 1064 | regex = self.stats.fields_filter | 1109 | regex = self.stats.fields_filter |
| 1065 | if len(regex) > MAX_REGEX_LEN: | 1110 | if len(regex) > MAX_REGEX_LEN: |
| 1066 | regex = regex[:MAX_REGEX_LEN] + '...' | 1111 | regex = regex[:MAX_REGEX_LEN] + '...' |
| @@ -1075,56 +1120,99 @@ class Tui(object): | |||
| 1075 | self.screen.addstr(4, 1, 'Collecting data...') | 1120 | self.screen.addstr(4, 1, 'Collecting data...') |
| 1076 | self.screen.refresh() | 1121 | self.screen.refresh() |
| 1077 | 1122 | ||
| 1078 | def refresh_body(self, sleeptime): | 1123 | def _refresh_body(self, sleeptime): |
| 1124 | def is_child_field(field): | ||
| 1125 | return field.find('(') != -1 | ||
| 1126 | |||
| 1127 | def insert_child(sorted_items, child, values, parent): | ||
| 1128 | num = len(sorted_items) | ||
| 1129 | for i in range(0, num): | ||
| 1130 | # only add child if parent is present | ||
| 1131 | if parent.startswith(sorted_items[i][0]): | ||
| 1132 | sorted_items.insert(i + 1, (' ' + child, values)) | ||
| 1133 | |||
| 1134 | def get_sorted_events(self, stats): | ||
| 1135 | """ separate parent and child events """ | ||
| 1136 | if self._sorting == SORT_DEFAULT: | ||
| 1137 | def sortkey((_k, v)): | ||
| 1138 | # sort by (delta value, overall value) | ||
| 1139 | return (v.delta, v.value) | ||
| 1140 | else: | ||
| 1141 | def sortkey((_k, v)): | ||
| 1142 | # sort by overall value | ||
| 1143 | return v.value | ||
| 1144 | |||
| 1145 | childs = [] | ||
| 1146 | sorted_items = [] | ||
| 1147 | # we can't rule out child events to appear prior to parents even | ||
| 1148 | # when sorted - separate out all children first, and add in later | ||
| 1149 | for key, values in sorted(stats.items(), key=sortkey, | ||
| 1150 | reverse=True): | ||
| 1151 | if values == (0, 0): | ||
| 1152 | continue | ||
| 1153 | if key.find(' ') != -1: | ||
| 1154 | if not self.stats.child_events: | ||
| 1155 | continue | ||
| 1156 | childs.insert(0, (key, values)) | ||
| 1157 | else: | ||
| 1158 | sorted_items.append((key, values)) | ||
| 1159 | if self.stats.child_events: | ||
| 1160 | for key, values in childs: | ||
| 1161 | (child, parent) = key.split(' ') | ||
| 1162 | insert_child(sorted_items, child, values, parent) | ||
| 1163 | |||
| 1164 | return sorted_items | ||
| 1165 | |||
| 1079 | row = 3 | 1166 | row = 3 |
| 1080 | self.screen.move(row, 0) | 1167 | self.screen.move(row, 0) |
| 1081 | self.screen.clrtobot() | 1168 | self.screen.clrtobot() |
| 1082 | stats = self.stats.get(self._display_guests) | 1169 | stats = self.stats.get(self._display_guests) |
| 1083 | 1170 | total = 0. | |
| 1084 | def sortCurAvg(x): | 1171 | ctotal = 0. |
| 1085 | # sort by current events if available | 1172 | for key, values in stats.items(): |
| 1086 | if stats[x][1]: | 1173 | if self._display_guests: |
| 1087 | return (-stats[x][1], -stats[x][0]) | 1174 | if self.get_gname_from_pid(key): |
| 1175 | total += values.value | ||
| 1176 | continue | ||
| 1177 | if not key.find(' ') != -1: | ||
| 1178 | total += values.value | ||
| 1088 | else: | 1179 | else: |
| 1089 | return (0, -stats[x][0]) | 1180 | ctotal += values.value |
| 1181 | if total == 0.: | ||
| 1182 | # we don't have any fields, or all non-child events are filtered | ||
| 1183 | total = ctotal | ||
| 1090 | 1184 | ||
| 1091 | def sortTotal(x): | 1185 | # print events |
| 1092 | # sort by totals | ||
| 1093 | return (0, -stats[x][0]) | ||
| 1094 | total = 0. | ||
| 1095 | for key in stats.keys(): | ||
| 1096 | if key.find('(') is -1: | ||
| 1097 | total += stats[key][0] | ||
| 1098 | if self._sorting == SORT_DEFAULT: | ||
| 1099 | sortkey = sortCurAvg | ||
| 1100 | else: | ||
| 1101 | sortkey = sortTotal | ||
| 1102 | tavg = 0 | 1186 | tavg = 0 |
| 1103 | for key in sorted(stats.keys(), key=sortkey): | 1187 | tcur = 0 |
| 1104 | if row >= self.screen.getmaxyx()[0] - 1: | 1188 | for key, values in get_sorted_events(self, stats): |
| 1105 | break | 1189 | if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0): |
| 1106 | values = stats[key] | ||
| 1107 | if not values[0] and not values[1]: | ||
| 1108 | break | 1190 | break |
| 1109 | if values[0] is not None: | 1191 | if self._display_guests: |
| 1110 | cur = int(round(values[1] / sleeptime)) if values[1] else '' | 1192 | key = self.get_gname_from_pid(key) |
| 1111 | if self._display_guests: | 1193 | if not key: |
| 1112 | key = self.get_gname_from_pid(key) | 1194 | continue |
| 1113 | self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % | 1195 | cur = int(round(values.delta / sleeptime)) if values.delta else '' |
| 1114 | (key, values[0], values[0] * 100 / total, | 1196 | if key[0] != ' ': |
| 1115 | cur)) | 1197 | if values.delta: |
| 1116 | if cur is not '' and key.find('(') is -1: | 1198 | tcur += values.delta |
| 1117 | tavg += cur | 1199 | ptotal = values.value |
| 1200 | ltotal = total | ||
| 1201 | else: | ||
| 1202 | ltotal = ptotal | ||
| 1203 | self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key, | ||
| 1204 | values.value, | ||
| 1205 | values.value * 100 / float(ltotal), cur)) | ||
| 1118 | row += 1 | 1206 | row += 1 |
| 1119 | if row == 3: | 1207 | if row == 3: |
| 1120 | self.screen.addstr(4, 1, 'No matching events reported yet') | 1208 | self.screen.addstr(4, 1, 'No matching events reported yet') |
| 1121 | else: | 1209 | if row > 4: |
| 1210 | tavg = int(round(tcur / sleeptime)) if tcur > 0 else '' | ||
| 1122 | self.screen.addstr(row, 1, '%-40s %10d %8s' % | 1211 | self.screen.addstr(row, 1, '%-40s %10d %8s' % |
| 1123 | ('Total', total, tavg if tavg else ''), | 1212 | ('Total', total, tavg), curses.A_BOLD) |
| 1124 | curses.A_BOLD) | ||
| 1125 | self.screen.refresh() | 1213 | self.screen.refresh() |
| 1126 | 1214 | ||
| 1127 | def show_msg(self, text): | 1215 | def _show_msg(self, text): |
| 1128 | """Display message centered text and exit on key press""" | 1216 | """Display message centered text and exit on key press""" |
| 1129 | hint = 'Press any key to continue' | 1217 | hint = 'Press any key to continue' |
| 1130 | curses.cbreak() | 1218 | curses.cbreak() |
| @@ -1139,16 +1227,16 @@ class Tui(object): | |||
| 1139 | curses.A_STANDOUT) | 1227 | curses.A_STANDOUT) |
| 1140 | self.screen.getkey() | 1228 | self.screen.getkey() |
| 1141 | 1229 | ||
| 1142 | def show_help_interactive(self): | 1230 | def _show_help_interactive(self): |
| 1143 | """Display help with list of interactive commands""" | 1231 | """Display help with list of interactive commands""" |
| 1144 | msg = (' b toggle events by guests (debugfs only, honors' | 1232 | msg = (' b toggle events by guests (debugfs only, honors' |
| 1145 | ' filters)', | 1233 | ' filters)', |
| 1146 | ' c clear filter', | 1234 | ' c clear filter', |
| 1147 | ' f filter by regular expression', | 1235 | ' f filter by regular expression', |
| 1148 | ' g filter by guest name', | 1236 | ' g filter by guest name/PID', |
| 1149 | ' h display interactive commands reference', | 1237 | ' h display interactive commands reference', |
| 1150 | ' o toggle sorting order (Total vs CurAvg/s)', | 1238 | ' o toggle sorting order (Total vs CurAvg/s)', |
| 1151 | ' p filter by PID', | 1239 | ' p filter by guest name/PID', |
| 1152 | ' q quit', | 1240 | ' q quit', |
| 1153 | ' r reset stats', | 1241 | ' r reset stats', |
| 1154 | ' s set update interval', | 1242 | ' s set update interval', |
| @@ -1165,14 +1253,15 @@ class Tui(object): | |||
| 1165 | self.screen.addstr(row, 0, line) | 1253 | self.screen.addstr(row, 0, line) |
| 1166 | row += 1 | 1254 | row += 1 |
| 1167 | self.screen.getkey() | 1255 | self.screen.getkey() |
| 1168 | self.refresh_header() | 1256 | self._refresh_header() |
| 1169 | 1257 | ||
| 1170 | def show_filter_selection(self): | 1258 | def _show_filter_selection(self): |
| 1171 | """Draws filter selection mask. | 1259 | """Draws filter selection mask. |
| 1172 | 1260 | ||
| 1173 | Asks for a valid regex and sets the fields filter accordingly. | 1261 | Asks for a valid regex and sets the fields filter accordingly. |
| 1174 | 1262 | ||
| 1175 | """ | 1263 | """ |
| 1264 | msg = '' | ||
| 1176 | while True: | 1265 | while True: |
| 1177 | self.screen.erase() | 1266 | self.screen.erase() |
| 1178 | self.screen.addstr(0, 0, | 1267 | self.screen.addstr(0, 0, |
| @@ -1181,61 +1270,25 @@ class Tui(object): | |||
| 1181 | self.screen.addstr(2, 0, | 1270 | self.screen.addstr(2, 0, |
| 1182 | "Current regex: {0}" | 1271 | "Current regex: {0}" |
| 1183 | .format(self.stats.fields_filter)) | 1272 | .format(self.stats.fields_filter)) |
| 1273 | self.screen.addstr(5, 0, msg) | ||
| 1184 | self.screen.addstr(3, 0, "New regex: ") | 1274 | self.screen.addstr(3, 0, "New regex: ") |
| 1185 | curses.echo() | 1275 | curses.echo() |
| 1186 | regex = self.screen.getstr().decode(ENCODING) | 1276 | regex = self.screen.getstr().decode(ENCODING) |
| 1187 | curses.noecho() | 1277 | curses.noecho() |
| 1188 | if len(regex) == 0: | 1278 | if len(regex) == 0: |
| 1189 | self.stats.fields_filter = DEFAULT_REGEX | 1279 | self.stats.fields_filter = '' |
| 1190 | self.refresh_header() | 1280 | self._refresh_header() |
| 1191 | return | 1281 | return |
| 1192 | try: | 1282 | try: |
| 1193 | re.compile(regex) | 1283 | re.compile(regex) |
| 1194 | self.stats.fields_filter = regex | 1284 | self.stats.fields_filter = regex |
| 1195 | self.refresh_header() | 1285 | self._refresh_header() |
| 1196 | return | 1286 | return |
| 1197 | except re.error: | 1287 | except re.error: |
| 1288 | msg = '"' + regex + '": Not a valid regular expression' | ||
| 1198 | continue | 1289 | continue |
| 1199 | 1290 | ||
| 1200 | def show_vm_selection_by_pid(self): | 1291 | def _show_set_update_interval(self): |
| 1201 | """Draws PID selection mask. | ||
| 1202 | |||
| 1203 | Asks for a pid until a valid pid or 0 has been entered. | ||
| 1204 | |||
| 1205 | """ | ||
| 1206 | msg = '' | ||
| 1207 | while True: | ||
| 1208 | self.screen.erase() | ||
| 1209 | self.screen.addstr(0, 0, | ||
| 1210 | 'Show statistics for specific pid.', | ||
| 1211 | curses.A_BOLD) | ||
| 1212 | self.screen.addstr(1, 0, | ||
| 1213 | 'This might limit the shown data to the trace ' | ||
| 1214 | 'statistics.') | ||
| 1215 | self.screen.addstr(5, 0, msg) | ||
| 1216 | self.print_all_gnames(7) | ||
| 1217 | |||
| 1218 | curses.echo() | ||
| 1219 | self.screen.addstr(3, 0, "Pid [0 or pid]: ") | ||
| 1220 | pid = self.screen.getstr().decode(ENCODING) | ||
| 1221 | curses.noecho() | ||
| 1222 | |||
| 1223 | try: | ||
| 1224 | if len(pid) > 0: | ||
| 1225 | pid = int(pid) | ||
| 1226 | if pid != 0 and not os.path.isdir(os.path.join('/proc/', | ||
| 1227 | str(pid))): | ||
| 1228 | msg = '"' + str(pid) + '": Not a running process' | ||
| 1229 | continue | ||
| 1230 | else: | ||
| 1231 | pid = 0 | ||
| 1232 | self.refresh_header(pid) | ||
| 1233 | self.update_pid(pid) | ||
| 1234 | break | ||
| 1235 | except ValueError: | ||
| 1236 | msg = '"' + str(pid) + '": Not a valid pid' | ||
| 1237 | |||
| 1238 | def show_set_update_interval(self): | ||
| 1239 | """Draws update interval selection mask.""" | 1292 | """Draws update interval selection mask.""" |
| 1240 | msg = '' | 1293 | msg = '' |
| 1241 | while True: | 1294 | while True: |
| @@ -1265,60 +1318,67 @@ class Tui(object): | |||
| 1265 | 1318 | ||
| 1266 | except ValueError: | 1319 | except ValueError: |
| 1267 | msg = '"' + str(val) + '": Invalid value' | 1320 | msg = '"' + str(val) + '": Invalid value' |
| 1268 | self.refresh_header() | 1321 | self._refresh_header() |
| 1269 | 1322 | ||
| 1270 | def show_vm_selection_by_guest_name(self): | 1323 | def _show_vm_selection_by_guest(self): |
| 1271 | """Draws guest selection mask. | 1324 | """Draws guest selection mask. |
| 1272 | 1325 | ||
| 1273 | Asks for a guest name until a valid guest name or '' is entered. | 1326 | Asks for a guest name or pid until a valid guest name or '' is entered. |
| 1274 | 1327 | ||
| 1275 | """ | 1328 | """ |
| 1276 | msg = '' | 1329 | msg = '' |
| 1277 | while True: | 1330 | while True: |
| 1278 | self.screen.erase() | 1331 | self.screen.erase() |
| 1279 | self.screen.addstr(0, 0, | 1332 | self.screen.addstr(0, 0, |
| 1280 | 'Show statistics for specific guest.', | 1333 | 'Show statistics for specific guest or pid.', |
| 1281 | curses.A_BOLD) | 1334 | curses.A_BOLD) |
| 1282 | self.screen.addstr(1, 0, | 1335 | self.screen.addstr(1, 0, |
| 1283 | 'This might limit the shown data to the trace ' | 1336 | 'This might limit the shown data to the trace ' |
| 1284 | 'statistics.') | 1337 | 'statistics.') |
| 1285 | self.screen.addstr(5, 0, msg) | 1338 | self.screen.addstr(5, 0, msg) |
| 1286 | self.print_all_gnames(7) | 1339 | self._print_all_gnames(7) |
| 1287 | curses.echo() | 1340 | curses.echo() |
| 1288 | self.screen.addstr(3, 0, "Guest [ENTER or guest]: ") | 1341 | curses.curs_set(1) |
| 1289 | gname = self.screen.getstr().decode(ENCODING) | 1342 | self.screen.addstr(3, 0, "Guest or pid [ENTER exits]: ") |
| 1343 | guest = self.screen.getstr().decode(ENCODING) | ||
| 1290 | curses.noecho() | 1344 | curses.noecho() |
| 1291 | 1345 | ||
| 1292 | if not gname: | 1346 | pid = 0 |
| 1293 | self.refresh_header(0) | 1347 | if not guest or guest == '0': |
| 1294 | self.update_pid(0) | ||
| 1295 | break | 1348 | break |
| 1296 | else: | 1349 | if guest.isdigit(): |
| 1297 | pids = [] | 1350 | if not os.path.isdir(os.path.join('/proc/', guest)): |
| 1298 | try: | 1351 | msg = '"' + guest + '": Not a running process' |
| 1299 | pids = self.get_pid_from_gname(gname) | ||
| 1300 | except: | ||
| 1301 | msg = '"' + gname + '": Internal error while searching, ' \ | ||
| 1302 | 'use pid filter instead' | ||
| 1303 | continue | ||
| 1304 | if len(pids) == 0: | ||
| 1305 | msg = '"' + gname + '": Not an active guest' | ||
| 1306 | continue | 1352 | continue |
| 1307 | if len(pids) > 1: | 1353 | pid = int(guest) |
| 1308 | msg = '"' + gname + '": Multiple matches found, use pid ' \ | ||
| 1309 | 'filter instead' | ||
| 1310 | continue | ||
| 1311 | self.refresh_header(pids[0]) | ||
| 1312 | self.update_pid(pids[0]) | ||
| 1313 | break | 1354 | break |
| 1355 | pids = [] | ||
| 1356 | try: | ||
| 1357 | pids = self.get_pid_from_gname(guest) | ||
| 1358 | except: | ||
| 1359 | msg = '"' + guest + '": Internal error while searching, ' \ | ||
| 1360 | 'use pid filter instead' | ||
| 1361 | continue | ||
| 1362 | if len(pids) == 0: | ||
| 1363 | msg = '"' + guest + '": Not an active guest' | ||
| 1364 | continue | ||
| 1365 | if len(pids) > 1: | ||
| 1366 | msg = '"' + guest + '": Multiple matches found, use pid ' \ | ||
| 1367 | 'filter instead' | ||
| 1368 | continue | ||
| 1369 | pid = pids[0] | ||
| 1370 | break | ||
| 1371 | curses.curs_set(0) | ||
| 1372 | self._refresh_header(pid) | ||
| 1373 | self._update_pid(pid) | ||
| 1314 | 1374 | ||
| 1315 | def show_stats(self): | 1375 | def show_stats(self): |
| 1316 | """Refreshes the screen and processes user input.""" | 1376 | """Refreshes the screen and processes user input.""" |
| 1317 | sleeptime = self._delay_initial | 1377 | sleeptime = self._delay_initial |
| 1318 | self.refresh_header() | 1378 | self._refresh_header() |
| 1319 | start = 0.0 # result based on init value never appears on screen | 1379 | start = 0.0 # result based on init value never appears on screen |
| 1320 | while True: | 1380 | while True: |
| 1321 | self.refresh_body(time.time() - start) | 1381 | self._refresh_body(time.time() - start) |
| 1322 | curses.halfdelay(int(sleeptime * 10)) | 1382 | curses.halfdelay(int(sleeptime * 10)) |
| 1323 | start = time.time() | 1383 | start = time.time() |
| 1324 | sleeptime = self._delay_regular | 1384 | sleeptime = self._delay_regular |
| @@ -1327,47 +1387,39 @@ class Tui(object): | |||
| 1327 | if char == 'b': | 1387 | if char == 'b': |
| 1328 | self._display_guests = not self._display_guests | 1388 | self._display_guests = not self._display_guests |
| 1329 | if self.stats.toggle_display_guests(self._display_guests): | 1389 | if self.stats.toggle_display_guests(self._display_guests): |
| 1330 | self.show_msg(['Command not available with tracepoints' | 1390 | self._show_msg(['Command not available with ' |
| 1331 | ' enabled', 'Restart with debugfs only ' | 1391 | 'tracepoints enabled', 'Restart with ' |
| 1332 | '(see option \'-d\') and try again!']) | 1392 | 'debugfs only (see option \'-d\') and ' |
| 1393 | 'try again!']) | ||
| 1333 | self._display_guests = not self._display_guests | 1394 | self._display_guests = not self._display_guests |
| 1334 | self.refresh_header() | 1395 | self._refresh_header() |
| 1335 | if char == 'c': | 1396 | if char == 'c': |
| 1336 | self.stats.fields_filter = DEFAULT_REGEX | 1397 | self.stats.fields_filter = '' |
| 1337 | self.refresh_header(0) | 1398 | self._refresh_header(0) |
| 1338 | self.update_pid(0) | 1399 | self._update_pid(0) |
| 1339 | if char == 'f': | 1400 | if char == 'f': |
| 1340 | curses.curs_set(1) | 1401 | curses.curs_set(1) |
| 1341 | self.show_filter_selection() | 1402 | self._show_filter_selection() |
| 1342 | curses.curs_set(0) | 1403 | curses.curs_set(0) |
| 1343 | sleeptime = self._delay_initial | 1404 | sleeptime = self._delay_initial |
| 1344 | if char == 'g': | 1405 | if char == 'g' or char == 'p': |
| 1345 | curses.curs_set(1) | 1406 | self._show_vm_selection_by_guest() |
| 1346 | self.show_vm_selection_by_guest_name() | ||
| 1347 | curses.curs_set(0) | ||
| 1348 | sleeptime = self._delay_initial | 1407 | sleeptime = self._delay_initial |
| 1349 | if char == 'h': | 1408 | if char == 'h': |
| 1350 | self.show_help_interactive() | 1409 | self._show_help_interactive() |
| 1351 | if char == 'o': | 1410 | if char == 'o': |
| 1352 | self._sorting = not self._sorting | 1411 | self._sorting = not self._sorting |
| 1353 | if char == 'p': | ||
| 1354 | curses.curs_set(1) | ||
| 1355 | self.show_vm_selection_by_pid() | ||
| 1356 | curses.curs_set(0) | ||
| 1357 | sleeptime = self._delay_initial | ||
| 1358 | if char == 'q': | 1412 | if char == 'q': |
| 1359 | break | 1413 | break |
| 1360 | if char == 'r': | 1414 | if char == 'r': |
| 1361 | self.stats.reset() | 1415 | self.stats.reset() |
| 1362 | if char == 's': | 1416 | if char == 's': |
| 1363 | curses.curs_set(1) | 1417 | curses.curs_set(1) |
| 1364 | self.show_set_update_interval() | 1418 | self._show_set_update_interval() |
| 1365 | curses.curs_set(0) | 1419 | curses.curs_set(0) |
| 1366 | sleeptime = self._delay_initial | 1420 | sleeptime = self._delay_initial |
| 1367 | if char == 'x': | 1421 | if char == 'x': |
| 1368 | self.update_drilldown() | 1422 | self.stats.child_events = not self.stats.child_events |
| 1369 | # prevents display of current values on next refresh | ||
| 1370 | self.stats.get(self._display_guests) | ||
| 1371 | except KeyboardInterrupt: | 1423 | except KeyboardInterrupt: |
| 1372 | break | 1424 | break |
| 1373 | except curses.error: | 1425 | except curses.error: |
| @@ -1380,9 +1432,9 @@ def batch(stats): | |||
| 1380 | s = stats.get() | 1432 | s = stats.get() |
| 1381 | time.sleep(1) | 1433 | time.sleep(1) |
| 1382 | s = stats.get() | 1434 | s = stats.get() |
| 1383 | for key in sorted(s.keys()): | 1435 | for key, values in sorted(s.items()): |
| 1384 | values = s[key] | 1436 | print('%-42s%10d%10d' % (key.split(' ')[0], values.value, |
| 1385 | print('%-42s%10d%10d' % (key, values[0], values[1])) | 1437 | values.delta)) |
| 1386 | except KeyboardInterrupt: | 1438 | except KeyboardInterrupt: |
| 1387 | pass | 1439 | pass |
| 1388 | 1440 | ||
| @@ -1392,14 +1444,14 @@ def log(stats): | |||
| 1392 | keys = sorted(stats.get().keys()) | 1444 | keys = sorted(stats.get().keys()) |
| 1393 | 1445 | ||
| 1394 | def banner(): | 1446 | def banner(): |
| 1395 | for k in keys: | 1447 | for key in keys: |
| 1396 | print(k, end=' ') | 1448 | print(key.split(' ')[0], end=' ') |
| 1397 | print() | 1449 | print() |
| 1398 | 1450 | ||
| 1399 | def statline(): | 1451 | def statline(): |
| 1400 | s = stats.get() | 1452 | s = stats.get() |
| 1401 | for k in keys: | 1453 | for key in keys: |
| 1402 | print(' %9d' % s[k][1], end=' ') | 1454 | print(' %9d' % s[key].delta, end=' ') |
| 1403 | print() | 1455 | print() |
| 1404 | line = 0 | 1456 | line = 0 |
| 1405 | banner_repeat = 20 | 1457 | banner_repeat = 20 |
| @@ -1504,7 +1556,7 @@ Press any other key to refresh statistics immediately. | |||
| 1504 | ) | 1556 | ) |
| 1505 | optparser.add_option('-f', '--fields', | 1557 | optparser.add_option('-f', '--fields', |
| 1506 | action='store', | 1558 | action='store', |
| 1507 | default=DEFAULT_REGEX, | 1559 | default='', |
| 1508 | dest='fields', | 1560 | dest='fields', |
| 1509 | help='''fields to display (regex) | 1561 | help='''fields to display (regex) |
| 1510 | "-f help" for a list of available events''', | 1562 | "-f help" for a list of available events''', |
| @@ -1539,17 +1591,6 @@ Press any other key to refresh statistics immediately. | |||
| 1539 | 1591 | ||
| 1540 | def check_access(options): | 1592 | def check_access(options): |
| 1541 | """Exits if the current user can't access all needed directories.""" | 1593 | """Exits if the current user can't access all needed directories.""" |
| 1542 | if not os.path.exists('/sys/kernel/debug'): | ||
| 1543 | sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.') | ||
| 1544 | sys.exit(1) | ||
| 1545 | |||
| 1546 | if not os.path.exists(PATH_DEBUGFS_KVM): | ||
| 1547 | sys.stderr.write("Please make sure, that debugfs is mounted and " | ||
| 1548 | "readable by the current user:\n" | ||
| 1549 | "('mount -t debugfs debugfs /sys/kernel/debug')\n" | ||
| 1550 | "Also ensure, that the kvm modules are loaded.\n") | ||
| 1551 | sys.exit(1) | ||
| 1552 | |||
| 1553 | if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or | 1594 | if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or |
| 1554 | not options.debugfs): | 1595 | not options.debugfs): |
| 1555 | sys.stderr.write("Please enable CONFIG_TRACING in your kernel " | 1596 | sys.stderr.write("Please enable CONFIG_TRACING in your kernel " |
| @@ -1567,7 +1608,33 @@ def check_access(options): | |||
| 1567 | return options | 1608 | return options |
| 1568 | 1609 | ||
| 1569 | 1610 | ||
| 1611 | def assign_globals(): | ||
| 1612 | global PATH_DEBUGFS_KVM | ||
| 1613 | global PATH_DEBUGFS_TRACING | ||
| 1614 | |||
| 1615 | debugfs = '' | ||
| 1616 | for line in file('/proc/mounts'): | ||
| 1617 | if line.split(' ')[0] == 'debugfs': | ||
| 1618 | debugfs = line.split(' ')[1] | ||
| 1619 | break | ||
| 1620 | if debugfs == '': | ||
| 1621 | sys.stderr.write("Please make sure that CONFIG_DEBUG_FS is enabled in " | ||
| 1622 | "your kernel, mounted and\nreadable by the current " | ||
| 1623 | "user:\n" | ||
| 1624 | "('mount -t debugfs debugfs /sys/kernel/debug')\n") | ||
| 1625 | sys.exit(1) | ||
| 1626 | |||
| 1627 | PATH_DEBUGFS_KVM = os.path.join(debugfs, 'kvm') | ||
| 1628 | PATH_DEBUGFS_TRACING = os.path.join(debugfs, 'tracing') | ||
| 1629 | |||
| 1630 | if not os.path.exists(PATH_DEBUGFS_KVM): | ||
| 1631 | sys.stderr.write("Please make sure that CONFIG_KVM is enabled in " | ||
| 1632 | "your kernel and that the modules are loaded.\n") | ||
| 1633 | sys.exit(1) | ||
| 1634 | |||
| 1635 | |||
| 1570 | def main(): | 1636 | def main(): |
| 1637 | assign_globals() | ||
| 1571 | options = get_options() | 1638 | options = get_options() |
| 1572 | options = check_access(options) | 1639 | options = check_access(options) |
| 1573 | 1640 | ||
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt index b5b3810c9e94..0811d860fe75 100644 --- a/tools/kvm/kvm_stat/kvm_stat.txt +++ b/tools/kvm/kvm_stat/kvm_stat.txt | |||
| @@ -35,13 +35,13 @@ INTERACTIVE COMMANDS | |||
| 35 | 35 | ||
| 36 | *f*:: filter by regular expression | 36 | *f*:: filter by regular expression |
| 37 | 37 | ||
| 38 | *g*:: filter by guest name | 38 | *g*:: filter by guest name/PID |
| 39 | 39 | ||
| 40 | *h*:: display interactive commands reference | 40 | *h*:: display interactive commands reference |
| 41 | 41 | ||
| 42 | *o*:: toggle sorting order (Total vs CurAvg/s) | 42 | *o*:: toggle sorting order (Total vs CurAvg/s) |
| 43 | 43 | ||
| 44 | *p*:: filter by PID | 44 | *p*:: filter by guest name/PID |
| 45 | 45 | ||
| 46 | *q*:: quit | 46 | *q*:: quit |
| 47 | 47 | ||
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 70268c0bec79..70f4c30918eb 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c | |||
| @@ -36,6 +36,8 @@ static struct timecounter *timecounter; | |||
| 36 | static unsigned int host_vtimer_irq; | 36 | static unsigned int host_vtimer_irq; |
| 37 | static u32 host_vtimer_irq_flags; | 37 | static u32 host_vtimer_irq_flags; |
| 38 | 38 | ||
| 39 | static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); | ||
| 40 | |||
| 39 | static const struct kvm_irq_level default_ptimer_irq = { | 41 | static const struct kvm_irq_level default_ptimer_irq = { |
| 40 | .irq = 30, | 42 | .irq = 30, |
| 41 | .level = 1, | 43 | .level = 1, |
| @@ -56,6 +58,12 @@ u64 kvm_phys_timer_read(void) | |||
| 56 | return timecounter->cc->read(timecounter->cc); | 58 | return timecounter->cc->read(timecounter->cc); |
| 57 | } | 59 | } |
| 58 | 60 | ||
| 61 | static inline bool userspace_irqchip(struct kvm *kvm) | ||
| 62 | { | ||
| 63 | return static_branch_unlikely(&userspace_irqchip_in_use) && | ||
| 64 | unlikely(!irqchip_in_kernel(kvm)); | ||
| 65 | } | ||
| 66 | |||
| 59 | static void soft_timer_start(struct hrtimer *hrt, u64 ns) | 67 | static void soft_timer_start(struct hrtimer *hrt, u64 ns) |
| 60 | { | 68 | { |
| 61 | hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns), | 69 | hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns), |
| @@ -69,25 +77,6 @@ static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work) | |||
| 69 | cancel_work_sync(work); | 77 | cancel_work_sync(work); |
| 70 | } | 78 | } |
| 71 | 79 | ||
| 72 | static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu) | ||
| 73 | { | ||
| 74 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | ||
| 75 | |||
| 76 | /* | ||
| 77 | * When using a userspace irqchip with the architected timers, we must | ||
| 78 | * prevent continuously exiting from the guest, and therefore mask the | ||
| 79 | * physical interrupt by disabling it on the host interrupt controller | ||
| 80 | * when the virtual level is high, such that the guest can make | ||
| 81 | * forward progress. Once we detect the output level being | ||
| 82 | * de-asserted, we unmask the interrupt again so that we exit from the | ||
| 83 | * guest when the timer fires. | ||
| 84 | */ | ||
| 85 | if (vtimer->irq.level) | ||
| 86 | disable_percpu_irq(host_vtimer_irq); | ||
| 87 | else | ||
| 88 | enable_percpu_irq(host_vtimer_irq, 0); | ||
| 89 | } | ||
| 90 | |||
| 91 | static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) | 80 | static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) |
| 92 | { | 81 | { |
| 93 | struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; | 82 | struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; |
| @@ -106,9 +95,9 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) | |||
| 106 | if (kvm_timer_should_fire(vtimer)) | 95 | if (kvm_timer_should_fire(vtimer)) |
| 107 | kvm_timer_update_irq(vcpu, true, vtimer); | 96 | kvm_timer_update_irq(vcpu, true, vtimer); |
| 108 | 97 | ||
| 109 | if (static_branch_unlikely(&userspace_irqchip_in_use) && | 98 | if (userspace_irqchip(vcpu->kvm) && |
| 110 | unlikely(!irqchip_in_kernel(vcpu->kvm))) | 99 | !static_branch_unlikely(&has_gic_active_state)) |
| 111 | kvm_vtimer_update_mask_user(vcpu); | 100 | disable_percpu_irq(host_vtimer_irq); |
| 112 | 101 | ||
| 113 | return IRQ_HANDLED; | 102 | return IRQ_HANDLED; |
| 114 | } | 103 | } |
| @@ -290,8 +279,7 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, | |||
| 290 | trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, | 279 | trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, |
| 291 | timer_ctx->irq.level); | 280 | timer_ctx->irq.level); |
| 292 | 281 | ||
| 293 | if (!static_branch_unlikely(&userspace_irqchip_in_use) || | 282 | if (!userspace_irqchip(vcpu->kvm)) { |
| 294 | likely(irqchip_in_kernel(vcpu->kvm))) { | ||
| 295 | ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, | 283 | ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, |
| 296 | timer_ctx->irq.irq, | 284 | timer_ctx->irq.irq, |
| 297 | timer_ctx->irq.level, | 285 | timer_ctx->irq.level, |
| @@ -350,12 +338,6 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) | |||
| 350 | phys_timer_emulate(vcpu); | 338 | phys_timer_emulate(vcpu); |
| 351 | } | 339 | } |
| 352 | 340 | ||
| 353 | static void __timer_snapshot_state(struct arch_timer_context *timer) | ||
| 354 | { | ||
| 355 | timer->cnt_ctl = read_sysreg_el0(cntv_ctl); | ||
| 356 | timer->cnt_cval = read_sysreg_el0(cntv_cval); | ||
| 357 | } | ||
| 358 | |||
| 359 | static void vtimer_save_state(struct kvm_vcpu *vcpu) | 341 | static void vtimer_save_state(struct kvm_vcpu *vcpu) |
| 360 | { | 342 | { |
| 361 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 343 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
| @@ -367,8 +349,10 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu) | |||
| 367 | if (!vtimer->loaded) | 349 | if (!vtimer->loaded) |
| 368 | goto out; | 350 | goto out; |
| 369 | 351 | ||
| 370 | if (timer->enabled) | 352 | if (timer->enabled) { |
| 371 | __timer_snapshot_state(vtimer); | 353 | vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); |
| 354 | vtimer->cnt_cval = read_sysreg_el0(cntv_cval); | ||
| 355 | } | ||
| 372 | 356 | ||
| 373 | /* Disable the virtual timer */ | 357 | /* Disable the virtual timer */ |
| 374 | write_sysreg_el0(0, cntv_ctl); | 358 | write_sysreg_el0(0, cntv_ctl); |
| @@ -460,23 +444,43 @@ static void set_cntvoff(u64 cntvoff) | |||
| 460 | kvm_call_hyp(__kvm_timer_set_cntvoff, low, high); | 444 | kvm_call_hyp(__kvm_timer_set_cntvoff, low, high); |
| 461 | } | 445 | } |
| 462 | 446 | ||
| 463 | static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu) | 447 | static inline void set_vtimer_irq_phys_active(struct kvm_vcpu *vcpu, bool active) |
| 448 | { | ||
| 449 | int r; | ||
| 450 | r = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, active); | ||
| 451 | WARN_ON(r); | ||
| 452 | } | ||
| 453 | |||
| 454 | static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu) | ||
| 464 | { | 455 | { |
| 465 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 456 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); |
| 466 | bool phys_active; | 457 | bool phys_active; |
| 467 | int ret; | ||
| 468 | 458 | ||
| 469 | phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); | 459 | if (irqchip_in_kernel(vcpu->kvm)) |
| 470 | 460 | phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); | |
| 471 | ret = irq_set_irqchip_state(host_vtimer_irq, | 461 | else |
| 472 | IRQCHIP_STATE_ACTIVE, | 462 | phys_active = vtimer->irq.level; |
| 473 | phys_active); | 463 | set_vtimer_irq_phys_active(vcpu, phys_active); |
| 474 | WARN_ON(ret); | ||
| 475 | } | 464 | } |
| 476 | 465 | ||
| 477 | static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu) | 466 | static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) |
| 478 | { | 467 | { |
| 479 | kvm_vtimer_update_mask_user(vcpu); | 468 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); |
| 469 | |||
| 470 | /* | ||
| 471 | * When using a userspace irqchip with the architected timers and a | ||
| 472 | * host interrupt controller that doesn't support an active state, we | ||
| 473 | * must still prevent continuously exiting from the guest, and | ||
| 474 | * therefore mask the physical interrupt by disabling it on the host | ||
| 475 | * interrupt controller when the virtual level is high, such that the | ||
| 476 | * guest can make forward progress. Once we detect the output level | ||
| 477 | * being de-asserted, we unmask the interrupt again so that we exit | ||
| 478 | * from the guest when the timer fires. | ||
| 479 | */ | ||
| 480 | if (vtimer->irq.level) | ||
| 481 | disable_percpu_irq(host_vtimer_irq); | ||
| 482 | else | ||
| 483 | enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); | ||
| 480 | } | 484 | } |
| 481 | 485 | ||
| 482 | void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) | 486 | void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) |
| @@ -487,10 +491,10 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) | |||
| 487 | if (unlikely(!timer->enabled)) | 491 | if (unlikely(!timer->enabled)) |
| 488 | return; | 492 | return; |
| 489 | 493 | ||
| 490 | if (unlikely(!irqchip_in_kernel(vcpu->kvm))) | 494 | if (static_branch_likely(&has_gic_active_state)) |
| 491 | kvm_timer_vcpu_load_user(vcpu); | 495 | kvm_timer_vcpu_load_gic(vcpu); |
| 492 | else | 496 | else |
| 493 | kvm_timer_vcpu_load_vgic(vcpu); | 497 | kvm_timer_vcpu_load_nogic(vcpu); |
| 494 | 498 | ||
| 495 | set_cntvoff(vtimer->cntvoff); | 499 | set_cntvoff(vtimer->cntvoff); |
| 496 | 500 | ||
| @@ -555,18 +559,24 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu) | |||
| 555 | { | 559 | { |
| 556 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 560 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); |
| 557 | 561 | ||
| 558 | if (unlikely(!irqchip_in_kernel(vcpu->kvm))) { | 562 | if (!kvm_timer_should_fire(vtimer)) { |
| 559 | __timer_snapshot_state(vtimer); | 563 | kvm_timer_update_irq(vcpu, false, vtimer); |
| 560 | if (!kvm_timer_should_fire(vtimer)) { | 564 | if (static_branch_likely(&has_gic_active_state)) |
| 561 | kvm_timer_update_irq(vcpu, false, vtimer); | 565 | set_vtimer_irq_phys_active(vcpu, false); |
| 562 | kvm_vtimer_update_mask_user(vcpu); | 566 | else |
| 563 | } | 567 | enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); |
| 564 | } | 568 | } |
| 565 | } | 569 | } |
| 566 | 570 | ||
| 567 | void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) | 571 | void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) |
| 568 | { | 572 | { |
| 569 | unmask_vtimer_irq_user(vcpu); | 573 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
| 574 | |||
| 575 | if (unlikely(!timer->enabled)) | ||
| 576 | return; | ||
| 577 | |||
| 578 | if (unlikely(!irqchip_in_kernel(vcpu->kvm))) | ||
| 579 | unmask_vtimer_irq_user(vcpu); | ||
| 570 | } | 580 | } |
| 571 | 581 | ||
| 572 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) | 582 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) |
| @@ -753,6 +763,8 @@ int kvm_timer_hyp_init(bool has_gic) | |||
| 753 | kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); | 763 | kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); |
| 754 | goto out_free_irq; | 764 | goto out_free_irq; |
| 755 | } | 765 | } |
| 766 | |||
| 767 | static_branch_enable(&has_gic_active_state); | ||
| 756 | } | 768 | } |
| 757 | 769 | ||
| 758 | kvm_info("virtual timer IRQ%d\n", host_vtimer_irq); | 770 | kvm_info("virtual timer IRQ%d\n", host_vtimer_irq); |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4501e658e8d6..65dea3ffef68 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
| @@ -969,8 +969,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 969 | /* Check for overlaps */ | 969 | /* Check for overlaps */ |
| 970 | r = -EEXIST; | 970 | r = -EEXIST; |
| 971 | kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) { | 971 | kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) { |
| 972 | if ((slot->id >= KVM_USER_MEM_SLOTS) || | 972 | if (slot->id == id) |
| 973 | (slot->id == id)) | ||
| 974 | continue; | 973 | continue; |
| 975 | if (!((base_gfn + npages <= slot->base_gfn) || | 974 | if (!((base_gfn + npages <= slot->base_gfn) || |
| 976 | (base_gfn >= slot->base_gfn + slot->npages))) | 975 | (base_gfn >= slot->base_gfn + slot->npages))) |
