about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2018-02-26 12:28:35 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2018-02-26 12:28:35 -0500
commit d4858aaf6bd8a90e2dacc0dfec2077e334dcedbf (patch)
tree ccb1934d3943fd7ed443f533409d02f4e2a5ad05
parent 4a3928c6f8a53fa1aed28ccba227742486e8ddcb (diff)
parent 9c5e0afaf15788bcbd1c3469da701ac3da826886 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Paolo Bonzini:
 "s390:
   - optimization for the exitless interrupt support that was merged in 4.16-rc1
   - improve the branch prediction blocking for nested KVM
   - replace some jump tables with switch statements to improve expoline performance
   - fixes for multiple epoch facility

  ARM:
   - fix the interaction of userspace irqchip VMs with in-kernel irqchip VMs
   - make sure we can build 32-bit KVM/ARM with gcc-8.

  x86:
   - fixes for AMD SEV
   - fixes for Intel nested VMX, emulated UMIP and a dump_stack() on VM startup
   - fixes for async page fault migration
   - small optimization to PV TLB flush (new in 4.16-rc1)
   - syzkaller fixes

  Generic:
   - compiler warning fixes
   - syzkaller fixes
   - more improvements to the kvm_stat tool

  Two more small Spectre fixes are going to reach you via Ingo"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (40 commits)
  KVM: SVM: Fix SEV LAUNCH_SECRET command
  KVM: SVM: install RSM intercept
  KVM: SVM: no need to call access_ok() in LAUNCH_MEASURE command
  include: psp-sev: Capitalize invalid length enum
  crypto: ccp: Fix sparse, use plain integer as NULL pointer
  KVM: X86: Avoid traversing all the cpus for pv tlb flush when steal time is disabled
  x86/kvm: Make parse_no_xxx __init for kvm
  KVM: x86: fix backward migration with async_PF
  kvm: fix warning for non-x86 builds
  kvm: fix warning for CONFIG_HAVE_KVM_EVENTFD builds
  tools/kvm_stat: print 'Total' line for multiple events only
  tools/kvm_stat: group child events indented after parent
  tools/kvm_stat: separate drilldown and fields filtering
  tools/kvm_stat: eliminate extra guest/pid selection dialog
  tools/kvm_stat: mark private methods as such
  tools/kvm_stat: fix debugfs handling
  tools/kvm_stat: print error on invalid regex
  tools/kvm_stat: fix crash when filtering out all non-child trace events
  tools/kvm_stat: avoid 'is' for equality checks
  tools/kvm_stat: use a more pythonic way to iterate over dictionaries
  ...
-rw-r--r-- Documentation/virtual/kvm/cpuid.txt 4
-rw-r--r-- Documentation/virtual/kvm/msr.txt 3
-rw-r--r-- arch/arm/kvm/hyp/Makefile 5
-rw-r--r-- arch/arm/kvm/hyp/banked-sr.c 4
-rw-r--r-- arch/s390/kvm/intercept.c 51
-rw-r--r-- arch/s390/kvm/interrupt.c 123
-rw-r--r-- arch/s390/kvm/kvm-s390.c 79
-rw-r--r-- arch/s390/kvm/kvm-s390.h 7
-rw-r--r-- arch/s390/kvm/priv.c 192
-rw-r--r-- arch/s390/kvm/vsie.c 20
-rw-r--r-- arch/x86/include/asm/kvm_host.h 3
-rw-r--r-- arch/x86/include/uapi/asm/kvm_para.h 1
-rw-r--r-- arch/x86/kernel/kvm.c 20
-rw-r--r-- arch/x86/kvm/cpuid.c 3
-rw-r--r-- arch/x86/kvm/lapic.c 1
-rw-r--r-- arch/x86/kvm/mmu.c 2
-rw-r--r-- arch/x86/kvm/svm.c 37
-rw-r--r-- arch/x86/kvm/vmx.c 10
-rw-r--r-- arch/x86/kvm/x86.c 7
-rw-r--r-- drivers/crypto/ccp/psp-dev.c 8
-rw-r--r-- include/linux/kvm_host.h 6
-rw-r--r-- include/uapi/linux/psp-sev.h 2
-rwxr-xr-x tools/kvm/kvm_stat/kvm_stat 503
-rw-r--r-- tools/kvm/kvm_stat/kvm_stat.txt 4
-rw-r--r-- virt/kvm/arm/arch_timer.c 116
-rw-r--r-- virt/kvm/kvm_main.c 3
26 files changed, 698 insertions, 516 deletions
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index dcab6dc11e3b..87a7506f31c2 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -58,6 +58,10 @@ KVM_FEATURE_PV_TLB_FLUSH || 9 || guest checks this feature bit
58 || || before enabling paravirtualized 58 || || before enabling paravirtualized
59 || || tlb flush. 59 || || tlb flush.
60------------------------------------------------------------------------------ 60------------------------------------------------------------------------------
61KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit
62 || || can be enabled by setting bit 2
63 || || when writing to msr 0x4b564d02
64------------------------------------------------------------------------------
61KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side 65KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side
62 || || per-cpu warps are expected in 66 || || per-cpu warps are expected in
63 || || kvmclock. 67 || || kvmclock.
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index 1ebecc115dc6..f3f0d57ced8e 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
@@ -170,7 +170,8 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02
170 when asynchronous page faults are enabled on the vcpu 0 when 170 when asynchronous page faults are enabled on the vcpu 0 when
171 disabled. Bit 1 is 1 if asynchronous page faults can be injected 171 disabled. Bit 1 is 1 if asynchronous page faults can be injected
172 when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults 172 when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
173 are delivered to L1 as #PF vmexits. 173 are delivered to L1 as #PF vmexits. Bit 2 can be set only if
174 KVM_FEATURE_ASYNC_PF_VMEXIT is present in CPUID.
174 175
175 First 4 byte of 64 byte memory location will be written to by 176 First 4 byte of 64 byte memory location will be written to by
176 the hypervisor at the time of asynchronous page fault (APF) 177 the hypervisor at the time of asynchronous page fault (APF)
diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile
index 5638ce0c9524..63d6b404d88e 100644
--- a/arch/arm/kvm/hyp/Makefile
+++ b/arch/arm/kvm/hyp/Makefile
@@ -7,6 +7,8 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING
7 7
8KVM=../../../../virt/kvm 8KVM=../../../../virt/kvm
9 9
10CFLAGS_ARMV7VE :=$(call cc-option, -march=armv7ve)
11
10obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o 12obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
11obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o 13obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
12obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o 14obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
@@ -15,7 +17,10 @@ obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
15obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o 17obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o
16obj-$(CONFIG_KVM_ARM_HOST) += vfp.o 18obj-$(CONFIG_KVM_ARM_HOST) += vfp.o
17obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o 19obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o
20CFLAGS_banked-sr.o += $(CFLAGS_ARMV7VE)
21
18obj-$(CONFIG_KVM_ARM_HOST) += entry.o 22obj-$(CONFIG_KVM_ARM_HOST) += entry.o
19obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o 23obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
20obj-$(CONFIG_KVM_ARM_HOST) += switch.o 24obj-$(CONFIG_KVM_ARM_HOST) += switch.o
25CFLAGS_switch.o += $(CFLAGS_ARMV7VE)
21obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o 26obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
diff --git a/arch/arm/kvm/hyp/banked-sr.c b/arch/arm/kvm/hyp/banked-sr.c
index 111bda8cdebd..be4b8b0a40ad 100644
--- a/arch/arm/kvm/hyp/banked-sr.c
+++ b/arch/arm/kvm/hyp/banked-sr.c
@@ -20,6 +20,10 @@
20 20
21#include <asm/kvm_hyp.h> 21#include <asm/kvm_hyp.h>
22 22
23/*
24 * gcc before 4.9 doesn't understand -march=armv7ve, so we have to
25 * trick the assembler.
26 */
23__asm__(".arch_extension virt"); 27__asm__(".arch_extension virt");
24 28
25void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt) 29void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt)
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 9c7d70715862..07c6e81163bf 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -22,22 +22,6 @@
22#include "trace.h" 22#include "trace.h"
23#include "trace-s390.h" 23#include "trace-s390.h"
24 24
25
26static const intercept_handler_t instruction_handlers[256] = {
27 [0x01] = kvm_s390_handle_01,
28 [0x82] = kvm_s390_handle_lpsw,
29 [0x83] = kvm_s390_handle_diag,
30 [0xaa] = kvm_s390_handle_aa,
31 [0xae] = kvm_s390_handle_sigp,
32 [0xb2] = kvm_s390_handle_b2,
33 [0xb6] = kvm_s390_handle_stctl,
34 [0xb7] = kvm_s390_handle_lctl,
35 [0xb9] = kvm_s390_handle_b9,
36 [0xe3] = kvm_s390_handle_e3,
37 [0xe5] = kvm_s390_handle_e5,
38 [0xeb] = kvm_s390_handle_eb,
39};
40
41u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu) 25u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
42{ 26{
43 struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; 27 struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
@@ -129,16 +113,39 @@ static int handle_validity(struct kvm_vcpu *vcpu)
129 113
130static int handle_instruction(struct kvm_vcpu *vcpu) 114static int handle_instruction(struct kvm_vcpu *vcpu)
131{ 115{
132 intercept_handler_t handler;
133
134 vcpu->stat.exit_instruction++; 116 vcpu->stat.exit_instruction++;
135 trace_kvm_s390_intercept_instruction(vcpu, 117 trace_kvm_s390_intercept_instruction(vcpu,
136 vcpu->arch.sie_block->ipa, 118 vcpu->arch.sie_block->ipa,
137 vcpu->arch.sie_block->ipb); 119 vcpu->arch.sie_block->ipb);
138 handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8]; 120
139 if (handler) 121 switch (vcpu->arch.sie_block->ipa >> 8) {
140 return handler(vcpu); 122 case 0x01:
141 return -EOPNOTSUPP; 123 return kvm_s390_handle_01(vcpu);
124 case 0x82:
125 return kvm_s390_handle_lpsw(vcpu);
126 case 0x83:
127 return kvm_s390_handle_diag(vcpu);
128 case 0xaa:
129 return kvm_s390_handle_aa(vcpu);
130 case 0xae:
131 return kvm_s390_handle_sigp(vcpu);
132 case 0xb2:
133 return kvm_s390_handle_b2(vcpu);
134 case 0xb6:
135 return kvm_s390_handle_stctl(vcpu);
136 case 0xb7:
137 return kvm_s390_handle_lctl(vcpu);
138 case 0xb9:
139 return kvm_s390_handle_b9(vcpu);
140 case 0xe3:
141 return kvm_s390_handle_e3(vcpu);
142 case 0xe5:
143 return kvm_s390_handle_e5(vcpu);
144 case 0xeb:
145 return kvm_s390_handle_eb(vcpu);
146 default:
147 return -EOPNOTSUPP;
148 }
142} 149}
143 150
144static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu) 151static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index aabf46f5f883..b04616b57a94 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -169,8 +169,15 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
169 169
170static int ckc_irq_pending(struct kvm_vcpu *vcpu) 170static int ckc_irq_pending(struct kvm_vcpu *vcpu)
171{ 171{
172 if (vcpu->arch.sie_block->ckc >= kvm_s390_get_tod_clock_fast(vcpu->kvm)) 172 const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
173 const u64 ckc = vcpu->arch.sie_block->ckc;
174
175 if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) {
176 if ((s64)ckc >= (s64)now)
177 return 0;
178 } else if (ckc >= now) {
173 return 0; 179 return 0;
180 }
174 return ckc_interrupts_enabled(vcpu); 181 return ckc_interrupts_enabled(vcpu);
175} 182}
176 183
@@ -187,12 +194,6 @@ static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
187 return kvm_s390_get_cpu_timer(vcpu) >> 63; 194 return kvm_s390_get_cpu_timer(vcpu) >> 63;
188} 195}
189 196
190static inline int is_ioirq(unsigned long irq_type)
191{
192 return ((irq_type >= IRQ_PEND_IO_ISC_7) &&
193 (irq_type <= IRQ_PEND_IO_ISC_0));
194}
195
196static uint64_t isc_to_isc_bits(int isc) 197static uint64_t isc_to_isc_bits(int isc)
197{ 198{
198 return (0x80 >> isc) << 24; 199 return (0x80 >> isc) << 24;
@@ -236,10 +237,15 @@ static inline int kvm_s390_gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gis
236 return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); 237 return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
237} 238}
238 239
239static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) 240static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu)
240{ 241{
241 return vcpu->kvm->arch.float_int.pending_irqs | 242 return vcpu->kvm->arch.float_int.pending_irqs |
242 vcpu->arch.local_int.pending_irqs | 243 vcpu->arch.local_int.pending_irqs;
244}
245
246static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
247{
248 return pending_irqs_no_gisa(vcpu) |
243 kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7; 249 kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7;
244} 250}
245 251
@@ -337,7 +343,7 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
337 343
338static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) 344static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
339{ 345{
340 if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK)) 346 if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_IO_MASK))
341 return; 347 return;
342 else if (psw_ioint_disabled(vcpu)) 348 else if (psw_ioint_disabled(vcpu))
343 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT); 349 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT);
@@ -1011,24 +1017,6 @@ out:
1011 return rc; 1017 return rc;
1012} 1018}
1013 1019
1014typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);
1015
1016static const deliver_irq_t deliver_irq_funcs[] = {
1017 [IRQ_PEND_MCHK_EX] = __deliver_machine_check,
1018 [IRQ_PEND_MCHK_REP] = __deliver_machine_check,
1019 [IRQ_PEND_PROG] = __deliver_prog,
1020 [IRQ_PEND_EXT_EMERGENCY] = __deliver_emergency_signal,
1021 [IRQ_PEND_EXT_EXTERNAL] = __deliver_external_call,
1022 [IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc,
1023 [IRQ_PEND_EXT_CPU_TIMER] = __deliver_cpu_timer,
1024 [IRQ_PEND_RESTART] = __deliver_restart,
1025 [IRQ_PEND_SET_PREFIX] = __deliver_set_prefix,
1026 [IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init,
1027 [IRQ_PEND_EXT_SERVICE] = __deliver_service,
1028 [IRQ_PEND_PFAULT_DONE] = __deliver_pfault_done,
1029 [IRQ_PEND_VIRTIO] = __deliver_virtio,
1030};
1031
1032/* Check whether an external call is pending (deliverable or not) */ 1020/* Check whether an external call is pending (deliverable or not) */
1033int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) 1021int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
1034{ 1022{
@@ -1066,13 +1054,19 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
1066 1054
1067static u64 __calculate_sltime(struct kvm_vcpu *vcpu) 1055static u64 __calculate_sltime(struct kvm_vcpu *vcpu)
1068{ 1056{
1069 u64 now, cputm, sltime = 0; 1057 const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
1058 const u64 ckc = vcpu->arch.sie_block->ckc;
1059 u64 cputm, sltime = 0;
1070 1060
1071 if (ckc_interrupts_enabled(vcpu)) { 1061 if (ckc_interrupts_enabled(vcpu)) {
1072 now = kvm_s390_get_tod_clock_fast(vcpu->kvm); 1062 if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) {
1073 sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); 1063 if ((s64)now < (s64)ckc)
1074 /* already expired or overflow? */ 1064 sltime = tod_to_ns((s64)ckc - (s64)now);
1075 if (!sltime || vcpu->arch.sie_block->ckc <= now) 1065 } else if (now < ckc) {
1066 sltime = tod_to_ns(ckc - now);
1067 }
1068 /* already expired */
1069 if (!sltime)
1076 return 0; 1070 return 0;
1077 if (cpu_timer_interrupts_enabled(vcpu)) { 1071 if (cpu_timer_interrupts_enabled(vcpu)) {
1078 cputm = kvm_s390_get_cpu_timer(vcpu); 1072 cputm = kvm_s390_get_cpu_timer(vcpu);
@@ -1192,7 +1186,6 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
1192int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) 1186int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
1193{ 1187{
1194 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 1188 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
1195 deliver_irq_t func;
1196 int rc = 0; 1189 int rc = 0;
1197 unsigned long irq_type; 1190 unsigned long irq_type;
1198 unsigned long irqs; 1191 unsigned long irqs;
@@ -1212,16 +1205,57 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
1212 while ((irqs = deliverable_irqs(vcpu)) && !rc) { 1205 while ((irqs = deliverable_irqs(vcpu)) && !rc) {
1213 /* bits are in the reverse order of interrupt priority */ 1206 /* bits are in the reverse order of interrupt priority */
1214 irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT); 1207 irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT);
1215 if (is_ioirq(irq_type)) { 1208 switch (irq_type) {
1209 case IRQ_PEND_IO_ISC_0:
1210 case IRQ_PEND_IO_ISC_1:
1211 case IRQ_PEND_IO_ISC_2:
1212 case IRQ_PEND_IO_ISC_3:
1213 case IRQ_PEND_IO_ISC_4:
1214 case IRQ_PEND_IO_ISC_5:
1215 case IRQ_PEND_IO_ISC_6:
1216 case IRQ_PEND_IO_ISC_7:
1216 rc = __deliver_io(vcpu, irq_type); 1217 rc = __deliver_io(vcpu, irq_type);
1217 } else { 1218 break;
1218 func = deliver_irq_funcs[irq_type]; 1219 case IRQ_PEND_MCHK_EX:
1219 if (!func) { 1220 case IRQ_PEND_MCHK_REP:
1220 WARN_ON_ONCE(func == NULL); 1221 rc = __deliver_machine_check(vcpu);
1221 clear_bit(irq_type, &li->pending_irqs); 1222 break;
1222 continue; 1223 case IRQ_PEND_PROG:
1223 } 1224 rc = __deliver_prog(vcpu);
1224 rc = func(vcpu); 1225 break;
1226 case IRQ_PEND_EXT_EMERGENCY:
1227 rc = __deliver_emergency_signal(vcpu);
1228 break;
1229 case IRQ_PEND_EXT_EXTERNAL:
1230 rc = __deliver_external_call(vcpu);
1231 break;
1232 case IRQ_PEND_EXT_CLOCK_COMP:
1233 rc = __deliver_ckc(vcpu);
1234 break;
1235 case IRQ_PEND_EXT_CPU_TIMER:
1236 rc = __deliver_cpu_timer(vcpu);
1237 break;
1238 case IRQ_PEND_RESTART:
1239 rc = __deliver_restart(vcpu);
1240 break;
1241 case IRQ_PEND_SET_PREFIX:
1242 rc = __deliver_set_prefix(vcpu);
1243 break;
1244 case IRQ_PEND_PFAULT_INIT:
1245 rc = __deliver_pfault_init(vcpu);
1246 break;
1247 case IRQ_PEND_EXT_SERVICE:
1248 rc = __deliver_service(vcpu);
1249 break;
1250 case IRQ_PEND_PFAULT_DONE:
1251 rc = __deliver_pfault_done(vcpu);
1252 break;
1253 case IRQ_PEND_VIRTIO:
1254 rc = __deliver_virtio(vcpu);
1255 break;
1256 default:
1257 WARN_ONCE(1, "Unknown pending irq type %ld", irq_type);
1258 clear_bit(irq_type, &li->pending_irqs);
1225 } 1259 }
1226 } 1260 }
1227 1261
@@ -1701,7 +1735,8 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)
1701 kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT); 1735 kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT);
1702 break; 1736 break;
1703 case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: 1737 case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
1704 kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT); 1738 if (!(type & KVM_S390_INT_IO_AI_MASK && kvm->arch.gisa))
1739 kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
1705 break; 1740 break;
1706 default: 1741 default:
1707 kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT); 1742 kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ba4c7092335a..77d7818130db 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -179,6 +179,28 @@ int kvm_arch_hardware_enable(void)
179static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 179static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
180 unsigned long end); 180 unsigned long end);
181 181
182static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
183{
184 u8 delta_idx = 0;
185
186 /*
187 * The TOD jumps by delta, we have to compensate this by adding
188 * -delta to the epoch.
189 */
190 delta = -delta;
191
192 /* sign-extension - we're adding to signed values below */
193 if ((s64)delta < 0)
194 delta_idx = -1;
195
196 scb->epoch += delta;
197 if (scb->ecd & ECD_MEF) {
198 scb->epdx += delta_idx;
199 if (scb->epoch < delta)
200 scb->epdx += 1;
201 }
202}
203
182/* 204/*
183 * This callback is executed during stop_machine(). All CPUs are therefore 205 * This callback is executed during stop_machine(). All CPUs are therefore
184 * temporarily stopped. In order not to change guest behavior, we have to 206 * temporarily stopped. In order not to change guest behavior, we have to
@@ -194,13 +216,17 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
194 unsigned long long *delta = v; 216 unsigned long long *delta = v;
195 217
196 list_for_each_entry(kvm, &vm_list, vm_list) { 218 list_for_each_entry(kvm, &vm_list, vm_list) {
197 kvm->arch.epoch -= *delta;
198 kvm_for_each_vcpu(i, vcpu, kvm) { 219 kvm_for_each_vcpu(i, vcpu, kvm) {
199 vcpu->arch.sie_block->epoch -= *delta; 220 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
221 if (i == 0) {
222 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
223 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
224 }
200 if (vcpu->arch.cputm_enabled) 225 if (vcpu->arch.cputm_enabled)
201 vcpu->arch.cputm_start += *delta; 226 vcpu->arch.cputm_start += *delta;
202 if (vcpu->arch.vsie_block) 227 if (vcpu->arch.vsie_block)
203 vcpu->arch.vsie_block->epoch -= *delta; 228 kvm_clock_sync_scb(vcpu->arch.vsie_block,
229 *delta);
204 } 230 }
205 } 231 }
206 return NOTIFY_OK; 232 return NOTIFY_OK;
@@ -902,12 +928,9 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
902 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod))) 928 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
903 return -EFAULT; 929 return -EFAULT;
904 930
905 if (test_kvm_facility(kvm, 139)) 931 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
906 kvm_s390_set_tod_clock_ext(kvm, &gtod);
907 else if (gtod.epoch_idx == 0)
908 kvm_s390_set_tod_clock(kvm, gtod.tod);
909 else
910 return -EINVAL; 932 return -EINVAL;
933 kvm_s390_set_tod_clock(kvm, &gtod);
911 934
912 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx", 935 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
913 gtod.epoch_idx, gtod.tod); 936 gtod.epoch_idx, gtod.tod);
@@ -932,13 +955,14 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
932 955
933static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) 956static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
934{ 957{
935 u64 gtod; 958 struct kvm_s390_vm_tod_clock gtod = { 0 };
936 959
937 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod))) 960 if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
961 sizeof(gtod.tod)))
938 return -EFAULT; 962 return -EFAULT;
939 963
940 kvm_s390_set_tod_clock(kvm, gtod); 964 kvm_s390_set_tod_clock(kvm, &gtod);
941 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod); 965 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
942 return 0; 966 return 0;
943} 967}
944 968
@@ -2389,6 +2413,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2389 mutex_lock(&vcpu->kvm->lock); 2413 mutex_lock(&vcpu->kvm->lock);
2390 preempt_disable(); 2414 preempt_disable();
2391 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 2415 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2416 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2392 preempt_enable(); 2417 preempt_enable();
2393 mutex_unlock(&vcpu->kvm->lock); 2418 mutex_unlock(&vcpu->kvm->lock);
2394 if (!kvm_is_ucontrol(vcpu->kvm)) { 2419 if (!kvm_is_ucontrol(vcpu->kvm)) {
@@ -3021,8 +3046,8 @@ retry:
3021 return 0; 3046 return 0;
3022} 3047}
3023 3048
3024void kvm_s390_set_tod_clock_ext(struct kvm *kvm, 3049void kvm_s390_set_tod_clock(struct kvm *kvm,
3025 const struct kvm_s390_vm_tod_clock *gtod) 3050 const struct kvm_s390_vm_tod_clock *gtod)
3026{ 3051{
3027 struct kvm_vcpu *vcpu; 3052 struct kvm_vcpu *vcpu;
3028 struct kvm_s390_tod_clock_ext htod; 3053 struct kvm_s390_tod_clock_ext htod;
@@ -3034,10 +3059,12 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
3034 get_tod_clock_ext((char *)&htod); 3059 get_tod_clock_ext((char *)&htod);
3035 3060
3036 kvm->arch.epoch = gtod->tod - htod.tod; 3061 kvm->arch.epoch = gtod->tod - htod.tod;
3037 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; 3062 kvm->arch.epdx = 0;
3038 3063 if (test_kvm_facility(kvm, 139)) {
3039 if (kvm->arch.epoch > gtod->tod) 3064 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3040 kvm->arch.epdx -= 1; 3065 if (kvm->arch.epoch > gtod->tod)
3066 kvm->arch.epdx -= 1;
3067 }
3041 3068
3042 kvm_s390_vcpu_block_all(kvm); 3069 kvm_s390_vcpu_block_all(kvm);
3043 kvm_for_each_vcpu(i, vcpu, kvm) { 3070 kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -3050,22 +3077,6 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
3050 mutex_unlock(&kvm->lock); 3077 mutex_unlock(&kvm->lock);
3051} 3078}
3052 3079
3053void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
3054{
3055 struct kvm_vcpu *vcpu;
3056 int i;
3057
3058 mutex_lock(&kvm->lock);
3059 preempt_disable();
3060 kvm->arch.epoch = tod - get_tod_clock();
3061 kvm_s390_vcpu_block_all(kvm);
3062 kvm_for_each_vcpu(i, vcpu, kvm)
3063 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3064 kvm_s390_vcpu_unblock_all(kvm);
3065 preempt_enable();
3066 mutex_unlock(&kvm->lock);
3067}
3068
3069/** 3080/**
3070 * kvm_arch_fault_in_page - fault-in guest page if necessary 3081 * kvm_arch_fault_in_page - fault-in guest page if necessary
3071 * @vcpu: The corresponding virtual cpu 3082 * @vcpu: The corresponding virtual cpu
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index bd31b37b0e6f..f55ac0ef99ea 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -19,8 +19,6 @@
19#include <asm/processor.h> 19#include <asm/processor.h>
20#include <asm/sclp.h> 20#include <asm/sclp.h>
21 21
22typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
23
24/* Transactional Memory Execution related macros */ 22/* Transactional Memory Execution related macros */
25#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE)) 23#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE))
26#define TDB_FORMAT1 1 24#define TDB_FORMAT1 1
@@ -283,9 +281,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
283int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); 281int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
284 282
285/* implemented in kvm-s390.c */ 283/* implemented in kvm-s390.c */
286void kvm_s390_set_tod_clock_ext(struct kvm *kvm, 284void kvm_s390_set_tod_clock(struct kvm *kvm,
287 const struct kvm_s390_vm_tod_clock *gtod); 285 const struct kvm_s390_vm_tod_clock *gtod);
288void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod);
289long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); 286long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
290int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); 287int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
291int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); 288int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index c4c4e157c036..f0b4185158af 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -85,9 +85,10 @@ int kvm_s390_handle_e3(struct kvm_vcpu *vcpu)
85/* Handle SCK (SET CLOCK) interception */ 85/* Handle SCK (SET CLOCK) interception */
86static int handle_set_clock(struct kvm_vcpu *vcpu) 86static int handle_set_clock(struct kvm_vcpu *vcpu)
87{ 87{
88 struct kvm_s390_vm_tod_clock gtod = { 0 };
88 int rc; 89 int rc;
89 u8 ar; 90 u8 ar;
90 u64 op2, val; 91 u64 op2;
91 92
92 vcpu->stat.instruction_sck++; 93 vcpu->stat.instruction_sck++;
93 94
@@ -97,12 +98,12 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
97 op2 = kvm_s390_get_base_disp_s(vcpu, &ar); 98 op2 = kvm_s390_get_base_disp_s(vcpu, &ar);
98 if (op2 & 7) /* Operand must be on a doubleword boundary */ 99 if (op2 & 7) /* Operand must be on a doubleword boundary */
99 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 100 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
100 rc = read_guest(vcpu, op2, ar, &val, sizeof(val)); 101 rc = read_guest(vcpu, op2, ar, &gtod.tod, sizeof(gtod.tod));
101 if (rc) 102 if (rc)
102 return kvm_s390_inject_prog_cond(vcpu, rc); 103 return kvm_s390_inject_prog_cond(vcpu, rc);
103 104
104 VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val); 105 VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod);
105 kvm_s390_set_tod_clock(vcpu->kvm, val); 106 kvm_s390_set_tod_clock(vcpu->kvm, &gtod);
106 107
107 kvm_s390_set_psw_cc(vcpu, 0); 108 kvm_s390_set_psw_cc(vcpu, 0);
108 return 0; 109 return 0;
@@ -795,55 +796,60 @@ out:
795 return rc; 796 return rc;
796} 797}
797 798
798static const intercept_handler_t b2_handlers[256] = {
799 [0x02] = handle_stidp,
800 [0x04] = handle_set_clock,
801 [0x10] = handle_set_prefix,
802 [0x11] = handle_store_prefix,
803 [0x12] = handle_store_cpu_address,
804 [0x14] = kvm_s390_handle_vsie,
805 [0x21] = handle_ipte_interlock,
806 [0x29] = handle_iske,
807 [0x2a] = handle_rrbe,
808 [0x2b] = handle_sske,
809 [0x2c] = handle_test_block,
810 [0x30] = handle_io_inst,
811 [0x31] = handle_io_inst,
812 [0x32] = handle_io_inst,
813 [0x33] = handle_io_inst,
814 [0x34] = handle_io_inst,
815 [0x35] = handle_io_inst,
816 [0x36] = handle_io_inst,
817 [0x37] = handle_io_inst,
818 [0x38] = handle_io_inst,
819 [0x39] = handle_io_inst,
820 [0x3a] = handle_io_inst,
821 [0x3b] = handle_io_inst,
822 [0x3c] = handle_io_inst,
823 [0x50] = handle_ipte_interlock,
824 [0x56] = handle_sthyi,
825 [0x5f] = handle_io_inst,
826 [0x74] = handle_io_inst,
827 [0x76] = handle_io_inst,
828 [0x7d] = handle_stsi,
829 [0xb1] = handle_stfl,
830 [0xb2] = handle_lpswe,
831};
832
833int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) 799int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
834{ 800{
835 intercept_handler_t handler; 801 switch (vcpu->arch.sie_block->ipa & 0x00ff) {
836 802 case 0x02:
837 /* 803 return handle_stidp(vcpu);
838 * A lot of B2 instructions are priviledged. Here we check for 804 case 0x04:
839 * the privileged ones, that we can handle in the kernel. 805 return handle_set_clock(vcpu);
840 * Anything else goes to userspace. 806 case 0x10:
841 */ 807 return handle_set_prefix(vcpu);
842 handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; 808 case 0x11:
843 if (handler) 809 return handle_store_prefix(vcpu);
844 return handler(vcpu); 810 case 0x12:
845 811 return handle_store_cpu_address(vcpu);
846 return -EOPNOTSUPP; 812 case 0x14:
813 return kvm_s390_handle_vsie(vcpu);
814 case 0x21:
815 case 0x50:
816 return handle_ipte_interlock(vcpu);
817 case 0x29:
818 return handle_iske(vcpu);
819 case 0x2a:
820 return handle_rrbe(vcpu);
821 case 0x2b:
822 return handle_sske(vcpu);
823 case 0x2c:
824 return handle_test_block(vcpu);
825 case 0x30:
826 case 0x31:
827 case 0x32:
828 case 0x33:
829 case 0x34:
830 case 0x35:
831 case 0x36:
832 case 0x37:
833 case 0x38:
834 case 0x39:
835 case 0x3a:
836 case 0x3b:
837 case 0x3c:
838 case 0x5f:
839 case 0x74:
840 case 0x76:
841 return handle_io_inst(vcpu);
842 case 0x56:
843 return handle_sthyi(vcpu);
844 case 0x7d:
845 return handle_stsi(vcpu);
846 case 0xb1:
847 return handle_stfl(vcpu);
848 case 0xb2:
849 return handle_lpswe(vcpu);
850 default:
851 return -EOPNOTSUPP;
852 }
847} 853}
848 854
849static int handle_epsw(struct kvm_vcpu *vcpu) 855static int handle_epsw(struct kvm_vcpu *vcpu)
@@ -1105,25 +1111,22 @@ static int handle_essa(struct kvm_vcpu *vcpu)
1105 return 0; 1111 return 0;
1106} 1112}
1107 1113
1108static const intercept_handler_t b9_handlers[256] = {
1109 [0x8a] = handle_ipte_interlock,
1110 [0x8d] = handle_epsw,
1111 [0x8e] = handle_ipte_interlock,
1112 [0x8f] = handle_ipte_interlock,
1113 [0xab] = handle_essa,
1114 [0xaf] = handle_pfmf,
1115};
1116
1117int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) 1114int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
1118{ 1115{
1119 intercept_handler_t handler; 1116 switch (vcpu->arch.sie_block->ipa & 0x00ff) {
1120 1117 case 0x8a:
1121 /* This is handled just as for the B2 instructions. */ 1118 case 0x8e:
1122 handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; 1119 case 0x8f:
1123 if (handler) 1120 return handle_ipte_interlock(vcpu);
1124 return handler(vcpu); 1121 case 0x8d:
1125 1122 return handle_epsw(vcpu);
1126 return -EOPNOTSUPP; 1123 case 0xab:
1124 return handle_essa(vcpu);
1125 case 0xaf:
1126 return handle_pfmf(vcpu);
1127 default:
1128 return -EOPNOTSUPP;
1129 }
1127} 1130}
1128 1131
1129int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) 1132int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
@@ -1271,22 +1274,20 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
1271 return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0; 1274 return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
1272} 1275}
1273 1276
1274static const intercept_handler_t eb_handlers[256] = {
1275 [0x2f] = handle_lctlg,
1276 [0x25] = handle_stctg,
1277 [0x60] = handle_ri,
1278 [0x61] = handle_ri,
1279 [0x62] = handle_ri,
1280};
1281
1282int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) 1277int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
1283{ 1278{
1284 intercept_handler_t handler; 1279 switch (vcpu->arch.sie_block->ipb & 0x000000ff) {
1285 1280 case 0x25:
1286 handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; 1281 return handle_stctg(vcpu);
1287 if (handler) 1282 case 0x2f:
1288 return handler(vcpu); 1283 return handle_lctlg(vcpu);
1289 return -EOPNOTSUPP; 1284 case 0x60:
1285 case 0x61:
1286 case 0x62:
1287 return handle_ri(vcpu);
1288 default:
1289 return -EOPNOTSUPP;
1290 }
1290} 1291}
1291 1292
1292static int handle_tprot(struct kvm_vcpu *vcpu) 1293static int handle_tprot(struct kvm_vcpu *vcpu)
@@ -1346,10 +1347,12 @@ out_unlock:
1346 1347
1347int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) 1348int kvm_s390_handle_e5(struct kvm_vcpu *vcpu)
1348{ 1349{
1349 /* For e5xx... instructions we only handle TPROT */ 1350 switch (vcpu->arch.sie_block->ipa & 0x00ff) {
1350 if ((vcpu->arch.sie_block->ipa & 0x00ff) == 0x01) 1351 case 0x01:
1351 return handle_tprot(vcpu); 1352 return handle_tprot(vcpu);
1352 return -EOPNOTSUPP; 1353 default:
1354 return -EOPNOTSUPP;
1355 }
1353} 1356}
1354 1357
1355static int handle_sckpf(struct kvm_vcpu *vcpu) 1358static int handle_sckpf(struct kvm_vcpu *vcpu)
@@ -1380,17 +1383,14 @@ static int handle_ptff(struct kvm_vcpu *vcpu)
1380 return 0; 1383 return 0;
1381} 1384}
1382 1385
1383static const intercept_handler_t x01_handlers[256] = {
1384 [0x04] = handle_ptff,
1385 [0x07] = handle_sckpf,
1386};
1387
1388int kvm_s390_handle_01(struct kvm_vcpu *vcpu) 1386int kvm_s390_handle_01(struct kvm_vcpu *vcpu)
1389{ 1387{
1390 intercept_handler_t handler; 1388 switch (vcpu->arch.sie_block->ipa & 0x00ff) {
1391 1389 case 0x04:
1392 handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; 1390 return handle_ptff(vcpu);
1393 if (handler) 1391 case 0x07:
1394 return handler(vcpu); 1392 return handle_sckpf(vcpu);
1395 return -EOPNOTSUPP; 1393 default:
1394 return -EOPNOTSUPP;
1395 }
1396} 1396}
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index ec772700ff96..8961e3970901 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -821,6 +821,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
821{ 821{
822 struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; 822 struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
823 struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; 823 struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
824 int guest_bp_isolation;
824 int rc; 825 int rc;
825 826
826 handle_last_fault(vcpu, vsie_page); 827 handle_last_fault(vcpu, vsie_page);
@@ -831,6 +832,20 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
831 s390_handle_mcck(); 832 s390_handle_mcck();
832 833
833 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 834 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
835
836 /* save current guest state of bp isolation override */
837 guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST);
838
839 /*
840 * The guest is running with BPBC, so we have to force it on for our
841 * nested guest. This is done by enabling BPBC globally, so the BPBC
842 * control in the SCB (which the nested guest can modify) is simply
843 * ignored.
844 */
845 if (test_kvm_facility(vcpu->kvm, 82) &&
846 vcpu->arch.sie_block->fpf & FPF_BPBC)
847 set_thread_flag(TIF_ISOLATE_BP_GUEST);
848
834 local_irq_disable(); 849 local_irq_disable();
835 guest_enter_irqoff(); 850 guest_enter_irqoff();
836 local_irq_enable(); 851 local_irq_enable();
@@ -840,6 +855,11 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
840 local_irq_disable(); 855 local_irq_disable();
841 guest_exit_irqoff(); 856 guest_exit_irqoff();
842 local_irq_enable(); 857 local_irq_enable();
858
859 /* restore guest state for bp isolation override */
860 if (!guest_bp_isolation)
861 clear_thread_flag(TIF_ISOLATE_BP_GUEST);
862
843 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 863 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
844 864
845 if (rc == -EINTR) { 865 if (rc == -EINTR) {
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dd6f57a54a26..0a9e330b34f0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1464,7 +1464,4 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
1464#define put_smstate(type, buf, offset, val) \ 1464#define put_smstate(type, buf, offset, val) \
1465 *(type *)((buf) + (offset) - 0x7e00) = val 1465 *(type *)((buf) + (offset) - 0x7e00) = val
1466 1466
1467void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
1468 unsigned long start, unsigned long end);
1469
1470#endif /* _ASM_X86_KVM_HOST_H */ 1467#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 7a2ade4aa235..6cfa9c8cb7d6 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -26,6 +26,7 @@
26#define KVM_FEATURE_PV_EOI 6 26#define KVM_FEATURE_PV_EOI 6
27#define KVM_FEATURE_PV_UNHALT 7 27#define KVM_FEATURE_PV_UNHALT 7
28#define KVM_FEATURE_PV_TLB_FLUSH 9 28#define KVM_FEATURE_PV_TLB_FLUSH 9
29#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
29 30
30/* The last 8 bits are used to indicate how to interpret the flags field 31/* The last 8 bits are used to indicate how to interpret the flags field
31 * in pvclock structure. If no bits are set, all flags are ignored. 32 * in pvclock structure. If no bits are set, all flags are ignored.
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 4e37d1a851a6..bc1a27280c4b 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -49,7 +49,7 @@
49 49
50static int kvmapf = 1; 50static int kvmapf = 1;
51 51
52static int parse_no_kvmapf(char *arg) 52static int __init parse_no_kvmapf(char *arg)
53{ 53{
54 kvmapf = 0; 54 kvmapf = 0;
55 return 0; 55 return 0;
@@ -58,7 +58,7 @@ static int parse_no_kvmapf(char *arg)
58early_param("no-kvmapf", parse_no_kvmapf); 58early_param("no-kvmapf", parse_no_kvmapf);
59 59
60static int steal_acc = 1; 60static int steal_acc = 1;
61static int parse_no_stealacc(char *arg) 61static int __init parse_no_stealacc(char *arg)
62{ 62{
63 steal_acc = 0; 63 steal_acc = 0;
64 return 0; 64 return 0;
@@ -67,7 +67,7 @@ static int parse_no_stealacc(char *arg)
67early_param("no-steal-acc", parse_no_stealacc); 67early_param("no-steal-acc", parse_no_stealacc);
68 68
69static int kvmclock_vsyscall = 1; 69static int kvmclock_vsyscall = 1;
70static int parse_no_kvmclock_vsyscall(char *arg) 70static int __init parse_no_kvmclock_vsyscall(char *arg)
71{ 71{
72 kvmclock_vsyscall = 0; 72 kvmclock_vsyscall = 0;
73 return 0; 73 return 0;
@@ -341,10 +341,10 @@ static void kvm_guest_cpu_init(void)
341#endif 341#endif
342 pa |= KVM_ASYNC_PF_ENABLED; 342 pa |= KVM_ASYNC_PF_ENABLED;
343 343
344 /* Async page fault support for L1 hypervisor is optional */ 344 if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
345 if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN, 345 pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
346 (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0) 346
347 wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); 347 wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
348 __this_cpu_write(apf_reason.enabled, 1); 348 __this_cpu_write(apf_reason.enabled, 1);
349 printk(KERN_INFO"KVM setup async PF for cpu %d\n", 349 printk(KERN_INFO"KVM setup async PF for cpu %d\n",
350 smp_processor_id()); 350 smp_processor_id());
@@ -545,7 +545,8 @@ static void __init kvm_guest_init(void)
545 pv_time_ops.steal_clock = kvm_steal_clock; 545 pv_time_ops.steal_clock = kvm_steal_clock;
546 } 546 }
547 547
548 if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) 548 if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
549 !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
549 pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; 550 pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
550 551
551 if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) 552 if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
@@ -633,7 +634,8 @@ static __init int kvm_setup_pv_tlb_flush(void)
633{ 634{
634 int cpu; 635 int cpu;
635 636
636 if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) { 637 if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
638 !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
637 for_each_possible_cpu(cpu) { 639 for_each_possible_cpu(cpu) {
638 zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), 640 zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
639 GFP_KERNEL, cpu_to_node(cpu)); 641 GFP_KERNEL, cpu_to_node(cpu));
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index a0c5a69bc7c4..b671fc2d0422 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -607,7 +607,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
607 (1 << KVM_FEATURE_PV_EOI) | 607 (1 << KVM_FEATURE_PV_EOI) |
608 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | 608 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
609 (1 << KVM_FEATURE_PV_UNHALT) | 609 (1 << KVM_FEATURE_PV_UNHALT) |
610 (1 << KVM_FEATURE_PV_TLB_FLUSH); 610 (1 << KVM_FEATURE_PV_TLB_FLUSH) |
611 (1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
611 612
612 if (sched_info_on()) 613 if (sched_info_on())
613 entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); 614 entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 924ac8ce9d50..cc5fe7a50dde 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2165,7 +2165,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
2165 */ 2165 */
2166 vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; 2166 vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
2167 static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ 2167 static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
2168 kvm_lapic_reset(vcpu, false);
2169 kvm_iodevice_init(&apic->dev, &apic_mmio_ops); 2168 kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
2170 2169
2171 return 0; 2170 return 0;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 46ff304140c7..f551962ac294 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3029,7 +3029,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
3029 return RET_PF_RETRY; 3029 return RET_PF_RETRY;
3030 } 3030 }
3031 3031
3032 return -EFAULT; 3032 return RET_PF_EMULATE;
3033} 3033}
3034 3034
3035static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, 3035static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b3e488a74828..3d8377f75eda 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -300,6 +300,8 @@ module_param(vgif, int, 0444);
300static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); 300static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
301module_param(sev, int, 0444); 301module_param(sev, int, 0444);
302 302
303static u8 rsm_ins_bytes[] = "\x0f\xaa";
304
303static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); 305static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
304static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa); 306static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
305static void svm_complete_interrupts(struct vcpu_svm *svm); 307static void svm_complete_interrupts(struct vcpu_svm *svm);
@@ -1383,6 +1385,7 @@ static void init_vmcb(struct vcpu_svm *svm)
1383 set_intercept(svm, INTERCEPT_SKINIT); 1385 set_intercept(svm, INTERCEPT_SKINIT);
1384 set_intercept(svm, INTERCEPT_WBINVD); 1386 set_intercept(svm, INTERCEPT_WBINVD);
1385 set_intercept(svm, INTERCEPT_XSETBV); 1387 set_intercept(svm, INTERCEPT_XSETBV);
1388 set_intercept(svm, INTERCEPT_RSM);
1386 1389
1387 if (!kvm_mwait_in_guest()) { 1390 if (!kvm_mwait_in_guest()) {
1388 set_intercept(svm, INTERCEPT_MONITOR); 1391 set_intercept(svm, INTERCEPT_MONITOR);
@@ -3699,6 +3702,12 @@ static int emulate_on_interception(struct vcpu_svm *svm)
3699 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; 3702 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
3700} 3703}
3701 3704
3705static int rsm_interception(struct vcpu_svm *svm)
3706{
3707 return x86_emulate_instruction(&svm->vcpu, 0, 0,
3708 rsm_ins_bytes, 2) == EMULATE_DONE;
3709}
3710
3702static int rdpmc_interception(struct vcpu_svm *svm) 3711static int rdpmc_interception(struct vcpu_svm *svm)
3703{ 3712{
3704 int err; 3713 int err;
@@ -4541,7 +4550,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
4541 [SVM_EXIT_MWAIT] = mwait_interception, 4550 [SVM_EXIT_MWAIT] = mwait_interception,
4542 [SVM_EXIT_XSETBV] = xsetbv_interception, 4551 [SVM_EXIT_XSETBV] = xsetbv_interception,
4543 [SVM_EXIT_NPF] = npf_interception, 4552 [SVM_EXIT_NPF] = npf_interception,
4544 [SVM_EXIT_RSM] = emulate_on_interception, 4553 [SVM_EXIT_RSM] = rsm_interception,
4545 [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, 4554 [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
4546 [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception, 4555 [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
4547}; 4556};
@@ -6236,16 +6245,18 @@ e_free:
6236 6245
6237static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) 6246static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
6238{ 6247{
6248 void __user *measure = (void __user *)(uintptr_t)argp->data;
6239 struct kvm_sev_info *sev = &kvm->arch.sev_info; 6249 struct kvm_sev_info *sev = &kvm->arch.sev_info;
6240 struct sev_data_launch_measure *data; 6250 struct sev_data_launch_measure *data;
6241 struct kvm_sev_launch_measure params; 6251 struct kvm_sev_launch_measure params;
6252 void __user *p = NULL;
6242 void *blob = NULL; 6253 void *blob = NULL;
6243 int ret; 6254 int ret;
6244 6255
6245 if (!sev_guest(kvm)) 6256 if (!sev_guest(kvm))
6246 return -ENOTTY; 6257 return -ENOTTY;
6247 6258
6248 if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params))) 6259 if (copy_from_user(&params, measure, sizeof(params)))
6249 return -EFAULT; 6260 return -EFAULT;
6250 6261
6251 data = kzalloc(sizeof(*data), GFP_KERNEL); 6262 data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -6256,17 +6267,13 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
6256 if (!params.len) 6267 if (!params.len)
6257 goto cmd; 6268 goto cmd;
6258 6269
6259 if (params.uaddr) { 6270 p = (void __user *)(uintptr_t)params.uaddr;
6271 if (p) {
6260 if (params.len > SEV_FW_BLOB_MAX_SIZE) { 6272 if (params.len > SEV_FW_BLOB_MAX_SIZE) {
6261 ret = -EINVAL; 6273 ret = -EINVAL;
6262 goto e_free; 6274 goto e_free;
6263 } 6275 }
6264 6276
6265 if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) {
6266 ret = -EFAULT;
6267 goto e_free;
6268 }
6269
6270 ret = -ENOMEM; 6277 ret = -ENOMEM;
6271 blob = kmalloc(params.len, GFP_KERNEL); 6278 blob = kmalloc(params.len, GFP_KERNEL);
6272 if (!blob) 6279 if (!blob)
@@ -6290,13 +6297,13 @@ cmd:
6290 goto e_free_blob; 6297 goto e_free_blob;
6291 6298
6292 if (blob) { 6299 if (blob) {
6293 if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len)) 6300 if (copy_to_user(p, blob, params.len))
6294 ret = -EFAULT; 6301 ret = -EFAULT;
6295 } 6302 }
6296 6303
6297done: 6304done:
6298 params.len = data->len; 6305 params.len = data->len;
6299 if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) 6306 if (copy_to_user(measure, &params, sizeof(params)))
6300 ret = -EFAULT; 6307 ret = -EFAULT;
6301e_free_blob: 6308e_free_blob:
6302 kfree(blob); 6309 kfree(blob);
@@ -6597,7 +6604,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
6597 struct page **pages; 6604 struct page **pages;
6598 void *blob, *hdr; 6605 void *blob, *hdr;
6599 unsigned long n; 6606 unsigned long n;
6600 int ret; 6607 int ret, offset;
6601 6608
6602 if (!sev_guest(kvm)) 6609 if (!sev_guest(kvm))
6603 return -ENOTTY; 6610 return -ENOTTY;
@@ -6623,6 +6630,10 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
6623 if (!data) 6630 if (!data)
6624 goto e_unpin_memory; 6631 goto e_unpin_memory;
6625 6632
6633 offset = params.guest_uaddr & (PAGE_SIZE - 1);
6634 data->guest_address = __sme_page_pa(pages[0]) + offset;
6635 data->guest_len = params.guest_len;
6636
6626 blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len); 6637 blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
6627 if (IS_ERR(blob)) { 6638 if (IS_ERR(blob)) {
6628 ret = PTR_ERR(blob); 6639 ret = PTR_ERR(blob);
@@ -6637,8 +6648,8 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
6637 ret = PTR_ERR(hdr); 6648 ret = PTR_ERR(hdr);
6638 goto e_free_blob; 6649 goto e_free_blob;
6639 } 6650 }
6640 data->trans_address = __psp_pa(blob); 6651 data->hdr_address = __psp_pa(hdr);
6641 data->trans_len = params.trans_len; 6652 data->hdr_len = params.hdr_len;
6642 6653
6643 data->handle = sev->handle; 6654 data->handle = sev->handle;
6644 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error); 6655 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3dec126aa302..ec14f2319a87 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4485,7 +4485,8 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
4485 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, 4485 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
4486 SECONDARY_EXEC_DESC); 4486 SECONDARY_EXEC_DESC);
4487 hw_cr4 &= ~X86_CR4_UMIP; 4487 hw_cr4 &= ~X86_CR4_UMIP;
4488 } else 4488 } else if (!is_guest_mode(vcpu) ||
4489 !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
4489 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, 4490 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
4490 SECONDARY_EXEC_DESC); 4491 SECONDARY_EXEC_DESC);
4491 4492
@@ -11199,7 +11200,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
11199 if (ret) 11200 if (ret)
11200 return ret; 11201 return ret;
11201 11202
11202 if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) 11203 /*
11204 * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
11205 * by event injection, halt vcpu.
11206 */
11207 if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
11208 !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK))
11203 return kvm_vcpu_halt(vcpu); 11209 return kvm_vcpu_halt(vcpu);
11204 11210
11205 vmx->nested.nested_run_pending = 1; 11211 vmx->nested.nested_run_pending = 1;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c8a0b545ac20..96edda878dbf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7975,6 +7975,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
7975 kvm_vcpu_mtrr_init(vcpu); 7975 kvm_vcpu_mtrr_init(vcpu);
7976 vcpu_load(vcpu); 7976 vcpu_load(vcpu);
7977 kvm_vcpu_reset(vcpu, false); 7977 kvm_vcpu_reset(vcpu, false);
7978 kvm_lapic_reset(vcpu, false);
7978 kvm_mmu_setup(vcpu); 7979 kvm_mmu_setup(vcpu);
7979 vcpu_put(vcpu); 7980 vcpu_put(vcpu);
7980 return 0; 7981 return 0;
@@ -8460,10 +8461,8 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
8460 return r; 8461 return r;
8461 } 8462 }
8462 8463
8463 if (!size) { 8464 if (!size)
8464 r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); 8465 vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
8465 WARN_ON(r < 0);
8466 }
8467 8466
8468 return 0; 8467 return 0;
8469} 8468}
diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
index fcfa5b1eae61..b3afb6cc9d72 100644
--- a/drivers/crypto/ccp/psp-dev.c
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -211,7 +211,7 @@ static int __sev_platform_shutdown_locked(int *error)
211{ 211{
212 int ret; 212 int ret;
213 213
214 ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, 0, error); 214 ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
215 if (ret) 215 if (ret)
216 return ret; 216 return ret;
217 217
@@ -271,7 +271,7 @@ static int sev_ioctl_do_reset(struct sev_issue_cmd *argp)
271 return rc; 271 return rc;
272 } 272 }
273 273
274 return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, 0, &argp->error); 274 return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, NULL, &argp->error);
275} 275}
276 276
277static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp) 277static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp)
@@ -299,7 +299,7 @@ static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp)
299 return rc; 299 return rc;
300 } 300 }
301 301
302 return __sev_do_cmd_locked(cmd, 0, &argp->error); 302 return __sev_do_cmd_locked(cmd, NULL, &argp->error);
303} 303}
304 304
305static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp) 305static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp)
@@ -624,7 +624,7 @@ EXPORT_SYMBOL_GPL(sev_guest_decommission);
624 624
625int sev_guest_df_flush(int *error) 625int sev_guest_df_flush(int *error)
626{ 626{
627 return sev_do_cmd(SEV_CMD_DF_FLUSH, 0, error); 627 return sev_do_cmd(SEV_CMD_DF_FLUSH, NULL, error);
628} 628}
629EXPORT_SYMBOL_GPL(sev_guest_df_flush); 629EXPORT_SYMBOL_GPL(sev_guest_df_flush);
630 630
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ac0062b74aed..6930c63126c7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1105,7 +1105,6 @@ static inline void kvm_irq_routing_update(struct kvm *kvm)
1105{ 1105{
1106} 1106}
1107#endif 1107#endif
1108void kvm_arch_irq_routing_update(struct kvm *kvm);
1109 1108
1110static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) 1109static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
1111{ 1110{
@@ -1114,6 +1113,8 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
1114 1113
1115#endif /* CONFIG_HAVE_KVM_EVENTFD */ 1114#endif /* CONFIG_HAVE_KVM_EVENTFD */
1116 1115
1116void kvm_arch_irq_routing_update(struct kvm *kvm);
1117
1117static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) 1118static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
1118{ 1119{
1119 /* 1120 /*
@@ -1272,4 +1273,7 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
1272} 1273}
1273#endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */ 1274#endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
1274 1275
1276void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
1277 unsigned long start, unsigned long end);
1278
1275#endif 1279#endif
diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h
index 3d77fe91239a..9008f31c7eb6 100644
--- a/include/uapi/linux/psp-sev.h
+++ b/include/uapi/linux/psp-sev.h
@@ -42,7 +42,7 @@ typedef enum {
42 SEV_RET_INVALID_PLATFORM_STATE, 42 SEV_RET_INVALID_PLATFORM_STATE,
43 SEV_RET_INVALID_GUEST_STATE, 43 SEV_RET_INVALID_GUEST_STATE,
44 SEV_RET_INAVLID_CONFIG, 44 SEV_RET_INAVLID_CONFIG,
45 SEV_RET_INVALID_len, 45 SEV_RET_INVALID_LEN,
46 SEV_RET_ALREADY_OWNED, 46 SEV_RET_ALREADY_OWNED,
47 SEV_RET_INVALID_CERTIFICATE, 47 SEV_RET_INVALID_CERTIFICATE,
48 SEV_RET_POLICY_FAILURE, 48 SEV_RET_POLICY_FAILURE,
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index a5684d0968b4..5898c22ba310 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -33,7 +33,7 @@ import resource
33import struct 33import struct
34import re 34import re
35import subprocess 35import subprocess
36from collections import defaultdict 36from collections import defaultdict, namedtuple
37 37
38VMX_EXIT_REASONS = { 38VMX_EXIT_REASONS = {
39 'EXCEPTION_NMI': 0, 39 'EXCEPTION_NMI': 0,
@@ -228,6 +228,7 @@ IOCTL_NUMBERS = {
228} 228}
229 229
230ENCODING = locale.getpreferredencoding(False) 230ENCODING = locale.getpreferredencoding(False)
231TRACE_FILTER = re.compile(r'^[^\(]*$')
231 232
232 233
233class Arch(object): 234class Arch(object):
@@ -260,6 +261,11 @@ class Arch(object):
260 return ArchX86(SVM_EXIT_REASONS) 261 return ArchX86(SVM_EXIT_REASONS)
261 return 262 return
262 263
264 def tracepoint_is_child(self, field):
265 if (TRACE_FILTER.match(field)):
266 return None
267 return field.split('(', 1)[0]
268
263 269
264class ArchX86(Arch): 270class ArchX86(Arch):
265 def __init__(self, exit_reasons): 271 def __init__(self, exit_reasons):
@@ -267,6 +273,10 @@ class ArchX86(Arch):
267 self.ioctl_numbers = IOCTL_NUMBERS 273 self.ioctl_numbers = IOCTL_NUMBERS
268 self.exit_reasons = exit_reasons 274 self.exit_reasons = exit_reasons
269 275
276 def debugfs_is_child(self, field):
277 """ Returns name of parent if 'field' is a child, None otherwise """
278 return None
279
270 280
271class ArchPPC(Arch): 281class ArchPPC(Arch):
272 def __init__(self): 282 def __init__(self):
@@ -282,6 +292,10 @@ class ArchPPC(Arch):
282 self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 292 self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
283 self.exit_reasons = {} 293 self.exit_reasons = {}
284 294
295 def debugfs_is_child(self, field):
296 """ Returns name of parent if 'field' is a child, None otherwise """
297 return None
298
285 299
286class ArchA64(Arch): 300class ArchA64(Arch):
287 def __init__(self): 301 def __init__(self):
@@ -289,6 +303,10 @@ class ArchA64(Arch):
289 self.ioctl_numbers = IOCTL_NUMBERS 303 self.ioctl_numbers = IOCTL_NUMBERS
290 self.exit_reasons = AARCH64_EXIT_REASONS 304 self.exit_reasons = AARCH64_EXIT_REASONS
291 305
306 def debugfs_is_child(self, field):
307 """ Returns name of parent if 'field' is a child, None otherwise """
308 return None
309
292 310
293class ArchS390(Arch): 311class ArchS390(Arch):
294 def __init__(self): 312 def __init__(self):
@@ -296,6 +314,12 @@ class ArchS390(Arch):
296 self.ioctl_numbers = IOCTL_NUMBERS 314 self.ioctl_numbers = IOCTL_NUMBERS
297 self.exit_reasons = None 315 self.exit_reasons = None
298 316
317 def debugfs_is_child(self, field):
318 """ Returns name of parent if 'field' is a child, None otherwise """
319 if field.startswith('instruction_'):
320 return 'exit_instruction'
321
322
299ARCH = Arch.get_arch() 323ARCH = Arch.get_arch()
300 324
301 325
@@ -331,9 +355,6 @@ class perf_event_attr(ctypes.Structure):
331PERF_TYPE_TRACEPOINT = 2 355PERF_TYPE_TRACEPOINT = 2
332PERF_FORMAT_GROUP = 1 << 3 356PERF_FORMAT_GROUP = 1 << 3
333 357
334PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
335PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
336
337 358
338class Group(object): 359class Group(object):
339 """Represents a perf event group.""" 360 """Represents a perf event group."""
@@ -376,8 +397,8 @@ class Event(object):
376 self.syscall = self.libc.syscall 397 self.syscall = self.libc.syscall
377 self.name = name 398 self.name = name
378 self.fd = None 399 self.fd = None
379 self.setup_event(group, trace_cpu, trace_pid, trace_point, 400 self._setup_event(group, trace_cpu, trace_pid, trace_point,
380 trace_filter, trace_set) 401 trace_filter, trace_set)
381 402
382 def __del__(self): 403 def __del__(self):
383 """Closes the event's file descriptor. 404 """Closes the event's file descriptor.
@@ -390,7 +411,7 @@ class Event(object):
390 if self.fd: 411 if self.fd:
391 os.close(self.fd) 412 os.close(self.fd)
392 413
393 def perf_event_open(self, attr, pid, cpu, group_fd, flags): 414 def _perf_event_open(self, attr, pid, cpu, group_fd, flags):
394 """Wrapper for the sys_perf_evt_open() syscall. 415 """Wrapper for the sys_perf_evt_open() syscall.
395 416
396 Used to set up performance events, returns a file descriptor or -1 417 Used to set up performance events, returns a file descriptor or -1
@@ -409,7 +430,7 @@ class Event(object):
409 ctypes.c_int(pid), ctypes.c_int(cpu), 430 ctypes.c_int(pid), ctypes.c_int(cpu),
410 ctypes.c_int(group_fd), ctypes.c_long(flags)) 431 ctypes.c_int(group_fd), ctypes.c_long(flags))
411 432
412 def setup_event_attribute(self, trace_set, trace_point): 433 def _setup_event_attribute(self, trace_set, trace_point):
413 """Returns an initialized ctype perf_event_attr struct.""" 434 """Returns an initialized ctype perf_event_attr struct."""
414 435
415 id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set, 436 id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
@@ -419,8 +440,8 @@ class Event(object):
419 event_attr.config = int(open(id_path).read()) 440 event_attr.config = int(open(id_path).read())
420 return event_attr 441 return event_attr
421 442
422 def setup_event(self, group, trace_cpu, trace_pid, trace_point, 443 def _setup_event(self, group, trace_cpu, trace_pid, trace_point,
423 trace_filter, trace_set): 444 trace_filter, trace_set):
424 """Sets up the perf event in Linux. 445 """Sets up the perf event in Linux.
425 446
426 Issues the syscall to register the event in the kernel and 447 Issues the syscall to register the event in the kernel and
@@ -428,7 +449,7 @@ class Event(object):
428 449
429 """ 450 """
430 451
431 event_attr = self.setup_event_attribute(trace_set, trace_point) 452 event_attr = self._setup_event_attribute(trace_set, trace_point)
432 453
433 # First event will be group leader. 454 # First event will be group leader.
434 group_leader = -1 455 group_leader = -1
@@ -437,8 +458,8 @@ class Event(object):
437 if group.events: 458 if group.events:
438 group_leader = group.events[0].fd 459 group_leader = group.events[0].fd
439 460
440 fd = self.perf_event_open(event_attr, trace_pid, 461 fd = self._perf_event_open(event_attr, trace_pid,
441 trace_cpu, group_leader, 0) 462 trace_cpu, group_leader, 0)
442 if fd == -1: 463 if fd == -1:
443 err = ctypes.get_errno() 464 err = ctypes.get_errno()
444 raise OSError(err, os.strerror(err), 465 raise OSError(err, os.strerror(err),
@@ -475,6 +496,10 @@ class Event(object):
475 496
476class Provider(object): 497class Provider(object):
477 """Encapsulates functionalities used by all providers.""" 498 """Encapsulates functionalities used by all providers."""
499 def __init__(self, pid):
500 self.child_events = False
501 self.pid = pid
502
478 @staticmethod 503 @staticmethod
479 def is_field_wanted(fields_filter, field): 504 def is_field_wanted(fields_filter, field):
480 """Indicate whether field is valid according to fields_filter.""" 505 """Indicate whether field is valid according to fields_filter."""
@@ -500,12 +525,12 @@ class TracepointProvider(Provider):
500 """ 525 """
501 def __init__(self, pid, fields_filter): 526 def __init__(self, pid, fields_filter):
502 self.group_leaders = [] 527 self.group_leaders = []
503 self.filters = self.get_filters() 528 self.filters = self._get_filters()
504 self.update_fields(fields_filter) 529 self.update_fields(fields_filter)
505 self.pid = pid 530 super(TracepointProvider, self).__init__(pid)
506 531
507 @staticmethod 532 @staticmethod
508 def get_filters(): 533 def _get_filters():
509 """Returns a dict of trace events, their filter ids and 534 """Returns a dict of trace events, their filter ids and
510 the values that can be filtered. 535 the values that can be filtered.
511 536
@@ -521,8 +546,8 @@ class TracepointProvider(Provider):
521 filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons) 546 filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
522 return filters 547 return filters
523 548
524 def get_available_fields(self): 549 def _get_available_fields(self):
525 """Returns a list of available event's of format 'event name(filter 550 """Returns a list of available events of format 'event name(filter
526 name)'. 551 name)'.
527 552
528 All available events have directories under 553 All available events have directories under
@@ -549,11 +574,12 @@ class TracepointProvider(Provider):
549 574
550 def update_fields(self, fields_filter): 575 def update_fields(self, fields_filter):
551 """Refresh fields, applying fields_filter""" 576 """Refresh fields, applying fields_filter"""
552 self.fields = [field for field in self.get_available_fields() 577 self.fields = [field for field in self._get_available_fields()
553 if self.is_field_wanted(fields_filter, field)] 578 if self.is_field_wanted(fields_filter, field) or
579 ARCH.tracepoint_is_child(field)]
554 580
555 @staticmethod 581 @staticmethod
556 def get_online_cpus(): 582 def _get_online_cpus():
557 """Returns a list of cpu id integers.""" 583 """Returns a list of cpu id integers."""
558 def parse_int_list(list_string): 584 def parse_int_list(list_string):
559 """Returns an int list from a string of comma separated integers and 585 """Returns an int list from a string of comma separated integers and
@@ -575,17 +601,17 @@ class TracepointProvider(Provider):
575 cpu_string = cpu_list.readline() 601 cpu_string = cpu_list.readline()
576 return parse_int_list(cpu_string) 602 return parse_int_list(cpu_string)
577 603
578 def setup_traces(self): 604 def _setup_traces(self):
579 """Creates all event and group objects needed to be able to retrieve 605 """Creates all event and group objects needed to be able to retrieve
580 data.""" 606 data."""
581 fields = self.get_available_fields() 607 fields = self._get_available_fields()
582 if self._pid > 0: 608 if self._pid > 0:
583 # Fetch list of all threads of the monitored pid, as qemu 609 # Fetch list of all threads of the monitored pid, as qemu
584 # starts a thread for each vcpu. 610 # starts a thread for each vcpu.
585 path = os.path.join('/proc', str(self._pid), 'task') 611 path = os.path.join('/proc', str(self._pid), 'task')
586 groupids = self.walkdir(path)[1] 612 groupids = self.walkdir(path)[1]
587 else: 613 else:
588 groupids = self.get_online_cpus() 614 groupids = self._get_online_cpus()
589 615
590 # The constant is needed as a buffer for python libs, std 616 # The constant is needed as a buffer for python libs, std
591 # streams and other files that the script opens. 617 # streams and other files that the script opens.
@@ -663,7 +689,7 @@ class TracepointProvider(Provider):
663 # The garbage collector will get rid of all Event/Group 689 # The garbage collector will get rid of all Event/Group
664 # objects and open files after removing the references. 690 # objects and open files after removing the references.
665 self.group_leaders = [] 691 self.group_leaders = []
666 self.setup_traces() 692 self._setup_traces()
667 self.fields = self._fields 693 self.fields = self._fields
668 694
669 def read(self, by_guest=0): 695 def read(self, by_guest=0):
@@ -671,8 +697,12 @@ class TracepointProvider(Provider):
671 ret = defaultdict(int) 697 ret = defaultdict(int)
672 for group in self.group_leaders: 698 for group in self.group_leaders:
673 for name, val in group.read().items(): 699 for name, val in group.read().items():
674 if name in self._fields: 700 if name not in self._fields:
675 ret[name] += val 701 continue
702 parent = ARCH.tracepoint_is_child(name)
703 if parent:
704 name += ' ' + parent
705 ret[name] += val
676 return ret 706 return ret
677 707
678 def reset(self): 708 def reset(self):
@@ -690,11 +720,11 @@ class DebugfsProvider(Provider):
690 self._baseline = {} 720 self._baseline = {}
691 self.do_read = True 721 self.do_read = True
692 self.paths = [] 722 self.paths = []
693 self.pid = pid 723 super(DebugfsProvider, self).__init__(pid)
694 if include_past: 724 if include_past:
695 self.restore() 725 self._restore()
696 726
697 def get_available_fields(self): 727 def _get_available_fields(self):
698 """"Returns a list of available fields. 728 """"Returns a list of available fields.
699 729
700 The fields are all available KVM debugfs files 730 The fields are all available KVM debugfs files
@@ -704,8 +734,9 @@ class DebugfsProvider(Provider):
704 734
705 def update_fields(self, fields_filter): 735 def update_fields(self, fields_filter):
706 """Refresh fields, applying fields_filter""" 736 """Refresh fields, applying fields_filter"""
707 self._fields = [field for field in self.get_available_fields() 737 self._fields = [field for field in self._get_available_fields()
708 if self.is_field_wanted(fields_filter, field)] 738 if self.is_field_wanted(fields_filter, field) or
739 ARCH.debugfs_is_child(field)]
709 740
710 @property 741 @property
711 def fields(self): 742 def fields(self):
@@ -758,7 +789,7 @@ class DebugfsProvider(Provider):
758 paths.append(dir) 789 paths.append(dir)
759 for path in paths: 790 for path in paths:
760 for field in self._fields: 791 for field in self._fields:
761 value = self.read_field(field, path) 792 value = self._read_field(field, path)
762 key = path + field 793 key = path + field
763 if reset == 1: 794 if reset == 1:
764 self._baseline[key] = value 795 self._baseline[key] = value
@@ -766,20 +797,21 @@ class DebugfsProvider(Provider):
766 self._baseline[key] = 0 797 self._baseline[key] = 0
767 if self._baseline.get(key, -1) == -1: 798 if self._baseline.get(key, -1) == -1:
768 self._baseline[key] = value 799 self._baseline[key] = value
769 increment = (results.get(field, 0) + value - 800 parent = ARCH.debugfs_is_child(field)
770 self._baseline.get(key, 0)) 801 if parent:
771 if by_guest: 802 field = field + ' ' + parent
772 pid = key.split('-')[0] 803 else:
773 if pid in results: 804 if by_guest:
774 results[pid] += increment 805 field = key.split('-')[0] # set 'field' to 'pid'
775 else: 806 increment = value - self._baseline.get(key, 0)
776 results[pid] = increment 807 if field in results:
808 results[field] += increment
777 else: 809 else:
778 results[field] = increment 810 results[field] = increment
779 811
780 return results 812 return results
781 813
782 def read_field(self, field, path): 814 def _read_field(self, field, path):
783 """Returns the value of a single field from a specific VM.""" 815 """Returns the value of a single field from a specific VM."""
784 try: 816 try:
785 return int(open(os.path.join(PATH_DEBUGFS_KVM, 817 return int(open(os.path.join(PATH_DEBUGFS_KVM,
@@ -794,12 +826,15 @@ class DebugfsProvider(Provider):
794 self._baseline = {} 826 self._baseline = {}
795 self.read(1) 827 self.read(1)
796 828
797 def restore(self): 829 def _restore(self):
798 """Reset field counters""" 830 """Reset field counters"""
799 self._baseline = {} 831 self._baseline = {}
800 self.read(2) 832 self.read(2)
801 833
802 834
835EventStat = namedtuple('EventStat', ['value', 'delta'])
836
837
803class Stats(object): 838class Stats(object):
804 """Manages the data providers and the data they provide. 839 """Manages the data providers and the data they provide.
805 840
@@ -808,13 +843,13 @@ class Stats(object):
808 843
809 """ 844 """
810 def __init__(self, options): 845 def __init__(self, options):
811 self.providers = self.get_providers(options) 846 self.providers = self._get_providers(options)
812 self._pid_filter = options.pid 847 self._pid_filter = options.pid
813 self._fields_filter = options.fields 848 self._fields_filter = options.fields
814 self.values = {} 849 self.values = {}
850 self._child_events = False
815 851
816 @staticmethod 852 def _get_providers(self, options):
817 def get_providers(options):
818 """Returns a list of data providers depending on the passed options.""" 853 """Returns a list of data providers depending on the passed options."""
819 providers = [] 854 providers = []
820 855
@@ -826,7 +861,7 @@ class Stats(object):
826 861
827 return providers 862 return providers
828 863
829 def update_provider_filters(self): 864 def _update_provider_filters(self):
830 """Propagates fields filters to providers.""" 865 """Propagates fields filters to providers."""
831 # As we reset the counters when updating the fields we can 866 # As we reset the counters when updating the fields we can
832 # also clear the cache of old values. 867 # also clear the cache of old values.
@@ -847,7 +882,7 @@ class Stats(object):
847 def fields_filter(self, fields_filter): 882 def fields_filter(self, fields_filter):
848 if fields_filter != self._fields_filter: 883 if fields_filter != self._fields_filter:
849 self._fields_filter = fields_filter 884 self._fields_filter = fields_filter
850 self.update_provider_filters() 885 self._update_provider_filters()
851 886
852 @property 887 @property
853 def pid_filter(self): 888 def pid_filter(self):
@@ -861,16 +896,33 @@ class Stats(object):
861 for provider in self.providers: 896 for provider in self.providers:
862 provider.pid = self._pid_filter 897 provider.pid = self._pid_filter
863 898
899 @property
900 def child_events(self):
901 return self._child_events
902
903 @child_events.setter
904 def child_events(self, val):
905 self._child_events = val
906 for provider in self.providers:
907 provider.child_events = val
908
864 def get(self, by_guest=0): 909 def get(self, by_guest=0):
865 """Returns a dict with field -> (value, delta to last value) of all 910 """Returns a dict with field -> (value, delta to last value) of all
866 provider data.""" 911 provider data.
912 Key formats:
913 * plain: 'key' is event name
914 * child-parent: 'key' is in format '<child> <parent>'
915 * pid: 'key' is the pid of the guest, and the record contains the
916 aggregated event data
917 These formats are generated by the providers, and handled in class TUI.
918 """
867 for provider in self.providers: 919 for provider in self.providers:
868 new = provider.read(by_guest=by_guest) 920 new = provider.read(by_guest=by_guest)
869 for key in new if by_guest else provider.fields: 921 for key in new:
870 oldval = self.values.get(key, (0, 0))[0] 922 oldval = self.values.get(key, EventStat(0, 0)).value
871 newval = new.get(key, 0) 923 newval = new.get(key, 0)
872 newdelta = newval - oldval 924 newdelta = newval - oldval
873 self.values[key] = (newval, newdelta) 925 self.values[key] = EventStat(newval, newdelta)
874 return self.values 926 return self.values
875 927
876 def toggle_display_guests(self, to_pid): 928 def toggle_display_guests(self, to_pid):
@@ -899,10 +951,10 @@ class Stats(object):
899 self.get(to_pid) 951 self.get(to_pid)
900 return 0 952 return 0
901 953
954
902DELAY_DEFAULT = 3.0 955DELAY_DEFAULT = 3.0
903MAX_GUEST_NAME_LEN = 48 956MAX_GUEST_NAME_LEN = 48
904MAX_REGEX_LEN = 44 957MAX_REGEX_LEN = 44
905DEFAULT_REGEX = r'^[^\(]*$'
906SORT_DEFAULT = 0 958SORT_DEFAULT = 0
907 959
908 960
@@ -969,7 +1021,7 @@ class Tui(object):
969 1021
970 return res 1022 return res
971 1023
972 def print_all_gnames(self, row): 1024 def _print_all_gnames(self, row):
973 """Print a list of all running guests along with their pids.""" 1025 """Print a list of all running guests along with their pids."""
974 self.screen.addstr(row, 2, '%8s %-60s' % 1026 self.screen.addstr(row, 2, '%8s %-60s' %
975 ('Pid', 'Guest Name (fuzzy list, might be ' 1027 ('Pid', 'Guest Name (fuzzy list, might be '
@@ -1032,19 +1084,13 @@ class Tui(object):
1032 1084
1033 return name 1085 return name
1034 1086
1035 def update_drilldown(self): 1087 def _update_pid(self, pid):
1036 """Sets or removes a filter that only allows fields without braces."""
1037 if not self.stats.fields_filter:
1038 self.stats.fields_filter = DEFAULT_REGEX
1039
1040 elif self.stats.fields_filter == DEFAULT_REGEX:
1041 self.stats.fields_filter = None
1042
1043 def update_pid(self, pid):
1044 """Propagates pid selection to stats object.""" 1088 """Propagates pid selection to stats object."""
1089 self.screen.addstr(4, 1, 'Updating pid filter...')
1090 self.screen.refresh()
1045 self.stats.pid_filter = pid 1091 self.stats.pid_filter = pid
1046 1092
1047 def refresh_header(self, pid=None): 1093 def _refresh_header(self, pid=None):
1048 """Refreshes the header.""" 1094 """Refreshes the header."""
1049 if pid is None: 1095 if pid is None:
1050 pid = self.stats.pid_filter 1096 pid = self.stats.pid_filter
@@ -1059,8 +1105,7 @@ class Tui(object):
1059 .format(pid, gname), curses.A_BOLD) 1105 .format(pid, gname), curses.A_BOLD)
1060 else: 1106 else:
1061 self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD) 1107 self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
1062 if self.stats.fields_filter and self.stats.fields_filter \ 1108 if self.stats.fields_filter:
1063 != DEFAULT_REGEX:
1064 regex = self.stats.fields_filter 1109 regex = self.stats.fields_filter
1065 if len(regex) > MAX_REGEX_LEN: 1110 if len(regex) > MAX_REGEX_LEN:
1066 regex = regex[:MAX_REGEX_LEN] + '...' 1111 regex = regex[:MAX_REGEX_LEN] + '...'
@@ -1075,56 +1120,99 @@ class Tui(object):
1075 self.screen.addstr(4, 1, 'Collecting data...') 1120 self.screen.addstr(4, 1, 'Collecting data...')
1076 self.screen.refresh() 1121 self.screen.refresh()
1077 1122
1078 def refresh_body(self, sleeptime): 1123 def _refresh_body(self, sleeptime):
1124 def is_child_field(field):
1125 return field.find('(') != -1
1126
1127 def insert_child(sorted_items, child, values, parent):
1128 num = len(sorted_items)
1129 for i in range(0, num):
1130 # only add child if parent is present
1131 if parent.startswith(sorted_items[i][0]):
1132 sorted_items.insert(i + 1, (' ' + child, values))
1133
1134 def get_sorted_events(self, stats):
1135 """ separate parent and child events """
1136 if self._sorting == SORT_DEFAULT:
1137 def sortkey((_k, v)):
1138 # sort by (delta value, overall value)
1139 return (v.delta, v.value)
1140 else:
1141 def sortkey((_k, v)):
1142 # sort by overall value
1143 return v.value
1144
1145 childs = []
1146 sorted_items = []
1147 # we can't rule out child events to appear prior to parents even
1148 # when sorted - separate out all children first, and add in later
1149 for key, values in sorted(stats.items(), key=sortkey,
1150 reverse=True):
1151 if values == (0, 0):
1152 continue
1153 if key.find(' ') != -1:
1154 if not self.stats.child_events:
1155 continue
1156 childs.insert(0, (key, values))
1157 else:
1158 sorted_items.append((key, values))
1159 if self.stats.child_events:
1160 for key, values in childs:
1161 (child, parent) = key.split(' ')
1162 insert_child(sorted_items, child, values, parent)
1163
1164 return sorted_items
1165
1079 row = 3 1166 row = 3
1080 self.screen.move(row, 0) 1167 self.screen.move(row, 0)
1081 self.screen.clrtobot() 1168 self.screen.clrtobot()
1082 stats = self.stats.get(self._display_guests) 1169 stats = self.stats.get(self._display_guests)
1083 1170 total = 0.
1084 def sortCurAvg(x): 1171 ctotal = 0.
1085 # sort by current events if available 1172 for key, values in stats.items():
1086 if stats[x][1]: 1173 if self._display_guests:
1087 return (-stats[x][1], -stats[x][0]) 1174 if self.get_gname_from_pid(key):
1175 total += values.value
1176 continue
1177 if not key.find(' ') != -1:
1178 total += values.value
1088 else: 1179 else:
1089 return (0, -stats[x][0]) 1180 ctotal += values.value
1181 if total == 0.:
1182 # we don't have any fields, or all non-child events are filtered
1183 total = ctotal
1090 1184
1091 def sortTotal(x): 1185 # print events
1092 # sort by totals
1093 return (0, -stats[x][0])
1094 total = 0.
1095 for key in stats.keys():
1096 if key.find('(') is -1:
1097 total += stats[key][0]
1098 if self._sorting == SORT_DEFAULT:
1099 sortkey = sortCurAvg
1100 else:
1101 sortkey = sortTotal
1102 tavg = 0 1186 tavg = 0
1103 for key in sorted(stats.keys(), key=sortkey): 1187 tcur = 0
1104 if row >= self.screen.getmaxyx()[0] - 1: 1188 for key, values in get_sorted_events(self, stats):
1105 break 1189 if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0):
1106 values = stats[key]
1107 if not values[0] and not values[1]:
1108 break 1190 break
1109 if values[0] is not None: 1191 if self._display_guests:
1110 cur = int(round(values[1] / sleeptime)) if values[1] else '' 1192 key = self.get_gname_from_pid(key)
1111 if self._display_guests: 1193 if not key:
1112 key = self.get_gname_from_pid(key) 1194 continue
1113 self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % 1195 cur = int(round(values.delta / sleeptime)) if values.delta else ''
1114 (key, values[0], values[0] * 100 / total, 1196 if key[0] != ' ':
1115 cur)) 1197 if values.delta:
1116 if cur is not '' and key.find('(') is -1: 1198 tcur += values.delta
1117 tavg += cur 1199 ptotal = values.value
1200 ltotal = total
1201 else:
1202 ltotal = ptotal
1203 self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key,
1204 values.value,
1205 values.value * 100 / float(ltotal), cur))
1118 row += 1 1206 row += 1
1119 if row == 3: 1207 if row == 3:
1120 self.screen.addstr(4, 1, 'No matching events reported yet') 1208 self.screen.addstr(4, 1, 'No matching events reported yet')
1121 else: 1209 if row > 4:
1210 tavg = int(round(tcur / sleeptime)) if tcur > 0 else ''
1122 self.screen.addstr(row, 1, '%-40s %10d %8s' % 1211 self.screen.addstr(row, 1, '%-40s %10d %8s' %
1123 ('Total', total, tavg if tavg else ''), 1212 ('Total', total, tavg), curses.A_BOLD)
1124 curses.A_BOLD)
1125 self.screen.refresh() 1213 self.screen.refresh()
1126 1214
1127 def show_msg(self, text): 1215 def _show_msg(self, text):
1128 """Display message centered text and exit on key press""" 1216 """Display message centered text and exit on key press"""
1129 hint = 'Press any key to continue' 1217 hint = 'Press any key to continue'
1130 curses.cbreak() 1218 curses.cbreak()
@@ -1139,16 +1227,16 @@ class Tui(object):
1139 curses.A_STANDOUT) 1227 curses.A_STANDOUT)
1140 self.screen.getkey() 1228 self.screen.getkey()
1141 1229
1142 def show_help_interactive(self): 1230 def _show_help_interactive(self):
1143 """Display help with list of interactive commands""" 1231 """Display help with list of interactive commands"""
1144 msg = (' b toggle events by guests (debugfs only, honors' 1232 msg = (' b toggle events by guests (debugfs only, honors'
1145 ' filters)', 1233 ' filters)',
1146 ' c clear filter', 1234 ' c clear filter',
1147 ' f filter by regular expression', 1235 ' f filter by regular expression',
1148 ' g filter by guest name', 1236 ' g filter by guest name/PID',
1149 ' h display interactive commands reference', 1237 ' h display interactive commands reference',
1150 ' o toggle sorting order (Total vs CurAvg/s)', 1238 ' o toggle sorting order (Total vs CurAvg/s)',
1151 ' p filter by PID', 1239 ' p filter by guest name/PID',
1152 ' q quit', 1240 ' q quit',
1153 ' r reset stats', 1241 ' r reset stats',
1154 ' s set update interval', 1242 ' s set update interval',
@@ -1165,14 +1253,15 @@ class Tui(object):
1165 self.screen.addstr(row, 0, line) 1253 self.screen.addstr(row, 0, line)
1166 row += 1 1254 row += 1
1167 self.screen.getkey() 1255 self.screen.getkey()
1168 self.refresh_header() 1256 self._refresh_header()
1169 1257
1170 def show_filter_selection(self): 1258 def _show_filter_selection(self):
1171 """Draws filter selection mask. 1259 """Draws filter selection mask.
1172 1260
1173 Asks for a valid regex and sets the fields filter accordingly. 1261 Asks for a valid regex and sets the fields filter accordingly.
1174 1262
1175 """ 1263 """
1264 msg = ''
1176 while True: 1265 while True:
1177 self.screen.erase() 1266 self.screen.erase()
1178 self.screen.addstr(0, 0, 1267 self.screen.addstr(0, 0,
@@ -1181,61 +1270,25 @@ class Tui(object):
1181 self.screen.addstr(2, 0, 1270 self.screen.addstr(2, 0,
1182 "Current regex: {0}" 1271 "Current regex: {0}"
1183 .format(self.stats.fields_filter)) 1272 .format(self.stats.fields_filter))
1273 self.screen.addstr(5, 0, msg)
1184 self.screen.addstr(3, 0, "New regex: ") 1274 self.screen.addstr(3, 0, "New regex: ")
1185 curses.echo() 1275 curses.echo()
1186 regex = self.screen.getstr().decode(ENCODING) 1276 regex = self.screen.getstr().decode(ENCODING)
1187 curses.noecho() 1277 curses.noecho()
1188 if len(regex) == 0: 1278 if len(regex) == 0:
1189 self.stats.fields_filter = DEFAULT_REGEX 1279 self.stats.fields_filter = ''
1190 self.refresh_header() 1280 self._refresh_header()
1191 return 1281 return
1192 try: 1282 try:
1193 re.compile(regex) 1283 re.compile(regex)
1194 self.stats.fields_filter = regex 1284 self.stats.fields_filter = regex
1195 self.refresh_header() 1285 self._refresh_header()
1196 return 1286 return
1197 except re.error: 1287 except re.error:
1288 msg = '"' + regex + '": Not a valid regular expression'
1198 continue 1289 continue
1199 1290
1200 def show_vm_selection_by_pid(self): 1291 def _show_set_update_interval(self):
1201 """Draws PID selection mask.
1202
1203 Asks for a pid until a valid pid or 0 has been entered.
1204
1205 """
1206 msg = ''
1207 while True:
1208 self.screen.erase()
1209 self.screen.addstr(0, 0,
1210 'Show statistics for specific pid.',
1211 curses.A_BOLD)
1212 self.screen.addstr(1, 0,
1213 'This might limit the shown data to the trace '
1214 'statistics.')
1215 self.screen.addstr(5, 0, msg)
1216 self.print_all_gnames(7)
1217
1218 curses.echo()
1219 self.screen.addstr(3, 0, "Pid [0 or pid]: ")
1220 pid = self.screen.getstr().decode(ENCODING)
1221 curses.noecho()
1222
1223 try:
1224 if len(pid) > 0:
1225 pid = int(pid)
1226 if pid != 0 and not os.path.isdir(os.path.join('/proc/',
1227 str(pid))):
1228 msg = '"' + str(pid) + '": Not a running process'
1229 continue
1230 else:
1231 pid = 0
1232 self.refresh_header(pid)
1233 self.update_pid(pid)
1234 break
1235 except ValueError:
1236 msg = '"' + str(pid) + '": Not a valid pid'
1237
1238 def show_set_update_interval(self):
1239 """Draws update interval selection mask.""" 1292 """Draws update interval selection mask."""
1240 msg = '' 1293 msg = ''
1241 while True: 1294 while True:
@@ -1265,60 +1318,67 @@ class Tui(object):
1265 1318
1266 except ValueError: 1319 except ValueError:
1267 msg = '"' + str(val) + '": Invalid value' 1320 msg = '"' + str(val) + '": Invalid value'
1268 self.refresh_header() 1321 self._refresh_header()
1269 1322
1270 def show_vm_selection_by_guest_name(self): 1323 def _show_vm_selection_by_guest(self):
1271 """Draws guest selection mask. 1324 """Draws guest selection mask.
1272 1325
1273 Asks for a guest name until a valid guest name or '' is entered. 1326 Asks for a guest name or pid until a valid guest name or '' is entered.
1274 1327
1275 """ 1328 """
1276 msg = '' 1329 msg = ''
1277 while True: 1330 while True:
1278 self.screen.erase() 1331 self.screen.erase()
1279 self.screen.addstr(0, 0, 1332 self.screen.addstr(0, 0,
1280 'Show statistics for specific guest.', 1333 'Show statistics for specific guest or pid.',
1281 curses.A_BOLD) 1334 curses.A_BOLD)
1282 self.screen.addstr(1, 0, 1335 self.screen.addstr(1, 0,
1283 'This might limit the shown data to the trace ' 1336 'This might limit the shown data to the trace '
1284 'statistics.') 1337 'statistics.')
1285 self.screen.addstr(5, 0, msg) 1338 self.screen.addstr(5, 0, msg)
1286 self.print_all_gnames(7) 1339 self._print_all_gnames(7)
1287 curses.echo() 1340 curses.echo()
1288 self.screen.addstr(3, 0, "Guest [ENTER or guest]: ") 1341 curses.curs_set(1)
1289 gname = self.screen.getstr().decode(ENCODING) 1342 self.screen.addstr(3, 0, "Guest or pid [ENTER exits]: ")
1343 guest = self.screen.getstr().decode(ENCODING)
1290 curses.noecho() 1344 curses.noecho()
1291 1345
1292 if not gname: 1346 pid = 0
1293 self.refresh_header(0) 1347 if not guest or guest == '0':
1294 self.update_pid(0)
1295 break 1348 break
1296 else: 1349 if guest.isdigit():
1297 pids = [] 1350 if not os.path.isdir(os.path.join('/proc/', guest)):
1298 try: 1351 msg = '"' + guest + '": Not a running process'
1299 pids = self.get_pid_from_gname(gname)
1300 except:
1301 msg = '"' + gname + '": Internal error while searching, ' \
1302 'use pid filter instead'
1303 continue
1304 if len(pids) == 0:
1305 msg = '"' + gname + '": Not an active guest'
1306 continue 1352 continue
1307 if len(pids) > 1: 1353 pid = int(guest)
1308 msg = '"' + gname + '": Multiple matches found, use pid ' \
1309 'filter instead'
1310 continue
1311 self.refresh_header(pids[0])
1312 self.update_pid(pids[0])
1313 break 1354 break
1355 pids = []
1356 try:
1357 pids = self.get_pid_from_gname(guest)
1358 except:
1359 msg = '"' + guest + '": Internal error while searching, ' \
1360 'use pid filter instead'
1361 continue
1362 if len(pids) == 0:
1363 msg = '"' + guest + '": Not an active guest'
1364 continue
1365 if len(pids) > 1:
1366 msg = '"' + guest + '": Multiple matches found, use pid ' \
1367 'filter instead'
1368 continue
1369 pid = pids[0]
1370 break
1371 curses.curs_set(0)
1372 self._refresh_header(pid)
1373 self._update_pid(pid)
1314 1374
1315 def show_stats(self): 1375 def show_stats(self):
1316 """Refreshes the screen and processes user input.""" 1376 """Refreshes the screen and processes user input."""
1317 sleeptime = self._delay_initial 1377 sleeptime = self._delay_initial
1318 self.refresh_header() 1378 self._refresh_header()
1319 start = 0.0 # result based on init value never appears on screen 1379 start = 0.0 # result based on init value never appears on screen
1320 while True: 1380 while True:
1321 self.refresh_body(time.time() - start) 1381 self._refresh_body(time.time() - start)
1322 curses.halfdelay(int(sleeptime * 10)) 1382 curses.halfdelay(int(sleeptime * 10))
1323 start = time.time() 1383 start = time.time()
1324 sleeptime = self._delay_regular 1384 sleeptime = self._delay_regular
@@ -1327,47 +1387,39 @@ class Tui(object):
1327 if char == 'b': 1387 if char == 'b':
1328 self._display_guests = not self._display_guests 1388 self._display_guests = not self._display_guests
1329 if self.stats.toggle_display_guests(self._display_guests): 1389 if self.stats.toggle_display_guests(self._display_guests):
1330 self.show_msg(['Command not available with tracepoints' 1390 self._show_msg(['Command not available with '
1331 ' enabled', 'Restart with debugfs only ' 1391 'tracepoints enabled', 'Restart with '
1332 '(see option \'-d\') and try again!']) 1392 'debugfs only (see option \'-d\') and '
1393 'try again!'])
1333 self._display_guests = not self._display_guests 1394 self._display_guests = not self._display_guests
1334 self.refresh_header() 1395 self._refresh_header()
1335 if char == 'c': 1396 if char == 'c':
1336 self.stats.fields_filter = DEFAULT_REGEX 1397 self.stats.fields_filter = ''
1337 self.refresh_header(0) 1398 self._refresh_header(0)
1338 self.update_pid(0) 1399 self._update_pid(0)
1339 if char == 'f': 1400 if char == 'f':
1340 curses.curs_set(1) 1401 curses.curs_set(1)
1341 self.show_filter_selection() 1402 self._show_filter_selection()
1342 curses.curs_set(0) 1403 curses.curs_set(0)
1343 sleeptime = self._delay_initial 1404 sleeptime = self._delay_initial
1344 if char == 'g': 1405 if char == 'g' or char == 'p':
1345 curses.curs_set(1) 1406 self._show_vm_selection_by_guest()
1346 self.show_vm_selection_by_guest_name()
1347 curses.curs_set(0)
1348 sleeptime = self._delay_initial 1407 sleeptime = self._delay_initial
1349 if char == 'h': 1408 if char == 'h':
1350 self.show_help_interactive() 1409 self._show_help_interactive()
1351 if char == 'o': 1410 if char == 'o':
1352 self._sorting = not self._sorting 1411 self._sorting = not self._sorting
1353 if char == 'p':
1354 curses.curs_set(1)
1355 self.show_vm_selection_by_pid()
1356 curses.curs_set(0)
1357 sleeptime = self._delay_initial
1358 if char == 'q': 1412 if char == 'q':
1359 break 1413 break
1360 if char == 'r': 1414 if char == 'r':
1361 self.stats.reset() 1415 self.stats.reset()
1362 if char == 's': 1416 if char == 's':
1363 curses.curs_set(1) 1417 curses.curs_set(1)
1364 self.show_set_update_interval() 1418 self._show_set_update_interval()
1365 curses.curs_set(0) 1419 curses.curs_set(0)
1366 sleeptime = self._delay_initial 1420 sleeptime = self._delay_initial
1367 if char == 'x': 1421 if char == 'x':
1368 self.update_drilldown() 1422 self.stats.child_events = not self.stats.child_events
1369 # prevents display of current values on next refresh
1370 self.stats.get(self._display_guests)
1371 except KeyboardInterrupt: 1423 except KeyboardInterrupt:
1372 break 1424 break
1373 except curses.error: 1425 except curses.error:
@@ -1380,9 +1432,9 @@ def batch(stats):
1380 s = stats.get() 1432 s = stats.get()
1381 time.sleep(1) 1433 time.sleep(1)
1382 s = stats.get() 1434 s = stats.get()
1383 for key in sorted(s.keys()): 1435 for key, values in sorted(s.items()):
1384 values = s[key] 1436 print('%-42s%10d%10d' % (key.split(' ')[0], values.value,
1385 print('%-42s%10d%10d' % (key, values[0], values[1])) 1437 values.delta))
1386 except KeyboardInterrupt: 1438 except KeyboardInterrupt:
1387 pass 1439 pass
1388 1440
@@ -1392,14 +1444,14 @@ def log(stats):
1392 keys = sorted(stats.get().keys()) 1444 keys = sorted(stats.get().keys())
1393 1445
1394 def banner(): 1446 def banner():
1395 for k in keys: 1447 for key in keys:
1396 print(k, end=' ') 1448 print(key.split(' ')[0], end=' ')
1397 print() 1449 print()
1398 1450
1399 def statline(): 1451 def statline():
1400 s = stats.get() 1452 s = stats.get()
1401 for k in keys: 1453 for key in keys:
1402 print(' %9d' % s[k][1], end=' ') 1454 print(' %9d' % s[key].delta, end=' ')
1403 print() 1455 print()
1404 line = 0 1456 line = 0
1405 banner_repeat = 20 1457 banner_repeat = 20
@@ -1504,7 +1556,7 @@ Press any other key to refresh statistics immediately.
1504 ) 1556 )
1505 optparser.add_option('-f', '--fields', 1557 optparser.add_option('-f', '--fields',
1506 action='store', 1558 action='store',
1507 default=DEFAULT_REGEX, 1559 default='',
1508 dest='fields', 1560 dest='fields',
1509 help='''fields to display (regex) 1561 help='''fields to display (regex)
1510 "-f help" for a list of available events''', 1562 "-f help" for a list of available events''',
@@ -1539,17 +1591,6 @@ Press any other key to refresh statistics immediately.
1539 1591
1540def check_access(options): 1592def check_access(options):
1541 """Exits if the current user can't access all needed directories.""" 1593 """Exits if the current user can't access all needed directories."""
1542 if not os.path.exists('/sys/kernel/debug'):
1543 sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.')
1544 sys.exit(1)
1545
1546 if not os.path.exists(PATH_DEBUGFS_KVM):
1547 sys.stderr.write("Please make sure, that debugfs is mounted and "
1548 "readable by the current user:\n"
1549 "('mount -t debugfs debugfs /sys/kernel/debug')\n"
1550 "Also ensure, that the kvm modules are loaded.\n")
1551 sys.exit(1)
1552
1553 if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or 1594 if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or
1554 not options.debugfs): 1595 not options.debugfs):
1555 sys.stderr.write("Please enable CONFIG_TRACING in your kernel " 1596 sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
@@ -1567,7 +1608,33 @@ def check_access(options):
1567 return options 1608 return options
1568 1609
1569 1610
1611def assign_globals():
1612 global PATH_DEBUGFS_KVM
1613 global PATH_DEBUGFS_TRACING
1614
1615 debugfs = ''
1616 for line in file('/proc/mounts'):
1617 if line.split(' ')[0] == 'debugfs':
1618 debugfs = line.split(' ')[1]
1619 break
1620 if debugfs == '':
1621 sys.stderr.write("Please make sure that CONFIG_DEBUG_FS is enabled in "
1622 "your kernel, mounted and\nreadable by the current "
1623 "user:\n"
1624 "('mount -t debugfs debugfs /sys/kernel/debug')\n")
1625 sys.exit(1)
1626
1627 PATH_DEBUGFS_KVM = os.path.join(debugfs, 'kvm')
1628 PATH_DEBUGFS_TRACING = os.path.join(debugfs, 'tracing')
1629
1630 if not os.path.exists(PATH_DEBUGFS_KVM):
1631 sys.stderr.write("Please make sure that CONFIG_KVM is enabled in "
1632 "your kernel and that the modules are loaded.\n")
1633 sys.exit(1)
1634
1635
1570def main(): 1636def main():
1637 assign_globals()
1571 options = get_options() 1638 options = get_options()
1572 options = check_access(options) 1639 options = check_access(options)
1573 1640
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
index b5b3810c9e94..0811d860fe75 100644
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -35,13 +35,13 @@ INTERACTIVE COMMANDS
35 35
36*f*:: filter by regular expression 36*f*:: filter by regular expression
37 37
38*g*:: filter by guest name 38*g*:: filter by guest name/PID
39 39
40*h*:: display interactive commands reference 40*h*:: display interactive commands reference
41 41
42*o*:: toggle sorting order (Total vs CurAvg/s) 42*o*:: toggle sorting order (Total vs CurAvg/s)
43 43
44*p*:: filter by PID 44*p*:: filter by guest name/PID
45 45
46*q*:: quit 46*q*:: quit
47 47
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 70268c0bec79..70f4c30918eb 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -36,6 +36,8 @@ static struct timecounter *timecounter;
36static unsigned int host_vtimer_irq; 36static unsigned int host_vtimer_irq;
37static u32 host_vtimer_irq_flags; 37static u32 host_vtimer_irq_flags;
38 38
39static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
40
39static const struct kvm_irq_level default_ptimer_irq = { 41static const struct kvm_irq_level default_ptimer_irq = {
40 .irq = 30, 42 .irq = 30,
41 .level = 1, 43 .level = 1,
@@ -56,6 +58,12 @@ u64 kvm_phys_timer_read(void)
56 return timecounter->cc->read(timecounter->cc); 58 return timecounter->cc->read(timecounter->cc);
57} 59}
58 60
61static inline bool userspace_irqchip(struct kvm *kvm)
62{
63 return static_branch_unlikely(&userspace_irqchip_in_use) &&
64 unlikely(!irqchip_in_kernel(kvm));
65}
66
59static void soft_timer_start(struct hrtimer *hrt, u64 ns) 67static void soft_timer_start(struct hrtimer *hrt, u64 ns)
60{ 68{
61 hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns), 69 hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
@@ -69,25 +77,6 @@ static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work)
69 cancel_work_sync(work); 77 cancel_work_sync(work);
70} 78}
71 79
72static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu)
73{
74 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
75
76 /*
77 * When using a userspace irqchip with the architected timers, we must
78 * prevent continuously exiting from the guest, and therefore mask the
79 * physical interrupt by disabling it on the host interrupt controller
80 * when the virtual level is high, such that the guest can make
81 * forward progress. Once we detect the output level being
82 * de-asserted, we unmask the interrupt again so that we exit from the
83 * guest when the timer fires.
84 */
85 if (vtimer->irq.level)
86 disable_percpu_irq(host_vtimer_irq);
87 else
88 enable_percpu_irq(host_vtimer_irq, 0);
89}
90
91static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) 80static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
92{ 81{
93 struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; 82 struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
@@ -106,9 +95,9 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
106 if (kvm_timer_should_fire(vtimer)) 95 if (kvm_timer_should_fire(vtimer))
107 kvm_timer_update_irq(vcpu, true, vtimer); 96 kvm_timer_update_irq(vcpu, true, vtimer);
108 97
109 if (static_branch_unlikely(&userspace_irqchip_in_use) && 98 if (userspace_irqchip(vcpu->kvm) &&
110 unlikely(!irqchip_in_kernel(vcpu->kvm))) 99 !static_branch_unlikely(&has_gic_active_state))
111 kvm_vtimer_update_mask_user(vcpu); 100 disable_percpu_irq(host_vtimer_irq);
112 101
113 return IRQ_HANDLED; 102 return IRQ_HANDLED;
114} 103}
@@ -290,8 +279,7 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
290 trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, 279 trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
291 timer_ctx->irq.level); 280 timer_ctx->irq.level);
292 281
293 if (!static_branch_unlikely(&userspace_irqchip_in_use) || 282 if (!userspace_irqchip(vcpu->kvm)) {
294 likely(irqchip_in_kernel(vcpu->kvm))) {
295 ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, 283 ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
296 timer_ctx->irq.irq, 284 timer_ctx->irq.irq,
297 timer_ctx->irq.level, 285 timer_ctx->irq.level,
@@ -350,12 +338,6 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
350 phys_timer_emulate(vcpu); 338 phys_timer_emulate(vcpu);
351} 339}
352 340
353static void __timer_snapshot_state(struct arch_timer_context *timer)
354{
355 timer->cnt_ctl = read_sysreg_el0(cntv_ctl);
356 timer->cnt_cval = read_sysreg_el0(cntv_cval);
357}
358
359static void vtimer_save_state(struct kvm_vcpu *vcpu) 341static void vtimer_save_state(struct kvm_vcpu *vcpu)
360{ 342{
361 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 343 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -367,8 +349,10 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
367 if (!vtimer->loaded) 349 if (!vtimer->loaded)
368 goto out; 350 goto out;
369 351
370 if (timer->enabled) 352 if (timer->enabled) {
371 __timer_snapshot_state(vtimer); 353 vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
354 vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
355 }
372 356
373 /* Disable the virtual timer */ 357 /* Disable the virtual timer */
374 write_sysreg_el0(0, cntv_ctl); 358 write_sysreg_el0(0, cntv_ctl);
@@ -460,23 +444,43 @@ static void set_cntvoff(u64 cntvoff)
460 kvm_call_hyp(__kvm_timer_set_cntvoff, low, high); 444 kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
461} 445}
462 446
463static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu) 447static inline void set_vtimer_irq_phys_active(struct kvm_vcpu *vcpu, bool active)
448{
449 int r;
450 r = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, active);
451 WARN_ON(r);
452}
453
454static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu)
464{ 455{
465 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 456 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
466 bool phys_active; 457 bool phys_active;
467 int ret;
468 458
469 phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); 459 if (irqchip_in_kernel(vcpu->kvm))
470 460 phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
471 ret = irq_set_irqchip_state(host_vtimer_irq, 461 else
472 IRQCHIP_STATE_ACTIVE, 462 phys_active = vtimer->irq.level;
473 phys_active); 463 set_vtimer_irq_phys_active(vcpu, phys_active);
474 WARN_ON(ret);
475} 464}
476 465
477static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu) 466static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
478{ 467{
479 kvm_vtimer_update_mask_user(vcpu); 468 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
469
470 /*
471 * When using a userspace irqchip with the architected timers and a
472 * host interrupt controller that doesn't support an active state, we
473 * must still prevent continuously exiting from the guest, and
474 * therefore mask the physical interrupt by disabling it on the host
475 * interrupt controller when the virtual level is high, such that the
476 * guest can make forward progress. Once we detect the output level
477 * being de-asserted, we unmask the interrupt again so that we exit
478 * from the guest when the timer fires.
479 */
480 if (vtimer->irq.level)
481 disable_percpu_irq(host_vtimer_irq);
482 else
483 enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
480} 484}
481 485
482void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) 486void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
@@ -487,10 +491,10 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
487 if (unlikely(!timer->enabled)) 491 if (unlikely(!timer->enabled))
488 return; 492 return;
489 493
490 if (unlikely(!irqchip_in_kernel(vcpu->kvm))) 494 if (static_branch_likely(&has_gic_active_state))
491 kvm_timer_vcpu_load_user(vcpu); 495 kvm_timer_vcpu_load_gic(vcpu);
492 else 496 else
493 kvm_timer_vcpu_load_vgic(vcpu); 497 kvm_timer_vcpu_load_nogic(vcpu);
494 498
495 set_cntvoff(vtimer->cntvoff); 499 set_cntvoff(vtimer->cntvoff);
496 500
@@ -555,18 +559,24 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
555{ 559{
556 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 560 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
557 561
558 if (unlikely(!irqchip_in_kernel(vcpu->kvm))) { 562 if (!kvm_timer_should_fire(vtimer)) {
559 __timer_snapshot_state(vtimer); 563 kvm_timer_update_irq(vcpu, false, vtimer);
560 if (!kvm_timer_should_fire(vtimer)) { 564 if (static_branch_likely(&has_gic_active_state))
561 kvm_timer_update_irq(vcpu, false, vtimer); 565 set_vtimer_irq_phys_active(vcpu, false);
562 kvm_vtimer_update_mask_user(vcpu); 566 else
563 } 567 enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
564 } 568 }
565} 569}
566 570
567void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) 571void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
568{ 572{
569 unmask_vtimer_irq_user(vcpu); 573 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
574
575 if (unlikely(!timer->enabled))
576 return;
577
578 if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
579 unmask_vtimer_irq_user(vcpu);
570} 580}
571 581
572int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) 582int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -753,6 +763,8 @@ int kvm_timer_hyp_init(bool has_gic)
753 kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); 763 kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
754 goto out_free_irq; 764 goto out_free_irq;
755 } 765 }
766
767 static_branch_enable(&has_gic_active_state);
756 } 768 }
757 769
758 kvm_info("virtual timer IRQ%d\n", host_vtimer_irq); 770 kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4501e658e8d6..65dea3ffef68 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -969,8 +969,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
969 /* Check for overlaps */ 969 /* Check for overlaps */
970 r = -EEXIST; 970 r = -EEXIST;
971 kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) { 971 kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) {
972 if ((slot->id >= KVM_USER_MEM_SLOTS) || 972 if (slot->id == id)
973 (slot->id == id))
974 continue; 973 continue;
975 if (!((base_gfn + npages <= slot->base_gfn) || 974 if (!((base_gfn + npages <= slot->base_gfn) ||
976 (base_gfn >= slot->base_gfn + slot->npages))) 975 (base_gfn >= slot->base_gfn + slot->npages)))