author     Linus Torvalds <torvalds@linux-foundation.org>  2015-08-31 11:27:44 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2015-08-31 11:27:44 -0400
commit     44e98edcd11a48619b342d8f442d447b094ab2fc
tree       4b35ceb134086fddc6e32610932ece05fcb1998d
parent     64291f7db5bd8150a74ad2036f1037e6a0428df2
parent     4d283ec908e617fa28bcb06bce310206f0655d67
Merge tag 'kvm-4.3-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
 "A very small release for x86 and s390 KVM.

  - s390: timekeeping changes, cleanups and fixes

  - x86: support for Hyper-V MSRs to report crashes, and a bunch of
    cleanups.

  One interesting feature that was planned for 4.3 (emulating the local
  APIC in kernel while keeping the IOAPIC and 8254 in userspace) had to
  be delayed because Intel complained about my reading of the manual"

* tag 'kvm-4.3-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (42 commits)
  x86/kvm: Rename VMX's segment access rights defines
  KVM: x86/vPMU: Fix unnecessary signed extension for AMD PERFCTRn
  kvm: x86: Fix error handling in the function kvm_lapic_sync_from_vapic
  KVM: s390: Fix assumption that kvm_set_irq_routing is always run successfully
  KVM: VMX: drop ept misconfig check
  KVM: MMU: fully check zero bits for sptes
  KVM: MMU: introduce is_shadow_zero_bits_set()
  KVM: MMU: introduce the framework to check zero bits on sptes
  KVM: MMU: split reset_rsvds_bits_mask_ept
  KVM: MMU: split reset_rsvds_bits_mask
  KVM: MMU: introduce rsvd_bits_validate
  KVM: MMU: move FNAME(is_rsvd_bits_set) to mmu.c
  KVM: MMU: fix validation of mmio page fault
  KVM: MTRR: Use default type for non-MTRR-covered gfn before WARN_ON
  KVM: s390: host STP toleration for VMs
  KVM: x86: clean/fix memory barriers in irqchip_in_kernel
  KVM: document memory barriers for kvm->vcpus/kvm->online_vcpus
  KVM: x86: remove unnecessary memory barriers for shared MSRs
  KVM: move code related to KVM_SET_BOOT_CPU_ID to x86
  KVM: s390: log capability enablement and vm attribute changes
  ...
-rw-r--r--  Documentation/s390/00-INDEX        |   2
-rw-r--r--  Documentation/s390/kvm.txt         | 125
-rw-r--r--  Documentation/virtual/kvm/api.txt  |   5
-rw-r--r--  arch/s390/include/asm/etr.h        |   3
-rw-r--r--  arch/s390/include/asm/kvm_host.h   |   4
-rw-r--r--  arch/s390/kernel/time.c            |  16
-rw-r--r--  arch/s390/kvm/diag.c               |  13
-rw-r--r--  arch/s390/kvm/guestdbg.c           |  35
-rw-r--r--  arch/s390/kvm/interrupt.c          |  98
-rw-r--r--  arch/s390/kvm/kvm-s390.c           | 114
-rw-r--r--  arch/s390/kvm/kvm-s390.h           |  11
-rw-r--r--  arch/s390/kvm/priv.c               |  28
-rw-r--r--  arch/s390/kvm/sigp.c               |  13
-rw-r--r--  arch/s390/kvm/trace-s390.h         |  33
-rw-r--r--  arch/x86/include/asm/kvm_host.h    |  43
-rw-r--r--  arch/x86/include/asm/vmx.h         |  47
-rw-r--r--  arch/x86/include/uapi/asm/vmx.h    |   2
-rw-r--r--  arch/x86/kvm/Makefile              |   4
-rw-r--r--  arch/x86/kvm/hyperv.c              | 377
-rw-r--r--  arch/x86/kvm/hyperv.h              |  32
-rw-r--r--  arch/x86/kvm/i8259.c               |  15
-rw-r--r--  arch/x86/kvm/irq.h                 |   8
-rw-r--r--  arch/x86/kvm/lapic.c               |   5
-rw-r--r--  arch/x86/kvm/lapic.h               |   2
-rw-r--r--  arch/x86/kvm/mmu.c                 | 285
-rw-r--r--  arch/x86/kvm/mmu.h                 |   4
-rw-r--r--  arch/x86/kvm/paging_tmpl.h         |  13
-rw-r--r--  arch/x86/kvm/pmu_amd.c             |   2
-rw-r--r--  arch/x86/kvm/svm.c                 |  16
-rw-r--r--  arch/x86/kvm/vmx.c                 | 181
-rw-r--r--  arch/x86/kvm/x86.c                 | 323
-rw-r--r--  arch/x86/kvm/x86.h                 |   5
-rw-r--r--  include/linux/kvm_host.h           |  24
-rw-r--r--  include/uapi/linux/kvm.h           |   2
-rw-r--r--  virt/kvm/kvm_main.c                |  19
35 files changed, 1084 insertions, 825 deletions
diff --git a/Documentation/s390/00-INDEX b/Documentation/s390/00-INDEX
index 10c874ebdfe5..9189535f6cd2 100644
--- a/Documentation/s390/00-INDEX
+++ b/Documentation/s390/00-INDEX
@@ -16,8 +16,6 @@ Debugging390.txt
16 - hints for debugging on s390 systems. 16 - hints for debugging on s390 systems.
17driver-model.txt 17driver-model.txt
18 - information on s390 devices and the driver model. 18 - information on s390 devices and the driver model.
19kvm.txt
20 - ioctl calls to /dev/kvm on s390.
21monreader.txt 19monreader.txt
22 - information on accessing the z/VM monitor stream from Linux. 20 - information on accessing the z/VM monitor stream from Linux.
23qeth.txt 21qeth.txt
diff --git a/Documentation/s390/kvm.txt b/Documentation/s390/kvm.txt
deleted file mode 100644
index 85f3280d7ef6..000000000000
--- a/Documentation/s390/kvm.txt
+++ /dev/null
@@ -1,125 +0,0 @@
1*** BIG FAT WARNING ***
2The kvm module is currently in EXPERIMENTAL state for s390. This means that
3the interface to the module is not yet considered to remain stable. Thus, be
4prepared that we keep breaking your userspace application and guest
5compatibility over and over again until we feel happy with the result. Make sure
6your guest kernel, your host kernel, and your userspace launcher are in a
7consistent state.
8
9This Documentation describes the unique ioctl calls to /dev/kvm, the resulting
10kvm-vm file descriptors, and the kvm-vcpu file descriptors that differ from x86.
11
121. ioctl calls to /dev/kvm
13KVM does support the following ioctls on s390 that are common with other
14architectures and do behave the same:
15KVM_GET_API_VERSION
16KVM_CREATE_VM (*) see note
17KVM_CHECK_EXTENSION
18KVM_GET_VCPU_MMAP_SIZE
19
20Notes:
21* KVM_CREATE_VM may fail on s390, if the calling process has multiple
22threads and has not called KVM_S390_ENABLE_SIE before.
23
24In addition, on s390 the following architecture specific ioctls are supported:
25ioctl: KVM_S390_ENABLE_SIE
26args: none
27see also: include/linux/kvm.h
28This call causes the kernel to switch on PGSTE in the user page table. This
29operation is needed in order to run a virtual machine, and it requires the
30calling process to be single-threaded. Note that the first call to KVM_CREATE_VM
31will implicitly try to switch on PGSTE if the user process has not called
32KVM_S390_ENABLE_SIE before. User processes that want to launch multiple threads
33before creating a virtual machine have to call KVM_S390_ENABLE_SIE, or will
34observe an error calling KVM_CREATE_VM. Switching on PGSTE is a one-time
35operation, is not reversible, and will persist over the entire lifetime of
36the calling process. It does not have any user-visible effect other than a small
37performance penalty.
38
392. ioctl calls to the kvm-vm file descriptor
40KVM does support the following ioctls on s390 that are common with other
41architectures and do behave the same:
42KVM_CREATE_VCPU
43KVM_SET_USER_MEMORY_REGION (*) see note
44KVM_GET_DIRTY_LOG (**) see note
45
46Notes:
47* kvm does only allow exactly one memory slot on s390, which has to start
48 at guest absolute address zero and at a user address that is aligned on any
49 page boundary. This hardware "limitation" allows us to have a few unique
50 optimizations. The memory slot doesn't have to be filled
51 with memory actually, it may contain sparse holes. That said, with different
52 user memory layout this does still allow a large flexibility when
53 doing the guest memory setup.
54** KVM_GET_DIRTY_LOG doesn't work properly yet. The user will receive an empty
55log. This ioctl call is only needed for guest migration, and we intend to
56implement this one in the future.
57
58In addition, on s390 the following architecture specific ioctls for the kvm-vm
59file descriptor are supported:
60ioctl: KVM_S390_INTERRUPT
61args: struct kvm_s390_interrupt *
62see also: include/linux/kvm.h
63This ioctl is used to submit a floating interrupt for a virtual machine.
64Floating interrupts may be delivered to any virtual cpu in the configuration.
65Only some interrupt types defined in include/linux/kvm.h make sense when
66submitted as floating interrupts. The following interrupts are not considered
67to be useful as floating interrupts, and a call to inject them will result in
68-EINVAL error code: program interrupts and interprocessor signals. Valid
69floating interrupts are:
70KVM_S390_INT_VIRTIO
71KVM_S390_INT_SERVICE
72
733. ioctl calls to the kvm-vcpu file descriptor
74KVM does support the following ioctls on s390 that are common with other
75architectures and do behave the same:
76KVM_RUN
77KVM_GET_REGS
78KVM_SET_REGS
79KVM_GET_SREGS
80KVM_SET_SREGS
81KVM_GET_FPU
82KVM_SET_FPU
83
84In addition, on s390 the following architecture specific ioctls for the
85kvm-vcpu file descriptor are supported:
86ioctl: KVM_S390_INTERRUPT
87args: struct kvm_s390_interrupt *
88see also: include/linux/kvm.h
89This ioctl is used to submit an interrupt for a specific virtual cpu.
90Only some interrupt types defined in include/linux/kvm.h make sense when
91submitted for a specific cpu. The following interrupts are not considered
92to be useful, and a call to inject them will result in -EINVAL error code:
93service processor calls and virtio interrupts. Valid interrupt types are:
94KVM_S390_PROGRAM_INT
95KVM_S390_SIGP_STOP
96KVM_S390_RESTART
97KVM_S390_SIGP_SET_PREFIX
98KVM_S390_INT_EMERGENCY
99
100ioctl: KVM_S390_STORE_STATUS
101args: unsigned long
102see also: include/linux/kvm.h
103This ioctl stores the state of the cpu at the guest real address given as
104argument, unless one of the following values defined in include/linux/kvm.h
105is given as argument:
106KVM_S390_STORE_STATUS_NOADDR - the CPU stores its status to the save area in
107absolute lowcore as defined by the principles of operation
108KVM_S390_STORE_STATUS_PREFIXED - the CPU stores its status to the save area in
109its prefix page just like the dump tool that comes with zipl. This is useful
110to create a system dump for use with lkcdutils or crash.
111
112ioctl: KVM_S390_SET_INITIAL_PSW
113args: struct kvm_s390_psw *
114see also: include/linux/kvm.h
115This ioctl can be used to set the processor status word (psw) of a stopped cpu
116prior to running it with KVM_RUN. Note that this call is not required to modify
117the psw during sie intercepts that fall back to userspace because struct kvm_run
118does contain the psw, and this value is evaluated during reentry of KVM_RUN
119after the intercept exit was recognized.
120
121ioctl: KVM_S390_INITIAL_RESET
122args: none
123see also: include/linux/kvm.h
124This ioctl can be used to perform an initial cpu reset as defined by the
125principles of operation. The target cpu has to be in stopped state.
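
As a reading aid for the API the removed text describes, here is a minimal userspace sketch of the s390 call sequence, assuming a launcher that will start threads before creating the VM. Memory setup (KVM_SET_USER_MEMORY_REGION), the kvm_run mmap, error handling and the PSW values are placeholders or omissions, not part of the original document; only the ioctl names and argument structures come from the text above and include/linux/kvm.h.

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);

	/* One-time, irreversible switch to PGSTE page tables. Must be done
	 * while the process is still single-threaded; afterwards the launcher
	 * may spawn threads and KVM_CREATE_VM will not need to switch on
	 * PGSTE implicitly (see the note in section 1 above). */
	ioctl(kvm, KVM_S390_ENABLE_SIE, 0);

	int vm = ioctl(kvm, KVM_CREATE_VM, 0);
	int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);

	/* Give the stopped vcpu an initial PSW before the first KVM_RUN;
	 * mask and addr are placeholder values here. */
	struct kvm_s390_psw psw = { .mask = 0, .addr = 0 };
	ioctl(vcpu, KVM_S390_SET_INITIAL_PSW, &psw);

	/* Floating interrupts are submitted on the VM fd; KVM_S390_INT_SERVICE
	 * is one of the two valid floating types listed above. */
	struct kvm_s390_interrupt irq = { .type = KVM_S390_INT_SERVICE };
	ioctl(vm, KVM_S390_INTERRUPT, &irq);

	ioctl(vcpu, KVM_RUN, 0);
	return 0;
}
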
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index a7926a90156f..a4ebcb712375 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3277,6 +3277,7 @@ should put the acknowledged interrupt vector into the 'epr' field.
3277 struct { 3277 struct {
3278#define KVM_SYSTEM_EVENT_SHUTDOWN 1 3278#define KVM_SYSTEM_EVENT_SHUTDOWN 1
3279#define KVM_SYSTEM_EVENT_RESET 2 3279#define KVM_SYSTEM_EVENT_RESET 2
3280#define KVM_SYSTEM_EVENT_CRASH 3
3280 __u32 type; 3281 __u32 type;
3281 __u64 flags; 3282 __u64 flags;
3282 } system_event; 3283 } system_event;
@@ -3296,6 +3297,10 @@ Valid values for 'type' are:
3296 KVM_SYSTEM_EVENT_RESET -- the guest has requested a reset of the VM. 3297 KVM_SYSTEM_EVENT_RESET -- the guest has requested a reset of the VM.
3297 As with SHUTDOWN, userspace can choose to ignore the request, or 3298 As with SHUTDOWN, userspace can choose to ignore the request, or
3298 to schedule the reset to occur in the future and may call KVM_RUN again. 3299 to schedule the reset to occur in the future and may call KVM_RUN again.
3300 KVM_SYSTEM_EVENT_CRASH -- the guest crash occurred and the guest
3301 has requested a crash condition maintenance. Userspace can choose
3302 to ignore the request, or to gather VM memory core dump and/or
3303 reset/shutdown of the VM.
3299 3304
3300 /* Fix the size of the union. */ 3305 /* Fix the size of the union. */
3301 char padding[256]; 3306 char padding[256];
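
For context, a minimal sketch of how a VMM's run loop might consume the new exit type added above. The field accesses follow the api.txt text; the function name and the dump/reset policy in the comments are assumptions, not prescribed by the patch.

#include <linux/kvm.h>

/* run points at the mmap'ed struct kvm_run of the vcpu that just exited. */
static void handle_system_event(struct kvm_run *run)
{
	if (run->exit_reason != KVM_EXIT_SYSTEM_EVENT)
		return;

	switch (run->system_event.type) {
	case KVM_SYSTEM_EVENT_SHUTDOWN:
		/* power the VM down, or ignore the request */
		break;
	case KVM_SYSTEM_EVENT_RESET:
		/* schedule a reset and call KVM_RUN again */
		break;
	case KVM_SYSTEM_EVENT_CRASH:
		/* new in this merge: e.g. gather a guest core dump and/or
		 * reset or shut the VM down */
		break;
	}
}
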
diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h
index 629b79a93165..f7e5c36688c3 100644
--- a/arch/s390/include/asm/etr.h
+++ b/arch/s390/include/asm/etr.h
@@ -214,6 +214,9 @@ static inline int etr_ptff(void *ptff_block, unsigned int func)
214void etr_switch_to_local(void); 214void etr_switch_to_local(void);
215void etr_sync_check(void); 215void etr_sync_check(void);
216 216
217/* notifier for syncs */
218extern struct atomic_notifier_head s390_epoch_delta_notifier;
219
217/* STP interruption parameter */ 220/* STP interruption parameter */
218struct stp_irq_parm { 221struct stp_irq_parm {
219 unsigned int _pad0 : 14; 222 unsigned int _pad0 : 14;
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 3024acbe1f9d..df4db81254d3 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -258,6 +258,9 @@ struct kvm_vcpu_stat {
258 u32 diagnose_10; 258 u32 diagnose_10;
259 u32 diagnose_44; 259 u32 diagnose_44;
260 u32 diagnose_9c; 260 u32 diagnose_9c;
261 u32 diagnose_258;
262 u32 diagnose_308;
263 u32 diagnose_500;
261}; 264};
262 265
263#define PGM_OPERATION 0x01 266#define PGM_OPERATION 0x01
@@ -630,7 +633,6 @@ extern char sie_exit;
630 633
631static inline void kvm_arch_hardware_disable(void) {} 634static inline void kvm_arch_hardware_disable(void) {}
632static inline void kvm_arch_check_processor_compat(void *rtn) {} 635static inline void kvm_arch_check_processor_compat(void *rtn) {}
633static inline void kvm_arch_exit(void) {}
634static inline void kvm_arch_sync_events(struct kvm *kvm) {} 636static inline void kvm_arch_sync_events(struct kvm *kvm) {}
635static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} 637static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
636static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} 638static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 9e733d965e08..627887b075a7 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -58,6 +58,9 @@ EXPORT_SYMBOL_GPL(sched_clock_base_cc);
58 58
59static DEFINE_PER_CPU(struct clock_event_device, comparators); 59static DEFINE_PER_CPU(struct clock_event_device, comparators);
60 60
61ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier);
62EXPORT_SYMBOL(s390_epoch_delta_notifier);
63
61/* 64/*
62 * Scheduler clock - returns current time in nanosec units. 65 * Scheduler clock - returns current time in nanosec units.
63 */ 66 */
@@ -752,7 +755,7 @@ static void clock_sync_cpu(struct clock_sync_data *sync)
752static int etr_sync_clock(void *data) 755static int etr_sync_clock(void *data)
753{ 756{
754 static int first; 757 static int first;
755 unsigned long long clock, old_clock, delay, delta; 758 unsigned long long clock, old_clock, clock_delta, delay, delta;
756 struct clock_sync_data *etr_sync; 759 struct clock_sync_data *etr_sync;
757 struct etr_aib *sync_port, *aib; 760 struct etr_aib *sync_port, *aib;
758 int port; 761 int port;
@@ -789,6 +792,9 @@ static int etr_sync_clock(void *data)
789 delay = (unsigned long long) 792 delay = (unsigned long long)
790 (aib->edf2.etv - sync_port->edf2.etv) << 32; 793 (aib->edf2.etv - sync_port->edf2.etv) << 32;
791 delta = adjust_time(old_clock, clock, delay); 794 delta = adjust_time(old_clock, clock, delay);
795 clock_delta = clock - old_clock;
796 atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0,
797 &clock_delta);
792 etr_sync->fixup_cc = delta; 798 etr_sync->fixup_cc = delta;
793 fixup_clock_comparator(delta); 799 fixup_clock_comparator(delta);
794 /* Verify that the clock is properly set. */ 800 /* Verify that the clock is properly set. */
@@ -1526,7 +1532,7 @@ void stp_island_check(void)
1526static int stp_sync_clock(void *data) 1532static int stp_sync_clock(void *data)
1527{ 1533{
1528 static int first; 1534 static int first;
1529 unsigned long long old_clock, delta; 1535 unsigned long long old_clock, delta, new_clock, clock_delta;
1530 struct clock_sync_data *stp_sync; 1536 struct clock_sync_data *stp_sync;
1531 int rc; 1537 int rc;
1532 1538
@@ -1551,7 +1557,11 @@ static int stp_sync_clock(void *data)
1551 old_clock = get_tod_clock(); 1557 old_clock = get_tod_clock();
1552 rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0); 1558 rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0);
1553 if (rc == 0) { 1559 if (rc == 0) {
1554 delta = adjust_time(old_clock, get_tod_clock(), 0); 1560 new_clock = get_tod_clock();
1561 delta = adjust_time(old_clock, new_clock, 0);
1562 clock_delta = new_clock - old_clock;
1563 atomic_notifier_call_chain(&s390_epoch_delta_notifier,
1564 0, &clock_delta);
1555 fixup_clock_comparator(delta); 1565 fixup_clock_comparator(delta);
1556 rc = chsc_sstpi(stp_page, &stp_info, 1566 rc = chsc_sstpi(stp_page, &stp_info,
1557 sizeof(struct stp_sstpi)); 1567 sizeof(struct stp_sstpi));
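
The new s390_epoch_delta_notifier announces the TOD delta applied during an ETR/STP sync; KVM registers against it in the kvm-s390.c hunk further down. A hypothetical in-kernel consumer that merely logs the delta could look like the sketch below (all names except s390_epoch_delta_notifier and the notifier API are illustrative).

#include <linux/module.h>
#include <linux/notifier.h>
#include <asm/etr.h>

static int my_epoch_delta_cb(struct notifier_block *nb, unsigned long val,
			     void *v)
{
	/* payload is new TOD clock minus old TOD clock, as set up above */
	unsigned long long *delta = v;

	pr_info("TOD epoch shifted by %llu\n", *delta);
	return NOTIFY_OK;
}

static struct notifier_block my_epoch_nb = {
	.notifier_call = my_epoch_delta_cb,
};

static int __init my_epoch_init(void)
{
	return atomic_notifier_chain_register(&s390_epoch_delta_notifier,
					      &my_epoch_nb);
}
module_init(my_epoch_init);
MODULE_LICENSE("GPL");
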
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index fc7ec95848c3..5fbfb88f8477 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -27,13 +27,13 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
27 27
28 start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; 28 start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
29 end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; 29 end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
30 vcpu->stat.diagnose_10++;
30 31
31 if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end 32 if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
32 || start < 2 * PAGE_SIZE) 33 || start < 2 * PAGE_SIZE)
33 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 34 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
34 35
35 VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); 36 VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
36 vcpu->stat.diagnose_10++;
37 37
38 /* 38 /*
39 * We checked for start >= end above, so lets check for the 39 * We checked for start >= end above, so lets check for the
@@ -75,6 +75,9 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
75 u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4; 75 u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
76 u16 ry = (vcpu->arch.sie_block->ipa & 0x0f); 76 u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
77 77
78 VCPU_EVENT(vcpu, 3, "diag page reference parameter block at 0x%llx",
79 vcpu->run->s.regs.gprs[rx]);
80 vcpu->stat.diagnose_258++;
78 if (vcpu->run->s.regs.gprs[rx] & 7) 81 if (vcpu->run->s.regs.gprs[rx] & 7)
79 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 82 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
80 rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm)); 83 rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
@@ -85,6 +88,9 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
85 88
86 switch (parm.subcode) { 89 switch (parm.subcode) {
87 case 0: /* TOKEN */ 90 case 0: /* TOKEN */
91 VCPU_EVENT(vcpu, 3, "pageref token addr 0x%llx "
92 "select mask 0x%llx compare mask 0x%llx",
93 parm.token_addr, parm.select_mask, parm.compare_mask);
88 if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) { 94 if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
89 /* 95 /*
90 * If the pagefault handshake is already activated, 96 * If the pagefault handshake is already activated,
@@ -114,6 +120,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
114 * the cancel, therefore to reduce code complexity, we assume 120 * the cancel, therefore to reduce code complexity, we assume
115 * all outstanding tokens are already pending. 121 * all outstanding tokens are already pending.
116 */ 122 */
123 VCPU_EVENT(vcpu, 3, "pageref cancel addr 0x%llx", parm.token_addr);
117 if (parm.token_addr || parm.select_mask || 124 if (parm.token_addr || parm.select_mask ||
118 parm.compare_mask || parm.zarch) 125 parm.compare_mask || parm.zarch)
119 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 126 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -174,7 +181,8 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
174 unsigned int reg = vcpu->arch.sie_block->ipa & 0xf; 181 unsigned int reg = vcpu->arch.sie_block->ipa & 0xf;
175 unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff; 182 unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff;
176 183
177 VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode); 184 VCPU_EVENT(vcpu, 3, "diag ipl functions, subcode %lx", subcode);
185 vcpu->stat.diagnose_308++;
178 switch (subcode) { 186 switch (subcode) {
179 case 3: 187 case 3:
180 vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR; 188 vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
@@ -202,6 +210,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
202{ 210{
203 int ret; 211 int ret;
204 212
213 vcpu->stat.diagnose_500++;
205 /* No virtio-ccw notification? Get out quickly. */ 214 /* No virtio-ccw notification? Get out quickly. */
206 if (!vcpu->kvm->arch.css_support || 215 if (!vcpu->kvm->arch.css_support ||
207 (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) 216 (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index e97b3455d7e6..47518a324d75 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -473,10 +473,45 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu)
473 vcpu->arch.sie_block->iprcc &= ~PGM_PER; 473 vcpu->arch.sie_block->iprcc &= ~PGM_PER;
474} 474}
475 475
476#define pssec(vcpu) (vcpu->arch.sie_block->gcr[1] & _ASCE_SPACE_SWITCH)
477#define hssec(vcpu) (vcpu->arch.sie_block->gcr[13] & _ASCE_SPACE_SWITCH)
478#define old_ssec(vcpu) ((vcpu->arch.sie_block->tecmc >> 31) & 0x1)
479#define old_as_is_home(vcpu) !(vcpu->arch.sie_block->tecmc & 0xffff)
480
476void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu) 481void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
477{ 482{
483 int new_as;
484
478 if (debug_exit_required(vcpu)) 485 if (debug_exit_required(vcpu))
479 vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING; 486 vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
480 487
481 filter_guest_per_event(vcpu); 488 filter_guest_per_event(vcpu);
489
490 /*
491 * Only RP, SAC, SACF, PT, PTI, PR, PC instructions can trigger
492 * a space-switch event. PER events enforce space-switch events
493 * for these instructions. So if no PER event for the guest is left,
494 * we might have to filter the space-switch element out, too.
495 */
496 if (vcpu->arch.sie_block->iprcc == PGM_SPACE_SWITCH) {
497 vcpu->arch.sie_block->iprcc = 0;
498 new_as = psw_bits(vcpu->arch.sie_block->gpsw).as;
499
500 /*
501 * If the AS changed from / to home, we had RP, SAC or SACF
502 * instruction. Check primary and home space-switch-event
503 * controls. (theoretically home -> home produced no event)
504 */
505 if (((new_as == PSW_AS_HOME) ^ old_as_is_home(vcpu)) &&
506 (pssec(vcpu) || hssec(vcpu)))
507 vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH;
508
509 /*
510 * PT, PTI, PR, PC instruction operate on primary AS only. Check
511 * if the primary-space-switch-event control was or got set.
512 */
513 if (new_as == PSW_AS_PRIMARY && !old_as_is_home(vcpu) &&
514 (pssec(vcpu) || old_ssec(vcpu)))
515 vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH;
516 }
482} 517}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index c98d89708e99..b277d50dcf76 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -30,7 +30,6 @@
30#define IOINT_SCHID_MASK 0x0000ffff 30#define IOINT_SCHID_MASK 0x0000ffff
31#define IOINT_SSID_MASK 0x00030000 31#define IOINT_SSID_MASK 0x00030000
32#define IOINT_CSSID_MASK 0x03fc0000 32#define IOINT_CSSID_MASK 0x03fc0000
33#define IOINT_AI_MASK 0x04000000
34#define PFAULT_INIT 0x0600 33#define PFAULT_INIT 0x0600
35#define PFAULT_DONE 0x0680 34#define PFAULT_DONE 0x0680
36#define VIRTIO_PARAM 0x0d00 35#define VIRTIO_PARAM 0x0d00
@@ -72,9 +71,13 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
72 71
73static int ckc_irq_pending(struct kvm_vcpu *vcpu) 72static int ckc_irq_pending(struct kvm_vcpu *vcpu)
74{ 73{
74 preempt_disable();
75 if (!(vcpu->arch.sie_block->ckc < 75 if (!(vcpu->arch.sie_block->ckc <
76 get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) 76 get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) {
77 preempt_enable();
77 return 0; 78 return 0;
79 }
80 preempt_enable();
78 return ckc_interrupts_enabled(vcpu); 81 return ckc_interrupts_enabled(vcpu);
79} 82}
80 83
@@ -311,8 +314,8 @@ static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
311 li->irq.ext.ext_params2 = 0; 314 li->irq.ext.ext_params2 = 0;
312 spin_unlock(&li->lock); 315 spin_unlock(&li->lock);
313 316
314 VCPU_EVENT(vcpu, 4, "interrupt: pfault init parm:%x,parm64:%llx", 317 VCPU_EVENT(vcpu, 4, "deliver: pfault init token 0x%llx",
315 0, ext.ext_params2); 318 ext.ext_params2);
316 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, 319 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
317 KVM_S390_INT_PFAULT_INIT, 320 KVM_S390_INT_PFAULT_INIT,
318 0, ext.ext_params2); 321 0, ext.ext_params2);
@@ -368,7 +371,7 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
368 spin_unlock(&fi->lock); 371 spin_unlock(&fi->lock);
369 372
370 if (deliver) { 373 if (deliver) {
371 VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", 374 VCPU_EVENT(vcpu, 3, "deliver: machine check mcic 0x%llx",
372 mchk.mcic); 375 mchk.mcic);
373 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, 376 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
374 KVM_S390_MCHK, 377 KVM_S390_MCHK,
@@ -403,7 +406,7 @@ static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
403 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 406 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
404 int rc; 407 int rc;
405 408
406 VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart"); 409 VCPU_EVENT(vcpu, 3, "%s", "deliver: cpu restart");
407 vcpu->stat.deliver_restart_signal++; 410 vcpu->stat.deliver_restart_signal++;
408 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0); 411 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
409 412
@@ -427,7 +430,6 @@ static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu)
427 clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs); 430 clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
428 spin_unlock(&li->lock); 431 spin_unlock(&li->lock);
429 432
430 VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", prefix.address);
431 vcpu->stat.deliver_prefix_signal++; 433 vcpu->stat.deliver_prefix_signal++;
432 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, 434 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
433 KVM_S390_SIGP_SET_PREFIX, 435 KVM_S390_SIGP_SET_PREFIX,
@@ -450,7 +452,7 @@ static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu)
450 clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); 452 clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
451 spin_unlock(&li->lock); 453 spin_unlock(&li->lock);
452 454
453 VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg"); 455 VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp emerg");
454 vcpu->stat.deliver_emergency_signal++; 456 vcpu->stat.deliver_emergency_signal++;
455 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, 457 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
456 cpu_addr, 0); 458 cpu_addr, 0);
@@ -477,7 +479,7 @@ static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
477 clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); 479 clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
478 spin_unlock(&li->lock); 480 spin_unlock(&li->lock);
479 481
480 VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); 482 VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp ext call");
481 vcpu->stat.deliver_external_call++; 483 vcpu->stat.deliver_external_call++;
482 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, 484 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
483 KVM_S390_INT_EXTERNAL_CALL, 485 KVM_S390_INT_EXTERNAL_CALL,
@@ -506,7 +508,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
506 memset(&li->irq.pgm, 0, sizeof(pgm_info)); 508 memset(&li->irq.pgm, 0, sizeof(pgm_info));
507 spin_unlock(&li->lock); 509 spin_unlock(&li->lock);
508 510
509 VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", 511 VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilc:%d",
510 pgm_info.code, ilc); 512 pgm_info.code, ilc);
511 vcpu->stat.deliver_program_int++; 513 vcpu->stat.deliver_program_int++;
512 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, 514 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
@@ -622,7 +624,7 @@ static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
622 clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs); 624 clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
623 spin_unlock(&fi->lock); 625 spin_unlock(&fi->lock);
624 626
625 VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", 627 VCPU_EVENT(vcpu, 4, "deliver: sclp parameter 0x%x",
626 ext.ext_params); 628 ext.ext_params);
627 vcpu->stat.deliver_service_signal++; 629 vcpu->stat.deliver_service_signal++;
628 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE, 630 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
@@ -651,9 +653,6 @@ static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
651 struct kvm_s390_interrupt_info, 653 struct kvm_s390_interrupt_info,
652 list); 654 list);
653 if (inti) { 655 if (inti) {
654 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
655 KVM_S390_INT_PFAULT_DONE, 0,
656 inti->ext.ext_params2);
657 list_del(&inti->list); 656 list_del(&inti->list);
658 fi->counters[FIRQ_CNTR_PFAULT] -= 1; 657 fi->counters[FIRQ_CNTR_PFAULT] -= 1;
659 } 658 }
@@ -662,6 +661,12 @@ static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
662 spin_unlock(&fi->lock); 661 spin_unlock(&fi->lock);
663 662
664 if (inti) { 663 if (inti) {
664 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
665 KVM_S390_INT_PFAULT_DONE, 0,
666 inti->ext.ext_params2);
667 VCPU_EVENT(vcpu, 4, "deliver: pfault done token 0x%llx",
668 inti->ext.ext_params2);
669
665 rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, 670 rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
666 (u16 *)__LC_EXT_INT_CODE); 671 (u16 *)__LC_EXT_INT_CODE);
667 rc |= put_guest_lc(vcpu, PFAULT_DONE, 672 rc |= put_guest_lc(vcpu, PFAULT_DONE,
@@ -691,7 +696,7 @@ static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu)
691 list); 696 list);
692 if (inti) { 697 if (inti) {
693 VCPU_EVENT(vcpu, 4, 698 VCPU_EVENT(vcpu, 4,
694 "interrupt: virtio parm:%x,parm64:%llx", 699 "deliver: virtio parm: 0x%x,parm64: 0x%llx",
695 inti->ext.ext_params, inti->ext.ext_params2); 700 inti->ext.ext_params, inti->ext.ext_params2);
696 vcpu->stat.deliver_virtio_interrupt++; 701 vcpu->stat.deliver_virtio_interrupt++;
697 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, 702 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
@@ -741,7 +746,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
741 struct kvm_s390_interrupt_info, 746 struct kvm_s390_interrupt_info,
742 list); 747 list);
743 if (inti) { 748 if (inti) {
744 VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); 749 VCPU_EVENT(vcpu, 4, "deliver: I/O 0x%llx", inti->type);
745 vcpu->stat.deliver_io_int++; 750 vcpu->stat.deliver_io_int++;
746 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, 751 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
747 inti->type, 752 inti->type,
@@ -855,7 +860,9 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
855 goto no_timer; 860 goto no_timer;
856 } 861 }
857 862
863 preempt_disable();
858 now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; 864 now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
865 preempt_enable();
859 sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); 866 sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
860 867
861 /* underflow */ 868 /* underflow */
@@ -864,7 +871,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
864 871
865 __set_cpu_idle(vcpu); 872 __set_cpu_idle(vcpu);
866 hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); 873 hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
867 VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); 874 VCPU_EVENT(vcpu, 4, "enabled wait via clock comparator: %llu ns", sltime);
868no_timer: 875no_timer:
869 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 876 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
870 kvm_vcpu_block(vcpu); 877 kvm_vcpu_block(vcpu);
@@ -894,7 +901,9 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
894 u64 now, sltime; 901 u64 now, sltime;
895 902
896 vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); 903 vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
904 preempt_disable();
897 now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; 905 now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
906 preempt_enable();
898 sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); 907 sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
899 908
900 /* 909 /*
@@ -968,6 +977,10 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
968{ 977{
969 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 978 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
970 979
980 VCPU_EVENT(vcpu, 3, "inject: program irq code 0x%x", irq->u.pgm.code);
981 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
982 irq->u.pgm.code, 0);
983
971 li->irq.pgm = irq->u.pgm; 984 li->irq.pgm = irq->u.pgm;
972 set_bit(IRQ_PEND_PROG, &li->pending_irqs); 985 set_bit(IRQ_PEND_PROG, &li->pending_irqs);
973 return 0; 986 return 0;
@@ -978,9 +991,6 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
978 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 991 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
979 struct kvm_s390_irq irq; 992 struct kvm_s390_irq irq;
980 993
981 VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
982 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, code,
983 0, 1);
984 spin_lock(&li->lock); 994 spin_lock(&li->lock);
985 irq.u.pgm.code = code; 995 irq.u.pgm.code = code;
986 __inject_prog(vcpu, &irq); 996 __inject_prog(vcpu, &irq);
@@ -996,10 +1006,6 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
996 struct kvm_s390_irq irq; 1006 struct kvm_s390_irq irq;
997 int rc; 1007 int rc;
998 1008
999 VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
1000 pgm_info->code);
1001 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
1002 pgm_info->code, 0, 1);
1003 spin_lock(&li->lock); 1009 spin_lock(&li->lock);
1004 irq.u.pgm = *pgm_info; 1010 irq.u.pgm = *pgm_info;
1005 rc = __inject_prog(vcpu, &irq); 1011 rc = __inject_prog(vcpu, &irq);
@@ -1012,11 +1018,11 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
1012{ 1018{
1013 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 1019 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
1014 1020
1015 VCPU_EVENT(vcpu, 3, "inject: external irq params:%x, params2:%llx", 1021 VCPU_EVENT(vcpu, 4, "inject: pfault init parameter block at 0x%llx",
1016 irq->u.ext.ext_params, irq->u.ext.ext_params2); 1022 irq->u.ext.ext_params2);
1017 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT, 1023 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT,
1018 irq->u.ext.ext_params, 1024 irq->u.ext.ext_params,
1019 irq->u.ext.ext_params2, 2); 1025 irq->u.ext.ext_params2);
1020 1026
1021 li->irq.ext = irq->u.ext; 1027 li->irq.ext = irq->u.ext;
1022 set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs); 1028 set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
@@ -1045,10 +1051,10 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
1045 struct kvm_s390_extcall_info *extcall = &li->irq.extcall; 1051 struct kvm_s390_extcall_info *extcall = &li->irq.extcall;
1046 uint16_t src_id = irq->u.extcall.code; 1052 uint16_t src_id = irq->u.extcall.code;
1047 1053
1048 VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u", 1054 VCPU_EVENT(vcpu, 4, "inject: external call source-cpu:%u",
1049 src_id); 1055 src_id);
1050 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL, 1056 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL,
1051 src_id, 0, 2); 1057 src_id, 0);
1052 1058
1053 /* sending vcpu invalid */ 1059 /* sending vcpu invalid */
1054 if (src_id >= KVM_MAX_VCPUS || 1060 if (src_id >= KVM_MAX_VCPUS ||
@@ -1070,10 +1076,10 @@ static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
1070 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 1076 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
1071 struct kvm_s390_prefix_info *prefix = &li->irq.prefix; 1077 struct kvm_s390_prefix_info *prefix = &li->irq.prefix;
1072 1078
1073 VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)", 1079 VCPU_EVENT(vcpu, 3, "inject: set prefix to %x",
1074 irq->u.prefix.address); 1080 irq->u.prefix.address);
1075 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX, 1081 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX,
1076 irq->u.prefix.address, 0, 2); 1082 irq->u.prefix.address, 0);
1077 1083
1078 if (!is_vcpu_stopped(vcpu)) 1084 if (!is_vcpu_stopped(vcpu))
1079 return -EBUSY; 1085 return -EBUSY;
@@ -1090,7 +1096,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
1090 struct kvm_s390_stop_info *stop = &li->irq.stop; 1096 struct kvm_s390_stop_info *stop = &li->irq.stop;
1091 int rc = 0; 1097 int rc = 0;
1092 1098
1093 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2); 1099 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0);
1094 1100
1095 if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS) 1101 if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS)
1096 return -EINVAL; 1102 return -EINVAL;
@@ -1114,8 +1120,8 @@ static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
1114{ 1120{
1115 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 1121 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
1116 1122
1117 VCPU_EVENT(vcpu, 3, "inject: restart type %llx", irq->type); 1123 VCPU_EVENT(vcpu, 3, "%s", "inject: restart int");
1118 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0, 2); 1124 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
1119 1125
1120 set_bit(IRQ_PEND_RESTART, &li->pending_irqs); 1126 set_bit(IRQ_PEND_RESTART, &li->pending_irqs);
1121 return 0; 1127 return 0;
@@ -1126,10 +1132,10 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
1126{ 1132{
1127 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 1133 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
1128 1134
1129 VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", 1135 VCPU_EVENT(vcpu, 4, "inject: emergency from cpu %u",
1130 irq->u.emerg.code); 1136 irq->u.emerg.code);
1131 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, 1137 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
1132 irq->u.emerg.code, 0, 2); 1138 irq->u.emerg.code, 0);
1133 1139
1134 set_bit(irq->u.emerg.code, li->sigp_emerg_pending); 1140 set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
1135 set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); 1141 set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
@@ -1142,10 +1148,10 @@ static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
1142 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 1148 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
1143 struct kvm_s390_mchk_info *mchk = &li->irq.mchk; 1149 struct kvm_s390_mchk_info *mchk = &li->irq.mchk;
1144 1150
1145 VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx", 1151 VCPU_EVENT(vcpu, 3, "inject: machine check mcic 0x%llx",
1146 irq->u.mchk.mcic); 1152 irq->u.mchk.mcic);
1147 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0, 1153 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0,
1148 irq->u.mchk.mcic, 2); 1154 irq->u.mchk.mcic);
1149 1155
1150 /* 1156 /*
1151 * Because repressible machine checks can be indicated along with 1157 * Because repressible machine checks can be indicated along with
@@ -1172,9 +1178,9 @@ static int __inject_ckc(struct kvm_vcpu *vcpu)
1172{ 1178{
1173 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 1179 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
1174 1180
1175 VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP); 1181 VCPU_EVENT(vcpu, 3, "%s", "inject: clock comparator external");
1176 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP, 1182 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
1177 0, 0, 2); 1183 0, 0);
1178 1184
1179 set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); 1185 set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
1180 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); 1186 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
@@ -1185,9 +1191,9 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
1185{ 1191{
1186 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 1192 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
1187 1193
1188 VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER); 1194 VCPU_EVENT(vcpu, 3, "%s", "inject: cpu timer external");
1189 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER, 1195 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
1190 0, 0, 2); 1196 0, 0);
1191 1197
1192 set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); 1198 set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
1193 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); 1199 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
@@ -1435,20 +1441,20 @@ int kvm_s390_inject_vm(struct kvm *kvm,
1435 inti->ext.ext_params2 = s390int->parm64; 1441 inti->ext.ext_params2 = s390int->parm64;
1436 break; 1442 break;
1437 case KVM_S390_INT_SERVICE: 1443 case KVM_S390_INT_SERVICE:
1438 VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm); 1444 VM_EVENT(kvm, 4, "inject: sclp parm:%x", s390int->parm);
1439 inti->ext.ext_params = s390int->parm; 1445 inti->ext.ext_params = s390int->parm;
1440 break; 1446 break;
1441 case KVM_S390_INT_PFAULT_DONE: 1447 case KVM_S390_INT_PFAULT_DONE:
1442 inti->ext.ext_params2 = s390int->parm64; 1448 inti->ext.ext_params2 = s390int->parm64;
1443 break; 1449 break;
1444 case KVM_S390_MCHK: 1450 case KVM_S390_MCHK:
1445 VM_EVENT(kvm, 5, "inject: machine check parm64:%llx", 1451 VM_EVENT(kvm, 3, "inject: machine check mcic 0x%llx",
1446 s390int->parm64); 1452 s390int->parm64);
1447 inti->mchk.cr14 = s390int->parm; /* upper bits are not used */ 1453 inti->mchk.cr14 = s390int->parm; /* upper bits are not used */
1448 inti->mchk.mcic = s390int->parm64; 1454 inti->mchk.mcic = s390int->parm64;
1449 break; 1455 break;
1450 case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: 1456 case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
1451 if (inti->type & IOINT_AI_MASK) 1457 if (inti->type & KVM_S390_INT_IO_AI_MASK)
1452 VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); 1458 VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
1453 else 1459 else
1454 VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x", 1460 VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
@@ -1535,8 +1541,6 @@ static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
1535 1541
1536 switch (irq->type) { 1542 switch (irq->type) {
1537 case KVM_S390_PROGRAM_INT: 1543 case KVM_S390_PROGRAM_INT:
1538 VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
1539 irq->u.pgm.code);
1540 rc = __inject_prog(vcpu, irq); 1544 rc = __inject_prog(vcpu, irq);
1541 break; 1545 break;
1542 case KVM_S390_SIGP_SET_PREFIX: 1546 case KVM_S390_SIGP_SET_PREFIX:
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index f32f843a3631..6861b74649ae 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -28,6 +28,7 @@
28#include <linux/vmalloc.h> 28#include <linux/vmalloc.h>
29#include <asm/asm-offsets.h> 29#include <asm/asm-offsets.h>
30#include <asm/lowcore.h> 30#include <asm/lowcore.h>
31#include <asm/etr.h>
31#include <asm/pgtable.h> 32#include <asm/pgtable.h>
32#include <asm/nmi.h> 33#include <asm/nmi.h>
33#include <asm/switch_to.h> 34#include <asm/switch_to.h>
@@ -108,6 +109,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
108 { "diagnose_10", VCPU_STAT(diagnose_10) }, 109 { "diagnose_10", VCPU_STAT(diagnose_10) },
109 { "diagnose_44", VCPU_STAT(diagnose_44) }, 110 { "diagnose_44", VCPU_STAT(diagnose_44) },
110 { "diagnose_9c", VCPU_STAT(diagnose_9c) }, 111 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
112 { "diagnose_258", VCPU_STAT(diagnose_258) },
113 { "diagnose_308", VCPU_STAT(diagnose_308) },
114 { "diagnose_500", VCPU_STAT(diagnose_500) },
111 { NULL } 115 { NULL }
112}; 116};
113 117
@@ -124,6 +128,7 @@ unsigned long kvm_s390_fac_list_mask_size(void)
124} 128}
125 129
126static struct gmap_notifier gmap_notifier; 130static struct gmap_notifier gmap_notifier;
131debug_info_t *kvm_s390_dbf;
127 132
128/* Section: not file related */ 133/* Section: not file related */
129int kvm_arch_hardware_enable(void) 134int kvm_arch_hardware_enable(void)
@@ -134,24 +139,69 @@ int kvm_arch_hardware_enable(void)
134 139
135static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address); 140static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
136 141
142/*
143 * This callback is executed during stop_machine(). All CPUs are therefore
144 * temporarily stopped. In order not to change guest behavior, we have to
145 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
146 * so a CPU won't be stopped while calculating with the epoch.
147 */
148static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
149 void *v)
150{
151 struct kvm *kvm;
152 struct kvm_vcpu *vcpu;
153 int i;
154 unsigned long long *delta = v;
155
156 list_for_each_entry(kvm, &vm_list, vm_list) {
157 kvm->arch.epoch -= *delta;
158 kvm_for_each_vcpu(i, vcpu, kvm) {
159 vcpu->arch.sie_block->epoch -= *delta;
160 }
161 }
162 return NOTIFY_OK;
163}
164
165static struct notifier_block kvm_clock_notifier = {
166 .notifier_call = kvm_clock_sync,
167};
168
137int kvm_arch_hardware_setup(void) 169int kvm_arch_hardware_setup(void)
138{ 170{
139 gmap_notifier.notifier_call = kvm_gmap_notifier; 171 gmap_notifier.notifier_call = kvm_gmap_notifier;
140 gmap_register_ipte_notifier(&gmap_notifier); 172 gmap_register_ipte_notifier(&gmap_notifier);
173 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
174 &kvm_clock_notifier);
141 return 0; 175 return 0;
142} 176}
143 177
144void kvm_arch_hardware_unsetup(void) 178void kvm_arch_hardware_unsetup(void)
145{ 179{
146 gmap_unregister_ipte_notifier(&gmap_notifier); 180 gmap_unregister_ipte_notifier(&gmap_notifier);
181 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
182 &kvm_clock_notifier);
147} 183}
148 184
149int kvm_arch_init(void *opaque) 185int kvm_arch_init(void *opaque)
150{ 186{
187 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
188 if (!kvm_s390_dbf)
189 return -ENOMEM;
190
191 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
192 debug_unregister(kvm_s390_dbf);
193 return -ENOMEM;
194 }
195
151 /* Register floating interrupt controller interface. */ 196 /* Register floating interrupt controller interface. */
152 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); 197 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
153} 198}
154 199
200void kvm_arch_exit(void)
201{
202 debug_unregister(kvm_s390_dbf);
203}
204
155/* Section: device related */ 205/* Section: device related */
156long kvm_arch_dev_ioctl(struct file *filp, 206long kvm_arch_dev_ioctl(struct file *filp,
157 unsigned int ioctl, unsigned long arg) 207 unsigned int ioctl, unsigned long arg)
@@ -281,10 +331,12 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
281 331
282 switch (cap->cap) { 332 switch (cap->cap) {
283 case KVM_CAP_S390_IRQCHIP: 333 case KVM_CAP_S390_IRQCHIP:
334 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
284 kvm->arch.use_irqchip = 1; 335 kvm->arch.use_irqchip = 1;
285 r = 0; 336 r = 0;
286 break; 337 break;
287 case KVM_CAP_S390_USER_SIGP: 338 case KVM_CAP_S390_USER_SIGP:
339 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
288 kvm->arch.user_sigp = 1; 340 kvm->arch.user_sigp = 1;
289 r = 0; 341 r = 0;
290 break; 342 break;
@@ -295,8 +347,11 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
295 r = 0; 347 r = 0;
296 } else 348 } else
297 r = -EINVAL; 349 r = -EINVAL;
350 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
351 r ? "(not available)" : "(success)");
298 break; 352 break;
299 case KVM_CAP_S390_USER_STSI: 353 case KVM_CAP_S390_USER_STSI:
354 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
300 kvm->arch.user_stsi = 1; 355 kvm->arch.user_stsi = 1;
301 r = 0; 356 r = 0;
302 break; 357 break;
@@ -314,6 +369,8 @@ static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *att
314 switch (attr->attr) { 369 switch (attr->attr) {
315 case KVM_S390_VM_MEM_LIMIT_SIZE: 370 case KVM_S390_VM_MEM_LIMIT_SIZE:
316 ret = 0; 371 ret = 0;
372 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
373 kvm->arch.gmap->asce_end);
317 if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr)) 374 if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
318 ret = -EFAULT; 375 ret = -EFAULT;
319 break; 376 break;
@@ -330,7 +387,13 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
330 unsigned int idx; 387 unsigned int idx;
331 switch (attr->attr) { 388 switch (attr->attr) {
332 case KVM_S390_VM_MEM_ENABLE_CMMA: 389 case KVM_S390_VM_MEM_ENABLE_CMMA:
390 /* enable CMMA only for z10 and later (EDAT_1) */
391 ret = -EINVAL;
392 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
393 break;
394
333 ret = -EBUSY; 395 ret = -EBUSY;
396 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
334 mutex_lock(&kvm->lock); 397 mutex_lock(&kvm->lock);
335 if (atomic_read(&kvm->online_vcpus) == 0) { 398 if (atomic_read(&kvm->online_vcpus) == 0) {
336 kvm->arch.use_cmma = 1; 399 kvm->arch.use_cmma = 1;
@@ -339,6 +402,11 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
339 mutex_unlock(&kvm->lock); 402 mutex_unlock(&kvm->lock);
340 break; 403 break;
341 case KVM_S390_VM_MEM_CLR_CMMA: 404 case KVM_S390_VM_MEM_CLR_CMMA:
405 ret = -EINVAL;
406 if (!kvm->arch.use_cmma)
407 break;
408
409 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
342 mutex_lock(&kvm->lock); 410 mutex_lock(&kvm->lock);
343 idx = srcu_read_lock(&kvm->srcu); 411 idx = srcu_read_lock(&kvm->srcu);
344 s390_reset_cmma(kvm->arch.gmap->mm); 412 s390_reset_cmma(kvm->arch.gmap->mm);
@@ -374,6 +442,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
374 } 442 }
375 } 443 }
376 mutex_unlock(&kvm->lock); 444 mutex_unlock(&kvm->lock);
445 VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
377 break; 446 break;
378 } 447 }
379 default: 448 default:
@@ -400,22 +469,26 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
400 kvm->arch.crypto.crycb->aes_wrapping_key_mask, 469 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
401 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 470 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
402 kvm->arch.crypto.aes_kw = 1; 471 kvm->arch.crypto.aes_kw = 1;
472 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
403 break; 473 break;
404 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 474 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
405 get_random_bytes( 475 get_random_bytes(
406 kvm->arch.crypto.crycb->dea_wrapping_key_mask, 476 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
407 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 477 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
408 kvm->arch.crypto.dea_kw = 1; 478 kvm->arch.crypto.dea_kw = 1;
479 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
409 break; 480 break;
410 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 481 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
411 kvm->arch.crypto.aes_kw = 0; 482 kvm->arch.crypto.aes_kw = 0;
412 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0, 483 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
413 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 484 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
485 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
414 break; 486 break;
415 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 487 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
416 kvm->arch.crypto.dea_kw = 0; 488 kvm->arch.crypto.dea_kw = 0;
417 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0, 489 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
418 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 490 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
491 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
419 break; 492 break;
420 default: 493 default:
421 mutex_unlock(&kvm->lock); 494 mutex_unlock(&kvm->lock);
@@ -440,6 +513,7 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
440 513
441 if (gtod_high != 0) 514 if (gtod_high != 0)
442 return -EINVAL; 515 return -EINVAL;
516 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high);
443 517
444 return 0; 518 return 0;
445} 519}
@@ -459,12 +533,15 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
459 return r; 533 return r;
460 534
461 mutex_lock(&kvm->lock); 535 mutex_lock(&kvm->lock);
536 preempt_disable();
462 kvm->arch.epoch = gtod - host_tod; 537 kvm->arch.epoch = gtod - host_tod;
463 kvm_s390_vcpu_block_all(kvm); 538 kvm_s390_vcpu_block_all(kvm);
464 kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) 539 kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
465 cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch; 540 cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
466 kvm_s390_vcpu_unblock_all(kvm); 541 kvm_s390_vcpu_unblock_all(kvm);
542 preempt_enable();
467 mutex_unlock(&kvm->lock); 543 mutex_unlock(&kvm->lock);
544 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod);
468 return 0; 545 return 0;
469} 546}
470 547
@@ -496,6 +573,7 @@ static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
496 if (copy_to_user((void __user *)attr->addr, &gtod_high, 573 if (copy_to_user((void __user *)attr->addr, &gtod_high,
497 sizeof(gtod_high))) 574 sizeof(gtod_high)))
498 return -EFAULT; 575 return -EFAULT;
576 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high);
499 577
500 return 0; 578 return 0;
501} 579}
@@ -509,9 +587,12 @@ static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
509 if (r) 587 if (r)
510 return r; 588 return r;
511 589
590 preempt_disable();
512 gtod = host_tod + kvm->arch.epoch; 591 gtod = host_tod + kvm->arch.epoch;
592 preempt_enable();
513 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod))) 593 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
514 return -EFAULT; 594 return -EFAULT;
595 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod);
515 596
516 return 0; 597 return 0;
517} 598}
@@ -821,7 +902,9 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
821 } 902 }
822 903
823 /* Enable storage key handling for the guest */ 904 /* Enable storage key handling for the guest */
824 s390_enable_skey(); 905 r = s390_enable_skey();
906 if (r)
907 goto out;
825 908
826 for (i = 0; i < args->count; i++) { 909 for (i = 0; i < args->count; i++) {
827 hva = gfn_to_hva(kvm, args->start_gfn + i); 910 hva = gfn_to_hva(kvm, args->start_gfn + i);
@@ -879,8 +962,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
879 if (kvm->arch.use_irqchip) { 962 if (kvm->arch.use_irqchip) {
880 /* Set up dummy routing. */ 963 /* Set up dummy routing. */
881 memset(&routing, 0, sizeof(routing)); 964 memset(&routing, 0, sizeof(routing));
882 kvm_set_irq_routing(kvm, &routing, 0, 0); 965 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
883 r = 0;
884 } 966 }
885 break; 967 break;
886 } 968 }
@@ -1043,7 +1125,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1043 1125
1044 sprintf(debug_name, "kvm-%u", current->pid); 1126 sprintf(debug_name, "kvm-%u", current->pid);
1045 1127
1046 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long)); 1128 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1047 if (!kvm->arch.dbf) 1129 if (!kvm->arch.dbf)
1048 goto out_err; 1130 goto out_err;
1049 1131
@@ -1086,7 +1168,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1086 mutex_init(&kvm->arch.ipte_mutex); 1168 mutex_init(&kvm->arch.ipte_mutex);
1087 1169
1088 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 1170 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1089 VM_EVENT(kvm, 3, "%s", "vm created"); 1171 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1090 1172
1091 if (type & KVM_VM_S390_UCONTROL) { 1173 if (type & KVM_VM_S390_UCONTROL) {
1092 kvm->arch.gmap = NULL; 1174 kvm->arch.gmap = NULL;
@@ -1103,6 +1185,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1103 kvm->arch.epoch = 0; 1185 kvm->arch.epoch = 0;
1104 1186
1105 spin_lock_init(&kvm->arch.start_stop_lock); 1187 spin_lock_init(&kvm->arch.start_stop_lock);
1188 KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
1106 1189
1107 return 0; 1190 return 0;
1108out_err: 1191out_err:
@@ -1110,6 +1193,7 @@ out_err:
1110 free_page((unsigned long)kvm->arch.model.fac); 1193 free_page((unsigned long)kvm->arch.model.fac);
1111 debug_unregister(kvm->arch.dbf); 1194 debug_unregister(kvm->arch.dbf);
1112 free_page((unsigned long)(kvm->arch.sca)); 1195 free_page((unsigned long)(kvm->arch.sca));
1196 KVM_EVENT(3, "creation of vm failed: %d", rc);
1113 return rc; 1197 return rc;
1114} 1198}
1115 1199
@@ -1131,7 +1215,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1131 if (kvm_is_ucontrol(vcpu->kvm)) 1215 if (kvm_is_ucontrol(vcpu->kvm))
1132 gmap_free(vcpu->arch.gmap); 1216 gmap_free(vcpu->arch.gmap);
1133 1217
1134 if (kvm_s390_cmma_enabled(vcpu->kvm)) 1218 if (vcpu->kvm->arch.use_cmma)
1135 kvm_s390_vcpu_unsetup_cmma(vcpu); 1219 kvm_s390_vcpu_unsetup_cmma(vcpu);
1136 free_page((unsigned long)(vcpu->arch.sie_block)); 1220 free_page((unsigned long)(vcpu->arch.sie_block));
1137 1221
@@ -1166,6 +1250,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
1166 gmap_free(kvm->arch.gmap); 1250 gmap_free(kvm->arch.gmap);
1167 kvm_s390_destroy_adapters(kvm); 1251 kvm_s390_destroy_adapters(kvm);
1168 kvm_s390_clear_float_irqs(kvm); 1252 kvm_s390_clear_float_irqs(kvm);
1253 KVM_EVENT(3, "vm 0x%p destroyed", kvm);
1169} 1254}
1170 1255
1171/* Section: vcpu related */ 1256/* Section: vcpu related */
@@ -1264,7 +1349,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1264void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 1349void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1265{ 1350{
1266 mutex_lock(&vcpu->kvm->lock); 1351 mutex_lock(&vcpu->kvm->lock);
1352 preempt_disable();
1267 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 1353 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1354 preempt_enable();
1268 mutex_unlock(&vcpu->kvm->lock); 1355 mutex_unlock(&vcpu->kvm->lock);
1269 if (!kvm_is_ucontrol(vcpu->kvm)) 1356 if (!kvm_is_ucontrol(vcpu->kvm))
1270 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 1357 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
@@ -1342,7 +1429,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1342 } 1429 }
1343 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 1430 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1344 1431
1345 if (kvm_s390_cmma_enabled(vcpu->kvm)) { 1432 if (vcpu->kvm->arch.use_cmma) {
1346 rc = kvm_s390_vcpu_setup_cmma(vcpu); 1433 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1347 if (rc) 1434 if (rc)
1348 return rc; 1435 return rc;
@@ -1723,18 +1810,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1723 return rc; 1810 return rc;
1724} 1811}
1725 1812
1726bool kvm_s390_cmma_enabled(struct kvm *kvm)
1727{
1728 if (!MACHINE_IS_LPAR)
1729 return false;
1730 /* only enable for z10 and later */
1731 if (!MACHINE_HAS_EDAT1)
1732 return false;
1733 if (!kvm->arch.use_cmma)
1734 return false;
1735 return true;
1736}
1737
1738static bool ibs_enabled(struct kvm_vcpu *vcpu) 1813static bool ibs_enabled(struct kvm_vcpu *vcpu)
1739{ 1814{
1740 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS; 1815 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
@@ -2340,6 +2415,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2340 case KVM_CAP_S390_CSS_SUPPORT: 2415 case KVM_CAP_S390_CSS_SUPPORT:
2341 if (!vcpu->kvm->arch.css_support) { 2416 if (!vcpu->kvm->arch.css_support) {
2342 vcpu->kvm->arch.css_support = 1; 2417 vcpu->kvm->arch.css_support = 1;
2418 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2343 trace_kvm_s390_enable_css(vcpu->kvm); 2419 trace_kvm_s390_enable_css(vcpu->kvm);
2344 } 2420 }
2345 r = 0; 2421 r = 0;
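
The SET/GET TOD hunks above keep a single per-VM offset ("epoch") so that the guest clock can be derived from the host clock at any time. A minimal userspace sketch of that arithmetic is below; the 64-bit unsigned wrap-around is what makes "epoch = guest - host" and "guest = host + epoch" consistent even when the guest clock is behind the host's. Variable names are illustrative, and the kernel additionally wraps these updates in preempt_disable()/preempt_enable() as shown in the hunks, which the sketch omits.

/* Sketch: guest TOD = host TOD + epoch, epoch = guest TOD - host TOD.
 * Unsigned 64-bit wrap-around keeps both identities consistent even when
 * the guest clock lags the host clock.
 */
#include <stdio.h>
#include <stdint.h>

static uint64_t epoch;                  /* per-VM offset, like kvm->arch.epoch */

static void set_guest_tod(uint64_t host_tod, uint64_t guest_tod)
{
        epoch = guest_tod - host_tod;   /* mirrors kvm_s390_set_tod_low() */
}

static uint64_t get_guest_tod(uint64_t host_tod)
{
        return host_tod + epoch;        /* mirrors kvm_s390_get_tod_low() */
}

int main(void)
{
        uint64_t host = 0x123456789abcULL;

        set_guest_tod(host, 0x42ULL);   /* guest clock far behind the host */
        printf("guest TOD now:            0x%llx\n",
               (unsigned long long)get_guest_tod(host));
        printf("guest TOD 100 units later: 0x%llx\n",
               (unsigned long long)get_guest_tod(host + 100));
        return 0;
}
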
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index c5704786e473..c446aabf60d3 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -27,6 +27,13 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
27#define TDB_FORMAT1 1 27#define TDB_FORMAT1 1
28#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1)) 28#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
29 29
30extern debug_info_t *kvm_s390_dbf;
31#define KVM_EVENT(d_loglevel, d_string, d_args...)\
32do { \
33 debug_sprintf_event(kvm_s390_dbf, d_loglevel, d_string "\n", \
34 d_args); \
35} while (0)
36
30#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ 37#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
31do { \ 38do { \
32 debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \ 39 debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \
@@ -65,6 +72,8 @@ static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu)
65 72
66static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) 73static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
67{ 74{
75 VCPU_EVENT(vcpu, 3, "set prefix of cpu %03u to 0x%x", vcpu->vcpu_id,
76 prefix);
68 vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT; 77 vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT;
69 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 78 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
70 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); 79 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
@@ -217,8 +226,6 @@ void exit_sie(struct kvm_vcpu *vcpu);
217void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu); 226void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu);
218int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu); 227int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
219void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); 228void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
220/* is cmma enabled */
221bool kvm_s390_cmma_enabled(struct kvm *kvm);
222unsigned long kvm_s390_fac_list_mask_size(void); 229unsigned long kvm_s390_fac_list_mask_size(void);
223extern unsigned long kvm_s390_fac_list_mask[]; 230extern unsigned long kvm_s390_fac_list_mask[];
224 231
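
The new KVM_EVENT() macro above follows the same pattern as the existing VM_EVENT()/VCPU_EVENT() macros: the "\n" is glued onto the caller's format string by literal concatenation, and the named variadic argument (d_args...) is forwarded unchanged. A compile-time sketch of that pattern, with printf() standing in for the s390-only debug_sprintf_event() backend, is shown below; MY_EVENT is an illustrative name.

/* Sketch of a KVM_EVENT()/VM_EVENT()-style debug macro: literal
 * concatenation appends the newline, and the GNU named-variadic form
 * (args... / ##args), as used by the kernel macros, forwards the
 * arguments.  printf() stands in for debug_sprintf_event().
 */
#include <stdio.h>

#define MY_EVENT(level, fmt, args...)                   \
do {                                                    \
        printf("[%d] " fmt "\n", level, ##args);        \
} while (0)

int main(void)
{
        MY_EVENT(3, "vm created with type %lu", 0UL);
        MY_EVENT(3, "%s", "ENABLE: CSS support");
        return 0;
}
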
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index ad4242245771..4d21dc4d1a84 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -53,11 +53,14 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
53 kvm_s390_set_psw_cc(vcpu, 3); 53 kvm_s390_set_psw_cc(vcpu, 3);
54 return 0; 54 return 0;
55 } 55 }
56 VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val);
56 val = (val - hostclk) & ~0x3fUL; 57 val = (val - hostclk) & ~0x3fUL;
57 58
58 mutex_lock(&vcpu->kvm->lock); 59 mutex_lock(&vcpu->kvm->lock);
60 preempt_disable();
59 kvm_for_each_vcpu(i, cpup, vcpu->kvm) 61 kvm_for_each_vcpu(i, cpup, vcpu->kvm)
60 cpup->arch.sie_block->epoch = val; 62 cpup->arch.sie_block->epoch = val;
63 preempt_enable();
61 mutex_unlock(&vcpu->kvm->lock); 64 mutex_unlock(&vcpu->kvm->lock);
62 65
63 kvm_s390_set_psw_cc(vcpu, 0); 66 kvm_s390_set_psw_cc(vcpu, 0);
@@ -98,8 +101,6 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
98 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 101 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
99 102
100 kvm_s390_set_prefix(vcpu, address); 103 kvm_s390_set_prefix(vcpu, address);
101
102 VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
103 trace_kvm_s390_handle_prefix(vcpu, 1, address); 104 trace_kvm_s390_handle_prefix(vcpu, 1, address);
104 return 0; 105 return 0;
105} 106}
@@ -129,7 +130,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
129 if (rc) 130 if (rc)
130 return kvm_s390_inject_prog_cond(vcpu, rc); 131 return kvm_s390_inject_prog_cond(vcpu, rc);
131 132
132 VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); 133 VCPU_EVENT(vcpu, 3, "STPX: storing prefix 0x%x into 0x%llx", address, operand2);
133 trace_kvm_s390_handle_prefix(vcpu, 0, address); 134 trace_kvm_s390_handle_prefix(vcpu, 0, address);
134 return 0; 135 return 0;
135} 136}
@@ -155,7 +156,7 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
155 if (rc) 156 if (rc)
156 return kvm_s390_inject_prog_cond(vcpu, rc); 157 return kvm_s390_inject_prog_cond(vcpu, rc);
157 158
158 VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", ga); 159 VCPU_EVENT(vcpu, 3, "STAP: storing cpu address (%u) to 0x%llx", vcpu_id, ga);
159 trace_kvm_s390_handle_stap(vcpu, ga); 160 trace_kvm_s390_handle_stap(vcpu, ga);
160 return 0; 161 return 0;
161} 162}
@@ -167,6 +168,7 @@ static int __skey_check_enable(struct kvm_vcpu *vcpu)
167 return rc; 168 return rc;
168 169
169 rc = s390_enable_skey(); 170 rc = s390_enable_skey();
171 VCPU_EVENT(vcpu, 3, "%s", "enabling storage keys for guest");
170 trace_kvm_s390_skey_related_inst(vcpu); 172 trace_kvm_s390_skey_related_inst(vcpu);
171 vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); 173 vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
172 return rc; 174 return rc;
@@ -370,7 +372,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
370 &fac, sizeof(fac)); 372 &fac, sizeof(fac));
371 if (rc) 373 if (rc)
372 return rc; 374 return rc;
373 VCPU_EVENT(vcpu, 5, "store facility list value %x", fac); 375 VCPU_EVENT(vcpu, 3, "STFL: store facility list 0x%x", fac);
374 trace_kvm_s390_handle_stfl(vcpu, fac); 376 trace_kvm_s390_handle_stfl(vcpu, fac);
375 return 0; 377 return 0;
376} 378}
@@ -468,7 +470,7 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
468 if (rc) 470 if (rc)
469 return kvm_s390_inject_prog_cond(vcpu, rc); 471 return kvm_s390_inject_prog_cond(vcpu, rc);
470 472
471 VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); 473 VCPU_EVENT(vcpu, 3, "STIDP: store cpu id 0x%llx", stidp_data);
472 return 0; 474 return 0;
473} 475}
474 476
@@ -521,7 +523,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
521 ar_t ar; 523 ar_t ar;
522 524
523 vcpu->stat.instruction_stsi++; 525 vcpu->stat.instruction_stsi++;
524 VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); 526 VCPU_EVENT(vcpu, 3, "STSI: fc: %u sel1: %u sel2: %u", fc, sel1, sel2);
525 527
526 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 528 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
527 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 529 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -758,10 +760,10 @@ static int handle_essa(struct kvm_vcpu *vcpu)
758 struct gmap *gmap; 760 struct gmap *gmap;
759 int i; 761 int i;
760 762
761 VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries); 763 VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries);
762 gmap = vcpu->arch.gmap; 764 gmap = vcpu->arch.gmap;
763 vcpu->stat.instruction_essa++; 765 vcpu->stat.instruction_essa++;
764 if (!kvm_s390_cmma_enabled(vcpu->kvm)) 766 if (!vcpu->kvm->arch.use_cmma)
765 return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); 767 return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
766 768
767 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 769 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
@@ -829,7 +831,7 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
829 if (ga & 3) 831 if (ga & 3)
830 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 832 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
831 833
832 VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); 834 VCPU_EVENT(vcpu, 4, "LCTL: r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
833 trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga); 835 trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
834 836
835 nr_regs = ((reg3 - reg1) & 0xf) + 1; 837 nr_regs = ((reg3 - reg1) & 0xf) + 1;
@@ -868,7 +870,7 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
868 if (ga & 3) 870 if (ga & 3)
869 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 871 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
870 872
871 VCPU_EVENT(vcpu, 5, "stctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); 873 VCPU_EVENT(vcpu, 4, "STCTL r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
872 trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga); 874 trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
873 875
874 reg = reg1; 876 reg = reg1;
@@ -902,7 +904,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
902 if (ga & 7) 904 if (ga & 7)
903 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 905 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
904 906
905 VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); 907 VCPU_EVENT(vcpu, 4, "LCTLG: r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
906 trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga); 908 trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
907 909
908 nr_regs = ((reg3 - reg1) & 0xf) + 1; 910 nr_regs = ((reg3 - reg1) & 0xf) + 1;
@@ -940,7 +942,7 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
940 if (ga & 7) 942 if (ga & 7)
941 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 943 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
942 944
943 VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); 945 VCPU_EVENT(vcpu, 4, "STCTG r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
944 trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga); 946 trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
945 947
946 reg = reg1; 948 reg = reg1;
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 72e58bd2bee7..da690b69f9fe 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -205,9 +205,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
205 *reg &= 0xffffffff00000000UL; 205 *reg &= 0xffffffff00000000UL;
206 *reg |= SIGP_STATUS_INCORRECT_STATE; 206 *reg |= SIGP_STATUS_INCORRECT_STATE;
207 return SIGP_CC_STATUS_STORED; 207 return SIGP_CC_STATUS_STORED;
208 } else if (rc == 0) {
209 VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x",
210 dst_vcpu->vcpu_id, irq.u.prefix.address);
211 } 208 }
212 209
213 return rc; 210 return rc;
@@ -371,7 +368,8 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
371 return rc; 368 return rc;
372} 369}
373 370
374static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code) 371static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code,
372 u16 cpu_addr)
375{ 373{
376 if (!vcpu->kvm->arch.user_sigp) 374 if (!vcpu->kvm->arch.user_sigp)
377 return 0; 375 return 0;
@@ -414,9 +412,8 @@ static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code)
414 default: 412 default:
415 vcpu->stat.instruction_sigp_unknown++; 413 vcpu->stat.instruction_sigp_unknown++;
416 } 414 }
417 415 VCPU_EVENT(vcpu, 3, "SIGP: order %u for CPU %d handled in userspace",
418 VCPU_EVENT(vcpu, 4, "sigp order %u: completely handled in user space", 416 order_code, cpu_addr);
419 order_code);
420 417
421 return 1; 418 return 1;
422} 419}
@@ -435,7 +432,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
435 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 432 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
436 433
437 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL); 434 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
438 if (handle_sigp_order_in_user_space(vcpu, order_code)) 435 if (handle_sigp_order_in_user_space(vcpu, order_code, cpu_addr))
439 return -EOPNOTSUPP; 436 return -EOPNOTSUPP;
440 437
441 if (r1 % 2) 438 if (r1 % 2)
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 3208d33a48cb..cc1d6c68356f 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -105,11 +105,22 @@ TRACE_EVENT(kvm_s390_vcpu_start_stop,
105 {KVM_S390_PROGRAM_INT, "program interrupt"}, \ 105 {KVM_S390_PROGRAM_INT, "program interrupt"}, \
106 {KVM_S390_SIGP_SET_PREFIX, "sigp set prefix"}, \ 106 {KVM_S390_SIGP_SET_PREFIX, "sigp set prefix"}, \
107 {KVM_S390_RESTART, "sigp restart"}, \ 107 {KVM_S390_RESTART, "sigp restart"}, \
108 {KVM_S390_INT_PFAULT_INIT, "pfault init"}, \
109 {KVM_S390_INT_PFAULT_DONE, "pfault done"}, \
110 {KVM_S390_MCHK, "machine check"}, \
111 {KVM_S390_INT_CLOCK_COMP, "clock comparator"}, \
112 {KVM_S390_INT_CPU_TIMER, "cpu timer"}, \
108 {KVM_S390_INT_VIRTIO, "virtio interrupt"}, \ 113 {KVM_S390_INT_VIRTIO, "virtio interrupt"}, \
109 {KVM_S390_INT_SERVICE, "sclp interrupt"}, \ 114 {KVM_S390_INT_SERVICE, "sclp interrupt"}, \
110 {KVM_S390_INT_EMERGENCY, "sigp emergency"}, \ 115 {KVM_S390_INT_EMERGENCY, "sigp emergency"}, \
111 {KVM_S390_INT_EXTERNAL_CALL, "sigp ext call"} 116 {KVM_S390_INT_EXTERNAL_CALL, "sigp ext call"}
112 117
118#define get_irq_name(__type) \
119 (__type > KVM_S390_INT_IO_MAX ? \
120 __print_symbolic(__type, kvm_s390_int_type) : \
121 (__type & KVM_S390_INT_IO_AI_MASK ? \
122 "adapter I/O interrupt" : "subchannel I/O interrupt"))
123
113TRACE_EVENT(kvm_s390_inject_vm, 124TRACE_EVENT(kvm_s390_inject_vm,
114 TP_PROTO(__u64 type, __u32 parm, __u64 parm64, int who), 125 TP_PROTO(__u64 type, __u32 parm, __u64 parm64, int who),
115 TP_ARGS(type, parm, parm64, who), 126 TP_ARGS(type, parm, parm64, who),
@@ -131,22 +142,19 @@ TRACE_EVENT(kvm_s390_inject_vm,
131 TP_printk("inject%s: type:%x (%s) parm:%x parm64:%llx", 142 TP_printk("inject%s: type:%x (%s) parm:%x parm64:%llx",
132 (__entry->who == 1) ? " (from kernel)" : 143 (__entry->who == 1) ? " (from kernel)" :
133 (__entry->who == 2) ? " (from user)" : "", 144 (__entry->who == 2) ? " (from user)" : "",
134 __entry->inttype, 145 __entry->inttype, get_irq_name(__entry->inttype),
135 __print_symbolic(__entry->inttype, kvm_s390_int_type),
136 __entry->parm, __entry->parm64) 146 __entry->parm, __entry->parm64)
137 ); 147 );
138 148
139TRACE_EVENT(kvm_s390_inject_vcpu, 149TRACE_EVENT(kvm_s390_inject_vcpu,
140 TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64, \ 150 TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64),
141 int who), 151 TP_ARGS(id, type, parm, parm64),
142 TP_ARGS(id, type, parm, parm64, who),
143 152
144 TP_STRUCT__entry( 153 TP_STRUCT__entry(
145 __field(int, id) 154 __field(int, id)
146 __field(__u32, inttype) 155 __field(__u32, inttype)
147 __field(__u32, parm) 156 __field(__u32, parm)
148 __field(__u64, parm64) 157 __field(__u64, parm64)
149 __field(int, who)
150 ), 158 ),
151 159
152 TP_fast_assign( 160 TP_fast_assign(
@@ -154,15 +162,12 @@ TRACE_EVENT(kvm_s390_inject_vcpu,
154 __entry->inttype = type & 0x00000000ffffffff; 162 __entry->inttype = type & 0x00000000ffffffff;
155 __entry->parm = parm; 163 __entry->parm = parm;
156 __entry->parm64 = parm64; 164 __entry->parm64 = parm64;
157 __entry->who = who;
158 ), 165 ),
159 166
160 TP_printk("inject%s (vcpu %d): type:%x (%s) parm:%x parm64:%llx", 167 TP_printk("inject (vcpu %d): type:%x (%s) parm:%x parm64:%llx",
161 (__entry->who == 1) ? " (from kernel)" :
162 (__entry->who == 2) ? " (from user)" : "",
163 __entry->id, __entry->inttype, 168 __entry->id, __entry->inttype,
164 __print_symbolic(__entry->inttype, kvm_s390_int_type), 169 get_irq_name(__entry->inttype), __entry->parm,
165 __entry->parm, __entry->parm64) 170 __entry->parm64)
166 ); 171 );
167 172
168/* 173/*
@@ -189,8 +194,8 @@ TRACE_EVENT(kvm_s390_deliver_interrupt,
189 TP_printk("deliver interrupt (vcpu %d): type:%x (%s) " \ 194 TP_printk("deliver interrupt (vcpu %d): type:%x (%s) " \
190 "data:%08llx %016llx", 195 "data:%08llx %016llx",
191 __entry->id, __entry->inttype, 196 __entry->id, __entry->inttype,
192 __print_symbolic(__entry->inttype, kvm_s390_int_type), 197 get_irq_name(__entry->inttype), __entry->data0,
193 __entry->data0, __entry->data1) 198 __entry->data1)
194 ); 199 );
195 200
196/* 201/*
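
The get_irq_name() helper added above classifies an interrupt type in two steps: anything above the I/O range is looked up in the symbolic table, and anything inside the I/O range is split into adapter versus subchannel I/O by a single mask bit. A small sketch of that decision is below; the constants are placeholders chosen only to be internally consistent, since the real values come from the s390 KVM UAPI headers, and the table lookup is reduced to one entry.

/* Sketch of the get_irq_name() selection: non-I/O types go through a
 * symbolic lookup, I/O types are classified by the adapter-interrupt bit.
 * All constants below are placeholders for the real UAPI values.
 */
#include <stdio.h>
#include <stdint.h>

#define INT_IO_MAX      0x07ffffffU     /* placeholder: top of the I/O-type range */
#define INT_IO_AI_MASK  0x04000000U     /* placeholder: adapter-interrupt bit */
#define INT_VIRTIO      0xffff2603U     /* placeholder: a non-I/O interrupt type */

static const char *irq_name(uint32_t type)
{
        if (type > INT_IO_MAX)          /* symbolic table, reduced to one entry */
                return type == INT_VIRTIO ? "virtio interrupt" : "unknown";
        return (type & INT_IO_AI_MASK) ? "adapter I/O interrupt"
                                       : "subchannel I/O interrupt";
}

int main(void)
{
        printf("%s\n", irq_name(INT_VIRTIO));
        printf("%s\n", irq_name(INT_IO_AI_MASK | 0x10));
        printf("%s\n", irq_name(0x10));
        return 0;
}
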
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 49ec9038ec14..c12e845f59e6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -252,6 +252,11 @@ struct kvm_pio_request {
252 int size; 252 int size;
253}; 253};
254 254
255struct rsvd_bits_validate {
256 u64 rsvd_bits_mask[2][4];
257 u64 bad_mt_xwr;
258};
259
255/* 260/*
256 * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level 261 * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
257 * 32-bit). The kvm_mmu structure abstracts the details of the current mmu 262 * 32-bit). The kvm_mmu structure abstracts the details of the current mmu
@@ -289,8 +294,15 @@ struct kvm_mmu {
289 294
290 u64 *pae_root; 295 u64 *pae_root;
291 u64 *lm_root; 296 u64 *lm_root;
292 u64 rsvd_bits_mask[2][4]; 297
293 u64 bad_mt_xwr; 298 /*
299 * check zero bits on shadow page table entries, these
300 * bits include not only hardware reserved bits but also
301 * the bits spte never used.
302 */
303 struct rsvd_bits_validate shadow_zero_check;
304
305 struct rsvd_bits_validate guest_rsvd_check;
294 306
295 /* 307 /*
296 * Bitmap: bit set = last pte in walk 308 * Bitmap: bit set = last pte in walk
@@ -358,6 +370,11 @@ struct kvm_mtrr {
358 struct list_head head; 370 struct list_head head;
359}; 371};
360 372
373/* Hyper-V per vcpu emulation context */
374struct kvm_vcpu_hv {
375 u64 hv_vapic;
376};
377
361struct kvm_vcpu_arch { 378struct kvm_vcpu_arch {
362 /* 379 /*
363 * rip and regs accesses must go through 380 * rip and regs accesses must go through
@@ -514,8 +531,7 @@ struct kvm_vcpu_arch {
514 /* used for guest single stepping over the given code position */ 531 /* used for guest single stepping over the given code position */
515 unsigned long singlestep_rip; 532 unsigned long singlestep_rip;
516 533
517 /* fields used by HYPER-V emulation */ 534 struct kvm_vcpu_hv hyperv;
518 u64 hv_vapic;
519 535
520 cpumask_var_t wbinvd_dirty_mask; 536 cpumask_var_t wbinvd_dirty_mask;
521 537
@@ -586,6 +602,17 @@ struct kvm_apic_map {
586 struct kvm_lapic *logical_map[16][16]; 602 struct kvm_lapic *logical_map[16][16];
587}; 603};
588 604
605/* Hyper-V emulation context */
606struct kvm_hv {
607 u64 hv_guest_os_id;
608 u64 hv_hypercall;
609 u64 hv_tsc_page;
610
611 /* Hyper-v based guest crash (NT kernel bugcheck) parameters */
612 u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
613 u64 hv_crash_ctl;
614};
615
589struct kvm_arch { 616struct kvm_arch {
590 unsigned int n_used_mmu_pages; 617 unsigned int n_used_mmu_pages;
591 unsigned int n_requested_mmu_pages; 618 unsigned int n_requested_mmu_pages;
@@ -645,16 +672,14 @@ struct kvm_arch {
645 /* reads protected by irq_srcu, writes by irq_lock */ 672 /* reads protected by irq_srcu, writes by irq_lock */
646 struct hlist_head mask_notifier_list; 673 struct hlist_head mask_notifier_list;
647 674
648 /* fields used by HYPER-V emulation */ 675 struct kvm_hv hyperv;
649 u64 hv_guest_os_id;
650 u64 hv_hypercall;
651 u64 hv_tsc_page;
652 676
653 #ifdef CONFIG_KVM_MMU_AUDIT 677 #ifdef CONFIG_KVM_MMU_AUDIT
654 int audit_point; 678 int audit_point;
655 #endif 679 #endif
656 680
657 bool boot_vcpu_runs_old_kvmclock; 681 bool boot_vcpu_runs_old_kvmclock;
682 u32 bsp_vcpu_id;
658 683
659 u64 disabled_quirks; 684 u64 disabled_quirks;
660}; 685};
@@ -1203,5 +1228,7 @@ int __x86_set_memory_region(struct kvm *kvm,
1203 const struct kvm_userspace_memory_region *mem); 1228 const struct kvm_userspace_memory_region *mem);
1204int x86_set_memory_region(struct kvm *kvm, 1229int x86_set_memory_region(struct kvm *kvm,
1205 const struct kvm_userspace_memory_region *mem); 1230 const struct kvm_userspace_memory_region *mem);
1231bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
1232bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
1206 1233
1207#endif /* _ASM_X86_KVM_HOST_H */ 1234#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index da772edd19ab..448b7ca61aee 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -47,6 +47,7 @@
47#define CPU_BASED_MOV_DR_EXITING 0x00800000 47#define CPU_BASED_MOV_DR_EXITING 0x00800000
48#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 48#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
49#define CPU_BASED_USE_IO_BITMAPS 0x02000000 49#define CPU_BASED_USE_IO_BITMAPS 0x02000000
50#define CPU_BASED_MONITOR_TRAP_FLAG 0x08000000
50#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 51#define CPU_BASED_USE_MSR_BITMAPS 0x10000000
51#define CPU_BASED_MONITOR_EXITING 0x20000000 52#define CPU_BASED_MONITOR_EXITING 0x20000000
52#define CPU_BASED_PAUSE_EXITING 0x40000000 53#define CPU_BASED_PAUSE_EXITING 0x40000000
@@ -367,29 +368,29 @@ enum vmcs_field {
367#define TYPE_PHYSICAL_APIC_EVENT (10 << 12) 368#define TYPE_PHYSICAL_APIC_EVENT (10 << 12)
368#define TYPE_PHYSICAL_APIC_INST (15 << 12) 369#define TYPE_PHYSICAL_APIC_INST (15 << 12)
369 370
370/* segment AR */ 371/* segment AR in VMCS -- these are different from what LAR reports */
371#define SEGMENT_AR_L_MASK (1 << 13) 372#define VMX_SEGMENT_AR_L_MASK (1 << 13)
372 373
373#define AR_TYPE_ACCESSES_MASK 1 374#define VMX_AR_TYPE_ACCESSES_MASK 1
374#define AR_TYPE_READABLE_MASK (1 << 1) 375#define VMX_AR_TYPE_READABLE_MASK (1 << 1)
375#define AR_TYPE_WRITEABLE_MASK (1 << 2) 376#define VMX_AR_TYPE_WRITEABLE_MASK (1 << 2)
376#define AR_TYPE_CODE_MASK (1 << 3) 377#define VMX_AR_TYPE_CODE_MASK (1 << 3)
377#define AR_TYPE_MASK 0x0f 378#define VMX_AR_TYPE_MASK 0x0f
378#define AR_TYPE_BUSY_64_TSS 11 379#define VMX_AR_TYPE_BUSY_64_TSS 11
379#define AR_TYPE_BUSY_32_TSS 11 380#define VMX_AR_TYPE_BUSY_32_TSS 11
380#define AR_TYPE_BUSY_16_TSS 3 381#define VMX_AR_TYPE_BUSY_16_TSS 3
381#define AR_TYPE_LDT 2 382#define VMX_AR_TYPE_LDT 2
382 383
383#define AR_UNUSABLE_MASK (1 << 16) 384#define VMX_AR_UNUSABLE_MASK (1 << 16)
384#define AR_S_MASK (1 << 4) 385#define VMX_AR_S_MASK (1 << 4)
385#define AR_P_MASK (1 << 7) 386#define VMX_AR_P_MASK (1 << 7)
386#define AR_L_MASK (1 << 13) 387#define VMX_AR_L_MASK (1 << 13)
387#define AR_DB_MASK (1 << 14) 388#define VMX_AR_DB_MASK (1 << 14)
388#define AR_G_MASK (1 << 15) 389#define VMX_AR_G_MASK (1 << 15)
389#define AR_DPL_SHIFT 5 390#define VMX_AR_DPL_SHIFT 5
390#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3) 391#define VMX_AR_DPL(ar) (((ar) >> VMX_AR_DPL_SHIFT) & 3)
391 392
392#define AR_RESERVD_MASK 0xfffe0f00 393#define VMX_AR_RESERVD_MASK 0xfffe0f00
393 394
394#define TSS_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 0) 395#define TSS_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 0)
395#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 1) 396#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 1)
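
The renamed VMX_AR_* defines above encode the VMCS segment access-rights layout: type in bits 0-3, S in bit 4, DPL in bits 5-6, P in bit 7, L/DB/G in bits 13-15, and the "unusable" flag in bit 16. The sketch below decodes one such word with locally named masks that mirror the VMX_AR_* defines; the sample value is made up for illustration.

/* Sketch: decode a VMCS segment access-rights word using the same bit
 * layout as the VMX_AR_* defines.  Local SEG_AR_* names are used so the
 * sketch stands alone.
 */
#include <stdio.h>
#include <stdint.h>

#define SEG_AR_TYPE_MASK      0x0f          /* mirrors VMX_AR_TYPE_MASK */
#define SEG_AR_S_MASK         (1u << 4)     /* VMX_AR_S_MASK */
#define SEG_AR_DPL_SHIFT      5             /* VMX_AR_DPL_SHIFT */
#define SEG_AR_DPL(ar)        (((ar) >> SEG_AR_DPL_SHIFT) & 3)
#define SEG_AR_P_MASK         (1u << 7)     /* VMX_AR_P_MASK */
#define SEG_AR_L_MASK         (1u << 13)    /* VMX_AR_L_MASK */
#define SEG_AR_DB_MASK        (1u << 14)    /* VMX_AR_DB_MASK */
#define SEG_AR_G_MASK         (1u << 15)    /* VMX_AR_G_MASK */
#define SEG_AR_UNUSABLE_MASK  (1u << 16)    /* VMX_AR_UNUSABLE_MASK */

int main(void)
{
        uint32_t ar = 0xa09b;   /* made-up value: a present 64-bit code segment */

        printf("type=%u S=%u DPL=%u P=%u L=%u DB=%u G=%u unusable=%u\n",
               ar & SEG_AR_TYPE_MASK, !!(ar & SEG_AR_S_MASK), SEG_AR_DPL(ar),
               !!(ar & SEG_AR_P_MASK), !!(ar & SEG_AR_L_MASK),
               !!(ar & SEG_AR_DB_MASK), !!(ar & SEG_AR_G_MASK),
               !!(ar & SEG_AR_UNUSABLE_MASK));
        return 0;
}
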
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 1fe92181ee9e..37fee272618f 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -58,6 +58,7 @@
58#define EXIT_REASON_INVALID_STATE 33 58#define EXIT_REASON_INVALID_STATE 33
59#define EXIT_REASON_MSR_LOAD_FAIL 34 59#define EXIT_REASON_MSR_LOAD_FAIL 34
60#define EXIT_REASON_MWAIT_INSTRUCTION 36 60#define EXIT_REASON_MWAIT_INSTRUCTION 36
61#define EXIT_REASON_MONITOR_TRAP_FLAG 37
61#define EXIT_REASON_MONITOR_INSTRUCTION 39 62#define EXIT_REASON_MONITOR_INSTRUCTION 39
62#define EXIT_REASON_PAUSE_INSTRUCTION 40 63#define EXIT_REASON_PAUSE_INSTRUCTION 40
63#define EXIT_REASON_MCE_DURING_VMENTRY 41 64#define EXIT_REASON_MCE_DURING_VMENTRY 41
@@ -106,6 +107,7 @@
106 { EXIT_REASON_MSR_READ, "MSR_READ" }, \ 107 { EXIT_REASON_MSR_READ, "MSR_READ" }, \
107 { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ 108 { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \
108 { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ 109 { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \
110 { EXIT_REASON_MONITOR_TRAP_FLAG, "MONITOR_TRAP_FLAG" }, \
109 { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ 111 { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \
110 { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ 112 { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \
111 { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ 113 { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 67d215cb8953..a1ff508bb423 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,7 +12,9 @@ kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
12kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o 12kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
13 13
14kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ 14kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
15 i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o 15 i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
16 hyperv.o
17
16kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o 18kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o
17kvm-intel-y += vmx.o pmu_intel.o 19kvm-intel-y += vmx.o pmu_intel.o
18kvm-amd-y += svm.o pmu_amd.o 20kvm-amd-y += svm.o pmu_amd.o
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
new file mode 100644
index 000000000000..a8160d2ae362
--- /dev/null
+++ b/arch/x86/kvm/hyperv.c
@@ -0,0 +1,377 @@
1/*
2 * KVM Microsoft Hyper-V emulation
3 *
4 * derived from arch/x86/kvm/x86.c
5 *
6 * Copyright (C) 2006 Qumranet, Inc.
7 * Copyright (C) 2008 Qumranet, Inc.
8 * Copyright IBM Corporation, 2008
9 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
10 * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
11 *
12 * Authors:
13 * Avi Kivity <avi@qumranet.com>
14 * Yaniv Kamay <yaniv@qumranet.com>
15 * Amit Shah <amit.shah@qumranet.com>
16 * Ben-Ami Yassour <benami@il.ibm.com>
17 * Andrey Smetanin <asmetanin@virtuozzo.com>
18 *
19 * This work is licensed under the terms of the GNU GPL, version 2. See
20 * the COPYING file in the top-level directory.
21 *
22 */
23
24#include "x86.h"
25#include "lapic.h"
26#include "hyperv.h"
27
28#include <linux/kvm_host.h>
29#include <trace/events/kvm.h>
30
31#include "trace.h"
32
33static bool kvm_hv_msr_partition_wide(u32 msr)
34{
35 bool r = false;
36
37 switch (msr) {
38 case HV_X64_MSR_GUEST_OS_ID:
39 case HV_X64_MSR_HYPERCALL:
40 case HV_X64_MSR_REFERENCE_TSC:
41 case HV_X64_MSR_TIME_REF_COUNT:
42 case HV_X64_MSR_CRASH_CTL:
43 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
44 r = true;
45 break;
46 }
47
48 return r;
49}
50
51static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
52 u32 index, u64 *pdata)
53{
54 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
55
56 if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
57 return -EINVAL;
58
59 *pdata = hv->hv_crash_param[index];
60 return 0;
61}
62
63static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata)
64{
65 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
66
67 *pdata = hv->hv_crash_ctl;
68 return 0;
69}
70
71static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host)
72{
73 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
74
75 if (host)
76 hv->hv_crash_ctl = data & HV_X64_MSR_CRASH_CTL_NOTIFY;
77
78 if (!host && (data & HV_X64_MSR_CRASH_CTL_NOTIFY)) {
79
80 vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
81 hv->hv_crash_param[0],
82 hv->hv_crash_param[1],
83 hv->hv_crash_param[2],
84 hv->hv_crash_param[3],
85 hv->hv_crash_param[4]);
86
87 /* Send notification about crash to user space */
88 kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
89 }
90
91 return 0;
92}
93
94static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
95 u32 index, u64 data)
96{
97 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
98
99 if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
100 return -EINVAL;
101
102 hv->hv_crash_param[index] = data;
103 return 0;
104}
105
106static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
107 bool host)
108{
109 struct kvm *kvm = vcpu->kvm;
110 struct kvm_hv *hv = &kvm->arch.hyperv;
111
112 switch (msr) {
113 case HV_X64_MSR_GUEST_OS_ID:
114 hv->hv_guest_os_id = data;
115 /* setting guest os id to zero disables hypercall page */
116 if (!hv->hv_guest_os_id)
117 hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
118 break;
119 case HV_X64_MSR_HYPERCALL: {
120 u64 gfn;
121 unsigned long addr;
122 u8 instructions[4];
123
124 /* if guest os id is not set hypercall should remain disabled */
125 if (!hv->hv_guest_os_id)
126 break;
127 if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
128 hv->hv_hypercall = data;
129 break;
130 }
131 gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
132 addr = gfn_to_hva(kvm, gfn);
133 if (kvm_is_error_hva(addr))
134 return 1;
135 kvm_x86_ops->patch_hypercall(vcpu, instructions);
136 ((unsigned char *)instructions)[3] = 0xc3; /* ret */
137 if (__copy_to_user((void __user *)addr, instructions, 4))
138 return 1;
139 hv->hv_hypercall = data;
140 mark_page_dirty(kvm, gfn);
141 break;
142 }
143 case HV_X64_MSR_REFERENCE_TSC: {
144 u64 gfn;
145 HV_REFERENCE_TSC_PAGE tsc_ref;
146
147 memset(&tsc_ref, 0, sizeof(tsc_ref));
148 hv->hv_tsc_page = data;
149 if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
150 break;
151 gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
152 if (kvm_write_guest(
153 kvm,
154 gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
155 &tsc_ref, sizeof(tsc_ref)))
156 return 1;
157 mark_page_dirty(kvm, gfn);
158 break;
159 }
160 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
161 return kvm_hv_msr_set_crash_data(vcpu,
162 msr - HV_X64_MSR_CRASH_P0,
163 data);
164 case HV_X64_MSR_CRASH_CTL:
165 return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
166 default:
167		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
168			    msr, data);

169 return 1;
170 }
171 return 0;
172}
173
174static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
175{
176 struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
177
178 switch (msr) {
179 case HV_X64_MSR_APIC_ASSIST_PAGE: {
180 u64 gfn;
181 unsigned long addr;
182
183 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
184 hv->hv_vapic = data;
185 if (kvm_lapic_enable_pv_eoi(vcpu, 0))
186 return 1;
187 break;
188 }
189 gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
190 addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
191 if (kvm_is_error_hva(addr))
192 return 1;
193 if (__clear_user((void __user *)addr, PAGE_SIZE))
194 return 1;
195 hv->hv_vapic = data;
196 kvm_vcpu_mark_page_dirty(vcpu, gfn);
197 if (kvm_lapic_enable_pv_eoi(vcpu,
198 gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
199 return 1;
200 break;
201 }
202 case HV_X64_MSR_EOI:
203 return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
204 case HV_X64_MSR_ICR:
205 return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
206 case HV_X64_MSR_TPR:
207 return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
208 default:
209		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
210			    msr, data);
211 return 1;
212 }
213
214 return 0;
215}
216
217static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
218{
219 u64 data = 0;
220 struct kvm *kvm = vcpu->kvm;
221 struct kvm_hv *hv = &kvm->arch.hyperv;
222
223 switch (msr) {
224 case HV_X64_MSR_GUEST_OS_ID:
225 data = hv->hv_guest_os_id;
226 break;
227 case HV_X64_MSR_HYPERCALL:
228 data = hv->hv_hypercall;
229 break;
230 case HV_X64_MSR_TIME_REF_COUNT: {
231 data =
232 div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
233 break;
234 }
235 case HV_X64_MSR_REFERENCE_TSC:
236 data = hv->hv_tsc_page;
237 break;
238 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
239 return kvm_hv_msr_get_crash_data(vcpu,
240 msr - HV_X64_MSR_CRASH_P0,
241 pdata);
242 case HV_X64_MSR_CRASH_CTL:
243 return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
244 default:
245 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
246 return 1;
247 }
248
249 *pdata = data;
250 return 0;
251}
252
253static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
254{
255 u64 data = 0;
256 struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
257
258 switch (msr) {
259 case HV_X64_MSR_VP_INDEX: {
260 int r;
261 struct kvm_vcpu *v;
262
263 kvm_for_each_vcpu(r, v, vcpu->kvm) {
264 if (v == vcpu) {
265 data = r;
266 break;
267 }
268 }
269 break;
270 }
271 case HV_X64_MSR_EOI:
272 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
273 case HV_X64_MSR_ICR:
274 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
275 case HV_X64_MSR_TPR:
276 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
277 case HV_X64_MSR_APIC_ASSIST_PAGE:
278 data = hv->hv_vapic;
279 break;
280 default:
281 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
282 return 1;
283 }
284 *pdata = data;
285 return 0;
286}
287
288int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
289{
290 if (kvm_hv_msr_partition_wide(msr)) {
291 int r;
292
293 mutex_lock(&vcpu->kvm->lock);
294 r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
295 mutex_unlock(&vcpu->kvm->lock);
296 return r;
297 } else
298 return kvm_hv_set_msr(vcpu, msr, data);
299}
300
301int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
302{
303 if (kvm_hv_msr_partition_wide(msr)) {
304 int r;
305
306 mutex_lock(&vcpu->kvm->lock);
307 r = kvm_hv_get_msr_pw(vcpu, msr, pdata);
308 mutex_unlock(&vcpu->kvm->lock);
309 return r;
310 } else
311 return kvm_hv_get_msr(vcpu, msr, pdata);
312}
313
314bool kvm_hv_hypercall_enabled(struct kvm *kvm)
315{
316 return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
317}
318
319int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
320{
321 u64 param, ingpa, outgpa, ret;
322 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
323 bool fast, longmode;
324
325 /*
326	 * Per the Hyper-V spec, the hypercall generates a #UD from non-zero
327	 * CPL and in real mode.
328 */
329 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
330 kvm_queue_exception(vcpu, UD_VECTOR);
331 return 0;
332 }
333
334 longmode = is_64_bit_mode(vcpu);
335
336 if (!longmode) {
337 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
338 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
339 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
340 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
341 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
342 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
343 }
344#ifdef CONFIG_X86_64
345 else {
346 param = kvm_register_read(vcpu, VCPU_REGS_RCX);
347 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
348 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
349 }
350#endif
351
352 code = param & 0xffff;
353 fast = (param >> 16) & 0x1;
354 rep_cnt = (param >> 32) & 0xfff;
355 rep_idx = (param >> 48) & 0xfff;
356
357 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
358
359 switch (code) {
360 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
361 kvm_vcpu_on_spin(vcpu);
362 break;
363 default:
364 res = HV_STATUS_INVALID_HYPERCALL_CODE;
365 break;
366 }
367
368 ret = res | (((u64)rep_done & 0xfff) << 32);
369 if (longmode) {
370 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
371 } else {
372 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
373 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
374 }
375
376 return 1;
377}
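
kvm_hv_hypercall() above unpacks the hypercall input value into its fields (call code in bits 0-15, the "fast" flag in bit 16, rep count in bits 32-43, rep start index in bits 48-59) and packs the result as the status word with the completed rep count at bit 32. A userspace sketch of that packing is below; the field positions are taken from the code shown, while the struct and helper names are illustrative only.

/* Sketch of the Hyper-V hypercall input/result packing decoded by
 * kvm_hv_hypercall(): decode() splits the input, encode_result() packs
 * status and completed rep count.
 */
#include <stdio.h>
#include <stdint.h>

struct hcall {
        uint16_t code;
        int      fast;
        uint16_t rep_cnt;
        uint16_t rep_idx;
};

static struct hcall decode(uint64_t param)
{
        struct hcall c = {
                .code    = param & 0xffff,
                .fast    = (param >> 16) & 0x1,
                .rep_cnt = (param >> 32) & 0xfff,
                .rep_idx = (param >> 48) & 0xfff,
        };
        return c;
}

static uint64_t encode_result(uint16_t status, uint16_t rep_done)
{
        return (uint64_t)status | (((uint64_t)rep_done & 0xfff) << 32);
}

int main(void)
{
        struct hcall c = decode(0x0005000300010008ULL);

        printf("code=%u fast=%d rep_cnt=%u rep_idx=%u\n",
               c.code, c.fast, c.rep_cnt, c.rep_idx);
        printf("result=0x%llx\n",
               (unsigned long long)encode_result(0, c.rep_cnt));
        return 0;
}
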
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
new file mode 100644
index 000000000000..c7bce559f67b
--- /dev/null
+++ b/arch/x86/kvm/hyperv.h
@@ -0,0 +1,32 @@
1/*
2 * KVM Microsoft Hyper-V emulation
3 *
4 * derived from arch/x86/kvm/x86.c
5 *
6 * Copyright (C) 2006 Qumranet, Inc.
7 * Copyright (C) 2008 Qumranet, Inc.
8 * Copyright IBM Corporation, 2008
9 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
10 * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
11 *
12 * Authors:
13 * Avi Kivity <avi@qumranet.com>
14 * Yaniv Kamay <yaniv@qumranet.com>
15 * Amit Shah <amit.shah@qumranet.com>
16 * Ben-Ami Yassour <benami@il.ibm.com>
17 * Andrey Smetanin <asmetanin@virtuozzo.com>
18 *
19 * This work is licensed under the terms of the GNU GPL, version 2. See
20 * the COPYING file in the top-level directory.
21 *
22 */
23
24#ifndef __ARCH_X86_KVM_HYPERV_H__
25#define __ARCH_X86_KVM_HYPERV_H__
26
27int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);
28int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
29bool kvm_hv_hypercall_enabled(struct kvm *kvm);
30int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
31
32#endif
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index fef922ff2635..7cc2360f1848 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -651,15 +651,10 @@ fail_unlock:
651 return NULL; 651 return NULL;
652} 652}
653 653
654void kvm_destroy_pic(struct kvm *kvm) 654void kvm_destroy_pic(struct kvm_pic *vpic)
655{ 655{
656 struct kvm_pic *vpic = kvm->arch.vpic; 656 kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master);
657 657 kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave);
658 if (vpic) { 658 kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr);
659 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_master); 659 kfree(vpic);
660 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_slave);
661 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_eclr);
662 kvm->arch.vpic = NULL;
663 kfree(vpic);
664 }
665} 660}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index ad68c73008c5..3d782a2c336a 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -74,7 +74,7 @@ struct kvm_pic {
74}; 74};
75 75
76struct kvm_pic *kvm_create_pic(struct kvm *kvm); 76struct kvm_pic *kvm_create_pic(struct kvm *kvm);
77void kvm_destroy_pic(struct kvm *kvm); 77void kvm_destroy_pic(struct kvm_pic *vpic);
78int kvm_pic_read_irq(struct kvm *kvm); 78int kvm_pic_read_irq(struct kvm *kvm);
79void kvm_pic_update_irq(struct kvm_pic *s); 79void kvm_pic_update_irq(struct kvm_pic *s);
80 80
@@ -85,11 +85,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
85 85
86static inline int irqchip_in_kernel(struct kvm *kvm) 86static inline int irqchip_in_kernel(struct kvm *kvm)
87{ 87{
88 int ret; 88 struct kvm_pic *vpic = pic_irqchip(kvm);
89 89
90 ret = (pic_irqchip(kvm) != NULL); 90 /* Read vpic before kvm->irq_routing. */
91 smp_rmb(); 91 smp_rmb();
92 return ret; 92 return vpic != NULL;
93} 93}
94 94
95void kvm_pic_reset(struct kvm_kpic_state *s); 95void kvm_pic_reset(struct kvm_kpic_state *s);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2a5ca97c263b..9a3e342e3cda 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1900,8 +1900,9 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
1900 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) 1900 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
1901 return; 1901 return;
1902 1902
1903 kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, 1903 if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
1904 sizeof(u32)); 1904 sizeof(u32)))
1905 return;
1905 1906
1906 apic_set_tpr(vcpu->arch.apic, data & 0xff); 1907 apic_set_tpr(vcpu->arch.apic, data & 0xff);
1907} 1908}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 71952748222a..764037991d26 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -91,7 +91,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
91 91
92static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) 92static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
93{ 93{
94 return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; 94 return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
95} 95}
96 96
97int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); 97int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 44171462bd2a..fb16a8ea3dee 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -357,12 +357,6 @@ static u64 __get_spte_lockless(u64 *sptep)
357{ 357{
358 return ACCESS_ONCE(*sptep); 358 return ACCESS_ONCE(*sptep);
359} 359}
360
361static bool __check_direct_spte_mmio_pf(u64 spte)
362{
363 /* It is valid if the spte is zapped. */
364 return spte == 0ull;
365}
366#else 360#else
367union split_spte { 361union split_spte {
368 struct { 362 struct {
@@ -478,23 +472,6 @@ retry:
478 472
479 return spte.spte; 473 return spte.spte;
480} 474}
481
482static bool __check_direct_spte_mmio_pf(u64 spte)
483{
484 union split_spte sspte = (union split_spte)spte;
485 u32 high_mmio_mask = shadow_mmio_mask >> 32;
486
487 /* It is valid if the spte is zapped. */
488 if (spte == 0ull)
489 return true;
490
491 /* It is valid if the spte is being zapped. */
492 if (sspte.spte_low == 0ull &&
493 (sspte.spte_high & high_mmio_mask) == high_mmio_mask)
494 return true;
495
496 return false;
497}
498#endif 475#endif
499 476
500static bool spte_is_locklessly_modifiable(u64 spte) 477static bool spte_is_locklessly_modifiable(u64 spte)
@@ -3291,54 +3268,89 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
3291 return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception); 3268 return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception);
3292} 3269}
3293 3270
3294static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) 3271static bool
3272__is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level)
3295{ 3273{
3296 if (direct) 3274 int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f;
3297 return vcpu_match_mmio_gpa(vcpu, addr);
3298 3275
3299 return vcpu_match_mmio_gva(vcpu, addr); 3276 return (pte & rsvd_check->rsvd_bits_mask[bit7][level-1]) |
3277 ((rsvd_check->bad_mt_xwr & (1ull << low6)) != 0);
3300} 3278}
3301 3279
3280static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
3281{
3282 return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level);
3283}
3302 3284
3303/* 3285static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
3304 * On direct hosts, the last spte is only allows two states
3305 * for mmio page fault:
3306 * - It is the mmio spte
3307 * - It is zapped or it is being zapped.
3308 *
3309 * This function completely checks the spte when the last spte
3310 * is not the mmio spte.
3311 */
3312static bool check_direct_spte_mmio_pf(u64 spte)
3313{ 3286{
3314 return __check_direct_spte_mmio_pf(spte); 3287 return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level);
3315} 3288}
3316 3289
3317static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr) 3290static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
3291{
3292 if (direct)
3293 return vcpu_match_mmio_gpa(vcpu, addr);
3294
3295 return vcpu_match_mmio_gva(vcpu, addr);
3296}
3297
3298/* return true if reserved bit is detected on spte. */
3299static bool
3300walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
3318{ 3301{
3319 struct kvm_shadow_walk_iterator iterator; 3302 struct kvm_shadow_walk_iterator iterator;
3320 u64 spte = 0ull; 3303 u64 sptes[PT64_ROOT_LEVEL], spte = 0ull;
3304 int root, leaf;
3305 bool reserved = false;
3321 3306
3322 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) 3307 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
3323 return spte; 3308 goto exit;
3324 3309
3325 walk_shadow_page_lockless_begin(vcpu); 3310 walk_shadow_page_lockless_begin(vcpu);
3326 for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) 3311
3312 for (shadow_walk_init(&iterator, vcpu, addr), root = iterator.level;
3313 shadow_walk_okay(&iterator);
3314 __shadow_walk_next(&iterator, spte)) {
3315 leaf = iterator.level;
3316 spte = mmu_spte_get_lockless(iterator.sptep);
3317
3318 sptes[leaf - 1] = spte;
3319
3327 if (!is_shadow_present_pte(spte)) 3320 if (!is_shadow_present_pte(spte))
3328 break; 3321 break;
3322
3323 reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte,
3324 leaf);
3325 }
3326
3329 walk_shadow_page_lockless_end(vcpu); 3327 walk_shadow_page_lockless_end(vcpu);
3330 3328
3331 return spte; 3329 if (reserved) {
3330 pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
3331 __func__, addr);
3332 while (root >= leaf) {
3333 pr_err("------ spte 0x%llx level %d.\n",
3334 sptes[root - 1], root);
3335 root--;
3336 }
3337 }
3338exit:
3339 *sptep = spte;
3340 return reserved;
3332} 3341}
3333 3342
3334int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) 3343int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
3335{ 3344{
3336 u64 spte; 3345 u64 spte;
3346 bool reserved;
3337 3347
3338 if (quickly_check_mmio_pf(vcpu, addr, direct)) 3348 if (quickly_check_mmio_pf(vcpu, addr, direct))
3339 return RET_MMIO_PF_EMULATE; 3349 return RET_MMIO_PF_EMULATE;
3340 3350
3341 spte = walk_shadow_page_get_mmio_spte(vcpu, addr); 3351 reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte);
3352 if (unlikely(reserved))
3353 return RET_MMIO_PF_BUG;
3342 3354
3343 if (is_mmio_spte(spte)) { 3355 if (is_mmio_spte(spte)) {
3344 gfn_t gfn = get_mmio_spte_gfn(spte); 3356 gfn_t gfn = get_mmio_spte_gfn(spte);
@@ -3356,13 +3368,6 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
3356 } 3368 }
3357 3369
3358 /* 3370 /*
3359 * It's ok if the gva is remapped by other cpus on shadow guest,
3360 * it's a BUG if the gfn is not a mmio page.
3361 */
3362 if (direct && !check_direct_spte_mmio_pf(spte))
3363 return RET_MMIO_PF_BUG;
3364
3365 /*
3366 * If the page table is zapped by other cpus, let CPU fault again on 3371 * If the page table is zapped by other cpus, let CPU fault again on
3367 * the address. 3372 * the address.
3368 */ 3373 */
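
The new __is_rsvd_bits_set() helper above checks one page-table entry against precomputed masks: bit 7 of the entry picks the "large page" or "regular" row of the mask table, the paging level picks the column, and the low six bits index a 64-bit bitmap of illegal memory-type/XWR combinations. The sketch below reproduces that check in isolation; the masks filled in are arbitrary and only serve the demonstration.

/* Sketch of the __is_rsvd_bits_set() check: bit 7 selects the mask row,
 * level-1 selects the column, and the low 6 bits index bad_mt_xwr.
 */
#include <stdio.h>
#include <stdint.h>

struct rsvd_bits_validate {
        uint64_t rsvd_bits_mask[2][4];
        uint64_t bad_mt_xwr;
};

static int is_rsvd_bits_set(const struct rsvd_bits_validate *rc,
                            uint64_t pte, int level)
{
        int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f;

        return ((pte & rc->rsvd_bits_mask[bit7][level - 1]) != 0) |
               ((rc->bad_mt_xwr & (1ull << low6)) != 0);
}

int main(void)
{
        struct rsvd_bits_validate rc = { { { 0 } }, 0 };

        /* Pretend bits 51..48 are reserved in level-1 non-large entries. */
        rc.rsvd_bits_mask[0][0] = 0x000f000000000000ULL;
        /* Pretend the all-zero memory-type/XWR combination is illegal. */
        rc.bad_mt_xwr = 1ull << 0;

        printf("%d\n", is_rsvd_bits_set(&rc, 0x0001000000000007ULL, 1)); /* 1 */
        printf("%d\n", is_rsvd_bits_set(&rc, 0x0000000000000007ULL, 1)); /* 0 */
        return 0;
}
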
@@ -3604,19 +3609,21 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gp
3604#include "paging_tmpl.h" 3609#include "paging_tmpl.h"
3605#undef PTTYPE 3610#undef PTTYPE
3606 3611
3607static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, 3612static void
3608 struct kvm_mmu *context) 3613__reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
3614 struct rsvd_bits_validate *rsvd_check,
3615 int maxphyaddr, int level, bool nx, bool gbpages,
3616 bool pse)
3609{ 3617{
3610 int maxphyaddr = cpuid_maxphyaddr(vcpu);
3611 u64 exb_bit_rsvd = 0; 3618 u64 exb_bit_rsvd = 0;
3612 u64 gbpages_bit_rsvd = 0; 3619 u64 gbpages_bit_rsvd = 0;
3613 u64 nonleaf_bit8_rsvd = 0; 3620 u64 nonleaf_bit8_rsvd = 0;
3614 3621
3615 context->bad_mt_xwr = 0; 3622 rsvd_check->bad_mt_xwr = 0;
3616 3623
3617 if (!context->nx) 3624 if (!nx)
3618 exb_bit_rsvd = rsvd_bits(63, 63); 3625 exb_bit_rsvd = rsvd_bits(63, 63);
3619 if (!guest_cpuid_has_gbpages(vcpu)) 3626 if (!gbpages)
3620 gbpages_bit_rsvd = rsvd_bits(7, 7); 3627 gbpages_bit_rsvd = rsvd_bits(7, 7);
3621 3628
3622 /* 3629 /*
@@ -3626,80 +3633,95 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
3626 if (guest_cpuid_is_amd(vcpu)) 3633 if (guest_cpuid_is_amd(vcpu))
3627 nonleaf_bit8_rsvd = rsvd_bits(8, 8); 3634 nonleaf_bit8_rsvd = rsvd_bits(8, 8);
3628 3635
3629 switch (context->root_level) { 3636 switch (level) {
3630 case PT32_ROOT_LEVEL: 3637 case PT32_ROOT_LEVEL:
3631 /* no rsvd bits for 2 level 4K page table entries */ 3638 /* no rsvd bits for 2 level 4K page table entries */
3632 context->rsvd_bits_mask[0][1] = 0; 3639 rsvd_check->rsvd_bits_mask[0][1] = 0;
3633 context->rsvd_bits_mask[0][0] = 0; 3640 rsvd_check->rsvd_bits_mask[0][0] = 0;
3634 context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; 3641 rsvd_check->rsvd_bits_mask[1][0] =
3642 rsvd_check->rsvd_bits_mask[0][0];
3635 3643
3636 if (!is_pse(vcpu)) { 3644 if (!pse) {
3637 context->rsvd_bits_mask[1][1] = 0; 3645 rsvd_check->rsvd_bits_mask[1][1] = 0;
3638 break; 3646 break;
3639 } 3647 }
3640 3648
3641 if (is_cpuid_PSE36()) 3649 if (is_cpuid_PSE36())
3642 /* 36bits PSE 4MB page */ 3650 /* 36bits PSE 4MB page */
3643 context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); 3651 rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
3644 else 3652 else
3645 /* 32 bits PSE 4MB page */ 3653 /* 32 bits PSE 4MB page */
3646 context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); 3654 rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
3647 break; 3655 break;
3648 case PT32E_ROOT_LEVEL: 3656 case PT32E_ROOT_LEVEL:
3649 context->rsvd_bits_mask[0][2] = 3657 rsvd_check->rsvd_bits_mask[0][2] =
3650 rsvd_bits(maxphyaddr, 63) | 3658 rsvd_bits(maxphyaddr, 63) |
3651 rsvd_bits(5, 8) | rsvd_bits(1, 2); /* PDPTE */ 3659 rsvd_bits(5, 8) | rsvd_bits(1, 2); /* PDPTE */
3652 context->rsvd_bits_mask[0][1] = exb_bit_rsvd | 3660 rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
3653 rsvd_bits(maxphyaddr, 62); /* PDE */ 3661 rsvd_bits(maxphyaddr, 62); /* PDE */
3654 context->rsvd_bits_mask[0][0] = exb_bit_rsvd | 3662 rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd |
3655 rsvd_bits(maxphyaddr, 62); /* PTE */ 3663 rsvd_bits(maxphyaddr, 62); /* PTE */
3656 context->rsvd_bits_mask[1][1] = exb_bit_rsvd | 3664 rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd |
3657 rsvd_bits(maxphyaddr, 62) | 3665 rsvd_bits(maxphyaddr, 62) |
3658 rsvd_bits(13, 20); /* large page */ 3666 rsvd_bits(13, 20); /* large page */
3659 context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; 3667 rsvd_check->rsvd_bits_mask[1][0] =
3668 rsvd_check->rsvd_bits_mask[0][0];
3660 break; 3669 break;
3661 case PT64_ROOT_LEVEL: 3670 case PT64_ROOT_LEVEL:
3662 context->rsvd_bits_mask[0][3] = exb_bit_rsvd | 3671 rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
3663 nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51); 3672 nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
3664 context->rsvd_bits_mask[0][2] = exb_bit_rsvd | 3673 rsvd_bits(maxphyaddr, 51);
3665 nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51); 3674 rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd |
3666 context->rsvd_bits_mask[0][1] = exb_bit_rsvd | 3675 nonleaf_bit8_rsvd | gbpages_bit_rsvd |
3667 rsvd_bits(maxphyaddr, 51); 3676 rsvd_bits(maxphyaddr, 51);
3668 context->rsvd_bits_mask[0][0] = exb_bit_rsvd | 3677 rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
3669 rsvd_bits(maxphyaddr, 51); 3678 rsvd_bits(maxphyaddr, 51);
3670 context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3]; 3679 rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd |
3671 context->rsvd_bits_mask[1][2] = exb_bit_rsvd | 3680 rsvd_bits(maxphyaddr, 51);
3681 rsvd_check->rsvd_bits_mask[1][3] =
3682 rsvd_check->rsvd_bits_mask[0][3];
3683 rsvd_check->rsvd_bits_mask[1][2] = exb_bit_rsvd |
3672 gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51) | 3684 gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51) |
3673 rsvd_bits(13, 29); 3685 rsvd_bits(13, 29);
3674 context->rsvd_bits_mask[1][1] = exb_bit_rsvd | 3686 rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd |
3675 rsvd_bits(maxphyaddr, 51) | 3687 rsvd_bits(maxphyaddr, 51) |
3676 rsvd_bits(13, 20); /* large page */ 3688 rsvd_bits(13, 20); /* large page */
3677 context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; 3689 rsvd_check->rsvd_bits_mask[1][0] =
3690 rsvd_check->rsvd_bits_mask[0][0];
3678 break; 3691 break;
3679 } 3692 }
3680} 3693}
3681 3694
3682static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, 3695static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
3683 struct kvm_mmu *context, bool execonly) 3696 struct kvm_mmu *context)
3697{
3698 __reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check,
3699 cpuid_maxphyaddr(vcpu), context->root_level,
3700 context->nx, guest_cpuid_has_gbpages(vcpu),
3701 is_pse(vcpu));
3702}
3703
3704static void
3705__reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
3706 int maxphyaddr, bool execonly)
3684{ 3707{
3685 int maxphyaddr = cpuid_maxphyaddr(vcpu);
3686 int pte; 3708 int pte;
3687 3709
3688 context->rsvd_bits_mask[0][3] = 3710 rsvd_check->rsvd_bits_mask[0][3] =
3689 rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); 3711 rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
3690 context->rsvd_bits_mask[0][2] = 3712 rsvd_check->rsvd_bits_mask[0][2] =
3691 rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6); 3713 rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
3692 context->rsvd_bits_mask[0][1] = 3714 rsvd_check->rsvd_bits_mask[0][1] =
3693 rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6); 3715 rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
3694 context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51); 3716 rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
3695 3717
3696 /* large page */ 3718 /* large page */
3697 context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3]; 3719 rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3];
3698 context->rsvd_bits_mask[1][2] = 3720 rsvd_check->rsvd_bits_mask[1][2] =
3699 rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29); 3721 rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
3700 context->rsvd_bits_mask[1][1] = 3722 rsvd_check->rsvd_bits_mask[1][1] =
3701 rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20); 3723 rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
3702 context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; 3724 rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0];
3703 3725
3704 for (pte = 0; pte < 64; pte++) { 3726 for (pte = 0; pte < 64; pte++) {
3705 int rwx_bits = pte & 7; 3727 int rwx_bits = pte & 7;
@@ -3707,10 +3729,64 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
3707 if (mt == 0x2 || mt == 0x3 || mt == 0x7 || 3729 if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
3708 rwx_bits == 0x2 || rwx_bits == 0x6 || 3730 rwx_bits == 0x2 || rwx_bits == 0x6 ||
3709 (rwx_bits == 0x4 && !execonly)) 3731 (rwx_bits == 0x4 && !execonly))
3710 context->bad_mt_xwr |= (1ull << pte); 3732 rsvd_check->bad_mt_xwr |= (1ull << pte);
3711 } 3733 }
3712} 3734}
3713 3735
3736static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
3737 struct kvm_mmu *context, bool execonly)
3738{
3739 __reset_rsvds_bits_mask_ept(&context->guest_rsvd_check,
3740 cpuid_maxphyaddr(vcpu), execonly);
3741}
3742
3743/*
3744 * the page table on host is the shadow page table for the page
3745 * table in guest or amd nested guest, its mmu features completely
3746 * follow the features in guest.
3747 */
3748void
3749reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
3750{
3751 __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
3752 boot_cpu_data.x86_phys_bits,
3753 context->shadow_root_level, context->nx,
3754 guest_cpuid_has_gbpages(vcpu), is_pse(vcpu));
3755}
3756EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask);
3757
3758/*
3759 * the direct page table on host, use as much mmu features as
3760 * possible, however, kvm currently does not do execution-protection.
3761 */
3762static void
3763reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
3764 struct kvm_mmu *context)
3765{
3766 if (guest_cpuid_is_amd(vcpu))
3767 __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
3768 boot_cpu_data.x86_phys_bits,
3769 context->shadow_root_level, false,
3770 cpu_has_gbpages, true);
3771 else
3772 __reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
3773 boot_cpu_data.x86_phys_bits,
3774 false);
3775
3776}
3777
3778/*
3779 * as the comments in reset_shadow_zero_bits_mask() except it
3780 * is the shadow page table for intel nested guest.
3781 */
3782static void
3783reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
3784 struct kvm_mmu *context, bool execonly)
3785{
3786 __reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
3787 boot_cpu_data.x86_phys_bits, execonly);
3788}
3789
3714static void update_permission_bitmask(struct kvm_vcpu *vcpu, 3790static void update_permission_bitmask(struct kvm_vcpu *vcpu,
3715 struct kvm_mmu *mmu, bool ept) 3791 struct kvm_mmu *mmu, bool ept)
3716{ 3792{
@@ -3889,6 +3965,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
3889 3965
3890 update_permission_bitmask(vcpu, context, false); 3966 update_permission_bitmask(vcpu, context, false);
3891 update_last_pte_bitmap(vcpu, context); 3967 update_last_pte_bitmap(vcpu, context);
3968 reset_tdp_shadow_zero_bits_mask(vcpu, context);
3892} 3969}
3893 3970
3894void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) 3971void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
@@ -3916,6 +3993,7 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
3916 context->base_role.smap_andnot_wp 3993 context->base_role.smap_andnot_wp
3917 = smap && !is_write_protection(vcpu); 3994 = smap && !is_write_protection(vcpu);
3918 context->base_role.smm = is_smm(vcpu); 3995 context->base_role.smm = is_smm(vcpu);
3996 reset_shadow_zero_bits_mask(vcpu, context);
3919} 3997}
3920EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); 3998EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
3921 3999
@@ -3939,6 +4017,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
3939 4017
3940 update_permission_bitmask(vcpu, context, true); 4018 update_permission_bitmask(vcpu, context, true);
3941 reset_rsvds_bits_mask_ept(vcpu, context, execonly); 4019 reset_rsvds_bits_mask_ept(vcpu, context, execonly);
4020 reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
3942} 4021}
3943EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu); 4022EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
3944 4023
@@ -4860,28 +4939,6 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
4860 return nr_mmu_pages; 4939 return nr_mmu_pages;
4861} 4940}
4862 4941
4863int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
4864{
4865 struct kvm_shadow_walk_iterator iterator;
4866 u64 spte;
4867 int nr_sptes = 0;
4868
4869 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
4870 return nr_sptes;
4871
4872 walk_shadow_page_lockless_begin(vcpu);
4873 for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
4874 sptes[iterator.level-1] = spte;
4875 nr_sptes++;
4876 if (!is_shadow_present_pte(spte))
4877 break;
4878 }
4879 walk_shadow_page_lockless_end(vcpu);
4880
4881 return nr_sptes;
4882}
4883EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy);
4884
4885void kvm_mmu_destroy(struct kvm_vcpu *vcpu) 4942void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
4886{ 4943{
4887 kvm_mmu_unload(vcpu); 4944 kvm_mmu_unload(vcpu);
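
For readers following the mmu.c refactor above, the sketch below (a standalone user-space program, not kernel code) shows the pattern the series introduces: an rsvd_bits_validate-style table of per-level reserved-bit masks plus a 64-entry bad memtype/XWR bitmap, filled once per MMU reset and then consulted for every guest PTE or shadow PTE. The mask values mirror the EPT variant visible in the hunks above; the "mt = pte >> 3" layout and the maxphyaddr value of 40 are illustrative assumptions, as is the main() driver.

/*
 * Minimal sketch of the reserved-bits framework added by this series:
 * a table of per-level masks plus a 64-entry bad memtype/XWR bitmap,
 * filled once and then consulted for every PTE.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct rsvd_bits_validate {
	uint64_t rsvd_bits_mask[2][4];	/* [bit 7 of PTE][level - 1] */
	uint64_t bad_mt_xwr;		/* EPT memtype/XWR combinations */
};

/* Same formula as rsvd_bits() in arch/x86/kvm/mmu.h. */
static uint64_t rsvd_bits(int s, int e)
{
	return ((1ULL << (e - s + 1)) - 1) << s;
}

static void reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rc,
				      int maxphyaddr, bool execonly)
{
	int pte;

	rc->rsvd_bits_mask[0][3] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
	rc->rsvd_bits_mask[0][2] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
	rc->rsvd_bits_mask[0][1] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
	rc->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);

	/* large pages */
	rc->rsvd_bits_mask[1][3] = rc->rsvd_bits_mask[0][3];
	rc->rsvd_bits_mask[1][2] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
	rc->rsvd_bits_mask[1][1] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
	rc->rsvd_bits_mask[1][0] = rc->rsvd_bits_mask[0][0];

	rc->bad_mt_xwr = 0;
	for (pte = 0; pte < 64; pte++) {
		int rwx = pte & 7;
		int mt = pte >> 3;	/* assumed layout: memtype in bits 5:3 */

		if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
		    rwx == 0x2 || rwx == 0x6 ||
		    (rwx == 0x4 && !execonly))
			rc->bad_mt_xwr |= 1ULL << pte;
	}
}

/* The common check that replaces FNAME(is_rsvd_bits_set). */
static bool is_rsvd_bits_set(const struct rsvd_bits_validate *rc,
			     uint64_t pte, int level)
{
	int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f;

	return (pte & rc->rsvd_bits_mask[bit7][level - 1]) ||
	       (rc->bad_mt_xwr & (1ULL << low6));
}

int main(void)
{
	struct rsvd_bits_validate rc;

	reset_rsvds_bits_mask_ept(&rc, 40, false);
	/* Write-only EPT entry (XWR = 010b) must be flagged. */
	printf("write-only leaf bad? %d\n", is_rsvd_bits_set(&rc, 0x2, 1));
	/* An address bit above maxphyaddr must be flagged. */
	printf("high bit bad?        %d\n", is_rsvd_bits_set(&rc, 1ULL << 45, 1));
	return 0;
}

Splitting the table fill from the check is what lets the same helpers serve both the guest's view of reserved bits (built from cpuid_maxphyaddr) and the host's shadow and TDP tables (built from boot_cpu_data.x86_phys_bits), as the reset_shadow_zero_bits_mask family above does.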
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 398d21c0f6dd..e4202e41d535 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -50,9 +50,11 @@ static inline u64 rsvd_bits(int s, int e)
50 return ((1ULL << (e - s + 1)) - 1) << s; 50 return ((1ULL << (e - s + 1)) - 1) << s;
51} 51}
52 52
53int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
54void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask); 53void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
55 54
55void
56reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
57
56/* 58/*
57 * Return values of handle_mmio_page_fault_common: 59 * Return values of handle_mmio_page_fault_common:
58 * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction 60 * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 0f67d7e24800..736e6ab8784d 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -128,14 +128,6 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
128 *access &= mask; 128 *access &= mask;
129} 129}
130 130
131static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
132{
133 int bit7 = (gpte >> 7) & 1, low6 = gpte & 0x3f;
134
135 return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) |
136 ((mmu->bad_mt_xwr & (1ull << low6)) != 0);
137}
138
139static inline int FNAME(is_present_gpte)(unsigned long pte) 131static inline int FNAME(is_present_gpte)(unsigned long pte)
140{ 132{
141#if PTTYPE != PTTYPE_EPT 133#if PTTYPE != PTTYPE_EPT
@@ -172,7 +164,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
172 struct kvm_mmu_page *sp, u64 *spte, 164 struct kvm_mmu_page *sp, u64 *spte,
173 u64 gpte) 165 u64 gpte)
174{ 166{
175 if (FNAME(is_rsvd_bits_set)(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) 167 if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
176 goto no_present; 168 goto no_present;
177 169
178 if (!FNAME(is_present_gpte)(gpte)) 170 if (!FNAME(is_present_gpte)(gpte))
@@ -353,8 +345,7 @@ retry_walk:
353 if (unlikely(!FNAME(is_present_gpte)(pte))) 345 if (unlikely(!FNAME(is_present_gpte)(pte)))
354 goto error; 346 goto error;
355 347
356 if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte, 348 if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) {
357 walker->level))) {
358 errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK; 349 errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK;
359 goto error; 350 goto error;
360 } 351 }
diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
index 886aa25a7131..39b91127ef07 100644
--- a/arch/x86/kvm/pmu_amd.c
+++ b/arch/x86/kvm/pmu_amd.c
@@ -133,8 +133,6 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
133 /* MSR_K7_PERFCTRn */ 133 /* MSR_K7_PERFCTRn */
134 pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0); 134 pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0);
135 if (pmc) { 135 if (pmc) {
136 if (!msr_info->host_initiated)
137 data = (s64)data;
138 pmc->counter += data - pmc_read_counter(pmc); 136 pmc->counter += data - pmc_read_counter(pmc);
139 return 0; 137 return 0;
140 } 138 }
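
A note on the pmu_amd.c hunk: the dropped statement had no effect, since casting a u64 value through s64 and storing it back into a u64 leaves the bit pattern unchanged on the compilers and ABIs the kernel targets, so the update pmc->counter += data - pmc_read_counter(pmc) behaves identically without it. A short demonstration, illustrative only and outside the kernel:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t data = 0xffffffff80001234ULL;	/* arbitrary example value */
	uint64_t roundtrip = (uint64_t)(int64_t)data;

	/* Both lines print the same bit pattern. */
	printf("before: %#llx\n", (unsigned long long)data);
	printf("after:  %#llx\n", (unsigned long long)roundtrip);
	return 0;
}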
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8e0c0844c6b9..74d825716f4f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1173,6 +1173,10 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
1173 if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm)) 1173 if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm))
1174 return 0; 1174 return 0;
1175 1175
1176 if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED) &&
1177 kvm_read_cr0(vcpu) & X86_CR0_CD)
1178 return _PAGE_NOCACHE;
1179
1176 mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn); 1180 mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
1177 return mtrr2protval[mtrr]; 1181 return mtrr2protval[mtrr];
1178} 1182}
@@ -1667,13 +1671,10 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1667 1671
1668 if (!vcpu->fpu_active) 1672 if (!vcpu->fpu_active)
1669 cr0 |= X86_CR0_TS; 1673 cr0 |= X86_CR0_TS;
1670 /* 1674
1671 * re-enable caching here because the QEMU bios 1675 /* These are emulated via page tables. */
1672 * does not do it - this results in some delay at 1676 cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
1673 * reboot 1677
1674 */
1675 if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
1676 cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
1677 svm->vmcb->save.cr0 = cr0; 1678 svm->vmcb->save.cr0 = cr0;
1678 mark_dirty(svm->vmcb, VMCB_CR); 1679 mark_dirty(svm->vmcb, VMCB_CR);
1679 update_cr0_intercept(svm); 1680 update_cr0_intercept(svm);
@@ -2106,6 +2107,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
2106 vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; 2107 vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr;
2107 vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; 2108 vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
2108 vcpu->arch.mmu.shadow_root_level = get_npt_level(); 2109 vcpu->arch.mmu.shadow_root_level = get_npt_level();
2110 reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
2109 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; 2111 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
2110} 2112}
2111 2113
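
The svm.c hunks above move CR0.CD/NW handling out of hardware CR0 and into the page tables: the VMCB copy of CR0 now always has CD and NW cleared, while svm_get_mt_mask() forces an uncached mapping when the guest sets CR0.CD and the KVM_X86_QUIRK_CD_NW_CLEARED quirk is disabled. A minimal sketch of that decision follows; the enum and helper names are assumptions standing in for the real page-attribute values:

#include <stdbool.h>

enum guest_mt { GUEST_MT_FROM_MTRR, GUEST_MT_UNCACHED };

static enum guest_mt guest_memory_type(bool cd_nw_quirk_enabled,
					bool guest_cr0_cd)
{
	/* Without the quirk, CR0.CD forces uncached mappings (_PAGE_NOCACHE). */
	if (!cd_nw_quirk_enabled && guest_cr0_cd)
		return GUEST_MT_UNCACHED;
	return GUEST_MT_FROM_MTRR;	/* otherwise follow the guest MTRRs */
}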
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 83b7b5cd75d5..da1590ea43fc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2443,10 +2443,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
2443 CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING | 2443 CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
2444#endif 2444#endif
2445 CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | 2445 CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
2446 CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | 2446 CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG |
2447 CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | 2447 CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING |
2448 CPU_BASED_PAUSE_EXITING | CPU_BASED_TPR_SHADOW | 2448 CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING |
2449 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; 2449 CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
2450 /* 2450 /*
2451 * We can allow some features even when not supported by the 2451 * We can allow some features even when not supported by the
2452 * hardware. For example, L1 can specify an MSR bitmap - and we 2452 * hardware. For example, L1 can specify an MSR bitmap - and we
@@ -3423,12 +3423,12 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
3423 vmx_segment_cache_clear(to_vmx(vcpu)); 3423 vmx_segment_cache_clear(to_vmx(vcpu));
3424 3424
3425 guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); 3425 guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
3426 if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { 3426 if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
3427 pr_debug_ratelimited("%s: tss fixup for long mode. \n", 3427 pr_debug_ratelimited("%s: tss fixup for long mode. \n",
3428 __func__); 3428 __func__);
3429 vmcs_write32(GUEST_TR_AR_BYTES, 3429 vmcs_write32(GUEST_TR_AR_BYTES,
3430 (guest_tr_ar & ~AR_TYPE_MASK) 3430 (guest_tr_ar & ~VMX_AR_TYPE_MASK)
3431 | AR_TYPE_BUSY_64_TSS); 3431 | VMX_AR_TYPE_BUSY_64_TSS);
3432 } 3432 }
3433 vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA); 3433 vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
3434} 3434}
@@ -3719,7 +3719,7 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu)
3719 return 0; 3719 return 0;
3720 else { 3720 else {
3721 int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS); 3721 int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
3722 return AR_DPL(ar); 3722 return VMX_AR_DPL(ar);
3723 } 3723 }
3724} 3724}
3725 3725
@@ -3847,11 +3847,11 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu)
3847 3847
3848 if (cs.unusable) 3848 if (cs.unusable)
3849 return false; 3849 return false;
3850 if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK)) 3850 if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK))
3851 return false; 3851 return false;
3852 if (!cs.s) 3852 if (!cs.s)
3853 return false; 3853 return false;
3854 if (cs.type & AR_TYPE_WRITEABLE_MASK) { 3854 if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) {
3855 if (cs.dpl > cs_rpl) 3855 if (cs.dpl > cs_rpl)
3856 return false; 3856 return false;
3857 } else { 3857 } else {
@@ -3901,7 +3901,7 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
3901 return false; 3901 return false;
3902 if (!var.present) 3902 if (!var.present)
3903 return false; 3903 return false;
3904 if (~var.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK)) { 3904 if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) {
3905 if (var.dpl < rpl) /* DPL < RPL */ 3905 if (var.dpl < rpl) /* DPL < RPL */
3906 return false; 3906 return false;
3907 } 3907 }
@@ -5759,73 +5759,9 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
5759 return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); 5759 return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
5760} 5760}
5761 5761
5762static u64 ept_rsvd_mask(u64 spte, int level)
5763{
5764 int i;
5765 u64 mask = 0;
5766
5767 for (i = 51; i > boot_cpu_data.x86_phys_bits; i--)
5768 mask |= (1ULL << i);
5769
5770 if (level == 4)
5771 /* bits 7:3 reserved */
5772 mask |= 0xf8;
5773 else if (spte & (1ULL << 7))
5774 /*
5775 * 1GB/2MB page, bits 29:12 or 20:12 reserved respectively,
5776 * level == 1 if the hypervisor is using the ignored bit 7.
5777 */
5778 mask |= (PAGE_SIZE << ((level - 1) * 9)) - PAGE_SIZE;
5779 else if (level > 1)
5780 /* bits 6:3 reserved */
5781 mask |= 0x78;
5782
5783 return mask;
5784}
5785
5786static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte,
5787 int level)
5788{
5789 printk(KERN_ERR "%s: spte 0x%llx level %d\n", __func__, spte, level);
5790
5791 /* 010b (write-only) */
5792 WARN_ON((spte & 0x7) == 0x2);
5793
5794 /* 110b (write/execute) */
5795 WARN_ON((spte & 0x7) == 0x6);
5796
5797 /* 100b (execute-only) and value not supported by logical processor */
5798 if (!cpu_has_vmx_ept_execute_only())
5799 WARN_ON((spte & 0x7) == 0x4);
5800
5801 /* not 000b */
5802 if ((spte & 0x7)) {
5803 u64 rsvd_bits = spte & ept_rsvd_mask(spte, level);
5804
5805 if (rsvd_bits != 0) {
5806 printk(KERN_ERR "%s: rsvd_bits = 0x%llx\n",
5807 __func__, rsvd_bits);
5808 WARN_ON(1);
5809 }
5810
5811 /* bits 5:3 are _not_ reserved for large page or leaf page */
5812 if ((rsvd_bits & 0x38) == 0) {
5813 u64 ept_mem_type = (spte & 0x38) >> 3;
5814
5815 if (ept_mem_type == 2 || ept_mem_type == 3 ||
5816 ept_mem_type == 7) {
5817 printk(KERN_ERR "%s: ept_mem_type=0x%llx\n",
5818 __func__, ept_mem_type);
5819 WARN_ON(1);
5820 }
5821 }
5822 }
5823}
5824
5825static int handle_ept_misconfig(struct kvm_vcpu *vcpu) 5762static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
5826{ 5763{
5827 u64 sptes[4]; 5764 int ret;
5828 int nr_sptes, i, ret;
5829 gpa_t gpa; 5765 gpa_t gpa;
5830 5766
5831 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); 5767 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
@@ -5846,13 +5782,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
5846 return 1; 5782 return 1;
5847 5783
5848 /* It is the real ept misconfig */ 5784 /* It is the real ept misconfig */
5849 printk(KERN_ERR "EPT: Misconfiguration.\n"); 5785 WARN_ON(1);
5850 printk(KERN_ERR "EPT: GPA: 0x%llx\n", gpa);
5851
5852 nr_sptes = kvm_mmu_get_spte_hierarchy(vcpu, gpa, sptes);
5853
5854 for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i)
5855 ept_misconfig_inspect_spte(vcpu, sptes[i-1], i);
5856 5786
5857 vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; 5787 vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
5858 vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; 5788 vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG;
@@ -6246,6 +6176,11 @@ static int handle_mwait(struct kvm_vcpu *vcpu)
6246 return handle_nop(vcpu); 6176 return handle_nop(vcpu);
6247} 6177}
6248 6178
6179static int handle_monitor_trap(struct kvm_vcpu *vcpu)
6180{
6181 return 1;
6182}
6183
6249static int handle_monitor(struct kvm_vcpu *vcpu) 6184static int handle_monitor(struct kvm_vcpu *vcpu)
6250{ 6185{
6251 printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n"); 6186 printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
@@ -6408,8 +6343,12 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
6408 */ 6343 */
6409static int get_vmx_mem_address(struct kvm_vcpu *vcpu, 6344static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
6410 unsigned long exit_qualification, 6345 unsigned long exit_qualification,
6411 u32 vmx_instruction_info, gva_t *ret) 6346 u32 vmx_instruction_info, bool wr, gva_t *ret)
6412{ 6347{
6348 gva_t off;
6349 bool exn;
6350 struct kvm_segment s;
6351
6413 /* 6352 /*
6414 * According to Vol. 3B, "Information for VM Exits Due to Instruction 6353 * According to Vol. 3B, "Information for VM Exits Due to Instruction
6415 * Execution", on an exit, vmx_instruction_info holds most of the 6354 * Execution", on an exit, vmx_instruction_info holds most of the
@@ -6434,22 +6373,63 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
6434 6373
6435 /* Addr = segment_base + offset */ 6374 /* Addr = segment_base + offset */
6436 /* offset = base + [index * scale] + displacement */ 6375 /* offset = base + [index * scale] + displacement */
6437 *ret = vmx_get_segment_base(vcpu, seg_reg); 6376 off = exit_qualification; /* holds the displacement */
6438 if (base_is_valid) 6377 if (base_is_valid)
6439 *ret += kvm_register_read(vcpu, base_reg); 6378 off += kvm_register_read(vcpu, base_reg);
6440 if (index_is_valid) 6379 if (index_is_valid)
6441 *ret += kvm_register_read(vcpu, index_reg)<<scaling; 6380 off += kvm_register_read(vcpu, index_reg)<<scaling;
6442 *ret += exit_qualification; /* holds the displacement */ 6381 vmx_get_segment(vcpu, &s, seg_reg);
6382 *ret = s.base + off;
6443 6383
6444 if (addr_size == 1) /* 32 bit */ 6384 if (addr_size == 1) /* 32 bit */
6445 *ret &= 0xffffffff; 6385 *ret &= 0xffffffff;
6446 6386
6447 /* 6387 /* Checks for #GP/#SS exceptions. */
6448 * TODO: throw #GP (and return 1) in various cases that the VM* 6388 exn = false;
6449 * instructions require it - e.g., offset beyond segment limit, 6389 if (is_protmode(vcpu)) {
6450 * unusable or unreadable/unwritable segment, non-canonical 64-bit 6390 /* Protected mode: apply checks for segment validity in the
6451 * address, and so on. Currently these are not checked. 6391 * following order:
6452 */ 6392 * - segment type check (#GP(0) may be thrown)
6393 * - usability check (#GP(0)/#SS(0))
6394 * - limit check (#GP(0)/#SS(0))
6395 */
6396 if (wr)
6397 /* #GP(0) if the destination operand is located in a
6398 * read-only data segment or any code segment.
6399 */
6400 exn = ((s.type & 0xa) == 0 || (s.type & 8));
6401 else
6402 /* #GP(0) if the source operand is located in an
6403 * execute-only code segment
6404 */
6405 exn = ((s.type & 0xa) == 8);
6406 }
6407 if (exn) {
6408 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
6409 return 1;
6410 }
6411 if (is_long_mode(vcpu)) {
6412 /* Long mode: #GP(0)/#SS(0) if the memory address is in a
6413 * non-canonical form. This is an only check for long mode.
6414 */
6415 exn = is_noncanonical_address(*ret);
6416 } else if (is_protmode(vcpu)) {
6417 /* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
6418 */
6419 exn = (s.unusable != 0);
6420 /* Protected mode: #GP(0)/#SS(0) if the memory
6421 * operand is outside the segment limit.
6422 */
6423 exn = exn || (off + sizeof(u64) > s.limit);
6424 }
6425 if (exn) {
6426 kvm_queue_exception_e(vcpu,
6427 seg_reg == VCPU_SREG_SS ?
6428 SS_VECTOR : GP_VECTOR,
6429 0);
6430 return 1;
6431 }
6432
6453 return 0; 6433 return 0;
6454} 6434}
6455 6435
@@ -6471,7 +6451,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
6471 int maxphyaddr = cpuid_maxphyaddr(vcpu); 6451 int maxphyaddr = cpuid_maxphyaddr(vcpu);
6472 6452
6473 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), 6453 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
6474 vmcs_read32(VMX_INSTRUCTION_INFO), &gva)) 6454 vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
6475 return 1; 6455 return 1;
6476 6456
6477 if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr, 6457 if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
@@ -6999,7 +6979,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
6999 field_value); 6979 field_value);
7000 } else { 6980 } else {
7001 if (get_vmx_mem_address(vcpu, exit_qualification, 6981 if (get_vmx_mem_address(vcpu, exit_qualification,
7002 vmx_instruction_info, &gva)) 6982 vmx_instruction_info, true, &gva))
7003 return 1; 6983 return 1;
7004 /* _system ok, as nested_vmx_check_permission verified cpl=0 */ 6984 /* _system ok, as nested_vmx_check_permission verified cpl=0 */
7005 kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva, 6985 kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva,
@@ -7036,7 +7016,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
7036 (((vmx_instruction_info) >> 3) & 0xf)); 7016 (((vmx_instruction_info) >> 3) & 0xf));
7037 else { 7017 else {
7038 if (get_vmx_mem_address(vcpu, exit_qualification, 7018 if (get_vmx_mem_address(vcpu, exit_qualification,
7039 vmx_instruction_info, &gva)) 7019 vmx_instruction_info, false, &gva))
7040 return 1; 7020 return 1;
7041 if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, 7021 if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva,
7042 &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) { 7022 &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
@@ -7128,7 +7108,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
7128 return 1; 7108 return 1;
7129 7109
7130 if (get_vmx_mem_address(vcpu, exit_qualification, 7110 if (get_vmx_mem_address(vcpu, exit_qualification,
7131 vmx_instruction_info, &vmcs_gva)) 7111 vmx_instruction_info, true, &vmcs_gva))
7132 return 1; 7112 return 1;
7133 /* ok to use *_system, as nested_vmx_check_permission verified cpl=0 */ 7113 /* ok to use *_system, as nested_vmx_check_permission verified cpl=0 */
7134 if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva, 7114 if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva,
@@ -7184,7 +7164,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
7184 * operand is read even if it isn't needed (e.g., for type==global) 7164 * operand is read even if it isn't needed (e.g., for type==global)
7185 */ 7165 */
7186 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), 7166 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
7187 vmx_instruction_info, &gva)) 7167 vmx_instruction_info, false, &gva))
7188 return 1; 7168 return 1;
7189 if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand, 7169 if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
7190 sizeof(operand), &e)) { 7170 sizeof(operand), &e)) {
@@ -7282,6 +7262,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
7282 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, 7262 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
7283 [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, 7263 [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
7284 [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait, 7264 [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
7265 [EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap,
7285 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, 7266 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
7286 [EXIT_REASON_INVEPT] = handle_invept, 7267 [EXIT_REASON_INVEPT] = handle_invept,
7287 [EXIT_REASON_INVVPID] = handle_invvpid, 7268 [EXIT_REASON_INVVPID] = handle_invvpid,
@@ -7542,6 +7523,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
7542 return true; 7523 return true;
7543 case EXIT_REASON_MWAIT_INSTRUCTION: 7524 case EXIT_REASON_MWAIT_INSTRUCTION:
7544 return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING); 7525 return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
7526 case EXIT_REASON_MONITOR_TRAP_FLAG:
7527 return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG);
7545 case EXIT_REASON_MONITOR_INSTRUCTION: 7528 case EXIT_REASON_MONITOR_INSTRUCTION:
7546 return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING); 7529 return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
7547 case EXIT_REASON_PAUSE_INSTRUCTION: 7530 case EXIT_REASON_PAUSE_INSTRUCTION:
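
The get_vmx_mem_address() rework above both computes the operand address through the segment base and adds the #GP/#SS checks the old TODO promised. Below is a condensed sketch of the two pieces visible in the hunk; the helper names are illustrative, and base/index validity, the segment-limit check and the canonical-address check are left out for brevity:

#include <stdbool.h>
#include <stdint.h>

/* Address = segment base + (displacement + base reg + index reg << scale),
 * truncated to 32 bits when the instruction used a 32-bit address size. */
static uint64_t vmx_operand_addr(uint64_t seg_base, uint64_t disp,
				 uint64_t base_reg, uint64_t index_reg,
				 unsigned scaling, bool addr32)
{
	uint64_t addr = seg_base + disp + base_reg + (index_reg << scaling);

	return addr32 ? addr & 0xffffffffULL : addr;
}

/* Segment-type check: a write faults on any code segment or a read-only
 * data segment; a read faults only on an execute-only code segment. */
static bool vmx_operand_segment_faults(unsigned seg_type, bool is_write)
{
	if (is_write)
		return (seg_type & 0xa) == 0 || (seg_type & 8);
	return (seg_type & 0xa) == 8;
}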
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f0f6eca69da..4bbc2a1676c9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -29,6 +29,7 @@
29#include "cpuid.h" 29#include "cpuid.h"
30#include "assigned-dev.h" 30#include "assigned-dev.h"
31#include "pmu.h" 31#include "pmu.h"
32#include "hyperv.h"
32 33
33#include <linux/clocksource.h> 34#include <linux/clocksource.h>
34#include <linux/interrupt.h> 35#include <linux/interrupt.h>
@@ -221,11 +222,9 @@ static void shared_msr_update(unsigned slot, u32 msr)
221void kvm_define_shared_msr(unsigned slot, u32 msr) 222void kvm_define_shared_msr(unsigned slot, u32 msr)
222{ 223{
223 BUG_ON(slot >= KVM_NR_SHARED_MSRS); 224 BUG_ON(slot >= KVM_NR_SHARED_MSRS);
225 shared_msrs_global.msrs[slot] = msr;
224 if (slot >= shared_msrs_global.nr) 226 if (slot >= shared_msrs_global.nr)
225 shared_msrs_global.nr = slot + 1; 227 shared_msrs_global.nr = slot + 1;
226 shared_msrs_global.msrs[slot] = msr;
227 /* we need ensured the shared_msr_global have been updated */
228 smp_wmb();
229} 228}
230EXPORT_SYMBOL_GPL(kvm_define_shared_msr); 229EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
231 230
@@ -526,7 +525,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
526 } 525 }
527 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { 526 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
528 if (is_present_gpte(pdpte[i]) && 527 if (is_present_gpte(pdpte[i]) &&
529 (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) { 528 (pdpte[i] &
529 vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) {
530 ret = 0; 530 ret = 0;
531 goto out; 531 goto out;
532 } 532 }
@@ -949,6 +949,8 @@ static u32 emulated_msrs[] = {
949 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, 949 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
950 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, 950 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
951 HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, 951 HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
952 HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
953 HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
952 HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, 954 HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
953 MSR_KVM_PV_EOI_EN, 955 MSR_KVM_PV_EOI_EN,
954 956
@@ -1217,11 +1219,6 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
1217 __func__, base_khz, scaled_khz, shift, *pmultiplier); 1219 __func__, base_khz, scaled_khz, shift, *pmultiplier);
1218} 1220}
1219 1221
1220static inline u64 get_kernel_ns(void)
1221{
1222 return ktime_get_boot_ns();
1223}
1224
1225#ifdef CONFIG_X86_64 1222#ifdef CONFIG_X86_64
1226static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0); 1223static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
1227#endif 1224#endif
@@ -1869,123 +1866,6 @@ out:
1869 return r; 1866 return r;
1870} 1867}
1871 1868
1872static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
1873{
1874 return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
1875}
1876
1877static bool kvm_hv_msr_partition_wide(u32 msr)
1878{
1879 bool r = false;
1880 switch (msr) {
1881 case HV_X64_MSR_GUEST_OS_ID:
1882 case HV_X64_MSR_HYPERCALL:
1883 case HV_X64_MSR_REFERENCE_TSC:
1884 case HV_X64_MSR_TIME_REF_COUNT:
1885 r = true;
1886 break;
1887 }
1888
1889 return r;
1890}
1891
1892static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1893{
1894 struct kvm *kvm = vcpu->kvm;
1895
1896 switch (msr) {
1897 case HV_X64_MSR_GUEST_OS_ID:
1898 kvm->arch.hv_guest_os_id = data;
1899 /* setting guest os id to zero disables hypercall page */
1900 if (!kvm->arch.hv_guest_os_id)
1901 kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
1902 break;
1903 case HV_X64_MSR_HYPERCALL: {
1904 u64 gfn;
1905 unsigned long addr;
1906 u8 instructions[4];
1907
1908 /* if guest os id is not set hypercall should remain disabled */
1909 if (!kvm->arch.hv_guest_os_id)
1910 break;
1911 if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
1912 kvm->arch.hv_hypercall = data;
1913 break;
1914 }
1915 gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
1916 addr = gfn_to_hva(kvm, gfn);
1917 if (kvm_is_error_hva(addr))
1918 return 1;
1919 kvm_x86_ops->patch_hypercall(vcpu, instructions);
1920 ((unsigned char *)instructions)[3] = 0xc3; /* ret */
1921 if (__copy_to_user((void __user *)addr, instructions, 4))
1922 return 1;
1923 kvm->arch.hv_hypercall = data;
1924 mark_page_dirty(kvm, gfn);
1925 break;
1926 }
1927 case HV_X64_MSR_REFERENCE_TSC: {
1928 u64 gfn;
1929 HV_REFERENCE_TSC_PAGE tsc_ref;
1930 memset(&tsc_ref, 0, sizeof(tsc_ref));
1931 kvm->arch.hv_tsc_page = data;
1932 if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
1933 break;
1934 gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
1935 if (kvm_write_guest(kvm, gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
1936 &tsc_ref, sizeof(tsc_ref)))
1937 return 1;
1938 mark_page_dirty(kvm, gfn);
1939 break;
1940 }
1941 default:
1942 vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1943 "data 0x%llx\n", msr, data);
1944 return 1;
1945 }
1946 return 0;
1947}
1948
1949static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1950{
1951 switch (msr) {
1952 case HV_X64_MSR_APIC_ASSIST_PAGE: {
1953 u64 gfn;
1954 unsigned long addr;
1955
1956 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
1957 vcpu->arch.hv_vapic = data;
1958 if (kvm_lapic_enable_pv_eoi(vcpu, 0))
1959 return 1;
1960 break;
1961 }
1962 gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
1963 addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
1964 if (kvm_is_error_hva(addr))
1965 return 1;
1966 if (__clear_user((void __user *)addr, PAGE_SIZE))
1967 return 1;
1968 vcpu->arch.hv_vapic = data;
1969 kvm_vcpu_mark_page_dirty(vcpu, gfn);
1970 if (kvm_lapic_enable_pv_eoi(vcpu, gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
1971 return 1;
1972 break;
1973 }
1974 case HV_X64_MSR_EOI:
1975 return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
1976 case HV_X64_MSR_ICR:
1977 return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
1978 case HV_X64_MSR_TPR:
1979 return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
1980 default:
1981 vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1982 "data 0x%llx\n", msr, data);
1983 return 1;
1984 }
1985
1986 return 0;
1987}
1988
1989static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) 1869static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
1990{ 1870{
1991 gpa_t gpa = data & ~0x3f; 1871 gpa_t gpa = data & ~0x3f;
@@ -2224,15 +2104,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2224 */ 2104 */
2225 break; 2105 break;
2226 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: 2106 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2227 if (kvm_hv_msr_partition_wide(msr)) { 2107 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
2228 int r; 2108 case HV_X64_MSR_CRASH_CTL:
2229 mutex_lock(&vcpu->kvm->lock); 2109 return kvm_hv_set_msr_common(vcpu, msr, data,
2230 r = set_msr_hyperv_pw(vcpu, msr, data); 2110 msr_info->host_initiated);
2231 mutex_unlock(&vcpu->kvm->lock);
2232 return r;
2233 } else
2234 return set_msr_hyperv(vcpu, msr, data);
2235 break;
2236 case MSR_IA32_BBL_CR_CTL3: 2111 case MSR_IA32_BBL_CR_CTL3:
2237 /* Drop writes to this legacy MSR -- see rdmsr 2112 /* Drop writes to this legacy MSR -- see rdmsr
2238 * counterpart for further detail. 2113 * counterpart for further detail.
@@ -2315,68 +2190,6 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2315 return 0; 2190 return 0;
2316} 2191}
2317 2192
2318static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2319{
2320 u64 data = 0;
2321 struct kvm *kvm = vcpu->kvm;
2322
2323 switch (msr) {
2324 case HV_X64_MSR_GUEST_OS_ID:
2325 data = kvm->arch.hv_guest_os_id;
2326 break;
2327 case HV_X64_MSR_HYPERCALL:
2328 data = kvm->arch.hv_hypercall;
2329 break;
2330 case HV_X64_MSR_TIME_REF_COUNT: {
2331 data =
2332 div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
2333 break;
2334 }
2335 case HV_X64_MSR_REFERENCE_TSC:
2336 data = kvm->arch.hv_tsc_page;
2337 break;
2338 default:
2339 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
2340 return 1;
2341 }
2342
2343 *pdata = data;
2344 return 0;
2345}
2346
2347static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2348{
2349 u64 data = 0;
2350
2351 switch (msr) {
2352 case HV_X64_MSR_VP_INDEX: {
2353 int r;
2354 struct kvm_vcpu *v;
2355 kvm_for_each_vcpu(r, v, vcpu->kvm) {
2356 if (v == vcpu) {
2357 data = r;
2358 break;
2359 }
2360 }
2361 break;
2362 }
2363 case HV_X64_MSR_EOI:
2364 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
2365 case HV_X64_MSR_ICR:
2366 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
2367 case HV_X64_MSR_TPR:
2368 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
2369 case HV_X64_MSR_APIC_ASSIST_PAGE:
2370 data = vcpu->arch.hv_vapic;
2371 break;
2372 default:
2373 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
2374 return 1;
2375 }
2376 *pdata = data;
2377 return 0;
2378}
2379
2380int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) 2193int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2381{ 2194{
2382 switch (msr_info->index) { 2195 switch (msr_info->index) {
@@ -2493,14 +2306,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2493 msr_info->data = 0x20000000; 2306 msr_info->data = 0x20000000;
2494 break; 2307 break;
2495 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: 2308 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2496 if (kvm_hv_msr_partition_wide(msr_info->index)) { 2309 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
2497 int r; 2310 case HV_X64_MSR_CRASH_CTL:
2498 mutex_lock(&vcpu->kvm->lock); 2311 return kvm_hv_get_msr_common(vcpu,
2499 r = get_msr_hyperv_pw(vcpu, msr_info->index, &msr_info->data); 2312 msr_info->index, &msr_info->data);
2500 mutex_unlock(&vcpu->kvm->lock);
2501 return r;
2502 } else
2503 return get_msr_hyperv(vcpu, msr_info->index, &msr_info->data);
2504 break; 2313 break;
2505 case MSR_IA32_BBL_CR_CTL3: 2314 case MSR_IA32_BBL_CR_CTL3:
2506 /* This legacy MSR exists but isn't fully documented in current 2315 /* This legacy MSR exists but isn't fully documented in current
@@ -2651,6 +2460,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
2651 case KVM_CAP_TSC_DEADLINE_TIMER: 2460 case KVM_CAP_TSC_DEADLINE_TIMER:
2652 case KVM_CAP_ENABLE_CAP_VM: 2461 case KVM_CAP_ENABLE_CAP_VM:
2653 case KVM_CAP_DISABLE_QUIRKS: 2462 case KVM_CAP_DISABLE_QUIRKS:
2463 case KVM_CAP_SET_BOOT_CPU_ID:
2654#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT 2464#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2655 case KVM_CAP_ASSIGN_DEV_IRQ: 2465 case KVM_CAP_ASSIGN_DEV_IRQ:
2656 case KVM_CAP_PCI_2_3: 2466 case KVM_CAP_PCI_2_3:
@@ -3817,30 +3627,25 @@ long kvm_arch_vm_ioctl(struct file *filp,
3817 r = kvm_ioapic_init(kvm); 3627 r = kvm_ioapic_init(kvm);
3818 if (r) { 3628 if (r) {
3819 mutex_lock(&kvm->slots_lock); 3629 mutex_lock(&kvm->slots_lock);
3820 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, 3630 kvm_destroy_pic(vpic);
3821 &vpic->dev_master);
3822 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3823 &vpic->dev_slave);
3824 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3825 &vpic->dev_eclr);
3826 mutex_unlock(&kvm->slots_lock); 3631 mutex_unlock(&kvm->slots_lock);
3827 kfree(vpic);
3828 goto create_irqchip_unlock; 3632 goto create_irqchip_unlock;
3829 } 3633 }
3830 } else 3634 } else
3831 goto create_irqchip_unlock; 3635 goto create_irqchip_unlock;
3832 smp_wmb();
3833 kvm->arch.vpic = vpic;
3834 smp_wmb();
3835 r = kvm_setup_default_irq_routing(kvm); 3636 r = kvm_setup_default_irq_routing(kvm);
3836 if (r) { 3637 if (r) {
3837 mutex_lock(&kvm->slots_lock); 3638 mutex_lock(&kvm->slots_lock);
3838 mutex_lock(&kvm->irq_lock); 3639 mutex_lock(&kvm->irq_lock);
3839 kvm_ioapic_destroy(kvm); 3640 kvm_ioapic_destroy(kvm);
3840 kvm_destroy_pic(kvm); 3641 kvm_destroy_pic(vpic);
3841 mutex_unlock(&kvm->irq_lock); 3642 mutex_unlock(&kvm->irq_lock);
3842 mutex_unlock(&kvm->slots_lock); 3643 mutex_unlock(&kvm->slots_lock);
3644 goto create_irqchip_unlock;
3843 } 3645 }
3646 /* Write kvm->irq_routing before kvm->arch.vpic. */
3647 smp_wmb();
3648 kvm->arch.vpic = vpic;
3844 create_irqchip_unlock: 3649 create_irqchip_unlock:
3845 mutex_unlock(&kvm->lock); 3650 mutex_unlock(&kvm->lock);
3846 break; 3651 break;
@@ -3967,6 +3772,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
3967 r = kvm_vm_ioctl_reinject(kvm, &control); 3772 r = kvm_vm_ioctl_reinject(kvm, &control);
3968 break; 3773 break;
3969 } 3774 }
3775 case KVM_SET_BOOT_CPU_ID:
3776 r = 0;
3777 mutex_lock(&kvm->lock);
3778 if (atomic_read(&kvm->online_vcpus) != 0)
3779 r = -EBUSY;
3780 else
3781 kvm->arch.bsp_vcpu_id = arg;
3782 mutex_unlock(&kvm->lock);
3783 break;
3970 case KVM_XEN_HVM_CONFIG: { 3784 case KVM_XEN_HVM_CONFIG: {
3971 r = -EFAULT; 3785 r = -EFAULT;
3972 if (copy_from_user(&kvm->arch.xen_hvm_config, argp, 3786 if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
@@ -5882,66 +5696,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
5882} 5696}
5883EXPORT_SYMBOL_GPL(kvm_emulate_halt); 5697EXPORT_SYMBOL_GPL(kvm_emulate_halt);
5884 5698
5885int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
5886{
5887 u64 param, ingpa, outgpa, ret;
5888 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
5889 bool fast, longmode;
5890
5891 /*
5892 * hypercall generates UD from non zero cpl and real mode
5893 * per HYPER-V spec
5894 */
5895 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
5896 kvm_queue_exception(vcpu, UD_VECTOR);
5897 return 0;
5898 }
5899
5900 longmode = is_64_bit_mode(vcpu);
5901
5902 if (!longmode) {
5903 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
5904 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
5905 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
5906 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
5907 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
5908 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
5909 }
5910#ifdef CONFIG_X86_64
5911 else {
5912 param = kvm_register_read(vcpu, VCPU_REGS_RCX);
5913 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
5914 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
5915 }
5916#endif
5917
5918 code = param & 0xffff;
5919 fast = (param >> 16) & 0x1;
5920 rep_cnt = (param >> 32) & 0xfff;
5921 rep_idx = (param >> 48) & 0xfff;
5922
5923 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
5924
5925 switch (code) {
5926 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
5927 kvm_vcpu_on_spin(vcpu);
5928 break;
5929 default:
5930 res = HV_STATUS_INVALID_HYPERCALL_CODE;
5931 break;
5932 }
5933
5934 ret = res | (((u64)rep_done & 0xfff) << 32);
5935 if (longmode) {
5936 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
5937 } else {
5938 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
5939 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
5940 }
5941
5942 return 1;
5943}
5944
5945/* 5699/*
5946 * kvm_pv_kick_cpu_op: Kick a vcpu. 5700 * kvm_pv_kick_cpu_op: Kick a vcpu.
5947 * 5701 *
@@ -6518,6 +6272,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
6518 vcpu_scan_ioapic(vcpu); 6272 vcpu_scan_ioapic(vcpu);
6519 if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) 6273 if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
6520 kvm_vcpu_reload_apic_access_page(vcpu); 6274 kvm_vcpu_reload_apic_access_page(vcpu);
6275 if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
6276 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
6277 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
6278 r = 0;
6279 goto out;
6280 }
6521 } 6281 }
6522 6282
6523 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { 6283 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -7540,6 +7300,17 @@ void kvm_arch_check_processor_compat(void *rtn)
7540 kvm_x86_ops->check_processor_compatibility(rtn); 7300 kvm_x86_ops->check_processor_compatibility(rtn);
7541} 7301}
7542 7302
7303bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
7304{
7305 return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
7306}
7307EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
7308
7309bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
7310{
7311 return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
7312}
7313
7543bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) 7314bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
7544{ 7315{
7545 return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); 7316 return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
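
With the Hyper-V crash MSRs now routed to kvm_hv_set_msr_common() and KVM_REQ_HV_CRASH turned into a KVM_EXIT_SYSTEM_EVENT exit of type KVM_SYSTEM_EVENT_CRASH, userspace sees a guest crash as an ordinary run-loop exit. A hedged sketch of how a VMM might consume it, assuming headers that already carry the new KVM_SYSTEM_EVENT_CRASH definition; the loop structure, helper name and messages are illustrative:

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdio.h>

/* Returns 1 to stop running this vcpu, 0 to keep going, -1 on error. */
static int run_vcpu_once(int vcpu_fd, struct kvm_run *run)
{
	if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
		return -1;

	switch (run->exit_reason) {
	case KVM_EXIT_SYSTEM_EVENT:
		switch (run->system_event.type) {
		case KVM_SYSTEM_EVENT_CRASH:
			/* Guest reported a crash via the Hyper-V crash MSRs. */
			fprintf(stderr, "guest crash reported, collecting dump\n");
			return 1;
		case KVM_SYSTEM_EVENT_SHUTDOWN:
		case KVM_SYSTEM_EVENT_RESET:
			return 1;
		}
		break;
	default:
		break;
	}
	return 0;
}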
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 0ca2f3e4803c..2f822cd886c2 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -147,6 +147,11 @@ static inline void kvm_register_writel(struct kvm_vcpu *vcpu,
147 return kvm_register_write(vcpu, reg, val); 147 return kvm_register_write(vcpu, reg, val);
148} 148}
149 149
150static inline u64 get_kernel_ns(void)
151{
152 return ktime_get_boot_ns();
153}
154
150static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk) 155static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
151{ 156{
152 return !(kvm->arch.disabled_quirks & quirk); 157 return !(kvm->arch.disabled_quirks & quirk);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 05e99b8ef465..81089cf1f0c1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -139,6 +139,7 @@ static inline bool is_error_page(struct page *page)
139#define KVM_REQ_DISABLE_IBS 24 139#define KVM_REQ_DISABLE_IBS 24
140#define KVM_REQ_APIC_PAGE_RELOAD 25 140#define KVM_REQ_APIC_PAGE_RELOAD 25
141#define KVM_REQ_SMI 26 141#define KVM_REQ_SMI 26
142#define KVM_REQ_HV_CRASH 27
142 143
143#define KVM_USERSPACE_IRQ_SOURCE_ID 0 144#define KVM_USERSPACE_IRQ_SOURCE_ID 0
144#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 145#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
@@ -363,9 +364,6 @@ struct kvm {
363 struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM]; 364 struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM];
364 struct srcu_struct srcu; 365 struct srcu_struct srcu;
365 struct srcu_struct irq_srcu; 366 struct srcu_struct irq_srcu;
366#ifdef CONFIG_KVM_APIC_ARCHITECTURE
367 u32 bsp_vcpu_id;
368#endif
369 struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; 367 struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
370 atomic_t online_vcpus; 368 atomic_t online_vcpus;
371 int last_boosted_vcpu; 369 int last_boosted_vcpu;
@@ -424,8 +422,15 @@ struct kvm {
424#define vcpu_unimpl(vcpu, fmt, ...) \ 422#define vcpu_unimpl(vcpu, fmt, ...) \
425 kvm_pr_unimpl("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) 423 kvm_pr_unimpl("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
426 424
425#define vcpu_debug(vcpu, fmt, ...) \
426 kvm_debug("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
427
427static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) 428static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
428{ 429{
430 /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case
431 * the caller has read kvm->online_vcpus before (as is the case
432 * for kvm_for_each_vcpu, for example).
433 */
429 smp_rmb(); 434 smp_rmb();
430 return kvm->vcpus[i]; 435 return kvm->vcpus[i];
431} 436}
@@ -1055,22 +1060,9 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
1055#endif /* CONFIG_HAVE_KVM_EVENTFD */ 1060#endif /* CONFIG_HAVE_KVM_EVENTFD */
1056 1061
1057#ifdef CONFIG_KVM_APIC_ARCHITECTURE 1062#ifdef CONFIG_KVM_APIC_ARCHITECTURE
1058static inline bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
1059{
1060 return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id;
1061}
1062
1063static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
1064{
1065 return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
1066}
1067
1068bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu); 1063bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu);
1069
1070#else 1064#else
1071
1072static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; } 1065static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; }
1073
1074#endif 1066#endif
1075 1067
1076static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) 1068static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 716ad4ae4d4b..0d831f94f8a8 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -317,6 +317,7 @@ struct kvm_run {
317 struct { 317 struct {
318#define KVM_SYSTEM_EVENT_SHUTDOWN 1 318#define KVM_SYSTEM_EVENT_SHUTDOWN 1
319#define KVM_SYSTEM_EVENT_RESET 2 319#define KVM_SYSTEM_EVENT_RESET 2
320#define KVM_SYSTEM_EVENT_CRASH 3
320 __u32 type; 321 __u32 type;
321 __u64 flags; 322 __u64 flags;
322 } system_event; 323 } system_event;
@@ -481,6 +482,7 @@ struct kvm_s390_psw {
481 ((ai) << 26)) 482 ((ai) << 26))
482#define KVM_S390_INT_IO_MIN 0x00000000u 483#define KVM_S390_INT_IO_MIN 0x00000000u
483#define KVM_S390_INT_IO_MAX 0xfffdffffu 484#define KVM_S390_INT_IO_MAX 0xfffdffffu
485#define KVM_S390_INT_IO_AI_MASK 0x04000000u
484 486
485 487
486struct kvm_s390_interrupt { 488struct kvm_s390_interrupt {
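
The new KVM_S390_INT_IO_AI_MASK selects the same bit that the KVM_S390_INT_IO() macro above places at position 26, so callers can test whether an I/O interrupt type encodes an adapter interrupt. A small illustration; the helper name is an assumption:

#include <stdbool.h>
#include <stdint.h>

#define KVM_S390_INT_IO_AI_MASK 0x04000000u

static bool is_adapter_io_interrupt(uint32_t type)
{
	return (type & KVM_S390_INT_IO_AI_MASK) != 0;
}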
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8b8a44453670..d8db2f8fce9c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2206,6 +2206,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
2206 } 2206 }
2207 2207
2208 kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu; 2208 kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
2209
2210 /*
2211 * Pairs with smp_rmb() in kvm_get_vcpu. Write kvm->vcpus
2212 * before kvm->online_vcpu's incremented value.
2213 */
2209 smp_wmb(); 2214 smp_wmb();
2210 atomic_inc(&kvm->online_vcpus); 2215 atomic_inc(&kvm->online_vcpus);
2211 2216
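
The two comments added here and in include/linux/kvm_host.h describe a classic publish/consume pairing: the creator stores the kvm->vcpus slot before bumping kvm->online_vcpus (smp_wmb), and readers load the count before dereferencing a slot (smp_rmb). Below is a user-space sketch of the same pattern using C11 acquire/release atomics in place of the kernel barriers; the names and the single-writer assumption (the kernel path runs under kvm->lock) are illustrative:

#include <stdatomic.h>
#include <stddef.h>

#define MAX_VCPUS 8

struct vcpu { int id; };

static struct vcpu *vcpus[MAX_VCPUS];
static atomic_int online_vcpus = 0;

/* Writer (vcpu creation): fill the array slot before bumping the count. */
static void publish_vcpu(struct vcpu *v)
{
	int n = atomic_load_explicit(&online_vcpus, memory_order_relaxed);

	vcpus[n] = v;
	/* Release pairs with the acquire below, like smp_wmb()/smp_rmb(). */
	atomic_store_explicit(&online_vcpus, n + 1, memory_order_release);
}

/* Reader (kvm_get_vcpu): only dereference slots below the count just read. */
static struct vcpu *get_vcpu(int i)
{
	if (i >= atomic_load_explicit(&online_vcpus, memory_order_acquire))
		return NULL;
	return vcpus[i];
}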
@@ -2618,9 +2623,6 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
2618 case KVM_CAP_USER_MEMORY: 2623 case KVM_CAP_USER_MEMORY:
2619 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: 2624 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
2620 case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: 2625 case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
2621#ifdef CONFIG_KVM_APIC_ARCHITECTURE
2622 case KVM_CAP_SET_BOOT_CPU_ID:
2623#endif
2624 case KVM_CAP_INTERNAL_ERROR_DATA: 2626 case KVM_CAP_INTERNAL_ERROR_DATA:
2625#ifdef CONFIG_HAVE_KVM_MSI 2627#ifdef CONFIG_HAVE_KVM_MSI
2626 case KVM_CAP_SIGNAL_MSI: 2628 case KVM_CAP_SIGNAL_MSI:
@@ -2716,17 +2718,6 @@ static long kvm_vm_ioctl(struct file *filp,
2716 r = kvm_ioeventfd(kvm, &data); 2718 r = kvm_ioeventfd(kvm, &data);
2717 break; 2719 break;
2718 } 2720 }
2719#ifdef CONFIG_KVM_APIC_ARCHITECTURE
2720 case KVM_SET_BOOT_CPU_ID:
2721 r = 0;
2722 mutex_lock(&kvm->lock);
2723 if (atomic_read(&kvm->online_vcpus) != 0)
2724 r = -EBUSY;
2725 else
2726 kvm->bsp_vcpu_id = arg;
2727 mutex_unlock(&kvm->lock);
2728 break;
2729#endif
2730#ifdef CONFIG_HAVE_KVM_MSI 2721#ifdef CONFIG_HAVE_KVM_MSI
2731 case KVM_SIGNAL_MSI: { 2722 case KVM_SIGNAL_MSI: {
2732 struct kvm_msi msi; 2723 struct kvm_msi msi;