author	Linus Torvalds <torvalds@linux-foundation.org>	2014-01-23 00:40:43 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-01-23 00:40:43 -0500
commit	7ebd3faa9b5b42caf2d5aa1352a93dcfa0098011 (patch)
tree	c45acf88b7976dcec117b6a3dbe31a7fe710ef33
parent	bb1281f2aae08e5ef23eb0692c8833e95579cdf2 (diff)
parent	7650b6870930055426abb32cc47d164ccdea49db (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
 "First round of KVM updates for 3.14; PPC parts will come next week.
  Nothing major here, just bugfixes all over the place.  The most
  interesting part is the ARM guys' virtualized interrupt controller
  overhaul, which lets userspace get/set the state and thus enables
  migration of ARM VMs"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (67 commits)
  kvm: make KVM_MMU_AUDIT help text more readable
  KVM: s390: Fix memory access error detection
  KVM: nVMX: Update guest activity state field on L2 exits
  KVM: nVMX: Fix nested_run_pending on activity state HLT
  KVM: nVMX: Clean up handling of VMX-related MSRs
  KVM: nVMX: Add tracepoints for nested_vmexit and nested_vmexit_inject
  KVM: nVMX: Pass vmexit parameters to nested_vmx_vmexit
  KVM: nVMX: Leave VMX mode on clearing of feature control MSR
  KVM: VMX: Fix DR6 update on #DB exception
  KVM: SVM: Fix reading of DR6
  KVM: x86: Sync DR7 on KVM_SET_DEBUGREGS
  add support for Hyper-V reference time counter
  KVM: remove useless write to vcpu->hv_clock.tsc_timestamp
  KVM: x86: fix tsc catchup issue with tsc scaling
  KVM: x86: limit PIT timer frequency
  KVM: x86: handle invalid root_hpa everywhere
  kvm: Provide kvm_vcpu_eligible_for_directed_yield() stub
  kvm: vfio: silence GCC warning
  KVM: ARM: Remove duplicate include
  arm/arm64: KVM: relax the requirements of VMA alignment for THP
  ...
-rw-r--r--  Documentation/virtual/kvm/api.txt | 13
-rw-r--r--  Documentation/virtual/kvm/devices/arm-vgic.txt | 73
-rw-r--r--  Documentation/virtual/kvm/hypercalls.txt | 5
-rw-r--r--  Documentation/virtual/kvm/locking.txt | 4
-rw-r--r--  Documentation/virtual/kvm/ppc-pv.txt | 2
-rw-r--r--  Documentation/virtual/kvm/s390-diag.txt | 80
-rw-r--r--  Documentation/virtual/kvm/timekeeping.txt | 2
-rw-r--r--  MAINTAINERS | 2
-rw-r--r--  arch/arm/include/asm/kvm_host.h | 3
-rw-r--r--  arch/arm/include/asm/kvm_mmu.h | 1
-rw-r--r--  arch/arm/include/uapi/asm/kvm.h | 28
-rw-r--r--  arch/arm/kvm/arm.c | 49
-rw-r--r--  arch/arm/kvm/guest.c | 92
-rw-r--r--  arch/arm/kvm/handle_exit.c | 2
-rw-r--r--  arch/arm/kvm/mmu.c | 24
-rw-r--r--  arch/arm/kvm/psci.c | 11
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 7
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h | 1
-rw-r--r--  arch/arm64/include/uapi/asm/kvm.h | 21
-rw-r--r--  arch/arm64/kvm/Kconfig | 11
-rw-r--r--  arch/arm64/kvm/guest.c | 32
-rw-r--r--  arch/arm64/kvm/handle_exit.c | 3
-rw-r--r--  arch/arm64/kvm/sys_regs_generic_v8.c | 3
-rw-r--r--  arch/ia64/kvm/kvm-ia64.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 2
-rw-r--r--  arch/s390/include/asm/sigp.h | 2
-rw-r--r--  arch/s390/kvm/diag.c | 4
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 55
-rw-r--r--  arch/s390/kvm/kvm-s390.h | 10
-rw-r--r--  arch/s390/kvm/priv.c | 4
-rw-r--r--  arch/s390/kvm/sigp.c | 120
-rw-r--r--  arch/s390/kvm/trace.h | 1
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 3
-rw-r--r--  arch/x86/include/asm/vmx.h | 1
-rw-r--r--  arch/x86/include/uapi/asm/hyperv.h | 13
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h | 1
-rw-r--r--  arch/x86/kvm/Kconfig | 2
-rw-r--r--  arch/x86/kvm/i8254.c | 18
-rw-r--r--  arch/x86/kvm/lapic.c | 9
-rw-r--r--  arch/x86/kvm/mmu.c | 12
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 8
-rw-r--r--  arch/x86/kvm/svm.c | 15
-rw-r--r--  arch/x86/kvm/vmx.c | 323
-rw-r--r--  arch/x86/kvm/x86.c | 101
-rw-r--r--  arch/x86/kvm/x86.h | 2
-rw-r--r--  include/kvm/arm_vgic.h | 2
-rw-r--r--  include/linux/irqchip/arm-gic.h | 12
-rw-r--r--  include/linux/kvm_host.h | 20
-rw-r--r--  include/uapi/linux/kvm.h | 2
-rw-r--r--  virt/kvm/arm/arch_timer.c | 34
-rw-r--r--  virt/kvm/arm/vgic.c | 584
-rw-r--r--  virt/kvm/ioapic.c | 2
-rw-r--r--  virt/kvm/ioapic.h | 1
-rw-r--r--  virt/kvm/kvm_main.c | 81
-rw-r--r--  virt/kvm/vfio.c | 6
55 files changed, 1515 insertions(+), 406 deletions(-)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index a30035dd4c26..366bf4b47ef4 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2104,7 +2104,7 @@ Returns: 0 on success, -1 on error
 Allows setting an eventfd to directly trigger a guest interrupt.
 kvm_irqfd.fd specifies the file descriptor to use as the eventfd and
 kvm_irqfd.gsi specifies the irqchip pin toggled by this event. When
-an event is tiggered on the eventfd, an interrupt is injected into
+an event is triggered on the eventfd, an interrupt is injected into
 the guest using the specified gsi pin. The irqfd is removed using
 the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd
 and kvm_irqfd.gsi.
@@ -2115,7 +2115,7 @@ interrupts. When KVM_IRQFD_FLAG_RESAMPLE is set the user must pass an
 additional eventfd in the kvm_irqfd.resamplefd field. When operating
 in resample mode, posting of an interrupt through kvm_irq.fd asserts
 the specified gsi in the irqchip. When the irqchip is resampled, such
-as from an EOI, the gsi is de-asserted and the user is notifed via
+as from an EOI, the gsi is de-asserted and the user is notified via
 kvm_irqfd.resamplefd. It is the user's responsibility to re-queue
 the interrupt if the device making use of it still requires service.
 Note that closing the resamplefd is not sufficient to disable the
@@ -2327,7 +2327,7 @@ current state. "addr" is ignored.
 Capability: basic
 Architectures: arm, arm64
 Type: vcpu ioctl
-Parameters: struct struct kvm_vcpu_init (in)
+Parameters: struct kvm_vcpu_init (in)
 Returns: 0 on success; -1 on error
 Errors:
   EINVAL:    the target is unknown, or the combination of features is invalid.
@@ -2391,7 +2391,8 @@ struct kvm_reg_list {
 This ioctl returns the guest registers that are supported for the
 KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
 
-4.85 KVM_ARM_SET_DEVICE_ADDR
+
+4.85 KVM_ARM_SET_DEVICE_ADDR (deprecated)
 
 Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
 Architectures: arm, arm64
@@ -2429,6 +2430,10 @@ must be called after calling KVM_CREATE_IRQCHIP, but before calling
 KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the
 base addresses will return -EEXIST.
 
+Note, this IOCTL is deprecated and the more flexible SET/GET_DEVICE_ATTR API
+should be used instead.
+
+
 4.86 KVM_PPC_RTAS_DEFINE_TOKEN
 
 Capability: KVM_CAP_PPC_RTAS
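
A minimal userspace sketch of the KVM_IRQFD assign/deassign flow documented in the hunks above, assuming vm_fd is a VM file descriptor on which an in-kernel irqchip has already been created; struct kvm_irqfd and KVM_IRQFD_FLAG_DEASSIGN are the uapi definitions from <linux/kvm.h>:

    #include <linux/kvm.h>
    #include <sys/eventfd.h>
    #include <sys/ioctl.h>
    #include <stdint.h>

    /* Assign: events signalled on efd inject an interrupt on the given gsi. */
    static int assign_irqfd(int vm_fd, int efd, uint32_t gsi)
    {
            struct kvm_irqfd irqfd = {
                    .fd  = efd,
                    .gsi = gsi,
            };
            return ioctl(vm_fd, KVM_IRQFD, &irqfd);
    }

    /* Deassign: the same fd/gsi pair plus the DEASSIGN flag removes it. */
    static int deassign_irqfd(int vm_fd, int efd, uint32_t gsi)
    {
            struct kvm_irqfd irqfd = {
                    .fd    = efd,
                    .gsi   = gsi,
                    .flags = KVM_IRQFD_FLAG_DEASSIGN,
            };
            return ioctl(vm_fd, KVM_IRQFD, &irqfd);
    }

Once assigned, writing a nonzero value to efd with eventfd_write() injects the interrupt without any further ioctl on the VM.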
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
new file mode 100644
index 000000000000..7f4e91b1316b
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -0,0 +1,73 @@
+ARM Virtual Generic Interrupt Controller (VGIC)
+===============================================
+
+Device types supported:
+  KVM_DEV_TYPE_ARM_VGIC_V2     ARM Generic Interrupt Controller v2.0
+
+Only one VGIC instance may be instantiated through either this API or the
+legacy KVM_CREATE_IRQCHIP api.  The created VGIC will act as the VM interrupt
+controller, requiring emulated user-space devices to inject interrupts to the
+VGIC instead of directly to CPUs.
+
+Groups:
+  KVM_DEV_ARM_VGIC_GRP_ADDR
+  Attributes:
+    KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
+      Base address in the guest physical address space of the GIC distributor
+      register mappings.
+
+    KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit)
+      Base address in the guest physical address space of the GIC virtual cpu
+      interface register mappings.
+
+  KVM_DEV_ARM_VGIC_GRP_DIST_REGS
+  Attributes:
+    The attr field of kvm_device_attr encodes two values:
+    bits:   | 63 .... 40 | 39 .. 32 | 31 .... 0 |
+    values: |  reserved  |  cpu id  |  offset   |
+
+    All distributor regs are (rw, 32-bit)
+
+    The offset is relative to the "Distributor base address" as defined in the
+    GICv2 specs.  Getting or setting such a register has the same effect as
+    reading or writing the register on the actual hardware from the cpu
+    specified with cpu id field.  Note that most distributor fields are not
+    banked, but return the same value regardless of the cpu id used to access
+    the register.
+  Limitations:
+    - Priorities are not implemented, and registers are RAZ/WI
+  Errors:
+    -ENODEV: Getting or setting this register is not yet supported
+    -EBUSY: One or more VCPUs are running
+
+  KVM_DEV_ARM_VGIC_GRP_CPU_REGS
+  Attributes:
+    The attr field of kvm_device_attr encodes two values:
+    bits:   | 63 .... 40 | 39 .. 32 | 31 .... 0 |
+    values: |  reserved  |  cpu id  |  offset   |
+
+    All CPU interface regs are (rw, 32-bit)
+
+    The offset specifies the offset from the "CPU interface base address" as
+    defined in the GICv2 specs.  Getting or setting such a register has the
+    same effect as reading or writing the register on the actual hardware.
+
+    The Active Priorities Registers APRn are implementation defined, so we set a
+    fixed format for our implementation that fits with the model of a "GICv2
+    implementation without the security extensions" which we present to the
+    guest.  This interface always exposes four register APR[0-3] describing the
+    maximum possible 128 preemption levels.  The semantics of the register
+    indicate if any interrupts in a given preemption level are in the active
+    state by setting the corresponding bit.
+
+    Thus, preemption level X has one or more active interrupts if and only if:
+
+      APRn[X mod 32] == 0b1,  where n = X / 32
+
+    Bits for undefined preemption levels are RAZ/WI.
+
+  Limitations:
+    - Priorities are not implemented, and registers are RAZ/WI
+  Errors:
+    -ENODEV: Getting or setting this register is not yet supported
+    -EBUSY: One or more VCPUs are running
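
As a sketch of the cpu id / offset encoding described in the new file above, reading one 32-bit distributor register through the device-control API could look like this (device_fd is assumed to come from KVM_CREATE_DEVICE with KVM_DEV_TYPE_ARM_VGIC_V2; the struct and constants are the uapi definitions added by this series):

    #include <linux/kvm.h>
    #include <sys/ioctl.h>
    #include <stdint.h>

    static int vgic_get_dist_reg(int device_fd, uint32_t cpuid,
                                 uint32_t offset, uint32_t *val)
    {
            struct kvm_device_attr attr = {
                    .group = KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
                    /* bits 39..32 = cpu id, bits 31..0 = register offset */
                    .attr  = ((uint64_t)cpuid << KVM_DEV_ARM_VGIC_CPUID_SHIFT) |
                             offset,
                    /* kernel reads/writes the 32-bit value at this address */
                    .addr  = (uint64_t)(unsigned long)val,
            };
            return ioctl(device_fd, KVM_GET_DEVICE_ATTR, &attr);
    }

Swapping KVM_GET_DEVICE_ATTR for KVM_SET_DEVICE_ATTR writes the register instead; per the limitations above, both fail with -EBUSY while any VCPU is running.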
diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
index 022198e389d7..c8d040e27046 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -17,6 +17,9 @@ S390:
  S390 uses diagnose instruction as hypercall (0x500) along with hypercall
  number in R1.
 
+ For further information on the S390 diagnose call as supported by KVM,
+ refer to Documentation/virtual/kvm/s390-diag.txt.
+
  PowerPC:
   It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers.
   Return value is placed in R3.
@@ -74,7 +77,7 @@ Usage example : A vcpu of a paravirtualized guest that is busywaiting in guest
 kernel mode for an event to occur (ex: a spinlock to become available) can
 execute HLT instruction once it has busy-waited for more than a threshold
 time-interval. Execution of HLT instruction would cause the hypervisor to put
-the vcpu to sleep until occurence of an appropriate event. Another vcpu of the
+the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the
 same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall,
 specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0)
 is used in the hypercall for future use.
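
A hedged guest-kernel-side sketch of issuing this hypercall on x86: KVM's vmcall convention passes the hypercall number in RAX and a0/a1 in RBX/RCX, and KVM_HC_KICK_CPU comes from <linux/kvm_para.h>; the wrapper name is illustrative:

    #include <linux/kvm_para.h>     /* KVM_HC_KICK_CPU */

    /* Kick the vcpu with the given APIC ID out of HLT (a0 is reserved). */
    static inline long kvm_kick_cpu_hypercall(unsigned long flags,
                                              unsigned long apicid)
    {
            long ret;

            asm volatile("vmcall"
                         : "=a" (ret)
                         : "a" (KVM_HC_KICK_CPU), "b" (flags), "c" (apicid)
                         : "memory");
            return ret;
    }

Hypercalls are only accepted from guest CPL0, so this belongs in guest kernel code (e.g. a paravirtual spinlock unlock path), not in userspace.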
diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index f8869410d40c..d68af4dc3006 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -112,7 +112,7 @@ The Dirty bit is lost in this case.
 
 In order to avoid this kind of issue, we always treat the spte as "volatile"
 if it can be updated out of mmu-lock, see spte_has_volatile_bits(), it means,
-the spte is always atomicly updated in this case.
+the spte is always atomically updated in this case.
 
 3): flush tlbs due to spte updated
 If the spte is updated from writable to readonly, we should flush all TLBs,
@@ -125,7 +125,7 @@ be flushed caused by this reason in mmu_spte_update() since this is a common
 function to update spte (present -> present).
 
 Since the spte is "volatile" if it can be updated out of mmu-lock, we always
-atomicly update the spte, the race caused by fast page fault can be avoided,
+atomically update the spte, the race caused by fast page fault can be avoided,
 See the comments in spte_has_volatile_bits() and mmu_spte_update().
 
 3. Reference
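
As a sketch of the "always atomically update the spte" rule above (not the kernel's exact code): a fast-page-fault style update only commits if the spte is unchanged, using cmpxchg64; PT_WRITABLE_MASK here is an assumed bit layout for illustration:

    #include <linux/types.h>

    #define PT_WRITABLE_MASK (1ULL << 1)    /* assumed bit position */

    /* Returns true if we installed the writable bit; false means another
     * path changed the spte under us and the fault must be retried, which
     * is what prevents the lost-Dirty-bit race described above. */
    static bool fast_pf_fix_spte(u64 *sptep, u64 old_spte)
    {
            return cmpxchg64(sptep, old_spte,
                             old_spte | PT_WRITABLE_MASK) == old_spte;
    }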
diff --git a/Documentation/virtual/kvm/ppc-pv.txt b/Documentation/virtual/kvm/ppc-pv.txt
index 4cd076febb02..4643cde517c4 100644
--- a/Documentation/virtual/kvm/ppc-pv.txt
+++ b/Documentation/virtual/kvm/ppc-pv.txt
@@ -115,7 +115,7 @@ If any other bit changes in the MSR, please still use mtmsr(d).
 Patched instructions
 ====================
 
-The "ld" and "std" instructions are transormed to "lwz" and "stw" instructions
+The "ld" and "std" instructions are transformed to "lwz" and "stw" instructions
 respectively on 32 bit systems with an added offset of 4 to accommodate for big
 endianness.
 
diff --git a/Documentation/virtual/kvm/s390-diag.txt b/Documentation/virtual/kvm/s390-diag.txt
new file mode 100644
index 000000000000..f1de4fbade15
--- /dev/null
+++ b/Documentation/virtual/kvm/s390-diag.txt
@@ -0,0 +1,80 @@
+The s390 DIAGNOSE call on KVM
+=============================
+
+KVM on s390 supports the DIAGNOSE call for making hypercalls, both for
+native hypercalls and for selected hypercalls found on other s390
+hypervisors.
+
+Note that bits are numbered as by the usual s390 convention (most significant
+bit on the left).
+
+
+General remarks
+---------------
+
+DIAGNOSE calls by the guest cause a mandatory intercept. This implies
+all supported DIAGNOSE calls need to be handled by either KVM or its
+userspace.
+
+All DIAGNOSE calls supported by KVM use the RS-a format:
+
+--------------------------------------
+|  '83'  | R1 | R3 | B2 |     D2     |
+--------------------------------------
+0        8    12   16   20           31
+
+The second-operand address (obtained by the base/displacement calculation)
+is not used to address data. Instead, bits 48-63 of this address specify
+the function code, and bits 0-47 are ignored.
+
+The supported DIAGNOSE function codes vary by the userspace used. For
+DIAGNOSE function codes not specific to KVM, please refer to the
+documentation for the s390 hypervisors defining them.
+
+
+DIAGNOSE function code X'500' - KVM virtio functions
+----------------------------------------------------
+
+If the function code specifies 0x500, various virtio-related functions
+are performed.
+
+General register 1 contains the virtio subfunction code. Supported
+virtio subfunctions depend on KVM's userspace. Generally, userspace
+provides either s390-virtio (subcodes 0-2) or virtio-ccw (subcode 3).
+
+Upon completion of the DIAGNOSE instruction, general register 2 contains
+the function's return code, which is either a return code or a subcode
+specific value.
+
+Subcode 0 - s390-virtio notification and early console printk
+    Handled by userspace.
+
+Subcode 1 - s390-virtio reset
+    Handled by userspace.
+
+Subcode 2 - s390-virtio set status
+    Handled by userspace.
+
+Subcode 3 - virtio-ccw notification
+    Handled by either userspace or KVM (ioeventfd case).
+
+    General register 2 contains a subchannel-identification word denoting
+    the subchannel of the virtio-ccw proxy device to be notified.
+
+    General register 3 contains the number of the virtqueue to be notified.
+
+    General register 4 contains a 64bit identifier for KVM usage (the
+    kvm_io_bus cookie). If general register 4 does not contain a valid
+    identifier, it is ignored.
+
+    After completion of the DIAGNOSE call, general register 2 may contain
+    a 64bit identifier (in the kvm_io_bus cookie case).
+
+    See also the virtio standard for a discussion of this hypercall.
+
+
+DIAGNOSE function code X'501' - KVM breakpoint
+----------------------------------------------
+
+If the function code specifies 0x501, breakpoint functions may be performed.
+This function code is handled by userspace.
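
A hedged guest-side sketch of subcode 3 as described above, modeled on the kernel's virtio-ccw notify helper: subcode in gr1, subchannel id in gr2, virtqueue index in gr3, kvm_io_bus cookie in gr4, with the return value (or new cookie) coming back in gr2; the function name is illustrative:

    static inline long kvm_virtio_ccw_notify(unsigned long schid,
                                             unsigned long queue_index,
                                             long cookie)
    {
            register unsigned long nr asm("1") = 3; /* subcode 3 */
            register unsigned long sch asm("2") = schid;
            register unsigned long idx asm("3") = queue_index;
            register long ck asm("4") = cookie;
            register long rc asm("2");

            /* diag r2,r4,0x500: function code 0x500, operands per above */
            asm volatile ("diag 2,4,0x500\n"
                          : "=d" (rc)
                          : "d" (nr), "d" (sch), "d" (idx), "d" (ck)
                          : "memory", "cc");
            return rc;
    }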
diff --git a/Documentation/virtual/kvm/timekeeping.txt b/Documentation/virtual/kvm/timekeeping.txt
index df8946377cb6..76808a17ad84 100644
--- a/Documentation/virtual/kvm/timekeeping.txt
+++ b/Documentation/virtual/kvm/timekeeping.txt
@@ -467,7 +467,7 @@ at any time. This causes problems as the passage of real time, the injection
 of machine interrupts and the associated clock sources are no longer completely
 synchronized with real time.
 
-This same problem can occur on native harware to a degree, as SMM mode may
+This same problem can occur on native hardware to a degree, as SMM mode may
 steal cycles from the naturally on X86 systems when SMM mode is used by the
 BIOS, but not in such an extreme fashion. However, the fact that SMM mode may
 cause similar problems to virtualization makes it a good justification for
diff --git a/MAINTAINERS b/MAINTAINERS
index 64b68e83c7ec..e945c6380f56 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4915,7 +4915,7 @@ F: include/linux/sunrpc/
 F:	include/uapi/linux/sunrpc/
 
 KERNEL VIRTUAL MACHINE (KVM)
-M:	Gleb Natapov <gleb@redhat.com>
+M:	Gleb Natapov <gleb@kernel.org>
 M:	Paolo Bonzini <pbonzini@redhat.com>
 L:	kvm@vger.kernel.org
 W:	http://www.linux-kvm.org
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 8a6f6db14ee4..098f7dd6d564 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -225,4 +225,7 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext)
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
+u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
+int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 77de4a41cc50..2d122adcdb22 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -140,6 +140,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
 }
 
 #define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
+#define kvm_virt_to_phys(x)		virt_to_idmap((unsigned long)(x))
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index c498b60c0505..ef0c8785ba16 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -119,6 +119,26 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_32_CRN_MASK		0x0000000000007800
 #define KVM_REG_ARM_32_CRN_SHIFT	11
 
+#define ARM_CP15_REG_SHIFT_MASK(x,n) \
+	(((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
+
+#define __ARM_CP15_REG(op1,crn,crm,op2) \
+	(KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT) | \
+	ARM_CP15_REG_SHIFT_MASK(op1, OPC1) | \
+	ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN) | \
+	ARM_CP15_REG_SHIFT_MASK(crm, CRM) | \
+	ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2))
+
+#define ARM_CP15_REG32(...) (__ARM_CP15_REG(__VA_ARGS__) | KVM_REG_SIZE_U32)
+
+#define __ARM_CP15_REG64(op1,crm) \
+	(__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64)
+#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
+
+#define KVM_REG_ARM_TIMER_CTL		ARM_CP15_REG32(0, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT		ARM_CP15_REG64(1, 14)
+#define KVM_REG_ARM_TIMER_CVAL		ARM_CP15_REG64(3, 14)
+
 /* Normal registers are mapped as coprocessor 16. */
 #define KVM_REG_ARM_CORE		(0x0010 << KVM_REG_ARM_COPROC_SHIFT)
 #define KVM_REG_ARM_CORE_REG(name)	(offsetof(struct kvm_regs, name) / 4)
@@ -143,6 +163,14 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_VFP_FPINST		0x1009
 #define KVM_REG_ARM_VFP_FPINST2	0x100A
 
+/* Device Control API: ARM VGIC */
+#define KVM_DEV_ARM_VGIC_GRP_ADDR	0
+#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS	1
+#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS	2
+#define KVM_DEV_ARM_VGIC_CPUID_SHIFT	32
+#define KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
+#define KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index b18165ca1d38..1d8248ea5669 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -138,6 +138,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (ret)
 		goto out_free_stage2_pgd;
 
+	kvm_timer_init(kvm);
+
 	/* Mark the initial VMID generation invalid */
 	kvm->arch.vmid_gen = 0;
 
@@ -189,6 +191,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_IRQCHIP:
 		r = vgic_present;
 		break;
+	case KVM_CAP_DEVICE_CTRL:
 	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_SYNC_MMU:
 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
@@ -340,6 +343,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
+	/*
+	 * The arch-generic KVM code expects the cpu field of a vcpu to be -1
+	 * if the vcpu is no longer assigned to a cpu.  This is used for the
+	 * optimized make_all_cpus_request path.
+	 */
+	vcpu->cpu = -1;
+
 	kvm_arm_set_running_vcpu(NULL);
 }
 
@@ -463,6 +473,8 @@ static void update_vttbr(struct kvm *kvm)
 
 static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 {
+	int ret;
+
 	if (likely(vcpu->arch.has_run_once))
 		return 0;
 
@@ -472,22 +484,12 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 	 * Initialize the VGIC before running a vcpu the first time on
 	 * this VM.
 	 */
-	if (irqchip_in_kernel(vcpu->kvm) &&
-	    unlikely(!vgic_initialized(vcpu->kvm))) {
-		int ret = kvm_vgic_init(vcpu->kvm);
+	if (unlikely(!vgic_initialized(vcpu->kvm))) {
+		ret = kvm_vgic_init(vcpu->kvm);
 		if (ret)
 			return ret;
 	}
 
-	/*
-	 * Handle the "start in power-off" case by calling into the
-	 * PSCI code.
-	 */
-	if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) {
-		*vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF;
-		kvm_psci_call(vcpu);
-	}
-
 	return 0;
 }
 
@@ -701,6 +703,24 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 	return -EINVAL;
 }
 
+static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
+					 struct kvm_vcpu_init *init)
+{
+	int ret;
+
+	ret = kvm_vcpu_set_target(vcpu, init);
+	if (ret)
+		return ret;
+
+	/*
+	 * Handle the "start in power-off" case by marking the VCPU as paused.
+	 */
+	if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+		vcpu->arch.pause = true;
+
+	return 0;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
@@ -714,8 +734,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (copy_from_user(&init, argp, sizeof(init)))
 			return -EFAULT;
 
-		return kvm_vcpu_set_target(vcpu, &init);
-
+		return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
 	}
 	case KVM_SET_ONE_REG:
 	case KVM_GET_ONE_REG: {
@@ -773,7 +792,7 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 	case KVM_ARM_DEVICE_VGIC_V2:
 		if (!vgic_present)
 			return -ENXIO;
-		return kvm_vgic_set_addr(kvm, type, dev_addr->addr);
+		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
 	default:
 		return -ENODEV;
 	}
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 20f8d97904af..2786eae10c0d 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -109,6 +109,83 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	return -EINVAL;
 }
 
+#ifndef CONFIG_KVM_ARM_TIMER
+
+#define NUM_TIMER_REGS 0
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	return 0;
+}
+
+static bool is_timer_reg(u64 index)
+{
+	return false;
+}
+
+int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
+{
+	return 0;
+}
+
+u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
+{
+	return 0;
+}
+
+#else
+
+#define NUM_TIMER_REGS 3
+
+static bool is_timer_reg(u64 index)
+{
+	switch (index) {
+	case KVM_REG_ARM_TIMER_CTL:
+	case KVM_REG_ARM_TIMER_CNT:
+	case KVM_REG_ARM_TIMER_CVAL:
+		return true;
+	}
+	return false;
+}
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
+		return -EFAULT;
+
+	return 0;
+}
+
+#endif
+
+static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+	int ret;
+
+	ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
+	if (ret != 0)
+		return ret;
+
+	return kvm_arm_timer_set_reg(vcpu, reg->id, val);
+}
+
+static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+
+	val = kvm_arm_timer_get_reg(vcpu, reg->id);
+	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+}
+
 static unsigned long num_core_regs(void)
 {
 	return sizeof(struct kvm_regs) / sizeof(u32);
@@ -121,7 +198,8 @@ static unsigned long num_core_regs(void)
  */
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
 {
-	return num_core_regs() + kvm_arm_num_coproc_regs(vcpu);
+	return num_core_regs() + kvm_arm_num_coproc_regs(vcpu)
+		+ NUM_TIMER_REGS;
 }
 
 /**
@@ -133,6 +211,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 {
 	unsigned int i;
 	const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE;
+	int ret;
 
 	for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) {
 		if (put_user(core_reg | i, uindices))
@@ -140,6 +219,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 		uindices++;
 	}
 
+	ret = copy_timer_indices(vcpu, uindices);
+	if (ret)
+		return ret;
+	uindices += NUM_TIMER_REGS;
+
 	return kvm_arm_copy_coproc_indices(vcpu, uindices);
 }
 
@@ -153,6 +237,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
 		return get_core_reg(vcpu, reg);
 
+	if (is_timer_reg(reg->id))
+		return get_timer_reg(vcpu, reg);
+
 	return kvm_arm_coproc_get_reg(vcpu, reg);
 }
 
@@ -166,6 +253,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
 		return set_core_reg(vcpu, reg);
 
+	if (is_timer_reg(reg->id))
+		return set_timer_reg(vcpu, reg);
+
 	return kvm_arm_coproc_set_reg(vcpu, reg);
 }
 
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index a92079011a83..0de91fc6de0f 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -26,8 +26,6 @@
 
 #include "trace.h"
 
-#include "trace.h"
-
 typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
 
 static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 580906989db1..7789857d1470 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -667,14 +667,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
 	} else {
 		/*
-		 * Pages belonging to VMAs not aligned to the PMD mapping
-		 * granularity cannot be mapped using block descriptors even
-		 * if the pages belong to a THP for the process, because the
-		 * stage-2 block descriptor will cover more than a single THP
-		 * and we loose atomicity for unmapping, updates, and splits
-		 * of the THP or other pages in the stage-2 block range.
+		 * Pages belonging to memslots that don't have the same
+		 * alignment for userspace and IPA cannot be mapped using
+		 * block descriptors even if the pages belong to a THP for
+		 * the process, because the stage-2 block descriptor will
+		 * cover more than a single THP and we loose atomicity for
+		 * unmapping, updates, and splits of the THP or other pages
+		 * in the stage-2 block range.
 		 */
-		if (vma->vm_start & ~PMD_MASK)
+		if ((memslot->userspace_addr & ~PMD_MASK) !=
+		    ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK))
 			force_pte = true;
 	}
 	up_read(&current->mm->mmap_sem);
@@ -916,9 +918,9 @@ int kvm_mmu_init(void)
 {
 	int err;
 
-	hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start);
-	hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end);
-	hyp_idmap_vector = virt_to_phys(__kvm_hyp_init);
+	hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
+	hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
+	hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);
 
 	if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) {
 		/*
@@ -945,7 +947,7 @@ int kvm_mmu_init(void)
 	 */
 	kvm_flush_dcache_to_poc(init_bounce_page, len);
 
-	phys_base = virt_to_phys(init_bounce_page);
+	phys_base = kvm_virt_to_phys(init_bounce_page);
 	hyp_idmap_vector += phys_base - hyp_idmap_start;
 	hyp_idmap_start = phys_base;
 	hyp_idmap_end = phys_base + len;
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 0881bf169fbc..448f60e8d23c 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -54,15 +54,15 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 		}
 	}
 
-	if (!vcpu)
+	/*
+	 * Make sure the caller requested a valid CPU and that the CPU is
+	 * turned off.
+	 */
+	if (!vcpu || !vcpu->arch.pause)
 		return KVM_PSCI_RET_INVAL;
 
 	target_pc = *vcpu_reg(source_vcpu, 2);
 
-	wq = kvm_arch_vcpu_wq(vcpu);
-	if (!waitqueue_active(wq))
-		return KVM_PSCI_RET_INVAL;
-
 	kvm_reset_vcpu(vcpu);
 
 	/* Gracefully handle Thumb2 entry point */
@@ -79,6 +79,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	vcpu->arch.pause = false;
 	smp_mb();		/* Make sure the above is visible */
 
+	wq = kvm_arch_vcpu_wq(vcpu);
 	wake_up_interruptible(wq);
 
 	return KVM_PSCI_RET_SUCCESS;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5d85a02d1231..0a1d69751562 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -26,7 +26,12 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 
-#define KVM_MAX_VCPUS 4
+#if defined(CONFIG_KVM_ARM_MAX_VCPUS)
+#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
+#else
+#define KVM_MAX_VCPUS 0
+#endif
+
 #define KVM_USER_MEM_SLOTS 32
 #define KVM_PRIVATE_MEM_SLOTS 4
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 680f74e67497..7f1f9408ff66 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -136,6 +136,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
 }
 
 #define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
+#define kvm_virt_to_phys(x)		__virt_to_phys((unsigned long)(x))
 
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 5031f4263937..495ab6f84a61 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -55,8 +55,9 @@ struct kvm_regs {
 #define KVM_ARM_TARGET_AEM_V8		0
 #define KVM_ARM_TARGET_FOUNDATION_V8	1
 #define KVM_ARM_TARGET_CORTEX_A57	2
+#define KVM_ARM_TARGET_XGENE_POTENZA	3
 
-#define KVM_ARM_NUM_TARGETS		3
+#define KVM_ARM_NUM_TARGETS		4
 
 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
 #define KVM_ARM_DEVICE_TYPE_SHIFT	0
@@ -129,6 +130,24 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM64_SYSREG_OP2_MASK	0x0000000000000007
 #define KVM_REG_ARM64_SYSREG_OP2_SHIFT	0
 
+#define ARM64_SYS_REG_SHIFT_MASK(x,n) \
+	(((x) << KVM_REG_ARM64_SYSREG_ ## n ## _SHIFT) & \
+	KVM_REG_ARM64_SYSREG_ ## n ## _MASK)
+
+#define __ARM64_SYS_REG(op0,op1,crn,crm,op2) \
+	(KVM_REG_ARM64 | KVM_REG_ARM64_SYSREG | \
+	ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+	ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
+	ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
+	ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
+	ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
+
+#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64)
+
+#define KVM_REG_ARM_TIMER_CTL		ARM64_SYS_REG(3, 3, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT		ARM64_SYS_REG(3, 3, 14, 3, 2)
+#define KVM_REG_ARM_TIMER_CVAL		ARM64_SYS_REG(3, 3, 14, 0, 2)
+
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
 #define KVM_ARM_IRQ_TYPE_MASK		0xff
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 4480ab339a00..8ba85e9ea388 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -36,6 +36,17 @@ config KVM_ARM_HOST
 	---help---
 	  Provides host support for ARM processors.
 
+config KVM_ARM_MAX_VCPUS
+	int "Number maximum supported virtual CPUs per VM"
+	depends on KVM_ARM_HOST
+	default 4
+	help
+	  Static number of max supported virtual CPUs per VM.
+
+	  If you choose a high number, the vcpu structures will be quite
+	  large, so only choose a reasonable number that you expect to
+	  actually use.
+
 config KVM_ARM_VGIC
 	bool
 	depends on KVM_ARM_HOST && OF
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 3f0731e53274..08745578d54d 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -207,20 +207,26 @@ int __attribute_const__ kvm_target_cpu(void)
 	unsigned long implementor = read_cpuid_implementor();
 	unsigned long part_number = read_cpuid_part_number();
 
-	if (implementor != ARM_CPU_IMP_ARM)
-		return -EINVAL;
+	switch (implementor) {
+	case ARM_CPU_IMP_ARM:
+		switch (part_number) {
+		case ARM_CPU_PART_AEM_V8:
+			return KVM_ARM_TARGET_AEM_V8;
+		case ARM_CPU_PART_FOUNDATION:
+			return KVM_ARM_TARGET_FOUNDATION_V8;
+		case ARM_CPU_PART_CORTEX_A57:
+			return KVM_ARM_TARGET_CORTEX_A57;
+		};
+		break;
+	case ARM_CPU_IMP_APM:
+		switch (part_number) {
+		case APM_CPU_PART_POTENZA:
+			return KVM_ARM_TARGET_XGENE_POTENZA;
+		};
+		break;
+	};
 
-	switch (part_number) {
-	case ARM_CPU_PART_AEM_V8:
-		return KVM_ARM_TARGET_AEM_V8;
-	case ARM_CPU_PART_FOUNDATION:
-		return KVM_ARM_TARGET_FOUNDATION_V8;
-	case ARM_CPU_PART_CORTEX_A57:
-		/* Currently handled by the generic backend */
-		return KVM_ARM_TARGET_CORTEX_A57;
-	default:
-		return -EINVAL;
-	}
+	return -EINVAL;
 }
 
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 42a0f1bddfe7..7bc41eab4c64 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -39,9 +39,6 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	if (kvm_psci_call(vcpu))
-		return 1;
-
 	kvm_inject_undefined(vcpu);
 	return 1;
 }
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index 4268ab9356b1..8fe6f76b0edc 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -90,6 +90,9 @@ static int __init sys_reg_genericv8_init(void)
 					  &genericv8_target_table);
 	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
 					  &genericv8_target_table);
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,
+					  &genericv8_target_table);
+
 	return 0;
 }
 late_initcall(sys_reg_genericv8_init);
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 985bf80c622e..53f44bee9ebb 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -702,7 +702,7 @@ again:
 out:
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	if (r > 0) {
-		kvm_resched(vcpu);
+		cond_resched();
 		idx = srcu_read_lock(&vcpu->kvm->srcu);
 		goto again;
 	}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index b51d5db78068..3818bd95327c 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1352,7 +1352,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 	kvm_guest_exit();
 
 	preempt_enable();
-	kvm_resched(vcpu);
+	cond_resched();
 
 	spin_lock(&vc->lock);
 	now = get_tb();
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index 5a87d16d3e7c..d091aa1aaf11 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -5,6 +5,7 @@
 #define SIGP_SENSE		1
 #define SIGP_EXTERNAL_CALL	2
 #define SIGP_EMERGENCY_SIGNAL	3
+#define SIGP_START		4
 #define SIGP_STOP		5
 #define SIGP_RESTART		6
 #define SIGP_STOP_AND_STORE_STATUS 9
@@ -12,6 +13,7 @@
 #define SIGP_SET_PREFIX		13
 #define SIGP_STORE_STATUS_AT_ADDRESS 14
 #define SIGP_SET_ARCHITECTURE	18
+#define SIGP_COND_EMERGENCY_SIGNAL 19
 #define SIGP_SENSE_RUNNING	21
 
 /* SIGP condition codes */
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 78d967f180f4..8216c0e0b2e2 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -121,7 +121,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 	 * - gpr 4 contains the index on the bus (optionally)
 	 */
 	ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
-				      vcpu->run->s.regs.gprs[2],
+				      vcpu->run->s.regs.gprs[2] & 0xffffffff,
 				      8, &vcpu->run->s.regs.gprs[3],
 				      vcpu->run->s.regs.gprs[4]);
 
@@ -137,7 +137,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
 {
-	int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
+	int code = kvm_s390_get_base_disp_rs(vcpu) & 0xffff;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 569494e01ec6..7635c00a1479 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -732,14 +732,16 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 
 	if (exit_reason >= 0) {
 		rc = 0;
+	} else if (kvm_is_ucontrol(vcpu->kvm)) {
+		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
+		vcpu->run->s390_ucontrol.trans_exc_code =
+						current->thread.gmap_addr;
+		vcpu->run->s390_ucontrol.pgm_code = 0x10;
+		rc = -EREMOTE;
 	} else {
-		if (kvm_is_ucontrol(vcpu->kvm)) {
-			rc = SIE_INTERCEPT_UCONTROL;
-		} else {
-			VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
-			trace_kvm_s390_sie_fault(vcpu);
-			rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		}
+		VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
+		trace_kvm_s390_sie_fault(vcpu);
+		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 	}
 
 	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
@@ -833,16 +835,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		rc = -EINTR;
 	}
 
-#ifdef CONFIG_KVM_S390_UCONTROL
-	if (rc == SIE_INTERCEPT_UCONTROL) {
-		kvm_run->exit_reason = KVM_EXIT_S390_UCONTROL;
-		kvm_run->s390_ucontrol.trans_exc_code =
-			current->thread.gmap_addr;
-		kvm_run->s390_ucontrol.pgm_code = 0x10;
-		rc = 0;
-	}
-#endif
-
 	if (rc == -EOPNOTSUPP) {
 		/* intercept cannot be handled in-kernel, prepare kvm-run */
 		kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
@@ -885,10 +877,11 @@ static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
  */
-int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr)
 {
 	unsigned char archmode = 1;
 	int prefix;
+	u64 clkcomp;
 
 	if (addr == KVM_S390_STORE_STATUS_NOADDR) {
 		if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
@@ -903,15 +896,6 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	} else
 		prefix = 0;
 
-	/*
-	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
-	 * copying in vcpu load/put. Lets update our copies before we save
-	 * it into the save area
-	 */
-	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
-	save_access_regs(vcpu->run->s.regs.acrs);
-
 	if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
 			vcpu->arch.guest_fpregs.fprs, 128, prefix))
 		return -EFAULT;
@@ -941,8 +925,9 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 			&vcpu->arch.sie_block->cputm, 8, prefix))
 		return -EFAULT;
 
+	clkcomp = vcpu->arch.sie_block->ckc >> 8;
 	if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
-			&vcpu->arch.sie_block->ckc, 8, prefix))
+			&clkcomp, 8, prefix))
 		return -EFAULT;
 
 	if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
@@ -956,6 +941,20 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	return 0;
 }
 
+int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	/*
+	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
+	 * copying in vcpu load/put. Lets update our copies before we save
+	 * it into the save area
+	 */
+	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	save_access_regs(vcpu->run->s.regs.acrs);
+
+	return kvm_s390_store_status_unloaded(vcpu, addr);
+}
+
 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 				     struct kvm_enable_cap *cap)
 {
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index b44912a32949..095cf51b16ec 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -19,16 +19,11 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 
-/* The current code can have up to 256 pages for virtio */
-#define VIRTIODESCSPACE (256ul * 4096ul)
-
 typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
 
 /* declare vfacilities extern */
 extern unsigned long *vfacilities;
 
-/* negativ values are error codes, positive values for internal conditions */
-#define SIE_INTERCEPT_UCONTROL (1<<0)
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
 
 #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
@@ -133,7 +128,6 @@ int __must_check kvm_s390_inject_vm(struct kvm *kvm,
 int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 				      struct kvm_s390_interrupt *s390int);
 int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
-int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 						    u64 cr6, u64 schid);
 
@@ -150,8 +144,8 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
 
 /* implemented in kvm-s390.c */
-int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu,
-			       unsigned long addr);
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
 void s390_vcpu_block(struct kvm_vcpu *vcpu);
 void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
 void exit_sie(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index d101dae62771..75beea632a10 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -197,7 +197,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
 	if (addr & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 	cc = 0;
-	inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0);
+	inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0);
 	if (!inti)
 		goto no_interrupt;
 	cc = 1;
@@ -638,7 +638,6 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 
 static const intercept_handler_t b9_handlers[256] = {
 	[0x8d] = handle_epsw,
-	[0x9c] = handle_io_inst,
 	[0xaf] = handle_pfmf,
 };
 
@@ -731,7 +730,6 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
 
 static const intercept_handler_t eb_handlers[256] = {
 	[0x2f] = handle_lctlg,
-	[0x8a] = handle_io_inst,
 };
 
 int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index bec398c57acf..87c2b3a3bd3e 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -1,7 +1,7 @@
 /*
  * handling interprocessor communication
  *
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2013
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -89,6 +89,37 @@ unlock:
 	return rc;
 }
 
+static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
+					u16 asn, u64 *reg)
+{
+	struct kvm_vcpu *dst_vcpu = NULL;
+	const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT;
+	u16 p_asn, s_asn;
+	psw_t *psw;
+	u32 flags;
+
+	if (cpu_addr < KVM_MAX_VCPUS)
+		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+	if (!dst_vcpu)
+		return SIGP_CC_NOT_OPERATIONAL;
+	flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags);
+	psw = &dst_vcpu->arch.sie_block->gpsw;
+	p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff;  /* Primary ASN */
+	s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff;  /* Secondary ASN */
+
+	/* Deliver the emergency signal? */
+	if (!(flags & CPUSTAT_STOPPED)
+	    || (psw->mask & psw_int_mask) != psw_int_mask
+	    || ((flags & CPUSTAT_WAIT) && psw->addr != 0)
+	    || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) {
+		return __sigp_emergency(vcpu, cpu_addr);
+	} else {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_INCORRECT_STATE;
+		return SIGP_CC_STATUS_STORED;
+	}
+}
+
 static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
 {
 	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
@@ -130,6 +161,7 @@ unlock:
130static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) 161static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
131{ 162{
132 struct kvm_s390_interrupt_info *inti; 163 struct kvm_s390_interrupt_info *inti;
164 int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
133 165
134 inti = kzalloc(sizeof(*inti), GFP_ATOMIC); 166 inti = kzalloc(sizeof(*inti), GFP_ATOMIC);
135 if (!inti) 167 if (!inti)
@@ -139,6 +171,8 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
139 spin_lock_bh(&li->lock); 171 spin_lock_bh(&li->lock);
140 if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { 172 if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
141 kfree(inti); 173 kfree(inti);
174 if ((action & ACTION_STORE_ON_STOP) != 0)
175 rc = -ESHUTDOWN;
142 goto out; 176 goto out;
143 } 177 }
144 list_add_tail(&inti->list, &li->list); 178 list_add_tail(&inti->list, &li->list);
@@ -150,7 +184,7 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
150out: 184out:
151 spin_unlock_bh(&li->lock); 185 spin_unlock_bh(&li->lock);
152 186
153 return SIGP_CC_ORDER_CODE_ACCEPTED; 187 return rc;
154} 188}
155 189
156static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) 190static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
@@ -174,13 +208,17 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
174unlock: 208unlock:
175 spin_unlock(&fi->lock); 209 spin_unlock(&fi->lock);
176 VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); 210 VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
177 return rc;
178}
179 211
180int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action) 212 if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) {
181{ 213 /* If the CPU has already been stopped, we still have
182 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 214 * to save the status when doing stop-and-store. This
183 return __inject_sigp_stop(li, action); 215 * has to be done after unlocking all spinlocks. */
216 struct kvm_vcpu *dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
217 rc = kvm_s390_store_status_unloaded(dst_vcpu,
218 KVM_S390_STORE_STATUS_NOADDR);
219 }
220
221 return rc;
184} 222}
185 223
186static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) 224static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
@@ -262,6 +300,37 @@ out_fi:
262 return rc; 300 return rc;
263} 301}
264 302
303static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
304 u32 addr, u64 *reg)
305{
306 struct kvm_vcpu *dst_vcpu = NULL;
307 int flags;
308 int rc;
309
310 if (cpu_id < KVM_MAX_VCPUS)
311 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id);
312 if (!dst_vcpu)
313 return SIGP_CC_NOT_OPERATIONAL;
314
315 spin_lock_bh(&dst_vcpu->arch.local_int.lock);
316 flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
317 spin_unlock_bh(&dst_vcpu->arch.local_int.lock);
318 if (!(flags & CPUSTAT_STOPPED)) {
319 *reg &= 0xffffffff00000000UL;
320 *reg |= SIGP_STATUS_INCORRECT_STATE;
321 return SIGP_CC_STATUS_STORED;
322 }
323
324 addr &= 0x7ffffe00;
325 rc = kvm_s390_store_status_unloaded(dst_vcpu, addr);
326 if (rc == -EFAULT) {
327 *reg &= 0xffffffff00000000UL;
328 *reg |= SIGP_STATUS_INVALID_PARAMETER;
329 rc = SIGP_CC_STATUS_STORED;
330 }
331 return rc;
332}
333
265static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, 334static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
266 u64 *reg) 335 u64 *reg)
267{ 336{
@@ -294,7 +363,8 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
294 return rc; 363 return rc;
295} 364}
296 365
297static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr) 366/* Test whether the destination CPU is available and not busy */
367static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
298{ 368{
299 struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; 369 struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
300 struct kvm_s390_local_interrupt *li; 370 struct kvm_s390_local_interrupt *li;
@@ -313,9 +383,6 @@ static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr)
313 spin_lock_bh(&li->lock); 383 spin_lock_bh(&li->lock);
314 if (li->action_bits & ACTION_STOP_ON_STOP) 384 if (li->action_bits & ACTION_STOP_ON_STOP)
315 rc = SIGP_CC_BUSY; 385 rc = SIGP_CC_BUSY;
316 else
317 VCPU_EVENT(vcpu, 4, "sigp restart %x to handle userspace",
318 cpu_addr);
319 spin_unlock_bh(&li->lock); 386 spin_unlock_bh(&li->lock);
320out: 387out:
321 spin_unlock(&fi->lock); 388 spin_unlock(&fi->lock);
@@ -366,6 +433,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
366 rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP | 433 rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP |
367 ACTION_STOP_ON_STOP); 434 ACTION_STOP_ON_STOP);
368 break; 435 break;
436 case SIGP_STORE_STATUS_AT_ADDRESS:
437 rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter,
438 &vcpu->run->s.regs.gprs[r1]);
439 break;
369 case SIGP_SET_ARCHITECTURE: 440 case SIGP_SET_ARCHITECTURE:
370 vcpu->stat.instruction_sigp_arch++; 441 vcpu->stat.instruction_sigp_arch++;
371 rc = __sigp_set_arch(vcpu, parameter); 442 rc = __sigp_set_arch(vcpu, parameter);
@@ -375,17 +446,31 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
375 rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, 446 rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
376 &vcpu->run->s.regs.gprs[r1]); 447 &vcpu->run->s.regs.gprs[r1]);
377 break; 448 break;
449 case SIGP_COND_EMERGENCY_SIGNAL:
450 rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter,
451 &vcpu->run->s.regs.gprs[r1]);
452 break;
378 case SIGP_SENSE_RUNNING: 453 case SIGP_SENSE_RUNNING:
379 vcpu->stat.instruction_sigp_sense_running++; 454 vcpu->stat.instruction_sigp_sense_running++;
380 rc = __sigp_sense_running(vcpu, cpu_addr, 455 rc = __sigp_sense_running(vcpu, cpu_addr,
381 &vcpu->run->s.regs.gprs[r1]); 456 &vcpu->run->s.regs.gprs[r1]);
382 break; 457 break;
458 case SIGP_START:
459 rc = sigp_check_callable(vcpu, cpu_addr);
460 if (rc == SIGP_CC_ORDER_CODE_ACCEPTED)
461 rc = -EOPNOTSUPP; /* Handle START in user space */
462 break;
383 case SIGP_RESTART: 463 case SIGP_RESTART:
384 vcpu->stat.instruction_sigp_restart++; 464 vcpu->stat.instruction_sigp_restart++;
385 rc = __sigp_restart(vcpu, cpu_addr); 465 rc = sigp_check_callable(vcpu, cpu_addr);
386 if (rc == SIGP_CC_BUSY) 466 if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) {
387 break; 467 VCPU_EVENT(vcpu, 4,
388 /* user space must know about restart */ 468 "sigp restart %x to handle userspace",
469 cpu_addr);
470 /* user space must know about restart */
471 rc = -EOPNOTSUPP;
472 }
473 break;
389 default: 474 default:
390 return -EOPNOTSUPP; 475 return -EOPNOTSUPP;
391 } 476 }
@@ -393,7 +478,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
393 if (rc < 0) 478 if (rc < 0)
394 return rc; 479 return rc;
395 480
396 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); 481 kvm_s390_set_psw_cc(vcpu, rc);
397 vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44;
398 return 0; 482 return 0;
399} 483}
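The open-coded PSW update deleted at the end of kvm_s390_handle_sigp() is the whole story behind kvm_s390_set_psw_cc(): the SIGP condition code lives in bits 45:44 of the PSW mask word, so the helper is presumably equivalent to the two lines it replaces. A sketch under that assumption:

    /* Assumed body of kvm_s390_set_psw_cc(); the cc field sits at
     * bits 45:44 of the 64-bit PSW mask word. */
    static inline void set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
    {
            vcpu->arch.sie_block->gpsw.mask &= ~(3UL << 44);
            vcpu->arch.sie_block->gpsw.mask |= (cc & 3UL) << 44;
    }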
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
index 0c991c6748ab..3db76b2daed7 100644
--- a/arch/s390/kvm/trace.h
+++ b/arch/s390/kvm/trace.h
@@ -175,6 +175,7 @@ TRACE_EVENT(kvm_s390_intercept_validity,
175 {SIGP_STOP_AND_STORE_STATUS, "stop and store status"}, \ 175 {SIGP_STOP_AND_STORE_STATUS, "stop and store status"}, \
176 {SIGP_SET_ARCHITECTURE, "set architecture"}, \ 176 {SIGP_SET_ARCHITECTURE, "set architecture"}, \
177 {SIGP_SET_PREFIX, "set prefix"}, \ 177 {SIGP_SET_PREFIX, "set prefix"}, \
178 {SIGP_STORE_STATUS_AT_ADDRESS, "store status at addr"}, \
178 {SIGP_SENSE_RUNNING, "sense running"}, \ 179 {SIGP_SENSE_RUNNING, "sense running"}, \
179 {SIGP_RESTART, "restart"} 180 {SIGP_RESTART, "restart"}
180 181
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ae5d7830855c..fdf83afbb7d9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -605,6 +605,7 @@ struct kvm_arch {
605 /* fields used by HYPER-V emulation */ 605 /* fields used by HYPER-V emulation */
606 u64 hv_guest_os_id; 606 u64 hv_guest_os_id;
607 u64 hv_hypercall; 607 u64 hv_hypercall;
608 u64 hv_tsc_page;
608 609
609 #ifdef CONFIG_KVM_MMU_AUDIT 610 #ifdef CONFIG_KVM_MMU_AUDIT
610 int audit_point; 611 int audit_point;
@@ -699,6 +700,8 @@ struct kvm_x86_ops {
699 void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); 700 void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
700 void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); 701 void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
701 void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); 702 void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
703 u64 (*get_dr6)(struct kvm_vcpu *vcpu);
704 void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
702 void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); 705 void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
703 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); 706 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
704 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); 707 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 966502d4682e..2067264fb7f5 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -100,6 +100,7 @@
100 100
101#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f 101#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
102#define VMX_MISC_SAVE_EFER_LMA 0x00000020 102#define VMX_MISC_SAVE_EFER_LMA 0x00000020
103#define VMX_MISC_ACTIVITY_HLT 0x00000040
103 104
104/* VMCS Encodings */ 105/* VMCS Encodings */
105enum vmcs_field { 106enum vmcs_field {
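Bit 6 of the low word of MSR_IA32_VMX_MISC advertises support for VM entry with the guest in the HLT activity state; the nested_vmx_setup_ctls_msrs() hunk further down sets it unconditionally, so L1 may launch L2 halted. A one-liner showing how L1 would test the bit (illustrative, not from the patch):

    /* misc_low is the low 32 bits of rdmsr(MSR_IA32_VMX_MISC) */
    static inline bool vmx_misc_activity_hlt(u32 misc_low)
    {
            return misc_low & VMX_MISC_ACTIVITY_HLT;
    }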
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index b8f1c0176cbc..462efe746d77 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -28,6 +28,9 @@
28/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ 28/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
29#define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) 29#define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1)
30 30
31/* A partition's reference time stamp counter (TSC) page */
32#define HV_X64_MSR_REFERENCE_TSC 0x40000021
33
31/* 34/*
32 * There is a single feature flag that signifies the presence of the MSR 35 * There is a single feature flag that signifies the presence of the MSR
33 * that can be used to retrieve both the local APIC Timer frequency as 36 * that can be used to retrieve both the local APIC Timer frequency as
@@ -198,6 +201,9 @@
198#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ 201#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \
199 (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) 202 (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
200 203
204#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001
205#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12
206
201#define HV_PROCESSOR_POWER_STATE_C0 0 207#define HV_PROCESSOR_POWER_STATE_C0 0
202#define HV_PROCESSOR_POWER_STATE_C1 1 208#define HV_PROCESSOR_POWER_STATE_C1 1
203#define HV_PROCESSOR_POWER_STATE_C2 2 209#define HV_PROCESSOR_POWER_STATE_C2 2
@@ -210,4 +216,11 @@
210#define HV_STATUS_INVALID_ALIGNMENT 4 216#define HV_STATUS_INVALID_ALIGNMENT 4
211#define HV_STATUS_INSUFFICIENT_BUFFERS 19 217#define HV_STATUS_INSUFFICIENT_BUFFERS 19
212 218
219typedef struct _HV_REFERENCE_TSC_PAGE {
220 __u32 tsc_sequence;
221 __u32 res1;
222 __u64 tsc_scale;
223 __s64 tsc_offset;
224} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
225
213#endif 226#endif
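The layout mirrors the reference TSC page from the Hyper-V spec: guests compute partition reference time (100 ns units) as ((tsc * tsc_scale) >> 64) + tsc_offset, guarded by a seqlock-style check on tsc_sequence. A sketch of that documented read protocol, assuming rdtsc() stands in for the raw TSC read and with memory barriers elided; this loop is the guest's side, not part of the patch:

    static __u64 read_reference_time(volatile HV_REFERENCE_TSC_PAGE *p)
    {
            __u32 seq;
            __u64 time;

            do {
                    seq = p->tsc_sequence;
                    if (seq == 0)
                            return 0;  /* page invalid: fall back to
                                          HV_X64_MSR_TIME_REF_COUNT */
                    /* 64x64->128 multiply, keep the high half */
                    time = ((unsigned __int128)rdtsc() * p->tsc_scale) >> 64;
                    time += p->tsc_offset;
            } while (p->tsc_sequence != seq);

            return time;
    }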
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 59cea185ad1d..c19fc60ff062 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -528,6 +528,7 @@
528#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e 528#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e
529#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f 529#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f
530#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 530#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490
531#define MSR_IA32_VMX_VMFUNC 0x00000491
531 532
532/* VMX_BASIC bits and bitmasks */ 533/* VMX_BASIC bits and bitmasks */
533#define VMX_BASIC_VMCS_SIZE_SHIFT 32 534#define VMX_BASIC_VMCS_SIZE_SHIFT 32
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b89c5db2b832..287e4c85fff9 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -80,7 +80,7 @@ config KVM_MMU_AUDIT
80 depends on KVM && TRACEPOINTS 80 depends on KVM && TRACEPOINTS
81 ---help--- 81 ---help---
82 This option adds a R/W kVM module parameter 'mmu_audit', which allows 82 This option adds a R/W kVM module parameter 'mmu_audit', which allows
83 audit KVM MMU at runtime. 83 auditing of KVM MMU events at runtime.
84 84
85config KVM_DEVICE_ASSIGNMENT 85config KVM_DEVICE_ASSIGNMENT
86 bool "KVM legacy PCI device assignment support" 86 bool "KVM legacy PCI device assignment support"
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 412a5aa0ef94..518d86471b76 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -37,6 +37,7 @@
37 37
38#include "irq.h" 38#include "irq.h"
39#include "i8254.h" 39#include "i8254.h"
40#include "x86.h"
40 41
41#ifndef CONFIG_X86_64 42#ifndef CONFIG_X86_64
42#define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) 43#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
@@ -349,6 +350,23 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
349 atomic_set(&ps->pending, 0); 350 atomic_set(&ps->pending, 0);
350 ps->irq_ack = 1; 351 ps->irq_ack = 1;
351 352
353 /*
354 * Do not allow the guest to program periodic timers with small
355 * interval, since the hrtimers are not throttled by the host
356 * scheduler.
357 */
358 if (ps->is_periodic) {
359 s64 min_period = min_timer_period_us * 1000LL;
360
361 if (ps->period < min_period) {
362 pr_info_ratelimited(
363 "kvm: requested %lld ns "
364 "i8254 timer period limited to %lld ns\n",
365 ps->period, min_period);
366 ps->period = min_period;
367 }
368 }
369
352 hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval), 370 hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval),
353 HRTIMER_MODE_ABS); 371 HRTIMER_MODE_ABS);
354} 372}
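With the default min_timer_period_us of 500, the clamp caps periodic PIT programming at 2 kHz: a guest asking for, say, a 100 us period (10 kHz) silently runs at 500 us instead, and the rate-limited message records the adjustment. The arithmetic in miniature:

    /* clamp_pit_period(100000, 500) == 500000: 10 kHz becomes 2 kHz */
    static s64 clamp_pit_period(s64 requested_ns, unsigned int min_us)
    {
            s64 min_ns = min_us * 1000LL;

            return requested_ns < min_ns ? min_ns : requested_ns;
    }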
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 775702f649ca..9736529ade08 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -71,9 +71,6 @@
71#define VEC_POS(v) ((v) & (32 - 1)) 71#define VEC_POS(v) ((v) & (32 - 1))
72#define REG_POS(v) (((v) >> 5) << 4) 72#define REG_POS(v) (((v) >> 5) << 4)
73 73
74static unsigned int min_timer_period_us = 500;
75module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
76
77static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) 74static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
78{ 75{
79 *((u32 *) (apic->regs + reg_off)) = val; 76 *((u32 *) (apic->regs + reg_off)) = val;
@@ -435,7 +432,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
435 u8 val; 432 u8 val;
436 if (pv_eoi_get_user(vcpu, &val) < 0) 433 if (pv_eoi_get_user(vcpu, &val) < 0)
437 apic_debug("Can't read EOI MSR value: 0x%llx\n", 434 apic_debug("Can't read EOI MSR value: 0x%llx\n",
438 (unsigned long long)vcpi->arch.pv_eoi.msr_val); 435 (unsigned long long)vcpu->arch.pv_eoi.msr_val);
439 return val & 0x1; 436 return val & 0x1;
440} 437}
441 438
@@ -443,7 +440,7 @@ static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
443{ 440{
444 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { 441 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
445 apic_debug("Can't set EOI MSR value: 0x%llx\n", 442 apic_debug("Can't set EOI MSR value: 0x%llx\n",
446 (unsigned long long)vcpi->arch.pv_eoi.msr_val); 443 (unsigned long long)vcpu->arch.pv_eoi.msr_val);
447 return; 444 return;
448 } 445 }
449 __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); 446 __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
@@ -453,7 +450,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
453{ 450{
454 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { 451 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
455 apic_debug("Can't clear EOI MSR value: 0x%llx\n", 452 apic_debug("Can't clear EOI MSR value: 0x%llx\n",
456 (unsigned long long)vcpi->arch.pv_eoi.msr_val); 453 (unsigned long long)vcpu->arch.pv_eoi.msr_val);
457 return; 454 return;
458 } 455 }
459 __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); 456 __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
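min_timer_period_us moves out of lapic.c so the PIT clamp above can reuse the same knob; the definition lands in x86.c (see the hunk at the end of this diff) and i8254.c now pulls in "x86.h" for it. Presumably the shared declaration is simply:

    /* assumed addition to arch/x86/kvm/x86.h, not shown in this diff */
    extern unsigned int min_timer_period_us;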
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 40772ef0f2b1..e50425d0f5f7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2659,6 +2659,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
2659 int emulate = 0; 2659 int emulate = 0;
2660 gfn_t pseudo_gfn; 2660 gfn_t pseudo_gfn;
2661 2661
2662 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
2663 return 0;
2664
2662 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { 2665 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
2663 if (iterator.level == level) { 2666 if (iterator.level == level) {
2664 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, 2667 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
@@ -2829,6 +2832,9 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
2829 bool ret = false; 2832 bool ret = false;
2830 u64 spte = 0ull; 2833 u64 spte = 0ull;
2831 2834
2835 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
2836 return false;
2837
2832 if (!page_fault_can_be_fast(error_code)) 2838 if (!page_fault_can_be_fast(error_code))
2833 return false; 2839 return false;
2834 2840
@@ -3224,6 +3230,9 @@ static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
3224 struct kvm_shadow_walk_iterator iterator; 3230 struct kvm_shadow_walk_iterator iterator;
3225 u64 spte = 0ull; 3231 u64 spte = 0ull;
3226 3232
3233 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
3234 return spte;
3235
3227 walk_shadow_page_lockless_begin(vcpu); 3236 walk_shadow_page_lockless_begin(vcpu);
3228 for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) 3237 for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
3229 if (!is_shadow_present_pte(spte)) 3238 if (!is_shadow_present_pte(spte))
@@ -4510,6 +4519,9 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
4510 u64 spte; 4519 u64 spte;
4511 int nr_sptes = 0; 4520 int nr_sptes = 0;
4512 4521
4522 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
4523 return nr_sptes;
4524
4513 walk_shadow_page_lockless_begin(vcpu); 4525 walk_shadow_page_lockless_begin(vcpu);
4514 for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { 4526 for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
4515 sptes[iterator.level-1] = spte; 4527 sptes[iterator.level-1] = spte;
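All four hunks add the same guard: bail out before walking shadow page tables whose root may already have been torn down. For reference, the predicate being tested (quoted from memory from arch/x86/include/asm/kvm_host.h, not part of this diff):

    #define INVALID_PAGE    (~(hpa_t)0)
    #define VALID_PAGE(x)   ((x) != INVALID_PAGE)

so each check costs a single compare on the cached root_hpa.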
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ad75d77999d0..cba218a2f08d 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -569,6 +569,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
569 if (FNAME(gpte_changed)(vcpu, gw, top_level)) 569 if (FNAME(gpte_changed)(vcpu, gw, top_level))
570 goto out_gpte_changed; 570 goto out_gpte_changed;
571 571
572 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
573 goto out_gpte_changed;
574
572 for (shadow_walk_init(&it, vcpu, addr); 575 for (shadow_walk_init(&it, vcpu, addr);
573 shadow_walk_okay(&it) && it.level > gw->level; 576 shadow_walk_okay(&it) && it.level > gw->level;
574 shadow_walk_next(&it)) { 577 shadow_walk_next(&it)) {
@@ -820,6 +823,11 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
820 */ 823 */
821 mmu_topup_memory_caches(vcpu); 824 mmu_topup_memory_caches(vcpu);
822 825
826 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) {
827 WARN_ON(1);
828 return;
829 }
830
823 spin_lock(&vcpu->kvm->mmu_lock); 831 spin_lock(&vcpu->kvm->mmu_lock);
824 for_each_shadow_entry(vcpu, gva, iterator) { 832 for_each_shadow_entry(vcpu, gva, iterator) {
825 level = iterator.level; 833 level = iterator.level;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c7168a5cff1b..e81df8fce027 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1671,6 +1671,19 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
1671 mark_dirty(svm->vmcb, VMCB_ASID); 1671 mark_dirty(svm->vmcb, VMCB_ASID);
1672} 1672}
1673 1673
1674static u64 svm_get_dr6(struct kvm_vcpu *vcpu)
1675{
1676 return to_svm(vcpu)->vmcb->save.dr6;
1677}
1678
1679static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
1680{
1681 struct vcpu_svm *svm = to_svm(vcpu);
1682
1683 svm->vmcb->save.dr6 = value;
1684 mark_dirty(svm->vmcb, VMCB_DR);
1685}
1686
1674static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) 1687static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
1675{ 1688{
1676 struct vcpu_svm *svm = to_svm(vcpu); 1689 struct vcpu_svm *svm = to_svm(vcpu);
@@ -4286,6 +4299,8 @@ static struct kvm_x86_ops svm_x86_ops = {
4286 .set_idt = svm_set_idt, 4299 .set_idt = svm_set_idt,
4287 .get_gdt = svm_get_gdt, 4300 .get_gdt = svm_get_gdt,
4288 .set_gdt = svm_set_gdt, 4301 .set_gdt = svm_set_gdt,
4302 .get_dr6 = svm_get_dr6,
4303 .set_dr6 = svm_set_dr6,
4289 .set_dr7 = svm_set_dr7, 4304 .set_dr7 = svm_set_dr7,
4290 .cache_reg = svm_cache_reg, 4305 .cache_reg = svm_cache_reg,
4291 .get_rflags = svm_get_rflags, 4306 .get_rflags = svm_get_rflags,
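get_dr6/set_dr6 are per-vendor hooks because the two vendors keep guest DR6 in different places: SVM has a real dr6 slot in the VMCB save area (above), while VMX has no guest DR6 field, so its getter returns the software copy and its setter is a no-op (both appear in the vmx.c hunks below). Generic code then only ever goes through the hook:

    /* mirrors the _kvm_get_dr() change in the x86.c hunk below */
    static unsigned long read_guest_dr6(struct kvm_vcpu *vcpu)
    {
            return kvm_x86_ops->get_dr6(vcpu);
    }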
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index da7837e1349d..5c8879127cfa 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -418,6 +418,8 @@ struct vcpu_vmx {
418 u64 msr_host_kernel_gs_base; 418 u64 msr_host_kernel_gs_base;
419 u64 msr_guest_kernel_gs_base; 419 u64 msr_guest_kernel_gs_base;
420#endif 420#endif
421 u32 vm_entry_controls_shadow;
422 u32 vm_exit_controls_shadow;
421 /* 423 /*
422 * loaded_vmcs points to the VMCS currently used in this vcpu. For a 424 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
423 * non-nested (L1) guest, it always points to vmcs01. For a nested 425 * non-nested (L1) guest, it always points to vmcs01. For a nested
@@ -1056,7 +1058,9 @@ static inline bool is_exception(u32 intr_info)
1056 == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK); 1058 == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK);
1057} 1059}
1058 1060
1059static void nested_vmx_vmexit(struct kvm_vcpu *vcpu); 1061static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
1062 u32 exit_intr_info,
1063 unsigned long exit_qualification);
1060static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, 1064static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
1061 struct vmcs12 *vmcs12, 1065 struct vmcs12 *vmcs12,
1062 u32 reason, unsigned long qualification); 1066 u32 reason, unsigned long qualification);
@@ -1326,6 +1330,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask)
1326 vmcs_writel(field, vmcs_readl(field) | mask); 1330 vmcs_writel(field, vmcs_readl(field) | mask);
1327} 1331}
1328 1332
1333static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
1334{
1335 vmcs_write32(VM_ENTRY_CONTROLS, val);
1336 vmx->vm_entry_controls_shadow = val;
1337}
1338
1339static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
1340{
1341 if (vmx->vm_entry_controls_shadow != val)
1342 vm_entry_controls_init(vmx, val);
1343}
1344
1345static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
1346{
1347 return vmx->vm_entry_controls_shadow;
1348}
1349
1350
1351static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val)
1352{
1353 vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val);
1354}
1355
1356static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
1357{
1358 vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
1359}
1360
1361static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
1362{
1363 vmcs_write32(VM_EXIT_CONTROLS, val);
1364 vmx->vm_exit_controls_shadow = val;
1365}
1366
1367static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val)
1368{
1369 if (vmx->vm_exit_controls_shadow != val)
1370 vm_exit_controls_init(vmx, val);
1371}
1372
1373static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx)
1374{
1375 return vmx->vm_exit_controls_shadow;
1376}
1377
1378
1379static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
1380{
1381 vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
1382}
1383
1384static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
1385{
1386 vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val);
1387}
1388
1329static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) 1389static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
1330{ 1390{
1331 vmx->segment_cache.bitmask = 0; 1391 vmx->segment_cache.bitmask = 0;
@@ -1410,11 +1470,11 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
1410 vmcs_write32(EXCEPTION_BITMAP, eb); 1470 vmcs_write32(EXCEPTION_BITMAP, eb);
1411} 1471}
1412 1472
1413static void clear_atomic_switch_msr_special(unsigned long entry, 1473static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
1414 unsigned long exit) 1474 unsigned long entry, unsigned long exit)
1415{ 1475{
1416 vmcs_clear_bits(VM_ENTRY_CONTROLS, entry); 1476 vm_entry_controls_clearbit(vmx, entry);
1417 vmcs_clear_bits(VM_EXIT_CONTROLS, exit); 1477 vm_exit_controls_clearbit(vmx, exit);
1418} 1478}
1419 1479
1420static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) 1480static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
@@ -1425,14 +1485,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
1425 switch (msr) { 1485 switch (msr) {
1426 case MSR_EFER: 1486 case MSR_EFER:
1427 if (cpu_has_load_ia32_efer) { 1487 if (cpu_has_load_ia32_efer) {
1428 clear_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, 1488 clear_atomic_switch_msr_special(vmx,
1489 VM_ENTRY_LOAD_IA32_EFER,
1429 VM_EXIT_LOAD_IA32_EFER); 1490 VM_EXIT_LOAD_IA32_EFER);
1430 return; 1491 return;
1431 } 1492 }
1432 break; 1493 break;
1433 case MSR_CORE_PERF_GLOBAL_CTRL: 1494 case MSR_CORE_PERF_GLOBAL_CTRL:
1434 if (cpu_has_load_perf_global_ctrl) { 1495 if (cpu_has_load_perf_global_ctrl) {
1435 clear_atomic_switch_msr_special( 1496 clear_atomic_switch_msr_special(vmx,
1436 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, 1497 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
1437 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); 1498 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
1438 return; 1499 return;
@@ -1453,14 +1514,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
1453 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); 1514 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
1454} 1515}
1455 1516
1456static void add_atomic_switch_msr_special(unsigned long entry, 1517static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
1457 unsigned long exit, unsigned long guest_val_vmcs, 1518 unsigned long entry, unsigned long exit,
1458 unsigned long host_val_vmcs, u64 guest_val, u64 host_val) 1519 unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
1520 u64 guest_val, u64 host_val)
1459{ 1521{
1460 vmcs_write64(guest_val_vmcs, guest_val); 1522 vmcs_write64(guest_val_vmcs, guest_val);
1461 vmcs_write64(host_val_vmcs, host_val); 1523 vmcs_write64(host_val_vmcs, host_val);
1462 vmcs_set_bits(VM_ENTRY_CONTROLS, entry); 1524 vm_entry_controls_setbit(vmx, entry);
1463 vmcs_set_bits(VM_EXIT_CONTROLS, exit); 1525 vm_exit_controls_setbit(vmx, exit);
1464} 1526}
1465 1527
1466static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, 1528static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
@@ -1472,7 +1534,8 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
1472 switch (msr) { 1534 switch (msr) {
1473 case MSR_EFER: 1535 case MSR_EFER:
1474 if (cpu_has_load_ia32_efer) { 1536 if (cpu_has_load_ia32_efer) {
1475 add_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, 1537 add_atomic_switch_msr_special(vmx,
1538 VM_ENTRY_LOAD_IA32_EFER,
1476 VM_EXIT_LOAD_IA32_EFER, 1539 VM_EXIT_LOAD_IA32_EFER,
1477 GUEST_IA32_EFER, 1540 GUEST_IA32_EFER,
1478 HOST_IA32_EFER, 1541 HOST_IA32_EFER,
@@ -1482,7 +1545,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
1482 break; 1545 break;
1483 case MSR_CORE_PERF_GLOBAL_CTRL: 1546 case MSR_CORE_PERF_GLOBAL_CTRL:
1484 if (cpu_has_load_perf_global_ctrl) { 1547 if (cpu_has_load_perf_global_ctrl) {
1485 add_atomic_switch_msr_special( 1548 add_atomic_switch_msr_special(vmx,
1486 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, 1549 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
1487 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, 1550 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
1488 GUEST_IA32_PERF_GLOBAL_CTRL, 1551 GUEST_IA32_PERF_GLOBAL_CTRL,
@@ -1906,7 +1969,9 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
1906 if (!(vmcs12->exception_bitmap & (1u << nr))) 1969 if (!(vmcs12->exception_bitmap & (1u << nr)))
1907 return 0; 1970 return 0;
1908 1971
1909 nested_vmx_vmexit(vcpu); 1972 nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
1973 vmcs_read32(VM_EXIT_INTR_INFO),
1974 vmcs_readl(EXIT_QUALIFICATION));
1910 return 1; 1975 return 1;
1911} 1976}
1912 1977
@@ -2279,6 +2344,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2279 rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); 2344 rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
2280 nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | 2345 nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK |
2281 VMX_MISC_SAVE_EFER_LMA; 2346 VMX_MISC_SAVE_EFER_LMA;
2347 nested_vmx_misc_low |= VMX_MISC_ACTIVITY_HLT;
2282 nested_vmx_misc_high = 0; 2348 nested_vmx_misc_high = 0;
2283} 2349}
2284 2350
@@ -2295,32 +2361,10 @@ static inline u64 vmx_control_msr(u32 low, u32 high)
2295 return low | ((u64)high << 32); 2361 return low | ((u64)high << 32);
2296} 2362}
2297 2363
2298/* 2364/* Returns 0 on success, non-0 otherwise. */
2299 * If we allow our guest to use VMX instructions (i.e., nested VMX), we should
2300 * also let it use VMX-specific MSRs.
2301 * vmx_get_vmx_msr() and vmx_set_vmx_msr() return 1 when we handled a
2302 * VMX-specific MSR, or 0 when we haven't (and the caller should handle it
2303 * like all other MSRs).
2304 */
2305static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) 2365static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2306{ 2366{
2307 if (!nested_vmx_allowed(vcpu) && msr_index >= MSR_IA32_VMX_BASIC &&
2308 msr_index <= MSR_IA32_VMX_TRUE_ENTRY_CTLS) {
2309 /*
2310 * According to the spec, processors which do not support VMX
2311 * should throw a #GP(0) when VMX capability MSRs are read.
2312 */
2313 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
2314 return 1;
2315 }
2316
2317 switch (msr_index) { 2367 switch (msr_index) {
2318 case MSR_IA32_FEATURE_CONTROL:
2319 if (nested_vmx_allowed(vcpu)) {
2320 *pdata = to_vmx(vcpu)->nested.msr_ia32_feature_control;
2321 break;
2322 }
2323 return 0;
2324 case MSR_IA32_VMX_BASIC: 2368 case MSR_IA32_VMX_BASIC:
2325 /* 2369 /*
2326 * This MSR reports some information about VMX support. We 2370 * This MSR reports some information about VMX support. We
@@ -2387,34 +2431,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2387 *pdata = nested_vmx_ept_caps; 2431 *pdata = nested_vmx_ept_caps;
2388 break; 2432 break;
2389 default: 2433 default:
2390 return 0;
2391 }
2392
2393 return 1;
2394}
2395
2396static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2397{
2398 u32 msr_index = msr_info->index;
2399 u64 data = msr_info->data;
2400 bool host_initialized = msr_info->host_initiated;
2401
2402 if (!nested_vmx_allowed(vcpu))
2403 return 0;
2404
2405 if (msr_index == MSR_IA32_FEATURE_CONTROL) {
2406 if (!host_initialized &&
2407 to_vmx(vcpu)->nested.msr_ia32_feature_control
2408 & FEATURE_CONTROL_LOCKED)
2409 return 0;
2410 to_vmx(vcpu)->nested.msr_ia32_feature_control = data;
2411 return 1; 2434 return 1;
2412 } 2435 }
2413 2436
2414 /*
2415 * No need to treat VMX capability MSRs specially: If we don't handle
2416 * them, handle_wrmsr will #GP(0), which is correct (they are readonly)
2417 */
2418 return 0; 2437 return 0;
2419} 2438}
2420 2439
@@ -2460,13 +2479,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2460 case MSR_IA32_SYSENTER_ESP: 2479 case MSR_IA32_SYSENTER_ESP:
2461 data = vmcs_readl(GUEST_SYSENTER_ESP); 2480 data = vmcs_readl(GUEST_SYSENTER_ESP);
2462 break; 2481 break;
2482 case MSR_IA32_FEATURE_CONTROL:
2483 if (!nested_vmx_allowed(vcpu))
2484 return 1;
2485 data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
2486 break;
2487 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
2488 if (!nested_vmx_allowed(vcpu))
2489 return 1;
2490 return vmx_get_vmx_msr(vcpu, msr_index, pdata);
2463 case MSR_TSC_AUX: 2491 case MSR_TSC_AUX:
2464 if (!to_vmx(vcpu)->rdtscp_enabled) 2492 if (!to_vmx(vcpu)->rdtscp_enabled)
2465 return 1; 2493 return 1;
2466 /* Otherwise falls through */ 2494 /* Otherwise falls through */
2467 default: 2495 default:
2468 if (vmx_get_vmx_msr(vcpu, msr_index, pdata))
2469 return 0;
2470 msr = find_msr_entry(to_vmx(vcpu), msr_index); 2496 msr = find_msr_entry(to_vmx(vcpu), msr_index);
2471 if (msr) { 2497 if (msr) {
2472 data = msr->data; 2498 data = msr->data;
@@ -2479,6 +2505,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2479 return 0; 2505 return 0;
2480} 2506}
2481 2507
2508static void vmx_leave_nested(struct kvm_vcpu *vcpu);
2509
2482/* 2510/*
2483 * Writes msr value into the appropriate "register". 2511
2484 * Returns 0 on success, non-0 otherwise. 2512 * Returns 0 on success, non-0 otherwise.
@@ -2533,6 +2561,17 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2533 case MSR_IA32_TSC_ADJUST: 2561 case MSR_IA32_TSC_ADJUST:
2534 ret = kvm_set_msr_common(vcpu, msr_info); 2562 ret = kvm_set_msr_common(vcpu, msr_info);
2535 break; 2563 break;
2564 case MSR_IA32_FEATURE_CONTROL:
2565 if (!nested_vmx_allowed(vcpu) ||
2566 (to_vmx(vcpu)->nested.msr_ia32_feature_control &
2567 FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
2568 return 1;
2569 vmx->nested.msr_ia32_feature_control = data;
2570 if (msr_info->host_initiated && data == 0)
2571 vmx_leave_nested(vcpu);
2572 break;
2573 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
2574 return 1; /* they are read-only */
2536 case MSR_TSC_AUX: 2575 case MSR_TSC_AUX:
2537 if (!vmx->rdtscp_enabled) 2576 if (!vmx->rdtscp_enabled)
2538 return 1; 2577 return 1;
@@ -2541,8 +2580,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2541 return 1; 2580 return 1;
2542 /* Otherwise falls through */ 2581 /* Otherwise falls through */
2543 default: 2582 default:
2544 if (vmx_set_vmx_msr(vcpu, msr_info))
2545 break;
2546 msr = find_msr_entry(vmx, msr_index); 2583 msr = find_msr_entry(vmx, msr_index);
2547 if (msr) { 2584 if (msr) {
2548 msr->data = data; 2585 msr->data = data;
@@ -3182,14 +3219,10 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
3182 vmx_load_host_state(to_vmx(vcpu)); 3219 vmx_load_host_state(to_vmx(vcpu));
3183 vcpu->arch.efer = efer; 3220 vcpu->arch.efer = efer;
3184 if (efer & EFER_LMA) { 3221 if (efer & EFER_LMA) {
3185 vmcs_write32(VM_ENTRY_CONTROLS, 3222 vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
3186 vmcs_read32(VM_ENTRY_CONTROLS) |
3187 VM_ENTRY_IA32E_MODE);
3188 msr->data = efer; 3223 msr->data = efer;
3189 } else { 3224 } else {
3190 vmcs_write32(VM_ENTRY_CONTROLS, 3225 vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
3191 vmcs_read32(VM_ENTRY_CONTROLS) &
3192 ~VM_ENTRY_IA32E_MODE);
3193 3226
3194 msr->data = efer & ~EFER_LME; 3227 msr->data = efer & ~EFER_LME;
3195 } 3228 }
@@ -3217,9 +3250,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
3217 3250
3218static void exit_lmode(struct kvm_vcpu *vcpu) 3251static void exit_lmode(struct kvm_vcpu *vcpu)
3219{ 3252{
3220 vmcs_write32(VM_ENTRY_CONTROLS, 3253 vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
3221 vmcs_read32(VM_ENTRY_CONTROLS)
3222 & ~VM_ENTRY_IA32E_MODE);
3223 vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); 3254 vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
3224} 3255}
3225 3256
@@ -4346,10 +4377,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
4346 ++vmx->nmsrs; 4377 ++vmx->nmsrs;
4347 } 4378 }
4348 4379
4349 vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); 4380
4381 vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
4350 4382
4351 /* 22.2.1, 20.8.1 */ 4383 /* 22.2.1, 20.8.1 */
4352 vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); 4384 vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl);
4353 4385
4354 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); 4386 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
4355 set_cr4_guest_host_mask(vmx); 4387 set_cr4_guest_host_mask(vmx);
@@ -4588,15 +4620,12 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
4588static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) 4620static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
4589{ 4621{
4590 if (is_guest_mode(vcpu)) { 4622 if (is_guest_mode(vcpu)) {
4591 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4592
4593 if (to_vmx(vcpu)->nested.nested_run_pending) 4623 if (to_vmx(vcpu)->nested.nested_run_pending)
4594 return 0; 4624 return 0;
4595 if (nested_exit_on_nmi(vcpu)) { 4625 if (nested_exit_on_nmi(vcpu)) {
4596 nested_vmx_vmexit(vcpu); 4626 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
4597 vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; 4627 NMI_VECTOR | INTR_TYPE_NMI_INTR |
4598 vmcs12->vm_exit_intr_info = NMI_VECTOR | 4628 INTR_INFO_VALID_MASK, 0);
4599 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK;
4600 /* 4629 /*
4601 * The NMI-triggered VM exit counts as injection: 4630 * The NMI-triggered VM exit counts as injection:
4602 * clear this one and block further NMIs. 4631 * clear this one and block further NMIs.
@@ -4618,15 +4647,11 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
4618static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) 4647static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
4619{ 4648{
4620 if (is_guest_mode(vcpu)) { 4649 if (is_guest_mode(vcpu)) {
4621 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4622
4623 if (to_vmx(vcpu)->nested.nested_run_pending) 4650 if (to_vmx(vcpu)->nested.nested_run_pending)
4624 return 0; 4651 return 0;
4625 if (nested_exit_on_intr(vcpu)) { 4652 if (nested_exit_on_intr(vcpu)) {
4626 nested_vmx_vmexit(vcpu); 4653 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
4627 vmcs12->vm_exit_reason = 4654 0, 0);
4628 EXIT_REASON_EXTERNAL_INTERRUPT;
4629 vmcs12->vm_exit_intr_info = 0;
4630 /* 4655 /*
4631 * fall through to normal code, but now in L1, not L2 4656 * fall through to normal code, but now in L1, not L2
4632 */ 4657 */
@@ -4812,7 +4837,8 @@ static int handle_exception(struct kvm_vcpu *vcpu)
4812 dr6 = vmcs_readl(EXIT_QUALIFICATION); 4837 dr6 = vmcs_readl(EXIT_QUALIFICATION);
4813 if (!(vcpu->guest_debug & 4838 if (!(vcpu->guest_debug &
4814 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { 4839 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
4815 vcpu->arch.dr6 = dr6 | DR6_FIXED_1; 4840 vcpu->arch.dr6 &= ~15;
4841 vcpu->arch.dr6 |= dr6;
4816 kvm_queue_exception(vcpu, DB_VECTOR); 4842 kvm_queue_exception(vcpu, DB_VECTOR);
4817 return 1; 4843 return 1;
4818 } 4844 }
@@ -5080,14 +5106,27 @@ static int handle_dr(struct kvm_vcpu *vcpu)
5080 reg = DEBUG_REG_ACCESS_REG(exit_qualification); 5106 reg = DEBUG_REG_ACCESS_REG(exit_qualification);
5081 if (exit_qualification & TYPE_MOV_FROM_DR) { 5107 if (exit_qualification & TYPE_MOV_FROM_DR) {
5082 unsigned long val; 5108 unsigned long val;
5083 if (!kvm_get_dr(vcpu, dr, &val)) 5109
5084 kvm_register_write(vcpu, reg, val); 5110 if (kvm_get_dr(vcpu, dr, &val))
5111 return 1;
5112 kvm_register_write(vcpu, reg, val);
5085 } else 5113 } else
5086 kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]); 5114 if (kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]))
5115 return 1;
5116
5087 skip_emulated_instruction(vcpu); 5117 skip_emulated_instruction(vcpu);
5088 return 1; 5118 return 1;
5089} 5119}
5090 5120
5121static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
5122{
5123 return vcpu->arch.dr6;
5124}
5125
5126static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
5127{
5128}
5129
5091static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) 5130static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
5092{ 5131{
5093 vmcs_writel(GUEST_DR7, val); 5132 vmcs_writel(GUEST_DR7, val);
@@ -6460,11 +6499,8 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
6460 int size; 6499 int size;
6461 u8 b; 6500 u8 b;
6462 6501
6463 if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING))
6464 return 1;
6465
6466 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) 6502 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
6467 return 0; 6503 return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
6468 6504
6469 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 6505 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6470 6506
@@ -6628,6 +6664,13 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
6628 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 6664 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6629 u32 exit_reason = vmx->exit_reason; 6665 u32 exit_reason = vmx->exit_reason;
6630 6666
6667 trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
6668 vmcs_readl(EXIT_QUALIFICATION),
6669 vmx->idt_vectoring_info,
6670 intr_info,
6671 vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
6672 KVM_ISA_VMX);
6673
6631 if (vmx->nested.nested_run_pending) 6674 if (vmx->nested.nested_run_pending)
6632 return 0; 6675 return 0;
6633 6676
@@ -6777,7 +6820,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
6777 return handle_invalid_guest_state(vcpu); 6820 return handle_invalid_guest_state(vcpu);
6778 6821
6779 if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { 6822 if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
6780 nested_vmx_vmexit(vcpu); 6823 nested_vmx_vmexit(vcpu, exit_reason,
6824 vmcs_read32(VM_EXIT_INTR_INFO),
6825 vmcs_readl(EXIT_QUALIFICATION));
6781 return 1; 6826 return 1;
6782 } 6827 }
6783 6828
@@ -7332,8 +7377,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
7332 struct vcpu_vmx *vmx = to_vmx(vcpu); 7377 struct vcpu_vmx *vmx = to_vmx(vcpu);
7333 7378
7334 free_vpid(vmx); 7379 free_vpid(vmx);
7335 free_nested(vmx);
7336 free_loaded_vmcs(vmx->loaded_vmcs); 7380 free_loaded_vmcs(vmx->loaded_vmcs);
7381 free_nested(vmx);
7337 kfree(vmx->guest_msrs); 7382 kfree(vmx->guest_msrs);
7338 kvm_vcpu_uninit(vcpu); 7383 kvm_vcpu_uninit(vcpu);
7339 kmem_cache_free(kvm_vcpu_cache, vmx); 7384 kmem_cache_free(kvm_vcpu_cache, vmx);
@@ -7518,15 +7563,14 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
7518static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, 7563static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
7519 struct x86_exception *fault) 7564 struct x86_exception *fault)
7520{ 7565{
7521 struct vmcs12 *vmcs12; 7566 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7522 nested_vmx_vmexit(vcpu); 7567 u32 exit_reason;
7523 vmcs12 = get_vmcs12(vcpu);
7524 7568
7525 if (fault->error_code & PFERR_RSVD_MASK) 7569 if (fault->error_code & PFERR_RSVD_MASK)
7526 vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; 7570 exit_reason = EXIT_REASON_EPT_MISCONFIG;
7527 else 7571 else
7528 vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION; 7572 exit_reason = EXIT_REASON_EPT_VIOLATION;
7529 vmcs12->exit_qualification = vcpu->arch.exit_qualification; 7573 nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification);
7530 vmcs12->guest_physical_address = fault->address; 7574 vmcs12->guest_physical_address = fault->address;
7531} 7575}
7532 7576
@@ -7564,7 +7608,9 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
7564 7608
7565 /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ 7609 /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
7566 if (vmcs12->exception_bitmap & (1u << PF_VECTOR)) 7610 if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
7567 nested_vmx_vmexit(vcpu); 7611 nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
7612 vmcs_read32(VM_EXIT_INTR_INFO),
7613 vmcs_readl(EXIT_QUALIFICATION));
7568 else 7614 else
7569 kvm_inject_page_fault(vcpu, fault); 7615 kvm_inject_page_fault(vcpu, fault);
7570} 7616}
@@ -7706,6 +7752,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7706 else 7752 else
7707 vmcs_write64(APIC_ACCESS_ADDR, 7753 vmcs_write64(APIC_ACCESS_ADDR,
7708 page_to_phys(vmx->nested.apic_access_page)); 7754 page_to_phys(vmx->nested.apic_access_page));
7755 } else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) {
7756 exec_control |=
7757 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
7758 vmcs_write64(APIC_ACCESS_ADDR,
7759 page_to_phys(vcpu->kvm->arch.apic_access_page));
7709 } 7760 }
7710 7761
7711 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); 7762 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
@@ -7759,12 +7810,12 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7759 exit_control = vmcs_config.vmexit_ctrl; 7810 exit_control = vmcs_config.vmexit_ctrl;
7760 if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) 7811 if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
7761 exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; 7812 exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
7762 vmcs_write32(VM_EXIT_CONTROLS, exit_control); 7813 vm_exit_controls_init(vmx, exit_control);
7763 7814
7764 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are 7815 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
7765 * emulated by vmx_set_efer(), below. 7816 * emulated by vmx_set_efer(), below.
7766 */ 7817 */
7767 vmcs_write32(VM_ENTRY_CONTROLS, 7818 vm_entry_controls_init(vmx,
7768 (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & 7819 (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
7769 ~VM_ENTRY_IA32E_MODE) | 7820 ~VM_ENTRY_IA32E_MODE) |
7770 (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); 7821 (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
@@ -7882,7 +7933,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
7882 return 1; 7933 return 1;
7883 } 7934 }
7884 7935
7885 if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) { 7936 if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
7937 vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) {
7886 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 7938 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
7887 return 1; 7939 return 1;
7888 } 7940 }
@@ -7994,8 +8046,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
7994 8046
7995 enter_guest_mode(vcpu); 8047 enter_guest_mode(vcpu);
7996 8048
7997 vmx->nested.nested_run_pending = 1;
7998
7999 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); 8049 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
8000 8050
8001 cpu = get_cpu(); 8051 cpu = get_cpu();
@@ -8011,6 +8061,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
8011 8061
8012 prepare_vmcs02(vcpu, vmcs12); 8062 prepare_vmcs02(vcpu, vmcs12);
8013 8063
8064 if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
8065 return kvm_emulate_halt(vcpu);
8066
8067 vmx->nested.nested_run_pending = 1;
8068
8014 /* 8069 /*
8015 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point 8070 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
8016 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet 8071 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
@@ -8110,7 +8165,9 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
8110 * exit-information fields only. Other fields are modified by L1 with VMWRITE, 8165 * exit-information fields only. Other fields are modified by L1 with VMWRITE,
8111 * which already writes to vmcs12 directly. 8166 * which already writes to vmcs12 directly.
8112 */ 8167 */
8113static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) 8168static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
8169 u32 exit_reason, u32 exit_intr_info,
8170 unsigned long exit_qualification)
8114{ 8171{
8115 /* update guest state fields: */ 8172 /* update guest state fields: */
8116 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); 8173 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
@@ -8162,6 +8219,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8162 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); 8219 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
8163 vmcs12->guest_pending_dbg_exceptions = 8220 vmcs12->guest_pending_dbg_exceptions =
8164 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); 8221 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
8222 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
8223 vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
8224 else
8225 vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
8165 8226
8166 if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && 8227 if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
8167 (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) 8228 (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
@@ -8186,7 +8247,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8186 8247
8187 vmcs12->vm_entry_controls = 8248 vmcs12->vm_entry_controls =
8188 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | 8249 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
8189 (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); 8250 (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
8190 8251
8191 /* TODO: These cannot have changed unless we have MSR bitmaps and 8252 /* TODO: These cannot have changed unless we have MSR bitmaps and
8192 * the relevant bit asks not to trap the change */ 8253 * the relevant bit asks not to trap the change */
@@ -8201,10 +8262,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8201 8262
8202 /* update exit information fields: */ 8263 /* update exit information fields: */
8203 8264
8204 vmcs12->vm_exit_reason = to_vmx(vcpu)->exit_reason; 8265 vmcs12->vm_exit_reason = exit_reason;
8205 vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 8266 vmcs12->exit_qualification = exit_qualification;
8206 8267
8207 vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 8268 vmcs12->vm_exit_intr_info = exit_intr_info;
8208 if ((vmcs12->vm_exit_intr_info & 8269 if ((vmcs12->vm_exit_intr_info &
8209 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == 8270 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
8210 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) 8271 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK))
@@ -8370,7 +8431,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
8370 * and modify vmcs12 to make it see what it would expect to see there if 8431 * and modify vmcs12 to make it see what it would expect to see there if
8371 * L2 was its real guest. Must only be called when in L2 (is_guest_mode()) 8432 * L2 was its real guest. Must only be called when in L2 (is_guest_mode())
8372 */ 8433 */
8373static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) 8434static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
8435 u32 exit_intr_info,
8436 unsigned long exit_qualification)
8374{ 8437{
8375 struct vcpu_vmx *vmx = to_vmx(vcpu); 8438 struct vcpu_vmx *vmx = to_vmx(vcpu);
8376 int cpu; 8439 int cpu;
@@ -8380,7 +8443,15 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
8380 WARN_ON_ONCE(vmx->nested.nested_run_pending); 8443 WARN_ON_ONCE(vmx->nested.nested_run_pending);
8381 8444
8382 leave_guest_mode(vcpu); 8445 leave_guest_mode(vcpu);
8383 prepare_vmcs12(vcpu, vmcs12); 8446 prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
8447 exit_qualification);
8448
8449 trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
8450 vmcs12->exit_qualification,
8451 vmcs12->idt_vectoring_info_field,
8452 vmcs12->vm_exit_intr_info,
8453 vmcs12->vm_exit_intr_error_code,
8454 KVM_ISA_VMX);
8384 8455
8385 cpu = get_cpu(); 8456 cpu = get_cpu();
8386 vmx->loaded_vmcs = &vmx->vmcs01; 8457 vmx->loaded_vmcs = &vmx->vmcs01;
@@ -8389,6 +8460,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
8389 vcpu->cpu = cpu; 8460 vcpu->cpu = cpu;
8390 put_cpu(); 8461 put_cpu();
8391 8462
8463 vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS));
8464 vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS));
8392 vmx_segment_cache_clear(vmx); 8465 vmx_segment_cache_clear(vmx);
8393 8466
8394 /* if no vmcs02 cache requested, remove the one we used */ 8467 /* if no vmcs02 cache requested, remove the one we used */
@@ -8424,6 +8497,16 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
8424} 8497}
8425 8498
8426/* 8499/*
8500 * Forcibly leave nested mode in order to be able to reset the VCPU later on.
8501 */
8502static void vmx_leave_nested(struct kvm_vcpu *vcpu)
8503{
8504 if (is_guest_mode(vcpu))
8505 nested_vmx_vmexit(vcpu, -1, 0, 0);
8506 free_nested(to_vmx(vcpu));
8507}
8508
8509/*
8427 * L1's failure to enter L2 is a subset of a normal exit, as explained in 8510 * L1's failure to enter L2 is a subset of a normal exit, as explained in
8428 * 23.7 "VM-entry failures during or after loading guest state" (this also 8511 * 23.7 "VM-entry failures during or after loading guest state" (this also
8429 * lists the acceptable exit-reason and exit-qualification parameters). 8512 * lists the acceptable exit-reason and exit-qualification parameters).
@@ -8486,6 +8569,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
8486 .set_idt = vmx_set_idt, 8569 .set_idt = vmx_set_idt,
8487 .get_gdt = vmx_get_gdt, 8570 .get_gdt = vmx_get_gdt,
8488 .set_gdt = vmx_set_gdt, 8571 .set_gdt = vmx_set_gdt,
8572 .get_dr6 = vmx_get_dr6,
8573 .set_dr6 = vmx_set_dr6,
8489 .set_dr7 = vmx_set_dr7, 8574 .set_dr7 = vmx_set_dr7,
8490 .cache_reg = vmx_cache_reg, 8575 .cache_reg = vmx_cache_reg,
8491 .get_rflags = vmx_get_rflags, 8576 .get_rflags = vmx_get_rflags,
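The new vm_entry_controls_*/vm_exit_controls_* accessors are a classic shadowed-register pattern: cache the last value written, read from the cache instead of issuing a vmread, and skip the vmwrite when nothing changed; nested transitions re-seed the shadow from hardware (see the vmcs_read32() calls in nested_vmx_vmexit above). The pattern in miniature, with hw_write() as an explicitly hypothetical stand-in for vmcs_write32():

    static void hw_write(u32 val);      /* stand-in for vmcs_write32() */

    struct shadowed_ctl {
            u32 shadow;
    };

    static inline void ctl_set(struct shadowed_ctl *c, u32 val)
    {
            if (c->shadow != val) {
                    hw_write(val);      /* only touch hardware on change */
                    c->shadow = val;
            }
    }

    static inline u32 ctl_get(const struct shadowed_ctl *c)
    {
            return c->shadow;           /* no vmread needed */
    }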
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5d004da1e35d..0c76f7cfdb32 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -94,6 +94,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
94static bool ignore_msrs = 0; 94static bool ignore_msrs = 0;
95module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); 95module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
96 96
97unsigned int min_timer_period_us = 500;
98module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
99
97bool kvm_has_tsc_control; 100bool kvm_has_tsc_control;
98EXPORT_SYMBOL_GPL(kvm_has_tsc_control); 101EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
99u32 kvm_max_guest_tsc_khz; 102u32 kvm_max_guest_tsc_khz;
@@ -719,6 +722,12 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
719} 722}
720EXPORT_SYMBOL_GPL(kvm_get_cr8); 723EXPORT_SYMBOL_GPL(kvm_get_cr8);
721 724
725static void kvm_update_dr6(struct kvm_vcpu *vcpu)
726{
727 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
728 kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6);
729}
730
722static void kvm_update_dr7(struct kvm_vcpu *vcpu) 731static void kvm_update_dr7(struct kvm_vcpu *vcpu)
723{ 732{
724 unsigned long dr7; 733 unsigned long dr7;
@@ -747,6 +756,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
747 if (val & 0xffffffff00000000ULL) 756 if (val & 0xffffffff00000000ULL)
748 return -1; /* #GP */ 757 return -1; /* #GP */
749 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; 758 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
759 kvm_update_dr6(vcpu);
750 break; 760 break;
751 case 5: 761 case 5:
752 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 762 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
@@ -788,7 +798,10 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
788 return 1; 798 return 1;
789 /* fall through */ 799 /* fall through */
790 case 6: 800 case 6:
791 *val = vcpu->arch.dr6; 801 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
802 *val = vcpu->arch.dr6;
803 else
804 *val = kvm_x86_ops->get_dr6(vcpu);
792 break; 805 break;
793 case 5: 806 case 5:
794 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 807 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
@@ -836,11 +849,12 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
836 * kvm-specific. Those are put in the beginning of the list. 849 * kvm-specific. Those are put in the beginning of the list.
837 */ 850 */
838 851
839#define KVM_SAVE_MSRS_BEGIN 10 852#define KVM_SAVE_MSRS_BEGIN 12
840static u32 msrs_to_save[] = { 853static u32 msrs_to_save[] = {
841 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 854 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
842 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, 855 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
843 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, 856 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
857 HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
844 HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, 858 HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
845 MSR_KVM_PV_EOI_EN, 859 MSR_KVM_PV_EOI_EN,
846 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, 860 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
@@ -1275,8 +1289,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1275 kvm->arch.last_tsc_write = data; 1289 kvm->arch.last_tsc_write = data;
1276 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; 1290 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
1277 1291
1278 /* Reset of TSC must disable overshoot protection below */
1279 vcpu->arch.hv_clock.tsc_timestamp = 0;
1280 vcpu->arch.last_guest_tsc = data; 1292 vcpu->arch.last_guest_tsc = data;
1281 1293
1282 /* Keep track of which generation this VCPU has synchronized to */ 1294 /* Keep track of which generation this VCPU has synchronized to */
@@ -1484,7 +1496,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1484 unsigned long flags, this_tsc_khz; 1496 unsigned long flags, this_tsc_khz;
1485 struct kvm_vcpu_arch *vcpu = &v->arch; 1497 struct kvm_vcpu_arch *vcpu = &v->arch;
1486 struct kvm_arch *ka = &v->kvm->arch; 1498 struct kvm_arch *ka = &v->kvm->arch;
1487 s64 kernel_ns, max_kernel_ns; 1499 s64 kernel_ns;
1488 u64 tsc_timestamp, host_tsc; 1500 u64 tsc_timestamp, host_tsc;
1489 struct pvclock_vcpu_time_info guest_hv_clock; 1501 struct pvclock_vcpu_time_info guest_hv_clock;
1490 u8 pvclock_flags; 1502 u8 pvclock_flags;
@@ -1543,37 +1555,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1543 if (!vcpu->pv_time_enabled) 1555 if (!vcpu->pv_time_enabled)
1544 return 0; 1556 return 0;
1545 1557
1546 /*
1547 * Time as measured by the TSC may go backwards when resetting the base
1548 * tsc_timestamp. The reason for this is that the TSC resolution is
1549 * higher than the resolution of the other clock scales. Thus, many
1546 * possible measurements of the TSC correspond to one measurement of any
1551 * other clock, and so a spread of values is possible. This is not a
1552 * problem for the computation of the nanosecond clock; with TSC rates
1553 * around 1 GHz, there can only be a few cycles which correspond to one
1554 * nanosecond value, and any path through this code will inevitably
1555 * take longer than that. However, with the kernel_ns value itself,
1556 * the precision may be much lower, down to HZ granularity. If the
1557 * first sampling of TSC against kernel_ns ends in the low part of the
1558 * range, and the second in the high end of the range, we can get:
1559 *
1560 * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new
1561 *
1562 * As the sampling errors potentially range in the thousands of cycles,
1563 * it is possible such a time value has already been observed by the
1564 * guest. To protect against this, we must compute the system time as
1565 * observed by the guest and ensure the new system time is greater.
1566 */
1567 max_kernel_ns = 0;
1568 if (vcpu->hv_clock.tsc_timestamp) {
1569 max_kernel_ns = vcpu->last_guest_tsc -
1570 vcpu->hv_clock.tsc_timestamp;
1571 max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
1572 vcpu->hv_clock.tsc_to_system_mul,
1573 vcpu->hv_clock.tsc_shift);
1574 max_kernel_ns += vcpu->last_kernel_ns;
1575 }
1576
1577 if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { 1558 if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
1578 kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, 1559 kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
1579 &vcpu->hv_clock.tsc_shift, 1560 &vcpu->hv_clock.tsc_shift,
@@ -1581,14 +1562,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1581 vcpu->hw_tsc_khz = this_tsc_khz; 1562 vcpu->hw_tsc_khz = this_tsc_khz;
1582 } 1563 }
1583 1564
1584 /* with a master <monotonic time, tsc value> tuple,
1585 * pvclock clock reads always increase at the (scaled) rate
1586 * of guest TSC - no need to deal with sampling errors.
1587 */
1588 if (!use_master_clock) {
1589 if (max_kernel_ns > kernel_ns)
1590 kernel_ns = max_kernel_ns;
1591 }
1592 /* With all the info we got, fill in the values */ 1565 /* With all the info we got, fill in the values */
1593 vcpu->hv_clock.tsc_timestamp = tsc_timestamp; 1566 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
1594 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; 1567 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
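Editor's note: the deleted overshoot-protection comment above is easier to see with numbers. A minimal standalone sketch (purely illustrative values; none of these constants come from the patch) showing how two TSC/kernel_ns sample pairs can produce a time that runs backwards:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	double S = 0.5;			/* ns per TSC cycle (a 2 GHz part) */
	uint64_t tsc = 1000000;		/* guest TSC at read time */

	/* First sampling lands low in the kernel_ns granule, second high:
	 * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new */
	double t_old = (tsc - 100) * S + 4000000.0;	/* offset_low, kns_old */
	double t_new = (tsc - 4100) * S + 4001000.0;	/* offset_high, kns_new */

	printf("old=%.0f new=%.0f backwards=%d\n", t_old, t_new, t_old > t_new);
	return 0;
}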
@@ -1826,6 +1799,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
1826 switch (msr) { 1799 switch (msr) {
1827 case HV_X64_MSR_GUEST_OS_ID: 1800 case HV_X64_MSR_GUEST_OS_ID:
1828 case HV_X64_MSR_HYPERCALL: 1801 case HV_X64_MSR_HYPERCALL:
1802 case HV_X64_MSR_REFERENCE_TSC:
1803 case HV_X64_MSR_TIME_REF_COUNT:
1829 r = true; 1804 r = true;
1830 break; 1805 break;
1831 } 1806 }
@@ -1867,6 +1842,20 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1867 kvm->arch.hv_hypercall = data; 1842 kvm->arch.hv_hypercall = data;
1868 break; 1843 break;
1869 } 1844 }
1845 case HV_X64_MSR_REFERENCE_TSC: {
1846 u64 gfn;
1847 HV_REFERENCE_TSC_PAGE tsc_ref;
1848 memset(&tsc_ref, 0, sizeof(tsc_ref));
1849 kvm->arch.hv_tsc_page = data;
1850 if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
1851 break;
1852 gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
 1853 if (kvm_write_guest(kvm, gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
 1854 &tsc_ref, sizeof(tsc_ref)))
1855 return 1;
1856 mark_page_dirty(kvm, gfn);
1857 break;
1858 }
1870 default: 1859 default:
1871 vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " 1860 vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1872 "data 0x%llx\n", msr, data); 1861 "data 0x%llx\n", msr, data);
@@ -2291,6 +2280,14 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2291 case HV_X64_MSR_HYPERCALL: 2280 case HV_X64_MSR_HYPERCALL:
2292 data = kvm->arch.hv_hypercall; 2281 data = kvm->arch.hv_hypercall;
2293 break; 2282 break;
2283 case HV_X64_MSR_TIME_REF_COUNT: {
2284 data =
2285 div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
2286 break;
2287 }
2288 case HV_X64_MSR_REFERENCE_TSC:
2289 data = kvm->arch.hv_tsc_page;
2290 break;
2294 default: 2291 default:
2295 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); 2292 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
2296 return 1; 2293 return 1;
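Editor's note: the TIME_REF_COUNT read above defines the Hyper-V reference counter as guest time in 100 ns units. A plain-C model of that conversion (hv_ref_count is a hypothetical name, not a kernel symbol):

#include <stdint.h>

/* 1 tick = 100 ns, so 1 second of guest time is 10,000,000 ticks. */
static uint64_t hv_ref_count(uint64_t kernel_ns, int64_t kvmclock_offset)
{
	return (kernel_ns + kvmclock_offset) / 100;
}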
@@ -2604,6 +2601,7 @@ int kvm_dev_ioctl_check_extension(long ext)
2604#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT 2601#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2605 case KVM_CAP_ASSIGN_DEV_IRQ: 2602 case KVM_CAP_ASSIGN_DEV_IRQ:
2606 case KVM_CAP_PCI_2_3: 2603 case KVM_CAP_PCI_2_3:
2604 case KVM_CAP_HYPERV_TIME:
2607#endif 2605#endif
2608 r = 1; 2606 r = 1;
2609 break; 2607 break;
@@ -2972,8 +2970,11 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2972static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, 2970static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
2973 struct kvm_debugregs *dbgregs) 2971 struct kvm_debugregs *dbgregs)
2974{ 2972{
2973 unsigned long val;
2974
2975 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); 2975 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
2976 dbgregs->dr6 = vcpu->arch.dr6; 2976 _kvm_get_dr(vcpu, 6, &val);
2977 dbgregs->dr6 = val;
2977 dbgregs->dr7 = vcpu->arch.dr7; 2978 dbgregs->dr7 = vcpu->arch.dr7;
2978 dbgregs->flags = 0; 2979 dbgregs->flags = 0;
2979 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); 2980 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
@@ -2987,7 +2988,9 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
2987 2988
2988 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); 2989 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
2989 vcpu->arch.dr6 = dbgregs->dr6; 2990 vcpu->arch.dr6 = dbgregs->dr6;
2991 kvm_update_dr6(vcpu);
2990 vcpu->arch.dr7 = dbgregs->dr7; 2992 vcpu->arch.dr7 = dbgregs->dr7;
2993 kvm_update_dr7(vcpu);
2991 2994
2992 return 0; 2995 return 0;
2993} 2996}
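Editor's note: with kvm_update_dr6()/kvm_update_dr7() now called from the set path, a KVM_SET_DEBUGREGS from userspace propagates DR6/DR7 to the shadow state immediately. A minimal userspace sketch of that ioctl (restore_debugregs is an illustrative helper name; the ioctl and struct are the documented x86 KVM API):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int restore_debugregs(int vcpu_fd, struct kvm_debugregs *saved)
{
	/* DR6/DR7 take effect via kvm_update_dr6()/kvm_update_dr7() */
	return ioctl(vcpu_fd, KVM_SET_DEBUGREGS, saved);
}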
@@ -5834,6 +5837,11 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
5834 kvm_apic_update_tmr(vcpu, tmr); 5837 kvm_apic_update_tmr(vcpu, tmr);
5835} 5838}
5836 5839
5840/*
5841 * Returns 1 to let __vcpu_run() continue the guest execution loop without
 5842 * exiting to userspace. Otherwise, the value will be returned to
 5843 * userspace.
5844 */
5837static int vcpu_enter_guest(struct kvm_vcpu *vcpu) 5845static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5838{ 5846{
5839 int r; 5847 int r;
@@ -6089,7 +6097,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
6089 } 6097 }
6090 if (need_resched()) { 6098 if (need_resched()) {
6091 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 6099 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
6092 kvm_resched(vcpu); 6100 cond_resched();
6093 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 6101 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
6094 } 6102 }
6095 } 6103 }
@@ -6717,6 +6725,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
6717 6725
6718 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); 6726 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
6719 vcpu->arch.dr6 = DR6_FIXED_1; 6727 vcpu->arch.dr6 = DR6_FIXED_1;
6728 kvm_update_dr6(vcpu);
6720 vcpu->arch.dr7 = DR7_FIXED_1; 6729 vcpu->arch.dr7 = DR7_FIXED_1;
6721 kvm_update_dr7(vcpu); 6730 kvm_update_dr7(vcpu);
6722 6731
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 587fb9ede436..8da5823bcde6 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -125,5 +125,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
125#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) 125#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
126extern u64 host_xcr0; 126extern u64 host_xcr0;
127 127
128extern unsigned int min_timer_period_us;
129
128extern struct static_key kvm_no_apic_vcpu; 130extern struct static_key kvm_no_apic_vcpu;
129#endif 131#endif
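Editor's note: min_timer_period_us is exported here so the timer emulation can clamp guest-programmed periods. A sketch of the intended frequency limit (the clamp below is an assumption about how the i8254/lapic callers use it, since those hunks are not shown in this excerpt):

#include <stdint.h>

static unsigned int min_timer_period_us = 500;

/* 500 us floor => at most 2000 timer interrupts per second per vcpu */
static int64_t limit_timer_period(int64_t period_ns)
{
	int64_t min_ns = (int64_t)min_timer_period_us * 1000;

	return period_ns < min_ns ? min_ns : period_ns;
}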
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 7e2d15837b02..be85127bfed3 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -144,7 +144,7 @@ struct kvm_run;
144struct kvm_exit_mmio; 144struct kvm_exit_mmio;
145 145
146#ifdef CONFIG_KVM_ARM_VGIC 146#ifdef CONFIG_KVM_ARM_VGIC
147int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr); 147int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
148int kvm_vgic_hyp_init(void); 148int kvm_vgic_hyp_init(void);
149int kvm_vgic_init(struct kvm *kvm); 149int kvm_vgic_init(struct kvm *kvm);
150int kvm_vgic_create(struct kvm *kvm); 150int kvm_vgic_create(struct kvm *kvm);
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index cac496b1e279..0ceb389dba6c 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -17,6 +17,9 @@
17#define GIC_CPU_EOI 0x10 17#define GIC_CPU_EOI 0x10
18#define GIC_CPU_RUNNINGPRI 0x14 18#define GIC_CPU_RUNNINGPRI 0x14
19#define GIC_CPU_HIGHPRI 0x18 19#define GIC_CPU_HIGHPRI 0x18
20#define GIC_CPU_ALIAS_BINPOINT 0x1c
21#define GIC_CPU_ACTIVEPRIO 0xd0
22#define GIC_CPU_IDENT 0xfc
20 23
21#define GIC_DIST_CTRL 0x000 24#define GIC_DIST_CTRL 0x000
22#define GIC_DIST_CTR 0x004 25#define GIC_DIST_CTR 0x004
@@ -56,6 +59,15 @@
56#define GICH_LR_ACTIVE_BIT (1 << 29) 59#define GICH_LR_ACTIVE_BIT (1 << 29)
57#define GICH_LR_EOI (1 << 19) 60#define GICH_LR_EOI (1 << 19)
58 61
62#define GICH_VMCR_CTRL_SHIFT 0
63#define GICH_VMCR_CTRL_MASK (0x21f << GICH_VMCR_CTRL_SHIFT)
64#define GICH_VMCR_PRIMASK_SHIFT 27
65#define GICH_VMCR_PRIMASK_MASK (0x1f << GICH_VMCR_PRIMASK_SHIFT)
66#define GICH_VMCR_BINPOINT_SHIFT 21
67#define GICH_VMCR_BINPOINT_MASK (0x7 << GICH_VMCR_BINPOINT_SHIFT)
68#define GICH_VMCR_ALIAS_BINPOINT_SHIFT 18
69#define GICH_VMCR_ALIAS_BINPOINT_MASK (0x7 << GICH_VMCR_ALIAS_BINPOINT_SHIFT)
70
59#define GICH_MISR_EOI (1 << 0) 71#define GICH_MISR_EOI (1 << 0)
60#define GICH_MISR_U (1 << 1) 72#define GICH_MISR_U (1 << 1)
61 73
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9523d2ad7535..b8e9a43e501a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -172,8 +172,6 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
172 int len, const void *val, long cookie); 172 int len, const void *val, long cookie);
173int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, 173int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
174 void *val); 174 void *val);
175int kvm_io_bus_read_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
176 int len, void *val, long cookie);
177int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 175int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
178 int len, struct kvm_io_device *dev); 176 int len, struct kvm_io_device *dev);
179int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, 177int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
@@ -463,8 +461,6 @@ void kvm_exit(void);
463 461
464void kvm_get_kvm(struct kvm *kvm); 462void kvm_get_kvm(struct kvm *kvm);
465void kvm_put_kvm(struct kvm *kvm); 463void kvm_put_kvm(struct kvm *kvm);
466void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new,
467 u64 last_generation);
468 464
469static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) 465static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
470{ 466{
@@ -537,7 +533,6 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
537unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); 533unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
538void kvm_release_page_clean(struct page *page); 534void kvm_release_page_clean(struct page *page);
539void kvm_release_page_dirty(struct page *page); 535void kvm_release_page_dirty(struct page *page);
540void kvm_set_page_dirty(struct page *page);
541void kvm_set_page_accessed(struct page *page); 536void kvm_set_page_accessed(struct page *page);
542 537
543pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn); 538pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
@@ -549,7 +544,6 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
549pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn); 544pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
550pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn); 545pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
551 546
552void kvm_release_pfn_dirty(pfn_t pfn);
553void kvm_release_pfn_clean(pfn_t pfn); 547void kvm_release_pfn_clean(pfn_t pfn);
554void kvm_set_pfn_dirty(pfn_t pfn); 548void kvm_set_pfn_dirty(pfn_t pfn);
555void kvm_set_pfn_accessed(pfn_t pfn); 549void kvm_set_pfn_accessed(pfn_t pfn);
@@ -576,14 +570,11 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
576int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); 570int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
577unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); 571unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
578void mark_page_dirty(struct kvm *kvm, gfn_t gfn); 572void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
579void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
580 gfn_t gfn);
581 573
582void kvm_vcpu_block(struct kvm_vcpu *vcpu); 574void kvm_vcpu_block(struct kvm_vcpu *vcpu);
583void kvm_vcpu_kick(struct kvm_vcpu *vcpu); 575void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
584bool kvm_vcpu_yield_to(struct kvm_vcpu *target); 576bool kvm_vcpu_yield_to(struct kvm_vcpu *target);
585void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); 577void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
586void kvm_resched(struct kvm_vcpu *vcpu);
587void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); 578void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
588void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); 579void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
589 580
@@ -605,8 +596,6 @@ int kvm_get_dirty_log(struct kvm *kvm,
605int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 596int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
606 struct kvm_dirty_log *log); 597 struct kvm_dirty_log *log);
607 598
608int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
609 struct kvm_userspace_memory_region *mem);
610int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, 599int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
611 bool line_status); 600 bool line_status);
612long kvm_arch_vm_ioctl(struct file *filp, 601long kvm_arch_vm_ioctl(struct file *filp,
@@ -654,8 +643,6 @@ void kvm_arch_check_processor_compat(void *rtn);
654int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); 643int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
655int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); 644int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
656 645
657void kvm_free_physmem(struct kvm *kvm);
658
659void *kvm_kvzalloc(unsigned long size); 646void *kvm_kvzalloc(unsigned long size);
660void kvm_kvfree(const void *addr); 647void kvm_kvfree(const void *addr);
661 648
@@ -1076,6 +1063,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp);
1076extern struct kvm_device_ops kvm_mpic_ops; 1063extern struct kvm_device_ops kvm_mpic_ops;
1077extern struct kvm_device_ops kvm_xics_ops; 1064extern struct kvm_device_ops kvm_xics_ops;
1078extern struct kvm_device_ops kvm_vfio_ops; 1065extern struct kvm_device_ops kvm_vfio_ops;
1066extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
1079 1067
1080#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT 1068#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
1081 1069
@@ -1097,12 +1085,6 @@ static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
1097static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) 1085static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
1098{ 1086{
1099} 1087}
1100
1101static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
1102{
1103 return true;
1104}
1105
1106#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ 1088#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
1107#endif 1089#endif
1108 1090
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 902f12461873..932d7f2637d6 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info {
674#define KVM_CAP_ARM_EL1_32BIT 93 674#define KVM_CAP_ARM_EL1_32BIT 93
675#define KVM_CAP_SPAPR_MULTITCE 94 675#define KVM_CAP_SPAPR_MULTITCE 94
676#define KVM_CAP_EXT_EMUL_CPUID 95 676#define KVM_CAP_EXT_EMUL_CPUID 95
677#define KVM_CAP_HYPERV_TIME 96
677 678
678#ifdef KVM_CAP_IRQ_ROUTING 679#ifdef KVM_CAP_IRQ_ROUTING
679 680
@@ -853,6 +854,7 @@ struct kvm_device_attr {
853#define KVM_DEV_VFIO_GROUP 1 854#define KVM_DEV_VFIO_GROUP 1
854#define KVM_DEV_VFIO_GROUP_ADD 1 855#define KVM_DEV_VFIO_GROUP_ADD 1
855#define KVM_DEV_VFIO_GROUP_DEL 2 856#define KVM_DEV_VFIO_GROUP_DEL 2
857#define KVM_DEV_TYPE_ARM_VGIC_V2 5
856 858
857/* 859/*
858 * ioctls for VM fds 860 * ioctls for VM fds
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index c2e1ef4604e8..5081e809821f 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -182,6 +182,40 @@ static void kvm_timer_init_interrupt(void *info)
182 enable_percpu_irq(host_vtimer_irq, 0); 182 enable_percpu_irq(host_vtimer_irq, 0);
183} 183}
184 184
185int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
186{
187 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
188
189 switch (regid) {
190 case KVM_REG_ARM_TIMER_CTL:
191 timer->cntv_ctl = value;
192 break;
193 case KVM_REG_ARM_TIMER_CNT:
194 vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value;
195 break;
196 case KVM_REG_ARM_TIMER_CVAL:
197 timer->cntv_cval = value;
198 break;
199 default:
200 return -1;
201 }
202 return 0;
203}
204
205u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
206{
207 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
208
209 switch (regid) {
210 case KVM_REG_ARM_TIMER_CTL:
211 return timer->cntv_ctl;
212 case KVM_REG_ARM_TIMER_CNT:
213 return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
214 case KVM_REG_ARM_TIMER_CVAL:
215 return timer->cntv_cval;
216 }
217 return (u64)-1;
218}
185 219
186static int kvm_timer_cpu_notify(struct notifier_block *self, 220static int kvm_timer_cpu_notify(struct notifier_block *self,
187 unsigned long action, void *cpu) 221 unsigned long action, void *cpu)
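Editor's note: the KVM_REG_ARM_TIMER_CNT case above stores an offset rather than the value itself: writing V at physical time P sets cntvoff = P - V, so later reads return the restored value plus elapsed time. A small self-contained model of that round trip (set_cnt/get_cnt are illustrative names):

#include <assert.h>
#include <stdint.h>

static uint64_t cntvoff;

static void set_cnt(uint64_t phys_now, uint64_t value)
{
	cntvoff = phys_now - value;
}

static uint64_t get_cnt(uint64_t phys_now)
{
	return phys_now - cntvoff;
}

int main(void)
{
	set_cnt(1000, 42);		/* restore virtual counter to 42 */
	assert(get_cnt(1000) == 42);	/* reads back what was written */
	assert(get_cnt(1500) == 542);	/* and keeps ticking from there */
	return 0;
}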
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 685fc72fc751..be456ce264d0 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -71,6 +71,10 @@
71#define VGIC_ADDR_UNDEF (-1) 71#define VGIC_ADDR_UNDEF (-1)
72#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) 72#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF)
73 73
74#define PRODUCT_ID_KVM 0x4b /* ASCII code K */
75#define IMPLEMENTER_ARM 0x43b
76#define GICC_ARCH_VERSION_V2 0x2
77
74/* Physical address of vgic virtual cpu interface */ 78/* Physical address of vgic virtual cpu interface */
75static phys_addr_t vgic_vcpu_base; 79static phys_addr_t vgic_vcpu_base;
76 80
@@ -312,7 +316,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
312 u32 word_offset = offset & 3; 316 u32 word_offset = offset & 3;
313 317
314 switch (offset & ~3) { 318 switch (offset & ~3) {
315 case 0: /* CTLR */ 319 case 0: /* GICD_CTLR */
316 reg = vcpu->kvm->arch.vgic.enabled; 320 reg = vcpu->kvm->arch.vgic.enabled;
317 vgic_reg_access(mmio, &reg, word_offset, 321 vgic_reg_access(mmio, &reg, word_offset,
318 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); 322 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
@@ -323,15 +327,15 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
323 } 327 }
324 break; 328 break;
325 329
326 case 4: /* TYPER */ 330 case 4: /* GICD_TYPER */
327 reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; 331 reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
328 reg |= (VGIC_NR_IRQS >> 5) - 1; 332 reg |= (VGIC_NR_IRQS >> 5) - 1;
329 vgic_reg_access(mmio, &reg, word_offset, 333 vgic_reg_access(mmio, &reg, word_offset,
330 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); 334 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
331 break; 335 break;
332 336
333 case 8: /* IIDR */ 337 case 8: /* GICD_IIDR */
334 reg = 0x4B00043B; 338 reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
335 vgic_reg_access(mmio, &reg, word_offset, 339 vgic_reg_access(mmio, &reg, word_offset,
336 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); 340 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
337 break; 341 break;
@@ -589,6 +593,156 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
589 return false; 593 return false;
590} 594}
591 595
596#define LR_CPUID(lr) \
597 (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
598#define LR_IRQID(lr) \
599 ((lr) & GICH_LR_VIRTUALID)
600
601static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu)
602{
603 clear_bit(lr_nr, vgic_cpu->lr_used);
604 vgic_cpu->vgic_lr[lr_nr] &= ~GICH_LR_STATE;
605 vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
606}
607
608/**
609 * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
 610 * @vcpu: Pointer to the vcpu struct whose LRs are to be drained
611 *
612 * Move any pending IRQs that have already been assigned to LRs back to the
613 * emulated distributor state so that the complete emulated state can be read
614 * from the main emulation structures without investigating the LRs.
615 *
616 * Note that IRQs in the active state in the LRs get their pending state moved
617 * to the distributor but the active state stays in the LRs, because we don't
618 * track the active state on the distributor side.
619 */
620static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
621{
622 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
623 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
624 int vcpu_id = vcpu->vcpu_id;
625 int i, irq, source_cpu;
626 u32 *lr;
627
628 for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
629 lr = &vgic_cpu->vgic_lr[i];
630 irq = LR_IRQID(*lr);
631 source_cpu = LR_CPUID(*lr);
632
633 /*
634 * There are three options for the state bits:
635 *
636 * 01: pending
637 * 10: active
638 * 11: pending and active
639 *
640 * If the LR holds only an active interrupt (not pending) then
641 * just leave it alone.
642 */
643 if ((*lr & GICH_LR_STATE) == GICH_LR_ACTIVE_BIT)
644 continue;
645
646 /*
647 * Reestablish the pending state on the distributor and the
648 * CPU interface. It may have already been pending, but that
649 * is fine, then we are only setting a few bits that were
650 * already set.
651 */
652 vgic_dist_irq_set(vcpu, irq);
653 if (irq < VGIC_NR_SGIS)
654 dist->irq_sgi_sources[vcpu_id][irq] |= 1 << source_cpu;
655 *lr &= ~GICH_LR_PENDING_BIT;
656
657 /*
658 * If there's no state left on the LR (it could still be
659 * active), then the LR does not hold any useful info and can
660 * be marked as free for other use.
661 */
662 if (!(*lr & GICH_LR_STATE))
663 vgic_retire_lr(i, irq, vgic_cpu);
664
665 /* Finally update the VGIC state. */
666 vgic_update_state(vcpu->kvm);
667 }
668}
669
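Editor's note: the three LR state encodings in the comment above drive the unqueue decision: pending moves to the distributor, active stays in the LR, and an emptied LR is retired. A toy demo of that bit handling (1 << 28 matches GICH_LR_PENDING_BIT in arm-gic.h to the best of my knowledge; treat the constants as assumptions):

#include <stdint.h>
#include <stdio.h>

#define LR_PENDING	(1u << 28)
#define LR_ACTIVE	(1u << 29)
#define LR_STATE	(LR_PENDING | LR_ACTIVE)

int main(void)
{
	uint32_t lr = LR_PENDING | LR_ACTIVE | 27;	/* IRQ 27, both states */

	if ((lr & LR_STATE) != LR_ACTIVE) {	/* pending: move to distributor */
		lr &= ~LR_PENDING;
		if (!(lr & LR_STATE))
			printf("LR empty, can be retired\n");
		else
			printf("LR stays active: 0x%08x\n", lr);
	}
	return 0;
}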
670/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */
671static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
672 struct kvm_exit_mmio *mmio,
673 phys_addr_t offset)
674{
675 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
676 int sgi;
677 int min_sgi = (offset & ~0x3) * 4;
678 int max_sgi = min_sgi + 3;
679 int vcpu_id = vcpu->vcpu_id;
680 u32 reg = 0;
681
682 /* Copy source SGIs from distributor side */
683 for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
684 int shift = 8 * (sgi - min_sgi);
685 reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift;
686 }
687
688 mmio_data_write(mmio, ~0, reg);
689 return false;
690}
691
692static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
693 struct kvm_exit_mmio *mmio,
694 phys_addr_t offset, bool set)
695{
696 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
697 int sgi;
698 int min_sgi = (offset & ~0x3) * 4;
699 int max_sgi = min_sgi + 3;
700 int vcpu_id = vcpu->vcpu_id;
701 u32 reg;
702 bool updated = false;
703
704 reg = mmio_data_read(mmio, ~0);
705
 706 /* Set or clear pending SGI sources on the distributor */
707 for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
708 u8 mask = reg >> (8 * (sgi - min_sgi));
709 if (set) {
710 if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask)
711 updated = true;
712 dist->irq_sgi_sources[vcpu_id][sgi] |= mask;
713 } else {
714 if (dist->irq_sgi_sources[vcpu_id][sgi] & mask)
715 updated = true;
716 dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask;
717 }
718 }
719
720 if (updated)
721 vgic_update_state(vcpu->kvm);
722
723 return updated;
724}
725
726static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu,
727 struct kvm_exit_mmio *mmio,
728 phys_addr_t offset)
729{
730 if (!mmio->is_write)
731 return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
732 else
733 return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true);
734}
735
736static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
737 struct kvm_exit_mmio *mmio,
738 phys_addr_t offset)
739{
740 if (!mmio->is_write)
741 return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
742 else
743 return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
744}
745
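Editor's note: the GICD_SPENDSGIRn/GICD_CPENDSGIRn registers handled above are byte-packed: one byte per SGI, one bit per source CPU. A standalone illustration of that layout (values chosen arbitrarily):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t sgi_sources[16] = { 0 };	/* irq_sgi_sources for one vcpu */
	uint32_t reg = 0;
	int sgi;

	sgi_sources[3] |= 1 << 0;	/* CPU0 raised SGI 3 */
	sgi_sources[3] |= 1 << 2;	/* CPU2 raised SGI 3 */

	/* A 32-bit read of SPENDSGIR0 returns SGIs 0..3, one byte each */
	for (sgi = 0; sgi < 4; sgi++)
		reg |= (uint32_t)sgi_sources[sgi] << (8 * sgi);

	printf("SPENDSGIR0 = 0x%08x\n", reg);	/* prints 0x05000000 */
	return 0;
}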
592/* 746/*
593 * I would have liked to use the kvm_bus_io_*() API instead, but it 747 * I would have liked to use the kvm_bus_io_*() API instead, but it
594 * cannot cope with banked registers (only the VM pointer is passed 748 * cannot cope with banked registers (only the VM pointer is passed
@@ -602,7 +756,7 @@ struct mmio_range {
602 phys_addr_t offset); 756 phys_addr_t offset);
603}; 757};
604 758
605static const struct mmio_range vgic_ranges[] = { 759static const struct mmio_range vgic_dist_ranges[] = {
606 { 760 {
607 .base = GIC_DIST_CTRL, 761 .base = GIC_DIST_CTRL,
608 .len = 12, 762 .len = 12,
@@ -663,20 +817,29 @@ static const struct mmio_range vgic_ranges[] = {
663 .len = 4, 817 .len = 4,
664 .handle_mmio = handle_mmio_sgi_reg, 818 .handle_mmio = handle_mmio_sgi_reg,
665 }, 819 },
820 {
821 .base = GIC_DIST_SGI_PENDING_CLEAR,
822 .len = VGIC_NR_SGIS,
823 .handle_mmio = handle_mmio_sgi_clear,
824 },
825 {
826 .base = GIC_DIST_SGI_PENDING_SET,
827 .len = VGIC_NR_SGIS,
828 .handle_mmio = handle_mmio_sgi_set,
829 },
666 {} 830 {}
667}; 831};
668 832
669static const 833static const
670struct mmio_range *find_matching_range(const struct mmio_range *ranges, 834struct mmio_range *find_matching_range(const struct mmio_range *ranges,
671 struct kvm_exit_mmio *mmio, 835 struct kvm_exit_mmio *mmio,
672 phys_addr_t base) 836 phys_addr_t offset)
673{ 837{
674 const struct mmio_range *r = ranges; 838 const struct mmio_range *r = ranges;
675 phys_addr_t addr = mmio->phys_addr - base;
676 839
677 while (r->len) { 840 while (r->len) {
678 if (addr >= r->base && 841 if (offset >= r->base &&
679 (addr + mmio->len) <= (r->base + r->len)) 842 (offset + mmio->len) <= (r->base + r->len))
680 return r; 843 return r;
681 r++; 844 r++;
682 } 845 }
@@ -713,7 +876,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
713 return true; 876 return true;
714 } 877 }
715 878
716 range = find_matching_range(vgic_ranges, mmio, base); 879 offset = mmio->phys_addr - base;
880 range = find_matching_range(vgic_dist_ranges, mmio, offset);
717 if (unlikely(!range || !range->handle_mmio)) { 881 if (unlikely(!range || !range->handle_mmio)) {
718 pr_warn("Unhandled access %d %08llx %d\n", 882 pr_warn("Unhandled access %d %08llx %d\n",
719 mmio->is_write, mmio->phys_addr, mmio->len); 883 mmio->is_write, mmio->phys_addr, mmio->len);
@@ -824,8 +988,6 @@ static void vgic_update_state(struct kvm *kvm)
824 } 988 }
825} 989}
826 990
827#define LR_CPUID(lr) \
828 (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
829#define MK_LR_PEND(src, irq) \ 991#define MK_LR_PEND(src, irq) \
830 (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq)) 992 (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq))
831 993
@@ -847,9 +1009,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
847 int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; 1009 int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
848 1010
849 if (!vgic_irq_is_enabled(vcpu, irq)) { 1011 if (!vgic_irq_is_enabled(vcpu, irq)) {
850 vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; 1012 vgic_retire_lr(lr, irq, vgic_cpu);
851 clear_bit(lr, vgic_cpu->lr_used);
852 vgic_cpu->vgic_lr[lr] &= ~GICH_LR_STATE;
853 if (vgic_irq_is_active(vcpu, irq)) 1013 if (vgic_irq_is_active(vcpu, irq))
854 vgic_irq_clear_active(vcpu, irq); 1014 vgic_irq_clear_active(vcpu, irq);
855 } 1015 }
@@ -1243,15 +1403,19 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
1243 return IRQ_HANDLED; 1403 return IRQ_HANDLED;
1244} 1404}
1245 1405
1406/**
1407 * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
1408 * @vcpu: pointer to the vcpu struct
1409 *
1410 * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
1411 * this vcpu and enable the VGIC for this VCPU
1412 */
1246int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) 1413int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
1247{ 1414{
1248 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1415 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1249 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1416 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1250 int i; 1417 int i;
1251 1418
1252 if (!irqchip_in_kernel(vcpu->kvm))
1253 return 0;
1254
1255 if (vcpu->vcpu_id >= VGIC_MAX_CPUS) 1419 if (vcpu->vcpu_id >= VGIC_MAX_CPUS)
1256 return -EBUSY; 1420 return -EBUSY;
1257 1421
@@ -1383,10 +1547,22 @@ out:
1383 return ret; 1547 return ret;
1384} 1548}
1385 1549
1550/**
1551 * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
1552 * @kvm: pointer to the kvm struct
1553 *
1554 * Map the virtual CPU interface into the VM before running any VCPUs. We
1555 * can't do this at creation time, because user space must first set the
1556 * virtual CPU interface address in the guest physical address space. Also
1557 * initialize the ITARGETSRn regs to 0 on the emulated distributor.
1558 */
1386int kvm_vgic_init(struct kvm *kvm) 1559int kvm_vgic_init(struct kvm *kvm)
1387{ 1560{
1388 int ret = 0, i; 1561 int ret = 0, i;
1389 1562
1563 if (!irqchip_in_kernel(kvm))
1564 return 0;
1565
1390 mutex_lock(&kvm->lock); 1566 mutex_lock(&kvm->lock);
1391 1567
1392 if (vgic_initialized(kvm)) 1568 if (vgic_initialized(kvm))
@@ -1409,7 +1585,6 @@ int kvm_vgic_init(struct kvm *kvm)
1409 for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4) 1585 for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
1410 vgic_set_target_reg(kvm, 0, i); 1586 vgic_set_target_reg(kvm, 0, i);
1411 1587
1412 kvm_timer_init(kvm);
1413 kvm->arch.vgic.ready = true; 1588 kvm->arch.vgic.ready = true;
1414out: 1589out:
1415 mutex_unlock(&kvm->lock); 1590 mutex_unlock(&kvm->lock);
@@ -1418,20 +1593,45 @@ out:
1418 1593
1419int kvm_vgic_create(struct kvm *kvm) 1594int kvm_vgic_create(struct kvm *kvm)
1420{ 1595{
1421 int ret = 0; 1596 int i, vcpu_lock_idx = -1, ret = 0;
1597 struct kvm_vcpu *vcpu;
1422 1598
1423 mutex_lock(&kvm->lock); 1599 mutex_lock(&kvm->lock);
1424 1600
1425 if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) { 1601 if (kvm->arch.vgic.vctrl_base) {
1426 ret = -EEXIST; 1602 ret = -EEXIST;
1427 goto out; 1603 goto out;
1428 } 1604 }
1429 1605
1606 /*
1607 * Any time a vcpu is run, vcpu_load is called which tries to grab the
1608 * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
1609 * that no other VCPUs are run while we create the vgic.
1610 */
1611 kvm_for_each_vcpu(i, vcpu, kvm) {
1612 if (!mutex_trylock(&vcpu->mutex))
1613 goto out_unlock;
1614 vcpu_lock_idx = i;
1615 }
1616
1617 kvm_for_each_vcpu(i, vcpu, kvm) {
1618 if (vcpu->arch.has_run_once) {
1619 ret = -EBUSY;
1620 goto out_unlock;
1621 }
1622 }
1623
1430 spin_lock_init(&kvm->arch.vgic.lock); 1624 spin_lock_init(&kvm->arch.vgic.lock);
1431 kvm->arch.vgic.vctrl_base = vgic_vctrl_base; 1625 kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
1432 kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; 1626 kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
1433 kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; 1627 kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
1434 1628
1629out_unlock:
1630 for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
1631 vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
1632 mutex_unlock(&vcpu->mutex);
1633 }
1634
1435out: 1635out:
1436 mutex_unlock(&kvm->lock); 1636 mutex_unlock(&kvm->lock);
1437 return ret; 1637 return ret;
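Editor's note: the vcpu->mutex handling above is a trylock-all-or-roll-back pattern: either every vcpu lock is taken (so no vcpu can enter the run loop) or everything acquired so far is released. A generic pthread sketch of the same pattern (not kernel code; lock_all is a made-up helper):

#include <pthread.h>

static int lock_all(pthread_mutex_t *locks, int n)
{
	int i, taken = -1;

	for (i = 0; i < n; i++) {
		if (pthread_mutex_trylock(&locks[i]))
			goto unlock;	/* contended: someone is running */
		taken = i;
	}
	return 0;			/* all held, safe to proceed */

unlock:
	for (; taken >= 0; taken--)
		pthread_mutex_unlock(&locks[taken]);
	return -1;
}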
@@ -1455,6 +1655,12 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
1455{ 1655{
1456 int ret; 1656 int ret;
1457 1657
1658 if (addr & ~KVM_PHYS_MASK)
1659 return -E2BIG;
1660
1661 if (addr & (SZ_4K - 1))
1662 return -EINVAL;
1663
1458 if (!IS_VGIC_ADDR_UNDEF(*ioaddr)) 1664 if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
1459 return -EEXIST; 1665 return -EEXIST;
1460 if (addr + size < addr) 1666 if (addr + size < addr)
@@ -1467,26 +1673,41 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
1467 return ret; 1673 return ret;
1468} 1674}
1469 1675
1470int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) 1676/**
1677 * kvm_vgic_addr - set or get vgic VM base addresses
1678 * @kvm: pointer to the vm struct
1679 * @type: the VGIC addr type, one of KVM_VGIC_V2_ADDR_TYPE_XXX
1680 * @addr: pointer to address value
1681 * @write: if true set the address in the VM address space, if false read the
1682 * address
1683 *
1684 * Set or get the vgic base addresses for the distributor and the virtual CPU
1685 * interface in the VM physical address space. These addresses are properties
1686 * of the emulated core/SoC and therefore user space initially knows this
1687 * information.
1688 */
1689int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
1471{ 1690{
1472 int r = 0; 1691 int r = 0;
1473 struct vgic_dist *vgic = &kvm->arch.vgic; 1692 struct vgic_dist *vgic = &kvm->arch.vgic;
1474 1693
1475 if (addr & ~KVM_PHYS_MASK)
1476 return -E2BIG;
1477
1478 if (addr & (SZ_4K - 1))
1479 return -EINVAL;
1480
1481 mutex_lock(&kvm->lock); 1694 mutex_lock(&kvm->lock);
1482 switch (type) { 1695 switch (type) {
1483 case KVM_VGIC_V2_ADDR_TYPE_DIST: 1696 case KVM_VGIC_V2_ADDR_TYPE_DIST:
1484 r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base, 1697 if (write) {
1485 addr, KVM_VGIC_V2_DIST_SIZE); 1698 r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base,
1699 *addr, KVM_VGIC_V2_DIST_SIZE);
1700 } else {
1701 *addr = vgic->vgic_dist_base;
1702 }
1486 break; 1703 break;
1487 case KVM_VGIC_V2_ADDR_TYPE_CPU: 1704 case KVM_VGIC_V2_ADDR_TYPE_CPU:
1488 r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base, 1705 if (write) {
1489 addr, KVM_VGIC_V2_CPU_SIZE); 1706 r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base,
1707 *addr, KVM_VGIC_V2_CPU_SIZE);
1708 } else {
1709 *addr = vgic->vgic_cpu_base;
1710 }
1490 break; 1711 break;
1491 default: 1712 default:
1492 r = -ENODEV; 1713 r = -ENODEV;
@@ -1495,3 +1716,302 @@ int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
1495 mutex_unlock(&kvm->lock); 1716 mutex_unlock(&kvm->lock);
1496 return r; 1717 return r;
1497} 1718}
1719
1720static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
1721 struct kvm_exit_mmio *mmio, phys_addr_t offset)
1722{
1723 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1724 u32 reg, mask = 0, shift = 0;
1725 bool updated = false;
1726
1727 switch (offset & ~0x3) {
1728 case GIC_CPU_CTRL:
1729 mask = GICH_VMCR_CTRL_MASK;
1730 shift = GICH_VMCR_CTRL_SHIFT;
1731 break;
1732 case GIC_CPU_PRIMASK:
1733 mask = GICH_VMCR_PRIMASK_MASK;
1734 shift = GICH_VMCR_PRIMASK_SHIFT;
1735 break;
1736 case GIC_CPU_BINPOINT:
1737 mask = GICH_VMCR_BINPOINT_MASK;
1738 shift = GICH_VMCR_BINPOINT_SHIFT;
1739 break;
1740 case GIC_CPU_ALIAS_BINPOINT:
1741 mask = GICH_VMCR_ALIAS_BINPOINT_MASK;
1742 shift = GICH_VMCR_ALIAS_BINPOINT_SHIFT;
1743 break;
1744 }
1745
1746 if (!mmio->is_write) {
1747 reg = (vgic_cpu->vgic_vmcr & mask) >> shift;
1748 mmio_data_write(mmio, ~0, reg);
1749 } else {
1750 reg = mmio_data_read(mmio, ~0);
1751 reg = (reg << shift) & mask;
1752 if (reg != (vgic_cpu->vgic_vmcr & mask))
1753 updated = true;
1754 vgic_cpu->vgic_vmcr &= ~mask;
1755 vgic_cpu->vgic_vmcr |= reg;
1756 }
1757 return updated;
1758}
1759
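Editor's note: handle_cpu_mmio_misc above funnels every GICC register through the same mask/shift pair into the packed GICH_VMCR word. A minimal model of that packing (PRIMASK values taken from the arm-gic.h hunk earlier in this diff):

#include <stdint.h>

#define GICH_VMCR_PRIMASK_SHIFT	27
#define GICH_VMCR_PRIMASK_MASK	(0x1fU << GICH_VMCR_PRIMASK_SHIFT)

static uint32_t vmcr_get(uint32_t vmcr, uint32_t mask, uint32_t shift)
{
	return (vmcr & mask) >> shift;
}

static uint32_t vmcr_set(uint32_t vmcr, uint32_t mask, uint32_t shift, uint32_t v)
{
	return (vmcr & ~mask) | ((v << shift) & mask);
}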
1760static bool handle_mmio_abpr(struct kvm_vcpu *vcpu,
1761 struct kvm_exit_mmio *mmio, phys_addr_t offset)
1762{
1763 return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT);
1764}
1765
1766static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu,
1767 struct kvm_exit_mmio *mmio,
1768 phys_addr_t offset)
1769{
1770 u32 reg;
1771
1772 if (mmio->is_write)
1773 return false;
1774
1775 /* GICC_IIDR */
1776 reg = (PRODUCT_ID_KVM << 20) |
1777 (GICC_ARCH_VERSION_V2 << 16) |
1778 (IMPLEMENTER_ARM << 0);
1779 mmio_data_write(mmio, ~0, reg);
1780 return false;
1781}
1782
1783/*
1784 * CPU Interface Register accesses - these are not accessed by the VM, but by
1785 * user space for saving and restoring VGIC state.
1786 */
1787static const struct mmio_range vgic_cpu_ranges[] = {
1788 {
1789 .base = GIC_CPU_CTRL,
1790 .len = 12,
1791 .handle_mmio = handle_cpu_mmio_misc,
1792 },
1793 {
1794 .base = GIC_CPU_ALIAS_BINPOINT,
1795 .len = 4,
1796 .handle_mmio = handle_mmio_abpr,
1797 },
1798 {
1799 .base = GIC_CPU_ACTIVEPRIO,
1800 .len = 16,
1801 .handle_mmio = handle_mmio_raz_wi,
1802 },
1803 {
1804 .base = GIC_CPU_IDENT,
1805 .len = 4,
1806 .handle_mmio = handle_cpu_mmio_ident,
1807 },
1808};
1809
1810static int vgic_attr_regs_access(struct kvm_device *dev,
1811 struct kvm_device_attr *attr,
1812 u32 *reg, bool is_write)
1813{
1814 const struct mmio_range *r = NULL, *ranges;
1815 phys_addr_t offset;
1816 int ret, cpuid, c;
1817 struct kvm_vcpu *vcpu, *tmp_vcpu;
1818 struct vgic_dist *vgic;
1819 struct kvm_exit_mmio mmio;
1820
1821 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
1822 cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
1823 KVM_DEV_ARM_VGIC_CPUID_SHIFT;
1824
1825 mutex_lock(&dev->kvm->lock);
1826
1827 if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
1828 ret = -EINVAL;
1829 goto out;
1830 }
1831
1832 vcpu = kvm_get_vcpu(dev->kvm, cpuid);
1833 vgic = &dev->kvm->arch.vgic;
1834
1835 mmio.len = 4;
1836 mmio.is_write = is_write;
1837 if (is_write)
1838 mmio_data_write(&mmio, ~0, *reg);
1839 switch (attr->group) {
1840 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
1841 mmio.phys_addr = vgic->vgic_dist_base + offset;
1842 ranges = vgic_dist_ranges;
1843 break;
1844 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
1845 mmio.phys_addr = vgic->vgic_cpu_base + offset;
1846 ranges = vgic_cpu_ranges;
1847 break;
1848 default:
1849 BUG();
1850 }
1851 r = find_matching_range(ranges, &mmio, offset);
1852
1853 if (unlikely(!r || !r->handle_mmio)) {
1854 ret = -ENXIO;
1855 goto out;
1856 }
1857
1858
1859 spin_lock(&vgic->lock);
1860
1861 /*
1862 * Ensure that no other VCPU is running by checking the vcpu->cpu
 1863 * field. If no other VCPUs are running we can safely access the VGIC
 1864 * state, because even if another VCPU is run after this point, that
1865 * VCPU will not touch the vgic state, because it will block on
1866 * getting the vgic->lock in kvm_vgic_sync_hwstate().
1867 */
1868 kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
1869 if (unlikely(tmp_vcpu->cpu != -1)) {
1870 ret = -EBUSY;
1871 goto out_vgic_unlock;
1872 }
1873 }
1874
1875 /*
1876 * Move all pending IRQs from the LRs on all VCPUs so the pending
1877 * state can be properly represented in the register state accessible
1878 * through this API.
1879 */
1880 kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm)
1881 vgic_unqueue_irqs(tmp_vcpu);
1882
1883 offset -= r->base;
1884 r->handle_mmio(vcpu, &mmio, offset);
1885
1886 if (!is_write)
1887 *reg = mmio_data_read(&mmio, ~0);
1888
1889 ret = 0;
1890out_vgic_unlock:
1891 spin_unlock(&vgic->lock);
1892out:
1893 mutex_unlock(&dev->kvm->lock);
1894 return ret;
1895}
1896
1897static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1898{
1899 int r;
1900
1901 switch (attr->group) {
1902 case KVM_DEV_ARM_VGIC_GRP_ADDR: {
1903 u64 __user *uaddr = (u64 __user *)(long)attr->addr;
1904 u64 addr;
1905 unsigned long type = (unsigned long)attr->attr;
1906
1907 if (copy_from_user(&addr, uaddr, sizeof(addr)))
1908 return -EFAULT;
1909
1910 r = kvm_vgic_addr(dev->kvm, type, &addr, true);
1911 return (r == -ENODEV) ? -ENXIO : r;
1912 }
1913
1914 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
1915 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
1916 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
1917 u32 reg;
1918
1919 if (get_user(reg, uaddr))
1920 return -EFAULT;
1921
1922 return vgic_attr_regs_access(dev, attr, &reg, true);
1923 }
1924
1925 }
1926
1927 return -ENXIO;
1928}
1929
1930static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1931{
1932 int r = -ENXIO;
1933
1934 switch (attr->group) {
1935 case KVM_DEV_ARM_VGIC_GRP_ADDR: {
1936 u64 __user *uaddr = (u64 __user *)(long)attr->addr;
1937 u64 addr;
1938 unsigned long type = (unsigned long)attr->attr;
1939
1940 r = kvm_vgic_addr(dev->kvm, type, &addr, false);
1941 if (r)
1942 return (r == -ENODEV) ? -ENXIO : r;
1943
1944 if (copy_to_user(uaddr, &addr, sizeof(addr)))
1945 return -EFAULT;
1946 break;
1947 }
1948
1949 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
1950 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
1951 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
1952 u32 reg = 0;
1953
1954 r = vgic_attr_regs_access(dev, attr, &reg, false);
1955 if (r)
1956 return r;
1957 r = put_user(reg, uaddr);
1958 break;
1959 }
1960
1961 }
1962
1963 return r;
1964}
1965
1966static int vgic_has_attr_regs(const struct mmio_range *ranges,
1967 phys_addr_t offset)
1968{
1969 struct kvm_exit_mmio dev_attr_mmio;
1970
1971 dev_attr_mmio.len = 4;
1972 if (find_matching_range(ranges, &dev_attr_mmio, offset))
1973 return 0;
1974 else
1975 return -ENXIO;
1976}
1977
1978static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1979{
1980 phys_addr_t offset;
1981
1982 switch (attr->group) {
1983 case KVM_DEV_ARM_VGIC_GRP_ADDR:
1984 switch (attr->attr) {
1985 case KVM_VGIC_V2_ADDR_TYPE_DIST:
1986 case KVM_VGIC_V2_ADDR_TYPE_CPU:
1987 return 0;
1988 }
1989 break;
1990 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
1991 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
1992 return vgic_has_attr_regs(vgic_dist_ranges, offset);
1993 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
1994 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
1995 return vgic_has_attr_regs(vgic_cpu_ranges, offset);
1996 }
1997 return -ENXIO;
1998}
1999
2000static void vgic_destroy(struct kvm_device *dev)
2001{
2002 kfree(dev);
2003}
2004
2005static int vgic_create(struct kvm_device *dev, u32 type)
2006{
2007 return kvm_vgic_create(dev->kvm);
2008}
2009
2010struct kvm_device_ops kvm_arm_vgic_v2_ops = {
2011 .name = "kvm-arm-vgic",
2012 .create = vgic_create,
2013 .destroy = vgic_destroy,
2014 .set_attr = vgic_set_attr,
2015 .get_attr = vgic_get_attr,
2016 .has_attr = vgic_has_attr,
2017};
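Editor's note: with kvm_arm_vgic_v2_ops wired into the generic device framework, userspace drives all of the above through KVM_CREATE_DEVICE plus the attribute ioctls. A hedged userspace sketch (the 0x08000000 distributor base and create_vgic name are illustrative; the ioctls and structs are the documented KVM device API, with the VGIC constants coming from the ARM uapi headers):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int create_vgic(int vm_fd)
{
	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_ARM_VGIC_V2 };
	uint64_t dist_base = 0x08000000;	/* guest-physical, board specific */
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
		.attr  = KVM_VGIC_V2_ADDR_TYPE_DIST,
		.addr  = (uint64_t)(unsigned long)&dist_base,
	};

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;
	/* kvm_vgic_addr(..., write=true) runs under the hood */
	if (ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
		return -1;
	return cd.fd;
}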
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 2d682977ce82..ce9ed99ad7dc 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -520,7 +520,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
520 return 0; 520 return 0;
521} 521}
522 522
523void kvm_ioapic_reset(struct kvm_ioapic *ioapic) 523static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
524{ 524{
525 int i; 525 int i;
526 526
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 615d8c995c3c..90d43e95dcf8 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -91,7 +91,6 @@ void kvm_ioapic_destroy(struct kvm *kvm);
91int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, 91int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
92 int level, bool line_status); 92 int level, bool line_status);
93void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); 93void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
94void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
95int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, 94int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
96 struct kvm_lapic_irq *irq, unsigned long *dest_map); 95 struct kvm_lapic_irq *irq, unsigned long *dest_map);
97int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); 96int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4f588bc94186..03a0381b1cb7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -95,6 +95,12 @@ static int hardware_enable_all(void);
95static void hardware_disable_all(void); 95static void hardware_disable_all(void);
96 96
97static void kvm_io_bus_destroy(struct kvm_io_bus *bus); 97static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
98static void update_memslots(struct kvm_memslots *slots,
99 struct kvm_memory_slot *new, u64 last_generation);
100
101static void kvm_release_pfn_dirty(pfn_t pfn);
102static void mark_page_dirty_in_slot(struct kvm *kvm,
103 struct kvm_memory_slot *memslot, gfn_t gfn);
98 104
99bool kvm_rebooting; 105bool kvm_rebooting;
100EXPORT_SYMBOL_GPL(kvm_rebooting); 106EXPORT_SYMBOL_GPL(kvm_rebooting);
@@ -553,7 +559,7 @@ static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
553 free->npages = 0; 559 free->npages = 0;
554} 560}
555 561
556void kvm_free_physmem(struct kvm *kvm) 562static void kvm_free_physmem(struct kvm *kvm)
557{ 563{
558 struct kvm_memslots *slots = kvm->memslots; 564 struct kvm_memslots *slots = kvm->memslots;
559 struct kvm_memory_slot *memslot; 565 struct kvm_memory_slot *memslot;
@@ -675,8 +681,9 @@ static void sort_memslots(struct kvm_memslots *slots)
675 slots->id_to_index[slots->memslots[i].id] = i; 681 slots->id_to_index[slots->memslots[i].id] = i;
676} 682}
677 683
678void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new, 684static void update_memslots(struct kvm_memslots *slots,
679 u64 last_generation) 685 struct kvm_memory_slot *new,
686 u64 last_generation)
680{ 687{
681 if (new) { 688 if (new) {
682 int id = new->id; 689 int id = new->id;
@@ -924,8 +931,8 @@ int kvm_set_memory_region(struct kvm *kvm,
924} 931}
925EXPORT_SYMBOL_GPL(kvm_set_memory_region); 932EXPORT_SYMBOL_GPL(kvm_set_memory_region);
926 933
927int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, 934static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
928 struct kvm_userspace_memory_region *mem) 935 struct kvm_userspace_memory_region *mem)
929{ 936{
930 if (mem->slot >= KVM_USER_MEM_SLOTS) 937 if (mem->slot >= KVM_USER_MEM_SLOTS)
931 return -EINVAL; 938 return -EINVAL;
@@ -1047,7 +1054,7 @@ static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
1047} 1054}
1048 1055
1049unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, 1056unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
1050 gfn_t gfn) 1057 gfn_t gfn)
1051{ 1058{
1052 return gfn_to_hva_many(slot, gfn, NULL); 1059 return gfn_to_hva_many(slot, gfn, NULL);
1053} 1060}
@@ -1387,18 +1394,11 @@ void kvm_release_page_dirty(struct page *page)
1387} 1394}
1388EXPORT_SYMBOL_GPL(kvm_release_page_dirty); 1395EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
1389 1396
1390void kvm_release_pfn_dirty(pfn_t pfn) 1397static void kvm_release_pfn_dirty(pfn_t pfn)
1391{ 1398{
1392 kvm_set_pfn_dirty(pfn); 1399 kvm_set_pfn_dirty(pfn);
1393 kvm_release_pfn_clean(pfn); 1400 kvm_release_pfn_clean(pfn);
1394} 1401}
1395EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
1396
1397void kvm_set_page_dirty(struct page *page)
1398{
1399 kvm_set_pfn_dirty(page_to_pfn(page));
1400}
1401EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
1402 1402
1403void kvm_set_pfn_dirty(pfn_t pfn) 1403void kvm_set_pfn_dirty(pfn_t pfn)
1404{ 1404{
@@ -1640,8 +1640,9 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
1640} 1640}
1641EXPORT_SYMBOL_GPL(kvm_clear_guest); 1641EXPORT_SYMBOL_GPL(kvm_clear_guest);
1642 1642
1643void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, 1643static void mark_page_dirty_in_slot(struct kvm *kvm,
1644 gfn_t gfn) 1644 struct kvm_memory_slot *memslot,
1645 gfn_t gfn)
1645{ 1646{
1646 if (memslot && memslot->dirty_bitmap) { 1647 if (memslot && memslot->dirty_bitmap) {
1647 unsigned long rel_gfn = gfn - memslot->base_gfn; 1648 unsigned long rel_gfn = gfn - memslot->base_gfn;
@@ -1710,14 +1711,6 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
1710EXPORT_SYMBOL_GPL(kvm_vcpu_kick); 1711EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
1711#endif /* !CONFIG_S390 */ 1712#endif /* !CONFIG_S390 */
1712 1713
1713void kvm_resched(struct kvm_vcpu *vcpu)
1714{
1715 if (!need_resched())
1716 return;
1717 cond_resched();
1718}
1719EXPORT_SYMBOL_GPL(kvm_resched);
1720
1721bool kvm_vcpu_yield_to(struct kvm_vcpu *target) 1714bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
1722{ 1715{
1723 struct pid *pid; 1716 struct pid *pid;
@@ -1742,7 +1735,6 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
1742} 1735}
1743EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); 1736EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
1744 1737
1745#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
1746/* 1738/*
1747 * Helper that checks whether a VCPU is eligible for directed yield. 1739 * Helper that checks whether a VCPU is eligible for directed yield.
1748 * Most eligible candidate to yield is decided by following heuristics: 1740 * Most eligible candidate to yield is decided by following heuristics:
@@ -1765,8 +1757,9 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
1765 * locking does not harm. It may result in trying to yield to same VCPU, fail 1757 * locking does not harm. It may result in trying to yield to same VCPU, fail
1766 * and continue with next VCPU and so on. 1758 * and continue with next VCPU and so on.
1767 */ 1759 */
1768bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) 1760static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
1769{ 1761{
1762#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
1770 bool eligible; 1763 bool eligible;
1771 1764
1772 eligible = !vcpu->spin_loop.in_spin_loop || 1765 eligible = !vcpu->spin_loop.in_spin_loop ||
@@ -1777,8 +1770,10 @@ bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
1777 kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); 1770 kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
1778 1771
1779 return eligible; 1772 return eligible;
1780} 1773#else
1774 return true;
1781#endif 1775#endif
1776}
1782 1777
1783void kvm_vcpu_on_spin(struct kvm_vcpu *me) 1778void kvm_vcpu_on_spin(struct kvm_vcpu *me)
1784{ 1779{
@@ -2284,6 +2279,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
2284 ops = &kvm_vfio_ops; 2279 ops = &kvm_vfio_ops;
2285 break; 2280 break;
2286#endif 2281#endif
2282#ifdef CONFIG_KVM_ARM_VGIC
2283 case KVM_DEV_TYPE_ARM_VGIC_V2:
2284 ops = &kvm_arm_vgic_v2_ops;
2285 break;
2286#endif
2287 default: 2287 default:
2288 return -ENODEV; 2288 return -ENODEV;
2289 } 2289 }
@@ -2939,33 +2939,6 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
2939 return r < 0 ? r : 0; 2939 return r < 0 ? r : 0;
2940} 2940}
2941 2941
2942/* kvm_io_bus_read_cookie - called under kvm->slots_lock */
2943int kvm_io_bus_read_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
2944 int len, void *val, long cookie)
2945{
2946 struct kvm_io_bus *bus;
2947 struct kvm_io_range range;
2948
2949 range = (struct kvm_io_range) {
2950 .addr = addr,
2951 .len = len,
2952 };
2953
2954 bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
2955
2956 /* First try the device referenced by cookie. */
2957 if ((cookie >= 0) && (cookie < bus->dev_count) &&
2958 (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0))
2959 if (!kvm_iodevice_read(bus->range[cookie].dev, addr, len,
2960 val))
2961 return cookie;
2962
2963 /*
2964 * cookie contained garbage; fall back to search and return the
2965 * correct cookie value.
2966 */
2967 return __kvm_io_bus_read(bus, &range, val);
2968}
2969 2942
2970/* Caller must hold slots_lock. */ 2943/* Caller must hold slots_lock. */
2971int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 2944int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index ca4260e35037..b4f9507ae650 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -101,14 +101,14 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
101 struct kvm_vfio *kv = dev->private; 101 struct kvm_vfio *kv = dev->private;
102 struct vfio_group *vfio_group; 102 struct vfio_group *vfio_group;
103 struct kvm_vfio_group *kvg; 103 struct kvm_vfio_group *kvg;
104 void __user *argp = (void __user *)arg; 104 int32_t __user *argp = (int32_t __user *)(unsigned long)arg;
105 struct fd f; 105 struct fd f;
106 int32_t fd; 106 int32_t fd;
107 int ret; 107 int ret;
108 108
109 switch (attr) { 109 switch (attr) {
110 case KVM_DEV_VFIO_GROUP_ADD: 110 case KVM_DEV_VFIO_GROUP_ADD:
111 if (get_user(fd, (int32_t __user *)argp)) 111 if (get_user(fd, argp))
112 return -EFAULT; 112 return -EFAULT;
113 113
114 f = fdget(fd); 114 f = fdget(fd);
@@ -148,7 +148,7 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
148 return 0; 148 return 0;
149 149
150 case KVM_DEV_VFIO_GROUP_DEL: 150 case KVM_DEV_VFIO_GROUP_DEL:
151 if (get_user(fd, (int32_t __user *)argp)) 151 if (get_user(fd, argp))
152 return -EFAULT; 152 return -EFAULT;
153 153
154 f = fdget(fd); 154 f = fdget(fd);