author     Linus Torvalds <torvalds@linux-foundation.org>  2017-07-06 21:38:31 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-07-06 21:38:31 -0400
commit     c136b84393d4e340e1b53fc7f737dd5827b19ee5 (patch)
tree       985a1bdfafe7ec5ce2d3c738f601cad3998d8ce9
parent     e0f25a3f2d052e36ff67a9b4db835c3e27e950d8 (diff)
parent     1372324b328cd5dabaef5e345e37ad48c63df2a9 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"PPC:
- Better machine check handling for HV KVM
- Ability to support guests with threads=2, 4 or 8 on POWER9
- Fix for a race that could cause delayed recognition of signals
- Fix for a bug where POWER9 guests could sleep with interrupts pending.
ARM:
- VCPU request overhaul
- allow timer and PMU to have their interrupt number selected from userspace
- workaround for Cavium erratum 30115
- handling of memory poisoning
- the usual crop of fixes and cleanups
s390:
- initial machine check forwarding
- migration support for the CMMA page hinting information
- cleanups and fixes
x86:
- nested VMX bugfixes and improvements
- more reliable NMI window detection on AMD
- APIC timer optimizations
Generic:
- VCPU request overhaul + documentation of common code patterns
- kvm_stat improvements"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (124 commits)
Update my email address
kvm: vmx: allow host to access guest MSR_IA32_BNDCFGS
x86: kvm: mmu: use ept a/d in vmcs02 iff used in vmcs12
kvm: x86: mmu: allow A/D bits to be disabled in an mmu
x86: kvm: mmu: make spte mmio mask more explicit
x86: kvm: mmu: dead code thanks to access tracking
KVM: PPC: Book3S: Fix typo in XICS-on-XIVE state saving code
KVM: PPC: Book3S HV: Close race with testing for signals on guest entry
KVM: PPC: Book3S HV: Simplify dynamic micro-threading code
KVM: x86: remove ignored type attribute
KVM: LAPIC: Fix lapic timer injection delay
KVM: lapic: reorganize restart_apic_timer
KVM: lapic: reorganize start_hv_timer
kvm: nVMX: Check memory operand to INVVPID
KVM: s390: Inject machine check into the nested guest
KVM: s390: Inject machine check into the guest
tools/kvm_stat: add new interactive command 'b'
tools/kvm_stat: add new command line switch '-i'
tools/kvm_stat: fix error on interactive command 'g'
KVM: SVM: suppress unnecessary NMI singlestep on GIF=0 and nested exit
...
95 files changed, 4250 insertions, 967 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f24ee1c99412..aa1d4409fe0a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1862,6 +1862,18 @@ | |||
1862 | for all guests. | 1862 | for all guests. |
1863 | Default is 1 (enabled) if in 64-bit or 32-bit PAE mode. | 1863 | Default is 1 (enabled) if in 64-bit or 32-bit PAE mode. |
1864 | 1864 | ||
1865 | kvm-arm.vgic_v3_group0_trap= | ||
1866 | [KVM,ARM] Trap guest accesses to GICv3 group-0 | ||
1867 | system registers | ||
1868 | |||
1869 | kvm-arm.vgic_v3_group1_trap= | ||
1870 | [KVM,ARM] Trap guest accesses to GICv3 group-1 | ||
1871 | system registers | ||
1872 | |||
1873 | kvm-arm.vgic_v3_common_trap= | ||
1874 | [KVM,ARM] Trap guest accesses to GICv3 common | ||
1875 | system registers | ||
1876 | |||
1865 | kvm-intel.ept= [KVM,Intel] Disable extended page tables | 1877 | kvm-intel.ept= [KVM,Intel] Disable extended page tables |
1866 | (virtualized MMU) support on capable Intel chips. | 1878 | (virtualized MMU) support on capable Intel chips. |
1867 | Default is 1 (enabled) | 1879 | Default is 1 (enabled) |
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index 10f2dddbf449..f5f93dca54b7 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -62,6 +62,7 @@ stable kernels. | |||
62 | | Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | | 62 | | Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | |
63 | | Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 | | 63 | | Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 | |
64 | | Cavium | ThunderX SMMUv2 | #27704 | N/A | | 64 | | Cavium | ThunderX SMMUv2 | #27704 | N/A | |
65 | | Cavium | ThunderX Core | #30115 | CAVIUM_ERRATUM_30115 | | ||
65 | | | | | | | 66 | | | | | | |
66 | | Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | | 67 | | Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | |
67 | | | | | | | 68 | | | | | | |
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 4029943887a3..3a9831b72945 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3255,6 +3255,141 @@ Otherwise, if the MCE is a corrected error, KVM will just | |||
3255 | store it in the corresponding bank (provided this bank is | 3255 | store it in the corresponding bank (provided this bank is |
3256 | not holding a previously reported uncorrected error). | 3256 | not holding a previously reported uncorrected error). |
3257 | 3257 | ||
3258 | 4.107 KVM_S390_GET_CMMA_BITS | ||
3259 | |||
3260 | Capability: KVM_CAP_S390_CMMA_MIGRATION | ||
3261 | Architectures: s390 | ||
3262 | Type: vm ioctl | ||
3263 | Parameters: struct kvm_s390_cmma_log (in, out) | ||
3264 | Returns: 0 on success, a negative value on error | ||
3265 | |||
3266 | This ioctl is used to get the values of the CMMA bits on the s390 | ||
3267 | architecture. It is meant to be used in two scenarios: | ||
3268 | - During live migration to save the CMMA values. Migration mode needs | ||
3269 | to be enabled first via the KVM_S390_VM_MIGRATION_START VM attribute. | ||
3270 | - To non-destructively peek at the CMMA values, with the flag | ||
3271 | KVM_S390_CMMA_PEEK set. | ||
3272 | |||
3273 | The ioctl takes parameters via the kvm_s390_cmma_log struct. The desired | ||
3274 | values are written to a buffer whose location is indicated via the "values" | ||
3275 | member in the kvm_s390_cmma_log struct. The values in the input struct are | ||
3276 | also updated as needed. | ||
3277 | Each CMMA value takes up one byte. | ||
3278 | |||
3279 | struct kvm_s390_cmma_log { | ||
3280 | __u64 start_gfn; | ||
3281 | __u32 count; | ||
3282 | __u32 flags; | ||
3283 | union { | ||
3284 | __u64 remaining; | ||
3285 | __u64 mask; | ||
3286 | }; | ||
3287 | __u64 values; | ||
3288 | }; | ||
3289 | |||
3290 | start_gfn is the number of the first guest frame whose CMMA values are | ||
3291 | to be retrieved, | ||
3292 | |||
3293 | count is the length of the buffer in bytes, | ||
3294 | |||
3295 | values points to the buffer where the result will be written to. | ||
3296 | |||
3297 | If count is greater than KVM_S390_SKEYS_MAX, then it is considered to be | ||
3298 | KVM_S390_SKEYS_MAX. KVM_S390_SKEYS_MAX is re-used for consistency with | ||
3299 | other ioctls. | ||
3300 | |||
3301 | The result is written in the buffer pointed to by the field values, and | ||
3302 | the values of the input parameter are updated as follows. | ||
3303 | |||
3304 | Depending on the flags, different actions are performed. The only | ||
3305 | supported flag so far is KVM_S390_CMMA_PEEK. | ||
3306 | |||
3307 | The default behaviour if KVM_S390_CMMA_PEEK is not set is: | ||
3308 | start_gfn will indicate the first page frame whose CMMA bits were dirty. | ||
3309 | It is not necessarily the same as the one passed as input, as clean pages | ||
3310 | are skipped. | ||
3311 | |||
3312 | count will indicate the number of bytes actually written in the buffer. | ||
3313 | It can (and very often will) be smaller than the input value, since the | ||
3314 | buffer is only filled until 16 bytes of clean values are found (which | ||
3315 | are then not copied in the buffer). Since a CMMA migration block needs | ||
3316 | the base address and the length, for a total of 16 bytes, we will send | ||
3317 | back some clean data if there is some dirty data afterwards, as long as | ||
3318 | the size of the clean data does not exceed the size of the header. This | ||
3319 | allows to minimize the amount of data to be saved or transferred over | ||
3320 | the network at the expense of more roundtrips to userspace. The next | ||
3321 | invocation of the ioctl will skip over all the clean values, saving | ||
3322 | potentially more than just the 16 bytes we found. | ||
3323 | |||
3324 | If KVM_S390_CMMA_PEEK is set: | ||
3325 | the existing storage attributes are read even when not in migration | ||
3326 | mode, and no other action is performed; | ||
3327 | |||
3328 | the output start_gfn will be equal to the input start_gfn, | ||
3329 | |||
3330 | the output count will be equal to the input count, except if the end of | ||
3331 | memory has been reached. | ||
3332 | |||
3333 | In both cases: | ||
3334 | the field "remaining" will indicate the total number of dirty CMMA values | ||
3335 | still remaining, or 0 if KVM_S390_CMMA_PEEK is set and migration mode is | ||
3336 | not enabled. | ||
3337 | |||
3338 | mask is unused. | ||
3339 | |||
3340 | values points to the userspace buffer where the result will be stored. | ||
3341 | |||
3342 | This ioctl can fail with -ENOMEM if not enough memory can be allocated to | ||
3343 | complete the task, with -ENXIO if CMMA is not enabled, with -EINVAL if | ||
3344 | KVM_S390_CMMA_PEEK is not set and migration mode is not enabled, with | ||
3345 | -EFAULT if the userspace address is invalid or if no page table is | ||
3346 | present for the addresses (e.g. when using hugepages). | ||
3347 | |||
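
As an illustration only (not part of this patch), a minimal userspace loop that
drains dirty CMMA values during migration might look like the sketch below. The
VM file descriptor, the buffer size and the error handling are assumptions, and
migration mode is assumed to have been started via the KVM_S390_VM_MIGRATION_START
attribute described later in this merge. A destination would feed the received
blocks into KVM_S390_SET_CMMA_BITS (section 4.108 below).

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Hypothetical helper: repeatedly fetch dirty CMMA values from a VM fd. */
    static int drain_cmma(int vm_fd)
    {
            uint8_t buf[4096];                      /* arbitrary size for this sketch */
            struct kvm_s390_cmma_log log = { .start_gfn = 0 };

            do {
                    log.count = sizeof(buf);
                    log.flags = 0;                  /* no PEEK: consume dirty values */
                    log.values = (uint64_t)(uintptr_t)buf;

                    if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
                            return -1;

                    /*
                     * log.start_gfn and log.count now describe the dirty block
                     * written to buf; a real migration would send it on here.
                     */
                    log.start_gfn += log.count;
            } while (log.remaining > 0);

            return 0;
    }
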
3348 | 4.108 KVM_S390_SET_CMMA_BITS | ||
3349 | |||
3350 | Capability: KVM_CAP_S390_CMMA_MIGRATION | ||
3351 | Architectures: s390 | ||
3352 | Type: vm ioctl | ||
3353 | Parameters: struct kvm_s390_cmma_log (in) | ||
3354 | Returns: 0 on success, a negative value on error | ||
3355 | |||
3356 | This ioctl is used to set the values of the CMMA bits on the s390 | ||
3357 | architecture. It is meant to be used during live migration to restore | ||
3358 | the CMMA values, but there are no restrictions on its use. | ||
3359 | The ioctl takes parameters via the kvm_s390_cmma_values struct. | ||
3360 | Each CMMA value takes up one byte. | ||
3361 | |||
3362 | struct kvm_s390_cmma_log { | ||
3363 | __u64 start_gfn; | ||
3364 | __u32 count; | ||
3365 | __u32 flags; | ||
3366 | union { | ||
3367 | __u64 remaining; | ||
3368 | __u64 mask; | ||
3369 | }; | ||
3370 | __u64 values; | ||
3371 | }; | ||
3372 | |||
3373 | start_gfn indicates the starting guest frame number, | ||
3374 | |||
3375 | count indicates how many values are to be considered in the buffer, | ||
3376 | |||
3377 | flags is not used and must be 0. | ||
3378 | |||
3379 | mask indicates which PGSTE bits are to be considered. | ||
3380 | |||
3381 | remaining is not used. | ||
3382 | |||
3383 | values points to the userspace buffer holding the values to be set. | ||
3384 | |||
3385 | This ioctl can fail with -ENOMEM if not enough memory can be allocated to | ||
3386 | complete the task, with -ENXIO if CMMA is not enabled, with -EINVAL if | ||
3387 | the count field is too large (e.g. more than KVM_S390_CMMA_SIZE_MAX) or | ||
3388 | if the flags field was not 0, with -EFAULT if the userspace address is | ||
3389 | invalid, if invalid pages are written to (e.g. after the end of memory) | ||
3390 | or if no page table is present for the addresses (e.g. when using | ||
3391 | hugepages). | ||
3392 | |||
3258 | 5. The kvm_run structure | 3393 | 5. The kvm_run structure |
3259 | ------------------------ | 3394 | ------------------------ |
3260 | 3395 | ||
@@ -3996,6 +4131,34 @@ Parameters: none | |||
3996 | Allow use of adapter-interruption suppression. | 4131 | Allow use of adapter-interruption suppression. |
3997 | Returns: 0 on success; -EBUSY if a VCPU has already been created. | 4132 | Returns: 0 on success; -EBUSY if a VCPU has already been created. |
3998 | 4133 | ||
4134 | 7.11 KVM_CAP_PPC_SMT | ||
4135 | |||
4136 | Architectures: ppc | ||
4137 | Parameters: vsmt_mode, flags | ||
4138 | |||
4139 | Enabling this capability on a VM provides userspace with a way to set | ||
4140 | the desired virtual SMT mode (i.e. the number of virtual CPUs per | ||
4141 | virtual core). The virtual SMT mode, vsmt_mode, must be a power of 2 | ||
4142 | between 1 and 8. On POWER8, vsmt_mode must also be no greater than | ||
4143 | the number of threads per subcore for the host. Currently flags must | ||
4144 | be 0. A successful call to enable this capability will result in | ||
4145 | vsmt_mode being returned when the KVM_CAP_PPC_SMT capability is | ||
4146 | subsequently queried for the VM. This capability is only supported by | ||
4147 | HV KVM, and can only be set before any VCPUs have been created. | ||
4148 | The KVM_CAP_PPC_SMT_POSSIBLE capability indicates which virtual SMT | ||
4149 | modes are available. | ||
4150 | |||
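
A hypothetical usage sketch (vm_fd and the chosen mode are assumptions; the
usual <sys/ioctl.h>, <linux/kvm.h> and <stdio.h> includes are implied), issued
on the VM fd before any VCPUs are created:

    struct kvm_enable_cap cap = {
            .cap  = KVM_CAP_PPC_SMT,
            .args = { 4 /* vsmt_mode */, 0 /* flags */ },
    };

    if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
            perror("KVM_CAP_PPC_SMT");      /* e.g. mode unsupported or VCPUs exist */
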
4151 | 7.12 KVM_CAP_PPC_FWNMI | ||
4152 | |||
4153 | Architectures: ppc | ||
4154 | Parameters: none | ||
4155 | |||
4156 | With this capability enabled, a machine check exception in the guest | ||
4157 | address space will cause KVM to exit the guest with an NMI exit reason. | ||
4158 | This enables QEMU to build an error log and branch to the machine check | ||
4159 | handling routine registered by the guest kernel. Without this capability, | ||
4160 | KVM will branch to the guest's 0x200 interrupt vector. | ||
4161 | |||
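
A hypothetical sketch of opting in (vm_fd is an assumption; the capability takes
no arguments):

    struct kvm_enable_cap cap = { .cap = KVM_CAP_PPC_FWNMI };

    if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) == 0) {
            /* guest machine checks now exit to userspace with an NMI exit reason */
    }
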
3999 | 8. Other capabilities. | 4162 | 8. Other capabilities. |
4000 | ---------------------- | 4163 | ---------------------- |
4001 | 4164 | ||
@@ -4157,3 +4320,12 @@ Currently the following bits are defined for the device_irq_level bitmap: | |||
4157 | Future versions of kvm may implement additional events. These will get | 4320 | Future versions of kvm may implement additional events. These will get |
4158 | indicated by returning a higher number from KVM_CHECK_EXTENSION and will be | 4321 | indicated by returning a higher number from KVM_CHECK_EXTENSION and will be |
4159 | listed above. | 4322 | listed above. |
4323 | |||
4324 | 8.10 KVM_CAP_PPC_SMT_POSSIBLE | ||
4325 | |||
4326 | Architectures: ppc | ||
4327 | |||
4328 | Querying this capability returns a bitmap indicating the possible | ||
4329 | virtual SMT modes that can be set using KVM_CAP_PPC_SMT. If bit N | ||
4330 | (counting from the right) is set, then a virtual SMT mode of 2^N is | ||
4331 | available. | ||
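
For example, the returned bitmap could be decoded as in this hypothetical sketch
(vm_fd and the printf reporting are assumptions):

    long modes = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_SMT_POSSIBLE);

    for (int n = 0; modes > 0 && n < 8; n++)
            if (modes & (1L << n))
                    printf("virtual SMT mode %d is available\n", 1 << n);
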
diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt
index c2518cea8ab4..2f1cbf1301d2 100644
--- a/Documentation/virtual/kvm/devices/s390_flic.txt
+++ b/Documentation/virtual/kvm/devices/s390_flic.txt
@@ -16,6 +16,7 @@ FLIC provides support to | |||
16 | - register and modify adapter interrupt sources (KVM_DEV_FLIC_ADAPTER_*) | 16 | - register and modify adapter interrupt sources (KVM_DEV_FLIC_ADAPTER_*) |
17 | - modify AIS (adapter-interruption-suppression) mode state (KVM_DEV_FLIC_AISM) | 17 | - modify AIS (adapter-interruption-suppression) mode state (KVM_DEV_FLIC_AISM) |
18 | - inject adapter interrupts on a specified adapter (KVM_DEV_FLIC_AIRQ_INJECT) | 18 | - inject adapter interrupts on a specified adapter (KVM_DEV_FLIC_AIRQ_INJECT) |
19 | - get/set all AIS mode states (KVM_DEV_FLIC_AISM_ALL) | ||
19 | 20 | ||
20 | Groups: | 21 | Groups: |
21 | KVM_DEV_FLIC_ENQUEUE | 22 | KVM_DEV_FLIC_ENQUEUE |
@@ -136,6 +137,20 @@ struct kvm_s390_ais_req { | |||
136 | an isc according to the adapter-interruption-suppression mode on condition | 137 | an isc according to the adapter-interruption-suppression mode on condition |
137 | that the AIS capability is enabled. | 138 | that the AIS capability is enabled. |
138 | 139 | ||
140 | KVM_DEV_FLIC_AISM_ALL | ||
141 | Gets or sets the adapter-interruption-suppression mode for all ISCs. Takes | ||
142 | a kvm_s390_ais_all describing: | ||
143 | |||
144 | struct kvm_s390_ais_all { | ||
145 | __u8 simm; /* Single-Interruption-Mode mask */ | ||
146 | __u8 nimm; /* No-Interruption-Mode mask */ | ||
147 | }; | ||
148 | |||
149 | simm contains the Single-Interruption-Mode mask for all ISCs, and nimm | ||
150 | contains the No-Interruption-Mode mask for all ISCs. Each bit in simm and | ||
151 | nimm corresponds to an ISC (MSB0 bit 0 to ISC 0 and so on). The combination | ||
152 | of the simm and nimm bits determines the AIS mode of an ISC. | ||
153 | |||
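
A hypothetical userspace sketch that reads the masks back (flic_fd is assumed
to be a FLIC device fd obtained via KVM_CREATE_DEVICE):

    struct kvm_s390_ais_all ais;
    struct kvm_device_attr attr = {
            .group = KVM_DEV_FLIC_AISM_ALL,
            .addr  = (uint64_t)(uintptr_t)&ais,
    };

    if (ioctl(flic_fd, KVM_GET_DEVICE_ATTR, &attr) == 0) {
            /* ais.simm and ais.nimm now hold the per-ISC mode masks */
    }
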
139 | Note: The KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR device ioctls executed on | 154 | Note: The KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR device ioctls executed on |
140 | FLIC with an unknown group or attribute gives the error code EINVAL (instead of | 155 | FLIC with an unknown group or attribute gives the error code EINVAL (instead of |
141 | ENXIO, as specified in the API documentation). It is not possible to conclude | 156 | ENXIO, as specified in the API documentation). It is not possible to conclude |
diff --git a/Documentation/virtual/kvm/devices/vcpu.txt b/Documentation/virtual/kvm/devices/vcpu.txt
index 02f50686c418..2b5dab16c4f2 100644
--- a/Documentation/virtual/kvm/devices/vcpu.txt
+++ b/Documentation/virtual/kvm/devices/vcpu.txt
@@ -16,7 +16,9 @@ Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a | |||
16 | Returns: -EBUSY: The PMU overflow interrupt is already set | 16 | Returns: -EBUSY: The PMU overflow interrupt is already set |
17 | -ENXIO: The overflow interrupt not set when attempting to get it | 17 | -ENXIO: The overflow interrupt not set when attempting to get it |
18 | -ENODEV: PMUv3 not supported | 18 | -ENODEV: PMUv3 not supported |
19 | -EINVAL: Invalid PMU overflow interrupt number supplied | 19 | -EINVAL: Invalid PMU overflow interrupt number supplied or |
20 | trying to set the IRQ number without using an in-kernel | ||
21 | irqchip. | ||
20 | 22 | ||
21 | A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt | 23 | A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt |
22 | number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt | 24 | number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt |
@@ -25,11 +27,36 @@ all vcpus, while as an SPI it must be a separate number per vcpu. | |||
25 | 27 | ||
26 | 1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT | 28 | 1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT |
27 | Parameters: no additional parameter in kvm_device_attr.addr | 29 | Parameters: no additional parameter in kvm_device_attr.addr |
28 | Returns: -ENODEV: PMUv3 not supported | 30 | Returns: -ENODEV: PMUv3 not supported or GIC not initialized |
29 | -ENXIO: PMUv3 not properly configured as required prior to calling this | 31 | -ENXIO: PMUv3 not properly configured or in-kernel irqchip not |
30 | attribute | 32 | configured as required prior to calling this attribute |
31 | -EBUSY: PMUv3 already initialized | 33 | -EBUSY: PMUv3 already initialized |
32 | 34 | ||
33 | Request the initialization of the PMUv3. This must be done after creating the | 35 | Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel |
34 | in-kernel irqchip. Creating a PMU with a userspace irqchip is currently not | 36 | virtual GIC implementation, this must be done after initializing the in-kernel |
35 | supported. | 37 | irqchip. |
38 | |||
39 | |||
40 | 2. GROUP: KVM_ARM_VCPU_TIMER_CTRL | ||
41 | Architectures: ARM,ARM64 | ||
42 | |||
43 | 2.1. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_VTIMER | ||
44 | 2.2. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_PTIMER | ||
45 | Parameters: in kvm_device_attr.addr the address for the timer interrupt is a | ||
46 | pointer to an int | ||
47 | Returns: -EINVAL: Invalid timer interrupt number | ||
48 | -EBUSY: One or more VCPUs has already run | ||
49 | |||
50 | A value describing the architected timer interrupt number when connected to an | ||
51 | in-kernel virtual GIC. It must be a PPI (16 <= intid < 32). Setting the | ||
52 | attribute overrides the default values (see below). | ||
53 | |||
54 | KVM_ARM_VCPU_TIMER_IRQ_VTIMER: The EL1 virtual timer intid (default: 27) | ||
55 | KVM_ARM_VCPU_TIMER_IRQ_PTIMER: The EL1 physical timer intid (default: 30) | ||
56 | |||
57 | Setting the same PPI for different timers will prevent the VCPUs from running. | ||
58 | Setting the interrupt number on a VCPU configures all VCPUs created at that | ||
59 | time to use the number provided for a given timer, overwriting any previously | ||
60 | configured values on other VCPUs. Userspace should configure the interrupt | ||
61 | numbers on at least one VCPU after creating all VCPUs and before running any | ||
62 | VCPUs. | ||
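
An illustrative sketch only (vcpu_fd and the PPI choice are assumptions):
selecting the virtual timer interrupt before the first VCPU run could look
like this:

    int irq = 27;                           /* a PPI: 16 <= intid < 32 */
    struct kvm_device_attr attr = {
            .group = KVM_ARM_VCPU_TIMER_CTRL,
            .attr  = KVM_ARM_VCPU_TIMER_IRQ_VTIMER,
            .addr  = (uint64_t)(uintptr_t)&irq,
    };

    ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);     /* 0 on success */
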
diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt
index 575ccb022aac..903fc926860b 100644
--- a/Documentation/virtual/kvm/devices/vm.txt
+++ b/Documentation/virtual/kvm/devices/vm.txt
@@ -222,3 +222,36 @@ Allows user space to disable dea key wrapping, clearing the wrapping key. | |||
222 | 222 | ||
223 | Parameters: none | 223 | Parameters: none |
224 | Returns: 0 | 224 | Returns: 0 |
225 | |||
226 | 5. GROUP: KVM_S390_VM_MIGRATION | ||
227 | Architectures: s390 | ||
228 | |||
229 | 5.1. ATTRIBUTE: KVM_S390_VM_MIGRATION_STOP (w/o) | ||
230 | |||
231 | Allows userspace to stop migration mode, needed for PGSTE migration. | ||
232 | Setting this attribute when migration mode is not active will have no | ||
233 | effect. | ||
234 | |||
235 | Parameters: none | ||
236 | Returns: 0 | ||
237 | |||
238 | 5.2. ATTRIBUTE: KVM_S390_VM_MIGRATION_START (w/o) | ||
239 | |||
240 | Allows userspace to start migration mode, needed for PGSTE migration. | ||
241 | Setting this attribute when migration mode is already active will have | ||
242 | no effect. | ||
243 | |||
244 | Parameters: none | ||
245 | Returns: -ENOMEM if there is not enough free memory to start migration mode | ||
246 | -EINVAL if the state of the VM is invalid (e.g. no memory defined) | ||
247 | 0 in case of success. | ||
248 | |||
249 | 5.3. ATTRIBUTE: KVM_S390_VM_MIGRATION_STATUS (r/o) | ||
250 | |||
251 | Allows userspace to query the status of migration mode. | ||
252 | |||
253 | Parameters: address of a buffer in user space to store the data (u64) to; | ||
254 | the data itself is either 0 if migration mode is disabled or 1 | ||
255 | if it is enabled | ||
256 | Returns: -EFAULT if the given address is not accessible from kernel space | ||
257 | 0 in case of success. | ||
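
Putting the three attributes together, a hypothetical userspace sequence
(vm_fd is an assumption) might be:

    uint64_t status;
    struct kvm_device_attr start = {
            .group = KVM_S390_VM_MIGRATION,
            .attr  = KVM_S390_VM_MIGRATION_START,
    };
    struct kvm_device_attr query = {
            .group = KVM_S390_VM_MIGRATION,
            .attr  = KVM_S390_VM_MIGRATION_STATUS,
            .addr  = (uint64_t)(uintptr_t)&status,
    };

    if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &start) == 0 &&
        ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &query) == 0) {
            /* status is 1 while migration mode is active */
    }
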
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index 481b6a9c25d5..f50d45b1e967 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -179,6 +179,10 @@ Shadow pages contain the following information: | |||
179 | shadow page; it is also used to go back from a struct kvm_mmu_page | 179 | shadow page; it is also used to go back from a struct kvm_mmu_page |
180 | to a memslot, through the kvm_memslots_for_spte_role macro and | 180 | to a memslot, through the kvm_memslots_for_spte_role macro and |
181 | __gfn_to_memslot. | 181 | __gfn_to_memslot. |
182 | role.ad_disabled: | ||
183 | Is 1 if the MMU instance cannot use A/D bits. EPT did not have A/D | ||
184 | bits before Haswell; shadow EPT page tables also cannot use A/D bits | ||
185 | if the L1 hypervisor does not enable them. | ||
182 | gfn: | 186 | gfn: |
183 | Either the guest page table containing the translations shadowed by this | 187 | Either the guest page table containing the translations shadowed by this |
184 | page, or the base page frame for linear translations. See role.direct. | 188 | page, or the base page frame for linear translations. See role.direct. |
diff --git a/Documentation/virtual/kvm/vcpu-requests.rst b/Documentation/virtual/kvm/vcpu-requests.rst
new file mode 100644
index 000000000000..5feb3706a7ae
--- /dev/null
+++ b/Documentation/virtual/kvm/vcpu-requests.rst
@@ -0,0 +1,307 @@ | |||
1 | ================= | ||
2 | KVM VCPU Requests | ||
3 | ================= | ||
4 | |||
5 | Overview | ||
6 | ======== | ||
7 | |||
8 | KVM supports an internal API enabling threads to request a VCPU thread to | ||
9 | perform some activity. For example, a thread may request a VCPU to flush | ||
10 | its TLB with a VCPU request. The API consists of the following functions:: | ||
11 | |||
12 | /* Check if any requests are pending for VCPU @vcpu. */ | ||
13 | bool kvm_request_pending(struct kvm_vcpu *vcpu); | ||
14 | |||
15 | /* Check if VCPU @vcpu has request @req pending. */ | ||
16 | bool kvm_test_request(int req, struct kvm_vcpu *vcpu); | ||
17 | |||
18 | /* Clear request @req for VCPU @vcpu. */ | ||
19 | void kvm_clear_request(int req, struct kvm_vcpu *vcpu); | ||
20 | |||
21 | /* | ||
22 | * Check if VCPU @vcpu has request @req pending. When the request is | ||
23 | * pending it will be cleared and a memory barrier, which pairs with | ||
24 | * another in kvm_make_request(), will be issued. | ||
25 | */ | ||
26 | bool kvm_check_request(int req, struct kvm_vcpu *vcpu); | ||
27 | |||
28 | /* | ||
29 | * Make request @req of VCPU @vcpu. Issues a memory barrier, which pairs | ||
30 | * with another in kvm_check_request(), prior to setting the request. | ||
31 | */ | ||
32 | void kvm_make_request(int req, struct kvm_vcpu *vcpu); | ||
33 | |||
34 | /* Make request @req of all VCPUs of the VM with struct kvm @kvm. */ | ||
35 | bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); | ||
36 | |||
37 | Typically a requester wants the VCPU to perform the activity as soon | ||
38 | as possible after making the request. This means most requests | ||
39 | (kvm_make_request() calls) are followed by a call to kvm_vcpu_kick(), | ||
40 | and kvm_make_all_cpus_request() has the kicking of all VCPUs built | ||
41 | into it. | ||
42 | |||
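
A tiny in-kernel sketch of that pattern, using KVM_REQ_TLB_FLUSH purely as an
example request (the handler name on the VCPU side is hypothetical):

    /* requester side: set the request, then make sure the VCPU notices it */
    kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
    kvm_vcpu_kick(vcpu);

    /* VCPU run loop side, checked before entering the guest */
    if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
            flush_guest_tlb(vcpu);          /* hypothetical handler */
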
43 | VCPU Kicks | ||
44 | ---------- | ||
45 | |||
46 | The goal of a VCPU kick is to bring a VCPU thread out of guest mode in | ||
47 | order to perform some KVM maintenance. To do so, an IPI is sent, forcing | ||
48 | a guest mode exit. However, a VCPU thread may not be in guest mode at the | ||
49 | time of the kick. Therefore, depending on the mode and state of the VCPU | ||
50 | thread, there are two other actions a kick may take. All three actions | ||
51 | are listed below: | ||
52 | |||
53 | 1) Send an IPI. This forces a guest mode exit. | ||
54 | 2) Wake a sleeping VCPU. Sleeping VCPUs are VCPU threads outside guest | ||
55 | mode that wait on waitqueues. Waking them removes the threads from | ||
56 | the waitqueues, allowing the threads to run again. This behavior | ||
57 | may be suppressed, see KVM_REQUEST_NO_WAKEUP below. | ||
58 | 3) Nothing. When the VCPU is not in guest mode and the VCPU thread is not | ||
59 | sleeping, then there is nothing to do. | ||
60 | |||
61 | VCPU Mode | ||
62 | --------- | ||
63 | |||
64 | VCPUs have a mode state, ``vcpu->mode``, that is used to track whether the | ||
65 | guest is running in guest mode or not, as well as some specific | ||
66 | outside guest mode states. The architecture may use ``vcpu->mode`` to | ||
67 | ensure VCPU requests are seen by VCPUs (see "Ensuring Requests Are Seen"), | ||
68 | as well as to avoid sending unnecessary IPIs (see "IPI Reduction"), and | ||
69 | even to ensure IPI acknowledgements are waited upon (see "Waiting for | ||
70 | Acknowledgements"). The following modes are defined: | ||
71 | |||
72 | OUTSIDE_GUEST_MODE | ||
73 | |||
74 | The VCPU thread is outside guest mode. | ||
75 | |||
76 | IN_GUEST_MODE | ||
77 | |||
78 | The VCPU thread is in guest mode. | ||
79 | |||
80 | EXITING_GUEST_MODE | ||
81 | |||
82 | The VCPU thread is transitioning from IN_GUEST_MODE to | ||
83 | OUTSIDE_GUEST_MODE. | ||
84 | |||
85 | READING_SHADOW_PAGE_TABLES | ||
86 | |||
87 | The VCPU thread is outside guest mode, but it wants the sender of | ||
88 | certain VCPU requests, namely KVM_REQ_TLB_FLUSH, to wait until the VCPU | ||
89 | thread is done reading the page tables. | ||
90 | |||
91 | VCPU Request Internals | ||
92 | ====================== | ||
93 | |||
94 | VCPU requests are simply bit indices of the ``vcpu->requests`` bitmap. | ||
95 | This means general bitops, like those documented in [atomic-ops]_, could | ||
96 | also be used, e.g. :: | ||
97 | |||
98 | clear_bit(KVM_REQ_UNHALT & KVM_REQUEST_MASK, &vcpu->requests); | ||
99 | |||
100 | However, VCPU request users should refrain from doing so, as it would | ||
101 | break the abstraction. The first 8 bits are reserved for architecture | ||
102 | independent requests, all additional bits are available for architecture | ||
103 | dependent requests. | ||
104 | |||
105 | Architecture Independent Requests | ||
106 | --------------------------------- | ||
107 | |||
108 | KVM_REQ_TLB_FLUSH | ||
109 | |||
110 | KVM's common MMU notifier may need to flush all of a guest's TLB | ||
111 | entries, calling kvm_flush_remote_tlbs() to do so. Architectures that | ||
112 | choose to use the common kvm_flush_remote_tlbs() implementation will | ||
113 | need to handle this VCPU request. | ||
114 | |||
115 | KVM_REQ_MMU_RELOAD | ||
116 | |||
117 | When shadow page tables are used and memory slots are removed it's | ||
118 | necessary to inform each VCPU to completely refresh the tables. This | ||
119 | request is used for that. | ||
120 | |||
121 | KVM_REQ_PENDING_TIMER | ||
122 | |||
123 | This request may be made from a timer handler run on the host on behalf | ||
124 | of a VCPU. It informs the VCPU thread to inject a timer interrupt. | ||
125 | |||
126 | KVM_REQ_UNHALT | ||
127 | |||
128 | This request may be made from the KVM common function kvm_vcpu_block(), | ||
129 | which is used to emulate an instruction that causes a CPU to halt until | ||
130 | one of an architecture-specific set of events and/or interrupts is | ||
131 | received (determined by checking kvm_arch_vcpu_runnable()). When that | ||
132 | event or interrupt arrives kvm_vcpu_block() makes the request. This is | ||
133 | in contrast to when kvm_vcpu_block() returns due to any other reason, | ||
134 | such as a pending signal, which does not indicate the VCPU's halt | ||
135 | emulation should stop, and therefore does not make the request. | ||
136 | |||
137 | KVM_REQUEST_MASK | ||
138 | ---------------- | ||
139 | |||
140 | VCPU requests should be masked by KVM_REQUEST_MASK before using them with | ||
141 | bitops. This is because only the lower 8 bits are used to represent the | ||
142 | request's number. The upper bits are used as flags. Currently only two | ||
143 | flags are defined. | ||
144 | |||
145 | VCPU Request Flags | ||
146 | ------------------ | ||
147 | |||
148 | KVM_REQUEST_NO_WAKEUP | ||
149 | |||
150 | This flag is applied to requests that only need immediate attention | ||
151 | from VCPUs running in guest mode. That is, sleeping VCPUs do not need | ||
152 | to be awakened for these requests. Sleeping VCPUs will handle the | ||
153 | requests when they are awakened later for some other reason. | ||
154 | |||
155 | KVM_REQUEST_WAIT | ||
156 | |||
157 | When requests with this flag are made with kvm_make_all_cpus_request(), | ||
158 | then the caller will wait for each VCPU to acknowledge its IPI before | ||
159 | proceeding. This flag only applies to VCPUs that would receive IPIs. | ||
160 | If, for example, the VCPU is sleeping, so no IPI is necessary, then | ||
161 | the requesting thread does not wait. This means that this flag may be | ||
162 | safely combined with KVM_REQUEST_NO_WAKEUP. See "Waiting for | ||
163 | Acknowledgements" for more information about requests with | ||
164 | KVM_REQUEST_WAIT. | ||
165 | |||
166 | VCPU Requests with Associated State | ||
167 | =================================== | ||
168 | |||
169 | Requesters that want the receiving VCPU to handle new state need to ensure | ||
170 | the newly written state is observable to the receiving VCPU thread's CPU | ||
171 | by the time it observes the request. This means a write memory barrier | ||
172 | must be inserted after writing the new state and before setting the VCPU | ||
173 | request bit. Additionally, on the receiving VCPU thread's side, a | ||
174 | corresponding read barrier must be inserted after reading the request bit | ||
175 | and before proceeding to read the new state associated with it. See | ||
176 | scenario 3, Message and Flag, of [lwn-mb]_ and the kernel documentation | ||
177 | [memory-barriers]_. | ||
178 | |||
179 | The pair of functions, kvm_check_request() and kvm_make_request(), provide | ||
180 | the memory barriers, allowing this requirement to be handled internally by | ||
181 | the API. | ||
182 | |||
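
Sketched in code (KVM_REQ_EXAMPLE, pending_data and handle_new_state are
made-up names used only to show the ordering):

    /* requesting thread */
    vcpu->arch.pending_data = new_value;            /* 1. publish the new state  */
    kvm_make_request(KVM_REQ_EXAMPLE, vcpu);        /* 2. barrier + set request  */
    kvm_vcpu_kick(vcpu);

    /* receiving VCPU thread */
    if (kvm_check_request(KVM_REQ_EXAMPLE, vcpu))   /* clear request + barrier   */
            handle_new_state(vcpu->arch.pending_data);  /* state write is visible */
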
183 | Ensuring Requests Are Seen | ||
184 | ========================== | ||
185 | |||
186 | When making requests to VCPUs, we want to avoid the receiving VCPU | ||
187 | executing in guest mode for an arbitrarily long time without handling the | ||
188 | request. We can be sure this won't happen as long as we ensure the VCPU | ||
189 | thread checks kvm_request_pending() before entering guest mode and that a | ||
190 | kick will send an IPI to force an exit from guest mode when necessary. | ||
191 | Extra care must be taken to cover the period after the VCPU thread's last | ||
192 | kvm_request_pending() check and before it has entered guest mode, as kick | ||
193 | IPIs will only trigger guest mode exits for VCPU threads that are in guest | ||
194 | mode or at least have already disabled interrupts in order to prepare to | ||
195 | enter guest mode. This means that an optimized implementation (see "IPI | ||
196 | Reduction") must be certain when it's safe to not send the IPI. One | ||
197 | solution, which all architectures except s390 apply, is to: | ||
198 | |||
199 | - set ``vcpu->mode`` to IN_GUEST_MODE between disabling the interrupts and | ||
200 | the last kvm_request_pending() check; | ||
201 | - enable interrupts atomically when entering the guest. | ||
202 | |||
203 | This solution also requires memory barriers to be placed carefully in both | ||
204 | the requesting thread and the receiving VCPU. With the memory barriers we | ||
205 | can exclude the possibility of a VCPU thread observing | ||
206 | !kvm_request_pending() on its last check and then not receiving an IPI for | ||
207 | the next request made of it, even if the request is made immediately after | ||
208 | the check. This is done by way of the Dekker memory barrier pattern | ||
209 | (scenario 10 of [lwn-mb]_). As the Dekker pattern requires two variables, | ||
210 | this solution pairs ``vcpu->mode`` with ``vcpu->requests``. Substituting | ||
211 | them into the pattern gives:: | ||
212 | |||
213 | CPU1 CPU2 | ||
214 | ================= ================= | ||
215 | local_irq_disable(); | ||
216 | WRITE_ONCE(vcpu->mode, IN_GUEST_MODE); kvm_make_request(REQ, vcpu); | ||
217 | smp_mb(); smp_mb(); | ||
218 | if (kvm_request_pending(vcpu)) { if (READ_ONCE(vcpu->mode) == | ||
219 | IN_GUEST_MODE) { | ||
220 | ...abort guest entry... ...send IPI... | ||
221 | } } | ||
222 | |||
223 | As stated above, the IPI is only useful for VCPU threads in guest mode or | ||
224 | that have already disabled interrupts. This is why this specific case of | ||
225 | the Dekker pattern has been extended to disable interrupts before setting | ||
226 | ``vcpu->mode`` to IN_GUEST_MODE. WRITE_ONCE() and READ_ONCE() are used to | ||
227 | pedantically implement the memory barrier pattern, guaranteeing the | ||
228 | compiler doesn't interfere with ``vcpu->mode``'s carefully planned | ||
229 | accesses. | ||
230 | |||
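
The same flow, written as a condensed, hypothetical C fragment of a VCPU run
loop (real architectures interleave more work between these steps):

    local_irq_disable();
    WRITE_ONCE(vcpu->mode, IN_GUEST_MODE);
    smp_mb();                               /* pairs with the requester's barrier */

    if (kvm_request_pending(vcpu)) {
            WRITE_ONCE(vcpu->mode, OUTSIDE_GUEST_MODE);
            local_irq_enable();
            /* abort guest entry and process the pending requests */
    } else {
            /* enter the guest; interrupts are enabled atomically on entry */
    }
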
231 | IPI Reduction | ||
232 | ------------- | ||
233 | |||
234 | As only one IPI is needed to get a VCPU to check for any/all requests, | ||
235 | they may be coalesced. This is easily done by having the first | ||
236 | IPI-sending kick also change the VCPU mode to something !IN_GUEST_MODE. The | ||
237 | transitional state, EXITING_GUEST_MODE, is used for this purpose. | ||
238 | |||
239 | Waiting for Acknowledgements | ||
240 | ---------------------------- | ||
241 | |||
242 | Some requests, those with the KVM_REQUEST_WAIT flag set, require IPIs to | ||
243 | be sent, and the acknowledgements to be waited upon, even when the target | ||
244 | VCPU threads are in modes other than IN_GUEST_MODE. For example, one case | ||
245 | is when a target VCPU thread is in READING_SHADOW_PAGE_TABLES mode, which | ||
246 | is set after disabling interrupts. To support these cases, the | ||
247 | KVM_REQUEST_WAIT flag changes the condition for sending an IPI from | ||
248 | checking that the VCPU is IN_GUEST_MODE to checking that it is not | ||
249 | OUTSIDE_GUEST_MODE. | ||
250 | |||
251 | Request-less VCPU Kicks | ||
252 | ----------------------- | ||
253 | |||
254 | As the determination of whether or not to send an IPI depends on the | ||
255 | two-variable Dekker memory barrier pattern, then it's clear that | ||
256 | request-less VCPU kicks are almost never correct. Without the assurance | ||
257 | that a non-IPI generating kick will still result in an action by the | ||
258 | receiving VCPU, as the final kvm_request_pending() check does for | ||
259 | request-accompanying kicks, the kick may not do anything useful at | ||
260 | all. If, for instance, a request-less kick was made to a VCPU that was | ||
261 | just about to set its mode to IN_GUEST_MODE, meaning no IPI is sent, then | ||
262 | the VCPU thread may continue its entry without actually having done | ||
263 | whatever it was the kick was meant to initiate. | ||
264 | |||
265 | One exception is x86's posted interrupt mechanism. In this case, however, | ||
266 | even the request-less VCPU kick is coupled with the same | ||
267 | local_irq_disable() + smp_mb() pattern described above; the ON bit | ||
268 | (Outstanding Notification) in the posted interrupt descriptor takes the | ||
269 | role of ``vcpu->requests``. When sending a posted interrupt, PIR.ON is | ||
270 | set before reading ``vcpu->mode``; dually, in the VCPU thread, | ||
271 | vmx_sync_pir_to_irr() reads PIR after setting ``vcpu->mode`` to | ||
272 | IN_GUEST_MODE. | ||
273 | |||
274 | Additional Considerations | ||
275 | ========================= | ||
276 | |||
277 | Sleeping VCPUs | ||
278 | -------------- | ||
279 | |||
280 | VCPU threads may need to consider requests before and/or after calling | ||
281 | functions that may put them to sleep, e.g. kvm_vcpu_block(). Whether they | ||
282 | do or not, and, if they do, which requests need consideration, is | ||
283 | architecture dependent. kvm_vcpu_block() calls kvm_arch_vcpu_runnable() | ||
284 | to check if it should awaken. One reason to do so is to provide | ||
285 | architectures a function where requests may be checked if necessary. | ||
286 | |||
287 | Clearing Requests | ||
288 | ----------------- | ||
289 | |||
290 | Generally it only makes sense for the receiving VCPU thread to clear a | ||
291 | request. However, in some circumstances, such as when the requesting | ||
292 | thread and the receiving VCPU thread are executed serially, for example | ||
293 | when they are the same thread, or when they are using some form of concurrency | ||
294 | control to temporarily execute synchronously, then it's possible to know | ||
295 | that the request may be cleared immediately, rather than waiting for the | ||
296 | receiving VCPU thread to handle the request in VCPU RUN. The only current | ||
297 | examples of this are kvm_vcpu_block() calls made by VCPUs to block | ||
298 | themselves. A possible side-effect of that call is to make the | ||
299 | KVM_REQ_UNHALT request, which may then be cleared immediately when the | ||
300 | VCPU returns from the call. | ||
301 | |||
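
This mirrors the WFI handling added in this merge (see the
arch/arm/kvm/handle_exit.c hunk below), roughly:

    kvm_vcpu_block(vcpu);                       /* may set KVM_REQ_UNHALT        */
    kvm_clear_request(KVM_REQ_UNHALT, vcpu);    /* same thread, so safe to clear */
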
302 | References | ||
303 | ========== | ||
304 | |||
305 | .. [atomic-ops] Documentation/core-api/atomic_ops.rst | ||
306 | .. [memory-barriers] Documentation/memory-barriers.txt | ||
307 | .. [lwn-mb] https://lwn.net/Articles/573436/ | ||
diff --git a/MAINTAINERS b/MAINTAINERS
index 75ac9dc85804..1c1d106a3347 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7350,7 +7350,7 @@ F: arch/powerpc/kvm/ | |||
7350 | 7350 | ||
7351 | KERNEL VIRTUAL MACHINE for s390 (KVM/s390) | 7351 | KERNEL VIRTUAL MACHINE for s390 (KVM/s390) |
7352 | M: Christian Borntraeger <borntraeger@de.ibm.com> | 7352 | M: Christian Borntraeger <borntraeger@de.ibm.com> |
7353 | M: Cornelia Huck <cornelia.huck@de.ibm.com> | 7353 | M: Cornelia Huck <cohuck@redhat.com> |
7354 | L: linux-s390@vger.kernel.org | 7354 | L: linux-s390@vger.kernel.org |
7355 | W: http://www.ibm.com/developerworks/linux/linux390/ | 7355 | W: http://www.ibm.com/developerworks/linux/linux390/ |
7356 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git | 7356 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git |
@@ -11268,7 +11268,7 @@ S: Supported | |||
11268 | F: drivers/iommu/s390-iommu.c | 11268 | F: drivers/iommu/s390-iommu.c |
11269 | 11269 | ||
11270 | S390 VFIO-CCW DRIVER | 11270 | S390 VFIO-CCW DRIVER |
11271 | M: Cornelia Huck <cornelia.huck@de.ibm.com> | 11271 | M: Cornelia Huck <cohuck@redhat.com> |
11272 | M: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> | 11272 | M: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> |
11273 | L: linux-s390@vger.kernel.org | 11273 | L: linux-s390@vger.kernel.org |
11274 | L: kvm@vger.kernel.org | 11274 | L: kvm@vger.kernel.org |
@@ -13814,7 +13814,7 @@ F: include/uapi/linux/virtio_*.h | |||
13814 | F: drivers/crypto/virtio/ | 13814 | F: drivers/crypto/virtio/ |
13815 | 13815 | ||
13816 | VIRTIO DRIVERS FOR S390 | 13816 | VIRTIO DRIVERS FOR S390 |
13817 | M: Cornelia Huck <cornelia.huck@de.ibm.com> | 13817 | M: Cornelia Huck <cohuck@redhat.com> |
13818 | M: Halil Pasic <pasic@linux.vnet.ibm.com> | 13818 | M: Halil Pasic <pasic@linux.vnet.ibm.com> |
13819 | L: linux-s390@vger.kernel.org | 13819 | L: linux-s390@vger.kernel.org |
13820 | L: virtualization@lists.linux-foundation.org | 13820 | L: virtualization@lists.linux-foundation.org |
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index f0e66577ce05..127e2dd2e21c 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -44,7 +44,9 @@ | |||
44 | #define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS | 44 | #define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS |
45 | #endif | 45 | #endif |
46 | 46 | ||
47 | #define KVM_REQ_VCPU_EXIT (8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | 47 | #define KVM_REQ_SLEEP \ |
48 | KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | ||
49 | #define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) | ||
48 | 50 | ||
49 | u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); | 51 | u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); |
50 | int __attribute_const__ kvm_target_cpu(void); | 52 | int __attribute_const__ kvm_target_cpu(void); |
@@ -233,8 +235,6 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void); | |||
233 | struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); | 235 | struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); |
234 | void kvm_arm_halt_guest(struct kvm *kvm); | 236 | void kvm_arm_halt_guest(struct kvm *kvm); |
235 | void kvm_arm_resume_guest(struct kvm *kvm); | 237 | void kvm_arm_resume_guest(struct kvm *kvm); |
236 | void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu); | ||
237 | void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu); | ||
238 | 238 | ||
239 | int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); | 239 | int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); |
240 | unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); | 240 | unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); |
@@ -291,20 +291,12 @@ static inline void kvm_arm_init_debug(void) {} | |||
291 | static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} | 291 | static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} |
292 | static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} | 292 | static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} |
293 | static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} | 293 | static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} |
294 | static inline int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, | 294 | |
295 | struct kvm_device_attr *attr) | 295 | int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, |
296 | { | 296 | struct kvm_device_attr *attr); |
297 | return -ENXIO; | 297 | int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, |
298 | } | 298 | struct kvm_device_attr *attr); |
299 | static inline int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, | 299 | int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, |
300 | struct kvm_device_attr *attr) | 300 | struct kvm_device_attr *attr); |
301 | { | ||
302 | return -ENXIO; | ||
303 | } | ||
304 | static inline int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, | ||
305 | struct kvm_device_attr *attr) | ||
306 | { | ||
307 | return -ENXIO; | ||
308 | } | ||
309 | 301 | ||
310 | #endif /* __ARM_KVM_HOST_H__ */ | 302 | #endif /* __ARM_KVM_HOST_H__ */ |
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 5e3c673fa3f4..5db2d4c6a55f 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -203,6 +203,14 @@ struct kvm_arch_memory_slot { | |||
203 | #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff | 203 | #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff |
204 | #define VGIC_LEVEL_INFO_LINE_LEVEL 0 | 204 | #define VGIC_LEVEL_INFO_LINE_LEVEL 0 |
205 | 205 | ||
206 | /* Device Control API on vcpu fd */ | ||
207 | #define KVM_ARM_VCPU_PMU_V3_CTRL 0 | ||
208 | #define KVM_ARM_VCPU_PMU_V3_IRQ 0 | ||
209 | #define KVM_ARM_VCPU_PMU_V3_INIT 1 | ||
210 | #define KVM_ARM_VCPU_TIMER_CTRL 1 | ||
211 | #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 | ||
212 | #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 | ||
213 | |||
206 | #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 | 214 | #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 |
207 | #define KVM_DEV_ARM_ITS_SAVE_TABLES 1 | 215 | #define KVM_DEV_ARM_ITS_SAVE_TABLES 1 |
208 | #define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 | 216 | #define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 |
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index fa6182a40941..1e0784ebbfd6 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -301,3 +301,54 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
301 | { | 301 | { |
302 | return -EINVAL; | 302 | return -EINVAL; |
303 | } | 303 | } |
304 | |||
305 | int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, | ||
306 | struct kvm_device_attr *attr) | ||
307 | { | ||
308 | int ret; | ||
309 | |||
310 | switch (attr->group) { | ||
311 | case KVM_ARM_VCPU_TIMER_CTRL: | ||
312 | ret = kvm_arm_timer_set_attr(vcpu, attr); | ||
313 | break; | ||
314 | default: | ||
315 | ret = -ENXIO; | ||
316 | break; | ||
317 | } | ||
318 | |||
319 | return ret; | ||
320 | } | ||
321 | |||
322 | int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, | ||
323 | struct kvm_device_attr *attr) | ||
324 | { | ||
325 | int ret; | ||
326 | |||
327 | switch (attr->group) { | ||
328 | case KVM_ARM_VCPU_TIMER_CTRL: | ||
329 | ret = kvm_arm_timer_get_attr(vcpu, attr); | ||
330 | break; | ||
331 | default: | ||
332 | ret = -ENXIO; | ||
333 | break; | ||
334 | } | ||
335 | |||
336 | return ret; | ||
337 | } | ||
338 | |||
339 | int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, | ||
340 | struct kvm_device_attr *attr) | ||
341 | { | ||
342 | int ret; | ||
343 | |||
344 | switch (attr->group) { | ||
345 | case KVM_ARM_VCPU_TIMER_CTRL: | ||
346 | ret = kvm_arm_timer_has_attr(vcpu, attr); | ||
347 | break; | ||
348 | default: | ||
349 | ret = -ENXIO; | ||
350 | break; | ||
351 | } | ||
352 | |||
353 | return ret; | ||
354 | } | ||
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index f86a9aaef462..54442e375354 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -72,6 +72,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
72 | trace_kvm_wfx(*vcpu_pc(vcpu), false); | 72 | trace_kvm_wfx(*vcpu_pc(vcpu), false); |
73 | vcpu->stat.wfi_exit_stat++; | 73 | vcpu->stat.wfi_exit_stat++; |
74 | kvm_vcpu_block(vcpu); | 74 | kvm_vcpu_block(vcpu); |
75 | kvm_clear_request(KVM_REQ_UNHALT, vcpu); | ||
75 | } | 76 | } |
76 | 77 | ||
77 | kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); | 78 | kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); |
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index 624a510d31df..ebd2dd46adf7 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -237,8 +237,10 @@ void __hyp_text __noreturn __hyp_panic(int cause) | |||
237 | 237 | ||
238 | vcpu = (struct kvm_vcpu *)read_sysreg(HTPIDR); | 238 | vcpu = (struct kvm_vcpu *)read_sysreg(HTPIDR); |
239 | host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); | 239 | host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); |
240 | __timer_save_state(vcpu); | ||
240 | __deactivate_traps(vcpu); | 241 | __deactivate_traps(vcpu); |
241 | __deactivate_vm(vcpu); | 242 | __deactivate_vm(vcpu); |
243 | __banked_restore_state(host_ctxt); | ||
242 | __sysreg_restore_state(host_ctxt); | 244 | __sysreg_restore_state(host_ctxt); |
243 | } | 245 | } |
244 | 246 | ||
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index 1da8b2d14550..5ed0c3ee33d6 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -37,16 +37,6 @@ static struct kvm_regs cortexa_regs_reset = { | |||
37 | .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, | 37 | .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, |
38 | }; | 38 | }; |
39 | 39 | ||
40 | static const struct kvm_irq_level cortexa_ptimer_irq = { | ||
41 | { .irq = 30 }, | ||
42 | .level = 1, | ||
43 | }; | ||
44 | |||
45 | static const struct kvm_irq_level cortexa_vtimer_irq = { | ||
46 | { .irq = 27 }, | ||
47 | .level = 1, | ||
48 | }; | ||
49 | |||
50 | 40 | ||
51 | /******************************************************************************* | 41 | /******************************************************************************* |
52 | * Exported reset function | 42 | * Exported reset function |
@@ -62,16 +52,12 @@ static const struct kvm_irq_level cortexa_vtimer_irq = { | |||
62 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu) | 52 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu) |
63 | { | 53 | { |
64 | struct kvm_regs *reset_regs; | 54 | struct kvm_regs *reset_regs; |
65 | const struct kvm_irq_level *cpu_vtimer_irq; | ||
66 | const struct kvm_irq_level *cpu_ptimer_irq; | ||
67 | 55 | ||
68 | switch (vcpu->arch.target) { | 56 | switch (vcpu->arch.target) { |
69 | case KVM_ARM_TARGET_CORTEX_A7: | 57 | case KVM_ARM_TARGET_CORTEX_A7: |
70 | case KVM_ARM_TARGET_CORTEX_A15: | 58 | case KVM_ARM_TARGET_CORTEX_A15: |
71 | reset_regs = &cortexa_regs_reset; | 59 | reset_regs = &cortexa_regs_reset; |
72 | vcpu->arch.midr = read_cpuid_id(); | 60 | vcpu->arch.midr = read_cpuid_id(); |
73 | cpu_vtimer_irq = &cortexa_vtimer_irq; | ||
74 | cpu_ptimer_irq = &cortexa_ptimer_irq; | ||
75 | break; | 61 | break; |
76 | default: | 62 | default: |
77 | return -ENODEV; | 63 | return -ENODEV; |
@@ -84,5 +70,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) | |||
84 | kvm_reset_coprocs(vcpu); | 70 | kvm_reset_coprocs(vcpu); |
85 | 71 | ||
86 | /* Reset arch_timer context */ | 72 | /* Reset arch_timer context */ |
87 | return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq); | 73 | return kvm_timer_vcpu_reset(vcpu); |
88 | } | 74 | } |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9f7a934ff707..192208ea2842 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -488,6 +488,17 @@ config CAVIUM_ERRATUM_27456 | |||
488 | 488 | ||
489 | If unsure, say Y. | 489 | If unsure, say Y. |
490 | 490 | ||
491 | config CAVIUM_ERRATUM_30115 | ||
492 | bool "Cavium erratum 30115: Guest may disable interrupts in host" | ||
493 | default y | ||
494 | help | ||
495 | On ThunderX T88 pass 1.x through 2.2, T81 pass 1.0 through | ||
496 | 1.2, and T83 Pass 1.0, KVM guest execution may disable | ||
497 | interrupts in the host. Trapping both GICv3 group-0 and group-1 | ||
498 | accesses sidesteps the issue. | ||
499 | |||
500 | If unsure, say Y. | ||
501 | |||
491 | config QCOM_FALKOR_ERRATUM_1003 | 502 | config QCOM_FALKOR_ERRATUM_1003 |
492 | bool "Falkor E1003: Incorrect translation due to ASID change" | 503 | bool "Falkor E1003: Incorrect translation due to ASID change" |
493 | default y | 504 | default y |
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 1a98bc8602a2..8cef47fa2218 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -89,7 +89,7 @@ static inline void gic_write_ctlr(u32 val) | |||
89 | 89 | ||
90 | static inline void gic_write_grpen1(u32 val) | 90 | static inline void gic_write_grpen1(u32 val) |
91 | { | 91 | { |
92 | write_sysreg_s(val, SYS_ICC_GRPEN1_EL1); | 92 | write_sysreg_s(val, SYS_ICC_IGRPEN1_EL1); |
93 | isb(); | 93 | isb(); |
94 | } | 94 | } |
95 | 95 | ||
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index b3aab8a17868..8d2272c6822c 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -38,7 +38,8 @@ | |||
38 | #define ARM64_WORKAROUND_REPEAT_TLBI 17 | 38 | #define ARM64_WORKAROUND_REPEAT_TLBI 17 |
39 | #define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18 | 39 | #define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18 |
40 | #define ARM64_WORKAROUND_858921 19 | 40 | #define ARM64_WORKAROUND_858921 19 |
41 | #define ARM64_WORKAROUND_CAVIUM_30115 20 | ||
41 | 42 | ||
42 | #define ARM64_NCAPS 20 | 43 | #define ARM64_NCAPS 21 |
43 | 44 | ||
44 | #endif /* __ASM_CPUCAPS_H */ | 45 | #endif /* __ASM_CPUCAPS_H */ |
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 0984d1b3a8f2..235e77d98261 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -86,6 +86,7 @@ | |||
86 | 86 | ||
87 | #define CAVIUM_CPU_PART_THUNDERX 0x0A1 | 87 | #define CAVIUM_CPU_PART_THUNDERX 0x0A1 |
88 | #define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 | 88 | #define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 |
89 | #define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3 | ||
89 | 90 | ||
90 | #define BRCM_CPU_PART_VULCAN 0x516 | 91 | #define BRCM_CPU_PART_VULCAN 0x516 |
91 | 92 | ||
@@ -96,6 +97,7 @@ | |||
96 | #define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73) | 97 | #define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73) |
97 | #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) | 98 | #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) |
98 | #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) | 99 | #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) |
100 | #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) | ||
99 | #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) | 101 | #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) |
100 | 102 | ||
101 | #ifndef __ASSEMBLY__ | 103 | #ifndef __ASSEMBLY__ |
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 28bf02efce76..8cabd57b6348 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -19,6 +19,7 @@ | |||
19 | #define __ASM_ESR_H | 19 | #define __ASM_ESR_H |
20 | 20 | ||
21 | #include <asm/memory.h> | 21 | #include <asm/memory.h> |
22 | #include <asm/sysreg.h> | ||
22 | 23 | ||
23 | #define ESR_ELx_EC_UNKNOWN (0x00) | 24 | #define ESR_ELx_EC_UNKNOWN (0x00) |
24 | #define ESR_ELx_EC_WFx (0x01) | 25 | #define ESR_ELx_EC_WFx (0x01) |
@@ -182,6 +183,29 @@ | |||
182 | #define ESR_ELx_SYS64_ISS_SYS_CNTFRQ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \ | 183 | #define ESR_ELx_SYS64_ISS_SYS_CNTFRQ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \ |
183 | ESR_ELx_SYS64_ISS_DIR_READ) | 184 | ESR_ELx_SYS64_ISS_DIR_READ) |
184 | 185 | ||
186 | #define esr_sys64_to_sysreg(e) \ | ||
187 | sys_reg((((e) & ESR_ELx_SYS64_ISS_OP0_MASK) >> \ | ||
188 | ESR_ELx_SYS64_ISS_OP0_SHIFT), \ | ||
189 | (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >> \ | ||
190 | ESR_ELx_SYS64_ISS_OP1_SHIFT), \ | ||
191 | (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >> \ | ||
192 | ESR_ELx_SYS64_ISS_CRN_SHIFT), \ | ||
193 | (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >> \ | ||
194 | ESR_ELx_SYS64_ISS_CRM_SHIFT), \ | ||
195 | (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \ | ||
196 | ESR_ELx_SYS64_ISS_OP2_SHIFT)) | ||
197 | |||
198 | #define esr_cp15_to_sysreg(e) \ | ||
199 | sys_reg(3, \ | ||
200 | (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >> \ | ||
201 | ESR_ELx_SYS64_ISS_OP1_SHIFT), \ | ||
202 | (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >> \ | ||
203 | ESR_ELx_SYS64_ISS_CRN_SHIFT), \ | ||
204 | (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >> \ | ||
205 | ESR_ELx_SYS64_ISS_CRM_SHIFT), \ | ||
206 | (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \ | ||
207 | ESR_ELx_SYS64_ISS_OP2_SHIFT)) | ||
208 | |||
185 | #ifndef __ASSEMBLY__ | 209 | #ifndef __ASSEMBLY__ |
186 | #include <asm/types.h> | 210 | #include <asm/types.h> |
187 | 211 | ||
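Editor's note: the two macros added above repack the Op0/Op1/CRn/CRm/Op2 fields of a trapped system-register access into a single sys_reg() index so the hypervisor can look the register up in one table. The stand-alone sketch below mirrors that decode in userspace; the ISS field positions and the sys_reg packing convention are assumptions chosen for illustration, not the kernel's ESR_ELx_SYS64_ISS_* or sys_reg() definitions, and the code is not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Assumed ISS field layout for a trapped MSR/MRS (EC = 0x18). */
#define ISS_OP0_SHIFT	20
#define ISS_OP0_MASK	(0x3u << ISS_OP0_SHIFT)
#define ISS_OP2_SHIFT	17
#define ISS_OP2_MASK	(0x7u << ISS_OP2_SHIFT)
#define ISS_OP1_SHIFT	14
#define ISS_OP1_MASK	(0x7u << ISS_OP1_SHIFT)
#define ISS_CRN_SHIFT	10
#define ISS_CRN_MASK	(0xfu << ISS_CRN_SHIFT)
#define ISS_CRM_SHIFT	1
#define ISS_CRM_MASK	(0xfu << ISS_CRM_SHIFT)

/* Assumed packing convention, standing in for the kernel's sys_reg(). */
static uint32_t sys_reg_index(uint32_t op0, uint32_t op1, uint32_t crn,
			      uint32_t crm, uint32_t op2)
{
	return (op0 << 19) | (op1 << 16) | (crn << 12) | (crm << 8) | (op2 << 5);
}

/* Same idea as esr_sys64_to_sysreg(): pull each field out, repack it. */
static uint32_t esr_iss_to_sysreg(uint32_t iss)
{
	return sys_reg_index((iss & ISS_OP0_MASK) >> ISS_OP0_SHIFT,
			     (iss & ISS_OP1_MASK) >> ISS_OP1_SHIFT,
			     (iss & ISS_CRN_MASK) >> ISS_CRN_SHIFT,
			     (iss & ISS_CRM_MASK) >> ISS_CRM_SHIFT,
			     (iss & ISS_OP2_MASK) >> ISS_OP2_SHIFT);
}

int main(void)
{
	/* ICC_IAR1_EL1 is op0=3, op1=0, CRn=12, CRm=12, op2=0. */
	uint32_t iss = (3u << ISS_OP0_SHIFT) | (12u << ISS_CRN_SHIFT) |
		       (12u << ISS_CRM_SHIFT);

	printf("decoded index: %#x (expected %#x)\n",
	       (unsigned)esr_iss_to_sysreg(iss),
	       (unsigned)sys_reg_index(3, 0, 12, 12, 0));
	return 0;
}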
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 1f252a95bc02..d68630007b14 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h | |||
@@ -42,7 +42,9 @@ | |||
42 | 42 | ||
43 | #define KVM_VCPU_MAX_FEATURES 4 | 43 | #define KVM_VCPU_MAX_FEATURES 4 |
44 | 44 | ||
45 | #define KVM_REQ_VCPU_EXIT (8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | 45 | #define KVM_REQ_SLEEP \ |
46 | KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | ||
47 | #define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) | ||
46 | 48 | ||
47 | int __attribute_const__ kvm_target_cpu(void); | 49 | int __attribute_const__ kvm_target_cpu(void); |
48 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu); | 50 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu); |
@@ -334,8 +336,6 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void); | |||
334 | struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); | 336 | struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); |
335 | void kvm_arm_halt_guest(struct kvm *kvm); | 337 | void kvm_arm_halt_guest(struct kvm *kvm); |
336 | void kvm_arm_resume_guest(struct kvm *kvm); | 338 | void kvm_arm_resume_guest(struct kvm *kvm); |
337 | void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu); | ||
338 | void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu); | ||
339 | 339 | ||
340 | u64 __kvm_call_hyp(void *hypfn, ...); | 340 | u64 __kvm_call_hyp(void *hypfn, ...); |
341 | #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) | 341 | #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) |
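Editor's note: KVM_REQ_SLEEP and KVM_REQ_IRQ_PENDING are now built with the generic KVM_ARCH_REQ()/KVM_ARCH_REQ_FLAGS() helpers and consumed with the common request API (kvm_make_request(), kvm_check_request(), kvm_request_pending()). The toy userspace model below only illustrates the bitmap pattern those helpers implement; it is not the kernel code and the names are invented for the sketch.

#include <stdbool.h>
#include <stdio.h>

struct toy_vcpu {
	unsigned long requests;		/* one bit per pending request */
};

enum { TOY_REQ_SLEEP = 0, TOY_REQ_IRQ_PENDING = 1 };

static void toy_make_request(struct toy_vcpu *v, int req)
{
	v->requests |= 1UL << req;	/* requester side */
}

static bool toy_check_request(struct toy_vcpu *v, int req)
{
	if (!(v->requests & (1UL << req)))
		return false;
	v->requests &= ~(1UL << req);	/* consume the request exactly once */
	return true;
}

int main(void)
{
	struct toy_vcpu v = { 0 };

	toy_make_request(&v, TOY_REQ_IRQ_PENDING);

	/* The run loop drains all requests before (re)entering the guest. */
	while (v.requests) {
		if (toy_check_request(&v, TOY_REQ_SLEEP))
			puts("would put the vcpu to sleep");
		if (toy_check_request(&v, TOY_REQ_IRQ_PENDING))
			puts("would flush pending interrupts to the vgic");
	}
	return 0;
}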
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index b18e852d27e8..4572a9b560fa 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h | |||
@@ -127,6 +127,7 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); | |||
127 | 127 | ||
128 | void __vgic_v3_save_state(struct kvm_vcpu *vcpu); | 128 | void __vgic_v3_save_state(struct kvm_vcpu *vcpu); |
129 | void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); | 129 | void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); |
130 | int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); | ||
130 | 131 | ||
131 | void __timer_save_state(struct kvm_vcpu *vcpu); | 132 | void __timer_save_state(struct kvm_vcpu *vcpu); |
132 | void __timer_restore_state(struct kvm_vcpu *vcpu); | 133 | void __timer_restore_state(struct kvm_vcpu *vcpu); |
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b4d13d9267ff..16e44fa9b3b6 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h | |||
@@ -180,14 +180,31 @@ | |||
180 | 180 | ||
181 | #define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) | 181 | #define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) |
182 | 182 | ||
183 | #define SYS_ICC_IAR0_EL1 sys_reg(3, 0, 12, 8, 0) | ||
184 | #define SYS_ICC_EOIR0_EL1 sys_reg(3, 0, 12, 8, 1) | ||
185 | #define SYS_ICC_HPPIR0_EL1 sys_reg(3, 0, 12, 8, 2) | ||
186 | #define SYS_ICC_BPR0_EL1 sys_reg(3, 0, 12, 8, 3) | ||
187 | #define SYS_ICC_AP0Rn_EL1(n) sys_reg(3, 0, 12, 8, 4 | n) | ||
188 | #define SYS_ICC_AP0R0_EL1 SYS_ICC_AP0Rn_EL1(0) | ||
189 | #define SYS_ICC_AP0R1_EL1 SYS_ICC_AP0Rn_EL1(1) | ||
190 | #define SYS_ICC_AP0R2_EL1 SYS_ICC_AP0Rn_EL1(2) | ||
191 | #define SYS_ICC_AP0R3_EL1 SYS_ICC_AP0Rn_EL1(3) | ||
192 | #define SYS_ICC_AP1Rn_EL1(n) sys_reg(3, 0, 12, 9, n) | ||
193 | #define SYS_ICC_AP1R0_EL1 SYS_ICC_AP1Rn_EL1(0) | ||
194 | #define SYS_ICC_AP1R1_EL1 SYS_ICC_AP1Rn_EL1(1) | ||
195 | #define SYS_ICC_AP1R2_EL1 SYS_ICC_AP1Rn_EL1(2) | ||
196 | #define SYS_ICC_AP1R3_EL1 SYS_ICC_AP1Rn_EL1(3) | ||
183 | #define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) | 197 | #define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) |
198 | #define SYS_ICC_RPR_EL1 sys_reg(3, 0, 12, 11, 3) | ||
184 | #define SYS_ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) | 199 | #define SYS_ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) |
185 | #define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) | 200 | #define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) |
186 | #define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) | 201 | #define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) |
202 | #define SYS_ICC_HPPIR1_EL1 sys_reg(3, 0, 12, 12, 2) | ||
187 | #define SYS_ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3) | 203 | #define SYS_ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3) |
188 | #define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) | 204 | #define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) |
189 | #define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) | 205 | #define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) |
190 | #define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) | 206 | #define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6) |
207 | #define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) | ||
191 | 208 | ||
192 | #define SYS_CONTEXTIDR_EL1 sys_reg(3, 0, 13, 0, 1) | 209 | #define SYS_CONTEXTIDR_EL1 sys_reg(3, 0, 13, 0, 1) |
193 | #define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4) | 210 | #define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4) |
@@ -287,8 +304,8 @@ | |||
287 | #define SCTLR_ELx_M 1 | 304 | #define SCTLR_ELx_M 1 |
288 | 305 | ||
289 | #define SCTLR_EL2_RES1 ((1 << 4) | (1 << 5) | (1 << 11) | (1 << 16) | \ | 306 | #define SCTLR_EL2_RES1 ((1 << 4) | (1 << 5) | (1 << 11) | (1 << 16) | \ |
290 | (1 << 16) | (1 << 18) | (1 << 22) | (1 << 23) | \ | 307 | (1 << 18) | (1 << 22) | (1 << 23) | (1 << 28) | \ |
291 | (1 << 28) | (1 << 29)) | 308 | (1 << 29)) |
292 | 309 | ||
293 | #define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ | 310 | #define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ |
294 | SCTLR_ELx_SA | SCTLR_ELx_I) | 311 | SCTLR_ELx_SA | SCTLR_ELx_I) |
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 70eea2ecc663..9f3ca24bbcc6 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h | |||
@@ -232,6 +232,9 @@ struct kvm_arch_memory_slot { | |||
232 | #define KVM_ARM_VCPU_PMU_V3_CTRL 0 | 232 | #define KVM_ARM_VCPU_PMU_V3_CTRL 0 |
233 | #define KVM_ARM_VCPU_PMU_V3_IRQ 0 | 233 | #define KVM_ARM_VCPU_PMU_V3_IRQ 0 |
234 | #define KVM_ARM_VCPU_PMU_V3_INIT 1 | 234 | #define KVM_ARM_VCPU_PMU_V3_INIT 1 |
235 | #define KVM_ARM_VCPU_TIMER_CTRL 1 | ||
236 | #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 | ||
237 | #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 | ||
235 | 238 | ||
236 | /* KVM_IRQ_LINE irq field index values */ | 239 | /* KVM_IRQ_LINE irq field index values */ |
237 | #define KVM_ARM_IRQ_TYPE_SHIFT 24 | 240 | #define KVM_ARM_IRQ_TYPE_SHIFT 24 |
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 2ed2a7657711..0e27f86ee709 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c | |||
@@ -133,6 +133,27 @@ const struct arm64_cpu_capabilities arm64_errata[] = { | |||
133 | MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00), | 133 | MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00), |
134 | }, | 134 | }, |
135 | #endif | 135 | #endif |
136 | #ifdef CONFIG_CAVIUM_ERRATUM_30115 | ||
137 | { | ||
138 | /* Cavium ThunderX, T88 pass 1.x - 2.2 */ | ||
139 | .desc = "Cavium erratum 30115", | ||
140 | .capability = ARM64_WORKAROUND_CAVIUM_30115, | ||
141 | MIDR_RANGE(MIDR_THUNDERX, 0x00, | ||
142 | (1 << MIDR_VARIANT_SHIFT) | 2), | ||
143 | }, | ||
144 | { | ||
145 | /* Cavium ThunderX, T81 pass 1.0 - 1.2 */ | ||
146 | .desc = "Cavium erratum 30115", | ||
147 | .capability = ARM64_WORKAROUND_CAVIUM_30115, | ||
148 | MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x02), | ||
149 | }, | ||
150 | { | ||
151 | /* Cavium ThunderX, T83 pass 1.0 */ | ||
152 | .desc = "Cavium erratum 30115", | ||
153 | .capability = ARM64_WORKAROUND_CAVIUM_30115, | ||
154 | MIDR_RANGE(MIDR_THUNDERX_83XX, 0x00, 0x00), | ||
155 | }, | ||
156 | #endif | ||
136 | { | 157 | { |
137 | .desc = "Mismatched cache line size", | 158 | .desc = "Mismatched cache line size", |
138 | .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, | 159 | .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, |
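Editor's note: each erratum 30115 entry above bounds the affected silicon by (variant, revision), which is why the T88 "pass 1.x - 2.2" entry uses (1 << MIDR_VARIANT_SHIFT) | 2 as its upper limit (variant 1, revision 2). The sketch below shows only that window check; the real MIDR_RANGE() macro also matches implementer and part number, and the constants here are illustrative rather than the kernel's.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* MIDR_EL1: revision in bits [3:0], variant in bits [23:20] (assumed here). */
#define VARIANT_SHIFT	20
#define VARIANT_MASK	(0xfu << VARIANT_SHIFT)
#define REVISION_MASK	0xfu

/* Collapse a MIDR value to just its (variant, revision) pair, order-preserving. */
static uint32_t var_rev(uint32_t midr)
{
	return (midr & VARIANT_MASK) | (midr & REVISION_MASK);
}

static bool midr_in_range(uint32_t midr, uint32_t min, uint32_t max)
{
	uint32_t vr = var_rev(midr);

	return vr >= min && vr <= max;
}

int main(void)
{
	/* T88 pass 2.1: variant 1, revision 1, inside the 1.x .. 2.2 window. */
	uint32_t midr = (1u << VARIANT_SHIFT) | 1u;
	uint32_t min = 0x00;
	uint32_t max = (1u << VARIANT_SHIFT) | 2u;

	printf("affected: %s\n", midr_in_range(midr, min, max) ? "yes" : "no");
	return 0;
}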
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index b37446a8ffdb..5c7f657dd207 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c | |||
@@ -390,6 +390,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, | |||
390 | case KVM_ARM_VCPU_PMU_V3_CTRL: | 390 | case KVM_ARM_VCPU_PMU_V3_CTRL: |
391 | ret = kvm_arm_pmu_v3_set_attr(vcpu, attr); | 391 | ret = kvm_arm_pmu_v3_set_attr(vcpu, attr); |
392 | break; | 392 | break; |
393 | case KVM_ARM_VCPU_TIMER_CTRL: | ||
394 | ret = kvm_arm_timer_set_attr(vcpu, attr); | ||
395 | break; | ||
393 | default: | 396 | default: |
394 | ret = -ENXIO; | 397 | ret = -ENXIO; |
395 | break; | 398 | break; |
@@ -407,6 +410,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, | |||
407 | case KVM_ARM_VCPU_PMU_V3_CTRL: | 410 | case KVM_ARM_VCPU_PMU_V3_CTRL: |
408 | ret = kvm_arm_pmu_v3_get_attr(vcpu, attr); | 411 | ret = kvm_arm_pmu_v3_get_attr(vcpu, attr); |
409 | break; | 412 | break; |
413 | case KVM_ARM_VCPU_TIMER_CTRL: | ||
414 | ret = kvm_arm_timer_get_attr(vcpu, attr); | ||
415 | break; | ||
410 | default: | 416 | default: |
411 | ret = -ENXIO; | 417 | ret = -ENXIO; |
412 | break; | 418 | break; |
@@ -424,6 +430,9 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, | |||
424 | case KVM_ARM_VCPU_PMU_V3_CTRL: | 430 | case KVM_ARM_VCPU_PMU_V3_CTRL: |
425 | ret = kvm_arm_pmu_v3_has_attr(vcpu, attr); | 431 | ret = kvm_arm_pmu_v3_has_attr(vcpu, attr); |
426 | break; | 432 | break; |
433 | case KVM_ARM_VCPU_TIMER_CTRL: | ||
434 | ret = kvm_arm_timer_has_attr(vcpu, attr); | ||
435 | break; | ||
427 | default: | 436 | default: |
428 | ret = -ENXIO; | 437 | ret = -ENXIO; |
429 | break; | 438 | break; |
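Editor's note: the new KVM_ARM_VCPU_TIMER_CTRL attribute group lets userspace pick the PPI used for the virtual or physical timer before the vcpu first runs. A minimal userspace-side sketch is below; it assumes a uapi header set that already carries these definitions and that the attribute payload is a pointer to the interrupt number, so treat the details as an assumption and consult the in-tree vcpu device documentation for the authoritative contract.

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* vcpu_fd comes from KVM_CREATE_VCPU; ppi should be a PPI number (16-31). */
static int set_vtimer_irq(int vcpu_fd, int ppi)
{
	struct kvm_device_attr attr = {
		.group = KVM_ARM_VCPU_TIMER_CTRL,
		.attr  = KVM_ARM_VCPU_TIMER_IRQ_VTIMER,
		.addr  = (__u64)(unsigned long)&ppi,	/* points at the IRQ number */
	};

	if (ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr) < 0) {
		perror("KVM_SET_DEVICE_ATTR(timer)");
		return -1;
	}
	return 0;
}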
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index fa1b18e364fc..17d8a1677a0b 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c | |||
@@ -89,6 +89,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
89 | trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); | 89 | trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); |
90 | vcpu->stat.wfi_exit_stat++; | 90 | vcpu->stat.wfi_exit_stat++; |
91 | kvm_vcpu_block(vcpu); | 91 | kvm_vcpu_block(vcpu); |
92 | kvm_clear_request(KVM_REQ_UNHALT, vcpu); | ||
92 | } | 93 | } |
93 | 94 | ||
94 | kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); | 95 | kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); |
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index aede1658aeda..945e79c641c4 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c | |||
@@ -350,6 +350,20 @@ again: | |||
350 | } | 350 | } |
351 | } | 351 | } |
352 | 352 | ||
353 | if (static_branch_unlikely(&vgic_v3_cpuif_trap) && | ||
354 | exit_code == ARM_EXCEPTION_TRAP && | ||
355 | (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || | ||
356 | kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { | ||
357 | int ret = __vgic_v3_perform_cpuif_access(vcpu); | ||
358 | |||
359 | if (ret == 1) { | ||
360 | __skip_instr(vcpu); | ||
361 | goto again; | ||
362 | } | ||
363 | |||
364 | /* 0 falls through to be handled out of EL2 */ | ||
365 | } | ||
366 | |||
353 | fp_enabled = __fpsimd_enabled(); | 367 | fp_enabled = __fpsimd_enabled(); |
354 | 368 | ||
355 | __sysreg_save_guest_state(guest_ctxt); | 369 | __sysreg_save_guest_state(guest_ctxt); |
@@ -422,6 +436,7 @@ void __hyp_text __noreturn __hyp_panic(void) | |||
422 | 436 | ||
423 | vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2); | 437 | vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2); |
424 | host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); | 438 | host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); |
439 | __timer_save_state(vcpu); | ||
425 | __deactivate_traps(vcpu); | 440 | __deactivate_traps(vcpu); |
426 | __deactivate_vm(vcpu); | 441 | __deactivate_vm(vcpu); |
427 | __sysreg_restore_host_state(host_ctxt); | 442 | __sysreg_restore_host_state(host_ctxt); |
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 561badf93de8..3256b9228e75 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c | |||
@@ -46,16 +46,6 @@ static const struct kvm_regs default_regs_reset32 = { | |||
46 | COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT), | 46 | COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT), |
47 | }; | 47 | }; |
48 | 48 | ||
49 | static const struct kvm_irq_level default_ptimer_irq = { | ||
50 | .irq = 30, | ||
51 | .level = 1, | ||
52 | }; | ||
53 | |||
54 | static const struct kvm_irq_level default_vtimer_irq = { | ||
55 | .irq = 27, | ||
56 | .level = 1, | ||
57 | }; | ||
58 | |||
59 | static bool cpu_has_32bit_el1(void) | 49 | static bool cpu_has_32bit_el1(void) |
60 | { | 50 | { |
61 | u64 pfr0; | 51 | u64 pfr0; |
@@ -108,8 +98,6 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) | |||
108 | */ | 98 | */ |
109 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu) | 99 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu) |
110 | { | 100 | { |
111 | const struct kvm_irq_level *cpu_vtimer_irq; | ||
112 | const struct kvm_irq_level *cpu_ptimer_irq; | ||
113 | const struct kvm_regs *cpu_reset; | 101 | const struct kvm_regs *cpu_reset; |
114 | 102 | ||
115 | switch (vcpu->arch.target) { | 103 | switch (vcpu->arch.target) { |
@@ -122,8 +110,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) | |||
122 | cpu_reset = &default_regs_reset; | 110 | cpu_reset = &default_regs_reset; |
123 | } | 111 | } |
124 | 112 | ||
125 | cpu_vtimer_irq = &default_vtimer_irq; | ||
126 | cpu_ptimer_irq = &default_ptimer_irq; | ||
127 | break; | 113 | break; |
128 | } | 114 | } |
129 | 115 | ||
@@ -137,5 +123,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) | |||
137 | kvm_pmu_vcpu_reset(vcpu); | 123 | kvm_pmu_vcpu_reset(vcpu); |
138 | 124 | ||
139 | /* Reset timer */ | 125 | /* Reset timer */ |
140 | return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq); | 126 | return kvm_timer_vcpu_reset(vcpu); |
141 | } | 127 | } |
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 0fe27024a2e1..77862881ae86 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c | |||
@@ -56,7 +56,8 @@ | |||
56 | */ | 56 | */ |
57 | 57 | ||
58 | static bool read_from_write_only(struct kvm_vcpu *vcpu, | 58 | static bool read_from_write_only(struct kvm_vcpu *vcpu, |
59 | const struct sys_reg_params *params) | 59 | struct sys_reg_params *params, |
60 | const struct sys_reg_desc *r) | ||
60 | { | 61 | { |
61 | WARN_ONCE(1, "Unexpected sys_reg read to write-only register\n"); | 62 | WARN_ONCE(1, "Unexpected sys_reg read to write-only register\n"); |
62 | print_sys_reg_instr(params); | 63 | print_sys_reg_instr(params); |
@@ -64,6 +65,16 @@ static bool read_from_write_only(struct kvm_vcpu *vcpu, | |||
64 | return false; | 65 | return false; |
65 | } | 66 | } |
66 | 67 | ||
68 | static bool write_to_read_only(struct kvm_vcpu *vcpu, | ||
69 | struct sys_reg_params *params, | ||
70 | const struct sys_reg_desc *r) | ||
71 | { | ||
72 | WARN_ONCE(1, "Unexpected sys_reg write to read-only register\n"); | ||
73 | print_sys_reg_instr(params); | ||
74 | kvm_inject_undefined(vcpu); | ||
75 | return false; | ||
76 | } | ||
77 | |||
67 | /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ | 78 | /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ |
68 | static u32 cache_levels; | 79 | static u32 cache_levels; |
69 | 80 | ||
@@ -93,7 +104,7 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, | |||
93 | const struct sys_reg_desc *r) | 104 | const struct sys_reg_desc *r) |
94 | { | 105 | { |
95 | if (!p->is_write) | 106 | if (!p->is_write) |
96 | return read_from_write_only(vcpu, p); | 107 | return read_from_write_only(vcpu, p, r); |
97 | 108 | ||
98 | kvm_set_way_flush(vcpu); | 109 | kvm_set_way_flush(vcpu); |
99 | return true; | 110 | return true; |
@@ -135,7 +146,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, | |||
135 | const struct sys_reg_desc *r) | 146 | const struct sys_reg_desc *r) |
136 | { | 147 | { |
137 | if (!p->is_write) | 148 | if (!p->is_write) |
138 | return read_from_write_only(vcpu, p); | 149 | return read_from_write_only(vcpu, p, r); |
139 | 150 | ||
140 | vgic_v3_dispatch_sgi(vcpu, p->regval); | 151 | vgic_v3_dispatch_sgi(vcpu, p->regval); |
141 | 152 | ||
@@ -773,7 +784,7 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | |||
773 | return trap_raz_wi(vcpu, p, r); | 784 | return trap_raz_wi(vcpu, p, r); |
774 | 785 | ||
775 | if (!p->is_write) | 786 | if (!p->is_write) |
776 | return read_from_write_only(vcpu, p); | 787 | return read_from_write_only(vcpu, p, r); |
777 | 788 | ||
778 | if (pmu_write_swinc_el0_disabled(vcpu)) | 789 | if (pmu_write_swinc_el0_disabled(vcpu)) |
779 | return false; | 790 | return false; |
@@ -953,7 +964,15 @@ static const struct sys_reg_desc sys_reg_descs[] = { | |||
953 | 964 | ||
954 | { SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 }, | 965 | { SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 }, |
955 | 966 | ||
967 | { SYS_DESC(SYS_ICC_IAR0_EL1), write_to_read_only }, | ||
968 | { SYS_DESC(SYS_ICC_EOIR0_EL1), read_from_write_only }, | ||
969 | { SYS_DESC(SYS_ICC_HPPIR0_EL1), write_to_read_only }, | ||
970 | { SYS_DESC(SYS_ICC_DIR_EL1), read_from_write_only }, | ||
971 | { SYS_DESC(SYS_ICC_RPR_EL1), write_to_read_only }, | ||
956 | { SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi }, | 972 | { SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi }, |
973 | { SYS_DESC(SYS_ICC_IAR1_EL1), write_to_read_only }, | ||
974 | { SYS_DESC(SYS_ICC_EOIR1_EL1), read_from_write_only }, | ||
975 | { SYS_DESC(SYS_ICC_HPPIR1_EL1), write_to_read_only }, | ||
957 | { SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre }, | 976 | { SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre }, |
958 | 977 | ||
959 | { SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 }, | 978 | { SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 }, |
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index 6260b69e5622..116786d2e8e8 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c | |||
@@ -268,36 +268,21 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | |||
268 | return true; | 268 | return true; |
269 | } | 269 | } |
270 | static const struct sys_reg_desc gic_v3_icc_reg_descs[] = { | 270 | static const struct sys_reg_desc gic_v3_icc_reg_descs[] = { |
271 | /* ICC_PMR_EL1 */ | 271 | { SYS_DESC(SYS_ICC_PMR_EL1), access_gic_pmr }, |
272 | { Op0(3), Op1(0), CRn(4), CRm(6), Op2(0), access_gic_pmr }, | 272 | { SYS_DESC(SYS_ICC_BPR0_EL1), access_gic_bpr0 }, |
273 | /* ICC_BPR0_EL1 */ | 273 | { SYS_DESC(SYS_ICC_AP0R0_EL1), access_gic_ap0r }, |
274 | { Op0(3), Op1(0), CRn(12), CRm(8), Op2(3), access_gic_bpr0 }, | 274 | { SYS_DESC(SYS_ICC_AP0R1_EL1), access_gic_ap0r }, |
275 | /* ICC_AP0R0_EL1 */ | 275 | { SYS_DESC(SYS_ICC_AP0R2_EL1), access_gic_ap0r }, |
276 | { Op0(3), Op1(0), CRn(12), CRm(8), Op2(4), access_gic_ap0r }, | 276 | { SYS_DESC(SYS_ICC_AP0R3_EL1), access_gic_ap0r }, |
277 | /* ICC_AP0R1_EL1 */ | 277 | { SYS_DESC(SYS_ICC_AP1R0_EL1), access_gic_ap1r }, |
278 | { Op0(3), Op1(0), CRn(12), CRm(8), Op2(5), access_gic_ap0r }, | 278 | { SYS_DESC(SYS_ICC_AP1R1_EL1), access_gic_ap1r }, |
279 | /* ICC_AP0R2_EL1 */ | 279 | { SYS_DESC(SYS_ICC_AP1R2_EL1), access_gic_ap1r }, |
280 | { Op0(3), Op1(0), CRn(12), CRm(8), Op2(6), access_gic_ap0r }, | 280 | { SYS_DESC(SYS_ICC_AP1R3_EL1), access_gic_ap1r }, |
281 | /* ICC_AP0R3_EL1 */ | 281 | { SYS_DESC(SYS_ICC_BPR1_EL1), access_gic_bpr1 }, |
282 | { Op0(3), Op1(0), CRn(12), CRm(8), Op2(7), access_gic_ap0r }, | 282 | { SYS_DESC(SYS_ICC_CTLR_EL1), access_gic_ctlr }, |
283 | /* ICC_AP1R0_EL1 */ | 283 | { SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre }, |
284 | { Op0(3), Op1(0), CRn(12), CRm(9), Op2(0), access_gic_ap1r }, | 284 | { SYS_DESC(SYS_ICC_IGRPEN0_EL1), access_gic_grpen0 }, |
285 | /* ICC_AP1R1_EL1 */ | 285 | { SYS_DESC(SYS_ICC_IGRPEN1_EL1), access_gic_grpen1 }, |
286 | { Op0(3), Op1(0), CRn(12), CRm(9), Op2(1), access_gic_ap1r }, | ||
287 | /* ICC_AP1R2_EL1 */ | ||
288 | { Op0(3), Op1(0), CRn(12), CRm(9), Op2(2), access_gic_ap1r }, | ||
289 | /* ICC_AP1R3_EL1 */ | ||
290 | { Op0(3), Op1(0), CRn(12), CRm(9), Op2(3), access_gic_ap1r }, | ||
291 | /* ICC_BPR1_EL1 */ | ||
292 | { Op0(3), Op1(0), CRn(12), CRm(12), Op2(3), access_gic_bpr1 }, | ||
293 | /* ICC_CTLR_EL1 */ | ||
294 | { Op0(3), Op1(0), CRn(12), CRm(12), Op2(4), access_gic_ctlr }, | ||
295 | /* ICC_SRE_EL1 */ | ||
296 | { Op0(3), Op1(0), CRn(12), CRm(12), Op2(5), access_gic_sre }, | ||
297 | /* ICC_IGRPEN0_EL1 */ | ||
298 | { Op0(3), Op1(0), CRn(12), CRm(12), Op2(6), access_gic_grpen0 }, | ||
299 | /* ICC_GRPEN1_EL1 */ | ||
300 | { Op0(3), Op1(0), CRn(12), CRm(12), Op2(7), access_gic_grpen1 }, | ||
301 | }; | 286 | }; |
302 | 287 | ||
303 | int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, | 288 | int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, |
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index a563759fd142..6a0d7040d882 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c | |||
@@ -1094,7 +1094,7 @@ static void kvm_trap_emul_check_requests(struct kvm_vcpu *vcpu, int cpu, | |||
1094 | struct mm_struct *mm; | 1094 | struct mm_struct *mm; |
1095 | int i; | 1095 | int i; |
1096 | 1096 | ||
1097 | if (likely(!vcpu->requests)) | 1097 | if (likely(!kvm_request_pending(vcpu))) |
1098 | return; | 1098 | return; |
1099 | 1099 | ||
1100 | if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { | 1100 | if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { |

diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index 71d8856ade64..74805035edc8 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c | |||
@@ -2337,7 +2337,7 @@ static int kvm_vz_check_requests(struct kvm_vcpu *vcpu, int cpu) | |||
2337 | int ret = 0; | 2337 | int ret = 0; |
2338 | int i; | 2338 | int i; |
2339 | 2339 | ||
2340 | if (!vcpu->requests) | 2340 | if (!kvm_request_pending(vcpu)) |
2341 | return 0; | 2341 | return 0; |
2342 | 2342 | ||
2343 | if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { | 2343 | if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { |
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 2bf35017ffc0..b8d5b8e35244 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h | |||
@@ -86,7 +86,6 @@ struct kvmppc_vcore { | |||
86 | u16 last_cpu; | 86 | u16 last_cpu; |
87 | u8 vcore_state; | 87 | u8 vcore_state; |
88 | u8 in_guest; | 88 | u8 in_guest; |
89 | struct kvmppc_vcore *master_vcore; | ||
90 | struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS]; | 89 | struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS]; |
91 | struct list_head preempt_list; | 90 | struct list_head preempt_list; |
92 | spinlock_t lock; | 91 | spinlock_t lock; |
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index b148496ffe36..7cea76f11c26 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h | |||
@@ -81,7 +81,7 @@ struct kvm_split_mode { | |||
81 | u8 subcore_size; | 81 | u8 subcore_size; |
82 | u8 do_nap; | 82 | u8 do_nap; |
83 | u8 napped[MAX_SMT_THREADS]; | 83 | u8 napped[MAX_SMT_THREADS]; |
84 | struct kvmppc_vcore *master_vcs[MAX_SUBCORES]; | 84 | struct kvmppc_vcore *vc[MAX_SUBCORES]; |
85 | }; | 85 | }; |
86 | 86 | ||
87 | /* | 87 | /* |
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 9c51ac4b8f36..8b3f1238d07f 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <asm/page.h> | 35 | #include <asm/page.h> |
36 | #include <asm/cacheflush.h> | 36 | #include <asm/cacheflush.h> |
37 | #include <asm/hvcall.h> | 37 | #include <asm/hvcall.h> |
38 | #include <asm/mce.h> | ||
38 | 39 | ||
39 | #define KVM_MAX_VCPUS NR_CPUS | 40 | #define KVM_MAX_VCPUS NR_CPUS |
40 | #define KVM_MAX_VCORES NR_CPUS | 41 | #define KVM_MAX_VCORES NR_CPUS |
@@ -52,8 +53,8 @@ | |||
52 | #define KVM_IRQCHIP_NUM_PINS 256 | 53 | #define KVM_IRQCHIP_NUM_PINS 256 |
53 | 54 | ||
54 | /* PPC-specific vcpu->requests bit members */ | 55 | /* PPC-specific vcpu->requests bit members */ |
55 | #define KVM_REQ_WATCHDOG 8 | 56 | #define KVM_REQ_WATCHDOG KVM_ARCH_REQ(0) |
56 | #define KVM_REQ_EPR_EXIT 9 | 57 | #define KVM_REQ_EPR_EXIT KVM_ARCH_REQ(1) |
57 | 58 | ||
58 | #include <linux/mmu_notifier.h> | 59 | #include <linux/mmu_notifier.h> |
59 | 60 | ||
@@ -267,6 +268,8 @@ struct kvm_resize_hpt; | |||
267 | 268 | ||
268 | struct kvm_arch { | 269 | struct kvm_arch { |
269 | unsigned int lpid; | 270 | unsigned int lpid; |
271 | unsigned int smt_mode; /* # vcpus per virtual core */ | ||
272 | unsigned int emul_smt_mode; /* emulated SMT mode, on P9 */ | ||
270 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | 273 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE |
271 | unsigned int tlb_sets; | 274 | unsigned int tlb_sets; |
272 | struct kvm_hpt_info hpt; | 275 | struct kvm_hpt_info hpt; |
@@ -285,6 +288,7 @@ struct kvm_arch { | |||
285 | cpumask_t need_tlb_flush; | 288 | cpumask_t need_tlb_flush; |
286 | cpumask_t cpu_in_guest; | 289 | cpumask_t cpu_in_guest; |
287 | u8 radix; | 290 | u8 radix; |
291 | u8 fwnmi_enabled; | ||
288 | pgd_t *pgtable; | 292 | pgd_t *pgtable; |
289 | u64 process_table; | 293 | u64 process_table; |
290 | struct dentry *debugfs_dir; | 294 | struct dentry *debugfs_dir; |
@@ -566,6 +570,7 @@ struct kvm_vcpu_arch { | |||
566 | ulong wort; | 570 | ulong wort; |
567 | ulong tid; | 571 | ulong tid; |
568 | ulong psscr; | 572 | ulong psscr; |
573 | ulong hfscr; | ||
569 | ulong shadow_srr1; | 574 | ulong shadow_srr1; |
570 | #endif | 575 | #endif |
571 | u32 vrsave; /* also USPRG0 */ | 576 | u32 vrsave; /* also USPRG0 */ |
@@ -579,7 +584,7 @@ struct kvm_vcpu_arch { | |||
579 | ulong mcsrr0; | 584 | ulong mcsrr0; |
580 | ulong mcsrr1; | 585 | ulong mcsrr1; |
581 | ulong mcsr; | 586 | ulong mcsr; |
582 | u32 dec; | 587 | ulong dec; |
583 | #ifdef CONFIG_BOOKE | 588 | #ifdef CONFIG_BOOKE |
584 | u32 decar; | 589 | u32 decar; |
585 | #endif | 590 | #endif |
@@ -710,6 +715,7 @@ struct kvm_vcpu_arch { | |||
710 | unsigned long pending_exceptions; | 715 | unsigned long pending_exceptions; |
711 | u8 ceded; | 716 | u8 ceded; |
712 | u8 prodded; | 717 | u8 prodded; |
718 | u8 doorbell_request; | ||
713 | u32 last_inst; | 719 | u32 last_inst; |
714 | 720 | ||
715 | struct swait_queue_head *wqp; | 721 | struct swait_queue_head *wqp; |
@@ -722,6 +728,7 @@ struct kvm_vcpu_arch { | |||
722 | int prev_cpu; | 728 | int prev_cpu; |
723 | bool timer_running; | 729 | bool timer_running; |
724 | wait_queue_head_t cpu_run; | 730 | wait_queue_head_t cpu_run; |
731 | struct machine_check_event mce_evt; /* Valid if trap == 0x200 */ | ||
725 | 732 | ||
726 | struct kvm_vcpu_arch_shared *shared; | 733 | struct kvm_vcpu_arch_shared *shared; |
727 | #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE) | 734 | #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE) |
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index e0d88c38602b..ba5fadd6f3c9 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h | |||
@@ -315,6 +315,8 @@ struct kvmppc_ops { | |||
315 | struct irq_bypass_producer *); | 315 | struct irq_bypass_producer *); |
316 | int (*configure_mmu)(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg); | 316 | int (*configure_mmu)(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg); |
317 | int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info); | 317 | int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info); |
318 | int (*set_smt_mode)(struct kvm *kvm, unsigned long mode, | ||
319 | unsigned long flags); | ||
318 | }; | 320 | }; |
319 | 321 | ||
320 | extern struct kvmppc_ops *kvmppc_hv_ops; | 322 | extern struct kvmppc_ops *kvmppc_hv_ops; |
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 3a8d278e7421..1a9b45198c06 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h | |||
@@ -103,6 +103,8 @@ | |||
103 | #define OP_31_XOP_STBUX 247 | 103 | #define OP_31_XOP_STBUX 247 |
104 | #define OP_31_XOP_LHZX 279 | 104 | #define OP_31_XOP_LHZX 279 |
105 | #define OP_31_XOP_LHZUX 311 | 105 | #define OP_31_XOP_LHZUX 311 |
106 | #define OP_31_XOP_MSGSNDP 142 | ||
107 | #define OP_31_XOP_MSGCLRP 174 | ||
106 | #define OP_31_XOP_MFSPR 339 | 108 | #define OP_31_XOP_MFSPR 339 |
107 | #define OP_31_XOP_LWAX 341 | 109 | #define OP_31_XOP_LWAX 341 |
108 | #define OP_31_XOP_LHAX 343 | 110 | #define OP_31_XOP_LHAX 343 |
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 07fbeb927834..8cf8f0c96906 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h | |||
@@ -60,6 +60,12 @@ struct kvm_regs { | |||
60 | 60 | ||
61 | #define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */ | 61 | #define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */ |
62 | 62 | ||
63 | /* flags for kvm_run.flags */ | ||
64 | #define KVM_RUN_PPC_NMI_DISP_MASK (3 << 0) | ||
65 | #define KVM_RUN_PPC_NMI_DISP_FULLY_RECOV (1 << 0) | ||
66 | #define KVM_RUN_PPC_NMI_DISP_LIMITED_RECOV (2 << 0) | ||
67 | #define KVM_RUN_PPC_NMI_DISP_NOT_RECOV (3 << 0) | ||
68 | |||
63 | /* | 69 | /* |
64 | * Feature bits indicate which sections of the sregs struct are valid, | 70 | * Feature bits indicate which sections of the sregs struct are valid, |
65 | * both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers | 71 | * both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers |
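Editor's note: the new KVM_RUN_PPC_NMI_DISP_* bits are reported in kvm_run.flags when a guest machine check is forwarded to userspace with KVM_EXIT_NMI. The sketch below shows how a VMM might interpret them; it assumes a powerpc uapi header that already carries these definitions, and the surrounding exit handling is simplified.

#include <stdio.h>
#include <linux/kvm.h>

static void handle_nmi_exit(struct kvm_run *run)
{
	switch (run->flags & KVM_RUN_PPC_NMI_DISP_MASK) {
	case KVM_RUN_PPC_NMI_DISP_FULLY_RECOV:
		printf("machine check fully recovered by the host\n");
		break;
	case KVM_RUN_PPC_NMI_DISP_LIMITED_RECOV:
		printf("machine check recovered with limited guarantees\n");
		break;
	case KVM_RUN_PPC_NMI_DISP_NOT_RECOV:
	default:
		printf("machine check not recovered; consider stopping the guest\n");
		break;
	}
}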
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 709e23425317..ae8e89e0d083 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
@@ -485,6 +485,7 @@ int main(void) | |||
485 | OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls); | 485 | OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls); |
486 | OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v); | 486 | OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v); |
487 | OFFSET(KVM_RADIX, kvm, arch.radix); | 487 | OFFSET(KVM_RADIX, kvm, arch.radix); |
488 | OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled); | ||
488 | OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr); | 489 | OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr); |
489 | OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar); | 490 | OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar); |
490 | OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr); | 491 | OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr); |
@@ -513,6 +514,7 @@ int main(void) | |||
513 | OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions); | 514 | OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions); |
514 | OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded); | 515 | OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded); |
515 | OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded); | 516 | OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded); |
517 | OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request); | ||
516 | OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr); | 518 | OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr); |
517 | OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc); | 519 | OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc); |
518 | OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc); | 520 | OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc); |
@@ -542,6 +544,7 @@ int main(void) | |||
542 | OFFSET(VCPU_WORT, kvm_vcpu, arch.wort); | 544 | OFFSET(VCPU_WORT, kvm_vcpu, arch.wort); |
543 | OFFSET(VCPU_TID, kvm_vcpu, arch.tid); | 545 | OFFSET(VCPU_TID, kvm_vcpu, arch.tid); |
544 | OFFSET(VCPU_PSSCR, kvm_vcpu, arch.psscr); | 546 | OFFSET(VCPU_PSSCR, kvm_vcpu, arch.psscr); |
547 | OFFSET(VCPU_HFSCR, kvm_vcpu, arch.hfscr); | ||
545 | OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map); | 548 | OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map); |
546 | OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest); | 549 | OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest); |
547 | OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads); | 550 | OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads); |
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 5f9eada3519b..a9bfa49f3698 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c | |||
@@ -405,6 +405,7 @@ void machine_check_print_event_info(struct machine_check_event *evt, | |||
405 | break; | 405 | break; |
406 | } | 406 | } |
407 | } | 407 | } |
408 | EXPORT_SYMBOL_GPL(machine_check_print_event_info); | ||
408 | 409 | ||
409 | uint64_t get_mce_fault_addr(struct machine_check_event *evt) | 410 | uint64_t get_mce_fault_addr(struct machine_check_event *evt) |
410 | { | 411 | { |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 773b35d16a0b..0b436df746fc 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
@@ -46,6 +46,8 @@ | |||
46 | #include <linux/of.h> | 46 | #include <linux/of.h> |
47 | 47 | ||
48 | #include <asm/reg.h> | 48 | #include <asm/reg.h> |
49 | #include <asm/ppc-opcode.h> | ||
50 | #include <asm/disassemble.h> | ||
49 | #include <asm/cputable.h> | 51 | #include <asm/cputable.h> |
50 | #include <asm/cacheflush.h> | 52 | #include <asm/cacheflush.h> |
51 | #include <asm/tlbflush.h> | 53 | #include <asm/tlbflush.h> |
@@ -645,6 +647,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, | |||
645 | unsigned long stolen; | 647 | unsigned long stolen; |
646 | unsigned long core_stolen; | 648 | unsigned long core_stolen; |
647 | u64 now; | 649 | u64 now; |
650 | unsigned long flags; | ||
648 | 651 | ||
649 | dt = vcpu->arch.dtl_ptr; | 652 | dt = vcpu->arch.dtl_ptr; |
650 | vpa = vcpu->arch.vpa.pinned_addr; | 653 | vpa = vcpu->arch.vpa.pinned_addr; |
@@ -652,10 +655,10 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, | |||
652 | core_stolen = vcore_stolen_time(vc, now); | 655 | core_stolen = vcore_stolen_time(vc, now); |
653 | stolen = core_stolen - vcpu->arch.stolen_logged; | 656 | stolen = core_stolen - vcpu->arch.stolen_logged; |
654 | vcpu->arch.stolen_logged = core_stolen; | 657 | vcpu->arch.stolen_logged = core_stolen; |
655 | spin_lock_irq(&vcpu->arch.tbacct_lock); | 658 | spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); |
656 | stolen += vcpu->arch.busy_stolen; | 659 | stolen += vcpu->arch.busy_stolen; |
657 | vcpu->arch.busy_stolen = 0; | 660 | vcpu->arch.busy_stolen = 0; |
658 | spin_unlock_irq(&vcpu->arch.tbacct_lock); | 661 | spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); |
659 | if (!dt || !vpa) | 662 | if (!dt || !vpa) |
660 | return; | 663 | return; |
661 | memset(dt, 0, sizeof(struct dtl_entry)); | 664 | memset(dt, 0, sizeof(struct dtl_entry)); |
@@ -675,6 +678,26 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, | |||
675 | vcpu->arch.dtl.dirty = true; | 678 | vcpu->arch.dtl.dirty = true; |
676 | } | 679 | } |
677 | 680 | ||
681 | /* See if there is a doorbell interrupt pending for a vcpu */ | ||
682 | static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu) | ||
683 | { | ||
684 | int thr; | ||
685 | struct kvmppc_vcore *vc; | ||
686 | |||
687 | if (vcpu->arch.doorbell_request) | ||
688 | return true; | ||
689 | /* | ||
690 | * Ensure that the read of vcore->dpdes comes after the read | ||
691 | * of vcpu->doorbell_request. This barrier matches the | ||
692 | * lwsync in book3s_hv_rmhandlers.S just before the | ||
693 | * fast_guest_return label. | ||
694 | */ | ||
695 | smp_rmb(); | ||
696 | vc = vcpu->arch.vcore; | ||
697 | thr = vcpu->vcpu_id - vc->first_vcpuid; | ||
698 | return !!(vc->dpdes & (1 << thr)); | ||
699 | } | ||
700 | |||
678 | static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu) | 701 | static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu) |
679 | { | 702 | { |
680 | if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207) | 703 | if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207) |
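Editor's note: kvmppc_doorbell_pending() above depends on store/load ordering, per its comment: the entry path publishes DPDES before the per-vcpu doorbell_request flag is cleared, and the reader checks the flag first, then DPDES, with a read barrier in between. The C11 rendering below is only a generic illustration of that message-passing pattern, not the kernel code or its exact barrier choice.

#include <stdatomic.h>
#include <stdbool.h>

static atomic_ulong dpdes;		/* stands in for vcore->dpdes */
static atomic_bool  doorbell_request;	/* stands in for vcpu->arch.doorbell_request */

/* Writer side (guest entry): fold the flag into DPDES, then clear the flag. */
static void publish_doorbell(unsigned int thread)
{
	atomic_fetch_or_explicit(&dpdes, 1UL << thread, memory_order_release);
	atomic_store_explicit(&doorbell_request, false, memory_order_release);
}

/* Reader side: if the flag is already clear, the DPDES update must be visible. */
static bool doorbell_pending(unsigned int thread)
{
	if (atomic_load_explicit(&doorbell_request, memory_order_acquire))
		return true;
	return atomic_load_explicit(&dpdes, memory_order_acquire) & (1UL << thread);
}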
@@ -926,6 +949,101 @@ static int kvmppc_emulate_debug_inst(struct kvm_run *run, | |||
926 | } | 949 | } |
927 | } | 950 | } |
928 | 951 | ||
952 | static void do_nothing(void *x) | ||
953 | { | ||
954 | } | ||
955 | |||
956 | static unsigned long kvmppc_read_dpdes(struct kvm_vcpu *vcpu) | ||
957 | { | ||
958 | int thr, cpu, pcpu, nthreads; | ||
959 | struct kvm_vcpu *v; | ||
960 | unsigned long dpdes; | ||
961 | |||
962 | nthreads = vcpu->kvm->arch.emul_smt_mode; | ||
963 | dpdes = 0; | ||
964 | cpu = vcpu->vcpu_id & ~(nthreads - 1); | ||
965 | for (thr = 0; thr < nthreads; ++thr, ++cpu) { | ||
966 | v = kvmppc_find_vcpu(vcpu->kvm, cpu); | ||
967 | if (!v) | ||
968 | continue; | ||
969 | /* | ||
970 | * If the vcpu is currently running on a physical cpu thread, | ||
971 | * interrupt it in order to pull it out of the guest briefly, | ||
972 | * which will update its vcore->dpdes value. | ||
973 | */ | ||
974 | pcpu = READ_ONCE(v->cpu); | ||
975 | if (pcpu >= 0) | ||
976 | smp_call_function_single(pcpu, do_nothing, NULL, 1); | ||
977 | if (kvmppc_doorbell_pending(v)) | ||
978 | dpdes |= 1 << thr; | ||
979 | } | ||
980 | return dpdes; | ||
981 | } | ||
982 | |||
983 | /* | ||
984 | * On POWER9, emulate doorbell-related instructions in order to | ||
985 | * give the guest the illusion of running on a multi-threaded core. | ||
986 | * The instructions emulated are msgsndp, msgclrp, mfspr TIR, | ||
987 | * and mfspr DPDES. | ||
988 | */ | ||
989 | static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu) | ||
990 | { | ||
991 | u32 inst, rb, thr; | ||
992 | unsigned long arg; | ||
993 | struct kvm *kvm = vcpu->kvm; | ||
994 | struct kvm_vcpu *tvcpu; | ||
995 | |||
996 | if (!cpu_has_feature(CPU_FTR_ARCH_300)) | ||
997 | return EMULATE_FAIL; | ||
998 | if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE) | ||
999 | return RESUME_GUEST; | ||
1000 | if (get_op(inst) != 31) | ||
1001 | return EMULATE_FAIL; | ||
1002 | rb = get_rb(inst); | ||
1003 | thr = vcpu->vcpu_id & (kvm->arch.emul_smt_mode - 1); | ||
1004 | switch (get_xop(inst)) { | ||
1005 | case OP_31_XOP_MSGSNDP: | ||
1006 | arg = kvmppc_get_gpr(vcpu, rb); | ||
1007 | if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER) | ||
1008 | break; | ||
1009 | arg &= 0x3f; | ||
1010 | if (arg >= kvm->arch.emul_smt_mode) | ||
1011 | break; | ||
1012 | tvcpu = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - thr + arg); | ||
1013 | if (!tvcpu) | ||
1014 | break; | ||
1015 | if (!tvcpu->arch.doorbell_request) { | ||
1016 | tvcpu->arch.doorbell_request = 1; | ||
1017 | kvmppc_fast_vcpu_kick_hv(tvcpu); | ||
1018 | } | ||
1019 | break; | ||
1020 | case OP_31_XOP_MSGCLRP: | ||
1021 | arg = kvmppc_get_gpr(vcpu, rb); | ||
1022 | if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER) | ||
1023 | break; | ||
1024 | vcpu->arch.vcore->dpdes = 0; | ||
1025 | vcpu->arch.doorbell_request = 0; | ||
1026 | break; | ||
1027 | case OP_31_XOP_MFSPR: | ||
1028 | switch (get_sprn(inst)) { | ||
1029 | case SPRN_TIR: | ||
1030 | arg = thr; | ||
1031 | break; | ||
1032 | case SPRN_DPDES: | ||
1033 | arg = kvmppc_read_dpdes(vcpu); | ||
1034 | break; | ||
1035 | default: | ||
1036 | return EMULATE_FAIL; | ||
1037 | } | ||
1038 | kvmppc_set_gpr(vcpu, get_rt(inst), arg); | ||
1039 | break; | ||
1040 | default: | ||
1041 | return EMULATE_FAIL; | ||
1042 | } | ||
1043 | kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); | ||
1044 | return RESUME_GUEST; | ||
1045 | } | ||
1046 | |||
929 | static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, | 1047 | static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, |
930 | struct task_struct *tsk) | 1048 | struct task_struct *tsk) |
931 | { | 1049 | { |
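Editor's note: the doorbell emulation added above relies on simple vcpu-id arithmetic: with an emulated SMT mode of N (a power of two), the low bits of the vcpu id name the thread within the virtual core, so a msgsndp aimed at thread 'arg' targets vcpu (id - thr + arg). The stand-alone sketch below shows just that arithmetic; it is an illustration, not the patch's code.

#include <stdio.h>

static int doorbell_target(int vcpu_id, int emul_smt_mode, int arg)
{
	int thr = vcpu_id & (emul_smt_mode - 1);	/* thread within the virtual core */

	if (arg >= emul_smt_mode)
		return -1;				/* no such sibling thread */
	return vcpu_id - thr + arg;
}

int main(void)
{
	/* vcpu 6 in an emulated 4-thread core sends a doorbell to thread 1. */
	printf("target vcpu id: %d\n", doorbell_target(6, 4, 1));	/* prints 5 */
	return 0;
}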
@@ -971,15 +1089,20 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
971 | r = RESUME_GUEST; | 1089 | r = RESUME_GUEST; |
972 | break; | 1090 | break; |
973 | case BOOK3S_INTERRUPT_MACHINE_CHECK: | 1091 | case BOOK3S_INTERRUPT_MACHINE_CHECK: |
974 | /* | 1092 | /* Exit to guest with KVM_EXIT_NMI as exit reason */ |
975 | * Deliver a machine check interrupt to the guest. | 1093 | run->exit_reason = KVM_EXIT_NMI; |
976 | * We have to do this, even if the host has handled the | 1094 | run->hw.hardware_exit_reason = vcpu->arch.trap; |
977 | * machine check, because machine checks use SRR0/1 and | 1095 | /* Clear out the old NMI status from run->flags */ |
978 | * the interrupt might have trashed guest state in them. | 1096 | run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK; |
979 | */ | 1097 | /* Now set the NMI status */ |
980 | kvmppc_book3s_queue_irqprio(vcpu, | 1098 | if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED) |
981 | BOOK3S_INTERRUPT_MACHINE_CHECK); | 1099 | run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV; |
982 | r = RESUME_GUEST; | 1100 | else |
1101 | run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV; | ||
1102 | |||
1103 | r = RESUME_HOST; | ||
1104 | /* Print the MCE event to host console. */ | ||
1105 | machine_check_print_event_info(&vcpu->arch.mce_evt, false); | ||
983 | break; | 1106 | break; |
984 | case BOOK3S_INTERRUPT_PROGRAM: | 1107 | case BOOK3S_INTERRUPT_PROGRAM: |
985 | { | 1108 | { |
@@ -1048,12 +1171,19 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
1048 | break; | 1171 | break; |
1049 | /* | 1172 | /* |
1050 | * This occurs if the guest (kernel or userspace) does something that | 1173 | * This occurs if the guest (kernel or userspace) does something that |
1051 | * is prohibited by HFSCR. We just generate a program interrupt to | 1174 | * is prohibited by HFSCR. |
1052 | * the guest. | 1175 | * On POWER9, this could be a doorbell instruction that we need |
1176 | * to emulate. | ||
1177 | * Otherwise, we just generate a program interrupt to the guest. | ||
1053 | */ | 1178 | */ |
1054 | case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: | 1179 | case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: |
1055 | kvmppc_core_queue_program(vcpu, SRR1_PROGILL); | 1180 | r = EMULATE_FAIL; |
1056 | r = RESUME_GUEST; | 1181 | if ((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) |
1182 | r = kvmppc_emulate_doorbell_instr(vcpu); | ||
1183 | if (r == EMULATE_FAIL) { | ||
1184 | kvmppc_core_queue_program(vcpu, SRR1_PROGILL); | ||
1185 | r = RESUME_GUEST; | ||
1186 | } | ||
1057 | break; | 1187 | break; |
1058 | case BOOK3S_INTERRUPT_HV_RM_HARD: | 1188 | case BOOK3S_INTERRUPT_HV_RM_HARD: |
1059 | r = RESUME_PASSTHROUGH; | 1189 | r = RESUME_PASSTHROUGH; |
@@ -1143,6 +1273,12 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, | |||
1143 | mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; | 1273 | mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; |
1144 | if (cpu_has_feature(CPU_FTR_ARCH_207S)) | 1274 | if (cpu_has_feature(CPU_FTR_ARCH_207S)) |
1145 | mask |= LPCR_AIL; | 1275 | mask |= LPCR_AIL; |
1276 | /* | ||
1277 | * On POWER9, allow userspace to enable large decrementer for the | ||
1278 | * guest, whether or not the host has it enabled. | ||
1279 | */ | ||
1280 | if (cpu_has_feature(CPU_FTR_ARCH_300)) | ||
1281 | mask |= LPCR_LD; | ||
1146 | 1282 | ||
1147 | /* Broken 32-bit version of LPCR must not clear top bits */ | 1283 | /* Broken 32-bit version of LPCR must not clear top bits */ |
1148 | if (preserve_top32) | 1284 | if (preserve_top32) |
@@ -1611,7 +1747,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) | |||
1611 | init_swait_queue_head(&vcore->wq); | 1747 | init_swait_queue_head(&vcore->wq); |
1612 | vcore->preempt_tb = TB_NIL; | 1748 | vcore->preempt_tb = TB_NIL; |
1613 | vcore->lpcr = kvm->arch.lpcr; | 1749 | vcore->lpcr = kvm->arch.lpcr; |
1614 | vcore->first_vcpuid = core * threads_per_vcore(); | 1750 | vcore->first_vcpuid = core * kvm->arch.smt_mode; |
1615 | vcore->kvm = kvm; | 1751 | vcore->kvm = kvm; |
1616 | INIT_LIST_HEAD(&vcore->preempt_list); | 1752 | INIT_LIST_HEAD(&vcore->preempt_list); |
1617 | 1753 | ||
@@ -1770,14 +1906,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, | |||
1770 | unsigned int id) | 1906 | unsigned int id) |
1771 | { | 1907 | { |
1772 | struct kvm_vcpu *vcpu; | 1908 | struct kvm_vcpu *vcpu; |
1773 | int err = -EINVAL; | 1909 | int err; |
1774 | int core; | 1910 | int core; |
1775 | struct kvmppc_vcore *vcore; | 1911 | struct kvmppc_vcore *vcore; |
1776 | 1912 | ||
1777 | core = id / threads_per_vcore(); | ||
1778 | if (core >= KVM_MAX_VCORES) | ||
1779 | goto out; | ||
1780 | |||
1781 | err = -ENOMEM; | 1913 | err = -ENOMEM; |
1782 | vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); | 1914 | vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); |
1783 | if (!vcpu) | 1915 | if (!vcpu) |
@@ -1808,6 +1940,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, | |||
1808 | vcpu->arch.busy_preempt = TB_NIL; | 1940 | vcpu->arch.busy_preempt = TB_NIL; |
1809 | vcpu->arch.intr_msr = MSR_SF | MSR_ME; | 1941 | vcpu->arch.intr_msr = MSR_SF | MSR_ME; |
1810 | 1942 | ||
1943 | /* | ||
1944 | * Set the default HFSCR for the guest from the host value. | ||
1945 | * This value is only used on POWER9. | ||
1946 | * On POWER9 DD1, TM doesn't work, so we make sure to | ||
1947 | * prevent the guest from using it. | ||
1948 | * On POWER9, we want to virtualize the doorbell facility, so we | ||
1949 | * turn off the HFSCR bit, which causes those instructions to trap. | ||
1950 | */ | ||
1951 | vcpu->arch.hfscr = mfspr(SPRN_HFSCR); | ||
1952 | if (!cpu_has_feature(CPU_FTR_TM)) | ||
1953 | vcpu->arch.hfscr &= ~HFSCR_TM; | ||
1954 | if (cpu_has_feature(CPU_FTR_ARCH_300)) | ||
1955 | vcpu->arch.hfscr &= ~HFSCR_MSGP; | ||
1956 | |||
1811 | kvmppc_mmu_book3s_hv_init(vcpu); | 1957 | kvmppc_mmu_book3s_hv_init(vcpu); |
1812 | 1958 | ||
1813 | vcpu->arch.state = KVMPPC_VCPU_NOTREADY; | 1959 | vcpu->arch.state = KVMPPC_VCPU_NOTREADY; |
@@ -1815,11 +1961,17 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, | |||
1815 | init_waitqueue_head(&vcpu->arch.cpu_run); | 1961 | init_waitqueue_head(&vcpu->arch.cpu_run); |
1816 | 1962 | ||
1817 | mutex_lock(&kvm->lock); | 1963 | mutex_lock(&kvm->lock); |
1818 | vcore = kvm->arch.vcores[core]; | 1964 | vcore = NULL; |
1819 | if (!vcore) { | 1965 | err = -EINVAL; |
1820 | vcore = kvmppc_vcore_create(kvm, core); | 1966 | core = id / kvm->arch.smt_mode; |
1821 | kvm->arch.vcores[core] = vcore; | 1967 | if (core < KVM_MAX_VCORES) { |
1822 | kvm->arch.online_vcores++; | 1968 | vcore = kvm->arch.vcores[core]; |
1969 | if (!vcore) { | ||
1970 | err = -ENOMEM; | ||
1971 | vcore = kvmppc_vcore_create(kvm, core); | ||
1972 | kvm->arch.vcores[core] = vcore; | ||
1973 | kvm->arch.online_vcores++; | ||
1974 | } | ||
1823 | } | 1975 | } |
1824 | mutex_unlock(&kvm->lock); | 1976 | mutex_unlock(&kvm->lock); |
1825 | 1977 | ||
@@ -1847,6 +1999,43 @@ out: | |||
1847 | return ERR_PTR(err); | 1999 | return ERR_PTR(err); |
1848 | } | 2000 | } |
1849 | 2001 | ||
2002 | static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode, | ||
2003 | unsigned long flags) | ||
2004 | { | ||
2005 | int err; | ||
2006 | int esmt = 0; | ||
2007 | |||
2008 | if (flags) | ||
2009 | return -EINVAL; | ||
2010 | if (smt_mode > MAX_SMT_THREADS || !is_power_of_2(smt_mode)) | ||
2011 | return -EINVAL; | ||
2012 | if (!cpu_has_feature(CPU_FTR_ARCH_300)) { | ||
2013 | /* | ||
2014 | * On POWER8 (or POWER7), the threading mode is "strict", | ||
2015 | * so we pack smt_mode vcpus per vcore. | ||
2016 | */ | ||
2017 | if (smt_mode > threads_per_subcore) | ||
2018 | return -EINVAL; | ||
2019 | } else { | ||
2020 | /* | ||
2021 | * On POWER9, the threading mode is "loose", | ||
2022 | * so each vcpu gets its own vcore. | ||
2023 | */ | ||
2024 | esmt = smt_mode; | ||
2025 | smt_mode = 1; | ||
2026 | } | ||
2027 | mutex_lock(&kvm->lock); | ||
2028 | err = -EBUSY; | ||
2029 | if (!kvm->arch.online_vcores) { | ||
2030 | kvm->arch.smt_mode = smt_mode; | ||
2031 | kvm->arch.emul_smt_mode = esmt; | ||
2032 | err = 0; | ||
2033 | } | ||
2034 | mutex_unlock(&kvm->lock); | ||
2035 | |||
2036 | return err; | ||
2037 | } | ||
2038 | |||
1850 | static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa) | 2039 | static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa) |
1851 | { | 2040 | { |
1852 | if (vpa->pinned_addr) | 2041 | if (vpa->pinned_addr) |
@@ -1897,7 +2086,7 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu) | |||
1897 | } | 2086 | } |
1898 | } | 2087 | } |
1899 | 2088 | ||
1900 | extern void __kvmppc_vcore_entry(void); | 2089 | extern int __kvmppc_vcore_entry(void); |
1901 | 2090 | ||
1902 | static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, | 2091 | static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, |
1903 | struct kvm_vcpu *vcpu) | 2092 | struct kvm_vcpu *vcpu) |
@@ -1962,10 +2151,6 @@ static void kvmppc_release_hwthread(int cpu) | |||
1962 | tpaca->kvm_hstate.kvm_split_mode = NULL; | 2151 | tpaca->kvm_hstate.kvm_split_mode = NULL; |
1963 | } | 2152 | } |
1964 | 2153 | ||
1965 | static void do_nothing(void *x) | ||
1966 | { | ||
1967 | } | ||
1968 | |||
1969 | static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) | 2154 | static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) |
1970 | { | 2155 | { |
1971 | int i; | 2156 | int i; |
@@ -1983,11 +2168,35 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) | |||
1983 | smp_call_function_single(cpu + i, do_nothing, NULL, 1); | 2168 | smp_call_function_single(cpu + i, do_nothing, NULL, 1); |
1984 | } | 2169 | } |
1985 | 2170 | ||
2171 | static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) | ||
2172 | { | ||
2173 | struct kvm *kvm = vcpu->kvm; | ||
2174 | |||
2175 | /* | ||
2176 | * With radix, the guest can do TLB invalidations itself, | ||
2177 | * and it could choose to use the local form (tlbiel) if | ||
2178 | * it is invalidating a translation that has only ever been | ||
2179 | * used on one vcpu. However, that doesn't mean it has | ||
2180 | * only ever been used on one physical cpu, since vcpus | ||
2181 | * can move around between pcpus. To cope with this, when | ||
2182 | * a vcpu moves from one pcpu to another, we need to tell | ||
2183 | * any vcpus running on the same core as this vcpu previously | ||
2184 | * ran to flush the TLB. The TLB is shared between threads, | ||
2185 | * so we use a single bit in .need_tlb_flush for all 4 threads. | ||
2186 | */ | ||
2187 | if (vcpu->arch.prev_cpu != pcpu) { | ||
2188 | if (vcpu->arch.prev_cpu >= 0 && | ||
2189 | cpu_first_thread_sibling(vcpu->arch.prev_cpu) != | ||
2190 | cpu_first_thread_sibling(pcpu)) | ||
2191 | radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu); | ||
2192 | vcpu->arch.prev_cpu = pcpu; | ||
2193 | } | ||
2194 | } | ||
2195 | |||
1986 | static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) | 2196 | static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) |
1987 | { | 2197 | { |
1988 | int cpu; | 2198 | int cpu; |
1989 | struct paca_struct *tpaca; | 2199 | struct paca_struct *tpaca; |
1990 | struct kvmppc_vcore *mvc = vc->master_vcore; | ||
1991 | struct kvm *kvm = vc->kvm; | 2200 | struct kvm *kvm = vc->kvm; |
1992 | 2201 | ||
1993 | cpu = vc->pcpu; | 2202 | cpu = vc->pcpu; |
@@ -1997,36 +2206,16 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) | |||
1997 | vcpu->arch.timer_running = 0; | 2206 | vcpu->arch.timer_running = 0; |
1998 | } | 2207 | } |
1999 | cpu += vcpu->arch.ptid; | 2208 | cpu += vcpu->arch.ptid; |
2000 | vcpu->cpu = mvc->pcpu; | 2209 | vcpu->cpu = vc->pcpu; |
2001 | vcpu->arch.thread_cpu = cpu; | 2210 | vcpu->arch.thread_cpu = cpu; |
2002 | |||
2003 | /* | ||
2004 | * With radix, the guest can do TLB invalidations itself, | ||
2005 | * and it could choose to use the local form (tlbiel) if | ||
2006 | * it is invalidating a translation that has only ever been | ||
2007 | * used on one vcpu. However, that doesn't mean it has | ||
2008 | * only ever been used on one physical cpu, since vcpus | ||
2009 | * can move around between pcpus. To cope with this, when | ||
2010 | * a vcpu moves from one pcpu to another, we need to tell | ||
2011 | * any vcpus running on the same core as this vcpu previously | ||
2012 | * ran to flush the TLB. The TLB is shared between threads, | ||
2013 | * so we use a single bit in .need_tlb_flush for all 4 threads. | ||
2014 | */ | ||
2015 | if (kvm_is_radix(kvm) && vcpu->arch.prev_cpu != cpu) { | ||
2016 | if (vcpu->arch.prev_cpu >= 0 && | ||
2017 | cpu_first_thread_sibling(vcpu->arch.prev_cpu) != | ||
2018 | cpu_first_thread_sibling(cpu)) | ||
2019 | radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu); | ||
2020 | vcpu->arch.prev_cpu = cpu; | ||
2021 | } | ||
2022 | cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest); | 2211 | cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest); |
2023 | } | 2212 | } |
2024 | tpaca = &paca[cpu]; | 2213 | tpaca = &paca[cpu]; |
2025 | tpaca->kvm_hstate.kvm_vcpu = vcpu; | 2214 | tpaca->kvm_hstate.kvm_vcpu = vcpu; |
2026 | tpaca->kvm_hstate.ptid = cpu - mvc->pcpu; | 2215 | tpaca->kvm_hstate.ptid = cpu - vc->pcpu; |
2027 | /* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */ | 2216 | /* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */ |
2028 | smp_wmb(); | 2217 | smp_wmb(); |
2029 | tpaca->kvm_hstate.kvm_vcore = mvc; | 2218 | tpaca->kvm_hstate.kvm_vcore = vc; |
2030 | if (cpu != smp_processor_id()) | 2219 | if (cpu != smp_processor_id()) |
2031 | kvmppc_ipi_thread(cpu); | 2220 | kvmppc_ipi_thread(cpu); |
2032 | } | 2221 | } |
@@ -2155,8 +2344,7 @@ struct core_info { | |||
2155 | int max_subcore_threads; | 2344 | int max_subcore_threads; |
2156 | int total_threads; | 2345 | int total_threads; |
2157 | int subcore_threads[MAX_SUBCORES]; | 2346 | int subcore_threads[MAX_SUBCORES]; |
2158 | struct kvm *subcore_vm[MAX_SUBCORES]; | 2347 | struct kvmppc_vcore *vc[MAX_SUBCORES]; |
2159 | struct list_head vcs[MAX_SUBCORES]; | ||
2160 | }; | 2348 | }; |
2161 | 2349 | ||
2162 | /* | 2350 | /* |
@@ -2167,17 +2355,12 @@ static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 }; | |||
2167 | 2355 | ||
2168 | static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc) | 2356 | static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc) |
2169 | { | 2357 | { |
2170 | int sub; | ||
2171 | |||
2172 | memset(cip, 0, sizeof(*cip)); | 2358 | memset(cip, 0, sizeof(*cip)); |
2173 | cip->n_subcores = 1; | 2359 | cip->n_subcores = 1; |
2174 | cip->max_subcore_threads = vc->num_threads; | 2360 | cip->max_subcore_threads = vc->num_threads; |
2175 | cip->total_threads = vc->num_threads; | 2361 | cip->total_threads = vc->num_threads; |
2176 | cip->subcore_threads[0] = vc->num_threads; | 2362 | cip->subcore_threads[0] = vc->num_threads; |
2177 | cip->subcore_vm[0] = vc->kvm; | 2363 | cip->vc[0] = vc; |
2178 | for (sub = 0; sub < MAX_SUBCORES; ++sub) | ||
2179 | INIT_LIST_HEAD(&cip->vcs[sub]); | ||
2180 | list_add_tail(&vc->preempt_list, &cip->vcs[0]); | ||
2181 | } | 2364 | } |
2182 | 2365 | ||
2183 | static bool subcore_config_ok(int n_subcores, int n_threads) | 2366 | static bool subcore_config_ok(int n_subcores, int n_threads) |
@@ -2197,9 +2380,8 @@ static bool subcore_config_ok(int n_subcores, int n_threads) | |||
2197 | return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS; | 2380 | return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS; |
2198 | } | 2381 | } |
2199 | 2382 | ||
2200 | static void init_master_vcore(struct kvmppc_vcore *vc) | 2383 | static void init_vcore_to_run(struct kvmppc_vcore *vc) |
2201 | { | 2384 | { |
2202 | vc->master_vcore = vc; | ||
2203 | vc->entry_exit_map = 0; | 2385 | vc->entry_exit_map = 0; |
2204 | vc->in_guest = 0; | 2386 | vc->in_guest = 0; |
2205 | vc->napping_threads = 0; | 2387 | vc->napping_threads = 0; |
@@ -2224,9 +2406,9 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) | |||
2224 | ++cip->n_subcores; | 2406 | ++cip->n_subcores; |
2225 | cip->total_threads += vc->num_threads; | 2407 | cip->total_threads += vc->num_threads; |
2226 | cip->subcore_threads[sub] = vc->num_threads; | 2408 | cip->subcore_threads[sub] = vc->num_threads; |
2227 | cip->subcore_vm[sub] = vc->kvm; | 2409 | cip->vc[sub] = vc; |
2228 | init_master_vcore(vc); | 2410 | init_vcore_to_run(vc); |
2229 | list_move_tail(&vc->preempt_list, &cip->vcs[sub]); | 2411 | list_del_init(&vc->preempt_list); |
2230 | 2412 | ||
2231 | return true; | 2413 | return true; |
2232 | } | 2414 | } |
@@ -2294,6 +2476,18 @@ static void collect_piggybacks(struct core_info *cip, int target_threads) | |||
2294 | spin_unlock(&lp->lock); | 2476 | spin_unlock(&lp->lock); |
2295 | } | 2477 | } |
2296 | 2478 | ||
2479 | static bool recheck_signals(struct core_info *cip) | ||
2480 | { | ||
2481 | int sub, i; | ||
2482 | struct kvm_vcpu *vcpu; | ||
2483 | |||
2484 | for (sub = 0; sub < cip->n_subcores; ++sub) | ||
2485 | for_each_runnable_thread(i, vcpu, cip->vc[sub]) | ||
2486 | if (signal_pending(vcpu->arch.run_task)) | ||
2487 | return true; | ||
2488 | return false; | ||
2489 | } | ||
2490 | |||
2297 | static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) | 2491 | static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) |
2298 | { | 2492 | { |
2299 | int still_running = 0, i; | 2493 | int still_running = 0, i; |
@@ -2331,7 +2525,6 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) | |||
2331 | wake_up(&vcpu->arch.cpu_run); | 2525 | wake_up(&vcpu->arch.cpu_run); |
2332 | } | 2526 | } |
2333 | } | 2527 | } |
2334 | list_del_init(&vc->preempt_list); | ||
2335 | if (!is_master) { | 2528 | if (!is_master) { |
2336 | if (still_running > 0) { | 2529 | if (still_running > 0) { |
2337 | kvmppc_vcore_preempt(vc); | 2530 | kvmppc_vcore_preempt(vc); |
@@ -2393,6 +2586,21 @@ static inline int kvmppc_set_host_core(unsigned int cpu) | |||
2393 | return 0; | 2586 | return 0; |
2394 | } | 2587 | } |
2395 | 2588 | ||
2589 | static void set_irq_happened(int trap) | ||
2590 | { | ||
2591 | switch (trap) { | ||
2592 | case BOOK3S_INTERRUPT_EXTERNAL: | ||
2593 | local_paca->irq_happened |= PACA_IRQ_EE; | ||
2594 | break; | ||
2595 | case BOOK3S_INTERRUPT_H_DOORBELL: | ||
2596 | local_paca->irq_happened |= PACA_IRQ_DBELL; | ||
2597 | break; | ||
2598 | case BOOK3S_INTERRUPT_HMI: | ||
2599 | local_paca->irq_happened |= PACA_IRQ_HMI; | ||
2600 | break; | ||
2601 | } | ||
2602 | } | ||
2603 | |||
2396 | /* | 2604 | /* |
2397 | * Run a set of guest threads on a physical core. | 2605 | * Run a set of guest threads on a physical core. |
2398 | * Called with vc->lock held. | 2606 | * Called with vc->lock held. |
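The set_irq_happened() helper added just above records, while interrupts are still hard-disabled, which host interrupt caused the guest to exit (external, hypervisor doorbell, or HMI) by setting the matching bit in local_paca->irq_happened; the later local_irq_enable() then replays that event through the normal lazy-masking machinery, so the interrupt that ended the guest run is never lost. A small standalone model of that "latch now, replay on enable" pattern follows; the trap numbers and names are illustrative stand-ins, not the kernel's definitions:

    #include <stdio.h>

    /* Simplified model of PPC lazy interrupt masking: while "hard
     * disabled", exits only latch a pending-event bit; re-enabling
     * interrupts replays whatever was latched. */
    enum { IRQ_EE = 1, IRQ_DBELL = 2, IRQ_HMI = 4 };

    static unsigned int irq_happened;   /* stands in for local_paca->irq_happened */

    static void set_irq_happened_model(int trap)
    {
        switch (trap) {
        case 0x500: irq_happened |= IRQ_EE;    break;  /* external interrupt   */
        case 0xe80: irq_happened |= IRQ_DBELL; break;  /* hypervisor doorbell  */
        case 0xe60: irq_happened |= IRQ_HMI;   break;  /* hypervisor maint.    */
        }
    }

    static void local_irq_enable_model(void)
    {
        if (irq_happened & IRQ_EE)    printf("replaying external interrupt\n");
        if (irq_happened & IRQ_DBELL) printf("replaying doorbell\n");
        if (irq_happened & IRQ_HMI)   printf("replaying HMI\n");
        irq_happened = 0;
    }

    int main(void)
    {
        int trap = 0x500;               /* pretend the guest exited on an external irq */
        set_irq_happened_model(trap);   /* latch it while still hard-disabled */
        local_irq_enable_model();       /* the later enable replays it */
        return 0;
    }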
@@ -2403,7 +2611,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) | |||
2403 | int i; | 2611 | int i; |
2404 | int srcu_idx; | 2612 | int srcu_idx; |
2405 | struct core_info core_info; | 2613 | struct core_info core_info; |
2406 | struct kvmppc_vcore *pvc, *vcnext; | 2614 | struct kvmppc_vcore *pvc; |
2407 | struct kvm_split_mode split_info, *sip; | 2615 | struct kvm_split_mode split_info, *sip; |
2408 | int split, subcore_size, active; | 2616 | int split, subcore_size, active; |
2409 | int sub; | 2617 | int sub; |
@@ -2412,6 +2620,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) | |||
2412 | int pcpu, thr; | 2620 | int pcpu, thr; |
2413 | int target_threads; | 2621 | int target_threads; |
2414 | int controlled_threads; | 2622 | int controlled_threads; |
2623 | int trap; | ||
2415 | 2624 | ||
2416 | /* | 2625 | /* |
2417 | * Remove from the list any threads that have a signal pending | 2626 | * Remove from the list any threads that have a signal pending |
@@ -2426,7 +2635,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) | |||
2426 | /* | 2635 | /* |
2427 | * Initialize *vc. | 2636 | * Initialize *vc. |
2428 | */ | 2637 | */ |
2429 | init_master_vcore(vc); | 2638 | init_vcore_to_run(vc); |
2430 | vc->preempt_tb = TB_NIL; | 2639 | vc->preempt_tb = TB_NIL; |
2431 | 2640 | ||
2432 | /* | 2641 | /* |
@@ -2463,6 +2672,43 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) | |||
2463 | if (vc->num_threads < target_threads) | 2672 | if (vc->num_threads < target_threads) |
2464 | collect_piggybacks(&core_info, target_threads); | 2673 | collect_piggybacks(&core_info, target_threads); |
2465 | 2674 | ||
2675 | /* | ||
2676 | * On radix, arrange for TLB flushing if necessary. | ||
2677 | * This has to be done before disabling interrupts since | ||
2678 | * it uses smp_call_function(). | ||
2679 | */ | ||
2680 | pcpu = smp_processor_id(); | ||
2681 | if (kvm_is_radix(vc->kvm)) { | ||
2682 | for (sub = 0; sub < core_info.n_subcores; ++sub) | ||
2683 | for_each_runnable_thread(i, vcpu, core_info.vc[sub]) | ||
2684 | kvmppc_prepare_radix_vcpu(vcpu, pcpu); | ||
2685 | } | ||
2686 | |||
2687 | /* | ||
2688 | * Hard-disable interrupts, and check resched flag and signals. | ||
2689 | * If we need to reschedule or deliver a signal, clean up | ||
2690 | * and return without going into the guest(s). | ||
2691 | */ | ||
2692 | local_irq_disable(); | ||
2693 | hard_irq_disable(); | ||
2694 | if (lazy_irq_pending() || need_resched() || | ||
2695 | recheck_signals(&core_info)) { | ||
2696 | local_irq_enable(); | ||
2697 | vc->vcore_state = VCORE_INACTIVE; | ||
2698 | /* Unlock all except the primary vcore */ | ||
2699 | for (sub = 1; sub < core_info.n_subcores; ++sub) { | ||
2700 | pvc = core_info.vc[sub]; | ||
2701 | /* Put back on to the preempted vcores list */ | ||
2702 | kvmppc_vcore_preempt(pvc); | ||
2703 | spin_unlock(&pvc->lock); | ||
2704 | } | ||
2705 | for (i = 0; i < controlled_threads; ++i) | ||
2706 | kvmppc_release_hwthread(pcpu + i); | ||
2707 | return; | ||
2708 | } | ||
2709 | |||
2710 | kvmppc_clear_host_core(pcpu); | ||
2711 | |||
2466 | /* Decide on micro-threading (split-core) mode */ | 2712 | /* Decide on micro-threading (split-core) mode */ |
2467 | subcore_size = threads_per_subcore; | 2713 | subcore_size = threads_per_subcore; |
2468 | cmd_bit = stat_bit = 0; | 2714 | cmd_bit = stat_bit = 0; |
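The rework above moves the hard-disable of interrupts to before the split-core decision and re-checks lazily-pending interrupts, need_resched() and signals once nothing new can sneak in; if anything is pending, the piggybacked vcores go back on the preempted list, the hardware threads are released, and the core is never entered. The shape of that "re-check after closing the race window" idiom, as a hedged standalone sketch with stub predicates rather than the real kernel helpers:

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-ins for the real predicates; the point is the ordering:
     * only test them *after* interrupts have been hard-disabled, so
     * nothing can become pending between the check and guest entry. */
    static bool lazy_irq_pending_model(void)  { return false; }
    static bool need_resched_model(void)      { return false; }
    static bool signal_pending_model(void)    { return false; }

    static void hard_irq_disable_model(void)  { /* mask interrupts  */ }
    static void irq_enable_model(void)        { /* unmask interrupts */ }

    static int try_enter_guest(void)
    {
        hard_irq_disable_model();
        if (lazy_irq_pending_model() || need_resched_model() ||
            signal_pending_model()) {
            /* back out: requeue piggybacked vcores, release HW threads */
            irq_enable_model();
            return -1;          /* caller retries later */
        }
        printf("entering guest with interrupts hard-disabled\n");
        return 0;
    }

    int main(void) { return try_enter_guest() ? 1 : 0; }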
@@ -2486,13 +2732,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) | |||
2486 | split_info.ldbar = mfspr(SPRN_LDBAR); | 2732 | split_info.ldbar = mfspr(SPRN_LDBAR); |
2487 | split_info.subcore_size = subcore_size; | 2733 | split_info.subcore_size = subcore_size; |
2488 | for (sub = 0; sub < core_info.n_subcores; ++sub) | 2734 | for (sub = 0; sub < core_info.n_subcores; ++sub) |
2489 | split_info.master_vcs[sub] = | 2735 | split_info.vc[sub] = core_info.vc[sub]; |
2490 | list_first_entry(&core_info.vcs[sub], | ||
2491 | struct kvmppc_vcore, preempt_list); | ||
2492 | /* order writes to split_info before kvm_split_mode pointer */ | 2736 | /* order writes to split_info before kvm_split_mode pointer */ |
2493 | smp_wmb(); | 2737 | smp_wmb(); |
2494 | } | 2738 | } |
2495 | pcpu = smp_processor_id(); | ||
2496 | for (thr = 0; thr < controlled_threads; ++thr) | 2739 | for (thr = 0; thr < controlled_threads; ++thr) |
2497 | paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip; | 2740 | paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip; |
2498 | 2741 | ||
@@ -2512,32 +2755,29 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) | |||
2512 | } | 2755 | } |
2513 | } | 2756 | } |
2514 | 2757 | ||
2515 | kvmppc_clear_host_core(pcpu); | ||
2516 | |||
2517 | /* Start all the threads */ | 2758 | /* Start all the threads */ |
2518 | active = 0; | 2759 | active = 0; |
2519 | for (sub = 0; sub < core_info.n_subcores; ++sub) { | 2760 | for (sub = 0; sub < core_info.n_subcores; ++sub) { |
2520 | thr = subcore_thread_map[sub]; | 2761 | thr = subcore_thread_map[sub]; |
2521 | thr0_done = false; | 2762 | thr0_done = false; |
2522 | active |= 1 << thr; | 2763 | active |= 1 << thr; |
2523 | list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) { | 2764 | pvc = core_info.vc[sub]; |
2524 | pvc->pcpu = pcpu + thr; | 2765 | pvc->pcpu = pcpu + thr; |
2525 | for_each_runnable_thread(i, vcpu, pvc) { | 2766 | for_each_runnable_thread(i, vcpu, pvc) { |
2526 | kvmppc_start_thread(vcpu, pvc); | 2767 | kvmppc_start_thread(vcpu, pvc); |
2527 | kvmppc_create_dtl_entry(vcpu, pvc); | 2768 | kvmppc_create_dtl_entry(vcpu, pvc); |
2528 | trace_kvm_guest_enter(vcpu); | 2769 | trace_kvm_guest_enter(vcpu); |
2529 | if (!vcpu->arch.ptid) | 2770 | if (!vcpu->arch.ptid) |
2530 | thr0_done = true; | 2771 | thr0_done = true; |
2531 | active |= 1 << (thr + vcpu->arch.ptid); | 2772 | active |= 1 << (thr + vcpu->arch.ptid); |
2532 | } | ||
2533 | /* | ||
2534 | * We need to start the first thread of each subcore | ||
2535 | * even if it doesn't have a vcpu. | ||
2536 | */ | ||
2537 | if (pvc->master_vcore == pvc && !thr0_done) | ||
2538 | kvmppc_start_thread(NULL, pvc); | ||
2539 | thr += pvc->num_threads; | ||
2540 | } | 2773 | } |
2774 | /* | ||
2775 | * We need to start the first thread of each subcore | ||
2776 | * even if it doesn't have a vcpu. | ||
2777 | */ | ||
2778 | if (!thr0_done) | ||
2779 | kvmppc_start_thread(NULL, pvc); | ||
2780 | thr += pvc->num_threads; | ||
2541 | } | 2781 | } |
2542 | 2782 | ||
2543 | /* | 2783 | /* |
@@ -2564,17 +2804,27 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) | |||
2564 | trace_kvmppc_run_core(vc, 0); | 2804 | trace_kvmppc_run_core(vc, 0); |
2565 | 2805 | ||
2566 | for (sub = 0; sub < core_info.n_subcores; ++sub) | 2806 | for (sub = 0; sub < core_info.n_subcores; ++sub) |
2567 | list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) | 2807 | spin_unlock(&core_info.vc[sub]->lock); |
2568 | spin_unlock(&pvc->lock); | 2808 | |
2809 | /* | ||
2810 | * Interrupts will be enabled once we get into the guest, | ||
2811 | * so tell lockdep that we're about to enable interrupts. | ||
2812 | */ | ||
2813 | trace_hardirqs_on(); | ||
2569 | 2814 | ||
2570 | guest_enter(); | 2815 | guest_enter(); |
2571 | 2816 | ||
2572 | srcu_idx = srcu_read_lock(&vc->kvm->srcu); | 2817 | srcu_idx = srcu_read_lock(&vc->kvm->srcu); |
2573 | 2818 | ||
2574 | __kvmppc_vcore_entry(); | 2819 | trap = __kvmppc_vcore_entry(); |
2575 | 2820 | ||
2576 | srcu_read_unlock(&vc->kvm->srcu, srcu_idx); | 2821 | srcu_read_unlock(&vc->kvm->srcu, srcu_idx); |
2577 | 2822 | ||
2823 | guest_exit(); | ||
2824 | |||
2825 | trace_hardirqs_off(); | ||
2826 | set_irq_happened(trap); | ||
2827 | |||
2578 | spin_lock(&vc->lock); | 2828 | spin_lock(&vc->lock); |
2579 | /* prevent other vcpu threads from doing kvmppc_start_thread() now */ | 2829 | /* prevent other vcpu threads from doing kvmppc_start_thread() now */ |
2580 | vc->vcore_state = VCORE_EXITING; | 2830 | vc->vcore_state = VCORE_EXITING; |
@@ -2602,6 +2852,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) | |||
2602 | split_info.do_nap = 0; | 2852 | split_info.do_nap = 0; |
2603 | } | 2853 | } |
2604 | 2854 | ||
2855 | kvmppc_set_host_core(pcpu); | ||
2856 | |||
2857 | local_irq_enable(); | ||
2858 | |||
2605 | /* Let secondaries go back to the offline loop */ | 2859 | /* Let secondaries go back to the offline loop */ |
2606 | for (i = 0; i < controlled_threads; ++i) { | 2860 | for (i = 0; i < controlled_threads; ++i) { |
2607 | kvmppc_release_hwthread(pcpu + i); | 2861 | kvmppc_release_hwthread(pcpu + i); |
@@ -2610,18 +2864,15 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) | |||
2610 | cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest); | 2864 | cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest); |
2611 | } | 2865 | } |
2612 | 2866 | ||
2613 | kvmppc_set_host_core(pcpu); | ||
2614 | |||
2615 | spin_unlock(&vc->lock); | 2867 | spin_unlock(&vc->lock); |
2616 | 2868 | ||
2617 | /* make sure updates to secondary vcpu structs are visible now */ | 2869 | /* make sure updates to secondary vcpu structs are visible now */ |
2618 | smp_mb(); | 2870 | smp_mb(); |
2619 | guest_exit(); | ||
2620 | 2871 | ||
2621 | for (sub = 0; sub < core_info.n_subcores; ++sub) | 2872 | for (sub = 0; sub < core_info.n_subcores; ++sub) { |
2622 | list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub], | 2873 | pvc = core_info.vc[sub]; |
2623 | preempt_list) | 2874 | post_guest_process(pvc, pvc == vc); |
2624 | post_guest_process(pvc, pvc == vc); | 2875 | } |
2625 | 2876 | ||
2626 | spin_lock(&vc->lock); | 2877 | spin_lock(&vc->lock); |
2627 | preempt_enable(); | 2878 | preempt_enable(); |
@@ -2666,6 +2917,30 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) | |||
2666 | vc->halt_poll_ns /= halt_poll_ns_shrink; | 2917 | vc->halt_poll_ns /= halt_poll_ns_shrink; |
2667 | } | 2918 | } |
2668 | 2919 | ||
2920 | #ifdef CONFIG_KVM_XICS | ||
2921 | static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu) | ||
2922 | { | ||
2923 | if (!xive_enabled()) | ||
2924 | return false; | ||
2925 | return vcpu->arch.xive_saved_state.pipr < | ||
2926 | vcpu->arch.xive_saved_state.cppr; | ||
2927 | } | ||
2928 | #else | ||
2929 | static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu) | ||
2930 | { | ||
2931 | return false; | ||
2932 | } | ||
2933 | #endif /* CONFIG_KVM_XICS */ | ||
2934 | |||
2935 | static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu) | ||
2936 | { | ||
2937 | if (vcpu->arch.pending_exceptions || vcpu->arch.prodded || | ||
2938 | kvmppc_doorbell_pending(vcpu) || xive_interrupt_pending(vcpu)) | ||
2939 | return true; | ||
2940 | |||
2941 | return false; | ||
2942 | } | ||
2943 | |||
2669 | /* | 2944 | /* |
2670 | * Check to see if any of the runnable vcpus on the vcore have pending | 2945 | * Check to see if any of the runnable vcpus on the vcore have pending |
2671 | * exceptions or are no longer ceded | 2946 | * exceptions or are no longer ceded |
@@ -2676,8 +2951,7 @@ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc) | |||
2676 | int i; | 2951 | int i; |
2677 | 2952 | ||
2678 | for_each_runnable_thread(i, vcpu, vc) { | 2953 | for_each_runnable_thread(i, vcpu, vc) { |
2679 | if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded || | 2954 | if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu)) |
2680 | vcpu->arch.prodded) | ||
2681 | return 1; | 2955 | return 1; |
2682 | } | 2956 | } |
2683 | 2957 | ||
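xive_interrupt_pending() treats a vCPU as having work when its saved XIVE Pending Interrupt Priority (PIPR) is numerically lower, i.e. more favoured, than its Current Processor Priority (CPPR); kvmppc_vcpu_woken() folds that in with queued exceptions, prod requests and pending doorbells, so a ceded vCPU only counts as idle when none of these hold. A minimal model of the priority comparison, assuming the usual XIVE convention that 0 is most favoured and 0xFF effectively masks:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* XIVE priorities: lower value = more favoured. */
    static bool xive_pending_model(uint8_t pipr, uint8_t cppr)
    {
        return pipr < cppr;
    }

    int main(void)
    {
        assert(xive_pending_model(0x05, 0x07));   /* prio-5 irq beats CPPR 7: pending */
        assert(!xive_pending_model(0x07, 0x05));  /* less favoured than CPPR: held off */
        assert(!xive_pending_model(0xff, 0xff));  /* nothing queued */
        return 0;
    }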
@@ -2819,15 +3093,14 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
2819 | */ | 3093 | */ |
2820 | if (!signal_pending(current)) { | 3094 | if (!signal_pending(current)) { |
2821 | if (vc->vcore_state == VCORE_PIGGYBACK) { | 3095 | if (vc->vcore_state == VCORE_PIGGYBACK) { |
2822 | struct kvmppc_vcore *mvc = vc->master_vcore; | 3096 | if (spin_trylock(&vc->lock)) { |
2823 | if (spin_trylock(&mvc->lock)) { | 3097 | if (vc->vcore_state == VCORE_RUNNING && |
2824 | if (mvc->vcore_state == VCORE_RUNNING && | 3098 | !VCORE_IS_EXITING(vc)) { |
2825 | !VCORE_IS_EXITING(mvc)) { | ||
2826 | kvmppc_create_dtl_entry(vcpu, vc); | 3099 | kvmppc_create_dtl_entry(vcpu, vc); |
2827 | kvmppc_start_thread(vcpu, vc); | 3100 | kvmppc_start_thread(vcpu, vc); |
2828 | trace_kvm_guest_enter(vcpu); | 3101 | trace_kvm_guest_enter(vcpu); |
2829 | } | 3102 | } |
2830 | spin_unlock(&mvc->lock); | 3103 | spin_unlock(&vc->lock); |
2831 | } | 3104 | } |
2832 | } else if (vc->vcore_state == VCORE_RUNNING && | 3105 | } else if (vc->vcore_state == VCORE_RUNNING && |
2833 | !VCORE_IS_EXITING(vc)) { | 3106 | !VCORE_IS_EXITING(vc)) { |
@@ -2863,7 +3136,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
2863 | break; | 3136 | break; |
2864 | n_ceded = 0; | 3137 | n_ceded = 0; |
2865 | for_each_runnable_thread(i, v, vc) { | 3138 | for_each_runnable_thread(i, v, vc) { |
2866 | if (!v->arch.pending_exceptions && !v->arch.prodded) | 3139 | if (!kvmppc_vcpu_woken(v)) |
2867 | n_ceded += v->arch.ceded; | 3140 | n_ceded += v->arch.ceded; |
2868 | else | 3141 | else |
2869 | v->arch.ceded = 0; | 3142 | v->arch.ceded = 0; |
@@ -3519,6 +3792,19 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) | |||
3519 | kvm_hv_vm_activated(); | 3792 | kvm_hv_vm_activated(); |
3520 | 3793 | ||
3521 | /* | 3794 | /* |
3795 | * Initialize smt_mode depending on processor. | ||
3796 | * POWER8 and earlier have to use "strict" threading, where | ||
3797 | * all vCPUs in a vcore have to run on the same (sub)core, | ||
3798 | * whereas on POWER9 the threads can each run a different | ||
3799 | * guest. | ||
3800 | */ | ||
3801 | if (!cpu_has_feature(CPU_FTR_ARCH_300)) | ||
3802 | kvm->arch.smt_mode = threads_per_subcore; | ||
3803 | else | ||
3804 | kvm->arch.smt_mode = 1; | ||
3805 | kvm->arch.emul_smt_mode = 1; | ||
3806 | |||
3807 | /* | ||
3522 | * Create a debugfs directory for the VM | 3808 | * Create a debugfs directory for the VM |
3523 | */ | 3809 | */ |
3524 | snprintf(buf, sizeof(buf), "vm%d", current->pid); | 3810 | snprintf(buf, sizeof(buf), "vm%d", current->pid); |
@@ -3947,6 +4233,7 @@ static struct kvmppc_ops kvm_ops_hv = { | |||
3947 | #endif | 4233 | #endif |
3948 | .configure_mmu = kvmhv_configure_mmu, | 4234 | .configure_mmu = kvmhv_configure_mmu, |
3949 | .get_rmmu_info = kvmhv_get_rmmu_info, | 4235 | .get_rmmu_info = kvmhv_get_rmmu_info, |
4236 | .set_smt_mode = kvmhv_set_smt_mode, | ||
3950 | }; | 4237 | }; |
3951 | 4238 | ||
3952 | static int kvm_init_subcore_bitmap(void) | 4239 | static int kvm_init_subcore_bitmap(void) |
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index ee4c2558c305..90644db9d38e 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c | |||
@@ -307,7 +307,7 @@ void kvmhv_commence_exit(int trap) | |||
307 | return; | 307 | return; |
308 | 308 | ||
309 | for (i = 0; i < MAX_SUBCORES; ++i) { | 309 | for (i = 0; i < MAX_SUBCORES; ++i) { |
310 | vc = sip->master_vcs[i]; | 310 | vc = sip->vc[i]; |
311 | if (!vc) | 311 | if (!vc) |
312 | break; | 312 | break; |
313 | do { | 313 | do { |
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 404deb512844..dc54373c8780 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S | |||
@@ -61,13 +61,6 @@ BEGIN_FTR_SECTION | |||
61 | std r3, HSTATE_DABR(r13) | 61 | std r3, HSTATE_DABR(r13) |
62 | END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) | 62 | END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) |
63 | 63 | ||
64 | /* Hard-disable interrupts */ | ||
65 | mfmsr r10 | ||
66 | std r10, HSTATE_HOST_MSR(r13) | ||
67 | rldicl r10,r10,48,1 | ||
68 | rotldi r10,r10,16 | ||
69 | mtmsrd r10,1 | ||
70 | |||
71 | /* Save host PMU registers */ | 64 | /* Save host PMU registers */ |
72 | BEGIN_FTR_SECTION | 65 | BEGIN_FTR_SECTION |
73 | /* Work around P8 PMAE bug */ | 66 | /* Work around P8 PMAE bug */ |
@@ -153,6 +146,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) | |||
153 | * | 146 | * |
154 | * R1 = host R1 | 147 | * R1 = host R1 |
155 | * R2 = host R2 | 148 | * R2 = host R2 |
149 | * R3 = trap number on this thread | ||
156 | * R12 = exit handler id | 150 | * R12 = exit handler id |
157 | * R13 = PACA | 151 | * R13 = PACA |
158 | */ | 152 | */ |
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 7ef0993214f3..c356f9a40b24 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c | |||
@@ -130,12 +130,28 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) | |||
130 | 130 | ||
131 | out: | 131 | out: |
132 | /* | 132 | /* |
133 | * For guest that supports FWNMI capability, hook the MCE event into | ||
134 | * vcpu structure. We are going to exit the guest with KVM_EXIT_NMI | ||
135 | * exit reason. On our way to exit we will pull this event from vcpu | ||
136 | * structure and print it from thread 0 of the core/subcore. | ||
137 | * | ||
138 | * For guest that does not support FWNMI capability (old QEMU): | ||
133 | * We are now going enter guest either through machine check | 139 | * We are now going enter guest either through machine check |
134 | * interrupt (for unhandled errors) or will continue from | 140 | * interrupt (for unhandled errors) or will continue from |
135 | * current HSRR0 (for handled errors) in guest. Hence | 141 | * current HSRR0 (for handled errors) in guest. Hence |
136 | * queue up the event so that we can log it from host console later. | 142 | * queue up the event so that we can log it from host console later. |
137 | */ | 143 | */ |
138 | machine_check_queue_event(); | 144 | if (vcpu->kvm->arch.fwnmi_enabled) { |
145 | /* | ||
146 | * Hook up the mce event on to vcpu structure. | ||
147 | * First clear the old event. | ||
148 | */ | ||
149 | memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt)); | ||
150 | if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) { | ||
151 | vcpu->arch.mce_evt = mce_evt; | ||
152 | } | ||
153 | } else | ||
154 | machine_check_queue_event(); | ||
139 | 155 | ||
140 | return handled; | 156 | return handled; |
141 | } | 157 | } |
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 4888dd494604..6ea4b53f4b16 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S | |||
@@ -45,7 +45,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) | |||
45 | #define NAPPING_NOVCPU 2 | 45 | #define NAPPING_NOVCPU 2 |
46 | 46 | ||
47 | /* Stack frame offsets for kvmppc_hv_entry */ | 47 | /* Stack frame offsets for kvmppc_hv_entry */ |
48 | #define SFS 144 | 48 | #define SFS 160 |
49 | #define STACK_SLOT_TRAP (SFS-4) | 49 | #define STACK_SLOT_TRAP (SFS-4) |
50 | #define STACK_SLOT_TID (SFS-16) | 50 | #define STACK_SLOT_TID (SFS-16) |
51 | #define STACK_SLOT_PSSCR (SFS-24) | 51 | #define STACK_SLOT_PSSCR (SFS-24) |
@@ -54,6 +54,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) | |||
54 | #define STACK_SLOT_CIABR (SFS-48) | 54 | #define STACK_SLOT_CIABR (SFS-48) |
55 | #define STACK_SLOT_DAWR (SFS-56) | 55 | #define STACK_SLOT_DAWR (SFS-56) |
56 | #define STACK_SLOT_DAWRX (SFS-64) | 56 | #define STACK_SLOT_DAWRX (SFS-64) |
57 | #define STACK_SLOT_HFSCR (SFS-72) | ||
57 | 58 | ||
58 | /* | 59 | /* |
59 | * Call kvmppc_hv_entry in real mode. | 60 | * Call kvmppc_hv_entry in real mode. |
@@ -68,6 +69,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline) | |||
68 | std r0, PPC_LR_STKOFF(r1) | 69 | std r0, PPC_LR_STKOFF(r1) |
69 | stdu r1, -112(r1) | 70 | stdu r1, -112(r1) |
70 | mfmsr r10 | 71 | mfmsr r10 |
72 | std r10, HSTATE_HOST_MSR(r13) | ||
71 | LOAD_REG_ADDR(r5, kvmppc_call_hv_entry) | 73 | LOAD_REG_ADDR(r5, kvmppc_call_hv_entry) |
72 | li r0,MSR_RI | 74 | li r0,MSR_RI |
73 | andc r0,r10,r0 | 75 | andc r0,r10,r0 |
@@ -152,20 +154,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) | |||
152 | stb r0, HSTATE_HWTHREAD_REQ(r13) | 154 | stb r0, HSTATE_HWTHREAD_REQ(r13) |
153 | 155 | ||
154 | /* | 156 | /* |
155 | * For external and machine check interrupts, we need | 157 | * For external interrupts we need to call the Linux |
156 | * to call the Linux handler to process the interrupt. | 158 | * handler to process the interrupt. We do that by jumping |
157 | * We do that by jumping to absolute address 0x500 for | 159 | * to absolute address 0x500 for external interrupts. |
158 | * external interrupts, or the machine_check_fwnmi label | 160 | * The [h]rfid at the end of the handler will return to |
159 | * for machine checks (since firmware might have patched | 161 | * the book3s_hv_interrupts.S code. For other interrupts |
160 | * the vector area at 0x200). The [h]rfid at the end of the | 162 | * we do the rfid to get back to the book3s_hv_interrupts.S |
161 | * handler will return to the book3s_hv_interrupts.S code. | 163 | * code here. |
162 | * For other interrupts we do the rfid to get back | ||
163 | * to the book3s_hv_interrupts.S code here. | ||
164 | */ | 164 | */ |
165 | ld r8, 112+PPC_LR_STKOFF(r1) | 165 | ld r8, 112+PPC_LR_STKOFF(r1) |
166 | addi r1, r1, 112 | 166 | addi r1, r1, 112 |
167 | ld r7, HSTATE_HOST_MSR(r13) | 167 | ld r7, HSTATE_HOST_MSR(r13) |
168 | 168 | ||
169 | /* Return the trap number on this thread as the return value */ | ||
170 | mr r3, r12 | ||
171 | |||
169 | /* | 172 | /* |
170 | * If we came back from the guest via a relocation-on interrupt, | 173 | * If we came back from the guest via a relocation-on interrupt, |
171 | * we will be in virtual mode at this point, which makes it a | 174 | * we will be in virtual mode at this point, which makes it a |
@@ -175,59 +178,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) | |||
175 | andi. r0, r0, MSR_IR /* in real mode? */ | 178 | andi. r0, r0, MSR_IR /* in real mode? */ |
176 | bne .Lvirt_return | 179 | bne .Lvirt_return |
177 | 180 | ||
178 | cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK | 181 | /* RFI into the highmem handler */ |
179 | cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL | ||
180 | beq 11f | ||
181 | cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL | ||
182 | beq 15f /* Invoke the H_DOORBELL handler */ | ||
183 | cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI | ||
184 | beq cr2, 14f /* HMI check */ | ||
185 | |||
186 | /* RFI into the highmem handler, or branch to interrupt handler */ | ||
187 | mfmsr r6 | 182 | mfmsr r6 |
188 | li r0, MSR_RI | 183 | li r0, MSR_RI |
189 | andc r6, r6, r0 | 184 | andc r6, r6, r0 |
190 | mtmsrd r6, 1 /* Clear RI in MSR */ | 185 | mtmsrd r6, 1 /* Clear RI in MSR */ |
191 | mtsrr0 r8 | 186 | mtsrr0 r8 |
192 | mtsrr1 r7 | 187 | mtsrr1 r7 |
193 | beq cr1, 13f /* machine check */ | ||
194 | RFI | 188 | RFI |
195 | 189 | ||
196 | /* On POWER7, we have external interrupts set to use HSRR0/1 */ | 190 | /* Virtual-mode return */ |
197 | 11: mtspr SPRN_HSRR0, r8 | ||
198 | mtspr SPRN_HSRR1, r7 | ||
199 | ba 0x500 | ||
200 | |||
201 | 13: b machine_check_fwnmi | ||
202 | |||
203 | 14: mtspr SPRN_HSRR0, r8 | ||
204 | mtspr SPRN_HSRR1, r7 | ||
205 | b hmi_exception_after_realmode | ||
206 | |||
207 | 15: mtspr SPRN_HSRR0, r8 | ||
208 | mtspr SPRN_HSRR1, r7 | ||
209 | ba 0xe80 | ||
210 | |||
211 | /* Virtual-mode return - can't get here for HMI or machine check */ | ||
212 | .Lvirt_return: | 191 | .Lvirt_return: |
213 | cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL | 192 | mtlr r8 |
214 | beq 16f | ||
215 | cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL | ||
216 | beq 17f | ||
217 | andi. r0, r7, MSR_EE /* were interrupts hard-enabled? */ | ||
218 | beq 18f | ||
219 | mtmsrd r7, 1 /* if so then re-enable them */ | ||
220 | 18: mtlr r8 | ||
221 | blr | 193 | blr |
222 | 194 | ||
223 | 16: mtspr SPRN_HSRR0, r8 /* jump to reloc-on external vector */ | ||
224 | mtspr SPRN_HSRR1, r7 | ||
225 | b exc_virt_0x4500_hardware_interrupt | ||
226 | |||
227 | 17: mtspr SPRN_HSRR0, r8 | ||
228 | mtspr SPRN_HSRR1, r7 | ||
229 | b exc_virt_0x4e80_h_doorbell | ||
230 | |||
231 | kvmppc_primary_no_guest: | 195 | kvmppc_primary_no_guest: |
232 | /* We handle this much like a ceded vcpu */ | 196 | /* We handle this much like a ceded vcpu */ |
233 | /* put the HDEC into the DEC, since HDEC interrupts don't wake us */ | 197 | /* put the HDEC into the DEC, since HDEC interrupts don't wake us */ |
@@ -769,6 +733,8 @@ BEGIN_FTR_SECTION | |||
769 | std r6, STACK_SLOT_PSSCR(r1) | 733 | std r6, STACK_SLOT_PSSCR(r1) |
770 | std r7, STACK_SLOT_PID(r1) | 734 | std r7, STACK_SLOT_PID(r1) |
771 | std r8, STACK_SLOT_IAMR(r1) | 735 | std r8, STACK_SLOT_IAMR(r1) |
736 | mfspr r5, SPRN_HFSCR | ||
737 | std r5, STACK_SLOT_HFSCR(r1) | ||
772 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) | 738 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) |
773 | BEGIN_FTR_SECTION | 739 | BEGIN_FTR_SECTION |
774 | mfspr r5, SPRN_CIABR | 740 | mfspr r5, SPRN_CIABR |
@@ -920,8 +886,10 @@ FTR_SECTION_ELSE | |||
920 | ld r5, VCPU_TID(r4) | 886 | ld r5, VCPU_TID(r4) |
921 | ld r6, VCPU_PSSCR(r4) | 887 | ld r6, VCPU_PSSCR(r4) |
922 | oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */ | 888 | oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */ |
889 | ld r7, VCPU_HFSCR(r4) | ||
923 | mtspr SPRN_TIDR, r5 | 890 | mtspr SPRN_TIDR, r5 |
924 | mtspr SPRN_PSSCR, r6 | 891 | mtspr SPRN_PSSCR, r6 |
892 | mtspr SPRN_HFSCR, r7 | ||
925 | ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) | 893 | ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) |
926 | 8: | 894 | 8: |
927 | 895 | ||
@@ -936,7 +904,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) | |||
936 | mftb r7 | 904 | mftb r7 |
937 | subf r3,r7,r8 | 905 | subf r3,r7,r8 |
938 | mtspr SPRN_DEC,r3 | 906 | mtspr SPRN_DEC,r3 |
939 | stw r3,VCPU_DEC(r4) | 907 | std r3,VCPU_DEC(r4) |
940 | 908 | ||
941 | ld r5, VCPU_SPRG0(r4) | 909 | ld r5, VCPU_SPRG0(r4) |
942 | ld r6, VCPU_SPRG1(r4) | 910 | ld r6, VCPU_SPRG1(r4) |
@@ -1048,7 +1016,13 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ | |||
1048 | li r0, BOOK3S_INTERRUPT_EXTERNAL | 1016 | li r0, BOOK3S_INTERRUPT_EXTERNAL |
1049 | bne cr1, 12f | 1017 | bne cr1, 12f |
1050 | mfspr r0, SPRN_DEC | 1018 | mfspr r0, SPRN_DEC |
1051 | cmpwi r0, 0 | 1019 | BEGIN_FTR_SECTION |
1020 | /* On POWER9 check whether the guest has large decrementer enabled */ | ||
1021 | andis. r8, r8, LPCR_LD@h | ||
1022 | bne 15f | ||
1023 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) | ||
1024 | extsw r0, r0 | ||
1025 | 15: cmpdi r0, 0 | ||
1052 | li r0, BOOK3S_INTERRUPT_DECREMENTER | 1026 | li r0, BOOK3S_INTERRUPT_DECREMENTER |
1053 | bge 5f | 1027 | bge 5f |
1054 | 1028 | ||
@@ -1058,6 +1032,23 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ | |||
1058 | mr r9, r4 | 1032 | mr r9, r4 |
1059 | bl kvmppc_msr_interrupt | 1033 | bl kvmppc_msr_interrupt |
1060 | 5: | 1034 | 5: |
1035 | BEGIN_FTR_SECTION | ||
1036 | b fast_guest_return | ||
1037 | END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) | ||
1038 | /* On POWER9, check for pending doorbell requests */ | ||
1039 | lbz r0, VCPU_DBELL_REQ(r4) | ||
1040 | cmpwi r0, 0 | ||
1041 | beq fast_guest_return | ||
1042 | ld r5, HSTATE_KVM_VCORE(r13) | ||
1043 | /* Set DPDES register so the CPU will take a doorbell interrupt */ | ||
1044 | li r0, 1 | ||
1045 | mtspr SPRN_DPDES, r0 | ||
1046 | std r0, VCORE_DPDES(r5) | ||
1047 | /* Make sure other cpus see vcore->dpdes set before dbell req clear */ | ||
1048 | lwsync | ||
1049 | /* Clear the pending doorbell request */ | ||
1050 | li r0, 0 | ||
1051 | stb r0, VCPU_DBELL_REQ(r4) | ||
1061 | 1052 | ||
1062 | /* | 1053 | /* |
1063 | * Required state: | 1054 | * Required state: |
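The cede-reentry path above gains POWER9 doorbell handling: a doorbell requested for the vCPU is delivered by writing bit 0 of DPDES (and mirroring it in vcore->dpdes for the sibling threads), ordering that store before the request is cleared. A hedged C paraphrase of the sequence the assembly implements, using model types rather than kernel structures:

    #include <stdint.h>
    #include <stdio.h>

    struct vcpu_model  { uint8_t  dbell_req; };
    struct vcore_model { uint64_t dpdes; };

    static uint64_t spr_dpdes_model;       /* stands in for the DPDES SPR */

    /* Re-arm a requested doorbell before re-entering the guest. */
    static void deliver_pending_doorbell(struct vcpu_model *vcpu,
                                         struct vcore_model *vc)
    {
        if (!vcpu->dbell_req)
            return;
        spr_dpdes_model = 1;      /* thread 0 doorbell pending */
        vc->dpdes = 1;            /* make it visible to siblings */
        __atomic_thread_fence(__ATOMIC_RELEASE);  /* order before clearing the request */
        vcpu->dbell_req = 0;
    }

    int main(void)
    {
        struct vcpu_model  vcpu = { .dbell_req = 1 };
        struct vcore_model vc   = { 0 };
        deliver_pending_doorbell(&vcpu, &vc);
        printf("dpdes=%llu req=%u\n",
               (unsigned long long)spr_dpdes_model, vcpu.dbell_req);
        return 0;
    }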
@@ -1232,6 +1223,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) | |||
1232 | 1223 | ||
1233 | stw r12,VCPU_TRAP(r9) | 1224 | stw r12,VCPU_TRAP(r9) |
1234 | 1225 | ||
1226 | /* | ||
1227 | * Now that we have saved away SRR0/1 and HSRR0/1, | ||
1228 | * interrupts are recoverable in principle, so set MSR_RI. | ||
1229 | * This becomes important for relocation-on interrupts from | ||
1230 | * the guest, which we can get in radix mode on POWER9. | ||
1231 | */ | ||
1232 | li r0, MSR_RI | ||
1233 | mtmsrd r0, 1 | ||
1234 | |||
1235 | #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING | 1235 | #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING |
1236 | addi r3, r9, VCPU_TB_RMINTR | 1236 | addi r3, r9, VCPU_TB_RMINTR |
1237 | mr r4, r9 | 1237 | mr r4, r9 |
@@ -1288,6 +1288,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) | |||
1288 | beq 4f | 1288 | beq 4f |
1289 | b guest_exit_cont | 1289 | b guest_exit_cont |
1290 | 3: | 1290 | 3: |
1291 | /* If it's a hypervisor facility unavailable interrupt, save HFSCR */ | ||
1292 | cmpwi r12, BOOK3S_INTERRUPT_H_FAC_UNAVAIL | ||
1293 | bne 14f | ||
1294 | mfspr r3, SPRN_HFSCR | ||
1295 | std r3, VCPU_HFSCR(r9) | ||
1296 | b guest_exit_cont | ||
1297 | 14: | ||
1291 | /* External interrupt ? */ | 1298 | /* External interrupt ? */ |
1292 | cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL | 1299 | cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL |
1293 | bne+ guest_exit_cont | 1300 | bne+ guest_exit_cont |
@@ -1475,12 +1482,18 @@ mc_cont: | |||
1475 | mtspr SPRN_SPURR,r4 | 1482 | mtspr SPRN_SPURR,r4 |
1476 | 1483 | ||
1477 | /* Save DEC */ | 1484 | /* Save DEC */ |
1485 | ld r3, HSTATE_KVM_VCORE(r13) | ||
1478 | mfspr r5,SPRN_DEC | 1486 | mfspr r5,SPRN_DEC |
1479 | mftb r6 | 1487 | mftb r6 |
1488 | /* On P9, if the guest has large decr enabled, don't sign extend */ | ||
1489 | BEGIN_FTR_SECTION | ||
1490 | ld r4, VCORE_LPCR(r3) | ||
1491 | andis. r4, r4, LPCR_LD@h | ||
1492 | bne 16f | ||
1493 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) | ||
1480 | extsw r5,r5 | 1494 | extsw r5,r5 |
1481 | add r5,r5,r6 | 1495 | 16: add r5,r5,r6 |
1482 | /* r5 is a guest timebase value here, convert to host TB */ | 1496 | /* r5 is a guest timebase value here, convert to host TB */ |
1483 | ld r3,HSTATE_KVM_VCORE(r13) | ||
1484 | ld r4,VCORE_TB_OFFSET(r3) | 1497 | ld r4,VCORE_TB_OFFSET(r3) |
1485 | subf r5,r4,r5 | 1498 | subf r5,r4,r5 |
1486 | std r5,VCPU_DEC_EXPIRES(r9) | 1499 | std r5,VCPU_DEC_EXPIRES(r9) |
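The DEC save paths touched here now check LPCR[LD] on POWER9: with the large decrementer enabled the register is wider than 32 bits, so sign-extending bit 31 would corrupt the value, while on earlier CPUs (or with LD clear) the architected 32-bit DEC still needs the sign extension. A small standalone sketch of that conditional widening; the LPCR bit value is a placeholder, not the kernel's constant:

    #include <stdint.h>
    #include <stdio.h>

    #define LPCR_LD_MODEL (1ULL << 46)   /* placeholder "large decrementer" bit */

    static uint64_t widen_dec(uint64_t raw_dec, uint64_t lpcr, int is_power9)
    {
        if (is_power9 && (lpcr & LPCR_LD_MODEL))
            return raw_dec;                   /* full-width, unsigned count */
        /* classic 32-bit DEC: sign-extend so "just expired" goes negative */
        return (uint64_t)(int64_t)(int32_t)(uint32_t)raw_dec;
    }

    int main(void)
    {
        uint64_t dec = 0xfffffff0;   /* "just expired" for a 32-bit DEC */
        printf("legacy: %lld\n", (long long)widen_dec(dec, 0, 1));
        printf("large:  %llu\n", (unsigned long long)widen_dec(dec, LPCR_LD_MODEL, 1));
        return 0;
    }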
@@ -1525,6 +1538,9 @@ FTR_SECTION_ELSE | |||
1525 | rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */ | 1538 | rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */ |
1526 | rotldi r6, r6, 60 | 1539 | rotldi r6, r6, 60 |
1527 | std r6, VCPU_PSSCR(r9) | 1540 | std r6, VCPU_PSSCR(r9) |
1541 | /* Restore host HFSCR value */ | ||
1542 | ld r7, STACK_SLOT_HFSCR(r1) | ||
1543 | mtspr SPRN_HFSCR, r7 | ||
1528 | ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) | 1544 | ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) |
1529 | /* | 1545 | /* |
1530 | * Restore various registers to 0, where non-zero values | 1546 | * Restore various registers to 0, where non-zero values |
@@ -2402,8 +2418,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) | |||
2402 | mfspr r3, SPRN_DEC | 2418 | mfspr r3, SPRN_DEC |
2403 | mfspr r4, SPRN_HDEC | 2419 | mfspr r4, SPRN_HDEC |
2404 | mftb r5 | 2420 | mftb r5 |
2421 | BEGIN_FTR_SECTION | ||
2422 | /* On P9 check whether the guest has large decrementer mode enabled */ | ||
2423 | ld r6, HSTATE_KVM_VCORE(r13) | ||
2424 | ld r6, VCORE_LPCR(r6) | ||
2425 | andis. r6, r6, LPCR_LD@h | ||
2426 | bne 68f | ||
2427 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) | ||
2405 | extsw r3, r3 | 2428 | extsw r3, r3 |
2406 | EXTEND_HDEC(r4) | 2429 | 68: EXTEND_HDEC(r4) |
2407 | cmpd r3, r4 | 2430 | cmpd r3, r4 |
2408 | ble 67f | 2431 | ble 67f |
2409 | mtspr SPRN_DEC, r4 | 2432 | mtspr SPRN_DEC, r4 |
@@ -2589,22 +2612,32 @@ machine_check_realmode: | |||
2589 | ld r9, HSTATE_KVM_VCPU(r13) | 2612 | ld r9, HSTATE_KVM_VCPU(r13) |
2590 | li r12, BOOK3S_INTERRUPT_MACHINE_CHECK | 2613 | li r12, BOOK3S_INTERRUPT_MACHINE_CHECK |
2591 | /* | 2614 | /* |
2592 | * Deliver unhandled/fatal (e.g. UE) MCE errors to guest through | 2615 | * For the guest that is FWNMI capable, deliver all the MCE errors |
2593 | * machine check interrupt (set HSRR0 to 0x200). And for handled | 2616 | * (handled/unhandled) by exiting the guest with KVM_EXIT_NMI exit |
2594 | * errors (no-fatal), just go back to guest execution with current | 2617 | * reason. This new approach injects machine check errors in guest |
2595 | * HSRR0 instead of exiting guest. This new approach will inject | 2618 | * address space to guest with additional information in the form |
2596 | * machine check to guest for fatal error causing guest to crash. | 2619 | * of RTAS event, thus enabling guest kernel to suitably handle |
2597 | * | 2620 | * such errors. |
2598 | * The old code used to return to host for unhandled errors which | ||
2599 | * was causing guest to hang with soft lockups inside guest and | ||
2600 | * makes it difficult to recover guest instance. | ||
2601 | * | 2621 | * |
2622 | * For the guest that is not FWNMI capable (old QEMU) fallback | ||
2623 | * to old behaviour for backward compatibility: | ||
2624 | * Deliver unhandled/fatal (e.g. UE) MCE errors to guest either | ||
2625 | * through machine check interrupt (set HSRR0 to 0x200). | ||
2626 | * For handled errors (no-fatal), just go back to guest execution | ||
2627 | * with current HSRR0. | ||
2602 | * if we receive machine check with MSR(RI=0) then deliver it to | 2628 | * if we receive machine check with MSR(RI=0) then deliver it to |
2603 | * guest as machine check causing guest to crash. | 2629 | * guest as machine check causing guest to crash. |
2604 | */ | 2630 | */ |
2605 | ld r11, VCPU_MSR(r9) | 2631 | ld r11, VCPU_MSR(r9) |
2606 | rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */ | 2632 | rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */ |
2607 | bne mc_cont /* if so, exit to host */ | 2633 | bne mc_cont /* if so, exit to host */ |
2634 | /* Check if guest is capable of handling NMI exit */ | ||
2635 | ld r10, VCPU_KVM(r9) | ||
2636 | lbz r10, KVM_FWNMI(r10) | ||
2637 | cmpdi r10, 1 /* FWNMI capable? */ | ||
2638 | beq mc_cont /* if so, exit with KVM_EXIT_NMI. */ | ||
2639 | |||
2640 | /* if not, fall through for backward compatibility. */ | ||
2608 | andi. r10, r11, MSR_RI /* check for unrecoverable exception */ | 2641 | andi. r10, r11, MSR_RI /* check for unrecoverable exception */ |
2609 | beq 1f /* Deliver a machine check to guest */ | 2642 | beq 1f /* Deliver a machine check to guest */ |
2610 | ld r10, VCPU_PC(r9) | 2643 | ld r10, VCPU_PC(r9) |
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index ffe1da95033a..08b200a0bbce 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c | |||
@@ -1257,8 +1257,8 @@ static void xive_pre_save_scan(struct kvmppc_xive *xive) | |||
1257 | if (!xc) | 1257 | if (!xc) |
1258 | continue; | 1258 | continue; |
1259 | for (j = 0; j < KVMPPC_XIVE_Q_COUNT; j++) { | 1259 | for (j = 0; j < KVMPPC_XIVE_Q_COUNT; j++) { |
1260 | if (xc->queues[i].qpage) | 1260 | if (xc->queues[j].qpage) |
1261 | xive_pre_save_queue(xive, &xc->queues[i]); | 1261 | xive_pre_save_queue(xive, &xc->queues[j]); |
1262 | } | 1262 | } |
1263 | } | 1263 | } |
1264 | 1264 | ||
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 3eaac3809977..071b87ee682f 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c | |||
@@ -687,7 +687,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) | |||
687 | 687 | ||
688 | kvmppc_core_check_exceptions(vcpu); | 688 | kvmppc_core_check_exceptions(vcpu); |
689 | 689 | ||
690 | if (vcpu->requests) { | 690 | if (kvm_request_pending(vcpu)) { |
691 | /* Exception delivery raised request; start over */ | 691 | /* Exception delivery raised request; start over */ |
692 | return 1; | 692 | return 1; |
693 | } | 693 | } |
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index c873ffe55362..4d8b4d6cebff 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c | |||
@@ -39,7 +39,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) | |||
39 | unsigned long dec_nsec; | 39 | unsigned long dec_nsec; |
40 | unsigned long long dec_time; | 40 | unsigned long long dec_time; |
41 | 41 | ||
42 | pr_debug("mtDEC: %x\n", vcpu->arch.dec); | 42 | pr_debug("mtDEC: %lx\n", vcpu->arch.dec); |
43 | hrtimer_try_to_cancel(&vcpu->arch.dec_timer); | 43 | hrtimer_try_to_cancel(&vcpu->arch.dec_timer); |
44 | 44 | ||
45 | #ifdef CONFIG_PPC_BOOK3S | 45 | #ifdef CONFIG_PPC_BOOK3S |
@@ -109,7 +109,7 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | |||
109 | case SPRN_TBWU: break; | 109 | case SPRN_TBWU: break; |
110 | 110 | ||
111 | case SPRN_DEC: | 111 | case SPRN_DEC: |
112 | vcpu->arch.dec = spr_val; | 112 | vcpu->arch.dec = (u32) spr_val; |
113 | kvmppc_emulate_dec(vcpu); | 113 | kvmppc_emulate_dec(vcpu); |
114 | break; | 114 | break; |
115 | 115 | ||
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 7f71ab5fcad1..1a75c0b5f4ca 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -55,8 +55,7 @@ EXPORT_SYMBOL_GPL(kvmppc_pr_ops); | |||
55 | 55 | ||
56 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) | 56 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) |
57 | { | 57 | { |
58 | return !!(v->arch.pending_exceptions) || | 58 | return !!(v->arch.pending_exceptions) || kvm_request_pending(v); |
59 | v->requests; | ||
60 | } | 59 | } |
61 | 60 | ||
62 | int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) | 61 | int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) |
@@ -108,7 +107,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) | |||
108 | */ | 107 | */ |
109 | smp_mb(); | 108 | smp_mb(); |
110 | 109 | ||
111 | if (vcpu->requests) { | 110 | if (kvm_request_pending(vcpu)) { |
112 | /* Make sure we process requests preemptable */ | 111 | /* Make sure we process requests preemptable */ |
113 | local_irq_enable(); | 112 | local_irq_enable(); |
114 | trace_kvm_check_requests(vcpu); | 113 | trace_kvm_check_requests(vcpu); |
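Call sites that used to peek at vcpu->requests directly now go through kvm_request_pending(), part of this merge's VCPU-request overhaul: one helper answers "is any request bit set" instead of ad-hoc reads of the bitmap. A sketch of what such an accessor boils down to, with simplified types (the real definition lives in kvm_host.h and is essentially an atomic read of the request bitmap):

    #include <stdbool.h>
    #include <stdio.h>

    struct vcpu_model {
        unsigned long requests;   /* one bit per outstanding request */
    };

    /* The accessor callers use instead of poking at ->requests directly. */
    static bool request_pending(const struct vcpu_model *v)
    {
        return __atomic_load_n(&v->requests, __ATOMIC_RELAXED) != 0;
    }

    int main(void)
    {
        struct vcpu_model v = { .requests = 0 };
        printf("%d\n", request_pending(&v));  /* 0: nothing to do */
        v.requests |= 1UL << 3;               /* e.g. an arch-specific request bit */
        printf("%d\n", request_pending(&v));  /* 1: re-run the request loop */
        return 0;
    }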
@@ -554,13 +553,28 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
554 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | 553 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE |
555 | case KVM_CAP_PPC_SMT: | 554 | case KVM_CAP_PPC_SMT: |
556 | r = 0; | 555 | r = 0; |
557 | if (hv_enabled) { | 556 | if (kvm) { |
557 | if (kvm->arch.emul_smt_mode > 1) | ||
558 | r = kvm->arch.emul_smt_mode; | ||
559 | else | ||
560 | r = kvm->arch.smt_mode; | ||
561 | } else if (hv_enabled) { | ||
558 | if (cpu_has_feature(CPU_FTR_ARCH_300)) | 562 | if (cpu_has_feature(CPU_FTR_ARCH_300)) |
559 | r = 1; | 563 | r = 1; |
560 | else | 564 | else |
561 | r = threads_per_subcore; | 565 | r = threads_per_subcore; |
562 | } | 566 | } |
563 | break; | 567 | break; |
568 | case KVM_CAP_PPC_SMT_POSSIBLE: | ||
569 | r = 1; | ||
570 | if (hv_enabled) { | ||
571 | if (!cpu_has_feature(CPU_FTR_ARCH_300)) | ||
572 | r = ((threads_per_subcore << 1) - 1); | ||
573 | else | ||
574 | /* P9 can emulate dbells, so allow any mode */ | ||
575 | r = 8 | 4 | 2 | 1; | ||
576 | } | ||
577 | break; | ||
564 | case KVM_CAP_PPC_RMA: | 578 | case KVM_CAP_PPC_RMA: |
565 | r = 0; | 579 | r = 0; |
566 | break; | 580 | break; |
@@ -619,6 +633,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
619 | r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300); | 633 | r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300); |
620 | break; | 634 | break; |
621 | #endif | 635 | #endif |
636 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | ||
637 | case KVM_CAP_PPC_FWNMI: | ||
638 | r = hv_enabled; | ||
639 | break; | ||
640 | #endif | ||
622 | case KVM_CAP_PPC_HTM: | 641 | case KVM_CAP_PPC_HTM: |
623 | r = cpu_has_feature(CPU_FTR_TM_COMP) && | 642 | r = cpu_has_feature(CPU_FTR_TM_COMP) && |
624 | is_kvmppc_hv_enabled(kvm); | 643 | is_kvmppc_hv_enabled(kvm); |
@@ -1538,6 +1557,15 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, | |||
1538 | break; | 1557 | break; |
1539 | } | 1558 | } |
1540 | #endif /* CONFIG_KVM_XICS */ | 1559 | #endif /* CONFIG_KVM_XICS */ |
1560 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | ||
1561 | case KVM_CAP_PPC_FWNMI: | ||
1562 | r = -EINVAL; | ||
1563 | if (!is_kvmppc_hv_enabled(vcpu->kvm)) | ||
1564 | break; | ||
1565 | r = 0; | ||
1566 | vcpu->kvm->arch.fwnmi_enabled = true; | ||
1567 | break; | ||
1568 | #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ | ||
1541 | default: | 1569 | default: |
1542 | r = -EINVAL; | 1570 | r = -EINVAL; |
1543 | break; | 1571 | break; |
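KVM_CAP_PPC_FWNMI is enabled per vCPU: userspace that knows how to turn a KVM_EXIT_NMI exit into an FWNMI-style machine check for the guest opts in through KVM_ENABLE_CAP, and the cap is refused (-EINVAL) unless the VM uses the HV backend. A minimal userspace sketch, assuming vcpu_fd is an already-created vCPU file descriptor and the headers are new enough to define the capability:

    #include <linux/kvm.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>

    /* Opt the vCPU in to FWNMI machine-check delivery (KVM_EXIT_NMI). */
    static int enable_fwnmi(int vcpu_fd)
    {
        struct kvm_enable_cap cap;

        memset(&cap, 0, sizeof(cap));
        cap.cap = KVM_CAP_PPC_FWNMI;
        if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0) {
            perror("KVM_ENABLE_CAP(KVM_CAP_PPC_FWNMI)");
            return -1;
        }
        return 0;
    }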
@@ -1712,6 +1740,15 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, | |||
1712 | r = 0; | 1740 | r = 0; |
1713 | break; | 1741 | break; |
1714 | } | 1742 | } |
1743 | case KVM_CAP_PPC_SMT: { | ||
1744 | unsigned long mode = cap->args[0]; | ||
1745 | unsigned long flags = cap->args[1]; | ||
1746 | |||
1747 | r = -EINVAL; | ||
1748 | if (kvm->arch.kvm_ops->set_smt_mode) | ||
1749 | r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags); | ||
1750 | break; | ||
1751 | } | ||
1715 | #endif | 1752 | #endif |
1716 | default: | 1753 | default: |
1717 | r = -EINVAL; | 1754 | r = -EINVAL; |
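With this hunk KVM_CAP_PPC_SMT becomes a VM-level enable-cap as well as a query: KVM_CAP_PPC_SMT_POSSIBLE reports the offered modes as a bitmask (8|4|2|1 on POWER9, which can emulate doorbells for any mode), and KVM_ENABLE_CAP(KVM_CAP_PPC_SMT) with args[0] set to the desired threads-per-virtual-core hands the request to the backend's set_smt_mode hook. A hedged userspace sketch, assuming vm_fd is an open VM file descriptor and headers that define both capability numbers:

    #include <linux/kvm.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>

    /* Ask for 'threads' guest threads per virtual core, if the host allows it. */
    static int set_guest_smt_mode(int vm_fd, int threads)
    {
        struct kvm_enable_cap cap;
        int possible;

        possible = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_SMT_POSSIBLE);
        if (possible <= 0 || !(possible & threads)) {
            fprintf(stderr, "SMT-%d not offered (mask 0x%x)\n", threads, possible);
            return -1;
        }

        memset(&cap, 0, sizeof(cap));
        cap.cap = KVM_CAP_PPC_SMT;
        cap.args[0] = threads;   /* args[1] = flags, left at 0 here */
        if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0) {
            perror("KVM_ENABLE_CAP(KVM_CAP_PPC_SMT)");
            return -1;
        }
        return 0;
    }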
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index d0441ad2a990..e508dff92535 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h | |||
@@ -59,7 +59,9 @@ union ctlreg0 { | |||
59 | unsigned long lap : 1; /* Low-address-protection control */ | 59 | unsigned long lap : 1; /* Low-address-protection control */ |
60 | unsigned long : 4; | 60 | unsigned long : 4; |
61 | unsigned long edat : 1; /* Enhanced-DAT-enablement control */ | 61 | unsigned long edat : 1; /* Enhanced-DAT-enablement control */ |
62 | unsigned long : 4; | 62 | unsigned long : 2; |
63 | unsigned long iep : 1; /* Instruction-Execution-Protection */ | ||
64 | unsigned long : 1; | ||
63 | unsigned long afp : 1; /* AFP-register control */ | 65 | unsigned long afp : 1; /* AFP-register control */ |
64 | unsigned long vx : 1; /* Vector enablement control */ | 66 | unsigned long vx : 1; /* Vector enablement control */ |
65 | unsigned long : 7; | 67 | unsigned long : 7; |
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 6baae236f461..a409d5991934 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h | |||
@@ -42,9 +42,11 @@ | |||
42 | #define KVM_HALT_POLL_NS_DEFAULT 80000 | 42 | #define KVM_HALT_POLL_NS_DEFAULT 80000 |
43 | 43 | ||
44 | /* s390-specific vcpu->requests bit members */ | 44 | /* s390-specific vcpu->requests bit members */ |
45 | #define KVM_REQ_ENABLE_IBS 8 | 45 | #define KVM_REQ_ENABLE_IBS KVM_ARCH_REQ(0) |
46 | #define KVM_REQ_DISABLE_IBS 9 | 46 | #define KVM_REQ_DISABLE_IBS KVM_ARCH_REQ(1) |
47 | #define KVM_REQ_ICPT_OPEREXC 10 | 47 | #define KVM_REQ_ICPT_OPEREXC KVM_ARCH_REQ(2) |
48 | #define KVM_REQ_START_MIGRATION KVM_ARCH_REQ(3) | ||
49 | #define KVM_REQ_STOP_MIGRATION KVM_ARCH_REQ(4) | ||
48 | 50 | ||
49 | #define SIGP_CTRL_C 0x80 | 51 | #define SIGP_CTRL_C 0x80 |
50 | #define SIGP_CTRL_SCN_MASK 0x3f | 52 | #define SIGP_CTRL_SCN_MASK 0x3f |
@@ -56,7 +58,7 @@ union bsca_sigp_ctrl { | |||
56 | __u8 r : 1; | 58 | __u8 r : 1; |
57 | __u8 scn : 6; | 59 | __u8 scn : 6; |
58 | }; | 60 | }; |
59 | } __packed; | 61 | }; |
60 | 62 | ||
61 | union esca_sigp_ctrl { | 63 | union esca_sigp_ctrl { |
62 | __u16 value; | 64 | __u16 value; |
@@ -65,14 +67,14 @@ union esca_sigp_ctrl { | |||
65 | __u8 reserved: 7; | 67 | __u8 reserved: 7; |
66 | __u8 scn; | 68 | __u8 scn; |
67 | }; | 69 | }; |
68 | } __packed; | 70 | }; |
69 | 71 | ||
70 | struct esca_entry { | 72 | struct esca_entry { |
71 | union esca_sigp_ctrl sigp_ctrl; | 73 | union esca_sigp_ctrl sigp_ctrl; |
72 | __u16 reserved1[3]; | 74 | __u16 reserved1[3]; |
73 | __u64 sda; | 75 | __u64 sda; |
74 | __u64 reserved2[6]; | 76 | __u64 reserved2[6]; |
75 | } __packed; | 77 | }; |
76 | 78 | ||
77 | struct bsca_entry { | 79 | struct bsca_entry { |
78 | __u8 reserved0; | 80 | __u8 reserved0; |
@@ -80,7 +82,7 @@ struct bsca_entry { | |||
80 | __u16 reserved[3]; | 82 | __u16 reserved[3]; |
81 | __u64 sda; | 83 | __u64 sda; |
82 | __u64 reserved2[2]; | 84 | __u64 reserved2[2]; |
83 | } __attribute__((packed)); | 85 | }; |
84 | 86 | ||
85 | union ipte_control { | 87 | union ipte_control { |
86 | unsigned long val; | 88 | unsigned long val; |
@@ -97,7 +99,7 @@ struct bsca_block { | |||
97 | __u64 mcn; | 99 | __u64 mcn; |
98 | __u64 reserved2; | 100 | __u64 reserved2; |
99 | struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; | 101 | struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; |
100 | } __attribute__((packed)); | 102 | }; |
101 | 103 | ||
102 | struct esca_block { | 104 | struct esca_block { |
103 | union ipte_control ipte_control; | 105 | union ipte_control ipte_control; |
@@ -105,7 +107,7 @@ struct esca_block { | |||
105 | __u64 mcn[4]; | 107 | __u64 mcn[4]; |
106 | __u64 reserved2[20]; | 108 | __u64 reserved2[20]; |
107 | struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; | 109 | struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; |
108 | } __packed; | 110 | }; |
109 | 111 | ||
110 | /* | 112 | /* |
111 | * This struct is used to store some machine check info from lowcore | 113 | * This struct is used to store some machine check info from lowcore |
@@ -274,7 +276,7 @@ struct kvm_s390_sie_block { | |||
274 | 276 | ||
275 | struct kvm_s390_itdb { | 277 | struct kvm_s390_itdb { |
276 | __u8 data[256]; | 278 | __u8 data[256]; |
277 | } __packed; | 279 | }; |
278 | 280 | ||
279 | struct sie_page { | 281 | struct sie_page { |
280 | struct kvm_s390_sie_block sie_block; | 282 | struct kvm_s390_sie_block sie_block; |
@@ -282,7 +284,7 @@ struct sie_page { | |||
282 | __u8 reserved218[1000]; /* 0x0218 */ | 284 | __u8 reserved218[1000]; /* 0x0218 */ |
283 | struct kvm_s390_itdb itdb; /* 0x0600 */ | 285 | struct kvm_s390_itdb itdb; /* 0x0600 */ |
284 | __u8 reserved700[2304]; /* 0x0700 */ | 286 | __u8 reserved700[2304]; /* 0x0700 */ |
285 | } __packed; | 287 | }; |
286 | 288 | ||
287 | struct kvm_vcpu_stat { | 289 | struct kvm_vcpu_stat { |
288 | u64 exit_userspace; | 290 | u64 exit_userspace; |
@@ -695,7 +697,7 @@ struct sie_page2 { | |||
695 | __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */ | 697 | __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */ |
696 | struct kvm_s390_crypto_cb crycb; /* 0x0800 */ | 698 | struct kvm_s390_crypto_cb crycb; /* 0x0800 */ |
697 | u8 reserved900[0x1000 - 0x900]; /* 0x0900 */ | 699 | u8 reserved900[0x1000 - 0x900]; /* 0x0900 */ |
698 | } __packed; | 700 | }; |
699 | 701 | ||
700 | struct kvm_s390_vsie { | 702 | struct kvm_s390_vsie { |
701 | struct mutex mutex; | 703 | struct mutex mutex; |
@@ -705,6 +707,12 @@ struct kvm_s390_vsie { | |||
705 | struct page *pages[KVM_MAX_VCPUS]; | 707 | struct page *pages[KVM_MAX_VCPUS]; |
706 | }; | 708 | }; |
707 | 709 | ||
710 | struct kvm_s390_migration_state { | ||
711 | unsigned long bitmap_size; /* in bits (number of guest pages) */ | ||
712 | atomic64_t dirty_pages; /* number of dirty pages */ | ||
713 | unsigned long *pgste_bitmap; | ||
714 | }; | ||
715 | |||
708 | struct kvm_arch{ | 716 | struct kvm_arch{ |
709 | void *sca; | 717 | void *sca; |
710 | int use_esca; | 718 | int use_esca; |
@@ -732,6 +740,7 @@ struct kvm_arch{ | |||
732 | struct kvm_s390_crypto crypto; | 740 | struct kvm_s390_crypto crypto; |
733 | struct kvm_s390_vsie vsie; | 741 | struct kvm_s390_vsie vsie; |
734 | u64 epoch; | 742 | u64 epoch; |
743 | struct kvm_s390_migration_state *migration_state; | ||
735 | /* subset of available cpu features enabled by user space */ | 744 | /* subset of available cpu features enabled by user space */ |
736 | DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); | 745 | DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); |
737 | }; | 746 | }; |
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index 13623b9991d4..9d91cf3e427f 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h | |||
@@ -26,6 +26,12 @@ | |||
26 | #define MCCK_CODE_PSW_MWP_VALID _BITUL(63 - 20) | 26 | #define MCCK_CODE_PSW_MWP_VALID _BITUL(63 - 20) |
27 | #define MCCK_CODE_PSW_IA_VALID _BITUL(63 - 23) | 27 | #define MCCK_CODE_PSW_IA_VALID _BITUL(63 - 23) |
28 | 28 | ||
29 | #define MCCK_CR14_CR_PENDING_SUB_MASK (1 << 28) | ||
30 | #define MCCK_CR14_RECOVERY_SUB_MASK (1 << 27) | ||
31 | #define MCCK_CR14_DEGRAD_SUB_MASK (1 << 26) | ||
32 | #define MCCK_CR14_EXT_DAMAGE_SUB_MASK (1 << 25) | ||
33 | #define MCCK_CR14_WARN_SUB_MASK (1 << 24) | ||
34 | |||
29 | #ifndef __ASSEMBLY__ | 35 | #ifndef __ASSEMBLY__ |
30 | 36 | ||
31 | union mci { | 37 | union mci { |
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 3dd2a1d308dd..69d09c39bbcd 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h | |||
@@ -28,6 +28,7 @@ | |||
28 | #define KVM_DEV_FLIC_CLEAR_IO_IRQ 8 | 28 | #define KVM_DEV_FLIC_CLEAR_IO_IRQ 8 |
29 | #define KVM_DEV_FLIC_AISM 9 | 29 | #define KVM_DEV_FLIC_AISM 9 |
30 | #define KVM_DEV_FLIC_AIRQ_INJECT 10 | 30 | #define KVM_DEV_FLIC_AIRQ_INJECT 10 |
31 | #define KVM_DEV_FLIC_AISM_ALL 11 | ||
31 | /* | 32 | /* |
32 | * We can have up to 4*64k pending subchannels + 8 adapter interrupts, | 33 | * We can have up to 4*64k pending subchannels + 8 adapter interrupts, |
33 | * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. | 34 | * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. |
@@ -53,6 +54,11 @@ struct kvm_s390_ais_req { | |||
53 | __u16 mode; | 54 | __u16 mode; |
54 | }; | 55 | }; |
55 | 56 | ||
57 | struct kvm_s390_ais_all { | ||
58 | __u8 simm; | ||
59 | __u8 nimm; | ||
60 | }; | ||
61 | |||
56 | #define KVM_S390_IO_ADAPTER_MASK 1 | 62 | #define KVM_S390_IO_ADAPTER_MASK 1 |
57 | #define KVM_S390_IO_ADAPTER_MAP 2 | 63 | #define KVM_S390_IO_ADAPTER_MAP 2 |
58 | #define KVM_S390_IO_ADAPTER_UNMAP 3 | 64 | #define KVM_S390_IO_ADAPTER_UNMAP 3 |
@@ -70,6 +76,7 @@ struct kvm_s390_io_adapter_req { | |||
70 | #define KVM_S390_VM_TOD 1 | 76 | #define KVM_S390_VM_TOD 1 |
71 | #define KVM_S390_VM_CRYPTO 2 | 77 | #define KVM_S390_VM_CRYPTO 2 |
72 | #define KVM_S390_VM_CPU_MODEL 3 | 78 | #define KVM_S390_VM_CPU_MODEL 3 |
79 | #define KVM_S390_VM_MIGRATION 4 | ||
73 | 80 | ||
74 | /* kvm attributes for mem_ctrl */ | 81 | /* kvm attributes for mem_ctrl */ |
75 | #define KVM_S390_VM_MEM_ENABLE_CMMA 0 | 82 | #define KVM_S390_VM_MEM_ENABLE_CMMA 0 |
@@ -151,6 +158,11 @@ struct kvm_s390_vm_cpu_subfunc { | |||
151 | #define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2 | 158 | #define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2 |
152 | #define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3 | 159 | #define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3 |
153 | 160 | ||
161 | /* kvm attributes for migration mode */ | ||
162 | #define KVM_S390_VM_MIGRATION_STOP 0 | ||
163 | #define KVM_S390_VM_MIGRATION_START 1 | ||
164 | #define KVM_S390_VM_MIGRATION_STATUS 2 | ||
165 | |||
154 | /* for KVM_GET_REGS and KVM_SET_REGS */ | 166 | /* for KVM_GET_REGS and KVM_SET_REGS */ |
155 | struct kvm_regs { | 167 | struct kvm_regs { |
156 | /* general purpose regs for s390 */ | 168 | /* general purpose regs for s390 */ |
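The new KVM_S390_VM_MIGRATION attribute group gives userspace a switch for migration mode: START makes the kernel begin tracking per-page CMMA/PGSTE dirty state in the bitmap and counter added to kvm_arch above, STOP tears it down, and STATUS can be polled. A short userspace sketch using the generic device-attribute ioctls on the VM file descriptor; vm_fd and the presence of these definitions in the installed headers are assumed:

    #include <linux/kvm.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>

    /* Switch the s390 VM into (or out of) migration mode. */
    static int set_migration_mode(int vm_fd, int start)
    {
        struct kvm_device_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.group = KVM_S390_VM_MIGRATION;
        attr.attr  = start ? KVM_S390_VM_MIGRATION_START
                           : KVM_S390_VM_MIGRATION_STOP;

        if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) < 0) {
            fprintf(stderr, "migration mode not supported by this kernel\n");
            return -1;
        }
        if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr) < 0) {
            perror("KVM_SET_DEVICE_ATTR(KVM_S390_VM_MIGRATION)");
            return -1;
        }
        return 0;
    }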
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 875f8bea8c67..653cae5e1ee1 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c | |||
@@ -89,7 +89,7 @@ struct region3_table_entry_fc1 { | |||
89 | unsigned long f : 1; /* Fetch-Protection Bit */ | 89 | unsigned long f : 1; /* Fetch-Protection Bit */ |
90 | unsigned long fc : 1; /* Format-Control */ | 90 | unsigned long fc : 1; /* Format-Control */ |
91 | unsigned long p : 1; /* DAT-Protection Bit */ | 91 | unsigned long p : 1; /* DAT-Protection Bit */ |
92 | unsigned long co : 1; /* Change-Recording Override */ | 92 | unsigned long iep: 1; /* Instruction-Execution-Protection */ |
93 | unsigned long : 2; | 93 | unsigned long : 2; |
94 | unsigned long i : 1; /* Region-Invalid Bit */ | 94 | unsigned long i : 1; /* Region-Invalid Bit */ |
95 | unsigned long cr : 1; /* Common-Region Bit */ | 95 | unsigned long cr : 1; /* Common-Region Bit */ |
@@ -131,7 +131,7 @@ struct segment_entry_fc1 { | |||
131 | unsigned long f : 1; /* Fetch-Protection Bit */ | 131 | unsigned long f : 1; /* Fetch-Protection Bit */ |
132 | unsigned long fc : 1; /* Format-Control */ | 132 | unsigned long fc : 1; /* Format-Control */ |
133 | unsigned long p : 1; /* DAT-Protection Bit */ | 133 | unsigned long p : 1; /* DAT-Protection Bit */ |
134 | unsigned long co : 1; /* Change-Recording Override */ | 134 | unsigned long iep: 1; /* Instruction-Execution-Protection */ |
135 | unsigned long : 2; | 135 | unsigned long : 2; |
136 | unsigned long i : 1; /* Segment-Invalid Bit */ | 136 | unsigned long i : 1; /* Segment-Invalid Bit */ |
137 | unsigned long cs : 1; /* Common-Segment Bit */ | 137 | unsigned long cs : 1; /* Common-Segment Bit */ |
@@ -168,7 +168,8 @@ union page_table_entry { | |||
168 | unsigned long z : 1; /* Zero Bit */ | 168 | unsigned long z : 1; /* Zero Bit */ |
169 | unsigned long i : 1; /* Page-Invalid Bit */ | 169 | unsigned long i : 1; /* Page-Invalid Bit */ |
170 | unsigned long p : 1; /* DAT-Protection Bit */ | 170 | unsigned long p : 1; /* DAT-Protection Bit */ |
171 | unsigned long : 9; | 171 | unsigned long iep: 1; /* Instruction-Execution-Protection */ |
172 | unsigned long : 8; | ||
172 | }; | 173 | }; |
173 | }; | 174 | }; |
174 | 175 | ||
@@ -241,7 +242,7 @@ struct ale { | |||
241 | unsigned long asteo : 25; /* ASN-Second-Table-Entry Origin */ | 242 | unsigned long asteo : 25; /* ASN-Second-Table-Entry Origin */ |
242 | unsigned long : 6; | 243 | unsigned long : 6; |
243 | unsigned long astesn : 32; /* ASTE Sequence Number */ | 244 | unsigned long astesn : 32; /* ASTE Sequence Number */ |
244 | } __packed; | 245 | }; |
245 | 246 | ||
246 | struct aste { | 247 | struct aste { |
247 | unsigned long i : 1; /* ASX-Invalid Bit */ | 248 | unsigned long i : 1; /* ASX-Invalid Bit */ |
@@ -257,7 +258,7 @@ struct aste { | |||
257 | unsigned long ald : 32; | 258 | unsigned long ald : 32; |
258 | unsigned long astesn : 32; | 259 | unsigned long astesn : 32; |
259 | /* .. more fields there */ | 260 | /* .. more fields there */ |
260 | } __packed; | 261 | }; |
261 | 262 | ||
262 | int ipte_lock_held(struct kvm_vcpu *vcpu) | 263 | int ipte_lock_held(struct kvm_vcpu *vcpu) |
263 | { | 264 | { |
@@ -485,6 +486,7 @@ enum prot_type { | |||
485 | PROT_TYPE_KEYC = 1, | 486 | PROT_TYPE_KEYC = 1, |
486 | PROT_TYPE_ALC = 2, | 487 | PROT_TYPE_ALC = 2, |
487 | PROT_TYPE_DAT = 3, | 488 | PROT_TYPE_DAT = 3, |
489 | PROT_TYPE_IEP = 4, | ||
488 | }; | 490 | }; |
489 | 491 | ||
490 | static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, | 492 | static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, |
@@ -500,6 +502,9 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, | |||
500 | switch (code) { | 502 | switch (code) { |
501 | case PGM_PROTECTION: | 503 | case PGM_PROTECTION: |
502 | switch (prot) { | 504 | switch (prot) { |
505 | case PROT_TYPE_IEP: | ||
506 | tec->b61 = 1; | ||
507 | /* FALL THROUGH */ | ||
503 | case PROT_TYPE_LA: | 508 | case PROT_TYPE_LA: |
504 | tec->b56 = 1; | 509 | tec->b56 = 1; |
505 | break; | 510 | break; |
@@ -591,6 +596,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) | |||
591 | * @gpa: points to where guest physical (absolute) address should be stored | 596 | * @gpa: points to where guest physical (absolute) address should be stored |
592 | * @asce: effective asce | 597 | * @asce: effective asce |
593 | * @mode: indicates the access mode to be used | 598 | * @mode: indicates the access mode to be used |
599 | * @prot: returns the type for protection exceptions | ||
594 | * | 600 | * |
595 | * Translate a guest virtual address into a guest absolute address by means | 601 | * Translate a guest virtual address into a guest absolute address by means |
596 | * of dynamic address translation as specified by the architecture. | 602 | * of dynamic address translation as specified by the architecture. |
@@ -606,19 +612,21 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) | |||
606 | */ | 612 | */ |
607 | static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, | 613 | static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, |
608 | unsigned long *gpa, const union asce asce, | 614 | unsigned long *gpa, const union asce asce, |
609 | enum gacc_mode mode) | 615 | enum gacc_mode mode, enum prot_type *prot) |
610 | { | 616 | { |
611 | union vaddress vaddr = {.addr = gva}; | 617 | union vaddress vaddr = {.addr = gva}; |
612 | union raddress raddr = {.addr = gva}; | 618 | union raddress raddr = {.addr = gva}; |
613 | union page_table_entry pte; | 619 | union page_table_entry pte; |
614 | int dat_protection = 0; | 620 | int dat_protection = 0; |
621 | int iep_protection = 0; | ||
615 | union ctlreg0 ctlreg0; | 622 | union ctlreg0 ctlreg0; |
616 | unsigned long ptr; | 623 | unsigned long ptr; |
617 | int edat1, edat2; | 624 | int edat1, edat2, iep; |
618 | 625 | ||
619 | ctlreg0.val = vcpu->arch.sie_block->gcr[0]; | 626 | ctlreg0.val = vcpu->arch.sie_block->gcr[0]; |
620 | edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8); | 627 | edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8); |
621 | edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78); | 628 | edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78); |
629 | iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130); | ||
622 | if (asce.r) | 630 | if (asce.r) |
623 | goto real_address; | 631 | goto real_address; |
624 | ptr = asce.origin * 4096; | 632 | ptr = asce.origin * 4096; |
@@ -702,6 +710,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, | |||
702 | return PGM_TRANSLATION_SPEC; | 710 | return PGM_TRANSLATION_SPEC; |
703 | if (rtte.fc && edat2) { | 711 | if (rtte.fc && edat2) { |
704 | dat_protection |= rtte.fc1.p; | 712 | dat_protection |= rtte.fc1.p; |
713 | iep_protection = rtte.fc1.iep; | ||
705 | raddr.rfaa = rtte.fc1.rfaa; | 714 | raddr.rfaa = rtte.fc1.rfaa; |
706 | goto absolute_address; | 715 | goto absolute_address; |
707 | } | 716 | } |
@@ -729,6 +738,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, | |||
729 | return PGM_TRANSLATION_SPEC; | 738 | return PGM_TRANSLATION_SPEC; |
730 | if (ste.fc && edat1) { | 739 | if (ste.fc && edat1) { |
731 | dat_protection |= ste.fc1.p; | 740 | dat_protection |= ste.fc1.p; |
741 | iep_protection = ste.fc1.iep; | ||
732 | raddr.sfaa = ste.fc1.sfaa; | 742 | raddr.sfaa = ste.fc1.sfaa; |
733 | goto absolute_address; | 743 | goto absolute_address; |
734 | } | 744 | } |
@@ -745,12 +755,19 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, | |||
745 | if (pte.z) | 755 | if (pte.z) |
746 | return PGM_TRANSLATION_SPEC; | 756 | return PGM_TRANSLATION_SPEC; |
747 | dat_protection |= pte.p; | 757 | dat_protection |= pte.p; |
758 | iep_protection = pte.iep; | ||
748 | raddr.pfra = pte.pfra; | 759 | raddr.pfra = pte.pfra; |
749 | real_address: | 760 | real_address: |
750 | raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr); | 761 | raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr); |
751 | absolute_address: | 762 | absolute_address: |
752 | if (mode == GACC_STORE && dat_protection) | 763 | if (mode == GACC_STORE && dat_protection) { |
764 | *prot = PROT_TYPE_DAT; | ||
753 | return PGM_PROTECTION; | 765 | return PGM_PROTECTION; |
766 | } | ||
767 | if (mode == GACC_IFETCH && iep_protection && iep) { | ||
768 | *prot = PROT_TYPE_IEP; | ||
769 | return PGM_PROTECTION; | ||
770 | } | ||
754 | if (kvm_is_error_gpa(vcpu->kvm, raddr.addr)) | 771 | if (kvm_is_error_gpa(vcpu->kvm, raddr.addr)) |
755 | return PGM_ADDRESSING; | 772 | return PGM_ADDRESSING; |
756 | *gpa = raddr.addr; | 773 | *gpa = raddr.addr; |
@@ -782,6 +799,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, | |||
782 | { | 799 | { |
783 | psw_t *psw = &vcpu->arch.sie_block->gpsw; | 800 | psw_t *psw = &vcpu->arch.sie_block->gpsw; |
784 | int lap_enabled, rc = 0; | 801 | int lap_enabled, rc = 0; |
802 | enum prot_type prot; | ||
785 | 803 | ||
786 | lap_enabled = low_address_protection_enabled(vcpu, asce); | 804 | lap_enabled = low_address_protection_enabled(vcpu, asce); |
787 | while (nr_pages) { | 805 | while (nr_pages) { |
@@ -791,7 +809,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, | |||
791 | PROT_TYPE_LA); | 809 | PROT_TYPE_LA); |
792 | ga &= PAGE_MASK; | 810 | ga &= PAGE_MASK; |
793 | if (psw_bits(*psw).dat) { | 811 | if (psw_bits(*psw).dat) { |
794 | rc = guest_translate(vcpu, ga, pages, asce, mode); | 812 | rc = guest_translate(vcpu, ga, pages, asce, mode, &prot); |
795 | if (rc < 0) | 813 | if (rc < 0) |
796 | return rc; | 814 | return rc; |
797 | } else { | 815 | } else { |
@@ -800,7 +818,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, | |||
800 | rc = PGM_ADDRESSING; | 818 | rc = PGM_ADDRESSING; |
801 | } | 819 | } |
802 | if (rc) | 820 | if (rc) |
803 | return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_DAT); | 821 | return trans_exc(vcpu, rc, ga, ar, mode, prot); |
804 | ga += PAGE_SIZE; | 822 | ga += PAGE_SIZE; |
805 | pages++; | 823 | pages++; |
806 | nr_pages--; | 824 | nr_pages--; |
@@ -886,6 +904,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, | |||
886 | unsigned long *gpa, enum gacc_mode mode) | 904 | unsigned long *gpa, enum gacc_mode mode) |
887 | { | 905 | { |
888 | psw_t *psw = &vcpu->arch.sie_block->gpsw; | 906 | psw_t *psw = &vcpu->arch.sie_block->gpsw; |
907 | enum prot_type prot; | ||
889 | union asce asce; | 908 | union asce asce; |
890 | int rc; | 909 | int rc; |
891 | 910 | ||
@@ -900,9 +919,9 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, | |||
900 | } | 919 | } |
901 | 920 | ||
902 | if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? */ | 921 | if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? */ |
903 | rc = guest_translate(vcpu, gva, gpa, asce, mode); | 922 | rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot); |
904 | if (rc > 0) | 923 | if (rc > 0) |
905 | return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT); | 924 | return trans_exc(vcpu, rc, gva, 0, mode, prot); |
906 | } else { | 925 | } else { |
907 | *gpa = kvm_s390_real_to_abs(vcpu, gva); | 926 | *gpa = kvm_s390_real_to_abs(vcpu, gva); |
908 | if (kvm_is_error_gpa(vcpu->kvm, *gpa)) | 927 | if (kvm_is_error_gpa(vcpu->kvm, *gpa)) |
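Note on the gaccess.c hunks above: guest_translate() now reports the reason for a protection exception through a new out-parameter, so callers can distinguish ordinary DAT protection from the new Instruction-Execution-Protection case when building the translation-exception identification. A minimal caller-side sketch of that pattern, using only the names visible in the hunks above (it mirrors guest_translate_address and guest_page_range):

	/* Sketch: the translate-then-report pattern after this change. */
	enum prot_type prot;
	unsigned long gpa;
	int rc;

	rc = guest_translate(vcpu, gva, &gpa, asce, mode, &prot);
	if (rc > 0)     /* a program interruption code was returned */
		return trans_exc(vcpu, rc, gva, 0, mode, prot);
	if (rc < 0)     /* host-side error (e.g. -EFAULT) is passed up */
		return rc;
	/* on success, gpa holds the guest absolute address */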
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2d120fef7d90..a619ddae610d 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
@@ -251,8 +251,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) | |||
251 | __clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask); | 251 | __clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask); |
252 | if (psw_mchk_disabled(vcpu)) | 252 | if (psw_mchk_disabled(vcpu)) |
253 | active_mask &= ~IRQ_PEND_MCHK_MASK; | 253 | active_mask &= ~IRQ_PEND_MCHK_MASK; |
254 | /* | ||
255 | * Check both floating and local interrupts' cr14 because | ||
256 | * bit IRQ_PEND_MCHK_REP could be set in both cases. | ||
257 | */ | ||
254 | if (!(vcpu->arch.sie_block->gcr[14] & | 258 | if (!(vcpu->arch.sie_block->gcr[14] & |
255 | vcpu->kvm->arch.float_int.mchk.cr14)) | 259 | (vcpu->kvm->arch.float_int.mchk.cr14 | |
260 | vcpu->arch.local_int.irq.mchk.cr14))) | ||
256 | __clear_bit(IRQ_PEND_MCHK_REP, &active_mask); | 261 | __clear_bit(IRQ_PEND_MCHK_REP, &active_mask); |
257 | 262 | ||
258 | /* | 263 | /* |
@@ -1876,6 +1881,28 @@ out: | |||
1876 | return ret < 0 ? ret : n; | 1881 | return ret < 0 ? ret : n; |
1877 | } | 1882 | } |
1878 | 1883 | ||
1884 | static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr) | ||
1885 | { | ||
1886 | struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; | ||
1887 | struct kvm_s390_ais_all ais; | ||
1888 | |||
1889 | if (attr->attr < sizeof(ais)) | ||
1890 | return -EINVAL; | ||
1891 | |||
1892 | if (!test_kvm_facility(kvm, 72)) | ||
1893 | return -ENOTSUPP; | ||
1894 | |||
1895 | mutex_lock(&fi->ais_lock); | ||
1896 | ais.simm = fi->simm; | ||
1897 | ais.nimm = fi->nimm; | ||
1898 | mutex_unlock(&fi->ais_lock); | ||
1899 | |||
1900 | if (copy_to_user((void __user *)attr->addr, &ais, sizeof(ais))) | ||
1901 | return -EFAULT; | ||
1902 | |||
1903 | return 0; | ||
1904 | } | ||
1905 | |||
1879 | static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | 1906 | static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) |
1880 | { | 1907 | { |
1881 | int r; | 1908 | int r; |
@@ -1885,6 +1912,9 @@ static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | |||
1885 | r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr, | 1912 | r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr, |
1886 | attr->attr); | 1913 | attr->attr); |
1887 | break; | 1914 | break; |
1915 | case KVM_DEV_FLIC_AISM_ALL: | ||
1916 | r = flic_ais_mode_get_all(dev->kvm, attr); | ||
1917 | break; | ||
1888 | default: | 1918 | default: |
1889 | r = -EINVAL; | 1919 | r = -EINVAL; |
1890 | } | 1920 | } |
@@ -2235,6 +2265,25 @@ static int flic_inject_airq(struct kvm *kvm, struct kvm_device_attr *attr) | |||
2235 | return kvm_s390_inject_airq(kvm, adapter); | 2265 | return kvm_s390_inject_airq(kvm, adapter); |
2236 | } | 2266 | } |
2237 | 2267 | ||
2268 | static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr) | ||
2269 | { | ||
2270 | struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; | ||
2271 | struct kvm_s390_ais_all ais; | ||
2272 | |||
2273 | if (!test_kvm_facility(kvm, 72)) | ||
2274 | return -ENOTSUPP; | ||
2275 | |||
2276 | if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais))) | ||
2277 | return -EFAULT; | ||
2278 | |||
2279 | mutex_lock(&fi->ais_lock); | ||
2280 | fi->simm = ais.simm; | ||
2281 | fi->nimm = ais.nimm; | ||
2282 | mutex_unlock(&fi->ais_lock); | ||
2283 | |||
2284 | return 0; | ||
2285 | } | ||
2286 | |||
2238 | static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | 2287 | static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) |
2239 | { | 2288 | { |
2240 | int r = 0; | 2289 | int r = 0; |
@@ -2277,6 +2326,9 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | |||
2277 | case KVM_DEV_FLIC_AIRQ_INJECT: | 2326 | case KVM_DEV_FLIC_AIRQ_INJECT: |
2278 | r = flic_inject_airq(dev->kvm, attr); | 2327 | r = flic_inject_airq(dev->kvm, attr); |
2279 | break; | 2328 | break; |
2329 | case KVM_DEV_FLIC_AISM_ALL: | ||
2330 | r = flic_ais_mode_set_all(dev->kvm, attr); | ||
2331 | break; | ||
2280 | default: | 2332 | default: |
2281 | r = -EINVAL; | 2333 | r = -EINVAL; |
2282 | } | 2334 | } |
@@ -2298,6 +2350,7 @@ static int flic_has_attr(struct kvm_device *dev, | |||
2298 | case KVM_DEV_FLIC_CLEAR_IO_IRQ: | 2350 | case KVM_DEV_FLIC_CLEAR_IO_IRQ: |
2299 | case KVM_DEV_FLIC_AISM: | 2351 | case KVM_DEV_FLIC_AISM: |
2300 | case KVM_DEV_FLIC_AIRQ_INJECT: | 2352 | case KVM_DEV_FLIC_AIRQ_INJECT: |
2353 | case KVM_DEV_FLIC_AISM_ALL: | ||
2301 | return 0; | 2354 | return 0; |
2302 | } | 2355 | } |
2303 | return -ENXIO; | 2356 | return -ENXIO; |
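The new KVM_DEV_FLIC_AISM_ALL attribute group added above lets userspace save and restore the adapter-interruption-suppression modes (simm/nimm) in a single call, which is what a migration needs. A hedged userspace sketch, assuming the usual FLIC device-attribute convention and that struct kvm_s390_ais_all from the uapi headers matches the handlers above:

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <err.h>

	/* Sketch: copy the AIS modes from a source to a destination FLIC fd
	 * (both assumed to come from KVM_CREATE_DEVICE, type KVM_DEV_TYPE_FLIC). */
	static void migrate_ais(int src_flic_fd, int dst_flic_fd)
	{
		struct kvm_s390_ais_all ais;
		struct kvm_device_attr attr = {
			.group = KVM_DEV_FLIC_AISM_ALL,
			.attr  = sizeof(ais),  /* the get handler requires attr >= sizeof(ais) */
			.addr  = (__u64)(unsigned long)&ais,
		};

		if (ioctl(src_flic_fd, KVM_GET_DEVICE_ATTR, &attr))
			err(1, "KVM_GET_DEVICE_ATTR(AISM_ALL)");
		if (ioctl(dst_flic_fd, KVM_SET_DEVICE_ATTR, &attr))
			err(1, "KVM_SET_DEVICE_ATTR(AISM_ALL)");
	}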
@@ -2415,6 +2468,42 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e, | |||
2415 | return ret; | 2468 | return ret; |
2416 | } | 2469 | } |
2417 | 2470 | ||
2471 | /* | ||
2472 | * Inject the machine check to the guest. | ||
2473 | */ | ||
2474 | void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu, | ||
2475 | struct mcck_volatile_info *mcck_info) | ||
2476 | { | ||
2477 | struct kvm_s390_interrupt_info inti; | ||
2478 | struct kvm_s390_irq irq; | ||
2479 | struct kvm_s390_mchk_info *mchk; | ||
2480 | union mci mci; | ||
2481 | __u64 cr14 = 0; /* upper bits are not used */ | ||
2482 | |||
2483 | mci.val = mcck_info->mcic; | ||
2484 | if (mci.sr) | ||
2485 | cr14 |= MCCK_CR14_RECOVERY_SUB_MASK; | ||
2486 | if (mci.dg) | ||
2487 | cr14 |= MCCK_CR14_DEGRAD_SUB_MASK; | ||
2488 | if (mci.w) | ||
2489 | cr14 |= MCCK_CR14_WARN_SUB_MASK; | ||
2490 | |||
2491 | mchk = mci.ck ? &inti.mchk : &irq.u.mchk; | ||
2492 | mchk->cr14 = cr14; | ||
2493 | mchk->mcic = mcck_info->mcic; | ||
2494 | mchk->ext_damage_code = mcck_info->ext_damage_code; | ||
2495 | mchk->failing_storage_address = mcck_info->failing_storage_address; | ||
2496 | if (mci.ck) { | ||
2497 | /* Inject the floating machine check */ | ||
2498 | inti.type = KVM_S390_MCHK; | ||
2499 | WARN_ON_ONCE(__inject_vm(vcpu->kvm, &inti)); | ||
2500 | } else { | ||
2501 | /* Inject the machine check to specified vcpu */ | ||
2502 | irq.type = KVM_S390_MCHK; | ||
2503 | WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); | ||
2504 | } | ||
2505 | } | ||
2506 | |||
2418 | int kvm_set_routing_entry(struct kvm *kvm, | 2507 | int kvm_set_routing_entry(struct kvm *kvm, |
2419 | struct kvm_kernel_irq_routing_entry *e, | 2508 | struct kvm_kernel_irq_routing_entry *e, |
2420 | const struct kvm_irq_routing_entry *ue) | 2509 | const struct kvm_irq_routing_entry *ue) |
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b0d7de5a533d..3f2884e99ed4 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/vmalloc.h> | 30 | #include <linux/vmalloc.h> |
31 | #include <linux/bitmap.h> | 31 | #include <linux/bitmap.h> |
32 | #include <linux/sched/signal.h> | 32 | #include <linux/sched/signal.h> |
33 | #include <linux/string.h> | ||
33 | 34 | ||
34 | #include <asm/asm-offsets.h> | 35 | #include <asm/asm-offsets.h> |
35 | #include <asm/lowcore.h> | 36 | #include <asm/lowcore.h> |
@@ -386,6 +387,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
386 | case KVM_CAP_S390_SKEYS: | 387 | case KVM_CAP_S390_SKEYS: |
387 | case KVM_CAP_S390_IRQ_STATE: | 388 | case KVM_CAP_S390_IRQ_STATE: |
388 | case KVM_CAP_S390_USER_INSTR0: | 389 | case KVM_CAP_S390_USER_INSTR0: |
390 | case KVM_CAP_S390_CMMA_MIGRATION: | ||
389 | case KVM_CAP_S390_AIS: | 391 | case KVM_CAP_S390_AIS: |
390 | r = 1; | 392 | r = 1; |
391 | break; | 393 | break; |
@@ -749,6 +751,129 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr) | |||
749 | return 0; | 751 | return 0; |
750 | } | 752 | } |
751 | 753 | ||
754 | static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req) | ||
755 | { | ||
756 | int cx; | ||
757 | struct kvm_vcpu *vcpu; | ||
758 | |||
759 | kvm_for_each_vcpu(cx, vcpu, kvm) | ||
760 | kvm_s390_sync_request(req, vcpu); | ||
761 | } | ||
762 | |||
763 | /* | ||
764 | * Must be called with kvm->srcu held to avoid races on memslots, and with | ||
765 | * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration. | ||
766 | */ | ||
767 | static int kvm_s390_vm_start_migration(struct kvm *kvm) | ||
768 | { | ||
769 | struct kvm_s390_migration_state *mgs; | ||
770 | struct kvm_memory_slot *ms; | ||
771 | /* should be the only one */ | ||
772 | struct kvm_memslots *slots; | ||
773 | unsigned long ram_pages; | ||
774 | int slotnr; | ||
775 | |||
776 | /* migration mode already enabled */ | ||
777 | if (kvm->arch.migration_state) | ||
778 | return 0; | ||
779 | |||
780 | slots = kvm_memslots(kvm); | ||
781 | if (!slots || !slots->used_slots) | ||
782 | return -EINVAL; | ||
783 | |||
784 | mgs = kzalloc(sizeof(*mgs), GFP_KERNEL); | ||
785 | if (!mgs) | ||
786 | return -ENOMEM; | ||
787 | kvm->arch.migration_state = mgs; | ||
788 | |||
789 | if (kvm->arch.use_cmma) { | ||
790 | /* | ||
791 | * Get the last slot. They should be sorted by base_gfn, so the | ||
792 | * last slot is also the one at the end of the address space. | ||
793 | * We have verified above that at least one slot is present. | ||
794 | */ | ||
795 | ms = slots->memslots + slots->used_slots - 1; | ||
796 | /* round up so we only use full longs */ | ||
797 | ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG); | ||
798 | /* allocate enough bytes to store all the bits */ | ||
799 | mgs->pgste_bitmap = vmalloc(ram_pages / 8); | ||
800 | if (!mgs->pgste_bitmap) { | ||
801 | kfree(mgs); | ||
802 | kvm->arch.migration_state = NULL; | ||
803 | return -ENOMEM; | ||
804 | } | ||
805 | |||
806 | mgs->bitmap_size = ram_pages; | ||
807 | atomic64_set(&mgs->dirty_pages, ram_pages); | ||
808 | /* mark all the pages in active slots as dirty */ | ||
809 | for (slotnr = 0; slotnr < slots->used_slots; slotnr++) { | ||
810 | ms = slots->memslots + slotnr; | ||
811 | bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages); | ||
812 | } | ||
813 | |||
814 | kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION); | ||
815 | } | ||
816 | return 0; | ||
817 | } | ||
818 | |||
819 | /* | ||
820 | * Must be called with kvm->lock to avoid races with ourselves and | ||
821 | * kvm_s390_vm_start_migration. | ||
822 | */ | ||
823 | static int kvm_s390_vm_stop_migration(struct kvm *kvm) | ||
824 | { | ||
825 | struct kvm_s390_migration_state *mgs; | ||
826 | |||
827 | /* migration mode already disabled */ | ||
828 | if (!kvm->arch.migration_state) | ||
829 | return 0; | ||
830 | mgs = kvm->arch.migration_state; | ||
831 | kvm->arch.migration_state = NULL; | ||
832 | |||
833 | if (kvm->arch.use_cmma) { | ||
834 | kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION); | ||
835 | vfree(mgs->pgste_bitmap); | ||
836 | } | ||
837 | kfree(mgs); | ||
838 | return 0; | ||
839 | } | ||
840 | |||
841 | static int kvm_s390_vm_set_migration(struct kvm *kvm, | ||
842 | struct kvm_device_attr *attr) | ||
843 | { | ||
844 | int idx, res = -ENXIO; | ||
845 | |||
846 | mutex_lock(&kvm->lock); | ||
847 | switch (attr->attr) { | ||
848 | case KVM_S390_VM_MIGRATION_START: | ||
849 | idx = srcu_read_lock(&kvm->srcu); | ||
850 | res = kvm_s390_vm_start_migration(kvm); | ||
851 | srcu_read_unlock(&kvm->srcu, idx); | ||
852 | break; | ||
853 | case KVM_S390_VM_MIGRATION_STOP: | ||
854 | res = kvm_s390_vm_stop_migration(kvm); | ||
855 | break; | ||
856 | default: | ||
857 | break; | ||
858 | } | ||
859 | mutex_unlock(&kvm->lock); | ||
860 | |||
861 | return res; | ||
862 | } | ||
863 | |||
864 | static int kvm_s390_vm_get_migration(struct kvm *kvm, | ||
865 | struct kvm_device_attr *attr) | ||
866 | { | ||
867 | u64 mig = (kvm->arch.migration_state != NULL); | ||
868 | |||
869 | if (attr->attr != KVM_S390_VM_MIGRATION_STATUS) | ||
870 | return -ENXIO; | ||
871 | |||
872 | if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig))) | ||
873 | return -EFAULT; | ||
874 | return 0; | ||
875 | } | ||
876 | |||
752 | static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) | 877 | static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) |
753 | { | 878 | { |
754 | u8 gtod_high; | 879 | u8 gtod_high; |
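Migration mode itself is toggled through the KVM_S390_VM_MIGRATION attribute group on the VM file descriptor: START allocates the PGSTE dirty bitmap and broadcasts KVM_REQ_START_MIGRATION, STOP broadcasts KVM_REQ_STOP_MIGRATION and frees it again. A rough userspace sketch, assuming the attribute names from the hunk above and the standard VM device-attribute ioctls:

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <err.h>

	/* Sketch: enable or disable migration mode on the VM fd. */
	static void set_migration_mode(int vm_fd, int on)
	{
		struct kvm_device_attr attr = {
			.group = KVM_S390_VM_MIGRATION,
			.attr  = on ? KVM_S390_VM_MIGRATION_START
				    : KVM_S390_VM_MIGRATION_STOP,
		};

		if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr))
			err(1, "KVM_S390_VM_MIGRATION");
	}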
@@ -1089,6 +1214,9 @@ static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) | |||
1089 | case KVM_S390_VM_CRYPTO: | 1214 | case KVM_S390_VM_CRYPTO: |
1090 | ret = kvm_s390_vm_set_crypto(kvm, attr); | 1215 | ret = kvm_s390_vm_set_crypto(kvm, attr); |
1091 | break; | 1216 | break; |
1217 | case KVM_S390_VM_MIGRATION: | ||
1218 | ret = kvm_s390_vm_set_migration(kvm, attr); | ||
1219 | break; | ||
1092 | default: | 1220 | default: |
1093 | ret = -ENXIO; | 1221 | ret = -ENXIO; |
1094 | break; | 1222 | break; |
@@ -1111,6 +1239,9 @@ static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) | |||
1111 | case KVM_S390_VM_CPU_MODEL: | 1239 | case KVM_S390_VM_CPU_MODEL: |
1112 | ret = kvm_s390_get_cpu_model(kvm, attr); | 1240 | ret = kvm_s390_get_cpu_model(kvm, attr); |
1113 | break; | 1241 | break; |
1242 | case KVM_S390_VM_MIGRATION: | ||
1243 | ret = kvm_s390_vm_get_migration(kvm, attr); | ||
1244 | break; | ||
1114 | default: | 1245 | default: |
1115 | ret = -ENXIO; | 1246 | ret = -ENXIO; |
1116 | break; | 1247 | break; |
@@ -1178,6 +1309,9 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) | |||
1178 | break; | 1309 | break; |
1179 | } | 1310 | } |
1180 | break; | 1311 | break; |
1312 | case KVM_S390_VM_MIGRATION: | ||
1313 | ret = 0; | ||
1314 | break; | ||
1181 | default: | 1315 | default: |
1182 | ret = -ENXIO; | 1316 | ret = -ENXIO; |
1183 | break; | 1317 | break; |
@@ -1285,6 +1419,182 @@ out: | |||
1285 | return r; | 1419 | return r; |
1286 | } | 1420 | } |
1287 | 1421 | ||
1422 | /* | ||
1423 | * Base address and length must be sent at the start of each block, therefore | ||
1424 | * it's cheaper to send some clean data, as long as it's less than the size of | ||
1425 | * two longs. | ||
1426 | */ | ||
1427 | #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *)) | ||
1428 | /* for consistency */ | ||
1429 | #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX) | ||
1430 | |||
1431 | /* | ||
1432 | * This function searches for the next page with dirty CMMA attributes, and | ||
1433 | * saves the attributes in the buffer up to either the end of the buffer or | ||
1434 | * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; | ||
1435 | * no trailing clean bytes are saved. | ||
1436 | * In case no dirty bits were found, or if CMMA was not enabled or used, the | ||
1437 | * output buffer will indicate 0 as length. | ||
1438 | */ | ||
1439 | static int kvm_s390_get_cmma_bits(struct kvm *kvm, | ||
1440 | struct kvm_s390_cmma_log *args) | ||
1441 | { | ||
1442 | struct kvm_s390_migration_state *s = kvm->arch.migration_state; | ||
1443 | unsigned long bufsize, hva, pgstev, i, next, cur; | ||
1444 | int srcu_idx, peek, r = 0, rr; | ||
1445 | u8 *res; | ||
1446 | |||
1447 | cur = args->start_gfn; | ||
1448 | i = next = pgstev = 0; | ||
1449 | |||
1450 | if (unlikely(!kvm->arch.use_cmma)) | ||
1451 | return -ENXIO; | ||
1452 | /* Invalid/unsupported flags were specified */ | ||
1453 | if (args->flags & ~KVM_S390_CMMA_PEEK) | ||
1454 | return -EINVAL; | ||
1455 | /* Migration mode query, and we are not doing a migration */ | ||
1456 | peek = !!(args->flags & KVM_S390_CMMA_PEEK); | ||
1457 | if (!peek && !s) | ||
1458 | return -EINVAL; | ||
1459 | /* CMMA is disabled or was not used, or the buffer has length zero */ | ||
1460 | bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); | ||
1461 | if (!bufsize || !kvm->mm->context.use_cmma) { | ||
1462 | memset(args, 0, sizeof(*args)); | ||
1463 | return 0; | ||
1464 | } | ||
1465 | |||
1466 | if (!peek) { | ||
1467 | /* We are not peeking, and there are no dirty pages */ | ||
1468 | if (!atomic64_read(&s->dirty_pages)) { | ||
1469 | memset(args, 0, sizeof(*args)); | ||
1470 | return 0; | ||
1471 | } | ||
1472 | cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, | ||
1473 | args->start_gfn); | ||
1474 | if (cur >= s->bitmap_size) /* nothing found, loop back */ | ||
1475 | cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0); | ||
1476 | if (cur >= s->bitmap_size) { /* again! (very unlikely) */ | ||
1477 | memset(args, 0, sizeof(*args)); | ||
1478 | return 0; | ||
1479 | } | ||
1480 | next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1); | ||
1481 | } | ||
1482 | |||
1483 | res = vmalloc(bufsize); | ||
1484 | if (!res) | ||
1485 | return -ENOMEM; | ||
1486 | |||
1487 | args->start_gfn = cur; | ||
1488 | |||
1489 | down_read(&kvm->mm->mmap_sem); | ||
1490 | srcu_idx = srcu_read_lock(&kvm->srcu); | ||
1491 | while (i < bufsize) { | ||
1492 | hva = gfn_to_hva(kvm, cur); | ||
1493 | if (kvm_is_error_hva(hva)) { | ||
1494 | r = -EFAULT; | ||
1495 | break; | ||
1496 | } | ||
1497 | /* decrement only if we actually flipped the bit to 0 */ | ||
1498 | if (!peek && test_and_clear_bit(cur, s->pgste_bitmap)) | ||
1499 | atomic64_dec(&s->dirty_pages); | ||
1500 | r = get_pgste(kvm->mm, hva, &pgstev); | ||
1501 | if (r < 0) | ||
1502 | pgstev = 0; | ||
1503 | /* save the value */ | ||
1504 | res[i++] = (pgstev >> 24) & 0x3; | ||
1505 | /* | ||
1506 | * if the next bit is too far away, stop. | ||
1507 | * if we reached the previous "next", find the next one | ||
1508 | */ | ||
1509 | if (!peek) { | ||
1510 | if (next > cur + KVM_S390_MAX_BIT_DISTANCE) | ||
1511 | break; | ||
1512 | if (cur == next) | ||
1513 | next = find_next_bit(s->pgste_bitmap, | ||
1514 | s->bitmap_size, cur + 1); | ||
1515 | /* reached the end of the bitmap or of the buffer, stop */ | ||
1516 | if ((next >= s->bitmap_size) || | ||
1517 | (next >= args->start_gfn + bufsize)) | ||
1518 | break; | ||
1519 | } | ||
1520 | cur++; | ||
1521 | } | ||
1522 | srcu_read_unlock(&kvm->srcu, srcu_idx); | ||
1523 | up_read(&kvm->mm->mmap_sem); | ||
1524 | args->count = i; | ||
1525 | args->remaining = s ? atomic64_read(&s->dirty_pages) : 0; | ||
1526 | |||
1527 | rr = copy_to_user((void __user *)args->values, res, args->count); | ||
1528 | if (rr) | ||
1529 | r = -EFAULT; | ||
1530 | |||
1531 | vfree(res); | ||
1532 | return r; | ||
1533 | } | ||
1534 | |||
1535 | /* | ||
1536 | * This function sets the CMMA attributes for the given pages. If the input | ||
1537 | * buffer has zero length, no action is taken, otherwise the attributes are | ||
1538 | * set and the mm->context.use_cmma flag is set. | ||
1539 | */ | ||
1540 | static int kvm_s390_set_cmma_bits(struct kvm *kvm, | ||
1541 | const struct kvm_s390_cmma_log *args) | ||
1542 | { | ||
1543 | unsigned long hva, mask, pgstev, i; | ||
1544 | uint8_t *bits; | ||
1545 | int srcu_idx, r = 0; | ||
1546 | |||
1547 | mask = args->mask; | ||
1548 | |||
1549 | if (!kvm->arch.use_cmma) | ||
1550 | return -ENXIO; | ||
1551 | /* invalid/unsupported flags */ | ||
1552 | if (args->flags != 0) | ||
1553 | return -EINVAL; | ||
1554 | /* Enforce sane limit on memory allocation */ | ||
1555 | if (args->count > KVM_S390_CMMA_SIZE_MAX) | ||
1556 | return -EINVAL; | ||
1557 | /* Nothing to do */ | ||
1558 | if (args->count == 0) | ||
1559 | return 0; | ||
1560 | |||
1561 | bits = vmalloc(sizeof(*bits) * args->count); | ||
1562 | if (!bits) | ||
1563 | return -ENOMEM; | ||
1564 | |||
1565 | r = copy_from_user(bits, (void __user *)args->values, args->count); | ||
1566 | if (r) { | ||
1567 | r = -EFAULT; | ||
1568 | goto out; | ||
1569 | } | ||
1570 | |||
1571 | down_read(&kvm->mm->mmap_sem); | ||
1572 | srcu_idx = srcu_read_lock(&kvm->srcu); | ||
1573 | for (i = 0; i < args->count; i++) { | ||
1574 | hva = gfn_to_hva(kvm, args->start_gfn + i); | ||
1575 | if (kvm_is_error_hva(hva)) { | ||
1576 | r = -EFAULT; | ||
1577 | break; | ||
1578 | } | ||
1579 | |||
1580 | pgstev = bits[i]; | ||
1581 | pgstev = pgstev << 24; | ||
1582 | mask &= _PGSTE_GPS_USAGE_MASK; | ||
1583 | set_pgste_bits(kvm->mm, hva, mask, pgstev); | ||
1584 | } | ||
1585 | srcu_read_unlock(&kvm->srcu, srcu_idx); | ||
1586 | up_read(&kvm->mm->mmap_sem); | ||
1587 | |||
1588 | if (!kvm->mm->context.use_cmma) { | ||
1589 | down_write(&kvm->mm->mmap_sem); | ||
1590 | kvm->mm->context.use_cmma = 1; | ||
1591 | up_write(&kvm->mm->mmap_sem); | ||
1592 | } | ||
1593 | out: | ||
1594 | vfree(bits); | ||
1595 | return r; | ||
1596 | } | ||
1597 | |||
1288 | long kvm_arch_vm_ioctl(struct file *filp, | 1598 | long kvm_arch_vm_ioctl(struct file *filp, |
1289 | unsigned int ioctl, unsigned long arg) | 1599 | unsigned int ioctl, unsigned long arg) |
1290 | { | 1600 | { |
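The KVM_S390_MAX_BIT_DISTANCE comment above is easier to see with numbers; the following is only an illustration of the tradeoff, assuming an LP64 host:

	/*
	 * 2 * sizeof(void *) == 16 on an LP64 host: a run of up to 16 clean
	 * pages is still encoded inline at one byte per page, because ending
	 * the block and starting a new one would cost a fresh start_gfn/count
	 * header of comparable size.
	 */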
@@ -1363,6 +1673,29 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
1363 | r = kvm_s390_set_skeys(kvm, &args); | 1673 | r = kvm_s390_set_skeys(kvm, &args); |
1364 | break; | 1674 | break; |
1365 | } | 1675 | } |
1676 | case KVM_S390_GET_CMMA_BITS: { | ||
1677 | struct kvm_s390_cmma_log args; | ||
1678 | |||
1679 | r = -EFAULT; | ||
1680 | if (copy_from_user(&args, argp, sizeof(args))) | ||
1681 | break; | ||
1682 | r = kvm_s390_get_cmma_bits(kvm, &args); | ||
1683 | if (!r) { | ||
1684 | r = copy_to_user(argp, &args, sizeof(args)); | ||
1685 | if (r) | ||
1686 | r = -EFAULT; | ||
1687 | } | ||
1688 | break; | ||
1689 | } | ||
1690 | case KVM_S390_SET_CMMA_BITS: { | ||
1691 | struct kvm_s390_cmma_log args; | ||
1692 | |||
1693 | r = -EFAULT; | ||
1694 | if (copy_from_user(&args, argp, sizeof(args))) | ||
1695 | break; | ||
1696 | r = kvm_s390_set_cmma_bits(kvm, &args); | ||
1697 | break; | ||
1698 | } | ||
1366 | default: | 1699 | default: |
1367 | r = -ENOTTY; | 1700 | r = -ENOTTY; |
1368 | } | 1701 | } |
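Userspace drives the CMMA transfer through the two new VM ioctls dispatched above: KVM_S390_GET_CMMA_BITS repeatedly returns the next block of page states while the VM is in migration mode, and KVM_S390_SET_CMMA_BITS replays them on the destination. A hedged sketch of the retrieval loop, assuming the kvm_s390_cmma_log layout used by the handlers (start_gfn, count, flags on input; count, remaining, values on output):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <stdint.h>
	#include <err.h>

	/* Sketch: drain dirty CMMA values from a VM that is in migration mode. */
	static void pull_cmma(int vm_fd)
	{
		uint8_t buf[4096];
		struct kvm_s390_cmma_log log = {
			.start_gfn = 0,
			.count     = sizeof(buf),
			.flags     = 0,  /* 0 = consume; KVM_S390_CMMA_PEEK only reads */
			.values    = (__u64)(unsigned long)buf,
		};

		do {
			if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log))
				err(1, "KVM_S390_GET_CMMA_BITS");
			/* log.start_gfn and log.count now describe the returned
			 * block; the destination applies it unchanged with
			 * KVM_S390_SET_CMMA_BITS. */
			log.start_gfn += log.count;
			log.count = sizeof(buf);
		} while (log.remaining);
	}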
@@ -1631,6 +1964,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
1631 | kvm_s390_destroy_adapters(kvm); | 1964 | kvm_s390_destroy_adapters(kvm); |
1632 | kvm_s390_clear_float_irqs(kvm); | 1965 | kvm_s390_clear_float_irqs(kvm); |
1633 | kvm_s390_vsie_destroy(kvm); | 1966 | kvm_s390_vsie_destroy(kvm); |
1967 | if (kvm->arch.migration_state) { | ||
1968 | vfree(kvm->arch.migration_state->pgste_bitmap); | ||
1969 | kfree(kvm->arch.migration_state); | ||
1970 | } | ||
1634 | KVM_EVENT(3, "vm 0x%pK destroyed", kvm); | 1971 | KVM_EVENT(3, "vm 0x%pK destroyed", kvm); |
1635 | } | 1972 | } |
1636 | 1973 | ||
@@ -1975,7 +2312,6 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) | |||
1975 | if (!vcpu->arch.sie_block->cbrlo) | 2312 | if (!vcpu->arch.sie_block->cbrlo) |
1976 | return -ENOMEM; | 2313 | return -ENOMEM; |
1977 | 2314 | ||
1978 | vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; | ||
1979 | vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI; | 2315 | vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI; |
1980 | return 0; | 2316 | return 0; |
1981 | } | 2317 | } |
@@ -2439,7 +2775,7 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) | |||
2439 | { | 2775 | { |
2440 | retry: | 2776 | retry: |
2441 | kvm_s390_vcpu_request_handled(vcpu); | 2777 | kvm_s390_vcpu_request_handled(vcpu); |
2442 | if (!vcpu->requests) | 2778 | if (!kvm_request_pending(vcpu)) |
2443 | return 0; | 2779 | return 0; |
2444 | /* | 2780 | /* |
2445 | * We use MMU_RELOAD just to re-arm the ipte notifier for the | 2781 | * We use MMU_RELOAD just to re-arm the ipte notifier for the |
@@ -2488,6 +2824,27 @@ retry: | |||
2488 | goto retry; | 2824 | goto retry; |
2489 | } | 2825 | } |
2490 | 2826 | ||
2827 | if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { | ||
2828 | /* | ||
2829 | * Disable CMMA virtualization; we will emulate the ESSA | ||
2830 | * instruction manually, in order to provide additional | ||
2831 | * functionalities needed for live migration. | ||
2832 | */ | ||
2833 | vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; | ||
2834 | goto retry; | ||
2835 | } | ||
2836 | |||
2837 | if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { | ||
2838 | /* | ||
2839 | * Re-enable CMMA virtualization if CMMA is available and | ||
2840 | * was used. | ||
2841 | */ | ||
2842 | if ((vcpu->kvm->arch.use_cmma) && | ||
2843 | (vcpu->kvm->mm->context.use_cmma)) | ||
2844 | vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; | ||
2845 | goto retry; | ||
2846 | } | ||
2847 | |||
2491 | /* nothing to do, just clear the request */ | 2848 | /* nothing to do, just clear the request */ |
2492 | kvm_clear_request(KVM_REQ_UNHALT, vcpu); | 2849 | kvm_clear_request(KVM_REQ_UNHALT, vcpu); |
2493 | 2850 | ||
@@ -2682,6 +3039,9 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) | |||
2682 | 3039 | ||
2683 | static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) | 3040 | static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) |
2684 | { | 3041 | { |
3042 | struct mcck_volatile_info *mcck_info; | ||
3043 | struct sie_page *sie_page; | ||
3044 | |||
2685 | VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", | 3045 | VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", |
2686 | vcpu->arch.sie_block->icptcode); | 3046 | vcpu->arch.sie_block->icptcode); |
2687 | trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); | 3047 | trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); |
@@ -2692,6 +3052,15 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) | |||
2692 | vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; | 3052 | vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; |
2693 | vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; | 3053 | vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; |
2694 | 3054 | ||
3055 | if (exit_reason == -EINTR) { | ||
3056 | VCPU_EVENT(vcpu, 3, "%s", "machine check"); | ||
3057 | sie_page = container_of(vcpu->arch.sie_block, | ||
3058 | struct sie_page, sie_block); | ||
3059 | mcck_info = &sie_page->mcck_info; | ||
3060 | kvm_s390_reinject_machine_check(vcpu, mcck_info); | ||
3061 | return 0; | ||
3062 | } | ||
3063 | |||
2695 | if (vcpu->arch.sie_block->icptcode > 0) { | 3064 | if (vcpu->arch.sie_block->icptcode > 0) { |
2696 | int rc = kvm_handle_sie_intercept(vcpu); | 3065 | int rc = kvm_handle_sie_intercept(vcpu); |
2697 | 3066 | ||
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 55f5c8457d6d..6fedc8bc7a37 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
@@ -397,4 +397,6 @@ static inline int kvm_s390_use_sca_entries(void) | |||
397 | */ | 397 | */ |
398 | return sclp.has_sigpif; | 398 | return sclp.has_sigpif; |
399 | } | 399 | } |
400 | void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu, | ||
401 | struct mcck_volatile_info *mcck_info); | ||
400 | #endif | 402 | #endif |
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index e53292a89257..8a1dac793d6b 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <asm/ebcdic.h> | 24 | #include <asm/ebcdic.h> |
25 | #include <asm/sysinfo.h> | 25 | #include <asm/sysinfo.h> |
26 | #include <asm/pgtable.h> | 26 | #include <asm/pgtable.h> |
27 | #include <asm/page-states.h> | ||
27 | #include <asm/pgalloc.h> | 28 | #include <asm/pgalloc.h> |
28 | #include <asm/gmap.h> | 29 | #include <asm/gmap.h> |
29 | #include <asm/io.h> | 30 | #include <asm/io.h> |
@@ -949,13 +950,72 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) | |||
949 | return 0; | 950 | return 0; |
950 | } | 951 | } |
951 | 952 | ||
953 | static inline int do_essa(struct kvm_vcpu *vcpu, const int orc) | ||
954 | { | ||
955 | struct kvm_s390_migration_state *ms = vcpu->kvm->arch.migration_state; | ||
956 | int r1, r2, nappended, entries; | ||
957 | unsigned long gfn, hva, res, pgstev, ptev; | ||
958 | unsigned long *cbrlo; | ||
959 | |||
960 | /* | ||
961 | * We don't need to set SD.FPF.SK to 1 here, because if we have a | ||
962 | * machine check here we either handle it or crash | ||
963 | */ | ||
964 | |||
965 | kvm_s390_get_regs_rre(vcpu, &r1, &r2); | ||
966 | gfn = vcpu->run->s.regs.gprs[r2] >> PAGE_SHIFT; | ||
967 | hva = gfn_to_hva(vcpu->kvm, gfn); | ||
968 | entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3; | ||
969 | |||
970 | if (kvm_is_error_hva(hva)) | ||
971 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
972 | |||
973 | nappended = pgste_perform_essa(vcpu->kvm->mm, hva, orc, &ptev, &pgstev); | ||
974 | if (nappended < 0) { | ||
975 | res = orc ? 0x10 : 0; | ||
976 | vcpu->run->s.regs.gprs[r1] = res; /* Exception Indication */ | ||
977 | return 0; | ||
978 | } | ||
979 | res = (pgstev & _PGSTE_GPS_USAGE_MASK) >> 22; | ||
980 | /* | ||
981 | * Set the block-content state part of the result. 0 means resident, so | ||
982 | * nothing to do if the page is valid. 2 is for preserved pages | ||
983 | * (non-present and non-zero), and 3 for zero pages (non-present and | ||
984 | * zero). | ||
985 | */ | ||
986 | if (ptev & _PAGE_INVALID) { | ||
987 | res |= 2; | ||
988 | if (pgstev & _PGSTE_GPS_ZERO) | ||
989 | res |= 1; | ||
990 | } | ||
991 | vcpu->run->s.regs.gprs[r1] = res; | ||
992 | /* | ||
993 | * It is possible that all the normal 511 slots were full, in which case | ||
994 | * we will now write in the 512th slot, which is reserved for host use. | ||
995 | * In both cases we let the normal essa handling code process all the | ||
996 | * slots, including the reserved one, if needed. | ||
997 | */ | ||
998 | if (nappended > 0) { | ||
999 | cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo & PAGE_MASK); | ||
1000 | cbrlo[entries] = gfn << PAGE_SHIFT; | ||
1001 | } | ||
1002 | |||
1003 | if (orc) { | ||
1004 | /* increment only if we are really flipping the bit to 1 */ | ||
1005 | if (!test_and_set_bit(gfn, ms->pgste_bitmap)) | ||
1006 | atomic64_inc(&ms->dirty_pages); | ||
1007 | } | ||
1008 | |||
1009 | return nappended; | ||
1010 | } | ||
1011 | |||
952 | static int handle_essa(struct kvm_vcpu *vcpu) | 1012 | static int handle_essa(struct kvm_vcpu *vcpu) |
953 | { | 1013 | { |
954 | /* entries expected to be 1FF */ | 1014 | /* entries expected to be 1FF */ |
955 | int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3; | 1015 | int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3; |
956 | unsigned long *cbrlo; | 1016 | unsigned long *cbrlo; |
957 | struct gmap *gmap; | 1017 | struct gmap *gmap; |
958 | int i; | 1018 | int i, orc; |
959 | 1019 | ||
960 | VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries); | 1020 | VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries); |
961 | gmap = vcpu->arch.gmap; | 1021 | gmap = vcpu->arch.gmap; |
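For reference, the block-content encoding that do_essa() above assembles into gprs[r1] can be spelled out as follows; this only restates the code and the in-line comment, it is not an additional interface:

	/*
	 * Block-content part of the ESSA result built above:
	 *   pte valid (page resident)                      -> +0
	 *   _PAGE_INVALID set, _PGSTE_GPS_ZERO clear       -> +2 (content preserved)
	 *   _PAGE_INVALID set, _PGSTE_GPS_ZERO set         -> +3 (logically zero)
	 * The usage state extracted from the PGSTE is placed alongside it in
	 * res before the register is written back.
	 */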
@@ -965,12 +1025,45 @@ static int handle_essa(struct kvm_vcpu *vcpu) | |||
965 | 1025 | ||
966 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | 1026 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) |
967 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | 1027 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); |
968 | 1028 | /* Check for invalid operation request code */ | |
969 | if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6) | 1029 | orc = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28; |
1030 | if (orc > ESSA_MAX) | ||
970 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 1031 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
971 | 1032 | ||
972 | /* Retry the ESSA instruction */ | 1033 | if (likely(!vcpu->kvm->arch.migration_state)) { |
973 | kvm_s390_retry_instr(vcpu); | 1034 | /* |
1035 | * CMMA is enabled in the KVM settings, but is disabled in | ||
1036 | * the SIE block and in the mm_context, and we are not doing | ||
1037 | * a migration. Enable CMMA in the mm_context. | ||
1038 | * Since we need to take a write lock to write to the context | ||
1039 | * to avoid races with storage keys handling, we check if the | ||
1040 | * value really needs to be written to; if the value is | ||
1041 | * already correct, we do nothing and avoid the lock. | ||
1042 | */ | ||
1043 | if (vcpu->kvm->mm->context.use_cmma == 0) { | ||
1044 | down_write(&vcpu->kvm->mm->mmap_sem); | ||
1045 | vcpu->kvm->mm->context.use_cmma = 1; | ||
1046 | up_write(&vcpu->kvm->mm->mmap_sem); | ||
1047 | } | ||
1048 | /* | ||
1049 | * If we are here, we are supposed to have CMMA enabled in | ||
1050 | * the SIE block. Enabling CMMA works on a per-CPU basis, | ||
1051 | * while the context use_cmma flag is per process. | ||
1052 | * It's possible that the context flag is enabled and the | ||
1053 | * SIE flag is not, so we set the flag always; if it was | ||
1054 | * already set, nothing changes, otherwise we enable it | ||
1055 | * on this CPU too. | ||
1056 | */ | ||
1057 | vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; | ||
1058 | /* Retry the ESSA instruction */ | ||
1059 | kvm_s390_retry_instr(vcpu); | ||
1060 | } else { | ||
1061 | /* Account for the possible extra cbrl entry */ | ||
1062 | i = do_essa(vcpu, orc); | ||
1063 | if (i < 0) | ||
1064 | return i; | ||
1065 | entries += i; | ||
1066 | } | ||
974 | vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ | 1067 | vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ |
975 | cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); | 1068 | cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); |
976 | down_read(&gmap->mm->mmap_sem); | 1069 | down_read(&gmap->mm->mmap_sem); |
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 4719ecb9ab42..715c19c45d9a 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c | |||
@@ -26,16 +26,21 @@ | |||
26 | 26 | ||
27 | struct vsie_page { | 27 | struct vsie_page { |
28 | struct kvm_s390_sie_block scb_s; /* 0x0000 */ | 28 | struct kvm_s390_sie_block scb_s; /* 0x0000 */ |
29 | /* | ||
30 | * the backup info for machine check. ensure it's at | ||
31 | * the same offset as that in struct sie_page! | ||
32 | */ | ||
33 | struct mcck_volatile_info mcck_info; /* 0x0200 */ | ||
29 | /* the pinned original scb */ | 34 | /* the pinned original scb */ |
30 | struct kvm_s390_sie_block *scb_o; /* 0x0200 */ | 35 | struct kvm_s390_sie_block *scb_o; /* 0x0218 */ |
31 | /* the shadow gmap in use by the vsie_page */ | 36 | /* the shadow gmap in use by the vsie_page */ |
32 | struct gmap *gmap; /* 0x0208 */ | 37 | struct gmap *gmap; /* 0x0220 */ |
33 | /* address of the last reported fault to guest2 */ | 38 | /* address of the last reported fault to guest2 */ |
34 | unsigned long fault_addr; /* 0x0210 */ | 39 | unsigned long fault_addr; /* 0x0228 */ |
35 | __u8 reserved[0x0700 - 0x0218]; /* 0x0218 */ | 40 | __u8 reserved[0x0700 - 0x0230]; /* 0x0230 */ |
36 | struct kvm_s390_crypto_cb crycb; /* 0x0700 */ | 41 | struct kvm_s390_crypto_cb crycb; /* 0x0700 */ |
37 | __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */ | 42 | __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */ |
38 | } __packed; | 43 | }; |
39 | 44 | ||
40 | /* trigger a validity icpt for the given scb */ | 45 | /* trigger a validity icpt for the given scb */ |
41 | static int set_validity_icpt(struct kvm_s390_sie_block *scb, | 46 | static int set_validity_icpt(struct kvm_s390_sie_block *scb, |
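The comment added above struct vsie_page requires mcck_info to sit at the same offset as the corresponding field in struct sie_page, since the machine-check save area is located by offset after a nested SIE exit. A compile-time guard is the natural way to pin such an invariant; this is only a sketch and not part of the patch:

	/* Sketch: assert the layout assumption from the comment, e.g. inside
	 * any function in vsie.c. */
	BUILD_BUG_ON(offsetof(struct vsie_page, mcck_info) !=
		     offsetof(struct sie_page, mcck_info));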
@@ -801,6 +806,8 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
801 | { | 806 | { |
802 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; | 807 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; |
803 | struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; | 808 | struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; |
809 | struct mcck_volatile_info *mcck_info; | ||
810 | struct sie_page *sie_page; | ||
804 | int rc; | 811 | int rc; |
805 | 812 | ||
806 | handle_last_fault(vcpu, vsie_page); | 813 | handle_last_fault(vcpu, vsie_page); |
@@ -822,6 +829,14 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
822 | local_irq_enable(); | 829 | local_irq_enable(); |
823 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 830 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
824 | 831 | ||
832 | if (rc == -EINTR) { | ||
833 | VCPU_EVENT(vcpu, 3, "%s", "machine check"); | ||
834 | sie_page = container_of(scb_s, struct sie_page, sie_block); | ||
835 | mcck_info = &sie_page->mcck_info; | ||
836 | kvm_s390_reinject_machine_check(vcpu, mcck_info); | ||
837 | return 0; | ||
838 | } | ||
839 | |||
825 | if (rc > 0) | 840 | if (rc > 0) |
826 | rc = 0; /* we could still have an icpt */ | 841 | rc = 0; /* we could still have an icpt */ |
827 | else if (rc == -EFAULT) | 842 | else if (rc == -EFAULT) |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 695605eb1dfb..1588e9e3dc01 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -48,28 +48,31 @@ | |||
48 | #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS | 48 | #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS |
49 | 49 | ||
50 | /* x86-specific vcpu->requests bit members */ | 50 | /* x86-specific vcpu->requests bit members */ |
51 | #define KVM_REQ_MIGRATE_TIMER 8 | 51 | #define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0) |
52 | #define KVM_REQ_REPORT_TPR_ACCESS 9 | 52 | #define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1) |
53 | #define KVM_REQ_TRIPLE_FAULT 10 | 53 | #define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2) |
54 | #define KVM_REQ_MMU_SYNC 11 | 54 | #define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3) |
55 | #define KVM_REQ_CLOCK_UPDATE 12 | 55 | #define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4) |
56 | #define KVM_REQ_EVENT 14 | 56 | #define KVM_REQ_EVENT KVM_ARCH_REQ(6) |
57 | #define KVM_REQ_APF_HALT 15 | 57 | #define KVM_REQ_APF_HALT KVM_ARCH_REQ(7) |
58 | #define KVM_REQ_STEAL_UPDATE 16 | 58 | #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8) |
59 | #define KVM_REQ_NMI 17 | 59 | #define KVM_REQ_NMI KVM_ARCH_REQ(9) |
60 | #define KVM_REQ_PMU 18 | 60 | #define KVM_REQ_PMU KVM_ARCH_REQ(10) |
61 | #define KVM_REQ_PMI 19 | 61 | #define KVM_REQ_PMI KVM_ARCH_REQ(11) |
62 | #define KVM_REQ_SMI 20 | 62 | #define KVM_REQ_SMI KVM_ARCH_REQ(12) |
63 | #define KVM_REQ_MASTERCLOCK_UPDATE 21 | 63 | #define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13) |
64 | #define KVM_REQ_MCLOCK_INPROGRESS (22 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | 64 | #define KVM_REQ_MCLOCK_INPROGRESS \ |
65 | #define KVM_REQ_SCAN_IOAPIC (23 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | 65 | KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) |
66 | #define KVM_REQ_GLOBAL_CLOCK_UPDATE 24 | 66 | #define KVM_REQ_SCAN_IOAPIC \ |
67 | #define KVM_REQ_APIC_PAGE_RELOAD (25 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | 67 | KVM_ARCH_REQ_FLAGS(15, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) |
68 | #define KVM_REQ_HV_CRASH 26 | 68 | #define KVM_REQ_GLOBAL_CLOCK_UPDATE KVM_ARCH_REQ(16) |
69 | #define KVM_REQ_IOAPIC_EOI_EXIT 27 | 69 | #define KVM_REQ_APIC_PAGE_RELOAD \ |
70 | #define KVM_REQ_HV_RESET 28 | 70 | KVM_ARCH_REQ_FLAGS(17, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) |
71 | #define KVM_REQ_HV_EXIT 29 | 71 | #define KVM_REQ_HV_CRASH KVM_ARCH_REQ(18) |
72 | #define KVM_REQ_HV_STIMER 30 | 72 | #define KVM_REQ_IOAPIC_EOI_EXIT KVM_ARCH_REQ(19) |
73 | #define KVM_REQ_HV_RESET KVM_ARCH_REQ(20) | ||
74 | #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) | ||
75 | #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) | ||
73 | 76 | ||
74 | #define CR0_RESERVED_BITS \ | 77 | #define CR0_RESERVED_BITS \ |
75 | (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | 78 | (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ |
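The hunk above converts the raw x86 request numbers to the KVM_ARCH_REQ()/KVM_ARCH_REQ_FLAGS() helpers introduced by the generic VCPU-request overhaul in this merge; the numeric values stay the same (for example KVM_ARCH_REQ(0) still evaluates to 8, and KVM_ARCH_REQ_FLAGS(14, ...) still carries bit index 22). A simplified sketch of what the helpers expand to; the authoritative definitions live in include/linux/kvm_host.h and add build-time range checks:

	/* Simplified sketch of the generic request helpers (assumed shape). */
	#define KVM_REQUEST_NO_WAKEUP       BIT(8)
	#define KVM_REQUEST_WAIT            BIT(9)
	#define KVM_REQUEST_ARCH_BASE       8
	#define KVM_ARCH_REQ_FLAGS(nr, flags) \
		(((nr) + KVM_REQUEST_ARCH_BASE) | (flags))
	#define KVM_ARCH_REQ(nr)            KVM_ARCH_REQ_FLAGS(nr, 0)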
@@ -254,7 +257,8 @@ union kvm_mmu_page_role { | |||
254 | unsigned cr0_wp:1; | 257 | unsigned cr0_wp:1; |
255 | unsigned smep_andnot_wp:1; | 258 | unsigned smep_andnot_wp:1; |
256 | unsigned smap_andnot_wp:1; | 259 | unsigned smap_andnot_wp:1; |
257 | unsigned :8; | 260 | unsigned ad_disabled:1; |
261 | unsigned :7; | ||
258 | 262 | ||
259 | /* | 263 | /* |
260 | * This is left at the top of the word so that | 264 | * This is left at the top of the word so that |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index d406894cd9a2..5573c75f8e4c 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -426,6 +426,8 @@ | |||
426 | #define MSR_IA32_TSC_ADJUST 0x0000003b | 426 | #define MSR_IA32_TSC_ADJUST 0x0000003b |
427 | #define MSR_IA32_BNDCFGS 0x00000d90 | 427 | #define MSR_IA32_BNDCFGS 0x00000d90 |
428 | 428 | ||
429 | #define MSR_IA32_BNDCFGS_RSVD 0x00000ffc | ||
430 | |||
429 | #define MSR_IA32_XSS 0x00000da0 | 431 | #define MSR_IA32_XSS 0x00000da0 |
430 | 432 | ||
431 | #define FEATURE_CONTROL_LOCKED (1<<0) | 433 | #define FEATURE_CONTROL_LOCKED (1<<0) |
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index a6fd40aade7c..da6728383052 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h | |||
@@ -144,6 +144,14 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) | |||
144 | return best && (best->ebx & bit(X86_FEATURE_RTM)); | 144 | return best && (best->ebx & bit(X86_FEATURE_RTM)); |
145 | } | 145 | } |
146 | 146 | ||
147 | static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) | ||
148 | { | ||
149 | struct kvm_cpuid_entry2 *best; | ||
150 | |||
151 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
152 | return best && (best->ebx & bit(X86_FEATURE_MPX)); | ||
153 | } | ||
154 | |||
147 | static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) | 155 | static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) |
148 | { | 156 | { |
149 | struct kvm_cpuid_entry2 *best; | 157 | struct kvm_cpuid_entry2 *best; |
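guest_cpuid_has_mpx() pairs with the MSR_IA32_BNDCFGS_RSVD definition added a few hunks earlier: together they let the MSR handlers reject BNDCFGS accesses from guests whose CPUID does not advertise MPX, and writes that set reserved bits. A hedged sketch of such a check; the real handler lives in vmx.c and is not part of this excerpt, so the function below is only an assumed shape:

	/* Sketch: validity check for a guest write to MSR_IA32_BNDCFGS. */
	static int bndcfgs_write_allowed(struct kvm_vcpu *vcpu, u64 data,
					 bool host_initiated)
	{
		if (!host_initiated && !guest_cpuid_has_mpx(vcpu))
			return 0;	/* guest CPUID must advertise MPX */
		if (data & MSR_IA32_BNDCFGS_RSVD)
			return 0;	/* reserved bits must be zero */
		return 1;
	}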
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 80890dee66ce..fb0055953fbc 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -900,7 +900,7 @@ static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, | |||
900 | if (rc != X86EMUL_CONTINUE) \ | 900 | if (rc != X86EMUL_CONTINUE) \ |
901 | goto done; \ | 901 | goto done; \ |
902 | ctxt->_eip += sizeof(_type); \ | 902 | ctxt->_eip += sizeof(_type); \ |
903 | _x = *(_type __aligned(1) *) ctxt->fetch.ptr; \ | 903 | memcpy(&_x, ctxt->fetch.ptr, sizeof(_type)); \ |
904 | ctxt->fetch.ptr += sizeof(_type); \ | 904 | ctxt->fetch.ptr += sizeof(_type); \ |
905 | _x; \ | 905 | _x; \ |
906 | }) | 906 | }) |
@@ -3942,6 +3942,25 @@ static int check_fxsr(struct x86_emulate_ctxt *ctxt) | |||
3942 | } | 3942 | } |
3943 | 3943 | ||
3944 | /* | 3944 | /* |
3945 | * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save | ||
3946 | * and restore MXCSR. | ||
3947 | */ | ||
3948 | static size_t __fxstate_size(int nregs) | ||
3949 | { | ||
3950 | return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16; | ||
3951 | } | ||
3952 | |||
3953 | static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt) | ||
3954 | { | ||
3955 | bool cr4_osfxsr; | ||
3956 | if (ctxt->mode == X86EMUL_MODE_PROT64) | ||
3957 | return __fxstate_size(16); | ||
3958 | |||
3959 | cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR; | ||
3960 | return __fxstate_size(cr4_osfxsr ? 8 : 0); | ||
3961 | } | ||
3962 | |||
3963 | /* | ||
3945 | * FXSAVE and FXRSTOR have 4 different formats depending on execution mode, | 3964 | * FXSAVE and FXRSTOR have 4 different formats depending on execution mode, |
3946 | * 1) 16 bit mode | 3965 | * 1) 16 bit mode |
3947 | * 2) 32 bit mode | 3966 | * 2) 32 bit mode |
@@ -3962,7 +3981,6 @@ static int check_fxsr(struct x86_emulate_ctxt *ctxt) | |||
3962 | static int em_fxsave(struct x86_emulate_ctxt *ctxt) | 3981 | static int em_fxsave(struct x86_emulate_ctxt *ctxt) |
3963 | { | 3982 | { |
3964 | struct fxregs_state fx_state; | 3983 | struct fxregs_state fx_state; |
3965 | size_t size; | ||
3966 | int rc; | 3984 | int rc; |
3967 | 3985 | ||
3968 | rc = check_fxsr(ctxt); | 3986 | rc = check_fxsr(ctxt); |
@@ -3978,68 +3996,42 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) | |||
3978 | if (rc != X86EMUL_CONTINUE) | 3996 | if (rc != X86EMUL_CONTINUE) |
3979 | return rc; | 3997 | return rc; |
3980 | 3998 | ||
3981 | if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR) | 3999 | return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state, |
3982 | size = offsetof(struct fxregs_state, xmm_space[8 * 16/4]); | 4000 | fxstate_size(ctxt)); |
3983 | else | ||
3984 | size = offsetof(struct fxregs_state, xmm_space[0]); | ||
3985 | |||
3986 | return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); | ||
3987 | } | ||
3988 | |||
3989 | static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt, | ||
3990 | struct fxregs_state *new) | ||
3991 | { | ||
3992 | int rc = X86EMUL_CONTINUE; | ||
3993 | struct fxregs_state old; | ||
3994 | |||
3995 | rc = asm_safe("fxsave %[fx]", , [fx] "+m"(old)); | ||
3996 | if (rc != X86EMUL_CONTINUE) | ||
3997 | return rc; | ||
3998 | |||
3999 | /* | ||
4000 | * 64 bit host will restore XMM 8-15, which is not correct on non-64 | ||
4001 | * bit guests. Load the current values in order to preserve 64 bit | ||
4002 | * XMMs after fxrstor. | ||
4003 | */ | ||
4004 | #ifdef CONFIG_X86_64 | ||
4005 | /* XXX: accessing XMM 8-15 very awkwardly */ | ||
4006 | memcpy(&new->xmm_space[8 * 16/4], &old.xmm_space[8 * 16/4], 8 * 16); | ||
4007 | #endif | ||
4008 | |||
4009 | /* | ||
4010 | * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but | ||
4011 | * does save and restore MXCSR. | ||
4012 | */ | ||
4013 | if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)) | ||
4014 | memcpy(new->xmm_space, old.xmm_space, 8 * 16); | ||
4015 | |||
4016 | return rc; | ||
4017 | } | 4001 | } |
4018 | 4002 | ||
4019 | static int em_fxrstor(struct x86_emulate_ctxt *ctxt) | 4003 | static int em_fxrstor(struct x86_emulate_ctxt *ctxt) |
4020 | { | 4004 | { |
4021 | struct fxregs_state fx_state; | 4005 | struct fxregs_state fx_state; |
4022 | int rc; | 4006 | int rc; |
4007 | size_t size; | ||
4023 | 4008 | ||
4024 | rc = check_fxsr(ctxt); | 4009 | rc = check_fxsr(ctxt); |
4025 | if (rc != X86EMUL_CONTINUE) | 4010 | if (rc != X86EMUL_CONTINUE) |
4026 | return rc; | 4011 | return rc; |
4027 | 4012 | ||
4028 | rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, 512); | 4013 | ctxt->ops->get_fpu(ctxt); |
4029 | if (rc != X86EMUL_CONTINUE) | ||
4030 | return rc; | ||
4031 | 4014 | ||
4032 | if (fx_state.mxcsr >> 16) | 4015 | size = fxstate_size(ctxt); |
4033 | return emulate_gp(ctxt, 0); | 4016 | if (size < __fxstate_size(16)) { |
4017 | rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); | ||
4018 | if (rc != X86EMUL_CONTINUE) | ||
4019 | goto out; | ||
4020 | } | ||
4034 | 4021 | ||
4035 | ctxt->ops->get_fpu(ctxt); | 4022 | rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); |
4023 | if (rc != X86EMUL_CONTINUE) | ||
4024 | goto out; | ||
4036 | 4025 | ||
4037 | if (ctxt->mode < X86EMUL_MODE_PROT64) | 4026 | if (fx_state.mxcsr >> 16) { |
4038 | rc = fxrstor_fixup(ctxt, &fx_state); | 4027 | rc = emulate_gp(ctxt, 0); |
4028 | goto out; | ||
4029 | } | ||
4039 | 4030 | ||
4040 | if (rc == X86EMUL_CONTINUE) | 4031 | if (rc == X86EMUL_CONTINUE) |
4041 | rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state)); | 4032 | rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state)); |
4042 | 4033 | ||
4034 | out: | ||
4043 | ctxt->ops->put_fpu(ctxt); | 4035 | ctxt->ops->put_fpu(ctxt); |
4044 | 4036 | ||
4045 | return rc; | 4037 | return rc; |
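The em_fxsave()/em_fxrstor() rework above replaces the mode-dependent special cases with one size computation. The concrete sizes below are only an illustration, assuming the architectural FXSAVE image in which the XMM register file starts at byte 160; the code itself relies solely on offsetof():

	/*
	 * Worked example for __fxstate_size()/fxstate_size():
	 *   __fxstate_size(16) = 160 + 16 * 16 = 416  (64-bit mode, XMM0-15)
	 *   __fxstate_size(8)  = 160 +  8 * 16 = 288  (32-bit mode, CR4.OSFXSR set)
	 *   __fxstate_size(0)  = 160                  (CR4.OSFXSR clear: no XMM
	 *                                              state, but MXCSR included)
	 * em_fxrstor() pre-fills fx_state with a host fxsave whenever the guest
	 * image is shorter than the full 416 bytes, so the untouched tail keeps
	 * the current host values.
	 */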
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d24c8742d9b0..2819d4c123eb 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -1495,6 +1495,7 @@ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); | |||
1495 | 1495 | ||
1496 | static void cancel_hv_timer(struct kvm_lapic *apic) | 1496 | static void cancel_hv_timer(struct kvm_lapic *apic) |
1497 | { | 1497 | { |
1498 | WARN_ON(!apic->lapic_timer.hv_timer_in_use); | ||
1498 | preempt_disable(); | 1499 | preempt_disable(); |
1499 | kvm_x86_ops->cancel_hv_timer(apic->vcpu); | 1500 | kvm_x86_ops->cancel_hv_timer(apic->vcpu); |
1500 | apic->lapic_timer.hv_timer_in_use = false; | 1501 | apic->lapic_timer.hv_timer_in_use = false; |
@@ -1503,25 +1504,56 @@ static void cancel_hv_timer(struct kvm_lapic *apic) | |||
1503 | 1504 | ||
1504 | static bool start_hv_timer(struct kvm_lapic *apic) | 1505 | static bool start_hv_timer(struct kvm_lapic *apic) |
1505 | { | 1506 | { |
1506 | u64 tscdeadline = apic->lapic_timer.tscdeadline; | 1507 | struct kvm_timer *ktimer = &apic->lapic_timer; |
1508 | int r; | ||
1507 | 1509 | ||
1508 | if ((atomic_read(&apic->lapic_timer.pending) && | 1510 | if (!kvm_x86_ops->set_hv_timer) |
1509 | !apic_lvtt_period(apic)) || | 1511 | return false; |
1510 | kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) { | 1512 | |
1511 | if (apic->lapic_timer.hv_timer_in_use) | 1513 | if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending)) |
1512 | cancel_hv_timer(apic); | 1514 | return false; |
1513 | } else { | ||
1514 | apic->lapic_timer.hv_timer_in_use = true; | ||
1515 | hrtimer_cancel(&apic->lapic_timer.timer); | ||
1516 | 1515 | ||
1517 | /* In case the sw timer triggered in the window */ | 1516 | r = kvm_x86_ops->set_hv_timer(apic->vcpu, ktimer->tscdeadline); |
1518 | if (atomic_read(&apic->lapic_timer.pending) && | 1517 | if (r < 0) |
1519 | !apic_lvtt_period(apic)) | 1518 | return false; |
1520 | cancel_hv_timer(apic); | 1519 | |
1520 | ktimer->hv_timer_in_use = true; | ||
1521 | hrtimer_cancel(&ktimer->timer); | ||
1522 | |||
1523 | /* | ||
1524 | * Also recheck ktimer->pending, in case the sw timer triggered in | ||
1525 | * the window. For periodic timer, leave the hv timer running for | ||
1526 | * simplicity, and the deadline will be recomputed on the next vmexit. | ||
1527 | */ | ||
1528 | if (!apic_lvtt_period(apic) && (r || atomic_read(&ktimer->pending))) { | ||
1529 | if (r) | ||
1530 | apic_timer_expired(apic); | ||
1531 | return false; | ||
1521 | } | 1532 | } |
1522 | trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, | 1533 | |
1523 | apic->lapic_timer.hv_timer_in_use); | 1534 | trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, true); |
1524 | return apic->lapic_timer.hv_timer_in_use; | 1535 | return true; |
1536 | } | ||
1537 | |||
1538 | static void start_sw_timer(struct kvm_lapic *apic) | ||
1539 | { | ||
1540 | struct kvm_timer *ktimer = &apic->lapic_timer; | ||
1541 | if (apic->lapic_timer.hv_timer_in_use) | ||
1542 | cancel_hv_timer(apic); | ||
1543 | if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending)) | ||
1544 | return; | ||
1545 | |||
1546 | if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) | ||
1547 | start_sw_period(apic); | ||
1548 | else if (apic_lvtt_tscdeadline(apic)) | ||
1549 | start_sw_tscdeadline(apic); | ||
1550 | trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false); | ||
1551 | } | ||
1552 | |||
1553 | static void restart_apic_timer(struct kvm_lapic *apic) | ||
1554 | { | ||
1555 | if (!start_hv_timer(apic)) | ||
1556 | start_sw_timer(apic); | ||
1525 | } | 1557 | } |
1526 | 1558 | ||
1527 | void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) | 1559 | void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) |
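All timer (re)starts now funnel through restart_apic_timer(), which tries the hardware-assisted path and falls back to the hrtimer one. The piece worth spelling out is the return-value contract start_hv_timer() expects from ->set_hv_timer(): negative means the timer cannot be armed, zero means it was armed, positive means the deadline has already passed. A trimmed sketch of how that is consumed (periodic-timer handling and tracing elided; this condenses the hunk above rather than replacing it):

r = kvm_x86_ops->set_hv_timer(apic->vcpu, ktimer->tscdeadline);
if (r < 0)
	return false;			/* restart_apic_timer() falls back to start_sw_timer() */

ktimer->hv_timer_in_use = true;
hrtimer_cancel(&ktimer->timer);		/* the sw timer must not fire as well */

if (r > 0) {				/* deadline already in the past */
	apic_timer_expired(apic);
	return false;			/* let the sw path clean up */
}
return true;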
@@ -1535,19 +1567,14 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) | |||
1535 | 1567 | ||
1536 | if (apic_lvtt_period(apic) && apic->lapic_timer.period) { | 1568 | if (apic_lvtt_period(apic) && apic->lapic_timer.period) { |
1537 | advance_periodic_target_expiration(apic); | 1569 | advance_periodic_target_expiration(apic); |
1538 | if (!start_hv_timer(apic)) | 1570 | restart_apic_timer(apic); |
1539 | start_sw_period(apic); | ||
1540 | } | 1571 | } |
1541 | } | 1572 | } |
1542 | EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer); | 1573 | EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer); |
1543 | 1574 | ||
1544 | void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu) | 1575 | void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu) |
1545 | { | 1576 | { |
1546 | struct kvm_lapic *apic = vcpu->arch.apic; | 1577 | restart_apic_timer(vcpu->arch.apic); |
1547 | |||
1548 | WARN_ON(apic->lapic_timer.hv_timer_in_use); | ||
1549 | |||
1550 | start_hv_timer(apic); | ||
1551 | } | 1578 | } |
1552 | EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer); | 1579 | EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer); |
1553 | 1580 | ||
@@ -1556,33 +1583,28 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu) | |||
1556 | struct kvm_lapic *apic = vcpu->arch.apic; | 1583 | struct kvm_lapic *apic = vcpu->arch.apic; |
1557 | 1584 | ||
1558 | /* Possibly the TSC deadline timer is not enabled yet */ | 1585 | /* Possibly the TSC deadline timer is not enabled yet */ |
1559 | if (!apic->lapic_timer.hv_timer_in_use) | 1586 | if (apic->lapic_timer.hv_timer_in_use) |
1560 | return; | 1587 | start_sw_timer(apic); |
1561 | 1588 | } | |
1562 | cancel_hv_timer(apic); | 1589 | EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer); |
1563 | 1590 | ||
1564 | if (atomic_read(&apic->lapic_timer.pending)) | 1591 | void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu) |
1565 | return; | 1592 | { |
1593 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
1566 | 1594 | ||
1567 | if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) | 1595 | WARN_ON(!apic->lapic_timer.hv_timer_in_use); |
1568 | start_sw_period(apic); | 1596 | restart_apic_timer(apic); |
1569 | else if (apic_lvtt_tscdeadline(apic)) | ||
1570 | start_sw_tscdeadline(apic); | ||
1571 | } | 1597 | } |
1572 | EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer); | ||
1573 | 1598 | ||
1574 | static void start_apic_timer(struct kvm_lapic *apic) | 1599 | static void start_apic_timer(struct kvm_lapic *apic) |
1575 | { | 1600 | { |
1576 | atomic_set(&apic->lapic_timer.pending, 0); | 1601 | atomic_set(&apic->lapic_timer.pending, 0); |
1577 | 1602 | ||
1578 | if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { | 1603 | if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) |
1579 | if (set_target_expiration(apic) && | 1604 | && !set_target_expiration(apic)) |
1580 | !(kvm_x86_ops->set_hv_timer && start_hv_timer(apic))) | 1605 | return; |
1581 | start_sw_period(apic); | 1606 | |
1582 | } else if (apic_lvtt_tscdeadline(apic)) { | 1607 | restart_apic_timer(apic); |
1583 | if (!(kvm_x86_ops->set_hv_timer && start_hv_timer(apic))) | ||
1584 | start_sw_tscdeadline(apic); | ||
1585 | } | ||
1586 | } | 1608 | } |
1587 | 1609 | ||
1588 | static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) | 1610 | static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) |
@@ -1813,16 +1835,6 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) | |||
1813 | * LAPIC interface | 1835 | * LAPIC interface |
1814 | *---------------------------------------------------------------------- | 1836 | *---------------------------------------------------------------------- |
1815 | */ | 1837 | */ |
1816 | u64 kvm_get_lapic_target_expiration_tsc(struct kvm_vcpu *vcpu) | ||
1817 | { | ||
1818 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
1819 | |||
1820 | if (!lapic_in_kernel(vcpu)) | ||
1821 | return 0; | ||
1822 | |||
1823 | return apic->lapic_timer.tscdeadline; | ||
1824 | } | ||
1825 | |||
1826 | u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) | 1838 | u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) |
1827 | { | 1839 | { |
1828 | struct kvm_lapic *apic = vcpu->arch.apic; | 1840 | struct kvm_lapic *apic = vcpu->arch.apic; |
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index bcbe811f3b97..29caa2c3dff9 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -87,7 +87,6 @@ int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); | |||
87 | int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); | 87 | int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); |
88 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); | 88 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); |
89 | 89 | ||
90 | u64 kvm_get_lapic_target_expiration_tsc(struct kvm_vcpu *vcpu); | ||
91 | u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); | 90 | u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); |
92 | void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); | 91 | void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); |
93 | 92 | ||
@@ -216,4 +215,5 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu); | |||
216 | void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu); | 215 | void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu); |
217 | void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu); | 216 | void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu); |
218 | bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu); | 217 | bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu); |
218 | void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu); | ||
219 | #endif | 219 | #endif |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index cb8225969255..aafd399cf8c6 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -183,13 +183,13 @@ static u64 __read_mostly shadow_user_mask; | |||
183 | static u64 __read_mostly shadow_accessed_mask; | 183 | static u64 __read_mostly shadow_accessed_mask; |
184 | static u64 __read_mostly shadow_dirty_mask; | 184 | static u64 __read_mostly shadow_dirty_mask; |
185 | static u64 __read_mostly shadow_mmio_mask; | 185 | static u64 __read_mostly shadow_mmio_mask; |
186 | static u64 __read_mostly shadow_mmio_value; | ||
186 | static u64 __read_mostly shadow_present_mask; | 187 | static u64 __read_mostly shadow_present_mask; |
187 | 188 | ||
188 | /* | 189 | /* |
189 | * The mask/value to distinguish a PTE that has been marked not-present for | 190 | * SPTEs used by MMUs without A/D bits are marked with shadow_acc_track_value. |
190 | * access tracking purposes. | 191 | * Non-present SPTEs with shadow_acc_track_value set are in place for access |
191 | * The mask would be either 0 if access tracking is disabled, or | 192 | * tracking. |
192 | * SPTE_SPECIAL_MASK|VMX_EPT_RWX_MASK if access tracking is enabled. | ||
193 | */ | 193 | */ |
194 | static u64 __read_mostly shadow_acc_track_mask; | 194 | static u64 __read_mostly shadow_acc_track_mask; |
195 | static const u64 shadow_acc_track_value = SPTE_SPECIAL_MASK; | 195 | static const u64 shadow_acc_track_value = SPTE_SPECIAL_MASK; |
@@ -207,16 +207,40 @@ static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIF | |||
207 | static void mmu_spte_set(u64 *sptep, u64 spte); | 207 | static void mmu_spte_set(u64 *sptep, u64 spte); |
208 | static void mmu_free_roots(struct kvm_vcpu *vcpu); | 208 | static void mmu_free_roots(struct kvm_vcpu *vcpu); |
209 | 209 | ||
210 | void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask) | 210 | void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value) |
211 | { | 211 | { |
212 | BUG_ON((mmio_mask & mmio_value) != mmio_value); | ||
213 | shadow_mmio_value = mmio_value | SPTE_SPECIAL_MASK; | ||
212 | shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK; | 214 | shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK; |
213 | } | 215 | } |
214 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); | 216 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); |
215 | 217 | ||
218 | static inline bool sp_ad_disabled(struct kvm_mmu_page *sp) | ||
219 | { | ||
220 | return sp->role.ad_disabled; | ||
221 | } | ||
222 | |||
223 | static inline bool spte_ad_enabled(u64 spte) | ||
224 | { | ||
225 | MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value); | ||
226 | return !(spte & shadow_acc_track_value); | ||
227 | } | ||
228 | |||
229 | static inline u64 spte_shadow_accessed_mask(u64 spte) | ||
230 | { | ||
231 | MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value); | ||
232 | return spte_ad_enabled(spte) ? shadow_accessed_mask : 0; | ||
233 | } | ||
234 | |||
235 | static inline u64 spte_shadow_dirty_mask(u64 spte) | ||
236 | { | ||
237 | MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value); | ||
238 | return spte_ad_enabled(spte) ? shadow_dirty_mask : 0; | ||
239 | } | ||
240 | |||
216 | static inline bool is_access_track_spte(u64 spte) | 241 | static inline bool is_access_track_spte(u64 spte) |
217 | { | 242 | { |
218 | /* Always false if shadow_acc_track_mask is zero. */ | 243 | return !spte_ad_enabled(spte) && (spte & shadow_acc_track_mask) == 0; |
219 | return (spte & shadow_acc_track_mask) == shadow_acc_track_value; | ||
220 | } | 244 | } |
221 | 245 | ||
222 | /* | 246 | /* |
@@ -270,7 +294,7 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, | |||
270 | u64 mask = generation_mmio_spte_mask(gen); | 294 | u64 mask = generation_mmio_spte_mask(gen); |
271 | 295 | ||
272 | access &= ACC_WRITE_MASK | ACC_USER_MASK; | 296 | access &= ACC_WRITE_MASK | ACC_USER_MASK; |
273 | mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT; | 297 | mask |= shadow_mmio_value | access | gfn << PAGE_SHIFT; |
274 | 298 | ||
275 | trace_mark_mmio_spte(sptep, gfn, access, gen); | 299 | trace_mark_mmio_spte(sptep, gfn, access, gen); |
276 | mmu_spte_set(sptep, mask); | 300 | mmu_spte_set(sptep, mask); |
@@ -278,7 +302,7 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, | |||
278 | 302 | ||
279 | static bool is_mmio_spte(u64 spte) | 303 | static bool is_mmio_spte(u64 spte) |
280 | { | 304 | { |
281 | return (spte & shadow_mmio_mask) == shadow_mmio_mask; | 305 | return (spte & shadow_mmio_mask) == shadow_mmio_value; |
282 | } | 306 | } |
283 | 307 | ||
284 | static gfn_t get_mmio_spte_gfn(u64 spte) | 308 | static gfn_t get_mmio_spte_gfn(u64 spte) |
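Separating shadow_mmio_mask from shadow_mmio_value is what lets EPT use the 110b (write+execute) misconfiguration pattern as the MMIO marker while still inspecting all of the R/W/X bits: a present entry with different permissions must not match. The standalone program below demonstrates the mask/value test with the architectural EPT constants (RWX mask 0x7, misconfig value 0x6); the software-available bit is a stand-in for SPTE_SPECIAL_MASK and the encodings are illustrative, not the kernel's exact layout.

#include <stdint.h>
#include <stdio.h>

#define EPT_RWX_MASK	 0x7ull		/* bits 2:0: read/write/execute */
#define EPT_MISCONFIG_WX 0x6ull		/* write+execute but not read => misconfig */
#define SPTE_SPECIAL	 (1ull << 62)	/* software-available marker (assumption) */

static const uint64_t mmio_mask  = EPT_RWX_MASK | SPTE_SPECIAL;
static const uint64_t mmio_value = EPT_MISCONFIG_WX | SPTE_SPECIAL;

static int is_mmio(uint64_t spte)
{
	/* match the exact value under the mask, not just "any masked bit set" */
	return (spte & mmio_mask) == mmio_value;
}

int main(void)
{
	uint64_t mmio_spte    = mmio_value | (0x1234ull << 12);		/* gfn stored above the flags */
	uint64_t regular_spte = EPT_RWX_MASK | (0x1234ull << 12);	/* ordinary RWX mapping */

	printf("mmio: %d, regular: %d\n", is_mmio(mmio_spte), is_mmio(regular_spte)); /* 1, 0 */
	return 0;
}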
@@ -315,12 +339,20 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) | |||
315 | return likely(kvm_gen == spte_gen); | 339 | return likely(kvm_gen == spte_gen); |
316 | } | 340 | } |
317 | 341 | ||
342 | /* | ||
343 | * Sets the shadow PTE masks used by the MMU. | ||
344 | * | ||
345 | * Assumptions: | ||
346 | * - Setting either @accessed_mask or @dirty_mask requires setting both | ||
347 | * - At least one of @accessed_mask or @acc_track_mask must be set | ||
348 | */ | ||
318 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 349 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
319 | u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask, | 350 | u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask, |
320 | u64 acc_track_mask) | 351 | u64 acc_track_mask) |
321 | { | 352 | { |
322 | if (acc_track_mask != 0) | 353 | BUG_ON(!dirty_mask != !accessed_mask); |
323 | acc_track_mask |= SPTE_SPECIAL_MASK; | 354 | BUG_ON(!accessed_mask && !acc_track_mask); |
355 | BUG_ON(acc_track_mask & shadow_acc_track_value); | ||
324 | 356 | ||
325 | shadow_user_mask = user_mask; | 357 | shadow_user_mask = user_mask; |
326 | shadow_accessed_mask = accessed_mask; | 358 | shadow_accessed_mask = accessed_mask; |
@@ -329,7 +361,6 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | |||
329 | shadow_x_mask = x_mask; | 361 | shadow_x_mask = x_mask; |
330 | shadow_present_mask = p_mask; | 362 | shadow_present_mask = p_mask; |
331 | shadow_acc_track_mask = acc_track_mask; | 363 | shadow_acc_track_mask = acc_track_mask; |
332 | WARN_ON(shadow_accessed_mask != 0 && shadow_acc_track_mask != 0); | ||
333 | } | 364 | } |
334 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); | 365 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); |
335 | 366 | ||
@@ -549,7 +580,7 @@ static bool spte_has_volatile_bits(u64 spte) | |||
549 | is_access_track_spte(spte)) | 580 | is_access_track_spte(spte)) |
550 | return true; | 581 | return true; |
551 | 582 | ||
552 | if (shadow_accessed_mask) { | 583 | if (spte_ad_enabled(spte)) { |
553 | if ((spte & shadow_accessed_mask) == 0 || | 584 | if ((spte & shadow_accessed_mask) == 0 || |
554 | (is_writable_pte(spte) && (spte & shadow_dirty_mask) == 0)) | 585 | (is_writable_pte(spte) && (spte & shadow_dirty_mask) == 0)) |
555 | return true; | 586 | return true; |
@@ -560,14 +591,17 @@ static bool spte_has_volatile_bits(u64 spte) | |||
560 | 591 | ||
561 | static bool is_accessed_spte(u64 spte) | 592 | static bool is_accessed_spte(u64 spte) |
562 | { | 593 | { |
563 | return shadow_accessed_mask ? spte & shadow_accessed_mask | 594 | u64 accessed_mask = spte_shadow_accessed_mask(spte); |
564 | : !is_access_track_spte(spte); | 595 | |
596 | return accessed_mask ? spte & accessed_mask | ||
597 | : !is_access_track_spte(spte); | ||
565 | } | 598 | } |
566 | 599 | ||
567 | static bool is_dirty_spte(u64 spte) | 600 | static bool is_dirty_spte(u64 spte) |
568 | { | 601 | { |
569 | return shadow_dirty_mask ? spte & shadow_dirty_mask | 602 | u64 dirty_mask = spte_shadow_dirty_mask(spte); |
570 | : spte & PT_WRITABLE_MASK; | 603 | |
604 | return dirty_mask ? spte & dirty_mask : spte & PT_WRITABLE_MASK; | ||
571 | } | 605 | } |
572 | 606 | ||
573 | /* Rules for using mmu_spte_set: | 607 | /* Rules for using mmu_spte_set: |
@@ -707,10 +741,10 @@ static u64 mmu_spte_get_lockless(u64 *sptep) | |||
707 | 741 | ||
708 | static u64 mark_spte_for_access_track(u64 spte) | 742 | static u64 mark_spte_for_access_track(u64 spte) |
709 | { | 743 | { |
710 | if (shadow_accessed_mask != 0) | 744 | if (spte_ad_enabled(spte)) |
711 | return spte & ~shadow_accessed_mask; | 745 | return spte & ~shadow_accessed_mask; |
712 | 746 | ||
713 | if (shadow_acc_track_mask == 0 || is_access_track_spte(spte)) | 747 | if (is_access_track_spte(spte)) |
714 | return spte; | 748 | return spte; |
715 | 749 | ||
716 | /* | 750 | /* |
@@ -729,7 +763,6 @@ static u64 mark_spte_for_access_track(u64 spte) | |||
729 | spte |= (spte & shadow_acc_track_saved_bits_mask) << | 763 | spte |= (spte & shadow_acc_track_saved_bits_mask) << |
730 | shadow_acc_track_saved_bits_shift; | 764 | shadow_acc_track_saved_bits_shift; |
731 | spte &= ~shadow_acc_track_mask; | 765 | spte &= ~shadow_acc_track_mask; |
732 | spte |= shadow_acc_track_value; | ||
733 | 766 | ||
734 | return spte; | 767 | return spte; |
735 | } | 768 | } |
@@ -741,6 +774,7 @@ static u64 restore_acc_track_spte(u64 spte) | |||
741 | u64 saved_bits = (spte >> shadow_acc_track_saved_bits_shift) | 774 | u64 saved_bits = (spte >> shadow_acc_track_saved_bits_shift) |
742 | & shadow_acc_track_saved_bits_mask; | 775 | & shadow_acc_track_saved_bits_mask; |
743 | 776 | ||
777 | WARN_ON_ONCE(spte_ad_enabled(spte)); | ||
744 | WARN_ON_ONCE(!is_access_track_spte(spte)); | 778 | WARN_ON_ONCE(!is_access_track_spte(spte)); |
745 | 779 | ||
746 | new_spte &= ~shadow_acc_track_mask; | 780 | new_spte &= ~shadow_acc_track_mask; |
@@ -759,7 +793,7 @@ static bool mmu_spte_age(u64 *sptep) | |||
759 | if (!is_accessed_spte(spte)) | 793 | if (!is_accessed_spte(spte)) |
760 | return false; | 794 | return false; |
761 | 795 | ||
762 | if (shadow_accessed_mask) { | 796 | if (spte_ad_enabled(spte)) { |
763 | clear_bit((ffs(shadow_accessed_mask) - 1), | 797 | clear_bit((ffs(shadow_accessed_mask) - 1), |
764 | (unsigned long *)sptep); | 798 | (unsigned long *)sptep); |
765 | } else { | 799 | } else { |
@@ -1390,6 +1424,22 @@ static bool spte_clear_dirty(u64 *sptep) | |||
1390 | return mmu_spte_update(sptep, spte); | 1424 | return mmu_spte_update(sptep, spte); |
1391 | } | 1425 | } |
1392 | 1426 | ||
1427 | static bool wrprot_ad_disabled_spte(u64 *sptep) | ||
1428 | { | ||
1429 | bool was_writable = test_and_clear_bit(PT_WRITABLE_SHIFT, | ||
1430 | (unsigned long *)sptep); | ||
1431 | if (was_writable) | ||
1432 | kvm_set_pfn_dirty(spte_to_pfn(*sptep)); | ||
1433 | |||
1434 | return was_writable; | ||
1435 | } | ||
1436 | |||
1437 | /* | ||
1438 | * Gets the GFN ready for another round of dirty logging by clearing the | ||
1439 | * - D bit on ad-enabled SPTEs, and | ||
1440 | * - W bit on ad-disabled SPTEs. | ||
1441 | * Returns true iff any D or W bits were cleared. | ||
1442 | */ | ||
1393 | static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) | 1443 | static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) |
1394 | { | 1444 | { |
1395 | u64 *sptep; | 1445 | u64 *sptep; |
@@ -1397,7 +1447,10 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) | |||
1397 | bool flush = false; | 1447 | bool flush = false; |
1398 | 1448 | ||
1399 | for_each_rmap_spte(rmap_head, &iter, sptep) | 1449 | for_each_rmap_spte(rmap_head, &iter, sptep) |
1400 | flush |= spte_clear_dirty(sptep); | 1450 | if (spte_ad_enabled(*sptep)) |
1451 | flush |= spte_clear_dirty(sptep); | ||
1452 | else | ||
1453 | flush |= wrprot_ad_disabled_spte(sptep); | ||
1401 | 1454 | ||
1402 | return flush; | 1455 | return flush; |
1403 | } | 1456 | } |
@@ -1420,7 +1473,8 @@ static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) | |||
1420 | bool flush = false; | 1473 | bool flush = false; |
1421 | 1474 | ||
1422 | for_each_rmap_spte(rmap_head, &iter, sptep) | 1475 | for_each_rmap_spte(rmap_head, &iter, sptep) |
1423 | flush |= spte_set_dirty(sptep); | 1476 | if (spte_ad_enabled(*sptep)) |
1477 | flush |= spte_set_dirty(sptep); | ||
1424 | 1478 | ||
1425 | return flush; | 1479 | return flush; |
1426 | } | 1480 | } |
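The dirty-logging hunks encode one rule: with A/D bits available, getting a GFN ready for the next logging round just clears the D bit; on the new ad_disabled shadow pages there is no D bit, so the entry is write-protected instead and the next guest write faults and re-marks the page. The standalone toy below shows the two strategies on a synthetic PTE; the bit positions are illustrative, not the EPT or x86 layouts.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PTE_WRITABLE	(1ull << 1)
#define PTE_DIRTY	(1ull << 6)
#define PTE_AD_DISABLED	(1ull << 62)	/* stand-in for shadow_acc_track_value */

/* Prepare one PTE for the next dirty-logging round; returns "needs TLB flush". */
static bool clear_dirty_for_logging(uint64_t *pte)
{
	if (!(*pte & PTE_AD_DISABLED)) {		/* hardware D bit in use */
		bool was_dirty = *pte & PTE_DIRTY;
		*pte &= ~PTE_DIRTY;
		return was_dirty;
	}
	/* no D bit: write-protect so the next write faults and logs the page */
	bool was_writable = *pte & PTE_WRITABLE;
	*pte &= ~PTE_WRITABLE;
	return was_writable;
}

int main(void)
{
	uint64_t ad_pte    = PTE_WRITABLE | PTE_DIRTY;
	uint64_t no_ad_pte = PTE_WRITABLE | PTE_AD_DISABLED;

	printf("ad-enabled:  flush=%d\n", clear_dirty_for_logging(&ad_pte));	/* 1 */
	printf("ad-disabled: flush=%d\n", clear_dirty_for_logging(&no_ad_pte));	/* 1 */
	return 0;
}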
@@ -1452,7 +1506,8 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, | |||
1452 | } | 1506 | } |
1453 | 1507 | ||
1454 | /** | 1508 | /** |
1455 | * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages | 1509 | * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages, or write |
1510 | * protect the page if the D-bit isn't supported. | ||
1456 | * @kvm: kvm instance | 1511 | * @kvm: kvm instance |
1457 | * @slot: slot to clear D-bit | 1512 | * @slot: slot to clear D-bit |
1458 | * @gfn_offset: start of the BITS_PER_LONG pages we care about | 1513 | * @gfn_offset: start of the BITS_PER_LONG pages we care about |
@@ -1766,18 +1821,9 @@ static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, | |||
1766 | u64 *sptep; | 1821 | u64 *sptep; |
1767 | struct rmap_iterator iter; | 1822 | struct rmap_iterator iter; |
1768 | 1823 | ||
1769 | /* | ||
1770 | * If there's no access bit in the secondary pte set by the hardware and | ||
1771 | * fast access tracking is also not enabled, it's up to gup-fast/gup to | ||
1772 | * set the access bit in the primary pte or in the page structure. | ||
1773 | */ | ||
1774 | if (!shadow_accessed_mask && !shadow_acc_track_mask) | ||
1775 | goto out; | ||
1776 | |||
1777 | for_each_rmap_spte(rmap_head, &iter, sptep) | 1824 | for_each_rmap_spte(rmap_head, &iter, sptep) |
1778 | if (is_accessed_spte(*sptep)) | 1825 | if (is_accessed_spte(*sptep)) |
1779 | return 1; | 1826 | return 1; |
1780 | out: | ||
1781 | return 0; | 1827 | return 0; |
1782 | } | 1828 | } |
1783 | 1829 | ||
@@ -1798,18 +1844,6 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
1798 | 1844 | ||
1799 | int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) | 1845 | int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) |
1800 | { | 1846 | { |
1801 | /* | ||
1802 | * In case of absence of EPT Access and Dirty Bits supports, | ||
1803 | * emulate the accessed bit for EPT, by checking if this page has | ||
1804 | * an EPT mapping, and clearing it if it does. On the next access, | ||
1805 | * a new EPT mapping will be established. | ||
1806 | * This has some overhead, but not as much as the cost of swapping | ||
1807 | * out actively used pages or breaking up actively used hugepages. | ||
1808 | */ | ||
1809 | if (!shadow_accessed_mask && !shadow_acc_track_mask) | ||
1810 | return kvm_handle_hva_range(kvm, start, end, 0, | ||
1811 | kvm_unmap_rmapp); | ||
1812 | |||
1813 | return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp); | 1847 | return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp); |
1814 | } | 1848 | } |
1815 | 1849 | ||
@@ -2398,7 +2432,12 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2398 | BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); | 2432 | BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); |
2399 | 2433 | ||
2400 | spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK | | 2434 | spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK | |
2401 | shadow_user_mask | shadow_x_mask | shadow_accessed_mask; | 2435 | shadow_user_mask | shadow_x_mask; |
2436 | |||
2437 | if (sp_ad_disabled(sp)) | ||
2438 | spte |= shadow_acc_track_value; | ||
2439 | else | ||
2440 | spte |= shadow_accessed_mask; | ||
2402 | 2441 | ||
2403 | mmu_spte_set(sptep, spte); | 2442 | mmu_spte_set(sptep, spte); |
2404 | 2443 | ||
@@ -2666,10 +2705,15 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2666 | { | 2705 | { |
2667 | u64 spte = 0; | 2706 | u64 spte = 0; |
2668 | int ret = 0; | 2707 | int ret = 0; |
2708 | struct kvm_mmu_page *sp; | ||
2669 | 2709 | ||
2670 | if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access)) | 2710 | if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access)) |
2671 | return 0; | 2711 | return 0; |
2672 | 2712 | ||
2713 | sp = page_header(__pa(sptep)); | ||
2714 | if (sp_ad_disabled(sp)) | ||
2715 | spte |= shadow_acc_track_value; | ||
2716 | |||
2673 | /* | 2717 | /* |
2674 | * For the EPT case, shadow_present_mask is 0 if hardware | 2718 | * For the EPT case, shadow_present_mask is 0 if hardware |
2675 | * supports exec-only page table entries. In that case, | 2719 | * supports exec-only page table entries. In that case, |
@@ -2678,7 +2722,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2678 | */ | 2722 | */ |
2679 | spte |= shadow_present_mask; | 2723 | spte |= shadow_present_mask; |
2680 | if (!speculative) | 2724 | if (!speculative) |
2681 | spte |= shadow_accessed_mask; | 2725 | spte |= spte_shadow_accessed_mask(spte); |
2682 | 2726 | ||
2683 | if (pte_access & ACC_EXEC_MASK) | 2727 | if (pte_access & ACC_EXEC_MASK) |
2684 | spte |= shadow_x_mask; | 2728 | spte |= shadow_x_mask; |
@@ -2735,7 +2779,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2735 | 2779 | ||
2736 | if (pte_access & ACC_WRITE_MASK) { | 2780 | if (pte_access & ACC_WRITE_MASK) { |
2737 | kvm_vcpu_mark_page_dirty(vcpu, gfn); | 2781 | kvm_vcpu_mark_page_dirty(vcpu, gfn); |
2738 | spte |= shadow_dirty_mask; | 2782 | spte |= spte_shadow_dirty_mask(spte); |
2739 | } | 2783 | } |
2740 | 2784 | ||
2741 | if (speculative) | 2785 | if (speculative) |
@@ -2877,16 +2921,16 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) | |||
2877 | { | 2921 | { |
2878 | struct kvm_mmu_page *sp; | 2922 | struct kvm_mmu_page *sp; |
2879 | 2923 | ||
2924 | sp = page_header(__pa(sptep)); | ||
2925 | |||
2880 | /* | 2926 | /* |
2881 | * Since it's no accessed bit on EPT, it's no way to | 2927 | * Without accessed bits, there's no way to distinguish between |
2882 | * distinguish between actually accessed translations | 2928 | * actually accessed translations and prefetched, so disable pte |
2883 | * and prefetched, so disable pte prefetch if EPT is | 2929 | * prefetch if accessed bits aren't available. |
2884 | * enabled. | ||
2885 | */ | 2930 | */ |
2886 | if (!shadow_accessed_mask) | 2931 | if (sp_ad_disabled(sp)) |
2887 | return; | 2932 | return; |
2888 | 2933 | ||
2889 | sp = page_header(__pa(sptep)); | ||
2890 | if (sp->role.level > PT_PAGE_TABLE_LEVEL) | 2934 | if (sp->role.level > PT_PAGE_TABLE_LEVEL) |
2891 | return; | 2935 | return; |
2892 | 2936 | ||
@@ -4290,6 +4334,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
4290 | 4334 | ||
4291 | context->base_role.word = 0; | 4335 | context->base_role.word = 0; |
4292 | context->base_role.smm = is_smm(vcpu); | 4336 | context->base_role.smm = is_smm(vcpu); |
4337 | context->base_role.ad_disabled = (shadow_accessed_mask == 0); | ||
4293 | context->page_fault = tdp_page_fault; | 4338 | context->page_fault = tdp_page_fault; |
4294 | context->sync_page = nonpaging_sync_page; | 4339 | context->sync_page = nonpaging_sync_page; |
4295 | context->invlpg = nonpaging_invlpg; | 4340 | context->invlpg = nonpaging_invlpg; |
@@ -4377,6 +4422,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, | |||
4377 | context->root_level = context->shadow_root_level; | 4422 | context->root_level = context->shadow_root_level; |
4378 | context->root_hpa = INVALID_PAGE; | 4423 | context->root_hpa = INVALID_PAGE; |
4379 | context->direct_map = false; | 4424 | context->direct_map = false; |
4425 | context->base_role.ad_disabled = !accessed_dirty; | ||
4380 | 4426 | ||
4381 | update_permission_bitmask(vcpu, context, true); | 4427 | update_permission_bitmask(vcpu, context, true); |
4382 | update_pkru_bitmask(vcpu, context, true); | 4428 | update_pkru_bitmask(vcpu, context, true); |
@@ -4636,6 +4682,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
4636 | mask.smep_andnot_wp = 1; | 4682 | mask.smep_andnot_wp = 1; |
4637 | mask.smap_andnot_wp = 1; | 4683 | mask.smap_andnot_wp = 1; |
4638 | mask.smm = 1; | 4684 | mask.smm = 1; |
4685 | mask.ad_disabled = 1; | ||
4639 | 4686 | ||
4640 | /* | 4687 | /* |
4641 | * If we don't have indirect shadow pages, it means no page is | 4688 | * If we don't have indirect shadow pages, it means no page is |
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 330bf3a811fb..a276834950c1 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -51,7 +51,7 @@ static inline u64 rsvd_bits(int s, int e) | |||
51 | return ((1ULL << (e - s + 1)) - 1) << s; | 51 | return ((1ULL << (e - s + 1)) - 1) << s; |
52 | } | 52 | } |
53 | 53 | ||
54 | void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask); | 54 | void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value); |
55 | 55 | ||
56 | void | 56 | void |
57 | reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); | 57 | reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); |
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 5a24b846a1cb..8b97a6cba8d1 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
@@ -30,8 +30,9 @@ | |||
30 | \ | 30 | \ |
31 | role.word = __entry->role; \ | 31 | role.word = __entry->role; \ |
32 | \ | 32 | \ |
33 | trace_seq_printf(p, "sp gen %lx gfn %llx %u%s q%u%s %s%s" \ | 33 | trace_seq_printf(p, "sp gen %lx gfn %llx l%u%s q%u%s %s%s" \ |
34 | " %snxe root %u %s%c", __entry->mmu_valid_gen, \ | 34 | " %snxe %sad root %u %s%c", \ |
35 | __entry->mmu_valid_gen, \ | ||
35 | __entry->gfn, role.level, \ | 36 | __entry->gfn, role.level, \ |
36 | role.cr4_pae ? " pae" : "", \ | 37 | role.cr4_pae ? " pae" : "", \ |
37 | role.quadrant, \ | 38 | role.quadrant, \ |
@@ -39,6 +40,7 @@ | |||
39 | access_str[role.access], \ | 40 | access_str[role.access], \ |
40 | role.invalid ? " invalid" : "", \ | 41 | role.invalid ? " invalid" : "", \ |
41 | role.nxe ? "" : "!", \ | 42 | role.nxe ? "" : "!", \ |
43 | role.ad_disabled ? "!" : "", \ | ||
42 | __entry->root_count, \ | 44 | __entry->root_count, \ |
43 | __entry->unsync ? "unsync" : "sync", 0); \ | 45 | __entry->unsync ? "unsync" : "sync", 0); \ |
44 | saved_ptr; \ | 46 | saved_ptr; \ |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 33460fcdeef9..905ea6052517 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -190,6 +190,7 @@ struct vcpu_svm { | |||
190 | struct nested_state nested; | 190 | struct nested_state nested; |
191 | 191 | ||
192 | bool nmi_singlestep; | 192 | bool nmi_singlestep; |
193 | u64 nmi_singlestep_guest_rflags; | ||
193 | 194 | ||
194 | unsigned int3_injected; | 195 | unsigned int3_injected; |
195 | unsigned long int3_rip; | 196 | unsigned long int3_rip; |
@@ -964,6 +965,18 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) | |||
964 | set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0); | 965 | set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0); |
965 | } | 966 | } |
966 | 967 | ||
968 | static void disable_nmi_singlestep(struct vcpu_svm *svm) | ||
969 | { | ||
970 | svm->nmi_singlestep = false; | ||
971 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) { | ||
972 | /* Clear our flags if they were not set by the guest */ | ||
973 | if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF)) | ||
974 | svm->vmcb->save.rflags &= ~X86_EFLAGS_TF; | ||
975 | if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF)) | ||
976 | svm->vmcb->save.rflags &= ~X86_EFLAGS_RF; | ||
977 | } | ||
978 | } | ||
979 | |||
967 | /* Note: | 980 | /* Note: |
968 | * This hash table is used to map VM_ID to a struct kvm_arch, | 981 | * This hash table is used to map VM_ID to a struct kvm_arch, |
969 | * when handling AMD IOMMU GALOG notification to schedule in | 982 | * when handling AMD IOMMU GALOG notification to schedule in |
@@ -1713,11 +1726,24 @@ static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu) | |||
1713 | 1726 | ||
1714 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) | 1727 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) |
1715 | { | 1728 | { |
1716 | return to_svm(vcpu)->vmcb->save.rflags; | 1729 | struct vcpu_svm *svm = to_svm(vcpu); |
1730 | unsigned long rflags = svm->vmcb->save.rflags; | ||
1731 | |||
1732 | if (svm->nmi_singlestep) { | ||
1733 | /* Hide our flags if they were not set by the guest */ | ||
1734 | if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF)) | ||
1735 | rflags &= ~X86_EFLAGS_TF; | ||
1736 | if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF)) | ||
1737 | rflags &= ~X86_EFLAGS_RF; | ||
1738 | } | ||
1739 | return rflags; | ||
1717 | } | 1740 | } |
1718 | 1741 | ||
1719 | static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 1742 | static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
1720 | { | 1743 | { |
1744 | if (to_svm(vcpu)->nmi_singlestep) | ||
1745 | rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
1746 | |||
1721 | /* | 1747 | /* |
1722 | * Any change of EFLAGS.VM is accompanied by a reload of SS | 1748 | * Any change of EFLAGS.VM is accompanied by a reload of SS |
1723 | * (caused by either a task switch or an inter-privilege IRET), | 1749 | * (caused by either a task switch or an inter-privilege IRET), |
@@ -2112,10 +2138,7 @@ static int db_interception(struct vcpu_svm *svm) | |||
2112 | } | 2138 | } |
2113 | 2139 | ||
2114 | if (svm->nmi_singlestep) { | 2140 | if (svm->nmi_singlestep) { |
2115 | svm->nmi_singlestep = false; | 2141 | disable_nmi_singlestep(svm); |
2116 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) | ||
2117 | svm->vmcb->save.rflags &= | ||
2118 | ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
2119 | } | 2142 | } |
2120 | 2143 | ||
2121 | if (svm->vcpu.guest_debug & | 2144 | if (svm->vcpu.guest_debug & |
@@ -2370,8 +2393,8 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) | |||
2370 | 2393 | ||
2371 | static int nested_svm_check_permissions(struct vcpu_svm *svm) | 2394 | static int nested_svm_check_permissions(struct vcpu_svm *svm) |
2372 | { | 2395 | { |
2373 | if (!(svm->vcpu.arch.efer & EFER_SVME) | 2396 | if (!(svm->vcpu.arch.efer & EFER_SVME) || |
2374 | || !is_paging(&svm->vcpu)) { | 2397 | !is_paging(&svm->vcpu)) { |
2375 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | 2398 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); |
2376 | return 1; | 2399 | return 1; |
2377 | } | 2400 | } |
@@ -2381,7 +2404,7 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm) | |||
2381 | return 1; | 2404 | return 1; |
2382 | } | 2405 | } |
2383 | 2406 | ||
2384 | return 0; | 2407 | return 0; |
2385 | } | 2408 | } |
2386 | 2409 | ||
2387 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | 2410 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, |
@@ -2534,6 +2557,31 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) | |||
2534 | return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; | 2557 | return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; |
2535 | } | 2558 | } |
2536 | 2559 | ||
2560 | /* DB exceptions for our internal use must not cause vmexit */ | ||
2561 | static int nested_svm_intercept_db(struct vcpu_svm *svm) | ||
2562 | { | ||
2563 | unsigned long dr6; | ||
2564 | |||
2565 | /* if we're not singlestepping, it's not ours */ | ||
2566 | if (!svm->nmi_singlestep) | ||
2567 | return NESTED_EXIT_DONE; | ||
2568 | |||
2569 | /* if it's not a singlestep exception, it's not ours */ | ||
2570 | if (kvm_get_dr(&svm->vcpu, 6, &dr6)) | ||
2571 | return NESTED_EXIT_DONE; | ||
2572 | if (!(dr6 & DR6_BS)) | ||
2573 | return NESTED_EXIT_DONE; | ||
2574 | |||
2575 | /* if the guest is singlestepping, it should get the vmexit */ | ||
2576 | if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) { | ||
2577 | disable_nmi_singlestep(svm); | ||
2578 | return NESTED_EXIT_DONE; | ||
2579 | } | ||
2580 | |||
2581 | /* it's ours, the nested hypervisor must not see this one */ | ||
2582 | return NESTED_EXIT_HOST; | ||
2583 | } | ||
2584 | |||
2537 | static int nested_svm_exit_special(struct vcpu_svm *svm) | 2585 | static int nested_svm_exit_special(struct vcpu_svm *svm) |
2538 | { | 2586 | { |
2539 | u32 exit_code = svm->vmcb->control.exit_code; | 2587 | u32 exit_code = svm->vmcb->control.exit_code; |
@@ -2589,8 +2637,12 @@ static int nested_svm_intercept(struct vcpu_svm *svm) | |||
2589 | } | 2637 | } |
2590 | case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { | 2638 | case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { |
2591 | u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); | 2639 | u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); |
2592 | if (svm->nested.intercept_exceptions & excp_bits) | 2640 | if (svm->nested.intercept_exceptions & excp_bits) { |
2593 | vmexit = NESTED_EXIT_DONE; | 2641 | if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR) |
2642 | vmexit = nested_svm_intercept_db(svm); | ||
2643 | else | ||
2644 | vmexit = NESTED_EXIT_DONE; | ||
2645 | } | ||
2594 | /* async page fault always causes a vmexit */ | 2646 | /* async page fault always causes a vmexit */ |
2595 | else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) && | 2647 | else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) && |
2596 | svm->apf_reason != 0) | 2648 | svm->apf_reason != 0) |
@@ -4627,10 +4679,17 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
4627 | == HF_NMI_MASK) | 4679 | == HF_NMI_MASK) |
4628 | return; /* IRET will cause a vm exit */ | 4680 | return; /* IRET will cause a vm exit */ |
4629 | 4681 | ||
4682 | if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0) | ||
4683 | return; /* STGI will cause a vm exit */ | ||
4684 | |||
4685 | if (svm->nested.exit_required) | ||
4686 | return; /* we're not going to run the guest yet */ | ||
4687 | |||
4630 | /* | 4688 | /* |
4631 | * Something prevents NMI from being injected. Single step over possible | 4689 |
4632 | * problem (IRET or exception injection or interrupt shadow) | 4690 | * problem (IRET or exception injection or interrupt shadow) |
4633 | */ | 4691 | */ |
4692 | svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu); | ||
4634 | svm->nmi_singlestep = true; | 4693 | svm->nmi_singlestep = true; |
4635 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); | 4694 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); |
4636 | } | 4695 | } |
@@ -4771,6 +4830,22 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
4771 | if (unlikely(svm->nested.exit_required)) | 4830 | if (unlikely(svm->nested.exit_required)) |
4772 | return; | 4831 | return; |
4773 | 4832 | ||
4833 | /* | ||
4834 | * Disable singlestep if we're injecting an interrupt/exception. | ||
4835 | * We don't want our modified rflags to be pushed on the stack where | ||
4836 | * we might not be able to easily reset them if we disabled NMI | ||
4837 | * singlestep later. | ||
4838 | */ | ||
4839 | if (svm->nmi_singlestep && svm->vmcb->control.event_inj) { | ||
4840 | /* | ||
4841 | * Event injection happens before external interrupts cause a | ||
4842 | * vmexit and interrupts are disabled here, so smp_send_reschedule | ||
4843 | * is enough to force an immediate vmexit. | ||
4844 | */ | ||
4845 | disable_nmi_singlestep(svm); | ||
4846 | smp_send_reschedule(vcpu->cpu); | ||
4847 | } | ||
4848 | |||
4774 | pre_svm_run(svm); | 4849 | pre_svm_run(svm); |
4775 | 4850 | ||
4776 | sync_lapic_to_cr8(vcpu); | 4851 | sync_lapic_to_cr8(vcpu); |
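The thread running through the SVM changes is that the TF and RF bits KVM forces on for NMI single-stepping must never become guest-visible: svm_get_rflags() hides them unless the guest had set them itself, svm_set_rflags() re-applies them, disable_nmi_singlestep() restores the saved guest values, and nested DB intercepts are swallowed when the step was KVM's own. The standalone snippet below shows just the hide arithmetic on plain integers; the TF/RF bit positions are architectural, the rest is illustrative.

#include <stdint.h>
#include <stdio.h>

#define X86_EFLAGS_TF (1ull << 8)
#define X86_EFLAGS_RF (1ull << 16)

/* What the guest should see while KVM has TF|RF forced on in the VMCB. */
static uint64_t hide_singlestep_flags(uint64_t vmcb_rflags, uint64_t guest_saved_rflags)
{
	if (!(guest_saved_rflags & X86_EFLAGS_TF))
		vmcb_rflags &= ~X86_EFLAGS_TF;
	if (!(guest_saved_rflags & X86_EFLAGS_RF))
		vmcb_rflags &= ~X86_EFLAGS_RF;
	return vmcb_rflags;
}

int main(void)
{
	uint64_t guest = 0x202;						/* guest never set TF/RF */
	uint64_t vmcb  = guest | X86_EFLAGS_TF | X86_EFLAGS_RF;	/* KVM forced them on */

	printf("guest-visible rflags: %#llx\n",
	       (unsigned long long)hide_singlestep_flags(vmcb, guest));	/* 0x202 */
	return 0;
}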
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6dcc4873e435..f76efad248ab 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -913,8 +913,9 @@ static void nested_release_page_clean(struct page *page) | |||
913 | kvm_release_page_clean(page); | 913 | kvm_release_page_clean(page); |
914 | } | 914 | } |
915 | 915 | ||
916 | static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu); | ||
916 | static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); | 917 | static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); |
917 | static u64 construct_eptp(unsigned long root_hpa); | 918 | static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); |
918 | static bool vmx_xsaves_supported(void); | 919 | static bool vmx_xsaves_supported(void); |
919 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); | 920 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); |
920 | static void vmx_set_segment(struct kvm_vcpu *vcpu, | 921 | static void vmx_set_segment(struct kvm_vcpu *vcpu, |
@@ -2772,7 +2773,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
2772 | if (enable_ept_ad_bits) { | 2773 | if (enable_ept_ad_bits) { |
2773 | vmx->nested.nested_vmx_secondary_ctls_high |= | 2774 | vmx->nested.nested_vmx_secondary_ctls_high |= |
2774 | SECONDARY_EXEC_ENABLE_PML; | 2775 | SECONDARY_EXEC_ENABLE_PML; |
2775 | vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT; | 2776 | vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT; |
2776 | } | 2777 | } |
2777 | } else | 2778 | } else |
2778 | vmx->nested.nested_vmx_ept_caps = 0; | 2779 | vmx->nested.nested_vmx_ept_caps = 0; |
@@ -3198,7 +3199,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
3198 | msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); | 3199 | msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); |
3199 | break; | 3200 | break; |
3200 | case MSR_IA32_BNDCFGS: | 3201 | case MSR_IA32_BNDCFGS: |
3201 | if (!kvm_mpx_supported()) | 3202 | if (!kvm_mpx_supported() || |
3203 | (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) | ||
3202 | return 1; | 3204 | return 1; |
3203 | msr_info->data = vmcs_read64(GUEST_BNDCFGS); | 3205 | msr_info->data = vmcs_read64(GUEST_BNDCFGS); |
3204 | break; | 3206 | break; |
@@ -3280,7 +3282,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
3280 | vmcs_writel(GUEST_SYSENTER_ESP, data); | 3282 | vmcs_writel(GUEST_SYSENTER_ESP, data); |
3281 | break; | 3283 | break; |
3282 | case MSR_IA32_BNDCFGS: | 3284 | case MSR_IA32_BNDCFGS: |
3283 | if (!kvm_mpx_supported()) | 3285 | if (!kvm_mpx_supported() || |
3286 | (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) | ||
3287 | return 1; | ||
3288 | if (is_noncanonical_address(data & PAGE_MASK) || | ||
3289 | (data & MSR_IA32_BNDCFGS_RSVD)) | ||
3284 | return 1; | 3290 | return 1; |
3285 | vmcs_write64(GUEST_BNDCFGS, data); | 3291 | vmcs_write64(GUEST_BNDCFGS, data); |
3286 | break; | 3292 | break; |
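The added BNDCFGS checks follow the architectural layout of that MSR: bit 0 enables MPX, bit 1 is BNDPRESERVE, bits 11:2 are reserved-must-be-zero, and the bound-directory base in bits 63:12 must be a canonical address. A standalone sketch of the validation; the 0xffc reserved mask and the 48-bit canonical test mirror what the hunk relies on, while MSR_IA32_BNDCFGS_RSVD itself is a kernel define that does not appear in this diff.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BNDCFGS_RSVD_MASK 0x0ffcull	/* bits 11:2 must be zero */
#define PAGE_MASK_4K	  (~0xfffull)

static bool is_canonical_48(uint64_t addr)
{
	/* sign-extending bits 47:0 must give back the same address */
	return ((int64_t)(addr << 16) >> 16) == (int64_t)addr;
}

static bool bndcfgs_valid(uint64_t data)
{
	if (data & BNDCFGS_RSVD_MASK)
		return false;
	return is_canonical_48(data & PAGE_MASK_4K);
}

int main(void)
{
	printf("%d\n", bndcfgs_valid(0x00007f0000000001ull));	/* 1: canonical base, EN=1 */
	printf("%d\n", bndcfgs_valid(0x0000000000000ff4ull));	/* 0: reserved bits set */
	printf("%d\n", bndcfgs_valid(0x1000000000000001ull));	/* 0: non-canonical base */
	return 0;
}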
@@ -4013,7 +4019,7 @@ static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid) | |||
4013 | if (enable_ept) { | 4019 | if (enable_ept) { |
4014 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 4020 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
4015 | return; | 4021 | return; |
4016 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); | 4022 | ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa)); |
4017 | } else { | 4023 | } else { |
4018 | vpid_sync_context(vpid); | 4024 | vpid_sync_context(vpid); |
4019 | } | 4025 | } |
@@ -4188,14 +4194,15 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
4188 | vmx->emulation_required = emulation_required(vcpu); | 4194 | vmx->emulation_required = emulation_required(vcpu); |
4189 | } | 4195 | } |
4190 | 4196 | ||
4191 | static u64 construct_eptp(unsigned long root_hpa) | 4197 | static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) |
4192 | { | 4198 | { |
4193 | u64 eptp; | 4199 | u64 eptp; |
4194 | 4200 | ||
4195 | /* TODO write the value reading from MSR */ | 4201 | /* TODO write the value reading from MSR */ |
4196 | eptp = VMX_EPT_DEFAULT_MT | | 4202 | eptp = VMX_EPT_DEFAULT_MT | |
4197 | VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT; | 4203 | VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT; |
4198 | if (enable_ept_ad_bits) | 4204 | if (enable_ept_ad_bits && |
4205 | (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu))) | ||
4199 | eptp |= VMX_EPT_AD_ENABLE_BIT; | 4206 | eptp |= VMX_EPT_AD_ENABLE_BIT; |
4200 | eptp |= (root_hpa & PAGE_MASK); | 4207 | eptp |= (root_hpa & PAGE_MASK); |
4201 | 4208 | ||
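Passing the vcpu into construct_eptp() lets KVM drop the A/D-enable bit from the EPTP used while running a nested guest whose L1 did not enable A/D in its own EPTP, which is the "use ept a/d in vmcs02 iff used in vmcs12" item from the merge summary. The decision reduces to the small predicate sketched below (field names follow the hunk; the helper itself is illustrative, not kernel code):

/* Sketch: should the EPTP handed to hardware carry VMX_EPT_AD_ENABLE_BIT? */
static bool eptp_wants_ad(bool enable_ept_ad_bits, bool in_guest_mode,
			  bool l1_eptp_ad_enabled)
{
	if (!enable_ept_ad_bits)			/* host/CPU support missing */
		return false;
	if (in_guest_mode && !l1_eptp_ad_enabled)	/* L1 left A/D off in vmcs12 */
		return false;
	return true;
}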
@@ -4209,7 +4216,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
4209 | 4216 | ||
4210 | guest_cr3 = cr3; | 4217 | guest_cr3 = cr3; |
4211 | if (enable_ept) { | 4218 | if (enable_ept) { |
4212 | eptp = construct_eptp(cr3); | 4219 | eptp = construct_eptp(vcpu, cr3); |
4213 | vmcs_write64(EPT_POINTER, eptp); | 4220 | vmcs_write64(EPT_POINTER, eptp); |
4214 | if (is_paging(vcpu) || is_guest_mode(vcpu)) | 4221 | if (is_paging(vcpu) || is_guest_mode(vcpu)) |
4215 | guest_cr3 = kvm_read_cr3(vcpu); | 4222 | guest_cr3 = kvm_read_cr3(vcpu); |
@@ -5170,7 +5177,8 @@ static void ept_set_mmio_spte_mask(void) | |||
5170 | * EPT Misconfigurations can be generated if the value of bits 2:0 | 5177 | * EPT Misconfigurations can be generated if the value of bits 2:0 |
5171 | * of an EPT paging-structure entry is 110b (write/execute). | 5178 | * of an EPT paging-structure entry is 110b (write/execute). |
5172 | */ | 5179 | */ |
5173 | kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE); | 5180 | kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK, |
5181 | VMX_EPT_MISCONFIG_WX_VALUE); | ||
5174 | } | 5182 | } |
5175 | 5183 | ||
5176 | #define VMX_XSS_EXIT_BITMAP 0 | 5184 | #define VMX_XSS_EXIT_BITMAP 0 |
@@ -6220,17 +6228,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
6220 | 6228 | ||
6221 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 6229 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
6222 | 6230 | ||
6223 | if (is_guest_mode(vcpu) | ||
6224 | && !(exit_qualification & EPT_VIOLATION_GVA_TRANSLATED)) { | ||
6225 | /* | ||
6226 | * Fix up exit_qualification according to whether guest | ||
6227 | * page table accesses are reads or writes. | ||
6228 | */ | ||
6229 | u64 eptp = nested_ept_get_cr3(vcpu); | ||
6230 | if (!(eptp & VMX_EPT_AD_ENABLE_BIT)) | ||
6231 | exit_qualification &= ~EPT_VIOLATION_ACC_WRITE; | ||
6232 | } | ||
6233 | |||
6234 | /* | 6231 | /* |
6235 | * EPT violation happened while executing iret from NMI, | 6232 | * EPT violation happened while executing iret from NMI, |
6236 | * "blocked by NMI" bit has to be set before next VM entry. | 6233 | * "blocked by NMI" bit has to be set before next VM entry. |
@@ -6453,7 +6450,7 @@ void vmx_enable_tdp(void) | |||
6453 | enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull, | 6450 | enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull, |
6454 | 0ull, VMX_EPT_EXECUTABLE_MASK, | 6451 | 0ull, VMX_EPT_EXECUTABLE_MASK, |
6455 | cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK, | 6452 | cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK, |
6456 | enable_ept_ad_bits ? 0ull : VMX_EPT_RWX_MASK); | 6453 | VMX_EPT_RWX_MASK); |
6457 | 6454 | ||
6458 | ept_set_mmio_spte_mask(); | 6455 | ept_set_mmio_spte_mask(); |
6459 | kvm_enable_tdp(); | 6456 | kvm_enable_tdp(); |
@@ -6557,7 +6554,6 @@ static __init int hardware_setup(void) | |||
6557 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); | 6554 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); |
6558 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); | 6555 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); |
6559 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); | 6556 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); |
6560 | vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true); | ||
6561 | 6557 | ||
6562 | memcpy(vmx_msr_bitmap_legacy_x2apic_apicv, | 6558 | memcpy(vmx_msr_bitmap_legacy_x2apic_apicv, |
6563 | vmx_msr_bitmap_legacy, PAGE_SIZE); | 6559 | vmx_msr_bitmap_legacy, PAGE_SIZE); |
@@ -7661,7 +7657,10 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) | |||
7661 | unsigned long type, types; | 7657 | unsigned long type, types; |
7662 | gva_t gva; | 7658 | gva_t gva; |
7663 | struct x86_exception e; | 7659 | struct x86_exception e; |
7664 | int vpid; | 7660 | struct { |
7661 | u64 vpid; | ||
7662 | u64 gla; | ||
7663 | } operand; | ||
7665 | 7664 | ||
7666 | if (!(vmx->nested.nested_vmx_secondary_ctls_high & | 7665 | if (!(vmx->nested.nested_vmx_secondary_ctls_high & |
7667 | SECONDARY_EXEC_ENABLE_VPID) || | 7666 | SECONDARY_EXEC_ENABLE_VPID) || |
@@ -7691,17 +7690,28 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) | |||
7691 | if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), | 7690 | if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), |
7692 | vmx_instruction_info, false, &gva)) | 7691 | vmx_instruction_info, false, &gva)) |
7693 | return 1; | 7692 | return 1; |
7694 | if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vpid, | 7693 | if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand, |
7695 | sizeof(u32), &e)) { | 7694 | sizeof(operand), &e)) { |
7696 | kvm_inject_page_fault(vcpu, &e); | 7695 | kvm_inject_page_fault(vcpu, &e); |
7697 | return 1; | 7696 | return 1; |
7698 | } | 7697 | } |
7698 | if (operand.vpid >> 16) { | ||
7699 | nested_vmx_failValid(vcpu, | ||
7700 | VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); | ||
7701 | return kvm_skip_emulated_instruction(vcpu); | ||
7702 | } | ||
7699 | 7703 | ||
7700 | switch (type) { | 7704 | switch (type) { |
7701 | case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: | 7705 | case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: |
7706 | if (is_noncanonical_address(operand.gla)) { | ||
7707 | nested_vmx_failValid(vcpu, | ||
7708 | VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); | ||
7709 | return kvm_skip_emulated_instruction(vcpu); | ||
7710 | } | ||
7711 | /* fall through */ | ||
7702 | case VMX_VPID_EXTENT_SINGLE_CONTEXT: | 7712 | case VMX_VPID_EXTENT_SINGLE_CONTEXT: |
7703 | case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL: | 7713 | case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL: |
7704 | if (!vpid) { | 7714 | if (!operand.vpid) { |
7705 | nested_vmx_failValid(vcpu, | 7715 | nested_vmx_failValid(vcpu, |
7706 | VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); | 7716 | VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); |
7707 | return kvm_skip_emulated_instruction(vcpu); | 7717 | return kvm_skip_emulated_instruction(vcpu); |
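The INVVPID memory operand is 128 bits wide: a 64-bit field whose low 16 bits hold the VPID (bits 63:16 are reserved, hence the operand.vpid >> 16 test), followed by a 64-bit linear address that must be canonical for the individual-address invalidation type; VPID 0 is rejected for the types shown. A standalone sketch of that validation; the struct layout matches the hunk, everything else (names, enum values) is illustrative.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct invvpid_operand {
	uint64_t vpid;	/* only bits 15:0 may be set */
	uint64_t gla;	/* used by the individual-address type */
};

enum { INVVPID_INDIVIDUAL_ADDR, INVVPID_SINGLE_CONTEXT, INVVPID_SINGLE_NON_GLOBAL };

static bool is_canonical_48(uint64_t addr)
{
	return ((int64_t)(addr << 16) >> 16) == (int64_t)addr;
}

static bool invvpid_operand_valid(int type, const struct invvpid_operand *op)
{
	if (op->vpid >> 16)
		return false;		/* reserved VPID bits set */
	if (type == INVVPID_INDIVIDUAL_ADDR && !is_canonical_48(op->gla))
		return false;		/* non-canonical address */
	if (!op->vpid)
		return false;		/* VPID 0 is not valid for these types */
	return true;
}

int main(void)
{
	struct invvpid_operand ok  = { .vpid = 1, .gla = 0x7f0000001000ull };
	struct invvpid_operand bad = { .vpid = 1ull << 20 };

	printf("%d %d\n", invvpid_operand_valid(INVVPID_INDIVIDUAL_ADDR, &ok),
	       invvpid_operand_valid(INVVPID_SINGLE_CONTEXT, &bad));	/* 1 0 */
	return 0;
}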
@@ -9394,6 +9404,11 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, | |||
9394 | vmcs12->guest_physical_address = fault->address; | 9404 | vmcs12->guest_physical_address = fault->address; |
9395 | } | 9405 | } |
9396 | 9406 | ||
9407 | static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu) | ||
9408 | { | ||
9409 | return nested_ept_get_cr3(vcpu) & VMX_EPT_AD_ENABLE_BIT; | ||
9410 | } | ||
9411 | |||
9397 | /* Callbacks for nested_ept_init_mmu_context: */ | 9412 | /* Callbacks for nested_ept_init_mmu_context: */ |
9398 | 9413 | ||
9399 | static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) | 9414 | static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) |
@@ -9404,18 +9419,18 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) | |||
9404 | 9419 | ||
9405 | static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) | 9420 | static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) |
9406 | { | 9421 | { |
9407 | u64 eptp; | 9422 | bool wants_ad; |
9408 | 9423 | ||
9409 | WARN_ON(mmu_is_nested(vcpu)); | 9424 | WARN_ON(mmu_is_nested(vcpu)); |
9410 | eptp = nested_ept_get_cr3(vcpu); | 9425 | wants_ad = nested_ept_ad_enabled(vcpu); |
9411 | if ((eptp & VMX_EPT_AD_ENABLE_BIT) && !enable_ept_ad_bits) | 9426 | if (wants_ad && !enable_ept_ad_bits) |
9412 | return 1; | 9427 | return 1; |
9413 | 9428 | ||
9414 | kvm_mmu_unload(vcpu); | 9429 | kvm_mmu_unload(vcpu); |
9415 | kvm_init_shadow_ept_mmu(vcpu, | 9430 | kvm_init_shadow_ept_mmu(vcpu, |
9416 | to_vmx(vcpu)->nested.nested_vmx_ept_caps & | 9431 | to_vmx(vcpu)->nested.nested_vmx_ept_caps & |
9417 | VMX_EPT_EXECUTE_ONLY_BIT, | 9432 | VMX_EPT_EXECUTE_ONLY_BIT, |
9418 | eptp & VMX_EPT_AD_ENABLE_BIT); | 9433 | wants_ad); |
9419 | vcpu->arch.mmu.set_cr3 = vmx_set_cr3; | 9434 | vcpu->arch.mmu.set_cr3 = vmx_set_cr3; |
9420 | vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; | 9435 | vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; |
9421 | vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; | 9436 | vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; |
@@ -10728,8 +10743,7 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
10728 | vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3); | 10743 | vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3); |
10729 | } | 10744 | } |
10730 | 10745 | ||
10731 | if (nested_cpu_has_ept(vmcs12)) | 10746 | vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS); |
10732 | vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS); | ||
10733 | 10747 | ||
10734 | if (nested_cpu_has_vid(vmcs12)) | 10748 | if (nested_cpu_has_vid(vmcs12)) |
10735 | vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS); | 10749 | vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS); |
@@ -10754,8 +10768,6 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
10754 | vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); | 10768 | vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); |
10755 | if (kvm_mpx_supported()) | 10769 | if (kvm_mpx_supported()) |
10756 | vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); | 10770 | vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); |
10757 | if (nested_cpu_has_xsaves(vmcs12)) | ||
10758 | vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP); | ||
10759 | } | 10771 | } |
10760 | 10772 | ||
10761 | /* | 10773 | /* |
@@ -11152,7 +11164,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc) | |||
11152 | vmx->hv_deadline_tsc = tscl + delta_tsc; | 11164 | vmx->hv_deadline_tsc = tscl + delta_tsc; |
11153 | vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, | 11165 | vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, |
11154 | PIN_BASED_VMX_PREEMPTION_TIMER); | 11166 | PIN_BASED_VMX_PREEMPTION_TIMER); |
11155 | return 0; | 11167 | |
11168 | return delta_tsc == 0; | ||
11156 | } | 11169 | } |
11157 | 11170 | ||
11158 | static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) | 11171 | static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0e846f0cb83b..6c7266f7766d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -2841,10 +2841,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
2841 | kvm_vcpu_write_tsc_offset(vcpu, offset); | 2841 | kvm_vcpu_write_tsc_offset(vcpu, offset); |
2842 | vcpu->arch.tsc_catchup = 1; | 2842 | vcpu->arch.tsc_catchup = 1; |
2843 | } | 2843 | } |
2844 | if (kvm_lapic_hv_timer_in_use(vcpu) && | 2844 | |
2845 | kvm_x86_ops->set_hv_timer(vcpu, | 2845 | if (kvm_lapic_hv_timer_in_use(vcpu)) |
2846 | kvm_get_lapic_target_expiration_tsc(vcpu))) | 2846 | kvm_lapic_restart_hv_timer(vcpu); |
2847 | kvm_lapic_switch_to_sw_timer(vcpu); | 2847 | |
2848 | /* | 2848 | /* |
2849 | * On a host with synchronized TSC, there is no need to update | 2849 | * On a host with synchronized TSC, there is no need to update |
2850 | * kvmclock on vcpu->cpu migration | 2850 | * kvmclock on vcpu->cpu migration |
@@ -6011,7 +6011,7 @@ static void kvm_set_mmio_spte_mask(void) | |||
6011 | mask &= ~1ull; | 6011 | mask &= ~1ull; |
6012 | #endif | 6012 | #endif |
6013 | 6013 | ||
6014 | kvm_mmu_set_mmio_spte_mask(mask); | 6014 | kvm_mmu_set_mmio_spte_mask(mask, mask); |
6015 | } | 6015 | } |
6016 | 6016 | ||
6017 | #ifdef CONFIG_X86_64 | 6017 | #ifdef CONFIG_X86_64 |
@@ -6733,7 +6733,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6733 | 6733 | ||
6734 | bool req_immediate_exit = false; | 6734 | bool req_immediate_exit = false; |
6735 | 6735 | ||
6736 | if (vcpu->requests) { | 6736 | if (kvm_request_pending(vcpu)) { |
6737 | if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) | 6737 | if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) |
6738 | kvm_mmu_unload(vcpu); | 6738 | kvm_mmu_unload(vcpu); |
6739 | if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) | 6739 | if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) |
@@ -6897,7 +6897,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6897 | kvm_x86_ops->sync_pir_to_irr(vcpu); | 6897 | kvm_x86_ops->sync_pir_to_irr(vcpu); |
6898 | } | 6898 | } |
6899 | 6899 | ||
6900 | if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests | 6900 | if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) |
6901 | || need_resched() || signal_pending(current)) { | 6901 | || need_resched() || signal_pending(current)) { |
6902 | vcpu->mode = OUTSIDE_GUEST_MODE; | 6902 | vcpu->mode = OUTSIDE_GUEST_MODE; |
6903 | smp_wmb(); | 6903 | smp_wmb(); |
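Two threads meet in x86.c: the open-coded reads of vcpu->requests become kvm_request_pending(), part of the VCPU-request overhaul this merge advertises, and the hv-timer re-arm on vcpu_load now goes through kvm_lapic_restart_hv_timer() so a failed re-arm cleanly falls back to the software timer. The generic helper is not shown in this diff; it is expected to be little more than a one-shot read of the request bitmap, roughly:

/* Hedged sketch of the generic helper (lives in kvm_host.h, not in this diff). */
static inline bool kvm_request_pending(struct kvm_vcpu *vcpu)
{
	return READ_ONCE(vcpu->requests);
}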
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 295584f31a4e..f0053f884b4a 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h | |||
@@ -57,9 +57,7 @@ struct arch_timer_cpu { | |||
57 | 57 | ||
58 | int kvm_timer_hyp_init(void); | 58 | int kvm_timer_hyp_init(void); |
59 | int kvm_timer_enable(struct kvm_vcpu *vcpu); | 59 | int kvm_timer_enable(struct kvm_vcpu *vcpu); |
60 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | 60 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu); |
61 | const struct kvm_irq_level *virt_irq, | ||
62 | const struct kvm_irq_level *phys_irq); | ||
63 | void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); | 61 | void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); |
64 | void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); | 62 | void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); |
65 | void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); | 63 | void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); |
@@ -70,6 +68,10 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); | |||
70 | u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); | 68 | u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); |
71 | int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); | 69 | int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); |
72 | 70 | ||
71 | int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); | ||
72 | int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); | ||
73 | int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); | ||
74 | |||
73 | bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); | 75 | bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); |
74 | void kvm_timer_schedule(struct kvm_vcpu *vcpu); | 76 | void kvm_timer_schedule(struct kvm_vcpu *vcpu); |
75 | void kvm_timer_unschedule(struct kvm_vcpu *vcpu); | 77 | void kvm_timer_unschedule(struct kvm_vcpu *vcpu); |
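The new kvm_arm_timer_{set,get,has}_attr() hooks back a per-vCPU device-attribute interface so userspace can choose the timer PPI numbers instead of relying on the hard-coded defaults. A hedged userspace sketch follows; the group and attribute constants (KVM_ARM_VCPU_TIMER_CTRL, KVM_ARM_VCPU_TIMER_IRQ_VTIMER) are assumed from the uapi added by this series, so verify them against linux/kvm.h:

    #include <linux/kvm.h>
    #include <stdint.h>
    #include <sys/ioctl.h>

    static int set_vtimer_ppi(int vcpu_fd, uint32_t ppi)   /* e.g. 27 */
    {
            struct kvm_device_attr attr = {
                    .group = KVM_ARM_VCPU_TIMER_CTRL,          /* assumed constant */
                    .attr  = KVM_ARM_VCPU_TIMER_IRQ_VTIMER,    /* assumed constant */
                    .addr  = (uint64_t)(uintptr_t)&ppi,
            };
            return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
    }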
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 1ab4633adf4f..f6e030617467 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h | |||
@@ -35,6 +35,7 @@ struct kvm_pmu { | |||
35 | int irq_num; | 35 | int irq_num; |
36 | struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS]; | 36 | struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS]; |
37 | bool ready; | 37 | bool ready; |
38 | bool created; | ||
38 | bool irq_level; | 39 | bool irq_level; |
39 | }; | 40 | }; |
40 | 41 | ||
@@ -63,6 +64,7 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, | |||
63 | struct kvm_device_attr *attr); | 64 | struct kvm_device_attr *attr); |
64 | int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, | 65 | int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, |
65 | struct kvm_device_attr *attr); | 66 | struct kvm_device_attr *attr); |
67 | int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu); | ||
66 | #else | 68 | #else |
67 | struct kvm_pmu { | 69 | struct kvm_pmu { |
68 | }; | 70 | }; |
@@ -112,6 +114,10 @@ static inline int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, | |||
112 | { | 114 | { |
113 | return -ENXIO; | 115 | return -ENXIO; |
114 | } | 116 | } |
117 | static inline int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) | ||
118 | { | ||
119 | return 0; | ||
120 | } | ||
115 | #endif | 121 | #endif |
116 | 122 | ||
117 | #endif | 123 | #endif |
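The new 'created' flag records that userspace finished configuring the PMU, while 'ready' still tracks that it can actually be used; kvm_arm_pmu_v3_enable() checks the former at first vCPU run, and the !CONFIG stub returning 0 keeps callers free of ifdefs. The configuration itself goes through vCPU device attributes; a hedged userspace sketch using the existing PMU uapi constants (verify KVM_ARM_VCPU_PMU_V3_* against linux/kvm.h; same includes as the timer sketch above):

    static int init_vpmu(int vcpu_fd, uint32_t ppi)   /* e.g. 23 */
    {
            struct kvm_device_attr irq = {
                    .group = KVM_ARM_VCPU_PMU_V3_CTRL,
                    .attr  = KVM_ARM_VCPU_PMU_V3_IRQ,
                    .addr  = (uint64_t)(uintptr_t)&ppi,
            };
            struct kvm_device_attr init = {
                    .group = KVM_ARM_VCPU_PMU_V3_CTRL,
                    .attr  = KVM_ARM_VCPU_PMU_V3_INIT,
            };

            if (ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &irq))
                    return -1;
            return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &init);  /* marks the PMU as created */
    }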
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index ef718586321c..34dba516ef24 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h | |||
@@ -38,6 +38,10 @@ | |||
38 | #define VGIC_MIN_LPI 8192 | 38 | #define VGIC_MIN_LPI 8192 |
39 | #define KVM_IRQCHIP_NUM_PINS (1020 - 32) | 39 | #define KVM_IRQCHIP_NUM_PINS (1020 - 32) |
40 | 40 | ||
41 | #define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS) | ||
42 | #define irq_is_spi(irq) ((irq) >= VGIC_NR_PRIVATE_IRQS && \ | ||
43 | (irq) <= VGIC_MAX_SPI) | ||
44 | |||
41 | enum vgic_type { | 45 | enum vgic_type { |
42 | VGIC_V2, /* Good ol' GICv2 */ | 46 | VGIC_V2, /* Good ol' GICv2 */ |
43 | VGIC_V3, /* New fancy GICv3 */ | 47 | VGIC_V3, /* New fancy GICv3 */ |
@@ -119,6 +123,9 @@ struct vgic_irq { | |||
119 | u8 source; /* GICv2 SGIs only */ | 123 | u8 source; /* GICv2 SGIs only */ |
120 | u8 priority; | 124 | u8 priority; |
121 | enum vgic_irq_config config; /* Level or edge */ | 125 | enum vgic_irq_config config; /* Level or edge */ |
126 | |||
127 | void *owner; /* Opaque pointer to reserve an interrupt | ||
128 | for in-kernel devices. */ | ||
122 | }; | 129 | }; |
123 | 130 | ||
124 | struct vgic_register_region; | 131 | struct vgic_register_region; |
@@ -285,6 +292,7 @@ struct vgic_cpu { | |||
285 | }; | 292 | }; |
286 | 293 | ||
287 | extern struct static_key_false vgic_v2_cpuif_trap; | 294 | extern struct static_key_false vgic_v2_cpuif_trap; |
295 | extern struct static_key_false vgic_v3_cpuif_trap; | ||
288 | 296 | ||
289 | int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); | 297 | int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); |
290 | void kvm_vgic_early_init(struct kvm *kvm); | 298 | void kvm_vgic_early_init(struct kvm *kvm); |
@@ -298,9 +306,7 @@ int kvm_vgic_hyp_init(void); | |||
298 | void kvm_vgic_init_cpu_hardware(void); | 306 | void kvm_vgic_init_cpu_hardware(void); |
299 | 307 | ||
300 | int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, | 308 | int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, |
301 | bool level); | 309 | bool level, void *owner); |
302 | int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid, | ||
303 | bool level); | ||
304 | int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq); | 310 | int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq); |
305 | int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq); | 311 | int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq); |
306 | bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq); | 312 | bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq); |
@@ -341,4 +347,6 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); | |||
341 | */ | 347 | */ |
342 | int kvm_vgic_setup_default_irq_routing(struct kvm *kvm); | 348 | int kvm_vgic_setup_default_irq_routing(struct kvm *kvm); |
343 | 349 | ||
350 | int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner); | ||
351 | |||
344 | #endif /* __KVM_ARM_VGIC_H */ | 352 | #endif /* __KVM_ARM_VGIC_H */ |
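The owner cookie ties a private interrupt to the in-kernel device that registered it, so the timer, the PMU and userspace cannot all inject the same PPI. A hedged kernel-side sketch of the intended protocol, using only the signatures declared above (the caller and cookie are hypothetical):

    static int claim_and_raise_ppi(struct kvm_vcpu *vcpu, unsigned int intid,
                                   void *owner_cookie)
    {
            int ret = kvm_vgic_set_owner(vcpu, intid, owner_cookie);

            if (ret)
                    return ret;     /* PPI already owned by someone else */

            /* later injections must present the same cookie */
            return kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, intid,
                                       true, owner_cookie);
    }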
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 1fa293a37f4a..6a1f87ff94e2 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h | |||
@@ -405,6 +405,7 @@ | |||
405 | #define ICH_LR_PHYS_ID_SHIFT 32 | 405 | #define ICH_LR_PHYS_ID_SHIFT 32 |
406 | #define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) | 406 | #define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) |
407 | #define ICH_LR_PRIORITY_SHIFT 48 | 407 | #define ICH_LR_PRIORITY_SHIFT 48 |
408 | #define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT) | ||
408 | 409 | ||
409 | /* These are for GICv2 emulation only */ | 410 | /* These are for GICv2 emulation only */ |
410 | #define GICH_LR_VIRTUALID (0x3ffUL << 0) | 411 | #define GICH_LR_VIRTUALID (0x3ffUL << 0) |
@@ -416,6 +417,11 @@ | |||
416 | 417 | ||
417 | #define ICH_HCR_EN (1 << 0) | 418 | #define ICH_HCR_EN (1 << 0) |
418 | #define ICH_HCR_UIE (1 << 1) | 419 | #define ICH_HCR_UIE (1 << 1) |
420 | #define ICH_HCR_TC (1 << 10) | ||
421 | #define ICH_HCR_TALL0 (1 << 11) | ||
422 | #define ICH_HCR_TALL1 (1 << 12) | ||
423 | #define ICH_HCR_EOIcount_SHIFT 27 | ||
424 | #define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) | ||
419 | 425 | ||
420 | #define ICH_VMCR_ACK_CTL_SHIFT 2 | 426 | #define ICH_VMCR_ACK_CTL_SHIFT 2 |
421 | #define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT) | 427 | #define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT) |
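ICH_HCR_TALL0/TALL1/TC are the bits behind the new kvm-arm.vgic_v3_group{0,1}_trap and vgic_v3_common_trap parameters and the Cavium erratum 30115 workaround: with them set, the CPU traps the guest's GICv3 group-0, group-1 and common system-register accesses to the hypervisor. A small sketch of how the bits compose (the real logic lives in the vgic-v3 code):

    static u32 build_ich_hcr(bool trap_grp0, bool trap_grp1, bool trap_common)
    {
            u32 hcr = ICH_HCR_EN;           /* interface enable */

            if (trap_grp0)
                    hcr |= ICH_HCR_TALL0;   /* trap all group-0 sysreg accesses */
            if (trap_grp1)
                    hcr |= ICH_HCR_TALL1;   /* trap all group-1 sysreg accesses */
            if (trap_common)
                    hcr |= ICH_HCR_TC;      /* trap common sysreg accesses */
            return hcr;
    }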
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8c0664309815..0b50e7b35ed4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -126,6 +126,13 @@ static inline bool is_error_page(struct page *page) | |||
126 | #define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | 126 | #define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) |
127 | #define KVM_REQ_PENDING_TIMER 2 | 127 | #define KVM_REQ_PENDING_TIMER 2 |
128 | #define KVM_REQ_UNHALT 3 | 128 | #define KVM_REQ_UNHALT 3 |
129 | #define KVM_REQUEST_ARCH_BASE 8 | ||
130 | |||
131 | #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \ | ||
132 | BUILD_BUG_ON((unsigned)(nr) >= 32 - KVM_REQUEST_ARCH_BASE); \ | ||
133 | (unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \ | ||
134 | }) | ||
135 | #define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0) | ||
129 | 136 | ||
130 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 | 137 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 |
131 | #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 | 138 | #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 |
@@ -1098,6 +1105,11 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) | |||
1098 | set_bit(req & KVM_REQUEST_MASK, &vcpu->requests); | 1105 | set_bit(req & KVM_REQUEST_MASK, &vcpu->requests); |
1099 | } | 1106 | } |
1100 | 1107 | ||
1108 | static inline bool kvm_request_pending(struct kvm_vcpu *vcpu) | ||
1109 | { | ||
1110 | return READ_ONCE(vcpu->requests); | ||
1111 | } | ||
1112 | |||
1101 | static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu) | 1113 | static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu) |
1102 | { | 1114 | { |
1103 | return test_bit(req & KVM_REQUEST_MASK, &vcpu->requests); | 1115 | return test_bit(req & KVM_REQUEST_MASK, &vcpu->requests); |
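Architectures now build their request numbers on top of KVM_REQUEST_ARCH_BASE, and the hot-path test becomes kvm_request_pending(), a single READ_ONCE of the bitmap. A sketch of the expected usage; the request name is hypothetical, real definitions live in each architecture's asm/kvm_host.h:

    #define KVM_REQ_EXAMPLE_SYNC    KVM_ARCH_REQ(0)   /* becomes bit 8 */

    static void handle_requests_sketch(struct kvm_vcpu *vcpu)
    {
            if (!kvm_request_pending(vcpu))
                    return;         /* fast path: nothing requested */

            if (kvm_check_request(KVM_REQ_EXAMPLE_SYNC, vcpu)) {
                    /* do the work; kvm_check_request() already cleared the bit */
            }
    }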
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 577429a95ad8..c0b6dfec5f87 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h | |||
@@ -155,6 +155,35 @@ struct kvm_s390_skeys { | |||
155 | __u32 reserved[9]; | 155 | __u32 reserved[9]; |
156 | }; | 156 | }; |
157 | 157 | ||
158 | #define KVM_S390_CMMA_PEEK (1 << 0) | ||
159 | |||
160 | /** | ||
161 | * kvm_s390_cmma_log - Used for CMMA migration. | ||
162 | * | ||
163 | * Used both for input and output. | ||
164 | * | ||
165 | * @start_gfn: Guest page number to start from. | ||
166 | * @count: Size of the result buffer. | ||
167 | * @flags: Control operation mode via KVM_S390_CMMA_* flags | ||
168 | * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty | ||
169 | * pages are still remaining. | ||
170 | * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set | ||
171 | * in the PGSTE. | ||
172 | * @values: Pointer to the values buffer. | ||
173 | * | ||
174 | * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. | ||
175 | */ | ||
176 | struct kvm_s390_cmma_log { | ||
177 | __u64 start_gfn; | ||
178 | __u32 count; | ||
179 | __u32 flags; | ||
180 | union { | ||
181 | __u64 remaining; | ||
182 | __u64 mask; | ||
183 | }; | ||
184 | __u64 values; | ||
185 | }; | ||
186 | |||
158 | struct kvm_hyperv_exit { | 187 | struct kvm_hyperv_exit { |
159 | #define KVM_EXIT_HYPERV_SYNIC 1 | 188 | #define KVM_EXIT_HYPERV_SYNIC 1 |
160 | #define KVM_EXIT_HYPERV_HCALL 2 | 189 | #define KVM_EXIT_HYPERV_HCALL 2 |
@@ -895,6 +924,9 @@ struct kvm_ppc_resize_hpt { | |||
895 | #define KVM_CAP_SPAPR_TCE_VFIO 142 | 924 | #define KVM_CAP_SPAPR_TCE_VFIO 142 |
896 | #define KVM_CAP_X86_GUEST_MWAIT 143 | 925 | #define KVM_CAP_X86_GUEST_MWAIT 143 |
897 | #define KVM_CAP_ARM_USER_IRQ 144 | 926 | #define KVM_CAP_ARM_USER_IRQ 144 |
927 | #define KVM_CAP_S390_CMMA_MIGRATION 145 | ||
928 | #define KVM_CAP_PPC_FWNMI 146 | ||
929 | #define KVM_CAP_PPC_SMT_POSSIBLE 147 | ||
898 | 930 | ||
899 | #ifdef KVM_CAP_IRQ_ROUTING | 931 | #ifdef KVM_CAP_IRQ_ROUTING |
900 | 932 | ||
@@ -1318,6 +1350,9 @@ struct kvm_s390_ucas_mapping { | |||
1318 | #define KVM_S390_GET_IRQ_STATE _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state) | 1350 | #define KVM_S390_GET_IRQ_STATE _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state) |
1319 | /* Available with KVM_CAP_X86_SMM */ | 1351 | /* Available with KVM_CAP_X86_SMM */ |
1320 | #define KVM_SMI _IO(KVMIO, 0xb7) | 1352 | #define KVM_SMI _IO(KVMIO, 0xb7) |
1353 | /* Available with KVM_CAP_S390_CMMA_MIGRATION */ | ||
1354 | #define KVM_S390_GET_CMMA_BITS _IOW(KVMIO, 0xb8, struct kvm_s390_cmma_log) | ||
1355 | #define KVM_S390_SET_CMMA_BITS _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log) | ||
1321 | 1356 | ||
1322 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) | 1357 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) |
1323 | #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) | 1358 | #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) |
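A hedged userspace sketch of the new CMMA interface: query a window of guest frames on the VM file descriptor and let 'remaining' drive the migration loop. Buffer size and error handling are illustrative, and the ioctl is only available once KVM_CAP_S390_CMMA_MIGRATION is enabled:

    #include <linux/kvm.h>
    #include <stdint.h>
    #include <sys/ioctl.h>

    static int fetch_cmma_window(int vm_fd, uint64_t start_gfn)
    {
            uint8_t buf[4096];                      /* one CMMA value per guest frame */
            struct kvm_s390_cmma_log log = {
                    .start_gfn = start_gfn,
                    .count = sizeof(buf),
                    .flags = 0,                     /* or KVM_S390_CMMA_PEEK for a non-destructive read */
                    .values = (uint64_t)(uintptr_t)buf,
            };

            if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
                    return -1;
            /* a real migration loop would keep calling while log.remaining > 0 */
            return 0;
    }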
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 8f74ed8e7237..dd8f00cfb8b4 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat | |||
@@ -295,114 +295,6 @@ class ArchS390(Arch): | |||
295 | ARCH = Arch.get_arch() | 295 | ARCH = Arch.get_arch() |
296 | 296 | ||
297 | 297 | ||
298 | def walkdir(path): | ||
299 | """Returns os.walk() data for specified directory. | ||
300 | |||
301 | As it is only a wrapper it returns the same 3-tuple of (dirpath, | ||
302 | dirnames, filenames). | ||
303 | """ | ||
304 | return next(os.walk(path)) | ||
305 | |||
306 | |||
307 | def parse_int_list(list_string): | ||
308 | """Returns an int list from a string of comma separated integers and | ||
309 | integer ranges.""" | ||
310 | integers = [] | ||
311 | members = list_string.split(',') | ||
312 | |||
313 | for member in members: | ||
314 | if '-' not in member: | ||
315 | integers.append(int(member)) | ||
316 | else: | ||
317 | int_range = member.split('-') | ||
318 | integers.extend(range(int(int_range[0]), | ||
319 | int(int_range[1]) + 1)) | ||
320 | |||
321 | return integers | ||
322 | |||
323 | |||
324 | def get_pid_from_gname(gname): | ||
325 | """Fuzzy function to convert guest name to QEMU process pid. | ||
326 | |||
327 | Returns a list of potential pids, can be empty if no match found. | ||
328 | Throws an exception on processing errors. | ||
329 | |||
330 | """ | ||
331 | pids = [] | ||
332 | try: | ||
333 | child = subprocess.Popen(['ps', '-A', '--format', 'pid,args'], | ||
334 | stdout=subprocess.PIPE) | ||
335 | except: | ||
336 | raise Exception | ||
337 | for line in child.stdout: | ||
338 | line = line.lstrip().split(' ', 1) | ||
339 | # perform a sanity check before calling the more expensive | ||
340 | # function to possibly extract the guest name | ||
341 | if ' -name ' in line[1] and gname == get_gname_from_pid(line[0]): | ||
342 | pids.append(int(line[0])) | ||
343 | child.stdout.close() | ||
344 | |||
345 | return pids | ||
346 | |||
347 | |||
348 | def get_gname_from_pid(pid): | ||
349 | """Returns the guest name for a QEMU process pid. | ||
350 | |||
351 | Extracts the guest name from the QEMU command line by processing the '-name' | ||
352 | option. Will also handle names specified out of sequence. | ||
353 | |||
354 | """ | ||
355 | name = '' | ||
356 | try: | ||
357 | line = open('/proc/{}/cmdline'.format(pid), 'rb').read().split('\0') | ||
358 | parms = line[line.index('-name') + 1].split(',') | ||
359 | while '' in parms: | ||
360 | # commas are escaped (i.e. ',,'), hence e.g. 'foo,bar' results in | ||
361 | # ['foo', '', 'bar'], which we revert here | ||
362 | idx = parms.index('') | ||
363 | parms[idx - 1] += ',' + parms[idx + 1] | ||
364 | del parms[idx:idx+2] | ||
365 | # the '-name' switch allows for two ways to specify the guest name, | ||
366 | # where the plain name overrides the name specified via 'guest=' | ||
367 | for arg in parms: | ||
368 | if '=' not in arg: | ||
369 | name = arg | ||
370 | break | ||
371 | if arg[:6] == 'guest=': | ||
372 | name = arg[6:] | ||
373 | except (ValueError, IOError, IndexError): | ||
374 | pass | ||
375 | |||
376 | return name | ||
377 | |||
378 | |||
379 | def get_online_cpus(): | ||
380 | """Returns a list of cpu id integers.""" | ||
381 | with open('/sys/devices/system/cpu/online') as cpu_list: | ||
382 | cpu_string = cpu_list.readline() | ||
383 | return parse_int_list(cpu_string) | ||
384 | |||
385 | |||
386 | def get_filters(): | ||
387 | """Returns a dict of trace events, their filter ids and | ||
388 | the values that can be filtered. | ||
389 | |||
390 | Trace events can be filtered for special values by setting a | ||
391 | filter string via an ioctl. The string normally has the format | ||
392 | identifier==value. For each filter a new event will be created, to | ||
393 | be able to distinguish the events. | ||
394 | |||
395 | """ | ||
396 | filters = {} | ||
397 | filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) | ||
398 | if ARCH.exit_reasons: | ||
399 | filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons) | ||
400 | return filters | ||
401 | |||
402 | libc = ctypes.CDLL('libc.so.6', use_errno=True) | ||
403 | syscall = libc.syscall | ||
404 | |||
405 | |||
406 | class perf_event_attr(ctypes.Structure): | 298 | class perf_event_attr(ctypes.Structure): |
407 | """Struct that holds the necessary data to set up a trace event. | 299 | """Struct that holds the necessary data to set up a trace event. |
408 | 300 | ||
@@ -432,25 +324,6 @@ class perf_event_attr(ctypes.Structure): | |||
432 | self.read_format = PERF_FORMAT_GROUP | 324 | self.read_format = PERF_FORMAT_GROUP |
433 | 325 | ||
434 | 326 | ||
435 | def perf_event_open(attr, pid, cpu, group_fd, flags): | ||
436 | """Wrapper for the sys_perf_evt_open() syscall. | ||
437 | |||
438 | Used to set up performance events, returns a file descriptor or -1 | ||
439 | on error. | ||
440 | |||
441 | Attributes are: | ||
442 | - syscall number | ||
443 | - struct perf_event_attr * | ||
444 | - pid or -1 to monitor all pids | ||
445 | - cpu number or -1 to monitor all cpus | ||
446 | - The file descriptor of the group leader or -1 to create a group. | ||
447 | - flags | ||
448 | |||
449 | """ | ||
450 | return syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr), | ||
451 | ctypes.c_int(pid), ctypes.c_int(cpu), | ||
452 | ctypes.c_int(group_fd), ctypes.c_long(flags)) | ||
453 | |||
454 | PERF_TYPE_TRACEPOINT = 2 | 327 | PERF_TYPE_TRACEPOINT = 2 |
455 | PERF_FORMAT_GROUP = 1 << 3 | 328 | PERF_FORMAT_GROUP = 1 << 3 |
456 | 329 | ||
@@ -495,6 +368,8 @@ class Event(object): | |||
495 | """Represents a performance event and manages its life cycle.""" | 368 | """Represents a performance event and manages its life cycle.""" |
496 | def __init__(self, name, group, trace_cpu, trace_pid, trace_point, | 369 | def __init__(self, name, group, trace_cpu, trace_pid, trace_point, |
497 | trace_filter, trace_set='kvm'): | 370 | trace_filter, trace_set='kvm'): |
371 | self.libc = ctypes.CDLL('libc.so.6', use_errno=True) | ||
372 | self.syscall = self.libc.syscall | ||
498 | self.name = name | 373 | self.name = name |
499 | self.fd = None | 374 | self.fd = None |
500 | self.setup_event(group, trace_cpu, trace_pid, trace_point, | 375 | self.setup_event(group, trace_cpu, trace_pid, trace_point, |
@@ -511,6 +386,25 @@ class Event(object): | |||
511 | if self.fd: | 386 | if self.fd: |
512 | os.close(self.fd) | 387 | os.close(self.fd) |
513 | 388 | ||
389 | def perf_event_open(self, attr, pid, cpu, group_fd, flags): | ||
390 | """Wrapper for the sys_perf_evt_open() syscall. | ||
391 | |||
392 | Used to set up performance events, returns a file descriptor or -1 | ||
393 | on error. | ||
394 | |||
395 | Attributes are: | ||
396 | - syscall number | ||
397 | - struct perf_event_attr * | ||
398 | - pid or -1 to monitor all pids | ||
399 | - cpu number or -1 to monitor all cpus | ||
400 | - The file descriptor of the group leader or -1 to create a group. | ||
401 | - flags | ||
402 | |||
403 | """ | ||
404 | return self.syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr), | ||
405 | ctypes.c_int(pid), ctypes.c_int(cpu), | ||
406 | ctypes.c_int(group_fd), ctypes.c_long(flags)) | ||
407 | |||
514 | def setup_event_attribute(self, trace_set, trace_point): | 408 | def setup_event_attribute(self, trace_set, trace_point): |
515 | """Returns an initialized ctype perf_event_attr struct.""" | 409 | """Returns an initialized ctype perf_event_attr struct.""" |
516 | 410 | ||
@@ -539,8 +433,8 @@ class Event(object): | |||
539 | if group.events: | 433 | if group.events: |
540 | group_leader = group.events[0].fd | 434 | group_leader = group.events[0].fd |
541 | 435 | ||
542 | fd = perf_event_open(event_attr, trace_pid, | 436 | fd = self.perf_event_open(event_attr, trace_pid, |
543 | trace_cpu, group_leader, 0) | 437 | trace_cpu, group_leader, 0) |
544 | if fd == -1: | 438 | if fd == -1: |
545 | err = ctypes.get_errno() | 439 | err = ctypes.get_errno() |
546 | raise OSError(err, os.strerror(err), | 440 | raise OSError(err, os.strerror(err), |
@@ -575,17 +469,53 @@ class Event(object): | |||
575 | fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0) | 469 | fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0) |
576 | 470 | ||
577 | 471 | ||
578 | class TracepointProvider(object): | 472 | class Provider(object): |
473 | """Encapsulates functionalities used by all providers.""" | ||
474 | @staticmethod | ||
475 | def is_field_wanted(fields_filter, field): | ||
476 | """Indicate whether field is valid according to fields_filter.""" | ||
477 | if not fields_filter: | ||
478 | return True | ||
479 | return re.match(fields_filter, field) is not None | ||
480 | |||
481 | @staticmethod | ||
482 | def walkdir(path): | ||
483 | """Returns os.walk() data for specified directory. | ||
484 | |||
485 | As it is only a wrapper it returns the same 3-tuple of (dirpath, | ||
486 | dirnames, filenames). | ||
487 | """ | ||
488 | return next(os.walk(path)) | ||
489 | |||
490 | |||
491 | class TracepointProvider(Provider): | ||
579 | """Data provider for the stats class. | 492 | """Data provider for the stats class. |
580 | 493 | ||
581 | Manages the events/groups from which it acquires its data. | 494 | Manages the events/groups from which it acquires its data. |
582 | 495 | ||
583 | """ | 496 | """ |
584 | def __init__(self): | 497 | def __init__(self, pid, fields_filter): |
585 | self.group_leaders = [] | 498 | self.group_leaders = [] |
586 | self.filters = get_filters() | 499 | self.filters = self.get_filters() |
587 | self._fields = self.get_available_fields() | 500 | self.update_fields(fields_filter) |
588 | self._pid = 0 | 501 | self.pid = pid |
502 | |||
503 | @staticmethod | ||
504 | def get_filters(): | ||
505 | """Returns a dict of trace events, their filter ids and | ||
506 | the values that can be filtered. | ||
507 | |||
508 | Trace events can be filtered for special values by setting a | ||
509 | filter string via an ioctl. The string normally has the format | ||
510 | identifier==value. For each filter a new event will be created, to | ||
511 | be able to distinguish the events. | ||
512 | |||
513 | """ | ||
514 | filters = {} | ||
515 | filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) | ||
516 | if ARCH.exit_reasons: | ||
517 | filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons) | ||
518 | return filters | ||
589 | 519 | ||
590 | def get_available_fields(self): | 520 | def get_available_fields(self): |
591 | """Returns a list of available event's of format 'event name(filter | 521 | """Returns a list of available event's of format 'event name(filter |
@@ -603,7 +533,7 @@ class TracepointProvider(object): | |||
603 | 533 | ||
604 | """ | 534 | """ |
605 | path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm') | 535 | path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm') |
606 | fields = walkdir(path)[1] | 536 | fields = self.walkdir(path)[1] |
607 | extra = [] | 537 | extra = [] |
608 | for field in fields: | 538 | for field in fields: |
609 | if field in self.filters: | 539 | if field in self.filters: |
@@ -613,6 +543,34 @@ class TracepointProvider(object): | |||
613 | fields += extra | 543 | fields += extra |
614 | return fields | 544 | return fields |
615 | 545 | ||
546 | def update_fields(self, fields_filter): | ||
547 | """Refresh fields, applying fields_filter""" | ||
548 | self._fields = [field for field in self.get_available_fields() | ||
549 | if self.is_field_wanted(fields_filter, field)] | ||
550 | |||
551 | @staticmethod | ||
552 | def get_online_cpus(): | ||
553 | """Returns a list of cpu id integers.""" | ||
554 | def parse_int_list(list_string): | ||
555 | """Returns an int list from a string of comma separated integers and | ||
556 | integer ranges.""" | ||
557 | integers = [] | ||
558 | members = list_string.split(',') | ||
559 | |||
560 | for member in members: | ||
561 | if '-' not in member: | ||
562 | integers.append(int(member)) | ||
563 | else: | ||
564 | int_range = member.split('-') | ||
565 | integers.extend(range(int(int_range[0]), | ||
566 | int(int_range[1]) + 1)) | ||
567 | |||
568 | return integers | ||
569 | |||
570 | with open('/sys/devices/system/cpu/online') as cpu_list: | ||
571 | cpu_string = cpu_list.readline() | ||
572 | return parse_int_list(cpu_string) | ||
573 | |||
616 | def setup_traces(self): | 574 | def setup_traces(self): |
617 | """Creates all event and group objects needed to be able to retrieve | 575 | """Creates all event and group objects needed to be able to retrieve |
618 | data.""" | 576 | data.""" |
@@ -621,9 +579,9 @@ class TracepointProvider(object): | |||
621 | # Fetch list of all threads of the monitored pid, as qemu | 579 | # Fetch list of all threads of the monitored pid, as qemu |
622 | # starts a thread for each vcpu. | 580 | # starts a thread for each vcpu. |
623 | path = os.path.join('/proc', str(self._pid), 'task') | 581 | path = os.path.join('/proc', str(self._pid), 'task') |
624 | groupids = walkdir(path)[1] | 582 | groupids = self.walkdir(path)[1] |
625 | else: | 583 | else: |
626 | groupids = get_online_cpus() | 584 | groupids = self.get_online_cpus() |
627 | 585 | ||
628 | # The constant is needed as a buffer for python libs, std | 586 | # The constant is needed as a buffer for python libs, std |
629 | # streams and other files that the script opens. | 587 | # streams and other files that the script opens. |
@@ -671,9 +629,6 @@ class TracepointProvider(object): | |||
671 | 629 | ||
672 | self.group_leaders.append(group) | 630 | self.group_leaders.append(group) |
673 | 631 | ||
674 | def available_fields(self): | ||
675 | return self.get_available_fields() | ||
676 | |||
677 | @property | 632 | @property |
678 | def fields(self): | 633 | def fields(self): |
679 | return self._fields | 634 | return self._fields |
@@ -707,7 +662,7 @@ class TracepointProvider(object): | |||
707 | self.setup_traces() | 662 | self.setup_traces() |
708 | self.fields = self._fields | 663 | self.fields = self._fields |
709 | 664 | ||
710 | def read(self): | 665 | def read(self, by_guest=0): |
711 | """Returns 'event name: current value' for all enabled events.""" | 666 | """Returns 'event name: current value' for all enabled events.""" |
712 | ret = defaultdict(int) | 667 | ret = defaultdict(int) |
713 | for group in self.group_leaders: | 668 | for group in self.group_leaders: |
@@ -723,16 +678,17 @@ class TracepointProvider(object): | |||
723 | event.reset() | 678 | event.reset() |
724 | 679 | ||
725 | 680 | ||
726 | class DebugfsProvider(object): | 681 | class DebugfsProvider(Provider): |
727 | """Provides data from the files that KVM creates in the kvm debugfs | 682 | """Provides data from the files that KVM creates in the kvm debugfs |
728 | folder.""" | 683 | folder.""" |
729 | def __init__(self): | 684 | def __init__(self, pid, fields_filter, include_past): |
730 | self._fields = self.get_available_fields() | 685 | self.update_fields(fields_filter) |
731 | self._baseline = {} | 686 | self._baseline = {} |
732 | self._pid = 0 | ||
733 | self.do_read = True | 687 | self.do_read = True |
734 | self.paths = [] | 688 | self.paths = [] |
735 | self.reset() | 689 | self.pid = pid |
690 | if include_past: | ||
691 | self.restore() | ||
736 | 692 | ||
737 | def get_available_fields(self): | 693 | def get_available_fields(self): |
738 | """"Returns a list of available fields. | 694 | """"Returns a list of available fields. |
@@ -740,7 +696,12 @@ class DebugfsProvider(object): | |||
740 | The fields are all available KVM debugfs files | 696 | The fields are all available KVM debugfs files |
741 | 697 | ||
742 | """ | 698 | """ |
743 | return walkdir(PATH_DEBUGFS_KVM)[2] | 699 | return self.walkdir(PATH_DEBUGFS_KVM)[2] |
700 | |||
701 | def update_fields(self, fields_filter): | ||
702 | """Refresh fields, applying fields_filter""" | ||
703 | self._fields = [field for field in self.get_available_fields() | ||
704 | if self.is_field_wanted(fields_filter, field)] | ||
744 | 705 | ||
745 | @property | 706 | @property |
746 | def fields(self): | 707 | def fields(self): |
@@ -757,10 +718,9 @@ class DebugfsProvider(object): | |||
757 | 718 | ||
758 | @pid.setter | 719 | @pid.setter |
759 | def pid(self, pid): | 720 | def pid(self, pid): |
721 | self._pid = pid | ||
760 | if pid != 0: | 722 | if pid != 0: |
761 | self._pid = pid | 723 | vms = self.walkdir(PATH_DEBUGFS_KVM)[1] |
762 | |||
763 | vms = walkdir(PATH_DEBUGFS_KVM)[1] | ||
764 | if len(vms) == 0: | 724 | if len(vms) == 0: |
765 | self.do_read = False | 725 | self.do_read = False |
766 | 726 | ||
@@ -771,8 +731,15 @@ class DebugfsProvider(object): | |||
771 | self.do_read = True | 731 | self.do_read = True |
772 | self.reset() | 732 | self.reset() |
773 | 733 | ||
774 | def read(self, reset=0): | 734 | def read(self, reset=0, by_guest=0): |
775 | """Returns a dict with format:'file name / field -> current value'.""" | 735 | """Returns a dict with format:'file name / field -> current value'. |
736 | |||
737 | Parameter 'reset': | ||
738 | 0 plain read | ||
739 | 1 reset field counts to 0 | ||
740 | 2 restore the original field counts | ||
741 | |||
742 | """ | ||
776 | results = {} | 743 | results = {} |
777 | 744 | ||
778 | # If no debugfs filtering support is available, then don't read. | 745 | # If no debugfs filtering support is available, then don't read. |
@@ -789,12 +756,22 @@ class DebugfsProvider(object): | |||
789 | for field in self._fields: | 756 | for field in self._fields: |
790 | value = self.read_field(field, path) | 757 | value = self.read_field(field, path) |
791 | key = path + field | 758 | key = path + field |
792 | if reset: | 759 | if reset == 1: |
793 | self._baseline[key] = value | 760 | self._baseline[key] = value |
761 | if reset == 2: | ||
762 | self._baseline[key] = 0 | ||
794 | if self._baseline.get(key, -1) == -1: | 763 | if self._baseline.get(key, -1) == -1: |
795 | self._baseline[key] = value | 764 | self._baseline[key] = value |
796 | results[field] = (results.get(field, 0) + value - | 765 | increment = (results.get(field, 0) + value - |
797 | self._baseline.get(key, 0)) | 766 | self._baseline.get(key, 0)) |
767 | if by_guest: | ||
768 | pid = key.split('-')[0] | ||
769 | if pid in results: | ||
770 | results[pid] += increment | ||
771 | else: | ||
772 | results[pid] = increment | ||
773 | else: | ||
774 | results[field] = increment | ||
798 | 775 | ||
799 | return results | 776 | return results |
800 | 777 | ||
@@ -813,6 +790,11 @@ class DebugfsProvider(object): | |||
813 | self._baseline = {} | 790 | self._baseline = {} |
814 | self.read(1) | 791 | self.read(1) |
815 | 792 | ||
793 | def restore(self): | ||
794 | """Reset field counters""" | ||
795 | self._baseline = {} | ||
796 | self.read(2) | ||
797 | |||
816 | 798 | ||
817 | class Stats(object): | 799 | class Stats(object): |
818 | """Manages the data providers and the data they provide. | 800 | """Manages the data providers and the data they provide. |
@@ -821,33 +803,32 @@ class Stats(object): | |||
821 | provider data. | 803 | provider data. |
822 | 804 | ||
823 | """ | 805 | """ |
824 | def __init__(self, providers, pid, fields=None): | 806 | def __init__(self, options): |
825 | self.providers = providers | 807 | self.providers = self.get_providers(options) |
826 | self._pid_filter = pid | 808 | self._pid_filter = options.pid |
827 | self._fields_filter = fields | 809 | self._fields_filter = options.fields |
828 | self.values = {} | 810 | self.values = {} |
829 | self.update_provider_pid() | 811 | |
830 | self.update_provider_filters() | 812 | @staticmethod |
813 | def get_providers(options): | ||
814 | """Returns a list of data providers depending on the passed options.""" | ||
815 | providers = [] | ||
816 | |||
817 | if options.debugfs: | ||
818 | providers.append(DebugfsProvider(options.pid, options.fields, | ||
819 | options.dbgfs_include_past)) | ||
820 | if options.tracepoints or not providers: | ||
821 | providers.append(TracepointProvider(options.pid, options.fields)) | ||
822 | |||
823 | return providers | ||
831 | 824 | ||
832 | def update_provider_filters(self): | 825 | def update_provider_filters(self): |
833 | """Propagates fields filters to providers.""" | 826 | """Propagates fields filters to providers.""" |
834 | def wanted(key): | ||
835 | if not self._fields_filter: | ||
836 | return True | ||
837 | return re.match(self._fields_filter, key) is not None | ||
838 | |||
839 | # As we reset the counters when updating the fields we can | 827 | # As we reset the counters when updating the fields we can |
840 | # also clear the cache of old values. | 828 | # also clear the cache of old values. |
841 | self.values = {} | 829 | self.values = {} |
842 | for provider in self.providers: | 830 | for provider in self.providers: |
843 | provider_fields = [key for key in provider.get_available_fields() | 831 | provider.update_fields(self._fields_filter) |
844 | if wanted(key)] | ||
845 | provider.fields = provider_fields | ||
846 | |||
847 | def update_provider_pid(self): | ||
848 | """Propagates pid filters to providers.""" | ||
849 | for provider in self.providers: | ||
850 | provider.pid = self._pid_filter | ||
851 | 832 | ||
852 | def reset(self): | 833 | def reset(self): |
853 | self.values = {} | 834 | self.values = {} |
@@ -873,27 +854,52 @@ class Stats(object): | |||
873 | if pid != self._pid_filter: | 854 | if pid != self._pid_filter: |
874 | self._pid_filter = pid | 855 | self._pid_filter = pid |
875 | self.values = {} | 856 | self.values = {} |
876 | self.update_provider_pid() | 857 | for provider in self.providers: |
858 | provider.pid = self._pid_filter | ||
877 | 859 | ||
878 | def get(self): | 860 | def get(self, by_guest=0): |
879 | """Returns a dict with field -> (value, delta to last value) of all | 861 | """Returns a dict with field -> (value, delta to last value) of all |
880 | provider data.""" | 862 | provider data.""" |
881 | for provider in self.providers: | 863 | for provider in self.providers: |
882 | new = provider.read() | 864 | new = provider.read(by_guest=by_guest) |
883 | for key in provider.fields: | 865 | for key in new if by_guest else provider.fields: |
884 | oldval = self.values.get(key, (0, 0))[0] | 866 | oldval = self.values.get(key, (0, 0))[0] |
885 | newval = new.get(key, 0) | 867 | newval = new.get(key, 0) |
886 | newdelta = newval - oldval | 868 | newdelta = newval - oldval |
887 | self.values[key] = (newval, newdelta) | 869 | self.values[key] = (newval, newdelta) |
888 | return self.values | 870 | return self.values |
889 | 871 | ||
890 | LABEL_WIDTH = 40 | 872 | def toggle_display_guests(self, to_pid): |
891 | NUMBER_WIDTH = 10 | 873 | """Toggle between collection of stats by individual event and by |
892 | DELAY_INITIAL = 0.25 | 874 | guest pid |
893 | DELAY_REGULAR = 3.0 | 875 | |
876 | Events reported by DebugfsProvider change when switching to/from | ||
877 | reading by guest values. Hence we have to remove the excess event | ||
878 | names from self.values. | ||
879 | |||
880 | """ | ||
881 | if any(isinstance(ins, TracepointProvider) for ins in self.providers): | ||
882 | return 1 | ||
883 | if to_pid: | ||
884 | for provider in self.providers: | ||
885 | if isinstance(provider, DebugfsProvider): | ||
886 | for key in provider.fields: | ||
887 | if key in self.values.keys(): | ||
888 | del self.values[key] | ||
889 | else: | ||
890 | oldvals = self.values.copy() | ||
891 | for key in oldvals: | ||
892 | if key.isdigit(): | ||
893 | del self.values[key] | ||
894 | # Update oldval (see get()) | ||
895 | self.get(to_pid) | ||
896 | return 0 | ||
897 | |||
898 | DELAY_DEFAULT = 3.0 | ||
894 | MAX_GUEST_NAME_LEN = 48 | 899 | MAX_GUEST_NAME_LEN = 48 |
895 | MAX_REGEX_LEN = 44 | 900 | MAX_REGEX_LEN = 44 |
896 | DEFAULT_REGEX = r'^[^\(]*$' | 901 | DEFAULT_REGEX = r'^[^\(]*$' |
902 | SORT_DEFAULT = 0 | ||
897 | 903 | ||
898 | 904 | ||
899 | class Tui(object): | 905 | class Tui(object): |
@@ -901,7 +907,10 @@ class Tui(object): | |||
901 | def __init__(self, stats): | 907 | def __init__(self, stats): |
902 | self.stats = stats | 908 | self.stats = stats |
903 | self.screen = None | 909 | self.screen = None |
904 | self.update_drilldown() | 910 | self._delay_initial = 0.25 |
911 | self._delay_regular = DELAY_DEFAULT | ||
912 | self._sorting = SORT_DEFAULT | ||
913 | self._display_guests = 0 | ||
905 | 914 | ||
906 | def __enter__(self): | 915 | def __enter__(self): |
907 | """Initialises curses for later use. Based on curses.wrapper | 916 | """Initialises curses for later use. Based on curses.wrapper |
@@ -929,7 +938,7 @@ class Tui(object): | |||
929 | return self | 938 | return self |
930 | 939 | ||
931 | def __exit__(self, *exception): | 940 | def __exit__(self, *exception): |
932 | """Resets the terminal to its normal state. Based on curses.wrappre | 941 | """Resets the terminal to its normal state. Based on curses.wrapper |
933 | implementation from the Python standard library.""" | 942 | implementation from the Python standard library.""" |
934 | if self.screen: | 943 | if self.screen: |
935 | self.screen.keypad(0) | 944 | self.screen.keypad(0) |
@@ -937,6 +946,86 @@ class Tui(object): | |||
937 | curses.nocbreak() | 946 | curses.nocbreak() |
938 | curses.endwin() | 947 | curses.endwin() |
939 | 948 | ||
949 | def get_all_gnames(self): | ||
950 | """Returns a list of (pid, gname) tuples of all running guests""" | ||
951 | res = [] | ||
952 | try: | ||
953 | child = subprocess.Popen(['ps', '-A', '--format', 'pid,args'], | ||
954 | stdout=subprocess.PIPE) | ||
955 | except: | ||
956 | raise Exception | ||
957 | for line in child.stdout: | ||
958 | line = line.lstrip().split(' ', 1) | ||
959 | # perform a sanity check before calling the more expensive | ||
960 | # function to possibly extract the guest name | ||
961 | if ' -name ' in line[1]: | ||
962 | res.append((line[0], self.get_gname_from_pid(line[0]))) | ||
963 | child.stdout.close() | ||
964 | |||
965 | return res | ||
966 | |||
967 | def print_all_gnames(self, row): | ||
968 | """Print a list of all running guests along with their pids.""" | ||
969 | self.screen.addstr(row, 2, '%8s %-60s' % | ||
970 | ('Pid', 'Guest Name (fuzzy list, might be ' | ||
971 | 'inaccurate!)'), | ||
972 | curses.A_UNDERLINE) | ||
973 | row += 1 | ||
974 | try: | ||
975 | for line in self.get_all_gnames(): | ||
976 | self.screen.addstr(row, 2, '%8s %-60s' % (line[0], line[1])) | ||
977 | row += 1 | ||
978 | if row >= self.screen.getmaxyx()[0]: | ||
979 | break | ||
980 | except Exception: | ||
981 | self.screen.addstr(row + 1, 2, 'Not available') | ||
982 | |||
983 | def get_pid_from_gname(self, gname): | ||
984 | """Fuzzy function to convert guest name to QEMU process pid. | ||
985 | |||
986 | Returns a list of potential pids, can be empty if no match found. | ||
987 | Throws an exception on processing errors. | ||
988 | |||
989 | """ | ||
990 | pids = [] | ||
991 | for line in self.get_all_gnames(): | ||
992 | if gname == line[1]: | ||
993 | pids.append(int(line[0])) | ||
994 | |||
995 | return pids | ||
996 | |||
997 | @staticmethod | ||
998 | def get_gname_from_pid(pid): | ||
999 | """Returns the guest name for a QEMU process pid. | ||
1000 | |||
1001 | Extracts the guest name from the QEMU command line by processing the | ||
1002 | '-name' option. Will also handle names specified out of sequence. | ||
1003 | |||
1004 | """ | ||
1005 | name = '' | ||
1006 | try: | ||
1007 | line = open('/proc/{}/cmdline' | ||
1008 | .format(pid), 'rb').read().split('\0') | ||
1009 | parms = line[line.index('-name') + 1].split(',') | ||
1010 | while '' in parms: | ||
1011 | # commas are escaped (i.e. ',,'), hence e.g. 'foo,bar' results | ||
1012 | # in ['foo', '', 'bar'], which we revert here | ||
1013 | idx = parms.index('') | ||
1014 | parms[idx - 1] += ',' + parms[idx + 1] | ||
1015 | del parms[idx:idx+2] | ||
1016 | # the '-name' switch allows for two ways to specify the guest name, | ||
1017 | # where the plain name overrides the name specified via 'guest=' | ||
1018 | for arg in parms: | ||
1019 | if '=' not in arg: | ||
1020 | name = arg | ||
1021 | break | ||
1022 | if arg[:6] == 'guest=': | ||
1023 | name = arg[6:] | ||
1024 | except (ValueError, IOError, IndexError): | ||
1025 | pass | ||
1026 | |||
1027 | return name | ||
1028 | |||
940 | def update_drilldown(self): | 1029 | def update_drilldown(self): |
941 | """Sets or removes a filter that only allows fields without braces.""" | 1030 | """Sets or removes a filter that only allows fields without braces.""" |
942 | if not self.stats.fields_filter: | 1031 | if not self.stats.fields_filter: |
@@ -954,7 +1043,7 @@ class Tui(object): | |||
954 | if pid is None: | 1043 | if pid is None: |
955 | pid = self.stats.pid_filter | 1044 | pid = self.stats.pid_filter |
956 | self.screen.erase() | 1045 | self.screen.erase() |
957 | gname = get_gname_from_pid(pid) | 1046 | gname = self.get_gname_from_pid(pid) |
958 | if gname: | 1047 | if gname: |
959 | gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...' | 1048 | gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...' |
960 | if len(gname) > MAX_GUEST_NAME_LEN | 1049 | if len(gname) > MAX_GUEST_NAME_LEN |
@@ -970,13 +1059,13 @@ class Tui(object): | |||
970 | if len(regex) > MAX_REGEX_LEN: | 1059 | if len(regex) > MAX_REGEX_LEN: |
971 | regex = regex[:MAX_REGEX_LEN] + '...' | 1060 | regex = regex[:MAX_REGEX_LEN] + '...' |
972 | self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex)) | 1061 | self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex)) |
973 | self.screen.addstr(2, 1, 'Event') | 1062 | if self._display_guests: |
974 | self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH - | 1063 | col_name = 'Guest Name' |
975 | len('Total'), 'Total') | 1064 | else: |
976 | self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 7 - | 1065 | col_name = 'Event' |
977 | len('%Total'), '%Total') | 1066 | self.screen.addstr(2, 1, '%-40s %10s%7s %8s' % |
978 | self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 7 + 8 - | 1067 | (col_name, 'Total', '%Total', 'CurAvg/s'), |
979 | len('Current'), 'Current') | 1068 | curses.A_STANDOUT) |
980 | self.screen.addstr(4, 1, 'Collecting data...') | 1069 | self.screen.addstr(4, 1, 'Collecting data...') |
981 | self.screen.refresh() | 1070 | self.screen.refresh() |
982 | 1071 | ||
@@ -984,16 +1073,25 @@ class Tui(object): | |||
984 | row = 3 | 1073 | row = 3 |
985 | self.screen.move(row, 0) | 1074 | self.screen.move(row, 0) |
986 | self.screen.clrtobot() | 1075 | self.screen.clrtobot() |
987 | stats = self.stats.get() | 1076 | stats = self.stats.get(self._display_guests) |
988 | 1077 | ||
989 | def sortkey(x): | 1078 | def sortCurAvg(x): |
1079 | # sort by current events if available | ||
990 | if stats[x][1]: | 1080 | if stats[x][1]: |
991 | return (-stats[x][1], -stats[x][0]) | 1081 | return (-stats[x][1], -stats[x][0]) |
992 | else: | 1082 | else: |
993 | return (0, -stats[x][0]) | 1083 | return (0, -stats[x][0]) |
1084 | |||
1085 | def sortTotal(x): | ||
1086 | # sort by totals | ||
1087 | return (0, -stats[x][0]) | ||
994 | total = 0. | 1088 | total = 0. |
995 | for val in stats.values(): | 1089 | for val in stats.values(): |
996 | total += val[0] | 1090 | total += val[0] |
1091 | if self._sorting == SORT_DEFAULT: | ||
1092 | sortkey = sortCurAvg | ||
1093 | else: | ||
1094 | sortkey = sortTotal | ||
997 | for key in sorted(stats.keys(), key=sortkey): | 1095 | for key in sorted(stats.keys(), key=sortkey): |
998 | 1096 | ||
999 | if row >= self.screen.getmaxyx()[0]: | 1097 | if row >= self.screen.getmaxyx()[0]: |
@@ -1001,18 +1099,61 @@ class Tui(object): | |||
1001 | values = stats[key] | 1099 | values = stats[key] |
1002 | if not values[0] and not values[1]: | 1100 | if not values[0] and not values[1]: |
1003 | break | 1101 | break |
1004 | col = 1 | 1102 | if values[0] is not None: |
1005 | self.screen.addstr(row, col, key) | 1103 | cur = int(round(values[1] / sleeptime)) if values[1] else '' |
1006 | col += LABEL_WIDTH | 1104 | if self._display_guests: |
1007 | self.screen.addstr(row, col, '%10d' % (values[0],)) | 1105 | key = self.get_gname_from_pid(key) |
1008 | col += NUMBER_WIDTH | 1106 | self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % |
1009 | self.screen.addstr(row, col, '%7.1f' % (values[0] * 100 / total,)) | 1107 | (key, values[0], values[0] * 100 / total, |
1010 | col += 7 | 1108 | cur)) |
1011 | if values[1] is not None: | ||
1012 | self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,)) | ||
1013 | row += 1 | 1109 | row += 1 |
1110 | if row == 3: | ||
1111 | self.screen.addstr(4, 1, 'No matching events reported yet') | ||
1014 | self.screen.refresh() | 1112 | self.screen.refresh() |
1015 | 1113 | ||
1114 | def show_msg(self, text): | ||
1115 | """Display message centered text and exit on key press""" | ||
1116 | hint = 'Press any key to continue' | ||
1117 | curses.cbreak() | ||
1118 | self.screen.erase() | ||
1119 | (x, term_width) = self.screen.getmaxyx() | ||
1120 | row = 2 | ||
1121 | for line in text: | ||
1122 | start = (term_width - len(line)) / 2 | ||
1123 | self.screen.addstr(row, start, line) | ||
1124 | row += 1 | ||
1125 | self.screen.addstr(row + 1, (term_width - len(hint)) / 2, hint, | ||
1126 | curses.A_STANDOUT) | ||
1127 | self.screen.getkey() | ||
1128 | |||
1129 | def show_help_interactive(self): | ||
1130 | """Display help with list of interactive commands""" | ||
1131 | msg = (' b toggle events by guests (debugfs only, honors' | ||
1132 | ' filters)', | ||
1133 | ' c clear filter', | ||
1134 | ' f filter by regular expression', | ||
1135 | ' g filter by guest name', | ||
1136 | ' h display interactive commands reference', | ||
1137 | ' o toggle sorting order (Total vs CurAvg/s)', | ||
1138 | ' p filter by PID', | ||
1139 | ' q quit', | ||
1140 | ' r reset stats', | ||
1141 | ' s set update interval', | ||
1142 | ' x toggle reporting of stats for individual child trace' | ||
1143 | ' events', | ||
1144 | 'Any other key refreshes statistics immediately') | ||
1145 | curses.cbreak() | ||
1146 | self.screen.erase() | ||
1147 | self.screen.addstr(0, 0, "Interactive commands reference", | ||
1148 | curses.A_BOLD) | ||
1149 | self.screen.addstr(2, 0, "Press any key to exit", curses.A_STANDOUT) | ||
1150 | row = 4 | ||
1151 | for line in msg: | ||
1152 | self.screen.addstr(row, 0, line) | ||
1153 | row += 1 | ||
1154 | self.screen.getkey() | ||
1155 | self.refresh_header() | ||
1156 | |||
1016 | def show_filter_selection(self): | 1157 | def show_filter_selection(self): |
1017 | """Draws filter selection mask. | 1158 | """Draws filter selection mask. |
1018 | 1159 | ||
@@ -1059,6 +1200,7 @@ class Tui(object): | |||
1059 | 'This might limit the shown data to the trace ' | 1200 | 'This might limit the shown data to the trace ' |
1060 | 'statistics.') | 1201 | 'statistics.') |
1061 | self.screen.addstr(5, 0, msg) | 1202 | self.screen.addstr(5, 0, msg) |
1203 | self.print_all_gnames(7) | ||
1062 | 1204 | ||
1063 | curses.echo() | 1205 | curses.echo() |
1064 | self.screen.addstr(3, 0, "Pid [0 or pid]: ") | 1206 | self.screen.addstr(3, 0, "Pid [0 or pid]: ") |
@@ -1077,10 +1219,40 @@ class Tui(object): | |||
1077 | self.refresh_header(pid) | 1219 | self.refresh_header(pid) |
1078 | self.update_pid(pid) | 1220 | self.update_pid(pid) |
1079 | break | 1221 | break |
1080 | |||
1081 | except ValueError: | 1222 | except ValueError: |
1082 | msg = '"' + str(pid) + '": Not a valid pid' | 1223 | msg = '"' + str(pid) + '": Not a valid pid' |
1083 | continue | 1224 | |
1225 | def show_set_update_interval(self): | ||
1226 | """Draws update interval selection mask.""" | ||
1227 | msg = '' | ||
1228 | while True: | ||
1229 | self.screen.erase() | ||
1230 | self.screen.addstr(0, 0, 'Set update interval (defaults to %fs).' % | ||
1231 | DELAY_DEFAULT, curses.A_BOLD) | ||
1232 | self.screen.addstr(4, 0, msg) | ||
1233 | self.screen.addstr(2, 0, 'Change delay from %.1fs to ' % | ||
1234 | self._delay_regular) | ||
1235 | curses.echo() | ||
1236 | val = self.screen.getstr() | ||
1237 | curses.noecho() | ||
1238 | |||
1239 | try: | ||
1240 | if len(val) > 0: | ||
1241 | delay = float(val) | ||
1242 | if delay < 0.1: | ||
1243 | msg = '"' + str(val) + '": Value must be >=0.1' | ||
1244 | continue | ||
1245 | if delay > 25.5: | ||
1246 | msg = '"' + str(val) + '": Value must be <=25.5' | ||
1247 | continue | ||
1248 | else: | ||
1249 | delay = DELAY_DEFAULT | ||
1250 | self._delay_regular = delay | ||
1251 | break | ||
1252 | |||
1253 | except ValueError: | ||
1254 | msg = '"' + str(val) + '": Invalid value' | ||
1255 | self.refresh_header() | ||
1084 | 1256 | ||
1085 | def show_vm_selection_by_guest_name(self): | 1257 | def show_vm_selection_by_guest_name(self): |
1086 | """Draws guest selection mask. | 1258 | """Draws guest selection mask. |
@@ -1098,6 +1270,7 @@ class Tui(object): | |||
1098 | 'This might limit the shown data to the trace ' | 1270 | 'This might limit the shown data to the trace ' |
1099 | 'statistics.') | 1271 | 'statistics.') |
1100 | self.screen.addstr(5, 0, msg) | 1272 | self.screen.addstr(5, 0, msg) |
1273 | self.print_all_gnames(7) | ||
1101 | curses.echo() | 1274 | curses.echo() |
1102 | self.screen.addstr(3, 0, "Guest [ENTER or guest]: ") | 1275 | self.screen.addstr(3, 0, "Guest [ENTER or guest]: ") |
1103 | gname = self.screen.getstr() | 1276 | gname = self.screen.getstr() |
@@ -1110,7 +1283,7 @@ class Tui(object): | |||
1110 | else: | 1283 | else: |
1111 | pids = [] | 1284 | pids = [] |
1112 | try: | 1285 | try: |
1113 | pids = get_pid_from_gname(gname) | 1286 | pids = self.get_pid_from_gname(gname) |
1114 | except: | 1287 | except: |
1115 | msg = '"' + gname + '": Internal error while searching, ' \ | 1288 | msg = '"' + gname + '": Internal error while searching, ' \ |
1116 | 'use pid filter instead' | 1289 | 'use pid filter instead' |
@@ -1128,38 +1301,60 @@ class Tui(object): | |||
1128 | 1301 | ||
1129 | def show_stats(self): | 1302 | def show_stats(self): |
1130 | """Refreshes the screen and processes user input.""" | 1303 | """Refreshes the screen and processes user input.""" |
1131 | sleeptime = DELAY_INITIAL | 1304 | sleeptime = self._delay_initial |
1132 | self.refresh_header() | 1305 | self.refresh_header() |
1306 | start = 0.0 # result based on init value never appears on screen | ||
1133 | while True: | 1307 | while True: |
1134 | self.refresh_body(sleeptime) | 1308 | self.refresh_body(time.time() - start) |
1135 | curses.halfdelay(int(sleeptime * 10)) | 1309 | curses.halfdelay(int(sleeptime * 10)) |
1136 | sleeptime = DELAY_REGULAR | 1310 | start = time.time() |
1311 | sleeptime = self._delay_regular | ||
1137 | try: | 1312 | try: |
1138 | char = self.screen.getkey() | 1313 | char = self.screen.getkey() |
1139 | if char == 'x': | 1314 | if char == 'b': |
1315 | self._display_guests = not self._display_guests | ||
1316 | if self.stats.toggle_display_guests(self._display_guests): | ||
1317 | self.show_msg(['Command not available with tracepoints' | ||
1318 | ' enabled', 'Restart with debugfs only ' | ||
1319 | '(see option \'-d\') and try again!']) | ||
1320 | self._display_guests = not self._display_guests | ||
1140 | self.refresh_header() | 1321 | self.refresh_header() |
1141 | self.update_drilldown() | ||
1142 | sleeptime = DELAY_INITIAL | ||
1143 | if char == 'q': | ||
1144 | break | ||
1145 | if char == 'c': | 1322 | if char == 'c': |
1146 | self.stats.fields_filter = DEFAULT_REGEX | 1323 | self.stats.fields_filter = DEFAULT_REGEX |
1147 | self.refresh_header(0) | 1324 | self.refresh_header(0) |
1148 | self.update_pid(0) | 1325 | self.update_pid(0) |
1149 | sleeptime = DELAY_INITIAL | ||
1150 | if char == 'f': | 1326 | if char == 'f': |
1327 | curses.curs_set(1) | ||
1151 | self.show_filter_selection() | 1328 | self.show_filter_selection() |
1152 | sleeptime = DELAY_INITIAL | 1329 | curses.curs_set(0) |
1330 | sleeptime = self._delay_initial | ||
1153 | if char == 'g': | 1331 | if char == 'g': |
1332 | curses.curs_set(1) | ||
1154 | self.show_vm_selection_by_guest_name() | 1333 | self.show_vm_selection_by_guest_name() |
1155 | sleeptime = DELAY_INITIAL | 1334 | curses.curs_set(0) |
1335 | sleeptime = self._delay_initial | ||
1336 | if char == 'h': | ||
1337 | self.show_help_interactive() | ||
1338 | if char == 'o': | ||
1339 | self._sorting = not self._sorting | ||
1156 | if char == 'p': | 1340 | if char == 'p': |
1341 | curses.curs_set(1) | ||
1157 | self.show_vm_selection_by_pid() | 1342 | self.show_vm_selection_by_pid() |
1158 | sleeptime = DELAY_INITIAL | 1343 | curses.curs_set(0) |
1344 | sleeptime = self._delay_initial | ||
1345 | if char == 'q': | ||
1346 | break | ||
1159 | if char == 'r': | 1347 | if char == 'r': |
1160 | self.refresh_header() | ||
1161 | self.stats.reset() | 1348 | self.stats.reset() |
1162 | sleeptime = DELAY_INITIAL | 1349 | if char == 's': |
1350 | curses.curs_set(1) | ||
1351 | self.show_set_update_interval() | ||
1352 | curses.curs_set(0) | ||
1353 | sleeptime = self._delay_initial | ||
1354 | if char == 'x': | ||
1355 | self.update_drilldown() | ||
1356 | # prevents display of current values on next refresh | ||
1357 | self.stats.get() | ||
1163 | except KeyboardInterrupt: | 1358 | except KeyboardInterrupt: |
1164 | break | 1359 | break |
1165 | except curses.error: | 1360 | except curses.error: |
@@ -1227,13 +1422,17 @@ Requirements: | |||
1227 | the large number of files that are possibly opened. | 1422 | the large number of files that are possibly opened. |
1228 | 1423 | ||
1229 | Interactive Commands: | 1424 | Interactive Commands: |
1425 | b toggle events by guests (debugfs only, honors filters) | ||
1230 | c clear filter | 1426 | c clear filter |
1231 | f filter by regular expression | 1427 | f filter by regular expression |
1232 | g filter by guest name | 1428 | g filter by guest name |
1429 | h display interactive commands reference | ||
1430 | o toggle sorting order (Total vs CurAvg/s) | ||
1233 | p filter by PID | 1431 | p filter by PID |
1234 | q quit | 1432 | q quit |
1235 | x toggle reporting of stats for individual child trace events | ||
1236 | r reset stats | 1433 | r reset stats |
1434 | s set update interval | ||
1435 | x toggle reporting of stats for individual child trace events | ||
1237 | Press any other key to refresh statistics immediately. | 1436 | Press any other key to refresh statistics immediately. |
1238 | """ | 1437 | """ |
1239 | 1438 | ||
@@ -1246,7 +1445,7 @@ Press any other key to refresh statistics immediately. | |||
1246 | 1445 | ||
1247 | def cb_guest_to_pid(option, opt, val, parser): | 1446 | def cb_guest_to_pid(option, opt, val, parser): |
1248 | try: | 1447 | try: |
1249 | pids = get_pid_from_gname(val) | 1448 | pids = Tui.get_pid_from_gname(val) |
1250 | except: | 1449 | except: |
1251 | raise optparse.OptionValueError('Error while searching for guest ' | 1450 | raise optparse.OptionValueError('Error while searching for guest ' |
1252 | '"{}", use "-p" to specify a pid ' | 1451 | '"{}", use "-p" to specify a pid ' |
@@ -1268,6 +1467,13 @@ Press any other key to refresh statistics immediately. | |||
1268 | dest='once', | 1467 | dest='once', |
1269 | help='run in batch mode for one second', | 1468 | help='run in batch mode for one second', |
1270 | ) | 1469 | ) |
1470 | optparser.add_option('-i', '--debugfs-include-past', | ||
1471 | action='store_true', | ||
1472 | default=False, | ||
1473 | dest='dbgfs_include_past', | ||
1474 | help='include all available data on past events for ' | ||
1475 | 'debugfs', | ||
1476 | ) | ||
1271 | optparser.add_option('-l', '--log', | 1477 | optparser.add_option('-l', '--log', |
1272 | action='store_true', | 1478 | action='store_true', |
1273 | default=False, | 1479 | default=False, |
@@ -1288,7 +1494,7 @@ Press any other key to refresh statistics immediately. | |||
1288 | ) | 1494 | ) |
1289 | optparser.add_option('-f', '--fields', | 1495 | optparser.add_option('-f', '--fields', |
1290 | action='store', | 1496 | action='store', |
1291 | default=None, | 1497 | default=DEFAULT_REGEX, |
1292 | dest='fields', | 1498 | dest='fields', |
1293 | help='fields to display (regex)', | 1499 | help='fields to display (regex)', |
1294 | ) | 1500 | ) |
@@ -1311,20 +1517,6 @@ Press any other key to refresh statistics immediately. | |||
1311 | return options | 1517 | return options |
1312 | 1518 | ||
1313 | 1519 | ||
1314 | def get_providers(options): | ||
1315 | """Returns a list of data providers depending on the passed options.""" | ||
1316 | providers = [] | ||
1317 | |||
1318 | if options.tracepoints: | ||
1319 | providers.append(TracepointProvider()) | ||
1320 | if options.debugfs: | ||
1321 | providers.append(DebugfsProvider()) | ||
1322 | if len(providers) == 0: | ||
1323 | providers.append(TracepointProvider()) | ||
1324 | |||
1325 | return providers | ||
1326 | |||
1327 | |||
1328 | def check_access(options): | 1520 | def check_access(options): |
1329 | """Exits if the current user can't access all needed directories.""" | 1521 | """Exits if the current user can't access all needed directories.""" |
1330 | if not os.path.exists('/sys/kernel/debug'): | 1522 | if not os.path.exists('/sys/kernel/debug'): |
@@ -1365,8 +1557,7 @@ def main(): | |||
1365 | sys.stderr.write('Did you use a (unsupported) tid instead of a pid?\n') | 1557 | sys.stderr.write('Did you use a (unsupported) tid instead of a pid?\n') |
1366 | sys.exit('Specified pid does not exist.') | 1558 | sys.exit('Specified pid does not exist.') |
1367 | 1559 | ||
1368 | providers = get_providers(options) | 1560 | stats = Stats(options) |
1369 | stats = Stats(providers, options.pid, fields=options.fields) | ||
1370 | 1561 | ||
1371 | if options.log: | 1562 | if options.log: |
1372 | log(stats) | 1563 | log(stats) |
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt index 109431bdc63c..e5cf836be8a1 100644 --- a/tools/kvm/kvm_stat/kvm_stat.txt +++ b/tools/kvm/kvm_stat/kvm_stat.txt | |||
@@ -29,18 +29,26 @@ meaning of events. | |||
29 | INTERACTIVE COMMANDS | 29 | INTERACTIVE COMMANDS |
30 | -------------------- | 30 | -------------------- |
31 | [horizontal] | 31 | [horizontal] |
32 | *b*:: toggle events by guests (debugfs only, honors filters) | ||
33 | |||
32 | *c*:: clear filter | 34 | *c*:: clear filter |
33 | 35 | ||
34 | *f*:: filter by regular expression | 36 | *f*:: filter by regular expression |
35 | 37 | ||
36 | *g*:: filter by guest name | 38 | *g*:: filter by guest name |
37 | 39 | ||
40 | *h*:: display interactive commands reference | ||
41 | |||
42 | *o*:: toggle sorting order (Total vs CurAvg/s) | ||
43 | |||
38 | *p*:: filter by PID | 44 | *p*:: filter by PID |
39 | 45 | ||
40 | *q*:: quit | 46 | *q*:: quit |
41 | 47 | ||
42 | *r*:: reset stats | 48 | *r*:: reset stats |
43 | 49 | ||
50 | *s*:: set update interval | ||
51 | |||
44 | *x*:: toggle reporting of stats for child trace events | 52 | *x*:: toggle reporting of stats for child trace events |
45 | 53 | ||
46 | Press any other key to refresh statistics immediately. | 54 | Press any other key to refresh statistics immediately. |
@@ -64,6 +72,10 @@ OPTIONS | |||
64 | --debugfs:: | 72 | --debugfs:: |
65 | retrieve statistics from debugfs | 73 | retrieve statistics from debugfs |
66 | 74 | ||
75 | -i:: | ||
76 | --debugfs-include-past:: | ||
77 | include all available data on past events for debugfs | ||
78 | |||
67 | -p<pid>:: | 79 | -p<pid>:: |
68 | --pid=<pid>:: | 80 | --pid=<pid>:: |
69 | limit statistics to one virtual machine (pid) | 81 | limit statistics to one virtual machine (pid) |
diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index 528af4b2d09e..79c7c357804b 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c | |||
@@ -60,7 +60,7 @@ static const unsigned short cc_map[16] = { | |||
60 | /* | 60 | /* |
61 | * Check if a trapped instruction should have been executed or not. | 61 | * Check if a trapped instruction should have been executed or not. |
62 | */ | 62 | */ |
63 | bool kvm_condition_valid32(const struct kvm_vcpu *vcpu) | 63 | bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) |
64 | { | 64 | { |
65 | unsigned long cpsr; | 65 | unsigned long cpsr; |
66 | u32 cpsr_cond; | 66 | u32 cpsr_cond; |
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 5976609ef27c..8e89d63005c7 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/kvm_host.h> | 21 | #include <linux/kvm_host.h> |
22 | #include <linux/interrupt.h> | 22 | #include <linux/interrupt.h> |
23 | #include <linux/irq.h> | 23 | #include <linux/irq.h> |
24 | #include <linux/uaccess.h> | ||
24 | 25 | ||
25 | #include <clocksource/arm_arch_timer.h> | 26 | #include <clocksource/arm_arch_timer.h> |
26 | #include <asm/arch_timer.h> | 27 | #include <asm/arch_timer.h> |
@@ -35,6 +36,16 @@ static struct timecounter *timecounter; | |||
35 | static unsigned int host_vtimer_irq; | 36 | static unsigned int host_vtimer_irq; |
36 | static u32 host_vtimer_irq_flags; | 37 | static u32 host_vtimer_irq_flags; |
37 | 38 | ||
39 | static const struct kvm_irq_level default_ptimer_irq = { | ||
40 | .irq = 30, | ||
41 | .level = 1, | ||
42 | }; | ||
43 | |||
44 | static const struct kvm_irq_level default_vtimer_irq = { | ||
45 | .irq = 27, | ||
46 | .level = 1, | ||
47 | }; | ||
48 | |||
38 | void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) | 49 | void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) |
39 | { | 50 | { |
40 | vcpu_vtimer(vcpu)->active_cleared_last = false; | 51 | vcpu_vtimer(vcpu)->active_cleared_last = false; |
@@ -95,7 +106,7 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) | |||
95 | * If the vcpu is blocked we want to wake it up so that it will see | 106 | * If the vcpu is blocked we want to wake it up so that it will see |
96 | * the timer has expired when entering the guest. | 107 | * the timer has expired when entering the guest. |
97 | */ | 108 | */ |
98 | kvm_vcpu_kick(vcpu); | 109 | kvm_vcpu_wake_up(vcpu); |
99 | } | 110 | } |
100 | 111 | ||
101 | static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) | 112 | static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) |
@@ -215,7 +226,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, | |||
215 | if (likely(irqchip_in_kernel(vcpu->kvm))) { | 226 | if (likely(irqchip_in_kernel(vcpu->kvm))) { |
216 | ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, | 227 | ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, |
217 | timer_ctx->irq.irq, | 228 | timer_ctx->irq.irq, |
218 | timer_ctx->irq.level); | 229 | timer_ctx->irq.level, |
230 | timer_ctx); | ||
219 | WARN_ON(ret); | 231 | WARN_ON(ret); |
220 | } | 232 | } |
221 | } | 233 | } |
@@ -445,23 +457,12 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) | |||
445 | kvm_timer_update_state(vcpu); | 457 | kvm_timer_update_state(vcpu); |
446 | } | 458 | } |
447 | 459 | ||
448 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | 460 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) |
449 | const struct kvm_irq_level *virt_irq, | ||
450 | const struct kvm_irq_level *phys_irq) | ||
451 | { | 461 | { |
452 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 462 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); |
453 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | 463 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); |
454 | 464 | ||
455 | /* | 465 | /* |
456 | * The vcpu timer irq number cannot be determined in | ||
457 | * kvm_timer_vcpu_init() because it is called much before | ||
458 | * kvm_vcpu_set_target(). To handle this, we determine | ||
459 | * vcpu timer irq number when the vcpu is reset. | ||
460 | */ | ||
461 | vtimer->irq.irq = virt_irq->irq; | ||
462 | ptimer->irq.irq = phys_irq->irq; | ||
463 | |||
464 | /* | ||
465 | * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 | 466 | * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 |
466 | * and to 0 for ARMv7. We provide an implementation that always | 467 | * and to 0 for ARMv7. We provide an implementation that always |
467 | * resets the timer to be disabled and unmasked and is compliant with | 468 | * resets the timer to be disabled and unmasked and is compliant with |
@@ -496,6 +497,8 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff) | |||
496 | void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) | 497 | void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) |
497 | { | 498 | { |
498 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 499 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
500 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | ||
501 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | ||
499 | 502 | ||
500 | /* Synchronize cntvoff across all vtimers of a VM. */ | 503 | /* Synchronize cntvoff across all vtimers of a VM. */ |
501 | update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); | 504 | update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); |
@@ -504,6 +507,9 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) | |||
504 | INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); | 507 | INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); |
505 | hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 508 | hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
506 | timer->timer.function = kvm_timer_expire; | 509 | timer->timer.function = kvm_timer_expire; |
510 | |||
511 | vtimer->irq.irq = default_vtimer_irq.irq; | ||
512 | ptimer->irq.irq = default_ptimer_irq.irq; | ||
507 | } | 513 | } |
508 | 514 | ||
509 | static void kvm_timer_init_interrupt(void *info) | 515 | static void kvm_timer_init_interrupt(void *info) |
@@ -613,6 +619,30 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) | |||
613 | kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq); | 619 | kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq); |
614 | } | 620 | } |
615 | 621 | ||
622 | static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) | ||
623 | { | ||
624 | int vtimer_irq, ptimer_irq; | ||
625 | int i, ret; | ||
626 | |||
627 | vtimer_irq = vcpu_vtimer(vcpu)->irq.irq; | ||
628 | ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu)); | ||
629 | if (ret) | ||
630 | return false; | ||
631 | |||
632 | ptimer_irq = vcpu_ptimer(vcpu)->irq.irq; | ||
633 | ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu)); | ||
634 | if (ret) | ||
635 | return false; | ||
636 | |||
637 | kvm_for_each_vcpu(i, vcpu, vcpu->kvm) { | ||
638 | if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq || | ||
639 | vcpu_ptimer(vcpu)->irq.irq != ptimer_irq) | ||
640 | return false; | ||
641 | } | ||
642 | |||
643 | return true; | ||
644 | } | ||
645 | |||
616 | int kvm_timer_enable(struct kvm_vcpu *vcpu) | 646 | int kvm_timer_enable(struct kvm_vcpu *vcpu) |
617 | { | 647 | { |
618 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 648 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
@@ -632,6 +662,11 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) | |||
632 | if (!vgic_initialized(vcpu->kvm)) | 662 | if (!vgic_initialized(vcpu->kvm)) |
633 | return -ENODEV; | 663 | return -ENODEV; |
634 | 664 | ||
665 | if (!timer_irqs_are_valid(vcpu)) { | ||
666 | kvm_debug("incorrectly configured timer irqs\n"); | ||
667 | return -EINVAL; | ||
668 | } | ||
669 | |||
635 | /* | 670 | /* |
636 | * Find the physical IRQ number corresponding to the host_vtimer_irq | 671 | * Find the physical IRQ number corresponding to the host_vtimer_irq |
637 | */ | 672 | */ |
@@ -681,3 +716,79 @@ void kvm_timer_init_vhe(void) | |||
681 | val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); | 716 | val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); |
682 | write_sysreg(val, cnthctl_el2); | 717 | write_sysreg(val, cnthctl_el2); |
683 | } | 718 | } |
719 | |||
720 | static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq) | ||
721 | { | ||
722 | struct kvm_vcpu *vcpu; | ||
723 | int i; | ||
724 | |||
725 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
726 | vcpu_vtimer(vcpu)->irq.irq = vtimer_irq; | ||
727 | vcpu_ptimer(vcpu)->irq.irq = ptimer_irq; | ||
728 | } | ||
729 | } | ||
730 | |||
731 | int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) | ||
732 | { | ||
733 | int __user *uaddr = (int __user *)(long)attr->addr; | ||
734 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | ||
735 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | ||
736 | int irq; | ||
737 | |||
738 | if (!irqchip_in_kernel(vcpu->kvm)) | ||
739 | return -EINVAL; | ||
740 | |||
741 | if (get_user(irq, uaddr)) | ||
742 | return -EFAULT; | ||
743 | |||
744 | if (!(irq_is_ppi(irq))) | ||
745 | return -EINVAL; | ||
746 | |||
747 | if (vcpu->arch.timer_cpu.enabled) | ||
748 | return -EBUSY; | ||
749 | |||
750 | switch (attr->attr) { | ||
751 | case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: | ||
752 | set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq); | ||
753 | break; | ||
754 | case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: | ||
755 | set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq); | ||
756 | break; | ||
757 | default: | ||
758 | return -ENXIO; | ||
759 | } | ||
760 | |||
761 | return 0; | ||
762 | } | ||
763 | |||
764 | int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) | ||
765 | { | ||
766 | int __user *uaddr = (int __user *)(long)attr->addr; | ||
767 | struct arch_timer_context *timer; | ||
768 | int irq; | ||
769 | |||
770 | switch (attr->attr) { | ||
771 | case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: | ||
772 | timer = vcpu_vtimer(vcpu); | ||
773 | break; | ||
774 | case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: | ||
775 | timer = vcpu_ptimer(vcpu); | ||
776 | break; | ||
777 | default: | ||
778 | return -ENXIO; | ||
779 | } | ||
780 | |||
781 | irq = timer->irq.irq; | ||
782 | return put_user(irq, uaddr); | ||
783 | } | ||
784 | |||
785 | int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) | ||
786 | { | ||
787 | switch (attr->attr) { | ||
788 | case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: | ||
789 | case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: | ||
790 | return 0; | ||
791 | } | ||
792 | |||
793 | return -ENXIO; | ||
794 | } | ||
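The kvm_arm_timer_set_attr()/kvm_arm_timer_get_attr() handlers above are reached through the per-vcpu device-attribute ioctls, which is how userspace selects the PPIs used for the virtual and physical timers before the vcpu first runs. A minimal, hypothetical userspace sketch (assuming a vcpu fd from KVM_CREATE_VCPU and the uapi constants added by this series; error handling trimmed):

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Pick the guest's virtual timer PPI; 'irq' must be in the PPI range
     * (16..31), mirroring the irq_is_ppi() check in kvm_arm_timer_set_attr(). */
    static int set_vtimer_irq(int vcpu_fd, int irq)
    {
            struct kvm_device_attr attr = {
                    .group = KVM_ARM_VCPU_TIMER_CTRL,
                    .attr  = KVM_ARM_VCPU_TIMER_IRQ_VTIMER,
                    .addr  = (__u64)(unsigned long)&irq,
            };

            if (ioctl(vcpu_fd, KVM_HAS_DEVICE_ATTR, &attr))
                    return -1;              /* kernel too old for this attribute */

            /* Fails with EBUSY once the timer has been enabled. */
            return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
    }

Because set_timer_irqs() propagates the value to every vcpu of the VM, issuing this once is enough; timer_irqs_are_valid() then rejects inconsistent or non-PPI settings when kvm_timer_enable() runs.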
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 3417e184c8e1..a39a1e161e63 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c | |||
@@ -368,6 +368,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
368 | kvm_timer_vcpu_put(vcpu); | 368 | kvm_timer_vcpu_put(vcpu); |
369 | } | 369 | } |
370 | 370 | ||
371 | static void vcpu_power_off(struct kvm_vcpu *vcpu) | ||
372 | { | ||
373 | vcpu->arch.power_off = true; | ||
374 | kvm_make_request(KVM_REQ_SLEEP, vcpu); | ||
375 | kvm_vcpu_kick(vcpu); | ||
376 | } | ||
377 | |||
371 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | 378 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, |
372 | struct kvm_mp_state *mp_state) | 379 | struct kvm_mp_state *mp_state) |
373 | { | 380 | { |
@@ -387,7 +394,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | |||
387 | vcpu->arch.power_off = false; | 394 | vcpu->arch.power_off = false; |
388 | break; | 395 | break; |
389 | case KVM_MP_STATE_STOPPED: | 396 | case KVM_MP_STATE_STOPPED: |
390 | vcpu->arch.power_off = true; | 397 | vcpu_power_off(vcpu); |
391 | break; | 398 | break; |
392 | default: | 399 | default: |
393 | return -EINVAL; | 400 | return -EINVAL; |
@@ -520,6 +527,10 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) | |||
520 | } | 527 | } |
521 | 528 | ||
522 | ret = kvm_timer_enable(vcpu); | 529 | ret = kvm_timer_enable(vcpu); |
530 | if (ret) | ||
531 | return ret; | ||
532 | |||
533 | ret = kvm_arm_pmu_v3_enable(vcpu); | ||
523 | 534 | ||
524 | return ret; | 535 | return ret; |
525 | } | 536 | } |
@@ -536,21 +547,7 @@ void kvm_arm_halt_guest(struct kvm *kvm) | |||
536 | 547 | ||
537 | kvm_for_each_vcpu(i, vcpu, kvm) | 548 | kvm_for_each_vcpu(i, vcpu, kvm) |
538 | vcpu->arch.pause = true; | 549 | vcpu->arch.pause = true; |
539 | kvm_make_all_cpus_request(kvm, KVM_REQ_VCPU_EXIT); | 550 | kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP); |
540 | } | ||
541 | |||
542 | void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu) | ||
543 | { | ||
544 | vcpu->arch.pause = true; | ||
545 | kvm_vcpu_kick(vcpu); | ||
546 | } | ||
547 | |||
548 | void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu) | ||
549 | { | ||
550 | struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); | ||
551 | |||
552 | vcpu->arch.pause = false; | ||
553 | swake_up(wq); | ||
554 | } | 551 | } |
555 | 552 | ||
556 | void kvm_arm_resume_guest(struct kvm *kvm) | 553 | void kvm_arm_resume_guest(struct kvm *kvm) |
@@ -558,16 +555,23 @@ void kvm_arm_resume_guest(struct kvm *kvm) | |||
558 | int i; | 555 | int i; |
559 | struct kvm_vcpu *vcpu; | 556 | struct kvm_vcpu *vcpu; |
560 | 557 | ||
561 | kvm_for_each_vcpu(i, vcpu, kvm) | 558 | kvm_for_each_vcpu(i, vcpu, kvm) { |
562 | kvm_arm_resume_vcpu(vcpu); | 559 | vcpu->arch.pause = false; |
560 | swake_up(kvm_arch_vcpu_wq(vcpu)); | ||
561 | } | ||
563 | } | 562 | } |
564 | 563 | ||
565 | static void vcpu_sleep(struct kvm_vcpu *vcpu) | 564 | static void vcpu_req_sleep(struct kvm_vcpu *vcpu) |
566 | { | 565 | { |
567 | struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); | 566 | struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); |
568 | 567 | ||
569 | swait_event_interruptible(*wq, ((!vcpu->arch.power_off) && | 568 | swait_event_interruptible(*wq, ((!vcpu->arch.power_off) && |
570 | (!vcpu->arch.pause))); | 569 | (!vcpu->arch.pause))); |
570 | |||
571 | if (vcpu->arch.power_off || vcpu->arch.pause) { | ||
572 | /* Awaken to handle a signal, request we sleep again later. */ | ||
573 | kvm_make_request(KVM_REQ_SLEEP, vcpu); | ||
574 | } | ||
571 | } | 575 | } |
572 | 576 | ||
573 | static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) | 577 | static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) |
@@ -575,6 +579,20 @@ static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) | |||
575 | return vcpu->arch.target >= 0; | 579 | return vcpu->arch.target >= 0; |
576 | } | 580 | } |
577 | 581 | ||
582 | static void check_vcpu_requests(struct kvm_vcpu *vcpu) | ||
583 | { | ||
584 | if (kvm_request_pending(vcpu)) { | ||
585 | if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) | ||
586 | vcpu_req_sleep(vcpu); | ||
587 | |||
588 | /* | ||
589 | * Clear IRQ_PENDING requests that were made to guarantee | ||
590 | * that a VCPU sees new virtual interrupts. | ||
591 | */ | ||
592 | kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu); | ||
593 | } | ||
594 | } | ||
595 | |||
578 | /** | 596 | /** |
579 | * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code | 597 | * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code |
580 | * @vcpu: The VCPU pointer | 598 | * @vcpu: The VCPU pointer |
@@ -620,8 +638,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
620 | 638 | ||
621 | update_vttbr(vcpu->kvm); | 639 | update_vttbr(vcpu->kvm); |
622 | 640 | ||
623 | if (vcpu->arch.power_off || vcpu->arch.pause) | 641 | check_vcpu_requests(vcpu); |
624 | vcpu_sleep(vcpu); | ||
625 | 642 | ||
626 | /* | 643 | /* |
627 | * Preparing the interrupts to be injected also | 644 | * Preparing the interrupts to be injected also |
@@ -650,8 +667,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
650 | run->exit_reason = KVM_EXIT_INTR; | 667 | run->exit_reason = KVM_EXIT_INTR; |
651 | } | 668 | } |
652 | 669 | ||
670 | /* | ||
671 | * Ensure we set mode to IN_GUEST_MODE after we disable | ||
672 | * interrupts and before the final VCPU requests check. | ||
673 | * See the comment in kvm_vcpu_exiting_guest_mode() and | ||
674 | * Documentation/virtual/kvm/vcpu-requests.rst | ||
675 | */ | ||
676 | smp_store_mb(vcpu->mode, IN_GUEST_MODE); | ||
677 | |||
653 | if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || | 678 | if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || |
654 | vcpu->arch.power_off || vcpu->arch.pause) { | 679 | kvm_request_pending(vcpu)) { |
680 | vcpu->mode = OUTSIDE_GUEST_MODE; | ||
655 | local_irq_enable(); | 681 | local_irq_enable(); |
656 | kvm_pmu_sync_hwstate(vcpu); | 682 | kvm_pmu_sync_hwstate(vcpu); |
657 | kvm_timer_sync_hwstate(vcpu); | 683 | kvm_timer_sync_hwstate(vcpu); |
@@ -667,7 +693,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
667 | */ | 693 | */ |
668 | trace_kvm_entry(*vcpu_pc(vcpu)); | 694 | trace_kvm_entry(*vcpu_pc(vcpu)); |
669 | guest_enter_irqoff(); | 695 | guest_enter_irqoff(); |
670 | vcpu->mode = IN_GUEST_MODE; | ||
671 | 696 | ||
672 | ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); | 697 | ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); |
673 | 698 | ||
@@ -756,6 +781,7 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) | |||
756 | * trigger a world-switch round on the running physical CPU to set the | 781 | * trigger a world-switch round on the running physical CPU to set the |
757 | * virtual IRQ/FIQ fields in the HCR appropriately. | 782 | * virtual IRQ/FIQ fields in the HCR appropriately. |
758 | */ | 783 | */ |
784 | kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); | ||
759 | kvm_vcpu_kick(vcpu); | 785 | kvm_vcpu_kick(vcpu); |
760 | 786 | ||
761 | return 0; | 787 | return 0; |
@@ -806,7 +832,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, | |||
806 | if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS) | 832 | if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS) |
807 | return -EINVAL; | 833 | return -EINVAL; |
808 | 834 | ||
809 | return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level); | 835 | return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL); |
810 | case KVM_ARM_IRQ_TYPE_SPI: | 836 | case KVM_ARM_IRQ_TYPE_SPI: |
811 | if (!irqchip_in_kernel(kvm)) | 837 | if (!irqchip_in_kernel(kvm)) |
812 | return -ENXIO; | 838 | return -ENXIO; |
@@ -814,7 +840,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, | |||
814 | if (irq_num < VGIC_NR_PRIVATE_IRQS) | 840 | if (irq_num < VGIC_NR_PRIVATE_IRQS) |
815 | return -EINVAL; | 841 | return -EINVAL; |
816 | 842 | ||
817 | return kvm_vgic_inject_irq(kvm, 0, irq_num, level); | 843 | return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL); |
818 | } | 844 | } |
819 | 845 | ||
820 | return -EINVAL; | 846 | return -EINVAL; |
@@ -884,7 +910,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, | |||
884 | * Handle the "start in power-off" case. | 910 | * Handle the "start in power-off" case. |
885 | */ | 911 | */ |
886 | if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) | 912 | if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) |
887 | vcpu->arch.power_off = true; | 913 | vcpu_power_off(vcpu); |
888 | else | 914 | else |
889 | vcpu->arch.power_off = false; | 915 | vcpu->arch.power_off = false; |
890 | 916 | ||
@@ -1115,9 +1141,6 @@ static void cpu_init_hyp_mode(void *dummy) | |||
1115 | __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); | 1141 | __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); |
1116 | __cpu_init_stage2(); | 1142 | __cpu_init_stage2(); |
1117 | 1143 | ||
1118 | if (is_kernel_in_hyp_mode()) | ||
1119 | kvm_timer_init_vhe(); | ||
1120 | |||
1121 | kvm_arm_init_debug(); | 1144 | kvm_arm_init_debug(); |
1122 | } | 1145 | } |
1123 | 1146 | ||
@@ -1137,6 +1160,7 @@ static void cpu_hyp_reinit(void) | |||
1137 | * event was cancelled before the CPU was reset. | 1160 | * event was cancelled before the CPU was reset. |
1138 | */ | 1161 | */ |
1139 | __cpu_init_stage2(); | 1162 | __cpu_init_stage2(); |
1163 | kvm_timer_init_vhe(); | ||
1140 | } else { | 1164 | } else { |
1141 | cpu_init_hyp_mode(NULL); | 1165 | cpu_init_hyp_mode(NULL); |
1142 | } | 1166 | } |
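The smp_store_mb(vcpu->mode, IN_GUEST_MODE) above, followed by a final kvm_request_pending() check, is the generic requester/receiver pattern documented in vcpu-requests.rst: the requester publishes a request bit and then kicks if the target is already in guest mode, while the receiver publishes its mode with a full barrier before the last request check, so at least one side always sees the other. A stand-alone illustration of that ordering using plain C11 atomics (a toy model, not the kernel primitives):

    #include <stdatomic.h>
    #include <stdbool.h>

    enum { OUTSIDE_GUEST_MODE, IN_GUEST_MODE };

    struct toy_vcpu {
            _Atomic int mode;
            _Atomic unsigned long requests;
    };

    /* Requester: set the request, then kick if the target already entered
     * the guest (the analogue of kvm_make_request() + kvm_vcpu_kick()). */
    static void toy_make_request(struct toy_vcpu *v, int req)
    {
            atomic_fetch_or(&v->requests, 1UL << req);
            if (atomic_load(&v->mode) == IN_GUEST_MODE) {
                    /* send the wake-up IPI here */
            }
    }

    /* Receiver: publish IN_GUEST_MODE (smp_store_mb() in the kernel), then
     * do the final request check with interrupts disabled before entry. */
    static bool toy_try_enter_guest(struct toy_vcpu *v)
    {
            atomic_store(&v->mode, IN_GUEST_MODE);
            if (atomic_load(&v->requests)) {
                    atomic_store(&v->mode, OUTSIDE_GUEST_MODE);
                    return false;   /* bail out and service the request */
            }
            return true;            /* safe to run the guest */
    }

Either the receiver sees the freshly set request bit and bails out, or the requester sees IN_GUEST_MODE and sends the kick, which forces an exit; doing the mode store before the check is what closes the window in which a request could be missed.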
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 87940364570b..91728faa13fd 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c | |||
@@ -19,10 +19,12 @@ | |||
19 | #include <linux/irqchip/arm-gic-v3.h> | 19 | #include <linux/irqchip/arm-gic-v3.h> |
20 | #include <linux/kvm_host.h> | 20 | #include <linux/kvm_host.h> |
21 | 21 | ||
22 | #include <asm/kvm_emulate.h> | ||
22 | #include <asm/kvm_hyp.h> | 23 | #include <asm/kvm_hyp.h> |
23 | 24 | ||
24 | #define vtr_to_max_lr_idx(v) ((v) & 0xf) | 25 | #define vtr_to_max_lr_idx(v) ((v) & 0xf) |
25 | #define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) | 26 | #define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) |
27 | #define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5)) | ||
26 | 28 | ||
27 | static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) | 29 | static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) |
28 | { | 30 | { |
@@ -118,6 +120,90 @@ static void __hyp_text __gic_v3_set_lr(u64 val, int lr) | |||
118 | } | 120 | } |
119 | } | 121 | } |
120 | 122 | ||
123 | static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n) | ||
124 | { | ||
125 | switch (n) { | ||
126 | case 0: | ||
127 | write_gicreg(val, ICH_AP0R0_EL2); | ||
128 | break; | ||
129 | case 1: | ||
130 | write_gicreg(val, ICH_AP0R1_EL2); | ||
131 | break; | ||
132 | case 2: | ||
133 | write_gicreg(val, ICH_AP0R2_EL2); | ||
134 | break; | ||
135 | case 3: | ||
136 | write_gicreg(val, ICH_AP0R3_EL2); | ||
137 | break; | ||
138 | } | ||
139 | } | ||
140 | |||
141 | static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n) | ||
142 | { | ||
143 | switch (n) { | ||
144 | case 0: | ||
145 | write_gicreg(val, ICH_AP1R0_EL2); | ||
146 | break; | ||
147 | case 1: | ||
148 | write_gicreg(val, ICH_AP1R1_EL2); | ||
149 | break; | ||
150 | case 2: | ||
151 | write_gicreg(val, ICH_AP1R2_EL2); | ||
152 | break; | ||
153 | case 3: | ||
154 | write_gicreg(val, ICH_AP1R3_EL2); | ||
155 | break; | ||
156 | } | ||
157 | } | ||
158 | |||
159 | static u32 __hyp_text __vgic_v3_read_ap0rn(int n) | ||
160 | { | ||
161 | u32 val; | ||
162 | |||
163 | switch (n) { | ||
164 | case 0: | ||
165 | val = read_gicreg(ICH_AP0R0_EL2); | ||
166 | break; | ||
167 | case 1: | ||
168 | val = read_gicreg(ICH_AP0R1_EL2); | ||
169 | break; | ||
170 | case 2: | ||
171 | val = read_gicreg(ICH_AP0R2_EL2); | ||
172 | break; | ||
173 | case 3: | ||
174 | val = read_gicreg(ICH_AP0R3_EL2); | ||
175 | break; | ||
176 | default: | ||
177 | unreachable(); | ||
178 | } | ||
179 | |||
180 | return val; | ||
181 | } | ||
182 | |||
183 | static u32 __hyp_text __vgic_v3_read_ap1rn(int n) | ||
184 | { | ||
185 | u32 val; | ||
186 | |||
187 | switch (n) { | ||
188 | case 0: | ||
189 | val = read_gicreg(ICH_AP1R0_EL2); | ||
190 | break; | ||
191 | case 1: | ||
192 | val = read_gicreg(ICH_AP1R1_EL2); | ||
193 | break; | ||
194 | case 2: | ||
195 | val = read_gicreg(ICH_AP1R2_EL2); | ||
196 | break; | ||
197 | case 3: | ||
198 | val = read_gicreg(ICH_AP1R3_EL2); | ||
199 | break; | ||
200 | default: | ||
201 | unreachable(); | ||
202 | } | ||
203 | |||
204 | return val; | ||
205 | } | ||
206 | |||
121 | void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) | 207 | void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) |
122 | { | 208 | { |
123 | struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; | 209 | struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; |
@@ -154,24 +240,27 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) | |||
154 | 240 | ||
155 | switch (nr_pre_bits) { | 241 | switch (nr_pre_bits) { |
156 | case 7: | 242 | case 7: |
157 | cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2); | 243 | cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3); |
158 | cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2); | 244 | cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2); |
159 | case 6: | 245 | case 6: |
160 | cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2); | 246 | cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1); |
161 | default: | 247 | default: |
162 | cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2); | 248 | cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0); |
163 | } | 249 | } |
164 | 250 | ||
165 | switch (nr_pre_bits) { | 251 | switch (nr_pre_bits) { |
166 | case 7: | 252 | case 7: |
167 | cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2); | 253 | cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3); |
168 | cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2); | 254 | cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2); |
169 | case 6: | 255 | case 6: |
170 | cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2); | 256 | cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1); |
171 | default: | 257 | default: |
172 | cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2); | 258 | cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); |
173 | } | 259 | } |
174 | } else { | 260 | } else { |
261 | if (static_branch_unlikely(&vgic_v3_cpuif_trap)) | ||
262 | write_gicreg(0, ICH_HCR_EL2); | ||
263 | |||
175 | cpu_if->vgic_elrsr = 0xffff; | 264 | cpu_if->vgic_elrsr = 0xffff; |
176 | cpu_if->vgic_ap0r[0] = 0; | 265 | cpu_if->vgic_ap0r[0] = 0; |
177 | cpu_if->vgic_ap0r[1] = 0; | 266 | cpu_if->vgic_ap0r[1] = 0; |
@@ -224,26 +313,34 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) | |||
224 | 313 | ||
225 | switch (nr_pre_bits) { | 314 | switch (nr_pre_bits) { |
226 | case 7: | 315 | case 7: |
227 | write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2); | 316 | __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3); |
228 | write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2); | 317 | __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2); |
229 | case 6: | 318 | case 6: |
230 | write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2); | 319 | __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1); |
231 | default: | 320 | default: |
232 | write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2); | 321 | __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0); |
233 | } | 322 | } |
234 | 323 | ||
235 | switch (nr_pre_bits) { | 324 | switch (nr_pre_bits) { |
236 | case 7: | 325 | case 7: |
237 | write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); | 326 | __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3); |
238 | write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); | 327 | __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2); |
239 | case 6: | 328 | case 6: |
240 | write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2); | 329 | __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1); |
241 | default: | 330 | default: |
242 | write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); | 331 | __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0); |
243 | } | 332 | } |
244 | 333 | ||
245 | for (i = 0; i < used_lrs; i++) | 334 | for (i = 0; i < used_lrs; i++) |
246 | __gic_v3_set_lr(cpu_if->vgic_lr[i], i); | 335 | __gic_v3_set_lr(cpu_if->vgic_lr[i], i); |
336 | } else { | ||
337 | /* | ||
338 | * If we need to trap system registers, we must write | ||
339 | * ICH_HCR_EL2 anyway, even if no interrupts are being | ||
340 | * injected. | ||
341 | */ | ||
342 | if (static_branch_unlikely(&vgic_v3_cpuif_trap)) | ||
343 | write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); | ||
247 | } | 344 | } |
248 | 345 | ||
249 | /* | 346 | /* |
@@ -287,3 +384,697 @@ void __hyp_text __vgic_v3_write_vmcr(u32 vmcr) | |||
287 | { | 384 | { |
288 | write_gicreg(vmcr, ICH_VMCR_EL2); | 385 | write_gicreg(vmcr, ICH_VMCR_EL2); |
289 | } | 386 | } |
387 | |||
388 | #ifdef CONFIG_ARM64 | ||
389 | |||
390 | static int __hyp_text __vgic_v3_bpr_min(void) | ||
391 | { | ||
392 | /* See Pseudocode for VPriorityGroup */ | ||
393 | return 8 - vtr_to_nr_pre_bits(read_gicreg(ICH_VTR_EL2)); | ||
394 | } | ||
395 | |||
396 | static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu) | ||
397 | { | ||
398 | u32 esr = kvm_vcpu_get_hsr(vcpu); | ||
399 | u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; | ||
400 | |||
401 | return crm != 8; | ||
402 | } | ||
403 | |||
404 | #define GICv3_IDLE_PRIORITY 0xff | ||
405 | |||
406 | static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, | ||
407 | u32 vmcr, | ||
408 | u64 *lr_val) | ||
409 | { | ||
410 | unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs; | ||
411 | u8 priority = GICv3_IDLE_PRIORITY; | ||
412 | int i, lr = -1; | ||
413 | |||
414 | for (i = 0; i < used_lrs; i++) { | ||
415 | u64 val = __gic_v3_get_lr(i); | ||
416 | u8 lr_prio = (val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; | ||
417 | |||
418 | /* Not pending in the state? */ | ||
419 | if ((val & ICH_LR_STATE) != ICH_LR_PENDING_BIT) | ||
420 | continue; | ||
421 | |||
422 | /* Group-0 interrupt, but Group-0 disabled? */ | ||
423 | if (!(val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG0_MASK)) | ||
424 | continue; | ||
425 | |||
426 | /* Group-1 interrupt, but Group-1 disabled? */ | ||
427 | if ((val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG1_MASK)) | ||
428 | continue; | ||
429 | |||
430 | /* Not the highest priority? */ | ||
431 | if (lr_prio >= priority) | ||
432 | continue; | ||
433 | |||
434 | /* This is a candidate */ | ||
435 | priority = lr_prio; | ||
436 | *lr_val = val; | ||
437 | lr = i; | ||
438 | } | ||
439 | |||
440 | if (lr == -1) | ||
441 | *lr_val = ICC_IAR1_EL1_SPURIOUS; | ||
442 | |||
443 | return lr; | ||
444 | } | ||
445 | |||
446 | static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, | ||
447 | int intid, u64 *lr_val) | ||
448 | { | ||
449 | unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs; | ||
450 | int i; | ||
451 | |||
452 | for (i = 0; i < used_lrs; i++) { | ||
453 | u64 val = __gic_v3_get_lr(i); | ||
454 | |||
455 | if ((val & ICH_LR_VIRTUAL_ID_MASK) == intid && | ||
456 | (val & ICH_LR_ACTIVE_BIT)) { | ||
457 | *lr_val = val; | ||
458 | return i; | ||
459 | } | ||
460 | } | ||
461 | |||
462 | *lr_val = ICC_IAR1_EL1_SPURIOUS; | ||
463 | return -1; | ||
464 | } | ||
465 | |||
466 | static int __hyp_text __vgic_v3_get_highest_active_priority(void) | ||
467 | { | ||
468 | u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); | ||
469 | u32 hap = 0; | ||
470 | int i; | ||
471 | |||
472 | for (i = 0; i < nr_apr_regs; i++) { | ||
473 | u32 val; | ||
474 | |||
475 | /* | ||
476 | * The ICH_AP0Rn_EL2 and ICH_AP1Rn_EL2 registers | ||
477 | * contain the active priority levels for this VCPU | ||
478 | * for the maximum number of supported priority | ||
479 | * levels, and we return the full priority level only | ||
480 | * if the BPR is programmed to its minimum, otherwise | ||
481 | * we return a combination of the priority level and | ||
482 | * subpriority, as determined by the setting of the | ||
483 | * BPR, but without the full subpriority. | ||
484 | */ | ||
485 | val = __vgic_v3_read_ap0rn(i); | ||
486 | val |= __vgic_v3_read_ap1rn(i); | ||
487 | if (!val) { | ||
488 | hap += 32; | ||
489 | continue; | ||
490 | } | ||
491 | |||
492 | return (hap + __ffs(val)) << __vgic_v3_bpr_min(); | ||
493 | } | ||
494 | |||
495 | return GICv3_IDLE_PRIORITY; | ||
496 | } | ||
497 | |||
498 | static unsigned int __hyp_text __vgic_v3_get_bpr0(u32 vmcr) | ||
499 | { | ||
500 | return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; | ||
501 | } | ||
502 | |||
503 | static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr) | ||
504 | { | ||
505 | unsigned int bpr; | ||
506 | |||
507 | if (vmcr & ICH_VMCR_CBPR_MASK) { | ||
508 | bpr = __vgic_v3_get_bpr0(vmcr); | ||
509 | if (bpr < 7) | ||
510 | bpr++; | ||
511 | } else { | ||
512 | bpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; | ||
513 | } | ||
514 | |||
515 | return bpr; | ||
516 | } | ||
517 | |||
518 | /* | ||
519 | * Convert a priority to a preemption level, taking the relevant BPR | ||
520 | * into account by zeroing the sub-priority bits. | ||
521 | */ | ||
522 | static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) | ||
523 | { | ||
524 | unsigned int bpr; | ||
525 | |||
526 | if (!grp) | ||
527 | bpr = __vgic_v3_get_bpr0(vmcr) + 1; | ||
528 | else | ||
529 | bpr = __vgic_v3_get_bpr1(vmcr); | ||
530 | |||
531 | return pri & (GENMASK(7, 0) << bpr); | ||
532 | } | ||
533 | |||
534 | /* | ||
535 | * The priority value is independent of any of the BPR values, so we | ||
536 | * normalize it using the minimal BPR value. This guarantees that no | ||
537 | * matter what the guest does with its BPR, we can always set/get the | ||
538 | * same value of a priority. | ||
539 | */ | ||
540 | static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) | ||
541 | { | ||
542 | u8 pre, ap; | ||
543 | u32 val; | ||
544 | int apr; | ||
545 | |||
546 | pre = __vgic_v3_pri_to_pre(pri, vmcr, grp); | ||
547 | ap = pre >> __vgic_v3_bpr_min(); | ||
548 | apr = ap / 32; | ||
549 | |||
550 | if (!grp) { | ||
551 | val = __vgic_v3_read_ap0rn(apr); | ||
552 | __vgic_v3_write_ap0rn(val | BIT(ap % 32), apr); | ||
553 | } else { | ||
554 | val = __vgic_v3_read_ap1rn(apr); | ||
555 | __vgic_v3_write_ap1rn(val | BIT(ap % 32), apr); | ||
556 | } | ||
557 | } | ||
558 | |||
559 | static int __hyp_text __vgic_v3_clear_highest_active_priority(void) | ||
560 | { | ||
561 | u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); | ||
562 | u32 hap = 0; | ||
563 | int i; | ||
564 | |||
565 | for (i = 0; i < nr_apr_regs; i++) { | ||
566 | u32 ap0, ap1; | ||
567 | int c0, c1; | ||
568 | |||
569 | ap0 = __vgic_v3_read_ap0rn(i); | ||
570 | ap1 = __vgic_v3_read_ap1rn(i); | ||
571 | if (!ap0 && !ap1) { | ||
572 | hap += 32; | ||
573 | continue; | ||
574 | } | ||
575 | |||
576 | c0 = ap0 ? __ffs(ap0) : 32; | ||
577 | c1 = ap1 ? __ffs(ap1) : 32; | ||
578 | |||
579 | /* Always clear the LSB, which is the highest priority */ | ||
580 | if (c0 < c1) { | ||
581 | ap0 &= ~BIT(c0); | ||
582 | __vgic_v3_write_ap0rn(ap0, i); | ||
583 | hap += c0; | ||
584 | } else { | ||
585 | ap1 &= ~BIT(c1); | ||
586 | __vgic_v3_write_ap1rn(ap1, i); | ||
587 | hap += c1; | ||
588 | } | ||
589 | |||
590 | /* Rescale to 8 bits of priority */ | ||
591 | return hap << __vgic_v3_bpr_min(); | ||
592 | } | ||
593 | |||
594 | return GICv3_IDLE_PRIORITY; | ||
595 | } | ||
596 | |||
597 | static void __hyp_text __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) | ||
598 | { | ||
599 | u64 lr_val; | ||
600 | u8 lr_prio, pmr; | ||
601 | int lr, grp; | ||
602 | |||
603 | grp = __vgic_v3_get_group(vcpu); | ||
604 | |||
605 | lr = __vgic_v3_highest_priority_lr(vcpu, vmcr, &lr_val); | ||
606 | if (lr < 0) | ||
607 | goto spurious; | ||
608 | |||
609 | if (grp != !!(lr_val & ICH_LR_GROUP)) | ||
610 | goto spurious; | ||
611 | |||
612 | pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; | ||
613 | lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; | ||
614 | if (pmr <= lr_prio) | ||
615 | goto spurious; | ||
616 | |||
617 | if (__vgic_v3_get_highest_active_priority() <= __vgic_v3_pri_to_pre(lr_prio, vmcr, grp)) | ||
618 | goto spurious; | ||
619 | |||
620 | lr_val &= ~ICH_LR_STATE; | ||
621 | /* No active state for LPIs */ | ||
622 | if ((lr_val & ICH_LR_VIRTUAL_ID_MASK) <= VGIC_MAX_SPI) | ||
623 | lr_val |= ICH_LR_ACTIVE_BIT; | ||
624 | __gic_v3_set_lr(lr_val, lr); | ||
625 | __vgic_v3_set_active_priority(lr_prio, vmcr, grp); | ||
626 | vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); | ||
627 | return; | ||
628 | |||
629 | spurious: | ||
630 | vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS); | ||
631 | } | ||
632 | |||
633 | static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val) | ||
634 | { | ||
635 | lr_val &= ~ICH_LR_ACTIVE_BIT; | ||
636 | if (lr_val & ICH_LR_HW) { | ||
637 | u32 pid; | ||
638 | |||
639 | pid = (lr_val & ICH_LR_PHYS_ID_MASK) >> ICH_LR_PHYS_ID_SHIFT; | ||
640 | gic_write_dir(pid); | ||
641 | } | ||
642 | |||
643 | __gic_v3_set_lr(lr_val, lr); | ||
644 | } | ||
645 | |||
646 | static void __hyp_text __vgic_v3_bump_eoicount(void) | ||
647 | { | ||
648 | u32 hcr; | ||
649 | |||
650 | hcr = read_gicreg(ICH_HCR_EL2); | ||
651 | hcr += 1 << ICH_HCR_EOIcount_SHIFT; | ||
652 | write_gicreg(hcr, ICH_HCR_EL2); | ||
653 | } | ||
654 | |||
655 | static void __hyp_text __vgic_v3_write_dir(struct kvm_vcpu *vcpu, | ||
656 | u32 vmcr, int rt) | ||
657 | { | ||
658 | u32 vid = vcpu_get_reg(vcpu, rt); | ||
659 | u64 lr_val; | ||
660 | int lr; | ||
661 | |||
662 | /* EOImode == 0, nothing to be done here */ | ||
663 | if (!(vmcr & ICH_VMCR_EOIM_MASK)) | ||
664 | return; | ||
665 | |||
666 | /* No deactivate to be performed on an LPI */ | ||
667 | if (vid >= VGIC_MIN_LPI) | ||
668 | return; | ||
669 | |||
670 | lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val); | ||
671 | if (lr == -1) { | ||
672 | __vgic_v3_bump_eoicount(); | ||
673 | return; | ||
674 | } | ||
675 | |||
676 | __vgic_v3_clear_active_lr(lr, lr_val); | ||
677 | } | ||
678 | |||
679 | static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) | ||
680 | { | ||
681 | u32 vid = vcpu_get_reg(vcpu, rt); | ||
682 | u64 lr_val; | ||
683 | u8 lr_prio, act_prio; | ||
684 | int lr, grp; | ||
685 | |||
686 | grp = __vgic_v3_get_group(vcpu); | ||
687 | |||
688 | /* Drop priority in any case */ | ||
689 | act_prio = __vgic_v3_clear_highest_active_priority(); | ||
690 | |||
691 | /* If EOIing an LPI, no deactivate to be performed */ | ||
692 | if (vid >= VGIC_MIN_LPI) | ||
693 | return; | ||
694 | |||
695 | /* EOImode == 1, nothing to be done here */ | ||
696 | if (vmcr & ICH_VMCR_EOIM_MASK) | ||
697 | return; | ||
698 | |||
699 | lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val); | ||
700 | if (lr == -1) { | ||
701 | __vgic_v3_bump_eoicount(); | ||
702 | return; | ||
703 | } | ||
704 | |||
705 | lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; | ||
706 | |||
707 | /* If priorities or group do not match, the guest has fscked-up. */ | ||
708 | if (grp != !!(lr_val & ICH_LR_GROUP) || | ||
709 | __vgic_v3_pri_to_pre(lr_prio, vmcr, grp) != act_prio) | ||
710 | return; | ||
711 | |||
712 | /* Let's now perform the deactivation */ | ||
713 | __vgic_v3_clear_active_lr(lr, lr_val); | ||
714 | } | ||
715 | |||
716 | static void __hyp_text __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) | ||
717 | { | ||
718 | vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG0_MASK)); | ||
719 | } | ||
720 | |||
721 | static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) | ||
722 | { | ||
723 | vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK)); | ||
724 | } | ||
725 | |||
726 | static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) | ||
727 | { | ||
728 | u64 val = vcpu_get_reg(vcpu, rt); | ||
729 | |||
730 | if (val & 1) | ||
731 | vmcr |= ICH_VMCR_ENG0_MASK; | ||
732 | else | ||
733 | vmcr &= ~ICH_VMCR_ENG0_MASK; | ||
734 | |||
735 | __vgic_v3_write_vmcr(vmcr); | ||
736 | } | ||
737 | |||
738 | static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) | ||
739 | { | ||
740 | u64 val = vcpu_get_reg(vcpu, rt); | ||
741 | |||
742 | if (val & 1) | ||
743 | vmcr |= ICH_VMCR_ENG1_MASK; | ||
744 | else | ||
745 | vmcr &= ~ICH_VMCR_ENG1_MASK; | ||
746 | |||
747 | __vgic_v3_write_vmcr(vmcr); | ||
748 | } | ||
749 | |||
750 | static void __hyp_text __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) | ||
751 | { | ||
752 | vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr0(vmcr)); | ||
753 | } | ||
754 | |||
755 | static void __hyp_text __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) | ||
756 | { | ||
757 | vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr1(vmcr)); | ||
758 | } | ||
759 | |||
760 | static void __hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) | ||
761 | { | ||
762 | u64 val = vcpu_get_reg(vcpu, rt); | ||
763 | u8 bpr_min = __vgic_v3_bpr_min() - 1; | ||
764 | |||
765 | /* Enforce BPR limiting */ | ||
766 | if (val < bpr_min) | ||
767 | val = bpr_min; | ||
768 | |||
769 | val <<= ICH_VMCR_BPR0_SHIFT; | ||
770 | val &= ICH_VMCR_BPR0_MASK; | ||
771 | vmcr &= ~ICH_VMCR_BPR0_MASK; | ||
772 | vmcr |= val; | ||
773 | |||
774 | __vgic_v3_write_vmcr(vmcr); | ||
775 | } | ||
776 | |||
777 | static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) | ||
778 | { | ||
779 | u64 val = vcpu_get_reg(vcpu, rt); | ||
780 | u8 bpr_min = __vgic_v3_bpr_min(); | ||
781 | |||
782 | if (vmcr & ICH_VMCR_CBPR_MASK) | ||
783 | return; | ||
784 | |||
785 | /* Enforce BPR limiting */ | ||
786 | if (val < bpr_min) | ||
787 | val = bpr_min; | ||
788 | |||
789 | val <<= ICH_VMCR_BPR1_SHIFT; | ||
790 | val &= ICH_VMCR_BPR1_MASK; | ||
791 | vmcr &= ~ICH_VMCR_BPR1_MASK; | ||
792 | vmcr |= val; | ||
793 | |||
794 | __vgic_v3_write_vmcr(vmcr); | ||
795 | } | ||
796 | |||
797 | static void __hyp_text __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n) | ||
798 | { | ||
799 | u32 val; | ||
800 | |||
801 | if (!__vgic_v3_get_group(vcpu)) | ||
802 | val = __vgic_v3_read_ap0rn(n); | ||
803 | else | ||
804 | val = __vgic_v3_read_ap1rn(n); | ||
805 | |||
806 | vcpu_set_reg(vcpu, rt, val); | ||
807 | } | ||
808 | |||
809 | static void __hyp_text __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n) | ||
810 | { | ||
811 | u32 val = vcpu_get_reg(vcpu, rt); | ||
812 | |||
813 | if (!__vgic_v3_get_group(vcpu)) | ||
814 | __vgic_v3_write_ap0rn(val, n); | ||
815 | else | ||
816 | __vgic_v3_write_ap1rn(val, n); | ||
817 | } | ||
818 | |||
819 | static void __hyp_text __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu, | ||
820 | u32 vmcr, int rt) | ||
821 | { | ||
822 | __vgic_v3_read_apxrn(vcpu, rt, 0); | ||
823 | } | ||
824 | |||
825 | static void __hyp_text __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu, | ||
826 | u32 vmcr, int rt) | ||
827 | { | ||
828 | __vgic_v3_read_apxrn(vcpu, rt, 1); | ||
829 | } | ||
830 | |||
831 | static void __hyp_text __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu, | ||
832 | u32 vmcr, int rt) | ||
833 | { | ||
834 | __vgic_v3_read_apxrn(vcpu, rt, 2); | ||
835 | } | ||
836 | |||
837 | static void __hyp_text __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu, | ||
838 | u32 vmcr, int rt) | ||
839 | { | ||
840 | __vgic_v3_read_apxrn(vcpu, rt, 3); | ||
841 | } | ||
842 | |||
843 | static void __hyp_text __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu, | ||
844 | u32 vmcr, int rt) | ||
845 | { | ||
846 | __vgic_v3_write_apxrn(vcpu, rt, 0); | ||
847 | } | ||
848 | |||
849 | static void __hyp_text __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu, | ||
850 | u32 vmcr, int rt) | ||
851 | { | ||
852 | __vgic_v3_write_apxrn(vcpu, rt, 1); | ||
853 | } | ||
854 | |||
855 | static void __hyp_text __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu, | ||
856 | u32 vmcr, int rt) | ||
857 | { | ||
858 | __vgic_v3_write_apxrn(vcpu, rt, 2); | ||
859 | } | ||
860 | |||
861 | static void __hyp_text __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, | ||
862 | u32 vmcr, int rt) | ||
863 | { | ||
864 | __vgic_v3_write_apxrn(vcpu, rt, 3); | ||
865 | } | ||
866 | |||
867 | static void __hyp_text __vgic_v3_read_hppir(struct kvm_vcpu *vcpu, | ||
868 | u32 vmcr, int rt) | ||
869 | { | ||
870 | u64 lr_val; | ||
871 | int lr, lr_grp, grp; | ||
872 | |||
873 | grp = __vgic_v3_get_group(vcpu); | ||
874 | |||
875 | lr = __vgic_v3_highest_priority_lr(vcpu, vmcr, &lr_val); | ||
876 | if (lr == -1) | ||
877 | goto spurious; | ||
878 | |||
879 | lr_grp = !!(lr_val & ICH_LR_GROUP); | ||
880 | if (lr_grp != grp) | ||
881 | lr_val = ICC_IAR1_EL1_SPURIOUS; | ||
882 | |||
883 | spurious: | ||
884 | vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); | ||
885 | } | ||
886 | |||
887 | static void __hyp_text __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, | ||
888 | u32 vmcr, int rt) | ||
889 | { | ||
890 | vmcr &= ICH_VMCR_PMR_MASK; | ||
891 | vmcr >>= ICH_VMCR_PMR_SHIFT; | ||
892 | vcpu_set_reg(vcpu, rt, vmcr); | ||
893 | } | ||
894 | |||
895 | static void __hyp_text __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, | ||
896 | u32 vmcr, int rt) | ||
897 | { | ||
898 | u32 val = vcpu_get_reg(vcpu, rt); | ||
899 | |||
900 | val <<= ICH_VMCR_PMR_SHIFT; | ||
901 | val &= ICH_VMCR_PMR_MASK; | ||
902 | vmcr &= ~ICH_VMCR_PMR_MASK; | ||
903 | vmcr |= val; | ||
904 | |||
905 | write_gicreg(vmcr, ICH_VMCR_EL2); | ||
906 | } | ||
907 | |||
908 | static void __hyp_text __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, | ||
909 | u32 vmcr, int rt) | ||
910 | { | ||
911 | u32 val = __vgic_v3_get_highest_active_priority(); | ||
912 | vcpu_set_reg(vcpu, rt, val); | ||
913 | } | ||
914 | |||
915 | static void __hyp_text __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, | ||
916 | u32 vmcr, int rt) | ||
917 | { | ||
918 | u32 vtr, val; | ||
919 | |||
920 | vtr = read_gicreg(ICH_VTR_EL2); | ||
921 | /* PRIbits */ | ||
922 | val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT; | ||
923 | /* IDbits */ | ||
924 | val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT; | ||
925 | /* SEIS */ | ||
926 | val |= ((vtr >> 22) & 1) << ICC_CTLR_EL1_SEIS_SHIFT; | ||
927 | /* A3V */ | ||
928 | val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT; | ||
929 | /* EOImode */ | ||
930 | val |= ((vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT) << ICC_CTLR_EL1_EOImode_SHIFT; | ||
931 | /* CBPR */ | ||
932 | val |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT; | ||
933 | |||
934 | vcpu_set_reg(vcpu, rt, val); | ||
935 | } | ||
936 | |||
937 | static void __hyp_text __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, | ||
938 | u32 vmcr, int rt) | ||
939 | { | ||
940 | u32 val = vcpu_get_reg(vcpu, rt); | ||
941 | |||
942 | if (val & ICC_CTLR_EL1_CBPR_MASK) | ||
943 | vmcr |= ICH_VMCR_CBPR_MASK; | ||
944 | else | ||
945 | vmcr &= ~ICH_VMCR_CBPR_MASK; | ||
946 | |||
947 | if (val & ICC_CTLR_EL1_EOImode_MASK) | ||
948 | vmcr |= ICH_VMCR_EOIM_MASK; | ||
949 | else | ||
950 | vmcr &= ~ICH_VMCR_EOIM_MASK; | ||
951 | |||
952 | write_gicreg(vmcr, ICH_VMCR_EL2); | ||
953 | } | ||
954 | |||
955 | int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) | ||
956 | { | ||
957 | int rt; | ||
958 | u32 esr; | ||
959 | u32 vmcr; | ||
960 | void (*fn)(struct kvm_vcpu *, u32, int); | ||
961 | bool is_read; | ||
962 | u32 sysreg; | ||
963 | |||
964 | esr = kvm_vcpu_get_hsr(vcpu); | ||
965 | if (vcpu_mode_is_32bit(vcpu)) { | ||
966 | if (!kvm_condition_valid(vcpu)) | ||
967 | return 1; | ||
968 | |||
969 | sysreg = esr_cp15_to_sysreg(esr); | ||
970 | } else { | ||
971 | sysreg = esr_sys64_to_sysreg(esr); | ||
972 | } | ||
973 | |||
974 | is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ; | ||
975 | |||
976 | switch (sysreg) { | ||
977 | case SYS_ICC_IAR0_EL1: | ||
978 | case SYS_ICC_IAR1_EL1: | ||
979 | if (unlikely(!is_read)) | ||
980 | return 0; | ||
981 | fn = __vgic_v3_read_iar; | ||
982 | break; | ||
983 | case SYS_ICC_EOIR0_EL1: | ||
984 | case SYS_ICC_EOIR1_EL1: | ||
985 | if (unlikely(is_read)) | ||
986 | return 0; | ||
987 | fn = __vgic_v3_write_eoir; | ||
988 | break; | ||
989 | case SYS_ICC_IGRPEN1_EL1: | ||
990 | if (is_read) | ||
991 | fn = __vgic_v3_read_igrpen1; | ||
992 | else | ||
993 | fn = __vgic_v3_write_igrpen1; | ||
994 | break; | ||
995 | case SYS_ICC_BPR1_EL1: | ||
996 | if (is_read) | ||
997 | fn = __vgic_v3_read_bpr1; | ||
998 | else | ||
999 | fn = __vgic_v3_write_bpr1; | ||
1000 | break; | ||
1001 | case SYS_ICC_AP0Rn_EL1(0): | ||
1002 | case SYS_ICC_AP1Rn_EL1(0): | ||
1003 | if (is_read) | ||
1004 | fn = __vgic_v3_read_apxr0; | ||
1005 | else | ||
1006 | fn = __vgic_v3_write_apxr0; | ||
1007 | break; | ||
1008 | case SYS_ICC_AP0Rn_EL1(1): | ||
1009 | case SYS_ICC_AP1Rn_EL1(1): | ||
1010 | if (is_read) | ||
1011 | fn = __vgic_v3_read_apxr1; | ||
1012 | else | ||
1013 | fn = __vgic_v3_write_apxr1; | ||
1014 | break; | ||
1015 | case SYS_ICC_AP0Rn_EL1(2): | ||
1016 | case SYS_ICC_AP1Rn_EL1(2): | ||
1017 | if (is_read) | ||
1018 | fn = __vgic_v3_read_apxr2; | ||
1019 | else | ||
1020 | fn = __vgic_v3_write_apxr2; | ||
1021 | break; | ||
1022 | case SYS_ICC_AP0Rn_EL1(3): | ||
1023 | case SYS_ICC_AP1Rn_EL1(3): | ||
1024 | if (is_read) | ||
1025 | fn = __vgic_v3_read_apxr3; | ||
1026 | else | ||
1027 | fn = __vgic_v3_write_apxr3; | ||
1028 | break; | ||
1029 | case SYS_ICC_HPPIR0_EL1: | ||
1030 | case SYS_ICC_HPPIR1_EL1: | ||
1031 | if (unlikely(!is_read)) | ||
1032 | return 0; | ||
1033 | fn = __vgic_v3_read_hppir; | ||
1034 | break; | ||
1035 | case SYS_ICC_IGRPEN0_EL1: | ||
1036 | if (is_read) | ||
1037 | fn = __vgic_v3_read_igrpen0; | ||
1038 | else | ||
1039 | fn = __vgic_v3_write_igrpen0; | ||
1040 | break; | ||
1041 | case SYS_ICC_BPR0_EL1: | ||
1042 | if (is_read) | ||
1043 | fn = __vgic_v3_read_bpr0; | ||
1044 | else | ||
1045 | fn = __vgic_v3_write_bpr0; | ||
1046 | break; | ||
1047 | case SYS_ICC_DIR_EL1: | ||
1048 | if (unlikely(is_read)) | ||
1049 | return 0; | ||
1050 | fn = __vgic_v3_write_dir; | ||
1051 | break; | ||
1052 | case SYS_ICC_RPR_EL1: | ||
1053 | if (unlikely(!is_read)) | ||
1054 | return 0; | ||
1055 | fn = __vgic_v3_read_rpr; | ||
1056 | break; | ||
1057 | case SYS_ICC_CTLR_EL1: | ||
1058 | if (is_read) | ||
1059 | fn = __vgic_v3_read_ctlr; | ||
1060 | else | ||
1061 | fn = __vgic_v3_write_ctlr; | ||
1062 | break; | ||
1063 | case SYS_ICC_PMR_EL1: | ||
1064 | if (is_read) | ||
1065 | fn = __vgic_v3_read_pmr; | ||
1066 | else | ||
1067 | fn = __vgic_v3_write_pmr; | ||
1068 | break; | ||
1069 | default: | ||
1070 | return 0; | ||
1071 | } | ||
1072 | |||
1073 | vmcr = __vgic_v3_read_vmcr(); | ||
1074 | rt = kvm_vcpu_sys_get_rt(vcpu); | ||
1075 | fn(vcpu, vmcr, rt); | ||
1076 | |||
1077 | return 1; | ||
1078 | } | ||
1079 | |||
1080 | #endif | ||
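Much of the trap handling above reduces to a small piece of priority arithmetic: the minimum BPR follows from the number of preemption bits in ICH_VTR_EL2, and a priority becomes a preemption level by masking off the sub-priority bits selected by the group-dependent BPR. A self-contained sketch of that arithmetic, mirroring __vgic_v3_bpr_min() and __vgic_v3_pri_to_pre() outside of hyp context:

    #include <stdio.h>
    #include <stdint.h>

    /* 8 - PREbits, as in __vgic_v3_bpr_min(); pre_bits comes from ICH_VTR_EL2. */
    static int bpr_min(int pre_bits)
    {
            return 8 - pre_bits;
    }

    /* Zero the sub-priority bits, as in __vgic_v3_pri_to_pre(): group 0 uses
     * BPR0 + 1 as its effective binary point, group 1 uses BPR1 directly. */
    static uint8_t pri_to_pre(uint8_t pri, int bpr0, int bpr1, int grp)
    {
            int bpr = grp ? bpr1 : bpr0 + 1;

            return pri & (0xff << bpr);
    }

    int main(void)
    {
            /* With 5 preemption bits the BPR cannot go below 3 ... */
            printf("bpr_min = %d\n", bpr_min(5));
            /* ... so priorities 0x2c and 0x2f share preemption level 0x28. */
            printf("pre(0x2c)=0x%02x pre(0x2f)=0x%02x\n",
                   pri_to_pre(0x2c, 3, 3, 1), pri_to_pre(0x2f, 3, 3, 1));
            return 0;
    }

The same normalisation is why __vgic_v3_set_active_priority() shifts the preemption level right by the minimum BPR before indexing the ICH_AP{0,1}Rn_EL2 bitmaps: the active-priority bookkeeping stays independent of whatever BPR the guest currently has programmed.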
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 1c44aa35f909..0e1fc75f3585 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/kvm_host.h> | 20 | #include <linux/kvm_host.h> |
21 | #include <linux/io.h> | 21 | #include <linux/io.h> |
22 | #include <linux/hugetlb.h> | 22 | #include <linux/hugetlb.h> |
23 | #include <linux/sched/signal.h> | ||
23 | #include <trace/events/kvm.h> | 24 | #include <trace/events/kvm.h> |
24 | #include <asm/pgalloc.h> | 25 | #include <asm/pgalloc.h> |
25 | #include <asm/cacheflush.h> | 26 | #include <asm/cacheflush.h> |
@@ -1262,6 +1263,24 @@ static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn, | |||
1262 | __coherent_cache_guest_page(vcpu, pfn, size); | 1263 | __coherent_cache_guest_page(vcpu, pfn, size); |
1263 | } | 1264 | } |
1264 | 1265 | ||
1266 | static void kvm_send_hwpoison_signal(unsigned long address, | ||
1267 | struct vm_area_struct *vma) | ||
1268 | { | ||
1269 | siginfo_t info; | ||
1270 | |||
1271 | info.si_signo = SIGBUS; | ||
1272 | info.si_errno = 0; | ||
1273 | info.si_code = BUS_MCEERR_AR; | ||
1274 | info.si_addr = (void __user *)address; | ||
1275 | |||
1276 | if (is_vm_hugetlb_page(vma)) | ||
1277 | info.si_addr_lsb = huge_page_shift(hstate_vma(vma)); | ||
1278 | else | ||
1279 | info.si_addr_lsb = PAGE_SHIFT; | ||
1280 | |||
1281 | send_sig_info(SIGBUS, &info, current); | ||
1282 | } | ||
1283 | |||
1265 | static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | 1284 | static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, |
1266 | struct kvm_memory_slot *memslot, unsigned long hva, | 1285 | struct kvm_memory_slot *memslot, unsigned long hva, |
1267 | unsigned long fault_status) | 1286 | unsigned long fault_status) |
@@ -1331,6 +1350,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | |||
1331 | smp_rmb(); | 1350 | smp_rmb(); |
1332 | 1351 | ||
1333 | pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); | 1352 | pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); |
1353 | if (pfn == KVM_PFN_ERR_HWPOISON) { | ||
1354 | kvm_send_hwpoison_signal(hva, vma); | ||
1355 | return 0; | ||
1356 | } | ||
1334 | if (is_error_noslot_pfn(pfn)) | 1357 | if (is_error_noslot_pfn(pfn)) |
1335 | return -EFAULT; | 1358 | return -EFAULT; |
1336 | 1359 | ||
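kvm_send_hwpoison_signal() surfaces the poisoned page to the VMM as a SIGBUS with si_code BUS_MCEERR_AR and si_addr_lsb encoding the affected granule (PAGE_SHIFT or the huge page shift). A hypothetical sketch of the receiving side (handler name and policy invented here; real VMMs such as QEMU translate the address back to a guest physical address and inject a memory error):

    #define _GNU_SOURCE
    #include <signal.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical VMM-side handler for hardware-poisoned guest memory. */
    static void hwpoison_sigbus_handler(int sig, siginfo_t *info, void *ctx)
    {
            (void)sig; (void)ctx;

            if (info->si_code != BUS_MCEERR_AR)
                    _exit(EXIT_FAILURE);    /* unrelated bus error */

            /* si_addr is the faulting host VA, si_addr_lsb the granule size
             * set by kvm_send_hwpoison_signal(). Simplified: stdio is not
             * async-signal-safe, which is fine for a sketch. */
            fprintf(stderr, "hwpoison at %p, granule 2^%d bytes\n",
                    info->si_addr, (int)info->si_addr_lsb);

            /* A real VMM would record the guest page and emulate a memory
             * error event for the guest here instead of just logging. */
    }

    static void install_hwpoison_handler(void)
    {
            struct sigaction sa;

            memset(&sa, 0, sizeof(sa));
            sa.sa_sigaction = hwpoison_sigbus_handler;
            sa.sa_flags = SA_SIGINFO;
            sigaction(SIGBUS, &sa, NULL);
    }

Registering such a handler before KVM_RUN means a poisoned access in guest memory returns to userspace as a signal rather than killing the VMM outright.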
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 4b43e7f3b158..fc8a723ff387 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c | |||
@@ -203,6 +203,24 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) | |||
203 | return reg; | 203 | return reg; |
204 | } | 204 | } |
205 | 205 | ||
206 | static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu) | ||
207 | { | ||
208 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
209 | bool overflow = !!kvm_pmu_overflow_status(vcpu); | ||
210 | |||
211 | if (pmu->irq_level == overflow) | ||
212 | return; | ||
213 | |||
214 | pmu->irq_level = overflow; | ||
215 | |||
216 | if (likely(irqchip_in_kernel(vcpu->kvm))) { | ||
217 | int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, | ||
218 | pmu->irq_num, overflow, | ||
219 | &vcpu->arch.pmu); | ||
220 | WARN_ON(ret); | ||
221 | } | ||
222 | } | ||
223 | |||
206 | /** | 224 | /** |
207 | * kvm_pmu_overflow_set - set PMU overflow interrupt | 225 | * kvm_pmu_overflow_set - set PMU overflow interrupt |
208 | * @vcpu: The vcpu pointer | 226 | * @vcpu: The vcpu pointer |
@@ -210,37 +228,18 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) | |||
210 | */ | 228 | */ |
211 | void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) | 229 | void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) |
212 | { | 230 | { |
213 | u64 reg; | ||
214 | |||
215 | if (val == 0) | 231 | if (val == 0) |
216 | return; | 232 | return; |
217 | 233 | ||
218 | vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= val; | 234 | vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= val; |
219 | reg = kvm_pmu_overflow_status(vcpu); | 235 | kvm_pmu_check_overflow(vcpu); |
220 | if (reg != 0) | ||
221 | kvm_vcpu_kick(vcpu); | ||
222 | } | 236 | } |
223 | 237 | ||
224 | static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) | 238 | static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) |
225 | { | 239 | { |
226 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
227 | bool overflow; | ||
228 | |||
229 | if (!kvm_arm_pmu_v3_ready(vcpu)) | 240 | if (!kvm_arm_pmu_v3_ready(vcpu)) |
230 | return; | 241 | return; |
231 | 242 | kvm_pmu_check_overflow(vcpu); | |
232 | overflow = !!kvm_pmu_overflow_status(vcpu); | ||
233 | if (pmu->irq_level == overflow) | ||
234 | return; | ||
235 | |||
236 | pmu->irq_level = overflow; | ||
237 | |||
238 | if (likely(irqchip_in_kernel(vcpu->kvm))) { | ||
239 | int ret; | ||
240 | ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, | ||
241 | pmu->irq_num, overflow); | ||
242 | WARN_ON(ret); | ||
243 | } | ||
244 | } | 243 | } |
245 | 244 | ||
246 | bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) | 245 | bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) |
@@ -451,34 +450,74 @@ bool kvm_arm_support_pmu_v3(void) | |||
451 | return (perf_num_counters() > 0); | 450 | return (perf_num_counters() > 0); |
452 | } | 451 | } |
453 | 452 | ||
454 | static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) | 453 | int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) |
455 | { | 454 | { |
456 | if (!kvm_arm_support_pmu_v3()) | 455 | if (!vcpu->arch.pmu.created) |
457 | return -ENODEV; | 456 | return 0; |
458 | 457 | ||
459 | /* | 458 | /* |
460 | * We currently require an in-kernel VGIC to use the PMU emulation, | 459 | * A valid interrupt configuration for the PMU is either to have a |
461 | * because we do not support forwarding PMU overflow interrupts to | 460 | * properly configured interrupt number and using an in-kernel |
462 | * userspace yet. | 461 | * irqchip, or to not have an in-kernel GIC and not set an IRQ. |
463 | */ | 462 | */ |
464 | if (!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm)) | 463 | if (irqchip_in_kernel(vcpu->kvm)) { |
464 | int irq = vcpu->arch.pmu.irq_num; | ||
465 | if (!kvm_arm_pmu_irq_initialized(vcpu)) | ||
466 | return -EINVAL; | ||
467 | |||
468 | /* | ||
469 | * If we are using an in-kernel vgic, at this point we know | ||
470 | * the vgic will be initialized, so we can check the PMU irq | ||
471 | * number against the dimensions of the vgic and make sure | ||
472 | * it's valid. | ||
473 | */ | ||
474 | if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq)) | ||
475 | return -EINVAL; | ||
476 | } else if (kvm_arm_pmu_irq_initialized(vcpu)) { | ||
477 | return -EINVAL; | ||
478 | } | ||
479 | |||
480 | kvm_pmu_vcpu_reset(vcpu); | ||
481 | vcpu->arch.pmu.ready = true; | ||
482 | |||
483 | return 0; | ||
484 | } | ||
485 | |||
486 | static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) | ||
487 | { | ||
488 | if (!kvm_arm_support_pmu_v3()) | ||
465 | return -ENODEV; | 489 | return -ENODEV; |
466 | 490 | ||
467 | if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features) || | 491 | if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) |
468 | !kvm_arm_pmu_irq_initialized(vcpu)) | ||
469 | return -ENXIO; | 492 | return -ENXIO; |
470 | 493 | ||
471 | if (kvm_arm_pmu_v3_ready(vcpu)) | 494 | if (vcpu->arch.pmu.created) |
472 | return -EBUSY; | 495 | return -EBUSY; |
473 | 496 | ||
474 | kvm_pmu_vcpu_reset(vcpu); | 497 | if (irqchip_in_kernel(vcpu->kvm)) { |
475 | vcpu->arch.pmu.ready = true; | 498 | int ret; |
499 | |||
500 | /* | ||
501 | * If using the PMU with an in-kernel virtual GIC | ||
502 | * implementation, we require the GIC to be already | ||
503 | * initialized when initializing the PMU. | ||
504 | */ | ||
505 | if (!vgic_initialized(vcpu->kvm)) | ||
506 | return -ENODEV; | ||
507 | |||
508 | if (!kvm_arm_pmu_irq_initialized(vcpu)) | ||
509 | return -ENXIO; | ||
476 | 510 | ||
511 | ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num, | ||
512 | &vcpu->arch.pmu); | ||
513 | if (ret) | ||
514 | return ret; | ||
515 | } | ||
516 | |||
517 | vcpu->arch.pmu.created = true; | ||
477 | return 0; | 518 | return 0; |
478 | } | 519 | } |
479 | 520 | ||
480 | #define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS) | ||
481 | |||
482 | /* | 521 | /* |
483 | * For one VM the interrupt type must be same for each vcpu. | 522 | * For one VM the interrupt type must be same for each vcpu. |
484 | * As a PPI, the interrupt number is the same for all vcpus, | 523 | * As a PPI, the interrupt number is the same for all vcpus, |
@@ -512,6 +551,9 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) | |||
512 | int __user *uaddr = (int __user *)(long)attr->addr; | 551 | int __user *uaddr = (int __user *)(long)attr->addr; |
513 | int irq; | 552 | int irq; |
514 | 553 | ||
554 | if (!irqchip_in_kernel(vcpu->kvm)) | ||
555 | return -EINVAL; | ||
556 | |||
515 | if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) | 557 | if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) |
516 | return -ENODEV; | 558 | return -ENODEV; |
517 | 559 | ||
@@ -519,7 +561,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) | |||
519 | return -EFAULT; | 561 | return -EFAULT; |
520 | 562 | ||
521 | /* The PMU overflow interrupt can be a PPI or a valid SPI. */ | 563 | /* The PMU overflow interrupt can be a PPI or a valid SPI. */ |
522 | if (!(irq_is_ppi(irq) || vgic_valid_spi(vcpu->kvm, irq))) | 564 | if (!(irq_is_ppi(irq) || irq_is_spi(irq))) |
523 | return -EINVAL; | 565 | return -EINVAL; |
524 | 566 | ||
525 | if (!pmu_irq_is_valid(vcpu->kvm, irq)) | 567 | if (!pmu_irq_is_valid(vcpu->kvm, irq)) |
@@ -546,6 +588,9 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) | |||
546 | int __user *uaddr = (int __user *)(long)attr->addr; | 588 | int __user *uaddr = (int __user *)(long)attr->addr; |
547 | int irq; | 589 | int irq; |
548 | 590 | ||
591 | if (!irqchip_in_kernel(vcpu->kvm)) | ||
592 | return -EINVAL; | ||
593 | |||
549 | if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) | 594 | if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) |
550 | return -ENODEV; | 595 | return -ENODEV; |
551 | 596 | ||
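With the added irqchip_in_kernel() checks, the PMU device attributes are only accepted alongside an in-kernel irqchip, and the final validation of the chosen interrupt is deferred to kvm_arm_pmu_v3_enable() when the VCPU first runs. A hedged userspace sketch of the resulting call sequence, using the existing KVM_ARM_VCPU_PMU_V3_CTRL vcpu attribute group (the PPI number 23 is only an example):

#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

/*
 * Wire the PMU overflow interrupt and finalize the PMU for one VCPU.
 * Assumes an in-kernel GIC has already been created and that vcpu_fd was
 * created with the KVM_ARM_VCPU_PMU_V3 feature bit set.
 */
static int pmu_v3_setup(int vcpu_fd, int irq)
{
	int irq_val = irq;			/* e.g. PPI 23 */
	struct kvm_device_attr attr = {
		.group = KVM_ARM_VCPU_PMU_V3_CTRL,
		.attr  = KVM_ARM_VCPU_PMU_V3_IRQ,
		.addr  = (uint64_t)(unsigned long)&irq_val,
	};

	/* Now rejected with -EINVAL when there is no in-kernel irqchip. */
	if (ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr))
		return -1;

	/* Marks the PMU as created; the interrupt number itself is only
	 * checked against the vgic later, in kvm_arm_pmu_v3_enable(). */
	attr.attr = KVM_ARM_VCPU_PMU_V3_INIT;
	return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
}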
diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index a08d7a93aebb..f1e363bab5e8 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c | |||
@@ -57,6 +57,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) | |||
57 | * for KVM will preserve the register state. | 57 | * for KVM will preserve the register state. |
58 | */ | 58 | */ |
59 | kvm_vcpu_block(vcpu); | 59 | kvm_vcpu_block(vcpu); |
60 | kvm_clear_request(KVM_REQ_UNHALT, vcpu); | ||
60 | 61 | ||
61 | return PSCI_RET_SUCCESS; | 62 | return PSCI_RET_SUCCESS; |
62 | } | 63 | } |
@@ -64,6 +65,8 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) | |||
64 | static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) | 65 | static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) |
65 | { | 66 | { |
66 | vcpu->arch.power_off = true; | 67 | vcpu->arch.power_off = true; |
68 | kvm_make_request(KVM_REQ_SLEEP, vcpu); | ||
69 | kvm_vcpu_kick(vcpu); | ||
67 | } | 70 | } |
68 | 71 | ||
69 | static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) | 72 | static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) |
@@ -178,10 +181,9 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) | |||
178 | * after this call is handled and before the VCPUs have been | 181 | * after this call is handled and before the VCPUs have been |
179 | * re-initialized. | 182 | * re-initialized. |
180 | */ | 183 | */ |
181 | kvm_for_each_vcpu(i, tmp, vcpu->kvm) { | 184 | kvm_for_each_vcpu(i, tmp, vcpu->kvm) |
182 | tmp->arch.power_off = true; | 185 | tmp->arch.power_off = true; |
183 | kvm_vcpu_kick(tmp); | 186 | kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); |
184 | } | ||
185 | 187 | ||
186 | memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); | 188 | memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); |
187 | vcpu->run->system_event.type = type; | 189 | vcpu->run->system_event.type = type; |
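The PSCI changes above move from directly kicking VCPUs to the generic VCPU-request pattern: set KVM_REQ_SLEEP (for one VCPU, or for all of them via kvm_make_all_cpus_request()) and let the target clear the request before acting on it. A standalone model of that pattern, purely illustrative and outside the kernel, assuming one requester and one consumer:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define REQ_SLEEP	0

static atomic_ulong vcpu_requests;

static void make_request(int req)
{
	atomic_fetch_or(&vcpu_requests, 1UL << req);
	/* In the kernel, kvm_vcpu_kick() then forces the target VCPU out
	 * of guest mode so it notices the pending request. */
}

static bool check_request(int req)
{
	unsigned long mask = 1UL << req;

	if (!(atomic_load(&vcpu_requests) & mask))
		return false;
	atomic_fetch_and(&vcpu_requests, ~mask);	/* kvm_clear_request() */
	return true;
}

int main(void)
{
	make_request(REQ_SLEEP);		/* as in kvm_psci_vcpu_off() */
	printf("sleep requested: %d\n", check_request(REQ_SLEEP));
	printf("after handling:  %d\n", check_request(REQ_SLEEP));
	return 0;
}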
diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c index f138ed2e9c63..b7baf581611a 100644 --- a/virt/kvm/arm/vgic/vgic-irqfd.c +++ b/virt/kvm/arm/vgic/vgic-irqfd.c | |||
@@ -34,7 +34,7 @@ static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e, | |||
34 | 34 | ||
35 | if (!vgic_valid_spi(kvm, spi_id)) | 35 | if (!vgic_valid_spi(kvm, spi_id)) |
36 | return -EINVAL; | 36 | return -EINVAL; |
37 | return kvm_vgic_inject_irq(kvm, 0, spi_id, level); | 37 | return kvm_vgic_inject_irq(kvm, 0, spi_id, level, NULL); |
38 | } | 38 | } |
39 | 39 | ||
40 | /** | 40 | /** |
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index 63e0bbdcddcc..37522e65eb53 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c | |||
@@ -308,34 +308,36 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = { | |||
308 | vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12, | 308 | vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12, |
309 | VGIC_ACCESS_32bit), | 309 | VGIC_ACCESS_32bit), |
310 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP, | 310 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP, |
311 | vgic_mmio_read_rao, vgic_mmio_write_wi, 1, | 311 | vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1, |
312 | VGIC_ACCESS_32bit), | 312 | VGIC_ACCESS_32bit), |
313 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET, | 313 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET, |
314 | vgic_mmio_read_enable, vgic_mmio_write_senable, 1, | 314 | vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1, |
315 | VGIC_ACCESS_32bit), | 315 | VGIC_ACCESS_32bit), |
316 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR, | 316 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR, |
317 | vgic_mmio_read_enable, vgic_mmio_write_cenable, 1, | 317 | vgic_mmio_read_enable, vgic_mmio_write_cenable, NULL, NULL, 1, |
318 | VGIC_ACCESS_32bit), | 318 | VGIC_ACCESS_32bit), |
319 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET, | 319 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET, |
320 | vgic_mmio_read_pending, vgic_mmio_write_spending, 1, | 320 | vgic_mmio_read_pending, vgic_mmio_write_spending, NULL, NULL, 1, |
321 | VGIC_ACCESS_32bit), | 321 | VGIC_ACCESS_32bit), |
322 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR, | 322 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR, |
323 | vgic_mmio_read_pending, vgic_mmio_write_cpending, 1, | 323 | vgic_mmio_read_pending, vgic_mmio_write_cpending, NULL, NULL, 1, |
324 | VGIC_ACCESS_32bit), | 324 | VGIC_ACCESS_32bit), |
325 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, | 325 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, |
326 | vgic_mmio_read_active, vgic_mmio_write_sactive, 1, | 326 | vgic_mmio_read_active, vgic_mmio_write_sactive, |
327 | NULL, vgic_mmio_uaccess_write_sactive, 1, | ||
327 | VGIC_ACCESS_32bit), | 328 | VGIC_ACCESS_32bit), |
328 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR, | 329 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR, |
329 | vgic_mmio_read_active, vgic_mmio_write_cactive, 1, | 330 | vgic_mmio_read_active, vgic_mmio_write_cactive, |
331 | NULL, vgic_mmio_uaccess_write_cactive, 1, | ||
330 | VGIC_ACCESS_32bit), | 332 | VGIC_ACCESS_32bit), |
331 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI, | 333 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI, |
332 | vgic_mmio_read_priority, vgic_mmio_write_priority, 8, | 334 | vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, |
333 | VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), | 335 | 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), |
334 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_TARGET, | 336 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_TARGET, |
335 | vgic_mmio_read_target, vgic_mmio_write_target, 8, | 337 | vgic_mmio_read_target, vgic_mmio_write_target, NULL, NULL, 8, |
336 | VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), | 338 | VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), |
337 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_CONFIG, | 339 | REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_CONFIG, |
338 | vgic_mmio_read_config, vgic_mmio_write_config, 2, | 340 | vgic_mmio_read_config, vgic_mmio_write_config, NULL, NULL, 2, |
339 | VGIC_ACCESS_32bit), | 341 | VGIC_ACCESS_32bit), |
340 | REGISTER_DESC_WITH_LENGTH(GIC_DIST_SOFTINT, | 342 | REGISTER_DESC_WITH_LENGTH(GIC_DIST_SOFTINT, |
341 | vgic_mmio_read_raz, vgic_mmio_write_sgir, 4, | 343 | vgic_mmio_read_raz, vgic_mmio_write_sgir, 4, |
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 201d5e2e973d..714fa3933546 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c | |||
@@ -456,11 +456,13 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = { | |||
456 | vgic_mmio_read_raz, vgic_mmio_write_wi, 1, | 456 | vgic_mmio_read_raz, vgic_mmio_write_wi, 1, |
457 | VGIC_ACCESS_32bit), | 457 | VGIC_ACCESS_32bit), |
458 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER, | 458 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER, |
459 | vgic_mmio_read_active, vgic_mmio_write_sactive, NULL, NULL, 1, | 459 | vgic_mmio_read_active, vgic_mmio_write_sactive, |
460 | NULL, vgic_mmio_uaccess_write_sactive, 1, | ||
460 | VGIC_ACCESS_32bit), | 461 | VGIC_ACCESS_32bit), |
461 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER, | 462 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER, |
462 | vgic_mmio_read_active, vgic_mmio_write_cactive, NULL, NULL, 1, | 463 | vgic_mmio_read_active, vgic_mmio_write_cactive, |
463 | VGIC_ACCESS_32bit), | 464 | NULL, vgic_mmio_uaccess_write_cactive, |
465 | 1, VGIC_ACCESS_32bit), | ||
464 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR, | 466 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR, |
465 | vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, | 467 | vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, |
466 | 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), | 468 | 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), |
@@ -526,12 +528,14 @@ static const struct vgic_register_region vgic_v3_sgibase_registers[] = { | |||
526 | vgic_mmio_read_pending, vgic_mmio_write_cpending, | 528 | vgic_mmio_read_pending, vgic_mmio_write_cpending, |
527 | vgic_mmio_read_raz, vgic_mmio_write_wi, 4, | 529 | vgic_mmio_read_raz, vgic_mmio_write_wi, 4, |
528 | VGIC_ACCESS_32bit), | 530 | VGIC_ACCESS_32bit), |
529 | REGISTER_DESC_WITH_LENGTH(GICR_ISACTIVER0, | 531 | REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISACTIVER0, |
530 | vgic_mmio_read_active, vgic_mmio_write_sactive, 4, | 532 | vgic_mmio_read_active, vgic_mmio_write_sactive, |
531 | VGIC_ACCESS_32bit), | 533 | NULL, vgic_mmio_uaccess_write_sactive, |
532 | REGISTER_DESC_WITH_LENGTH(GICR_ICACTIVER0, | 534 | 4, VGIC_ACCESS_32bit), |
533 | vgic_mmio_read_active, vgic_mmio_write_cactive, 4, | 535 | REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICACTIVER0, |
534 | VGIC_ACCESS_32bit), | 536 | vgic_mmio_read_active, vgic_mmio_write_cactive, |
537 | NULL, vgic_mmio_uaccess_write_cactive, | ||
538 | 4, VGIC_ACCESS_32bit), | ||
535 | REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0, | 539 | REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0, |
536 | vgic_mmio_read_priority, vgic_mmio_write_priority, 32, | 540 | vgic_mmio_read_priority, vgic_mmio_write_priority, 32, |
537 | VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), | 541 | VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), |
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 1c17b2a2f105..c1e4bdd66131 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c | |||
@@ -231,56 +231,94 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, | |||
231 | * be migrated while we don't hold the IRQ locks and we don't want to be | 231 | * be migrated while we don't hold the IRQ locks and we don't want to be |
232 | * chasing moving targets. | 232 | * chasing moving targets. |
233 | * | 233 | * |
234 | * For private interrupts, we only have to make sure the single and only VCPU | 234 | * For private interrupts we don't have to do anything because userspace |
235 | * that can potentially queue the IRQ is stopped. | 235 | * accesses to the VGIC state already require all VCPUs to be stopped, and |
236 | * only the VCPU itself can modify its private interrupts active state, which | ||
237 | * guarantees that the VCPU is not running. | ||
236 | */ | 238 | */ |
237 | static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid) | 239 | static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid) |
238 | { | 240 | { |
239 | if (intid < VGIC_NR_PRIVATE_IRQS) | 241 | if (intid > VGIC_NR_PRIVATE_IRQS) |
240 | kvm_arm_halt_vcpu(vcpu); | ||
241 | else | ||
242 | kvm_arm_halt_guest(vcpu->kvm); | 242 | kvm_arm_halt_guest(vcpu->kvm); |
243 | } | 243 | } |
244 | 244 | ||
245 | /* See vgic_change_active_prepare */ | 245 | /* See vgic_change_active_prepare */ |
246 | static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid) | 246 | static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid) |
247 | { | 247 | { |
248 | if (intid < VGIC_NR_PRIVATE_IRQS) | 248 | if (intid > VGIC_NR_PRIVATE_IRQS) |
249 | kvm_arm_resume_vcpu(vcpu); | ||
250 | else | ||
251 | kvm_arm_resume_guest(vcpu->kvm); | 249 | kvm_arm_resume_guest(vcpu->kvm); |
252 | } | 250 | } |
253 | 251 | ||
254 | void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, | 252 | static void __vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, |
255 | gpa_t addr, unsigned int len, | 253 | gpa_t addr, unsigned int len, |
256 | unsigned long val) | 254 | unsigned long val) |
257 | { | 255 | { |
258 | u32 intid = VGIC_ADDR_TO_INTID(addr, 1); | 256 | u32 intid = VGIC_ADDR_TO_INTID(addr, 1); |
259 | int i; | 257 | int i; |
260 | 258 | ||
261 | vgic_change_active_prepare(vcpu, intid); | ||
262 | for_each_set_bit(i, &val, len * 8) { | 259 | for_each_set_bit(i, &val, len * 8) { |
263 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | 260 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); |
264 | vgic_mmio_change_active(vcpu, irq, false); | 261 | vgic_mmio_change_active(vcpu, irq, false); |
265 | vgic_put_irq(vcpu->kvm, irq); | 262 | vgic_put_irq(vcpu->kvm, irq); |
266 | } | 263 | } |
267 | vgic_change_active_finish(vcpu, intid); | ||
268 | } | 264 | } |
269 | 265 | ||
270 | void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, | 266 | void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, |
271 | gpa_t addr, unsigned int len, | 267 | gpa_t addr, unsigned int len, |
272 | unsigned long val) | 268 | unsigned long val) |
273 | { | 269 | { |
274 | u32 intid = VGIC_ADDR_TO_INTID(addr, 1); | 270 | u32 intid = VGIC_ADDR_TO_INTID(addr, 1); |
275 | int i; | ||
276 | 271 | ||
272 | mutex_lock(&vcpu->kvm->lock); | ||
277 | vgic_change_active_prepare(vcpu, intid); | 273 | vgic_change_active_prepare(vcpu, intid); |
274 | |||
275 | __vgic_mmio_write_cactive(vcpu, addr, len, val); | ||
276 | |||
277 | vgic_change_active_finish(vcpu, intid); | ||
278 | mutex_unlock(&vcpu->kvm->lock); | ||
279 | } | ||
280 | |||
281 | void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, | ||
282 | gpa_t addr, unsigned int len, | ||
283 | unsigned long val) | ||
284 | { | ||
285 | __vgic_mmio_write_cactive(vcpu, addr, len, val); | ||
286 | } | ||
287 | |||
288 | static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, | ||
289 | gpa_t addr, unsigned int len, | ||
290 | unsigned long val) | ||
291 | { | ||
292 | u32 intid = VGIC_ADDR_TO_INTID(addr, 1); | ||
293 | int i; | ||
294 | |||
278 | for_each_set_bit(i, &val, len * 8) { | 295 | for_each_set_bit(i, &val, len * 8) { |
279 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | 296 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); |
280 | vgic_mmio_change_active(vcpu, irq, true); | 297 | vgic_mmio_change_active(vcpu, irq, true); |
281 | vgic_put_irq(vcpu->kvm, irq); | 298 | vgic_put_irq(vcpu->kvm, irq); |
282 | } | 299 | } |
300 | } | ||
301 | |||
302 | void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, | ||
303 | gpa_t addr, unsigned int len, | ||
304 | unsigned long val) | ||
305 | { | ||
306 | u32 intid = VGIC_ADDR_TO_INTID(addr, 1); | ||
307 | |||
308 | mutex_lock(&vcpu->kvm->lock); | ||
309 | vgic_change_active_prepare(vcpu, intid); | ||
310 | |||
311 | __vgic_mmio_write_sactive(vcpu, addr, len, val); | ||
312 | |||
283 | vgic_change_active_finish(vcpu, intid); | 313 | vgic_change_active_finish(vcpu, intid); |
314 | mutex_unlock(&vcpu->kvm->lock); | ||
315 | } | ||
316 | |||
317 | void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, | ||
318 | gpa_t addr, unsigned int len, | ||
319 | unsigned long val) | ||
320 | { | ||
321 | __vgic_mmio_write_sactive(vcpu, addr, len, val); | ||
284 | } | 322 | } |
285 | 323 | ||
286 | unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, | 324 | unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, |
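The guest-facing ISACTIVER/ICACTIVER handlers now serialize on kvm->lock and halt the whole guest before touching active state, while the new uaccess variants skip that entirely, because userspace accesses already require all VCPUs to be stopped. Those uaccess handlers are reached through the vgic KVM-device attribute interface; a hedged sketch of that userspace path for GICv2 (offset 0x300 is GICD_ISACTIVER0, error handling trimmed; gic_fd is assumed to be a KVM_DEV_TYPE_ARM_VGIC_V2 device fd):

#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

#define GICD_ISACTIVER0		0x300	/* GIC_DIST_ACTIVE_SET */

static int vgic_v2_restore_active0(int gic_fd, uint64_t cpuid,
				   uint32_t active_bits)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
		.attr  = (cpuid << KVM_DEV_ARM_VGIC_CPUID_SHIFT) |
			 GICD_ISACTIVER0,
		.addr  = (uint64_t)(unsigned long)&active_bits,
	};

	/* Ends up in vgic_mmio_uaccess_write_sactive(), not the MMIO path. */
	return ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &attr);
}

Reads of the same register use KVM_GET_DEVICE_ATTR and, since uaccess_read is left NULL here, fall back to the shared vgic_mmio_read_active() handler.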
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index ea4171acdef3..5693f6df45ec 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h | |||
@@ -75,7 +75,7 @@ extern struct kvm_io_device_ops kvm_io_gic_ops; | |||
75 | * The _WITH_LENGTH version instantiates registers with a fixed length | 75 | * The _WITH_LENGTH version instantiates registers with a fixed length |
76 | * and is mutually exclusive with the _PER_IRQ version. | 76 | * and is mutually exclusive with the _PER_IRQ version. |
77 | */ | 77 | */ |
78 | #define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc) \ | 78 | #define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, ur, uw, bpi, acc) \ |
79 | { \ | 79 | { \ |
80 | .reg_offset = off, \ | 80 | .reg_offset = off, \ |
81 | .bits_per_irq = bpi, \ | 81 | .bits_per_irq = bpi, \ |
@@ -83,6 +83,8 @@ extern struct kvm_io_device_ops kvm_io_gic_ops; | |||
83 | .access_flags = acc, \ | 83 | .access_flags = acc, \ |
84 | .read = rd, \ | 84 | .read = rd, \ |
85 | .write = wr, \ | 85 | .write = wr, \ |
86 | .uaccess_read = ur, \ | ||
87 | .uaccess_write = uw, \ | ||
86 | } | 88 | } |
87 | 89 | ||
88 | #define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \ | 90 | #define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \ |
@@ -165,6 +167,14 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, | |||
165 | gpa_t addr, unsigned int len, | 167 | gpa_t addr, unsigned int len, |
166 | unsigned long val); | 168 | unsigned long val); |
167 | 169 | ||
170 | void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, | ||
171 | gpa_t addr, unsigned int len, | ||
172 | unsigned long val); | ||
173 | |||
174 | void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, | ||
175 | gpa_t addr, unsigned int len, | ||
176 | unsigned long val); | ||
177 | |||
168 | unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, | 178 | unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, |
169 | gpa_t addr, unsigned int len); | 179 | gpa_t addr, unsigned int len); |
170 | 180 | ||
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 030248e669f6..96ea597db0e7 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c | |||
@@ -21,6 +21,10 @@ | |||
21 | 21 | ||
22 | #include "vgic.h" | 22 | #include "vgic.h" |
23 | 23 | ||
24 | static bool group0_trap; | ||
25 | static bool group1_trap; | ||
26 | static bool common_trap; | ||
27 | |||
24 | void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) | 28 | void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) |
25 | { | 29 | { |
26 | struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; | 30 | struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; |
@@ -258,6 +262,12 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) | |||
258 | 262 | ||
259 | /* Get the show on the road... */ | 263 | /* Get the show on the road... */ |
260 | vgic_v3->vgic_hcr = ICH_HCR_EN; | 264 | vgic_v3->vgic_hcr = ICH_HCR_EN; |
265 | if (group0_trap) | ||
266 | vgic_v3->vgic_hcr |= ICH_HCR_TALL0; | ||
267 | if (group1_trap) | ||
268 | vgic_v3->vgic_hcr |= ICH_HCR_TALL1; | ||
269 | if (common_trap) | ||
270 | vgic_v3->vgic_hcr |= ICH_HCR_TC; | ||
261 | } | 271 | } |
262 | 272 | ||
263 | int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) | 273 | int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) |
@@ -429,6 +439,26 @@ out: | |||
429 | return ret; | 439 | return ret; |
430 | } | 440 | } |
431 | 441 | ||
442 | DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap); | ||
443 | |||
444 | static int __init early_group0_trap_cfg(char *buf) | ||
445 | { | ||
446 | return strtobool(buf, &group0_trap); | ||
447 | } | ||
448 | early_param("kvm-arm.vgic_v3_group0_trap", early_group0_trap_cfg); | ||
449 | |||
450 | static int __init early_group1_trap_cfg(char *buf) | ||
451 | { | ||
452 | return strtobool(buf, &group1_trap); | ||
453 | } | ||
454 | early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg); | ||
455 | |||
456 | static int __init early_common_trap_cfg(char *buf) | ||
457 | { | ||
458 | return strtobool(buf, &common_trap); | ||
459 | } | ||
460 | early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg); | ||
461 | |||
432 | /** | 462 | /** |
433 | * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT | 463 | * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT |
434 | * @node: pointer to the DT node | 464 | * @node: pointer to the DT node |
@@ -480,6 +510,21 @@ int vgic_v3_probe(const struct gic_kvm_info *info) | |||
480 | if (kvm_vgic_global_state.vcpu_base == 0) | 510 | if (kvm_vgic_global_state.vcpu_base == 0) |
481 | kvm_info("disabling GICv2 emulation\n"); | 511 | kvm_info("disabling GICv2 emulation\n"); |
482 | 512 | ||
513 | #ifdef CONFIG_ARM64 | ||
514 | if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_30115)) { | ||
515 | group0_trap = true; | ||
516 | group1_trap = true; | ||
517 | } | ||
518 | #endif | ||
519 | |||
520 | if (group0_trap || group1_trap || common_trap) { | ||
521 | kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n", | ||
522 | group0_trap ? "G0" : "", | ||
523 | group1_trap ? "G1" : "", | ||
524 | common_trap ? "C" : ""); | ||
525 | static_branch_enable(&vgic_v3_cpuif_trap); | ||
526 | } | ||
527 | |||
483 | kvm_vgic_global_state.vctrl_base = NULL; | 528 | kvm_vgic_global_state.vctrl_base = NULL; |
484 | kvm_vgic_global_state.type = VGIC_V3; | 529 | kvm_vgic_global_state.type = VGIC_V3; |
485 | kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS; | 530 | kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS; |
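The three new early parameters are parsed with strtobool(), so they accept the usual boolean spellings on the kernel command line (for example kvm-arm.vgic_v3_group0_trap=1). A simplified userspace re-implementation of that parsing, illustrative only (the in-kernel helper also accepts a few more spellings such as on/off):

#include <stdbool.h>
#include <stdio.h>

static int strtobool_like(const char *s, bool *res)
{
	if (!s)
		return -1;			/* -EINVAL in the kernel */
	switch (s[0]) {
	case 'y': case 'Y': case '1':
		*res = true;
		return 0;
	case 'n': case 'N': case '0':
		*res = false;
		return 0;
	default:
		return -1;
	}
}

int main(void)
{
	bool group0_trap = false;

	/* e.g. booting with kvm-arm.vgic_v3_group0_trap=1 */
	if (strtobool_like("1", &group0_trap) == 0)
		printf("group0_trap = %d\n", group0_trap);
	return 0;
}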
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 83b24d20ff8f..fed717e07938 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c | |||
@@ -35,11 +35,12 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { | |||
35 | 35 | ||
36 | /* | 36 | /* |
37 | * Locking order is always: | 37 | * Locking order is always: |
38 | * its->cmd_lock (mutex) | 38 | * kvm->lock (mutex) |
39 | * its->its_lock (mutex) | 39 | * its->cmd_lock (mutex) |
40 | * vgic_cpu->ap_list_lock | 40 | * its->its_lock (mutex) |
41 | * kvm->lpi_list_lock | 41 | * vgic_cpu->ap_list_lock |
42 | * vgic_irq->irq_lock | 42 | * kvm->lpi_list_lock |
43 | * vgic_irq->irq_lock | ||
43 | * | 44 | * |
44 | * If you need to take multiple locks, always take the upper lock first, | 45 | * If you need to take multiple locks, always take the upper lock first, |
45 | * then the lower ones, e.g. first take the its_lock, then the irq_lock. | 46 | * then the lower ones, e.g. first take the its_lock, then the irq_lock. |
@@ -234,10 +235,14 @@ static void vgic_sort_ap_list(struct kvm_vcpu *vcpu) | |||
234 | 235 | ||
235 | /* | 236 | /* |
236 | * Only valid injection if changing level for level-triggered IRQs or for a | 237 | * Only valid injection if changing level for level-triggered IRQs or for a |
237 | * rising edge. | 238 | * rising edge, and in-kernel connected IRQ lines can only be controlled by |
239 | * their owner. | ||
238 | */ | 240 | */ |
239 | static bool vgic_validate_injection(struct vgic_irq *irq, bool level) | 241 | static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner) |
240 | { | 242 | { |
243 | if (irq->owner != owner) | ||
244 | return false; | ||
245 | |||
241 | switch (irq->config) { | 246 | switch (irq->config) { |
242 | case VGIC_CONFIG_LEVEL: | 247 | case VGIC_CONFIG_LEVEL: |
243 | return irq->line_level != level; | 248 | return irq->line_level != level; |
@@ -285,8 +290,10 @@ retry: | |||
285 | * won't see this one until it exits for some other | 290 | * won't see this one until it exits for some other |
286 | * reason. | 291 | * reason. |
287 | */ | 292 | */ |
288 | if (vcpu) | 293 | if (vcpu) { |
294 | kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); | ||
289 | kvm_vcpu_kick(vcpu); | 295 | kvm_vcpu_kick(vcpu); |
296 | } | ||
290 | return false; | 297 | return false; |
291 | } | 298 | } |
292 | 299 | ||
@@ -332,6 +339,7 @@ retry: | |||
332 | spin_unlock(&irq->irq_lock); | 339 | spin_unlock(&irq->irq_lock); |
333 | spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); | 340 | spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); |
334 | 341 | ||
342 | kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); | ||
335 | kvm_vcpu_kick(vcpu); | 343 | kvm_vcpu_kick(vcpu); |
336 | 344 | ||
337 | return true; | 345 | return true; |
@@ -346,13 +354,16 @@ retry: | |||
346 | * false: to ignore the call | 354 | * false: to ignore the call |
347 | * Level-sensitive true: raise the input signal | 355 | * Level-sensitive true: raise the input signal |
348 | * false: lower the input signal | 356 | * false: lower the input signal |
357 | * @owner: The opaque pointer to the owner of the IRQ being raised to verify | ||
358 | * that the caller is allowed to inject this IRQ. Userspace | ||
359 | * injections will have owner == NULL. | ||
349 | * | 360 | * |
350 | * The VGIC is not concerned with devices being active-LOW or active-HIGH for | 361 | * The VGIC is not concerned with devices being active-LOW or active-HIGH for |
351 | * level-sensitive interrupts. You can think of the level parameter as 1 | 362 | * level-sensitive interrupts. You can think of the level parameter as 1 |
352 | * being HIGH and 0 being LOW and all devices being active-HIGH. | 363 | * being HIGH and 0 being LOW and all devices being active-HIGH. |
353 | */ | 364 | */ |
354 | int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, | 365 | int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, |
355 | bool level) | 366 | bool level, void *owner) |
356 | { | 367 | { |
357 | struct kvm_vcpu *vcpu; | 368 | struct kvm_vcpu *vcpu; |
358 | struct vgic_irq *irq; | 369 | struct vgic_irq *irq; |
@@ -374,7 +385,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, | |||
374 | 385 | ||
375 | spin_lock(&irq->irq_lock); | 386 | spin_lock(&irq->irq_lock); |
376 | 387 | ||
377 | if (!vgic_validate_injection(irq, level)) { | 388 | if (!vgic_validate_injection(irq, level, owner)) { |
378 | /* Nothing to see here, move along... */ | 389 | /* Nothing to see here, move along... */ |
379 | spin_unlock(&irq->irq_lock); | 390 | spin_unlock(&irq->irq_lock); |
380 | vgic_put_irq(kvm, irq); | 391 | vgic_put_irq(kvm, irq); |
@@ -431,6 +442,39 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) | |||
431 | } | 442 | } |
432 | 443 | ||
433 | /** | 444 | /** |
445 | * kvm_vgic_set_owner - Set the owner of an interrupt for a VM | ||
446 | * | ||
447 | * @vcpu: Pointer to the VCPU (used for PPIs) | ||
448 | * @intid: The virtual INTID identifying the interrupt (PPI or SPI) | ||
449 | * @owner: Opaque pointer to the owner | ||
450 | * | ||
451 | * Returns 0 if intid is not already used by another in-kernel device and the | ||
452 | * owner is set, otherwise returns an error code. | ||
453 | */ | ||
454 | int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner) | ||
455 | { | ||
456 | struct vgic_irq *irq; | ||
457 | int ret = 0; | ||
458 | |||
459 | if (!vgic_initialized(vcpu->kvm)) | ||
460 | return -EAGAIN; | ||
461 | |||
462 | /* SGIs and LPIs cannot be wired up to any device */ | ||
463 | if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid)) | ||
464 | return -EINVAL; | ||
465 | |||
466 | irq = vgic_get_irq(vcpu->kvm, vcpu, intid); | ||
467 | spin_lock(&irq->irq_lock); | ||
468 | if (irq->owner && irq->owner != owner) | ||
469 | ret = -EEXIST; | ||
470 | else | ||
471 | irq->owner = owner; | ||
472 | spin_unlock(&irq->irq_lock); | ||
473 | |||
474 | return ret; | ||
475 | } | ||
476 | |||
477 | /** | ||
434 | * vgic_prune_ap_list - Remove non-relevant interrupts from the list | 478 | * vgic_prune_ap_list - Remove non-relevant interrupts from the list |
435 | * | 479 | * |
436 | * @vcpu: The VCPU pointer | 480 | * @vcpu: The VCPU pointer |
@@ -721,8 +765,10 @@ void vgic_kick_vcpus(struct kvm *kvm) | |||
721 | * a good kick... | 765 | * a good kick... |
722 | */ | 766 | */ |
723 | kvm_for_each_vcpu(c, vcpu, kvm) { | 767 | kvm_for_each_vcpu(c, vcpu, kvm) { |
724 | if (kvm_vgic_vcpu_pending_irq(vcpu)) | 768 | if (kvm_vgic_vcpu_pending_irq(vcpu)) { |
769 | kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); | ||
725 | kvm_vcpu_kick(vcpu); | 770 | kvm_vcpu_kick(vcpu); |
771 | } | ||
726 | } | 772 | } |
727 | } | 773 | } |
728 | 774 | ||
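kvm_vgic_set_owner() plus the owner argument to kvm_vgic_inject_irq() mean that an interrupt line claimed by an in-kernel user (for example the PMU, as shown in the pmu.c hunk above) can no longer be driven from userspace or irqfd, which both inject with owner == NULL. A tiny standalone model of just that check, illustrative and outside the kernel:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct irq_model {
	void *owner;		/* NULL: unclaimed, may be driven by userspace */
	bool line_level;
};

/* Mirrors the shape of vgic_validate_injection() for a level-triggered IRQ:
 * the ownership check comes first, then only level changes are accepted. */
static bool validate_injection(struct irq_model *irq, bool level, void *owner)
{
	if (irq->owner != owner)
		return false;
	return irq->line_level != level;
}

int main(void)
{
	struct irq_model irq = { .owner = NULL, .line_level = false };
	int pmu_token;			/* stands in for &vcpu->arch.pmu */

	printf("userspace raise: %d\n", validate_injection(&irq, true, NULL));
	irq.owner = &pmu_token;		/* kvm_vgic_set_owner() equivalent */
	printf("after PMU claims the line: %d\n",
	       validate_injection(&irq, true, NULL));
	return 0;
}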
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f0fe9d02f6bb..19f0ecb9b93e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -73,17 +73,17 @@ MODULE_LICENSE("GPL"); | |||
73 | 73 | ||
74 | /* Architectures should define their poll value according to the halt latency */ | 74 | /* Architectures should define their poll value according to the halt latency */ |
75 | unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT; | 75 | unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT; |
76 | module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); | 76 | module_param(halt_poll_ns, uint, 0644); |
77 | EXPORT_SYMBOL_GPL(halt_poll_ns); | 77 | EXPORT_SYMBOL_GPL(halt_poll_ns); |
78 | 78 | ||
79 | /* Default doubles per-vcpu halt_poll_ns. */ | 79 | /* Default doubles per-vcpu halt_poll_ns. */ |
80 | unsigned int halt_poll_ns_grow = 2; | 80 | unsigned int halt_poll_ns_grow = 2; |
81 | module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR); | 81 | module_param(halt_poll_ns_grow, uint, 0644); |
82 | EXPORT_SYMBOL_GPL(halt_poll_ns_grow); | 82 | EXPORT_SYMBOL_GPL(halt_poll_ns_grow); |
83 | 83 | ||
84 | /* Default resets per-vcpu halt_poll_ns . */ | 84 | /* Default resets per-vcpu halt_poll_ns . */ |
85 | unsigned int halt_poll_ns_shrink; | 85 | unsigned int halt_poll_ns_shrink; |
86 | module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR); | 86 | module_param(halt_poll_ns_shrink, uint, 0644); |
87 | EXPORT_SYMBOL_GPL(halt_poll_ns_shrink); | 87 | EXPORT_SYMBOL_GPL(halt_poll_ns_shrink); |
88 | 88 | ||
89 | /* | 89 | /* |
@@ -3191,6 +3191,12 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) | |||
3191 | return PTR_ERR(file); | 3191 | return PTR_ERR(file); |
3192 | } | 3192 | } |
3193 | 3193 | ||
3194 | /* | ||
3195 | * Don't call kvm_put_kvm anymore at this point; file->f_op is | ||
3196 | * already set, with ->release() being kvm_vm_release(). In error | ||
3197 | * cases it will be called by the final fput(file) and will take | ||
3198 | * care of doing kvm_put_kvm(kvm). | ||
3199 | */ | ||
3194 | if (kvm_create_vm_debugfs(kvm, r) < 0) { | 3200 | if (kvm_create_vm_debugfs(kvm, r) < 0) { |
3195 | put_unused_fd(r); | 3201 | put_unused_fd(r); |
3196 | fput(file); | 3202 | fput(file); |
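Earlier in this kvm_main.c diff, the halt-polling knobs only switch from the S_IRUGO | S_IWUSR macros to the equivalent octal 0644, so they remain runtime-tunable through sysfs. A small illustrative reader (the path assumes the kvm module is loaded):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/module/kvm/parameters/halt_poll_ns", "r");
	unsigned int ns;

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%u", &ns) == 1)
		printf("halt_poll_ns = %u ns\n", ns);
	fclose(f);
	return 0;
}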